diff --git a/Cargo.lock b/Cargo.lock
index 4fc058cb19..e08cae4f2b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1604,6 +1604,7 @@ dependencies = [
  "wgpu",
  "wgpu-core",
  "wgpu-hal",
+ "windows 0.60.0",
  "workspace-hack",
 ]
@@ -1662,6 +1663,7 @@ dependencies = [
  "ffmpeg-next",
  "tokio",
  "tracing",
+ "windows 0.60.0",
  "workspace-hack",
 ]
diff --git a/apps/desktop/src-tauri/src/editor_window.rs b/apps/desktop/src-tauri/src/editor_window.rs
index 46a3bbd1ca..66ea67b975 100644
--- a/apps/desktop/src-tauri/src/editor_window.rs
+++ b/apps/desktop/src-tauri/src/editor_window.rs
@@ -16,6 +16,7 @@ fn strip_frame_padding(frame: RenderedFrame) -> Result<(Vec<u8>, u32), &'static str> {
         .width
         .checked_mul(4)
         .ok_or("overflow computing expected_stride")?;
+
     if frame.padded_bytes_per_row == expected_stride {
         Ok((frame.data, expected_stride))
     } else {
@@ -30,6 +31,7 @@ fn strip_frame_padding(frame: RenderedFrame) -> Result<(Vec<u8>, u32), &'static str> {
             let end = start + expected_stride as usize;
             stripped.extend_from_slice(&frame.data[start..end]);
         }
+
         Ok((stripped, expected_stride))
     }
 }
diff --git a/apps/desktop/src-tauri/src/gpu_context.rs b/apps/desktop/src-tauri/src/gpu_context.rs
index 28372a58c2..50c87ea7e2 100644
--- a/apps/desktop/src-tauri/src/gpu_context.rs
+++ b/apps/desktop/src-tauri/src/gpu_context.rs
@@ -42,6 +42,7 @@ pub struct SharedGpuContext {
     pub queue: Arc<wgpu::Queue>,
     pub adapter: Arc<wgpu::Adapter>,
     pub instance: Arc<wgpu::Instance>,
+    pub is_software_adapter: bool,
 }

 static GPU: OnceCell<Option<SharedGpuContext>> = OnceCell::const_new();
@@ -49,14 +50,41 @@ static GPU: OnceCell<Option<SharedGpuContext>> = OnceCell::const_new();
 pub async fn get_shared_gpu() -> Option<&'static SharedGpuContext> {
     GPU.get_or_init(|| async {
         let instance = wgpu::Instance::new(&wgpu::InstanceDescriptor::default());
-        let adapter = instance
+
+        let hardware_adapter = instance
             .request_adapter(&wgpu::RequestAdapterOptions {
                 power_preference: wgpu::PowerPreference::HighPerformance,
                 force_fallback_adapter: false,
                 compatible_surface: None,
             })
             .await
-            .ok()?;
+            .ok();
+
+        let (adapter, is_software_adapter) = if let Some(adapter) = hardware_adapter {
+            tracing::info!(
+                adapter_name = adapter.get_info().name,
+                adapter_backend = ?adapter.get_info().backend,
+                "Using hardware GPU adapter for shared context"
+            );
+            (adapter, false)
+        } else {
+            tracing::warn!("No hardware GPU adapter found, attempting software fallback for shared context");
+            let software_adapter = instance
+                .request_adapter(&wgpu::RequestAdapterOptions {
+                    power_preference: wgpu::PowerPreference::LowPower,
+                    force_fallback_adapter: true,
+                    compatible_surface: None,
+                })
+                .await
+                .ok()?;
+
+            tracing::info!(
+                adapter_name = software_adapter.get_info().name,
+                adapter_backend = ?software_adapter.get_info().backend,
+                "Using software adapter for shared context (CPU rendering - performance may be reduced)"
+            );
+            (software_adapter, true)
+        };

         let (device, queue) = adapter
             .request_device(&wgpu::DeviceDescriptor {
@@ -72,6 +100,7 @@ pub async fn get_shared_gpu() -> Option<&'static SharedGpuContext> {
             queue: Arc::new(queue),
             adapter: Arc::new(adapter),
             instance: Arc::new(instance),
+            is_software_adapter,
         })
     })
     .await
diff --git a/apps/desktop/src-tauri/src/lib.rs b/apps/desktop/src-tauri/src/lib.rs
index 98eccdfef2..63e874c356 100644
--- a/apps/desktop/src-tauri/src/lib.rs
+++ b/apps/desktop/src-tauri/src/lib.rs
@@ -809,7 +809,7 @@ pub struct RecordingInfo {
 enum CurrentRecordingTarget {
     Window {
         id: WindowId,
-        bounds: LogicalBounds,
+        bounds: Option<LogicalBounds>,
     },
     Screen {
         id: DisplayId,
@@ -841,33 +841,55 @@ struct CurrentRecording {
 async fn get_current_recording(
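// Editor's note on the gpu_context.rs change above (hedged summary): adapter
// selection is now two-pass. A HighPerformance hardware adapter is requested
// first; only if none exists does it retry with force_fallback_adapter: true,
// which asks wgpu for a software rasterizer (e.g. WARP on DirectX 12). The
// resulting is_software_adapter flag rides along in SharedGpuContext so
// consumers (RendererLayers::new_with_options, later in this diff) can choose
// cheaper render paths on CPU-backed adapters.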
state: MutableState<'_, App>, ) -> Result>, ()> { + tracing::debug!("get_current_recording called"); let state = state.read().await; let (mode, capture_target, status) = match &state.recording_state { - RecordingState::None => return Ok(JsonValue::new(&None)), - RecordingState::Pending { mode, target } => (*mode, target, RecordingStatus::Pending), - RecordingState::Active(inner) => ( - inner.mode(), - inner.capture_target(), - RecordingStatus::Recording, - ), + RecordingState::None => { + tracing::debug!("get_current_recording: state is None"); + return Ok(JsonValue::new(&None)); + } + RecordingState::Pending { mode, target } => { + tracing::debug!("get_current_recording: state is Pending"); + (*mode, target, RecordingStatus::Pending) + } + RecordingState::Active(inner) => { + tracing::debug!("get_current_recording: state is Active"); + ( + inner.mode(), + inner.capture_target(), + RecordingStatus::Recording, + ) + } }; let target = match capture_target { - ScreenCaptureTarget::Display { id } => CurrentRecordingTarget::Screen { id: id.clone() }, - ScreenCaptureTarget::Window { id } => CurrentRecordingTarget::Window { - id: id.clone(), - bounds: scap_targets::Window::from_id(id) - .ok_or(())? - .display_relative_logical_bounds() - .ok_or(())?, - }, - ScreenCaptureTarget::Area { screen, bounds } => CurrentRecordingTarget::Area { - screen: screen.clone(), - bounds: *bounds, - }, + ScreenCaptureTarget::Display { id } => { + tracing::debug!("get_current_recording: target is Display"); + CurrentRecordingTarget::Screen { id: id.clone() } + } + ScreenCaptureTarget::Window { id } => { + let bounds = + scap_targets::Window::from_id(id).and_then(|w| w.display_relative_logical_bounds()); + tracing::debug!( + "get_current_recording: target is Window, bounds={:?}", + bounds + ); + CurrentRecordingTarget::Window { + id: id.clone(), + bounds, + } + } + ScreenCaptureTarget::Area { screen, bounds } => { + tracing::debug!("get_current_recording: target is Area"); + CurrentRecordingTarget::Area { + screen: screen.clone(), + bounds: *bounds, + } + } }; + tracing::debug!("get_current_recording: returning Some(CurrentRecording)"); Ok(JsonValue::new(&Some(CurrentRecording { target, mode, diff --git a/apps/desktop/src-tauri/src/screenshot_editor.rs b/apps/desktop/src-tauri/src/screenshot_editor.rs index 4b0e41d914..58f7a5f8f6 100644 --- a/apps/desktop/src-tauri/src/screenshot_editor.rs +++ b/apps/desktop/src-tauri/src/screenshot_editor.rs @@ -232,13 +232,14 @@ impl ScreenshotEditorInstances { } }; - let (instance, adapter, device, queue) = + let (instance, adapter, device, queue, is_software_adapter) = if let Some(shared) = gpu_context::get_shared_gpu().await { ( shared.instance.clone(), shared.adapter.clone(), shared.device.clone(), shared.queue.clone(), + shared.is_software_adapter, ) } else { let instance = @@ -262,7 +263,7 @@ impl ScreenshotEditorInstances { }) .await .map_err(|e| e.to_string())?; - (instance, adapter, Arc::new(device), Arc::new(queue)) + (instance, adapter, Arc::new(device), Arc::new(queue), false) }; let options = cap_rendering::RenderOptions { @@ -285,6 +286,7 @@ impl ScreenshotEditorInstances { meta: studio_meta, recording_meta: recording_meta.clone(), background_textures: Arc::new(tokio::sync::RwLock::new(HashMap::new())), + is_software_adapter, }; let (config_tx, mut config_rx) = watch::channel(loaded_config.unwrap_or_default()); @@ -304,7 +306,11 @@ impl ScreenshotEditorInstances { tokio::spawn(async move { let mut frame_renderer = FrameRenderer::new(&constants); - let mut layers = 
RendererLayers::new(&constants.device, &constants.queue); + let mut layers = RendererLayers::new_with_options( + &constants.device, + &constants.queue, + constants.is_software_adapter, + ); let shutdown_token = render_shutdown_token; // Initial render diff --git a/apps/desktop/src-tauri/src/windows.rs b/apps/desktop/src-tauri/src/windows.rs index b922843054..d09d54d0dd 100644 --- a/apps/desktop/src-tauri/src/windows.rs +++ b/apps/desktop/src-tauri/src/windows.rs @@ -708,6 +708,40 @@ impl ShowCapWindow { let title = CapWindowId::RecordingControls.title(); let should_protect = should_protect_window(app, &title); + let pos_x = ((monitor.size().width as f64) / monitor.scale_factor() - width) / 2.0; + let pos_y = + (monitor.size().height as f64) / monitor.scale_factor() - height - 120.0; + + debug!( + "InProgressRecording window: monitor size={:?}, scale={}, pos=({}, {})", + monitor.size(), + monitor.scale_factor(), + pos_x, + pos_y + ); + + #[cfg(target_os = "macos")] + let window = { + self.window_builder(app, "/in-progress-recording") + .maximized(false) + .resizable(false) + .fullscreen(false) + .shadow(false) + .always_on_top(true) + .transparent(true) + .visible_on_all_workspaces(true) + .content_protected(should_protect) + .inner_size(width, height) + .position(pos_x, pos_y) + .skip_taskbar(true) + .initialization_script(format!( + "window.COUNTDOWN = {};", + countdown.unwrap_or_default() + )) + .build()? + }; + + #[cfg(windows)] let window = self .window_builder(app, "/in-progress-recording") .maximized(false) @@ -719,23 +753,47 @@ impl ShowCapWindow { .visible_on_all_workspaces(true) .content_protected(should_protect) .inner_size(width, height) - .position( - ((monitor.size().width as f64) / monitor.scale_factor() - width) / 2.0, - (monitor.size().height as f64) / monitor.scale_factor() - height - 120.0, - ) - .skip_taskbar(true) + .position(pos_x, pos_y) + .skip_taskbar(false) .initialization_script(format!( "window.COUNTDOWN = {};", countdown.unwrap_or_default() )) .build()?; + debug!( + "InProgressRecording window created: label={}, inner_size={:?}, outer_position={:?}", + window.label(), + window.inner_size(), + window.outer_position() + ); + #[cfg(target_os = "macos")] { crate::platform::set_window_level(window.as_ref().window(), 1000); } - fake_window::spawn_fake_window_listener(app.clone(), window.clone()); + #[cfg(target_os = "macos")] + { + let show_result = window.show(); + debug!( + "InProgressRecording window.show() result: {:?}", + show_result + ); + fake_window::spawn_fake_window_listener(app.clone(), window.clone()); + } + + #[cfg(windows)] + { + tokio::time::sleep(std::time::Duration::from_millis(100)).await; + let show_result = window.show(); + debug!( + "InProgressRecording window.show() result: {:?}", + show_result + ); + window.set_focus().ok(); + fake_window::spawn_fake_window_listener(app.clone(), window.clone()); + } window } diff --git a/apps/desktop/src/App.tsx b/apps/desktop/src/App.tsx index 251a08d25f..5f12c3e422 100644 --- a/apps/desktop/src/App.tsx +++ b/apps/desktop/src/App.tsx @@ -22,7 +22,6 @@ import titlebar from "./utils/titlebar-state"; const queryClient = new QueryClient({ defaultOptions: { queries: { - experimental_prefetchInRender: true, refetchOnWindowFocus: false, refetchOnReconnect: false, }, diff --git a/apps/desktop/src/routes/editor/Player.tsx b/apps/desktop/src/routes/editor/Player.tsx index 315db42cc9..19fa97e295 100644 --- a/apps/desktop/src/routes/editor/Player.tsx +++ b/apps/desktop/src/routes/editor/Player.tsx @@ -485,11 +485,19 @@ 
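// Editor's note on the PreviewCanvas change below (hedged): on Windows the
// worker-owned OffscreenCanvas path is skipped and frames are drawn on the
// main thread through a plain 2d context (initDirectCanvas, added to
// CanvasControls in socket.ts). The motivation is assumed, not stated in the
// diff: offscreen WebGPU rendering appears to be unreliable under WebView2.
// Note that transferControlToOffscreen() is one-way: a transferred canvas can
// never hand out a 2d context again, hence the up-front platform branch and
// the canvasTransferredRef guard.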
function PreviewCanvas() { } }); + const isWindows = navigator.userAgent.includes("Windows"); + const initCanvas = (canvas: HTMLCanvasElement) => { if (canvasTransferredRef.current) return; const controls = canvasControls(); if (!controls) return; + if (isWindows) { + controls.initDirectCanvas(canvas); + canvasTransferredRef.current = true; + return; + } + try { const offscreen = canvas.transferControlToOffscreen(); controls.initCanvas(offscreen); diff --git a/apps/desktop/src/routes/in-progress-recording.tsx b/apps/desktop/src/routes/in-progress-recording.tsx index 050f7c21a6..a47ac36e14 100644 --- a/apps/desktop/src/routes/in-progress-recording.tsx +++ b/apps/desktop/src/routes/in-progress-recording.tsx @@ -20,6 +20,7 @@ import { createSignal, For, onCleanup, + onMount, Show, } from "solid-js"; import { createStore, produce } from "solid-js/store"; @@ -60,6 +61,17 @@ const NO_WEBCAM = "No Webcam"; const FAKE_WINDOW_BOUNDS_NAME = "recording-controls-interactive-area"; export default function () { + console.log("[in-progress-recording] Wrapper rendering"); + + document.documentElement.setAttribute("data-transparent-window", "true"); + document.body.style.background = "transparent"; + + return ; +} + +function InProgressRecordingInner() { + console.log("[in-progress-recording] Inner component rendering"); + const [state, setState] = createSignal( window.COUNTDOWN === 0 ? { variant: "initializing" } @@ -75,7 +87,16 @@ export default function () { const optionsQuery = createOptionsQuery(); const startedWithMicrophone = optionsQuery.rawOptions.micName != null; const startedWithCameraInput = optionsQuery.rawOptions.cameraID != null; - const auth = authStore.createQuery(); + + const [authData, setAuthData] = createSignal<{ + plan?: { upgraded?: boolean }; + } | null>(null); + onMount(() => { + authStore + .get() + .then(setAuthData) + .catch(() => setAuthData(null)); + }); const audioLevel = createAudioInputLevel(); const [disconnectedInputs, setDisconnectedInputs] = @@ -498,7 +519,7 @@ export default function () { return ( optionsQuery.rawOptions.mode === "instant" && // If the data is loaded and the user is not upgraded - auth.data?.plan?.upgraded === false + authData()?.plan?.upgraded === false ); }; diff --git a/apps/desktop/src/utils/frame-worker.ts b/apps/desktop/src/utils/frame-worker.ts index 18a9c75736..5b61309b6f 100644 --- a/apps/desktop/src/utils/frame-worker.ts +++ b/apps/desktop/src/utils/frame-worker.ts @@ -125,6 +125,12 @@ let lastRawFrameData: Uint8ClampedArray | null = null; let lastRawFrameWidth = 0; let lastRawFrameHeight = 0; +let webgpuFrameBuffer: Uint8ClampedArray | null = null; +let webgpuFrameBufferSize = 0; + +let frameDropCount = 0; +let lastFrameDropLogTime = 0; + let consumer: Consumer | null = null; let useSharedBuffer = false; @@ -214,6 +220,10 @@ function cleanup() { lastRawFrameData = null; lastRawFrameWidth = 0; lastRawFrameHeight = 0; + webgpuFrameBuffer = null; + webgpuFrameBufferSize = 0; + frameDropCount = 0; + lastFrameDropLogTime = 0; } function initWorker() { @@ -241,7 +251,9 @@ async function initCanvas(canvas: OffscreenCanvas): Promise { const doInit = async () => { offscreenCanvas = canvas; - if (await isWebGPUSupported()) { + const webgpuSupported = await isWebGPUSupported(); + + if (webgpuSupported) { try { webgpuRenderer = await initWebGPU(canvas); renderMode = "webgpu"; @@ -249,7 +261,8 @@ async function initCanvas(canvas: OffscreenCanvas): Promise { type: "renderer-mode", mode: "webgpu", } satisfies RendererModeMessage); - } catch { + } catch (e) 
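// Editor's note on the auth change above (hedged): the reactive
// authStore.createQuery() becomes a one-shot authStore.get() in onMount with
// a null fallback. This window only reads plan.upgraded once, to decide
// whether to show the instant-mode upgrade prompt, so a single fetch
// (presumably) avoids keeping query/suspense machinery alive in the
// always-on-top recording-controls window.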
{ + console.error("[frame-worker] WebGPU init failed:", e); renderMode = "canvas2d"; offscreenCtx = canvas.getContext("2d", { alpha: false, @@ -382,17 +395,39 @@ async function processFrame(buffer: ArrayBuffer): Promise { } if (renderMode === "webgpu" && webgpuRenderer) { - const frameDataCopy = new Uint8ClampedArray(processedFrameData); + if (pendingRenderFrame !== null) { + frameDropCount++; + const now = performance.now(); + if (now - lastFrameDropLogTime > 1000) { + if (frameDropCount > 0) { + console.warn( + `[frame-worker] Dropped ${frameDropCount} frames in the last second`, + ); + } + frameDropCount = 0; + lastFrameDropLogTime = now; + } + } + + if (!webgpuFrameBuffer || webgpuFrameBufferSize < expectedLength) { + webgpuFrameBuffer = new Uint8ClampedArray(expectedLength); + webgpuFrameBufferSize = expectedLength; + } + webgpuFrameBuffer.set(processedFrameData); + pendingRenderFrame = { mode: "webgpu", - data: frameDataCopy, + data: webgpuFrameBuffer, width, height, }; return { type: "frame-queued", width, height }; } - lastRawFrameData = new Uint8ClampedArray(processedFrameData); + if (!lastRawFrameData || lastRawFrameData.length < expectedLength) { + lastRawFrameData = new Uint8ClampedArray(expectedLength); + } + lastRawFrameData.set(processedFrameData); lastRawFrameWidth = width; lastRawFrameHeight = height; diff --git a/apps/desktop/src/utils/queries.ts b/apps/desktop/src/utils/queries.ts index 8ee5c89a8d..53cfee00dc 100644 --- a/apps/desktop/src/utils/queries.ts +++ b/apps/desktop/src/utils/queries.ts @@ -76,6 +76,7 @@ export const listDisplaysWithThumbnails = queryOptions({ const getCurrentRecording = queryOptions({ queryKey: ["currentRecording"] as const, queryFn: () => commands.getCurrentRecording().then((d) => d[0]), + staleTime: 0, }); export const listRecordings = queryOptions({ diff --git a/apps/desktop/src/utils/socket.ts b/apps/desktop/src/utils/socket.ts index b34511f856..c4b8fe0d1c 100644 --- a/apps/desktop/src/utils/socket.ts +++ b/apps/desktop/src/utils/socket.ts @@ -25,6 +25,7 @@ export type CanvasControls = { initCanvas: (canvas: OffscreenCanvas) => void; resizeCanvas: (width: number, height: number) => void; hasRenderedFrame: () => boolean; + initDirectCanvas: (canvas: HTMLCanvasElement) => void; }; interface ReadyMessage { @@ -107,6 +108,9 @@ export function createImageDataWS( const [hasRenderedFrame, setHasRenderedFrame] = createSignal(false); let isCleanedUp = false; + let directCanvas: HTMLCanvasElement | null = null; + let directCtx: CanvasRenderingContext2D | null = null; + function cleanup() { if (isCleanedUp) return; isCleanedUp = true; @@ -134,6 +138,10 @@ export function createImageDataWS( worker.postMessage({ type: "resize", width, height }); }, hasRenderedFrame, + initDirectCanvas: (canvas: HTMLCanvasElement) => { + directCanvas = canvas; + directCtx = canvas.getContext("2d", { alpha: false }); + }, }; worker.onmessage = (e: MessageEvent) => { @@ -217,6 +225,54 @@ export function createImageDataWS( ws.onmessage = (event) => { const buffer = event.data as ArrayBuffer; + if (directCanvas && directCtx) { + const data = new Uint8Array(buffer); + if (data.length >= 12) { + const metadataOffset = data.length - 12; + const meta = new DataView(buffer, metadataOffset, 12); + const strideBytes = meta.getUint32(0, true); + const height = meta.getUint32(4, true); + const width = meta.getUint32(8, true); + + if (width > 0 && height > 0) { + const expectedRowBytes = width * 4; + let frameData: Uint8ClampedArray; + + if (strideBytes === expectedRowBytes) { + 
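// Wire format assumed by this parser (it matches the reads above): each WS
// message is tightly packed RGBA pixel rows followed by a 12-byte
// little-endian trailer of three u32s, [strideBytes, height, width].
// Standalone sketch of the trailer read:
//
//   function readFrameTrailer(buf: ArrayBuffer) {
//     const meta = new DataView(buf, buf.byteLength - 12, 12);
//     return {
//       strideBytes: meta.getUint32(0, true),
//       height: meta.getUint32(4, true),
//       width: meta.getUint32(8, true),
//     };
//   }
//
// When strideBytes exceeds width * 4 (GPU row padding), the else branch below
// copies row by row to strip the padding, mirroring strip_frame_padding on
// the Rust side of this diff.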
frameData = new Uint8ClampedArray( + buffer, + 0, + expectedRowBytes * height, + ); + } else { + frameData = new Uint8ClampedArray(expectedRowBytes * height); + for (let row = 0; row < height; row++) { + const srcStart = row * strideBytes; + const destStart = row * expectedRowBytes; + frameData.set( + new Uint8ClampedArray(buffer, srcStart, expectedRowBytes), + destStart, + ); + } + } + + if (directCanvas.width !== width || directCanvas.height !== height) { + directCanvas.width = width; + directCanvas.height = height; + } + + const imageData = new ImageData(frameData, width, height); + directCtx.putImageData(imageData, 0, 0); + + if (!hasRenderedFrame()) { + setHasRenderedFrame(true); + } + onmessage({ width, height }); + } + } + return; + } + if (isProcessing) { nextFrame = buffer; } else { diff --git a/apps/desktop/src/utils/tauri.ts b/apps/desktop/src/utils/tauri.ts index bdabd64664..dd122e78c2 100644 --- a/apps/desktop/src/utils/tauri.ts +++ b/apps/desktop/src/utils/tauri.ts @@ -392,7 +392,7 @@ export type CornerStyle = "squircle" | "rounded" export type Crop = { position: XY; size: XY } export type CurrentRecording = { target: CurrentRecordingTarget; mode: RecordingMode; status: RecordingStatus } export type CurrentRecordingChanged = null -export type CurrentRecordingTarget = { window: { id: WindowId; bounds: LogicalBounds } } | { screen: { id: DisplayId } } | { area: { screen: DisplayId; bounds: LogicalBounds } } +export type CurrentRecordingTarget = { window: { id: WindowId; bounds: LogicalBounds | null } } | { screen: { id: DisplayId } } | { area: { screen: DisplayId; bounds: LogicalBounds } } export type CursorAnimationStyle = "slow" | "mellow" | "custom" export type CursorConfiguration = { hide?: boolean; hideWhenIdle?: boolean; hideWhenIdleDelay?: number; size: number; type: CursorType; animationStyle: CursorAnimationStyle; tension: number; mass: number; friction: number; raw?: boolean; motionBlur?: number; useSvg?: boolean } export type CursorMeta = { imagePath: string; hotspot: XY; shape?: string | null } diff --git a/crates/editor/src/editor.rs b/crates/editor/src/editor.rs index 82d443f24e..9e817f2e3f 100644 --- a/crates/editor/src/editor.rs +++ b/crates/editor/src/editor.rs @@ -73,8 +73,11 @@ impl Renderer { async fn run(mut self) { let mut frame_renderer = FrameRenderer::new(&self.render_constants); - let mut layers = - RendererLayers::new(&self.render_constants.device, &self.render_constants.queue); + let mut layers = RendererLayers::new_with_options( + &self.render_constants.device, + &self.render_constants.queue, + self.render_constants.is_software_adapter, + ); struct PendingFrame { segment_frames: DecodedSegmentFrames, diff --git a/crates/mediafoundation-utils/src/lib.rs b/crates/mediafoundation-utils/src/lib.rs index 5893a2e049..3ead730fc1 100644 --- a/crates/mediafoundation-utils/src/lib.rs +++ b/crates/mediafoundation-utils/src/lib.rs @@ -22,6 +22,7 @@ pub fn thread_init() { pub trait IMFMediaBufferExt { fn lock(&self) -> Result>; + fn lock_for_write(&self) -> Result>; } impl IMFMediaBufferExt for IMFMediaBuffer { @@ -38,6 +39,20 @@ impl IMFMediaBufferExt for IMFMediaBuffer { bytes: unsafe { std::slice::from_raw_parts_mut(bytes_ptr, size as usize) }, }) } + + fn lock_for_write(&self) -> Result> { + let mut bytes_ptr = null_mut(); + let mut max_length = 0; + + unsafe { + self.Lock(&mut bytes_ptr, Some(&mut max_length), None)?; + } + + Ok(IMFMediaBufferLock { + source: self, + bytes: unsafe { std::slice::from_raw_parts_mut(bytes_ptr, max_length as usize) }, + }) + } 
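    // Editor's note on lock() vs lock_for_write() (hedged): IMFMediaBuffer::Lock
    // reports two sizes, the current (valid) length and the maximum (allocated)
    // length. lock() sizes its slice from the current length, which is right for
    // reading but is zero on a freshly created buffer; lock_for_write() sizes it
    // from max_length so a new buffer can be filled. Usage sketch (the
    // SetCurrentLength step is assumed, it is not shown in this hunk):
    //
    //   let buffer = unsafe { MFCreateMemoryBuffer(len as u32)? };
    //   buffer.lock_for_write()?.copy_from_slice(&bytes);
    //   unsafe { buffer.SetCurrentLength(len as u32)? };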
} pub struct IMFMediaBufferLock<'a> { diff --git a/crates/recording/src/capture_pipeline.rs b/crates/recording/src/capture_pipeline.rs index c308820f77..7f53cf99a2 100644 --- a/crates/recording/src/capture_pipeline.rs +++ b/crates/recording/src/capture_pipeline.rs @@ -138,29 +138,41 @@ impl MakeCapturePipeline for screen_capture::Direct3DCapture { ) -> anyhow::Result { let d3d_device = screen_capture.d3d_device.clone(); - let actual_output_path = if fragmented { - output_path + if fragmented { + let fragments_dir = output_path .parent() - .map(|p| p.join("display.mp4")) - .unwrap_or_else(|| output_path.with_file_name("display.mp4")) - } else { - output_path.clone() - }; + .map(|p| p.join("display")) + .unwrap_or_else(|| output_path.with_file_name("display")); - OutputPipeline::builder(actual_output_path) - .with_video::(screen_capture) - .with_timestamps(start_time) - .build::(WindowsMuxerConfig { - pixel_format: screen_capture::Direct3DCapture::PIXEL_FORMAT.as_dxgi(), - d3d_device, - bitrate_multiplier: 0.15f32, - frame_rate: 30u32, - output_size: None, - encoder_preferences, - fragmented, - frag_duration_us: 2_000_000, - }) - .await + OutputPipeline::builder(fragments_dir) + .with_video::(screen_capture) + .with_timestamps(start_time) + .build::(WindowsSegmentedMuxerConfig { + pixel_format: screen_capture::Direct3DCapture::PIXEL_FORMAT.as_dxgi(), + d3d_device, + bitrate_multiplier: 0.15f32, + frame_rate: 30u32, + output_size: None, + encoder_preferences, + segment_duration: std::time::Duration::from_secs(3), + }) + .await + } else { + OutputPipeline::builder(output_path.clone()) + .with_video::(screen_capture) + .with_timestamps(start_time) + .build::(WindowsMuxerConfig { + pixel_format: screen_capture::Direct3DCapture::PIXEL_FORMAT.as_dxgi(), + d3d_device, + bitrate_multiplier: 0.15f32, + frame_rate: 30u32, + output_size: None, + encoder_preferences, + fragmented: false, + frag_duration_us: 2_000_000, + }) + .await + } } async fn make_instant_mode_pipeline( diff --git a/crates/recording/src/feeds/camera.rs b/crates/recording/src/feeds/camera.rs index 73ed99a91f..5d06df067b 100644 --- a/crates/recording/src/feeds/camera.rs +++ b/crates/recording/src/feeds/camera.rs @@ -507,7 +507,7 @@ async fn setup_camera( .try_send(); if callback_num.is_multiple_of(30) { - tracing::debug!( + tracing::trace!( "Camera callback: sent frame {} to actor, result={:?}", callback_num, send_result.is_ok() @@ -564,7 +564,7 @@ async fn setup_camera( let data_len = bytes.len(); if let Ok(buffer) = unsafe { MFCreateMemoryBuffer(data_len as u32) } { let buffer_ready = { - if let Ok(mut lock) = buffer.lock() { + if let Ok(mut lock) = buffer.lock_for_write() { lock.copy_from_slice(&bytes); true } else { @@ -615,7 +615,7 @@ async fn setup_camera( .try_send(); if callback_num.is_multiple_of(30) { - tracing::debug!( + tracing::trace!( "Camera callback: sent frame {} to actor, result={:?}", callback_num, send_result.is_ok() @@ -792,7 +792,7 @@ impl Message for CameraFeed { let frame_num = CAMERA_FRAME_COUNTER.fetch_add(1, std::sync::atomic::Ordering::Relaxed); if frame_num.is_multiple_of(30) { - debug!( + trace!( "CameraFeed: received frame {}, broadcasting to {} senders", frame_num, self.senders.len() @@ -846,7 +846,7 @@ impl Message for CameraFeed { NATIVE_CAMERA_FRAME_COUNTER.fetch_add(1, std::sync::atomic::Ordering::Relaxed); if frame_num.is_multiple_of(30) { - debug!( + trace!( "CameraFeed: received native frame {}, broadcasting to {} native senders", frame_num, self.native_senders.len() diff --git 
a/crates/recording/src/output_pipeline/mod.rs b/crates/recording/src/output_pipeline/mod.rs index 458b20d909..8087f6a400 100644 --- a/crates/recording/src/output_pipeline/mod.rs +++ b/crates/recording/src/output_pipeline/mod.rs @@ -19,3 +19,13 @@ pub use macos::*; mod win; #[cfg(windows)] pub use win::*; + +#[cfg(windows)] +mod win_segmented; +#[cfg(windows)] +pub use win_segmented::*; + +#[cfg(windows)] +mod win_segmented_camera; +#[cfg(windows)] +pub use win_segmented_camera::*; diff --git a/crates/recording/src/output_pipeline/win.rs b/crates/recording/src/output_pipeline/win.rs index 419cc7159c..15a3842c19 100644 --- a/crates/recording/src/output_pipeline/win.rs +++ b/crates/recording/src/output_pipeline/win.rs @@ -721,7 +721,7 @@ impl AudioMuxer for WindowsCameraMuxer { } } -fn upload_mf_buffer_to_texture( +pub fn upload_mf_buffer_to_texture( device: &ID3D11Device, frame: &NativeCameraFrame, ) -> windows::core::Result { diff --git a/crates/recording/src/output_pipeline/win_segmented.rs b/crates/recording/src/output_pipeline/win_segmented.rs new file mode 100644 index 0000000000..7a134ebfea --- /dev/null +++ b/crates/recording/src/output_pipeline/win_segmented.rs @@ -0,0 +1,619 @@ +use crate::{AudioFrame, AudioMuxer, Muxer, TaskPool, VideoMuxer, screen_capture}; +use anyhow::{Context, anyhow}; +use cap_media_info::{AudioInfo, VideoInfo}; +use serde::Serialize; +use std::{ + path::PathBuf, + sync::{ + Arc, Mutex, + atomic::{AtomicBool, Ordering}, + mpsc::{SyncSender, sync_channel}, + }, + thread::JoinHandle, + time::Duration, +}; +use tracing::*; +use windows::{ + Foundation::TimeSpan, + Graphics::SizeInt32, + Win32::Graphics::{Direct3D11::ID3D11Device, Dxgi::Common::DXGI_FORMAT}, +}; + +#[derive(Debug, Clone)] +pub struct SegmentInfo { + pub path: PathBuf, + pub index: u32, + pub duration: Duration, +} + +#[derive(Serialize)] +struct FragmentEntry { + path: String, + index: u32, + duration: f64, + is_complete: bool, +} + +#[derive(Serialize)] +struct Manifest { + fragments: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + total_duration: Option, + is_complete: bool, +} + +struct SegmentState { + video_tx: SyncSender>, + output: Arc>, + encoder_handle: Option>>, +} + +struct PauseTracker { + flag: Arc, + paused_at: Option, + offset: Duration, +} + +impl PauseTracker { + fn new(flag: Arc) -> Self { + Self { + flag, + paused_at: None, + offset: Duration::ZERO, + } + } + + fn adjust(&mut self, timestamp: Duration) -> anyhow::Result> { + if self.flag.load(Ordering::Relaxed) { + if self.paused_at.is_none() { + self.paused_at = Some(timestamp); + } + return Ok(None); + } + + if let Some(start) = self.paused_at.take() { + let delta = timestamp.checked_sub(start).ok_or_else(|| { + anyhow!( + "Frame timestamp went backward during unpause (resume={start:?}, current={timestamp:?})" + ) + })?; + + self.offset = self.offset.checked_add(delta).ok_or_else(|| { + anyhow!( + "Pause offset overflow (offset={:?}, delta={delta:?})", + self.offset + ) + })?; + } + + let adjusted = timestamp.checked_sub(self.offset).ok_or_else(|| { + anyhow!( + "Adjusted timestamp underflow (timestamp={timestamp:?}, offset={:?})", + self.offset + ) + })?; + + Ok(Some(adjusted)) + } +} + +pub struct WindowsSegmentedMuxer { + base_path: PathBuf, + segment_duration: Duration, + current_index: u32, + segment_start_time: Option, + completed_segments: Vec, + + current_state: Option, + + video_config: VideoInfo, + pixel_format: DXGI_FORMAT, + d3d_device: ID3D11Device, + frame_rate: u32, + bitrate_multiplier: f32, + 
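    // PauseTracker (above) rebases capture timestamps so pauses leave no gap in
    // the encoded output: while the flag is set, frames are swallowed (Ok(None))
    // and the first paused timestamp is remembered; on resume the pause length
    // is folded into `offset`, and every subsequent timestamp is shifted back by
    // the accumulated offset. Worked example: frames until t=2.0s, pause (first
    // swallowed frame at 2.0s), resume with the next frame at t=5.0s; delta is
    // 3.0s, so offset becomes 3.0s and that frame is emitted at 2.0s, contiguous
    // with the pre-pause stream.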
output_size: Option, + encoder_preferences: crate::capture_pipeline::EncoderPreferences, + + pause: PauseTracker, +} + +pub struct WindowsSegmentedMuxerConfig { + pub pixel_format: DXGI_FORMAT, + pub d3d_device: ID3D11Device, + pub frame_rate: u32, + pub bitrate_multiplier: f32, + pub output_size: Option, + pub encoder_preferences: crate::capture_pipeline::EncoderPreferences, + pub segment_duration: Duration, +} + +impl Muxer for WindowsSegmentedMuxer { + type Config = WindowsSegmentedMuxerConfig; + + async fn setup( + config: Self::Config, + output_path: PathBuf, + video_config: Option, + _audio_config: Option, + pause_flag: Arc, + _tasks: &mut TaskPool, + ) -> anyhow::Result + where + Self: Sized, + { + let video_config = + video_config.ok_or_else(|| anyhow!("invariant: video config expected"))?; + + std::fs::create_dir_all(&output_path) + .with_context(|| format!("Failed to create segments directory: {output_path:?}"))?; + + Ok(Self { + base_path: output_path, + segment_duration: config.segment_duration, + current_index: 0, + segment_start_time: None, + completed_segments: Vec::new(), + current_state: None, + video_config, + pixel_format: config.pixel_format, + d3d_device: config.d3d_device, + frame_rate: config.frame_rate, + bitrate_multiplier: config.bitrate_multiplier, + output_size: config.output_size, + encoder_preferences: config.encoder_preferences, + pause: PauseTracker::new(pause_flag), + }) + } + + fn stop(&mut self) { + if let Some(state) = &self.current_state { + let _ = state.video_tx.send(None); + } + } + + fn finish(&mut self, timestamp: Duration) -> anyhow::Result> { + if let Some(segment_start) = self.segment_start_time { + let final_duration = timestamp.saturating_sub(segment_start); + + self.completed_segments.push(SegmentInfo { + path: self.current_segment_path(), + index: self.current_index, + duration: final_duration, + }); + } + + if let Some(mut state) = self.current_state.take() { + let _ = state.video_tx.send(None); + + if let Some(handle) = state.encoder_handle.take() { + let timeout = Duration::from_secs(5); + let start = std::time::Instant::now(); + loop { + if handle.is_finished() { + let _ = handle.join(); + break; + } + if start.elapsed() > timeout { + warn!( + "Screen encoder thread did not finish within {:?}, abandoning", + timeout + ); + break; + } + std::thread::sleep(Duration::from_millis(50)); + } + } + + let mut output = state + .output + .lock() + .map_err(|_| anyhow!("Failed to lock output"))?; + output.write_trailer()?; + } + + self.finalize_manifest(); + + Ok(Ok(())) + } +} + +impl WindowsSegmentedMuxer { + fn current_segment_path(&self) -> PathBuf { + self.base_path + .join(format!("fragment_{:03}.mp4", self.current_index)) + } + + fn write_manifest(&self) { + let manifest = Manifest { + fragments: self + .completed_segments + .iter() + .map(|s| FragmentEntry { + path: s + .path + .file_name() + .unwrap_or_default() + .to_string_lossy() + .into_owned(), + index: s.index, + duration: s.duration.as_secs_f64(), + is_complete: true, + }) + .collect(), + total_duration: None, + is_complete: false, + }; + + let manifest_path = self.base_path.join("manifest.json"); + let _ = std::fs::write( + manifest_path, + serde_json::to_string_pretty(&manifest).unwrap_or_default(), + ); + } + + fn finalize_manifest(&self) { + let total_duration: Duration = self.completed_segments.iter().map(|s| s.duration).sum(); + + let manifest = Manifest { + fragments: self + .completed_segments + .iter() + .map(|s| FragmentEntry { + path: s + .path + .file_name() + 
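    // Resulting manifest.json shape, derived from the Serialize structs above:
    //
    //   {
    //     "fragments": [
    //       { "path": "fragment_000.mp4", "index": 0, "duration": 3.0, "is_complete": true }
    //     ],
    //     "total_duration": 9.5,   // omitted until finalize_manifest()
    //     "is_complete": false     // true only in the final manifest
    //   }
    //
    // write_manifest() runs after every rotation, so a crash mid-recording
    // still leaves a manifest listing every fully closed fragment.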
.unwrap_or_default() + .to_string_lossy() + .into_owned(), + index: s.index, + duration: s.duration.as_secs_f64(), + is_complete: true, + }) + .collect(), + total_duration: Some(total_duration.as_secs_f64()), + is_complete: true, + }; + + let manifest_path = self.base_path.join("manifest.json"); + let _ = std::fs::write( + manifest_path, + serde_json::to_string_pretty(&manifest).unwrap_or_default(), + ); + } + + fn create_segment(&mut self) -> anyhow::Result<()> { + let segment_path = self.current_segment_path(); + let input_size = SizeInt32 { + Width: self.video_config.width as i32, + Height: self.video_config.height as i32, + }; + let output_size = self.output_size.unwrap_or(input_size); + + let (video_tx, video_rx) = sync_channel::>(8); + let (ready_tx, ready_rx) = sync_channel::>(1); + let output = ffmpeg::format::output(&segment_path)?; + let output = Arc::new(Mutex::new(output)); + + let d3d_device = self.d3d_device.clone(); + let pixel_format = self.pixel_format; + let frame_rate = self.frame_rate; + let bitrate_multiplier = self.bitrate_multiplier; + let video_config = self.video_config; + let encoder_preferences = self.encoder_preferences.clone(); + let output_clone = output.clone(); + + let encoder_handle = std::thread::Builder::new() + .name(format!("segment-encoder-{}", self.current_index)) + .spawn(move || { + cap_mediafoundation_utils::thread_init(); + + let encoder = (|| { + let fallback = |reason: Option| { + encoder_preferences.force_software_only(); + if let Some(reason) = reason.as_ref() { + error!("Falling back to software H264 encoder: {reason}"); + } else { + info!("Falling back to software H264 encoder"); + } + + let fallback_width = if output_size.Width > 0 { + output_size.Width as u32 + } else { + video_config.width + }; + let fallback_height = if output_size.Height > 0 { + output_size.Height as u32 + } else { + video_config.height + }; + + let mut output_guard = match output_clone.lock() { + Ok(guard) => guard, + Err(poisoned) => { + return Err(anyhow!( + "ScreenSoftwareEncoder: failed to lock output mutex: {}", + poisoned + )); + } + }; + + cap_enc_ffmpeg::h264::H264Encoder::builder(video_config) + .with_output_size(fallback_width, fallback_height) + .and_then(|builder| builder.build(&mut output_guard)) + .map(either::Right) + .map_err(|e| anyhow!("ScreenSoftwareEncoder/{e}")) + }; + + if encoder_preferences.should_force_software() { + return fallback(None); + } + + match cap_enc_mediafoundation::H264Encoder::new_with_scaled_output( + &d3d_device, + pixel_format, + input_size, + output_size, + frame_rate, + bitrate_multiplier, + ) { + Ok(encoder) => { + let width = match u32::try_from(output_size.Width) { + Ok(width) if width > 0 => width, + _ => { + return fallback(Some(format!( + "Invalid output width: {}", + output_size.Width + ))); + } + }; + + let height = match u32::try_from(output_size.Height) { + Ok(height) if height > 0 => height, + _ => { + return fallback(Some(format!( + "Invalid output height: {}", + output_size.Height + ))); + } + }; + + let muxer = { + let mut output_guard = match output_clone.lock() { + Ok(guard) => guard, + Err(poisoned) => { + return fallback(Some(format!( + "Failed to lock output mutex: {poisoned}" + ))); + } + }; + + cap_mediafoundation_ffmpeg::H264StreamMuxer::new( + &mut output_guard, + cap_mediafoundation_ffmpeg::MuxerConfig { + width, + height, + fps: frame_rate, + bitrate: encoder.bitrate(), + fragmented: false, + frag_duration_us: 0, + }, + ) + }; + + match muxer { + Ok(muxer) => Ok(either::Left((encoder, muxer))), + 
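    // Encoder selection above yields an either::Either: Left is the Media
    // Foundation hardware path (H264Encoder plus H264StreamMuxer), Right is the
    // ffmpeg software H264 encoder. Every hardware failure routes through
    // fallback(), which also calls encoder_preferences.force_software_only(),
    // a sticky switch: once one segment falls back, later segments take the
    // should_force_software() early-out and skip the hardware attempt entirely.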
Err(err) => fallback(Some(err.to_string())), + } + } + Err(err) => fallback(Some(err.to_string())), + } + })(); + + let encoder = match encoder { + Ok(encoder) => { + if ready_tx.send(Ok(())).is_err() { + error!("Failed to send ready signal - receiver dropped"); + return Ok(()); + } + encoder + } + Err(e) => { + error!("Encoder setup failed: {:#}", e); + let _ = ready_tx.send(Err(anyhow!("{e}"))); + return Err(anyhow!("{e}")); + } + }; + + match encoder { + either::Left((mut encoder, mut muxer)) => { + trace!("Running native encoder for segment"); + let mut first_timestamp: Option = None; + encoder + .run( + Arc::new(AtomicBool::default()), + || { + let Ok(Some((frame, timestamp))) = video_rx.recv() else { + trace!("No more frames available for segment"); + return Ok(None); + }; + + let relative = if let Some(first) = first_timestamp { + timestamp.checked_sub(first).unwrap_or(Duration::ZERO) + } else { + first_timestamp = Some(timestamp); + Duration::ZERO + }; + let frame_time = duration_to_timespan(relative); + + Ok(Some((frame.texture().clone(), frame_time))) + }, + |output_sample| { + let mut output = output_clone.lock().unwrap(); + + let _ = muxer + .write_sample(&output_sample, &mut output) + .map_err(|e| format!("WriteSample: {e}")); + + Ok(()) + }, + ) + .context("run native encoder for segment") + } + either::Right(mut encoder) => { + while let Ok(Some((frame, time))) = video_rx.recv() { + let Ok(mut output) = output_clone.lock() else { + continue; + }; + + use scap_ffmpeg::AsFFmpeg; + + frame + .as_ffmpeg() + .context("frame as_ffmpeg") + .and_then(|frame| { + encoder + .queue_frame(frame, time, &mut output) + .context("queue_frame") + })?; + } + + Ok(()) + } + } + })?; + + ready_rx + .recv() + .map_err(|_| anyhow!("Encoder thread ended unexpectedly"))??; + + output.lock().unwrap().write_header()?; + + self.current_state = Some(SegmentState { + video_tx, + output, + encoder_handle: Some(encoder_handle), + }); + + Ok(()) + } + + fn rotate_segment(&mut self, timestamp: Duration) -> anyhow::Result<()> { + let segment_start = self.segment_start_time.unwrap_or(Duration::ZERO); + let segment_duration = timestamp.saturating_sub(segment_start); + + if let Some(mut state) = self.current_state.take() { + let _ = state.video_tx.send(None); + + if let Some(handle) = state.encoder_handle.take() { + let timeout = Duration::from_secs(5); + let start = std::time::Instant::now(); + loop { + if handle.is_finished() { + let _ = handle.join(); + break; + } + if start.elapsed() > timeout { + warn!( + "Screen encoder thread did not finish within {:?} during rotation, abandoning", + timeout + ); + break; + } + std::thread::sleep(Duration::from_millis(50)); + } + } + + let mut output = state + .output + .lock() + .map_err(|_| anyhow!("Failed to lock output"))?; + output.write_trailer()?; + + self.completed_segments.push(SegmentInfo { + path: self.current_segment_path(), + index: self.current_index, + duration: segment_duration, + }); + } + + self.current_index += 1; + self.segment_start_time = Some(timestamp); + + self.create_segment()?; + self.write_manifest(); + + info!( + "Rotated to segment {} at {:?}", + self.current_index, timestamp + ); + + Ok(()) + } +} + +impl VideoMuxer for WindowsSegmentedMuxer { + type VideoFrame = screen_capture::VideoFrame; + + fn send_video_frame( + &mut self, + frame: Self::VideoFrame, + timestamp: Duration, + ) -> anyhow::Result<()> { + let Some(adjusted_timestamp) = self.pause.adjust(timestamp)? 
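    // Rotation/shutdown notes (summarizing rotate_segment and finish above):
    // rotation happens inline on the frame path once a segment reaches
    // segment_duration. The old segment's channel is closed (send(None)), the
    // encoder thread is joined with a bounded wait (poll is_finished() in 50ms
    // steps, give up after 5s) because JoinHandle::join() has no timeout and a
    // stalled MF encoder must not hang teardown, the trailer is written, and
    // only then does create_segment() start the next encoder thread.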
else {
+            return Ok(());
+        };
+
+        if self.current_state.is_none() {
+            self.segment_start_time = Some(adjusted_timestamp);
+            self.create_segment()?;
+        }
+
+        if self.segment_start_time.is_none() {
+            self.segment_start_time = Some(adjusted_timestamp);
+        }
+
+        let segment_elapsed =
+            adjusted_timestamp.saturating_sub(self.segment_start_time.unwrap_or(Duration::ZERO));
+
+        if segment_elapsed >= self.segment_duration {
+            self.rotate_segment(adjusted_timestamp)?;
+        }
+
+        if let Some(state) = &self.current_state
+            && let Err(e) = state
+                .video_tx
+                .try_send(Some((frame.frame, adjusted_timestamp)))
+        {
+            match e {
+                std::sync::mpsc::TrySendError::Full(_) => {
+                    trace!("Screen encoder channel full, dropping frame");
+                }
+                std::sync::mpsc::TrySendError::Disconnected(_) => {
+                    trace!("Screen encoder channel disconnected");
+                }
+            }
+        }
+
+        Ok(())
+    }
+}
+
+impl AudioMuxer for WindowsSegmentedMuxer {
+    fn send_audio_frame(&mut self, _frame: AudioFrame, _timestamp: Duration) -> anyhow::Result<()> {
+        Ok(())
+    }
+}
+
+fn duration_to_timespan(duration: Duration) -> TimeSpan {
+    const TICKS_PER_SEC: u64 = 10_000_000;
+    const NANOS_PER_TICK: u32 = 100;
+
+    let secs_ticks = duration.as_secs().saturating_mul(TICKS_PER_SEC);
+    let nanos_ticks = (duration.subsec_nanos() / NANOS_PER_TICK) as u64;
+    let total_ticks = secs_ticks.saturating_add(nanos_ticks);
+    let clamped = total_ticks.min(i64::MAX as u64);
+
+    TimeSpan {
+        Duration: clamped as i64,
+    }
+}
diff --git a/crates/recording/src/output_pipeline/win_segmented_camera.rs b/crates/recording/src/output_pipeline/win_segmented_camera.rs
new file mode 100644
index 0000000000..97a4e5b16c
--- /dev/null
+++ b/crates/recording/src/output_pipeline/win_segmented_camera.rs
@@ -0,0 +1,571 @@
+use crate::output_pipeline::win::{NativeCameraFrame, upload_mf_buffer_to_texture};
+use crate::{AudioFrame, AudioMuxer, Muxer, TaskPool, VideoMuxer};
+use anyhow::{Context, anyhow};
+use cap_media_info::{AudioInfo, VideoInfo};
+use serde::Serialize;
+use std::{
+    path::PathBuf,
+    sync::{
+        Arc, Mutex,
+        atomic::{AtomicBool, Ordering},
+        mpsc::{SyncSender, sync_channel},
+    },
+    thread::JoinHandle,
+    time::Duration,
+};
+use tracing::*;
+use windows::{Foundation::TimeSpan, Graphics::SizeInt32};
+
+#[derive(Debug, Clone)]
+struct SegmentInfo {
+    path: PathBuf,
+    index: u32,
+    duration: Duration,
+}
+
+#[derive(Serialize)]
+struct FragmentEntry {
+    path: String,
+    index: u32,
+    duration: f64,
+    is_complete: bool,
+}
+
+#[derive(Serialize)]
+struct Manifest {
+    fragments: Vec<FragmentEntry>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    total_duration: Option<f64>,
+    is_complete: bool,
+}
+
+struct SegmentState {
+    video_tx: SyncSender<Option<(NativeCameraFrame, Duration)>>,
+    output: Arc<Mutex<ffmpeg::format::context::Output>>,
+    encoder_handle: Option<JoinHandle<anyhow::Result<()>>>,
+}
+
+struct PauseTracker {
+    flag: Arc<AtomicBool>,
+    paused_at: Option<Duration>,
+    offset: Duration,
+}
+
+impl PauseTracker {
+    fn new(flag: Arc<AtomicBool>) -> Self {
+        Self {
+            flag,
+            paused_at: None,
+            offset: Duration::ZERO,
+        }
+    }
+
+    fn adjust(&mut self, timestamp: Duration) -> anyhow::Result<Option<Duration>> {
+        if self.flag.load(Ordering::Relaxed) {
+            if self.paused_at.is_none() {
+                self.paused_at = Some(timestamp);
+            }
+            return Ok(None);
+        }
+
+        if let Some(start) = self.paused_at.take() {
+            let delta = timestamp.checked_sub(start).ok_or_else(|| {
+                anyhow!(
+                    "Frame timestamp went backward during unpause (resume={start:?}, current={timestamp:?})"
+                )
+            })?;
+
+            self.offset = self.offset.checked_add(delta).ok_or_else(|| {
+                anyhow!(
+                    "Pause offset overflow (offset={:?}, delta={delta:?})",
+                    self.offset
+                )
+            })?;
+        }
+
+        let adjusted =
timestamp.checked_sub(self.offset).ok_or_else(|| { + anyhow!( + "Adjusted timestamp underflow (timestamp={timestamp:?}, offset={:?})", + self.offset + ) + })?; + + Ok(Some(adjusted)) + } +} + +pub struct WindowsSegmentedCameraMuxer { + base_path: PathBuf, + segment_duration: Duration, + current_index: u32, + segment_start_time: Option, + completed_segments: Vec, + + current_state: Option, + + video_config: VideoInfo, + output_height: Option, + + pause: PauseTracker, +} + +pub struct WindowsSegmentedCameraMuxerConfig { + pub output_height: Option, + pub segment_duration: Duration, +} + +impl Default for WindowsSegmentedCameraMuxerConfig { + fn default() -> Self { + Self { + output_height: None, + segment_duration: Duration::from_secs(3), + } + } +} + +impl Muxer for WindowsSegmentedCameraMuxer { + type Config = WindowsSegmentedCameraMuxerConfig; + + async fn setup( + config: Self::Config, + output_path: PathBuf, + video_config: Option, + _audio_config: Option, + pause_flag: Arc, + _tasks: &mut TaskPool, + ) -> anyhow::Result + where + Self: Sized, + { + let video_config = + video_config.ok_or_else(|| anyhow!("invariant: video config expected"))?; + + std::fs::create_dir_all(&output_path) + .with_context(|| format!("Failed to create segments directory: {output_path:?}"))?; + + Ok(Self { + base_path: output_path, + segment_duration: config.segment_duration, + current_index: 0, + segment_start_time: None, + completed_segments: Vec::new(), + current_state: None, + video_config, + output_height: config.output_height, + pause: PauseTracker::new(pause_flag), + }) + } + + fn stop(&mut self) { + if let Some(state) = &self.current_state { + let _ = state.video_tx.send(None); + } + } + + fn finish(&mut self, timestamp: Duration) -> anyhow::Result> { + if let Some(segment_start) = self.segment_start_time { + let final_duration = timestamp.saturating_sub(segment_start); + + self.completed_segments.push(SegmentInfo { + path: self.current_segment_path(), + index: self.current_index, + duration: final_duration, + }); + } + + if let Some(mut state) = self.current_state.take() { + let _ = state.video_tx.send(None); + + if let Some(handle) = state.encoder_handle.take() { + let timeout = Duration::from_secs(5); + let start = std::time::Instant::now(); + loop { + if handle.is_finished() { + let _ = handle.join(); + break; + } + if start.elapsed() > timeout { + warn!( + "Camera encoder thread did not finish within {:?}, abandoning", + timeout + ); + break; + } + std::thread::sleep(Duration::from_millis(50)); + } + } + + let mut output = state + .output + .lock() + .map_err(|_| anyhow!("Failed to lock output"))?; + output.write_trailer()?; + } + + self.finalize_manifest(); + + Ok(Ok(())) + } +} + +impl WindowsSegmentedCameraMuxer { + fn current_segment_path(&self) -> PathBuf { + self.base_path + .join(format!("fragment_{:03}.mp4", self.current_index)) + } + + fn write_manifest(&self) { + let manifest = Manifest { + fragments: self + .completed_segments + .iter() + .map(|s| FragmentEntry { + path: s + .path + .file_name() + .unwrap_or_default() + .to_string_lossy() + .into_owned(), + index: s.index, + duration: s.duration.as_secs_f64(), + is_complete: true, + }) + .collect(), + total_duration: None, + is_complete: false, + }; + + let manifest_path = self.base_path.join("manifest.json"); + if let Err(e) = std::fs::write( + &manifest_path, + serde_json::to_string_pretty(&manifest).unwrap_or_default(), + ) { + warn!( + "Failed to write manifest to {}: {e}", + manifest_path.display() + ); + } + } + + fn 
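    // (Applies to create_segment below:) output dimensions are rounded down to
    // even values with `& !1` because H.264 with 4:2:0 chroma subsampling
    // (NV12) requires even width and height; odd sizes are rejected or cropped
    // by encoders.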
finalize_manifest(&self) { + let total_duration: Duration = self.completed_segments.iter().map(|s| s.duration).sum(); + + let manifest = Manifest { + fragments: self + .completed_segments + .iter() + .map(|s| FragmentEntry { + path: s + .path + .file_name() + .unwrap_or_default() + .to_string_lossy() + .into_owned(), + index: s.index, + duration: s.duration.as_secs_f64(), + is_complete: true, + }) + .collect(), + total_duration: Some(total_duration.as_secs_f64()), + is_complete: true, + }; + + let manifest_path = self.base_path.join("manifest.json"); + if let Err(e) = std::fs::write( + &manifest_path, + serde_json::to_string_pretty(&manifest).unwrap_or_default(), + ) { + warn!( + "Failed to write final manifest to {}: {e}", + manifest_path.display() + ); + } + } + + fn create_segment(&mut self, first_frame: &NativeCameraFrame) -> anyhow::Result<()> { + let segment_path = self.current_segment_path(); + + let input_size = SizeInt32 { + Width: self.video_config.width as i32, + Height: self.video_config.height as i32, + }; + + let output_height = self.output_height.unwrap_or(self.video_config.height); + let output_width = (self.video_config.width * output_height) / self.video_config.height; + let output_width = output_width & !1; + let output_height = output_height & !1; + + let output_size = SizeInt32 { + Width: output_width as i32, + Height: output_height as i32, + }; + + let frame_rate = self.video_config.fps(); + let bitrate_multiplier = 0.2f32; + let input_format = first_frame.dxgi_format(); + + let (video_tx, video_rx) = sync_channel::>(30); + let (ready_tx, ready_rx) = sync_channel::>(1); + let output = ffmpeg::format::output(&segment_path)?; + let output = Arc::new(Mutex::new(output)); + let output_clone = output.clone(); + + let encoder_handle = std::thread::Builder::new() + .name(format!("camera-segment-encoder-{}", self.current_index)) + .spawn(move || { + cap_mediafoundation_utils::thread_init(); + + let d3d_device = match crate::capture_pipeline::create_d3d_device() { + Ok(device) => device, + Err(e) => { + let _ = ready_tx.send(Err(anyhow!("Failed to create D3D device: {e}"))); + return Err(anyhow!("Failed to create D3D device: {e}")); + } + }; + + let encoder_result = cap_enc_mediafoundation::H264Encoder::new_with_scaled_output( + &d3d_device, + input_format, + input_size, + output_size, + frame_rate, + bitrate_multiplier, + ); + + let (mut encoder, mut muxer) = match encoder_result { + Ok(encoder) => { + let muxer = { + let mut output_guard = match output_clone.lock() { + Ok(guard) => guard, + Err(poisoned) => { + let _ = ready_tx.send(Err(anyhow!( + "Failed to lock output mutex: {poisoned}" + ))); + return Err(anyhow!( + "Failed to lock output mutex: {}", + poisoned + )); + } + }; + + cap_mediafoundation_ffmpeg::H264StreamMuxer::new( + &mut output_guard, + cap_mediafoundation_ffmpeg::MuxerConfig { + width: output_width, + height: output_height, + fps: frame_rate, + bitrate: encoder.bitrate(), + fragmented: false, + frag_duration_us: 0, + }, + ) + }; + + match muxer { + Ok(muxer) => (encoder, muxer), + Err(err) => { + let _ = + ready_tx.send(Err(anyhow!("Failed to create muxer: {err}"))); + return Err(anyhow!("Failed to create muxer: {err}")); + } + } + } + Err(err) => { + let _ = ready_tx.send(Err(anyhow!("Failed to create H264 encoder: {err}"))); + return Err(anyhow!("Failed to create H264 encoder: {err}")); + } + }; + + if ready_tx.send(Ok(())).is_err() { + error!("Failed to send ready signal - receiver dropped"); + return Ok(()); + } + + info!( + "Camera segment encoder 
started: {:?} {}x{} -> NV12 {}x{} @ {}fps", + input_format, + input_size.Width, + input_size.Height, + output_size.Width, + output_size.Height, + frame_rate + ); + + let mut first_timestamp: Option = None; + + encoder + .run( + Arc::new(AtomicBool::default()), + || { + let Ok(Some((frame, timestamp))) = video_rx.recv() else { + trace!("No more camera frames available for segment"); + return Ok(None); + }; + + let relative = if let Some(first) = first_timestamp { + timestamp.checked_sub(first).unwrap_or(Duration::ZERO) + } else { + first_timestamp = Some(timestamp); + Duration::ZERO + }; + + let texture = upload_mf_buffer_to_texture(&d3d_device, &frame)?; + Ok(Some((texture, duration_to_timespan(relative)))) + }, + |output_sample| { + let mut output = output_clone.lock().unwrap(); + muxer + .write_sample(&output_sample, &mut output) + .map_err(|e| { + windows::core::Error::new( + windows::core::HRESULT(-1), + format!("WriteSample: {e}"), + ) + }) + }, + ) + .context("run camera encoder for segment") + })?; + + ready_rx + .recv() + .map_err(|_| anyhow!("Camera encoder thread ended unexpectedly"))??; + + output.lock().unwrap().write_header()?; + + self.current_state = Some(SegmentState { + video_tx, + output, + encoder_handle: Some(encoder_handle), + }); + + Ok(()) + } + + fn rotate_segment( + &mut self, + timestamp: Duration, + next_frame: &NativeCameraFrame, + ) -> anyhow::Result<()> { + let segment_start = self.segment_start_time.unwrap_or(Duration::ZERO); + let segment_duration = timestamp.saturating_sub(segment_start); + + if let Some(mut state) = self.current_state.take() { + let _ = state.video_tx.send(None); + + if let Some(handle) = state.encoder_handle.take() { + let timeout = Duration::from_secs(5); + let start = std::time::Instant::now(); + loop { + if handle.is_finished() { + if let Err(panic_payload) = handle.join() { + warn!( + "Camera encoder thread panicked during rotation: {:?}", + panic_payload + ); + } + break; + } + if start.elapsed() > timeout { + warn!( + "Camera encoder thread did not finish within {:?} during rotation, abandoning", + timeout + ); + break; + } + std::thread::sleep(Duration::from_millis(50)); + } + } + + let mut output = state + .output + .lock() + .map_err(|_| anyhow!("Failed to lock output"))?; + output.write_trailer()?; + + self.completed_segments.push(SegmentInfo { + path: self.current_segment_path(), + index: self.current_index, + duration: segment_duration, + }); + } + + self.current_index += 1; + self.segment_start_time = Some(timestamp); + + self.create_segment(next_frame)?; + self.write_manifest(); + + info!( + "Camera rotated to segment {} at {:?}", + self.current_index, timestamp + ); + + Ok(()) + } +} + +impl VideoMuxer for WindowsSegmentedCameraMuxer { + type VideoFrame = NativeCameraFrame; + + fn send_video_frame( + &mut self, + frame: Self::VideoFrame, + timestamp: Duration, + ) -> anyhow::Result<()> { + let Some(adjusted_timestamp) = self.pause.adjust(timestamp)? 
else { + return Ok(()); + }; + + if self.current_state.is_none() { + self.segment_start_time = Some(adjusted_timestamp); + self.create_segment(&frame)?; + } + + if self.segment_start_time.is_none() { + self.segment_start_time = Some(adjusted_timestamp); + } + + let segment_elapsed = + adjusted_timestamp.saturating_sub(self.segment_start_time.unwrap_or(Duration::ZERO)); + + if segment_elapsed >= self.segment_duration { + self.rotate_segment(adjusted_timestamp, &frame)?; + } + + if let Some(state) = &self.current_state + && let Err(e) = state.video_tx.try_send(Some((frame, adjusted_timestamp))) + { + match e { + std::sync::mpsc::TrySendError::Full(_) => { + trace!("Camera encoder channel full, dropping frame"); + } + std::sync::mpsc::TrySendError::Disconnected(_) => { + trace!("Camera encoder channel disconnected"); + } + } + } + + Ok(()) + } +} + +impl AudioMuxer for WindowsSegmentedCameraMuxer { + fn send_audio_frame(&mut self, _frame: AudioFrame, _timestamp: Duration) -> anyhow::Result<()> { + Ok(()) + } +} + +fn duration_to_timespan(duration: Duration) -> TimeSpan { + const TICKS_PER_SEC: u64 = 10_000_000; + const NANOS_PER_TICK: u32 = 100; + + let secs_ticks = duration.as_secs().saturating_mul(TICKS_PER_SEC); + let nanos_ticks = (duration.subsec_nanos() / NANOS_PER_TICK) as u64; + let total_ticks = secs_ticks.saturating_add(nanos_ticks); + let clamped = total_ticks.min(i64::MAX as u64); + + TimeSpan { + Duration: clamped as i64, + } +} diff --git a/crates/recording/src/studio_recording.rs b/crates/recording/src/studio_recording.rs index 592679b8b4..443da9cd29 100644 --- a/crates/recording/src/studio_recording.rs +++ b/crates/recording/src/studio_recording.rs @@ -18,7 +18,10 @@ use crate::output_pipeline::{ }; #[cfg(windows)] -use crate::output_pipeline::{WindowsCameraMuxer, WindowsCameraMuxerConfig}; +use crate::output_pipeline::{ + WindowsCameraMuxer, WindowsCameraMuxerConfig, WindowsSegmentedCameraMuxer, + WindowsSegmentedCameraMuxerConfig, +}; use anyhow::{Context as _, anyhow, bail}; use cap_media_info::VideoInfo; use cap_project::{ @@ -901,16 +904,27 @@ async fn create_segment_pipeline( }; #[cfg(windows)] - let camera = OptionFuture::from(base_inputs.camera_feed.map(|camera_feed| { - OutputPipeline::builder(dir.join("camera.mp4")) - .with_video::(camera_feed) - .with_timestamps(start_time) - .build::(WindowsCameraMuxerConfig::default()) - .instrument(error_span!("camera-out")) - })) - .await - .transpose() - .context("camera pipeline setup")?; + let camera = if let Some(camera_feed) = base_inputs.camera_feed { + let pipeline = if fragmented { + let fragments_dir = dir.join("camera"); + OutputPipeline::builder(fragments_dir) + .with_video::(camera_feed) + .with_timestamps(start_time) + .build::(WindowsSegmentedCameraMuxerConfig::default()) + .instrument(error_span!("camera-out")) + .await + } else { + OutputPipeline::builder(dir.join("camera.mp4")) + .with_video::(camera_feed) + .with_timestamps(start_time) + .build::(WindowsCameraMuxerConfig::default()) + .instrument(error_span!("camera-out")) + .await + }; + Some(pipeline.context("camera pipeline setup")?) 
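    // Editor's note (hedged): with `fragmented` set, screen and camera now both
    // record into a directory of ~3s fragment_NNN.mp4 files plus a manifest.json
    // (WindowsSegmentedMuxer / WindowsSegmentedCameraMuxer) instead of a single
    // fragmented MP4, presumably so an interrupted recording keeps every fully
    // closed segment recoverable. The non-fragmented path is unchanged.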
+ } else { + None + }; let microphone = if let Some(mic_feed) = base_inputs.mic_feed { let pipeline = if fragmented { diff --git a/crates/rendering/Cargo.toml b/crates/rendering/Cargo.toml index 95ea663cd3..6735427df8 100644 --- a/crates/rendering/Cargo.toml +++ b/crates/rendering/Cargo.toml @@ -42,6 +42,19 @@ wgpu-hal = { workspace = true, features = ["metal"] } wgpu-core.workspace = true foreign-types = "0.5" +[target.'cfg(target_os = "windows")'.dependencies] +wgpu-hal = { workspace = true, features = ["dx12"] } +wgpu-core.workspace = true +windows = { workspace = true, features = [ + "Win32_Foundation", + "Win32_Graphics_Direct3D", + "Win32_Graphics_Direct3D11", + "Win32_Graphics_Direct3D12", + "Win32_Graphics_Dxgi", + "Win32_Graphics_Dxgi_Common", + "Win32_System_Threading", +] } + [dev-dependencies] pretty_assertions = "1.4.1" diff --git a/crates/rendering/src/composite_frame.rs b/crates/rendering/src/composite_frame.rs index e4742b1159..01f8e6afc4 100644 --- a/crates/rendering/src/composite_frame.rs +++ b/crates/rendering/src/composite_frame.rs @@ -187,7 +187,7 @@ impl CompositeVideoFramePipeline { mip_level_count: 1, sample_count: 1, dimension: wgpu::TextureDimension::D2, - format: wgpu::TextureFormat::Rgba8UnormSrgb, + format: wgpu::TextureFormat::Rgba8Unorm, usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::COPY_DST, diff --git a/crates/rendering/src/cpu_yuv.rs b/crates/rendering/src/cpu_yuv.rs new file mode 100644 index 0000000000..df278ce8d8 --- /dev/null +++ b/crates/rendering/src/cpu_yuv.rs @@ -0,0 +1,575 @@ +pub fn nv12_to_rgba( + y_data: &[u8], + uv_data: &[u8], + width: u32, + height: u32, + y_stride: u32, + uv_stride: u32, + output: &mut [u8], +) { + let width = width as usize; + let height = height as usize; + let y_stride = y_stride as usize; + let uv_stride = uv_stride as usize; + + for row in 0..height { + let y_row_start = row * y_stride; + let uv_row_start = (row / 2) * uv_stride; + let out_row_start = row * width * 4; + + for col in 0..width { + let y_idx = y_row_start + col; + let uv_idx = uv_row_start + (col / 2) * 2; + + let y = y_data.get(y_idx).copied().unwrap_or(0) as i32; + let u = uv_data.get(uv_idx).copied().unwrap_or(128) as i32; + let v = uv_data.get(uv_idx + 1).copied().unwrap_or(128) as i32; + + let c = y - 16; + let d = u - 128; + let e = v - 128; + + let r = clamp_u8((298 * c + 409 * e + 128) >> 8); + let g = clamp_u8((298 * c - 100 * d - 208 * e + 128) >> 8); + let b = clamp_u8((298 * c + 516 * d + 128) >> 8); + + let out_idx = out_row_start + col * 4; + if out_idx + 3 < output.len() { + output[out_idx] = r; + output[out_idx + 1] = g; + output[out_idx + 2] = b; + output[out_idx + 3] = 255; + } + } + } +} + +#[allow(clippy::too_many_arguments)] +pub fn yuv420p_to_rgba( + y_data: &[u8], + u_data: &[u8], + v_data: &[u8], + width: u32, + height: u32, + y_stride: u32, + uv_stride: u32, + output: &mut [u8], +) { + let width = width as usize; + let height = height as usize; + let y_stride = y_stride as usize; + let uv_stride = uv_stride as usize; + + for row in 0..height { + let y_row_start = row * y_stride; + let uv_row_start = (row / 2) * uv_stride; + let out_row_start = row * width * 4; + + for col in 0..width { + let y_idx = y_row_start + col; + let uv_idx = uv_row_start + (col / 2); + + let y = y_data.get(y_idx).copied().unwrap_or(0) as i32; + let u = u_data.get(uv_idx).copied().unwrap_or(128) as i32; + let v = v_data.get(uv_idx).copied().unwrap_or(128) as i32; + + let c = y - 16; + let d = u - 
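    // (The `d` assignment is completed just below.) The integer math here and
    // in nv12_to_rgba is the BT.601 limited-range YUV->RGB matrix in 8.8 fixed
    // point: 1.164 -> 298, 1.596 -> 409, 0.391 -> 100, 0.813 -> 208,
    // 2.018 -> 516. Y is offset by 16 and U/V by 128 first, +128 rounds before
    // the >> 8, and clamp_u8 saturates the result to 0..=255.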
128;
+            let e = v - 128;
+
+            let r = clamp_u8((298 * c + 409 * e + 128) >> 8);
+            let g = clamp_u8((298 * c - 100 * d - 208 * e + 128) >> 8);
+            let b = clamp_u8((298 * c + 516 * d + 128) >> 8);
+
+            let out_idx = out_row_start + col * 4;
+            if out_idx + 3 < output.len() {
+                output[out_idx] = r;
+                output[out_idx + 1] = g;
+                output[out_idx + 2] = b;
+                output[out_idx + 3] = 255;
+            }
+        }
+    }
+}
+
+#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
+pub fn nv12_to_rgba_simd(
+    y_data: &[u8],
+    uv_data: &[u8],
+    width: u32,
+    height: u32,
+    y_stride: u32,
+    uv_stride: u32,
+    output: &mut [u8],
+) {
+    #[cfg(target_arch = "x86")]
+    use std::arch::x86::*;
+    #[cfg(target_arch = "x86_64")]
+    use std::arch::x86_64::*;
+
+    if !is_x86_feature_detected!("sse2") {
+        return nv12_to_rgba(y_data, uv_data, width, height, y_stride, uv_stride, output);
+    }
+
+    let width_usize = width as usize;
+    let height_usize = height as usize;
+    let y_stride_usize = y_stride as usize;
+    let uv_stride_usize = uv_stride as usize;
+
+    if width_usize == 0 || height_usize == 0 {
+        return;
+    }
+
+    let y_required = (height_usize - 1)
+        .saturating_mul(y_stride_usize)
+        .saturating_add(width_usize);
+
+    let uv_height = height_usize.div_ceil(2);
+    let uv_width_bytes = width_usize.div_ceil(2) * 2;
+    let uv_required = uv_height
+        .saturating_sub(1)
+        .saturating_mul(uv_stride_usize)
+        .saturating_add(uv_width_bytes);
+
+    let output_required = width_usize.saturating_mul(height_usize).saturating_mul(4);
+
+    let strides_valid = y_stride_usize >= width_usize && uv_stride_usize >= uv_width_bytes;
+
+    if !strides_valid
+        || y_data.len() < y_required
+        || uv_data.len() < uv_required
+        || output.len() < output_required
+    {
+        return nv12_to_rgba(y_data, uv_data, width, height, y_stride, uv_stride, output);
+    }
+
+    debug_assert!(
+        y_stride_usize >= width_usize,
+        "Y stride ({y_stride_usize}) must be >= width ({width_usize})"
+    );
+    debug_assert!(
+        uv_stride_usize >= uv_width_bytes,
+        "UV stride ({uv_stride_usize}) must be >= UV width bytes ({uv_width_bytes})"
+    );
+    debug_assert!(
+        y_data.len() >= y_required,
+        "Y buffer too small: {} < {y_required}",
+        y_data.len()
+    );
+    debug_assert!(
+        uv_data.len() >= uv_required,
+        "UV buffer too small: {} < {uv_required}",
+        uv_data.len()
+    );
+    debug_assert!(
+        output.len() >= output_required,
+        "Output buffer too small: {} < {output_required}",
+        output.len()
+    );
+
+    let simd_width = (width_usize / 8) * 8;
+
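+    // The constants below are the standard fixed-point form of the limited-range
+    // BT.601 YCbCr -> RGB conversion used by the scalar paths above: each real
+    // coefficient is scaled by 256 and rounded, e.g. 298 ~= 256 * (255 / 219)
+    // and 409 ~= 256 * 1.402 * (255 / 224). Note that the products need 32-bit
+    // headroom: 298 * 219 alone is 65262, which overflows i16, so the SIMD loop
+    // widens to 32-bit lanes with _mm_madd_epi16 before the final shift.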
+    unsafe {
+        let c16 = _mm_set1_epi16(16);
+        let c128 = _mm_set1_epi16(128);
+        let c298 = _mm_set1_epi16(298);
+        let c409 = _mm_set1_epi16(409);
+        let c516 = _mm_set1_epi16(516);
+        // (d, e) coefficient pairs for the G channel, laid out to match
+        // _mm_unpacklo/hi_epi16(d, e) so _mm_madd_epi16 yields -100*d - 208*e.
+        let g_coeffs = _mm_set_epi16(-208, -100, -208, -100, -208, -100, -208, -100);
+        let round32 = _mm_set1_epi32(128);
+        let zero = _mm_setzero_si128();
+
+        for row in 0..height_usize {
+            let y_row_start = row * y_stride_usize;
+            let uv_row_start = (row / 2) * uv_stride_usize;
+            let out_row_start = row * width_usize * 4;
+
+            let mut col = 0usize;
+
+            while col + 8 <= simd_width {
+                let y_ptr = y_data.as_ptr().add(y_row_start + col);
+                let uv_ptr = uv_data.as_ptr().add(uv_row_start + (col / 2) * 2);
+
+                let y8 = _mm_loadl_epi64(y_ptr as *const __m128i);
+                let y16 = _mm_unpacklo_epi8(y8, zero);
+                let y_adj = _mm_sub_epi16(y16, c16);
+
+                // Deinterleave UV, then duplicate each chroma sample across its
+                // two pixels. u8_val/v8_val already hold 16-bit lanes, so the
+                // duplication must be a 16-bit interleave; a second epi8 unpack
+                // here would zero every other lane.
+                let uv8 = _mm_loadl_epi64(uv_ptr as *const __m128i);
+                let u8_val = _mm_and_si128(uv8, _mm_set1_epi16(0x00FF));
+                let v8_val = _mm_srli_epi16(uv8, 8);
+                let u16 = _mm_unpacklo_epi16(u8_val, u8_val);
+                let v16 = _mm_unpacklo_epi16(v8_val, v8_val);
+
+                let d = _mm_sub_epi16(u16, c128);
+                let e = _mm_sub_epi16(v16, c128);
+
+                // Widen to i32 via _mm_madd_epi16 on (value, 0) pairs; doing the
+                // fixed-point multiplies in 16 bits would wrap for bright pixels.
+                let c_lo = _mm_madd_epi16(_mm_unpacklo_epi16(y_adj, zero), c298);
+                let c_hi = _mm_madd_epi16(_mm_unpackhi_epi16(y_adj, zero), c298);
+                let e409_lo = _mm_madd_epi16(_mm_unpacklo_epi16(e, zero), c409);
+                let e409_hi = _mm_madd_epi16(_mm_unpackhi_epi16(e, zero), c409);
+                let d516_lo = _mm_madd_epi16(_mm_unpacklo_epi16(d, zero), c516);
+                let d516_hi = _mm_madd_epi16(_mm_unpackhi_epi16(d, zero), c516);
+                let g_lo = _mm_madd_epi16(_mm_unpacklo_epi16(d, e), g_coeffs);
+                let g_hi = _mm_madd_epi16(_mm_unpackhi_epi16(d, e), g_coeffs);
+
+                let r32_lo = _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(c_lo, e409_lo), round32), 8);
+                let r32_hi = _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(c_hi, e409_hi), round32), 8);
+                let g32_lo = _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(c_lo, g_lo), round32), 8);
+                let g32_hi = _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(c_hi, g_hi), round32), 8);
+                let b32_lo = _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(c_lo, d516_lo), round32), 8);
+                let b32_hi = _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(c_hi, d516_hi), round32), 8);
+
+                // packs saturates i32 -> i16 and packus saturates i16 -> u8,
+                // matching the scalar clamp_u8 behaviour.
+                let r = _mm_packus_epi16(_mm_packs_epi32(r32_lo, r32_hi), zero);
+                let g = _mm_packus_epi16(_mm_packs_epi32(g32_lo, g32_hi), zero);
+                let b = _mm_packus_epi16(_mm_packs_epi32(b32_lo, b32_hi), zero);
+                let a = _mm_set1_epi8(-1i8);
+
+                let rg_lo = _mm_unpacklo_epi8(r, g);
+                let ba_lo = _mm_unpacklo_epi8(b, a);
+                let rgba_lo = _mm_unpacklo_epi16(rg_lo, ba_lo);
+                let rgba_hi = _mm_unpackhi_epi16(rg_lo, ba_lo);
+
+                let out_ptr = output.as_mut_ptr().add(out_row_start + col * 4);
+                _mm_storeu_si128(out_ptr as *mut __m128i, rgba_lo);
+                _mm_storeu_si128(out_ptr.add(16) as *mut __m128i, rgba_hi);
+
+                col += 8;
+            }
+
+            for col in simd_width..width_usize {
+                let y_idx = y_row_start + col;
+                let uv_idx = uv_row_start + (col / 2) * 2;
+
+                let y = y_data.get(y_idx).copied().unwrap_or(0) as i32;
+                let u = uv_data.get(uv_idx).copied().unwrap_or(128) as i32;
+                let v = uv_data.get(uv_idx + 1).copied().unwrap_or(128) as i32;
+
+                let c = y - 16;
+                let d = u - 128;
+                let e = v - 128;
+
+                let r = clamp_u8((298 * c + 409 * e + 128) >> 8);
+                let g = clamp_u8((298 * c - 100 * d - 208 * e + 128) >> 8);
+                let b = clamp_u8((298 * c + 516 * d + 128) >> 8);
+
+                let out_idx = out_row_start + col * 4;
+                if out_idx + 3 < output.len() {
+                    output[out_idx] = r;
+                    output[out_idx + 1] = g;
+                    output[out_idx + 2] = b;
+                    output[out_idx + 3] = 255;
+                }
+            }
+        }
+    }
+}
+
+#[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))]
+pub fn nv12_to_rgba_simd(
+    y_data: &[u8],
+    uv_data: &[u8],
+    width: u32,
+    height: u32,
+    y_stride: u32,
+    uv_stride: u32,
+    output: &mut [u8],
+) {
+    nv12_to_rgba(y_data, uv_data, width, height, y_stride, uv_stride, output);
+}
+
+#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
+#[allow(clippy::too_many_arguments)]
+pub fn yuv420p_to_rgba_simd(
+    y_data: &[u8],
+    u_data: &[u8],
+    v_data: &[u8],
+    width: u32,
+    height: u32,
+    y_stride: u32,
+    uv_stride: u32,
+    output: &mut [u8],
+) {
+    #[cfg(target_arch = "x86")]
+    use std::arch::x86::*;
+    #[cfg(target_arch = "x86_64")]
+    use std::arch::x86_64::*;
+
+    if !is_x86_feature_detected!("sse2") {
+        return yuv420p_to_rgba(
+            y_data, u_data, v_data, width, height, y_stride, uv_stride, output,
+        );
+    }
+
+    let width_usize = width as usize;
+    let height_usize = height as usize;
+    let y_stride_usize = y_stride as usize;
+    let uv_stride_usize = uv_stride as usize;
+
+    if width_usize == 0 || height_usize == 0 {
+        return;
+    }
+
+    let y_required = (height_usize - 1)
+        .saturating_mul(y_stride_usize)
+        .saturating_add(width_usize);
+
+    let uv_height = height_usize.div_ceil(2);
+    let uv_width = width_usize.div_ceil(2);
+    let uv_required = uv_height
+        .saturating_sub(1)
+        .saturating_mul(uv_stride_usize)
+        .saturating_add(uv_width);
+
+    let output_required = width_usize.saturating_mul(height_usize).saturating_mul(4);
+
+    let strides_valid = y_stride_usize >= width_usize && uv_stride_usize >= uv_width;
+
+    if !strides_valid
+        || y_data.len() < y_required
+        || u_data.len() < uv_required
+        || v_data.len() < uv_required
+        || output.len() < output_required
+    {
+        return yuv420p_to_rgba(
+            y_data, u_data, v_data, width, height, y_stride, uv_stride, output,
+        );
+    }
+
+    debug_assert!(
+        y_stride_usize >= width_usize,
+        "Y stride ({y_stride_usize}) must be >= width ({width_usize})"
+    );
+    debug_assert!(
+        uv_stride_usize >= uv_width,
+        "UV stride ({uv_stride_usize}) must be >= UV width ({uv_width})"
+    );
+    debug_assert!(
+        y_data.len() >= y_required,
+        "Y buffer too small: {} < {y_required}",
+        y_data.len()
+    );
+    debug_assert!(
+        u_data.len() >= uv_required,
+        "U buffer too small: {} < {uv_required}",
+        u_data.len()
+    );
+    debug_assert!(
+        v_data.len() >= uv_required,
+        "V buffer too small: {} < {uv_required}",
+        v_data.len()
+    );
+    debug_assert!(
+        output.len() >= output_required,
+        "Output buffer too small: {} < {output_required}",
+        output.len()
+    );
+
+    let simd_width = (width_usize / 8) * 8;
+
+    unsafe {
+        let c16 = _mm_set1_epi16(16);
+        let c128 = _mm_set1_epi16(128);
+        let c298 = _mm_set1_epi16(298);
+        let c409 = _mm_set1_epi16(409);
+        let c516 = _mm_set1_epi16(516);
+        let g_coeffs = _mm_set_epi16(-208, -100, -208, -100, -208, -100, -208, -100);
+        let round32 = _mm_set1_epi32(128);
+        let zero = _mm_setzero_si128();
+
+        for row in 0..height_usize {
+            let y_row_start = row * y_stride_usize;
+            let uv_row_start = (row / 2) * uv_stride_usize;
+            let out_row_start = row * width_usize * 4;
+
+            let mut col = 0usize;
+
+            while col + 8 <= simd_width {
+                let y_ptr = y_data.as_ptr().add(y_row_start + col);
+                let u_ptr = u_data.as_ptr().add(uv_row_start + col / 2);
+                let v_ptr = v_data.as_ptr().add(uv_row_start + col / 2);
+
+                let y8 = _mm_loadl_epi64(y_ptr as *const __m128i);
+                let y16 = _mm_unpacklo_epi8(y8, zero);
+                let y_adj = _mm_sub_epi16(y16, c16);
+
+                let u4 = _mm_cvtsi32_si128(std::ptr::read_unaligned(u_ptr as *const i32));
+                let v4 = _mm_cvtsi32_si128(std::ptr::read_unaligned(v_ptr as *const i32));
+
+                // Duplicate each chroma byte across its two pixels, then widen
+                // the low eight bytes to 16-bit lanes.
+                let u_dup = _mm_unpacklo_epi8(u4, u4);
+                let v_dup = _mm_unpacklo_epi8(v4, v4);
+
+                let u16 = _mm_unpacklo_epi8(u_dup, zero);
+                let v16 = _mm_unpacklo_epi8(v_dup, zero);
+
+                let d = _mm_sub_epi16(u16, c128);
+                let e = _mm_sub_epi16(v16, c128);
+
+                // Same 32-bit widening as the NV12 path: the 16-bit products
+                // would overflow for bright pixels.
+                let c_lo = _mm_madd_epi16(_mm_unpacklo_epi16(y_adj, zero), c298);
+                let c_hi = _mm_madd_epi16(_mm_unpackhi_epi16(y_adj, zero), c298);
+                let e409_lo = _mm_madd_epi16(_mm_unpacklo_epi16(e, zero), c409);
+                let e409_hi = _mm_madd_epi16(_mm_unpackhi_epi16(e, zero), c409);
+                let d516_lo = _mm_madd_epi16(_mm_unpacklo_epi16(d, zero), c516);
+                let d516_hi = _mm_madd_epi16(_mm_unpackhi_epi16(d, zero), c516);
+                let g_lo = _mm_madd_epi16(_mm_unpacklo_epi16(d, e), g_coeffs);
+                let g_hi = _mm_madd_epi16(_mm_unpackhi_epi16(d, e), g_coeffs);
+
+                let r32_lo = _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(c_lo, e409_lo), round32), 8);
+                let r32_hi = _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(c_hi, e409_hi), round32), 8);
+                let g32_lo = _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(c_lo, g_lo), round32), 8);
+                let g32_hi = _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(c_hi, g_hi), round32), 8);
+                let b32_lo = _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(c_lo, d516_lo), round32), 8);
+                let b32_hi = _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(c_hi, d516_hi), round32), 8);
+
+                let r = _mm_packus_epi16(_mm_packs_epi32(r32_lo, r32_hi), zero);
+                let g = _mm_packus_epi16(_mm_packs_epi32(g32_lo, g32_hi), zero);
+                let b = _mm_packus_epi16(_mm_packs_epi32(b32_lo, b32_hi), zero);
+                let a = _mm_set1_epi8(-1i8);
+
+                let rg_lo = _mm_unpacklo_epi8(r, g);
+                let ba_lo = _mm_unpacklo_epi8(b, a);
+                let rgba_lo = _mm_unpacklo_epi16(rg_lo, ba_lo);
+                let rgba_hi = _mm_unpackhi_epi16(rg_lo, ba_lo);
+
+                let out_ptr = output.as_mut_ptr().add(out_row_start + col * 4);
+                _mm_storeu_si128(out_ptr as *mut __m128i, rgba_lo);
+                _mm_storeu_si128(out_ptr.add(16) as *mut __m128i, rgba_hi);
+
+                col += 8;
+            }
+
+            for col in simd_width..width_usize {
+                let y_idx = y_row_start + col;
+                let uv_idx = uv_row_start + (col / 2);
+
+                let y = y_data.get(y_idx).copied().unwrap_or(0) as i32;
+                let u = u_data.get(uv_idx).copied().unwrap_or(128) as i32;
+                let v = v_data.get(uv_idx).copied().unwrap_or(128) as i32;
+
+                let c = y - 16;
+                let d = u - 128;
+                let e = v - 128;
+
+                let r = clamp_u8((298 * c + 409 * e + 128) >> 8);
+                let g = clamp_u8((298 * c - 100 * d - 208 * e + 128) >> 8);
+                let b = clamp_u8((298 * c + 516 * d + 128) >> 8);
+
+                let out_idx = out_row_start + col * 4;
+                if out_idx + 3 < output.len() {
+                    output[out_idx] = r;
+                    output[out_idx + 1] = g;
+                    output[out_idx + 2] = b;
+                    output[out_idx + 3] = 255;
+                }
+            }
+        }
+    }
+}
+
+#[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))]
+#[allow(clippy::too_many_arguments)]
+pub fn yuv420p_to_rgba_simd(
+    y_data: &[u8],
+    u_data: &[u8],
+    v_data: &[u8],
+    width: u32,
+    height: u32,
+    y_stride: u32,
+    uv_stride: u32,
+    output: &mut [u8],
+) {
+    yuv420p_to_rgba(
+        y_data, u_data, v_data, width, height, y_stride, uv_stride, output,
+    );
+}
+
+#[inline(always)]
+fn clamp_u8(val: i32) -> u8 {
+    val.clamp(0, 255) as u8
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_nv12_basic_conversion() {
+        let width = 4u32;
+        let height = 4u32;
+        let y_stride = 4u32;
+        let uv_stride = 4u32;
+
+        let y_data: Vec<u8> = vec![
+            128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+        ];
+        let uv_data: Vec<u8> = vec![128, 128, 128, 128, 128, 128, 128, 128];
+
+        let mut output = vec![0u8; (width * height * 4) as usize];
+        nv12_to_rgba(
+            &y_data,
+            &uv_data,
+            width,
+            height,
+            y_stride,
+            uv_stride,
+            &mut output,
+        );
+
+        for pixel in output.chunks(4) {
+            assert!(pixel[0] > 100 && pixel[0] < 140);
+            assert!(pixel[1] > 100 && pixel[1] < 140);
+            assert!(pixel[2] > 100 && pixel[2] < 140);
+            assert_eq!(pixel[3], 255);
+        }
+    }
+
+    #[test]
+    fn test_nv12_simd_matches_scalar() {
+        let width = 16u32;
+        let height = 8u32;
+        let y_stride = 16u32;
+        let uv_stride = 16u32;
+
+        let y_data: Vec<u8> = (0..width * height).map(|i| ((i * 7) % 256) as u8).collect();
+        let uv_data: Vec<u8> = (0..uv_stride * height / 2)
+            .map(|i| ((i * 11 + 64) % 256) as u8)
+            .collect();
+
+        let mut output_scalar = vec![0u8; (width * height * 4) as usize];
+        let mut output_simd = vec![0u8; (width * height * 4) as usize];
+
+        nv12_to_rgba(
+            &y_data,
+            &uv_data,
+            width,
+            height,
+            y_stride,
+            uv_stride,
+            &mut output_scalar,
+        );
+
+        nv12_to_rgba_simd(
+            &y_data,
+            &uv_data,
+            width,
+            height,
+            y_stride,
+            uv_stride,
+            &mut output_simd,
+        );
+
+        for (i, (s, d)) in output_scalar.iter().zip(output_simd.iter()).enumerate() {
+            let diff = (*s as i32 - *d as i32).abs();
+            assert!(
+                diff <= 2,
+                "Mismatch at index {}: scalar={}, simd={}, diff={}",
+                i,
+                s,
+                d,
+                diff
+            );
+        }
+    }
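+
+    // Planar-path counterpart of test_nv12_simd_matches_scalar: the SIMD and
+    // scalar YUV420P conversions should agree on every byte.
+    #[test]
+    fn test_yuv420p_simd_matches_scalar() {
+        let width = 16u32;
+        let height = 8u32;
+        let y_stride = 16u32;
+        let uv_stride = 8u32;
+
+        let y_data: Vec<u8> = (0..y_stride * height).map(|i| ((i * 7) % 256) as u8).collect();
+        let u_data: Vec<u8> = (0..uv_stride * height / 2)
+            .map(|i| ((i * 11 + 64) % 256) as u8)
+            .collect();
+        let v_data: Vec<u8> = (0..uv_stride * height / 2)
+            .map(|i| ((i * 13 + 32) % 256) as u8)
+            .collect();
+
+        let mut output_scalar = vec![0u8; (width * height * 4) as usize];
+        let mut output_simd = vec![0u8; (width * height * 4) as usize];
+
+        yuv420p_to_rgba(
+            &y_data,
+            &u_data,
+            &v_data,
+            width,
+            height,
+            y_stride,
+            uv_stride,
+            &mut output_scalar,
+        );
+
+        yuv420p_to_rgba_simd(
+            &y_data,
+            &u_data,
+            &v_data,
+            width,
+            height,
+            y_stride,
+            uv_stride,
+            &mut output_simd,
+        );
+
+        for (i, (s, d)) in output_scalar.iter().zip(output_simd.iter()).enumerate() {
+            let diff = (*s as i32 - *d as i32).abs();
+            assert!(
+                diff <= 2,
+                "Mismatch at index {}: scalar={}, simd={}, diff={}",
+                i,
+                s,
+                d,
+                diff
+            );
+        }
+    }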
+}
diff --git a/crates/rendering/src/d3d_texture.rs b/crates/rendering/src/d3d_texture.rs
new file mode 100644
index 0000000000..e7201faee1
--- /dev/null
+++ b/crates/rendering/src/d3d_texture.rs
@@ -0,0 +1,233 @@
+#[derive(Debug)]
+pub enum D3DTextureError {
+    NoD3D12Device,
+    TextureCreationFailed(String),
+    SharedHandleFailed(String),
+    WgpuImportFailed(String),
+    UnsupportedFormat,
+    DeviceMismatch,
+    #[cfg(not(target_os = "windows"))]
+    NotSupported,
+}
+
+impl std::fmt::Display for D3DTextureError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::NoD3D12Device => write!(f, "No D3D12 device available"),
+            Self::TextureCreationFailed(e) => write!(f, "Failed to create D3D texture: {e}"),
+            Self::SharedHandleFailed(e) => write!(f, "Failed to create shared handle: {e}"),
+            Self::WgpuImportFailed(e) => write!(f, "Failed to import texture to wgpu: {e}"),
+            Self::UnsupportedFormat => write!(f, "Unsupported pixel format for zero-copy"),
+            Self::DeviceMismatch => write!(f, "D3D11 
and D3D12 device mismatch"), + #[cfg(not(target_os = "windows"))] + Self::NotSupported => write!(f, "D3D textures are only supported on Windows"), + } + } +} + +impl std::error::Error for D3DTextureError {} + +#[cfg(target_os = "windows")] +mod windows_impl { + use super::D3DTextureError; + use windows::{ + Win32::{ + Foundation::HANDLE, + Graphics::{ + Direct3D11::{ + D3D11_BIND_SHADER_RESOURCE, D3D11_RESOURCE_MISC_SHARED_NTHANDLE, + D3D11_TEXTURE2D_DESC, D3D11_USAGE_DEFAULT, ID3D11Device, ID3D11Texture2D, + }, + Dxgi::Common::{ + DXGI_FORMAT, DXGI_FORMAT_B8G8R8A8_UNORM, DXGI_FORMAT_NV12, + DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM, + DXGI_FORMAT_UNKNOWN, DXGI_SAMPLE_DESC, + }, + }, + }, + core::Interface, + }; + + pub struct D3DTextureCache { + d3d11_device: ID3D11Device, + } + + impl D3DTextureCache { + pub fn new(d3d11_device: ID3D11Device) -> Self { + Self { d3d11_device } + } + + pub fn d3d11_device(&self) -> &ID3D11Device { + &self.d3d11_device + } + + pub fn create_shared_texture( + &self, + width: u32, + height: u32, + format: DXGI_FORMAT, + ) -> Result { + SharedD3D11Texture::create(&self.d3d11_device, width, height, format) + } + } + + pub struct SharedD3D11Texture { + pub texture: ID3D11Texture2D, + pub width: u32, + pub height: u32, + pub format: DXGI_FORMAT, + } + + impl SharedD3D11Texture { + pub fn create_nv12( + device: &ID3D11Device, + width: u32, + height: u32, + ) -> Result { + Self::create(device, width, height, DXGI_FORMAT_NV12) + } + + pub fn create_r8( + device: &ID3D11Device, + width: u32, + height: u32, + ) -> Result { + Self::create(device, width, height, DXGI_FORMAT_R8_UNORM) + } + + pub fn create_rg8( + device: &ID3D11Device, + width: u32, + height: u32, + ) -> Result { + Self::create(device, width, height, DXGI_FORMAT_R8G8_UNORM) + } + + pub fn create( + device: &ID3D11Device, + width: u32, + height: u32, + format: DXGI_FORMAT, + ) -> Result { + let desc = D3D11_TEXTURE2D_DESC { + Width: width, + Height: height, + MipLevels: 1, + ArraySize: 1, + Format: format, + SampleDesc: DXGI_SAMPLE_DESC { + Count: 1, + Quality: 0, + }, + Usage: D3D11_USAGE_DEFAULT, + BindFlags: D3D11_BIND_SHADER_RESOURCE.0 as u32, + CPUAccessFlags: 0, + MiscFlags: D3D11_RESOURCE_MISC_SHARED_NTHANDLE.0 as u32, + }; + + let texture = unsafe { + let mut texture: Option = None; + device + .CreateTexture2D(&desc, None, Some(&mut texture)) + .map_err(|e| D3DTextureError::TextureCreationFailed(format!("{e:?}")))?; + texture.ok_or_else(|| { + D3DTextureError::TextureCreationFailed( + "CreateTexture2D returned null".to_string(), + ) + })? 
+ }; + + Ok(Self { + texture, + width, + height, + format, + }) + } + + pub fn as_raw_ptr(&self) -> *mut std::ffi::c_void { + self.texture.as_raw() + } + } + + pub struct D3DYuvTextures { + pub y_texture: SharedD3D11Texture, + pub uv_texture: SharedD3D11Texture, + pub width: u32, + pub height: u32, + } + + impl D3DYuvTextures { + pub fn create_nv12( + device: &ID3D11Device, + width: u32, + height: u32, + ) -> Result { + let y_texture = SharedD3D11Texture::create_r8(device, width, height)?; + let uv_texture = SharedD3D11Texture::create_rg8(device, width / 2, height / 2)?; + + Ok(Self { + y_texture, + uv_texture, + width, + height, + }) + } + } + + pub fn wgpu_to_dxgi_format(format: wgpu::TextureFormat) -> DXGI_FORMAT { + match format { + wgpu::TextureFormat::R8Unorm => DXGI_FORMAT_R8_UNORM, + wgpu::TextureFormat::Rg8Unorm => DXGI_FORMAT_R8G8_UNORM, + wgpu::TextureFormat::Rgba8Unorm => DXGI_FORMAT_R8G8B8A8_UNORM, + wgpu::TextureFormat::Bgra8Unorm => DXGI_FORMAT_B8G8R8A8_UNORM, + _ => DXGI_FORMAT_UNKNOWN, + } + } + + #[allow(unused_variables)] + pub fn import_d3d11_texture_to_wgpu( + device: &wgpu::Device, + shared_handle: HANDLE, + format: wgpu::TextureFormat, + width: u32, + height: u32, + label: Option<&str>, + ) -> Result { + Err(D3DTextureError::WgpuImportFailed( + "D3D11-to-wgpu HAL interop not yet implemented - requires wgpu HAL API updates" + .to_string(), + )) + } + + #[derive(Default)] + pub struct D3D11WgpuInterop { + _cached_width: u32, + _cached_height: u32, + _y_wgpu_texture: Option, + _uv_wgpu_texture: Option, + } + + impl D3D11WgpuInterop { + pub fn new() -> Self { + Self::default() + } + + #[allow(unused_variables)] + pub fn import_nv12_planes( + &mut self, + device: &wgpu::Device, + y_handle: HANDLE, + uv_handle: HANDLE, + width: u32, + height: u32, + ) -> Result<(&wgpu::Texture, &wgpu::Texture), D3DTextureError> { + Err(D3DTextureError::WgpuImportFailed( + "D3D11-to-wgpu HAL interop not yet implemented".to_string(), + )) + } + } +} + +#[cfg(target_os = "windows")] +pub use windows_impl::*; diff --git a/crates/rendering/src/decoder/ffmpeg.rs b/crates/rendering/src/decoder/ffmpeg.rs index 6a16856959..450e8db077 100644 --- a/crates/rendering/src/decoder/ffmpeg.rs +++ b/crates/rendering/src/decoder/ffmpeg.rs @@ -1,3 +1,5 @@ +#![allow(dead_code)] + use ffmpeg::{format, frame, sys::AVHWDeviceType}; use log::debug; use std::{ diff --git a/crates/rendering/src/decoder/frame_converter.rs b/crates/rendering/src/decoder/frame_converter.rs index 30b6d40add..27a24fcaca 100644 --- a/crates/rendering/src/decoder/frame_converter.rs +++ b/crates/rendering/src/decoder/frame_converter.rs @@ -1,3 +1,5 @@ +#![allow(dead_code)] + use ffmpeg::{format, frame, software}; pub struct FrameConverter { diff --git a/crates/rendering/src/decoder/media_foundation.rs b/crates/rendering/src/decoder/media_foundation.rs new file mode 100644 index 0000000000..2963f09c04 --- /dev/null +++ b/crates/rendering/src/decoder/media_foundation.rs @@ -0,0 +1,258 @@ +use std::{ + collections::BTreeMap, + path::PathBuf, + sync::{Arc, mpsc}, +}; +use tokio::sync::oneshot; +use tracing::{debug, info, warn}; +use windows::Win32::{Foundation::HANDLE, Graphics::Direct3D11::ID3D11Texture2D}; + +use super::{DecodedFrame, FRAME_CACHE_SIZE, VideoDecoderMessage}; + +#[derive(Clone)] +struct CachedFrame { + number: u32, + _texture: ID3D11Texture2D, + _shared_handle: Option, + _y_handle: Option, + _uv_handle: Option, + nv12_data: Option>, + width: u32, + height: u32, +} + +impl CachedFrame { + fn to_decoded_frame(&self) -> 
DecodedFrame {
+        if let Some(nv12_data) = &self.nv12_data {
+            DecodedFrame::new_nv12(
+                nv12_data.data.clone(),
+                self.width,
+                self.height,
+                nv12_data.y_stride,
+                nv12_data.uv_stride,
+            )
+        } else {
+            warn!(
+                "CachedFrame has no CPU data, creating black frame (D3D11 zero-copy not implemented)"
+            );
+            // NV12 black is Y = 16 with U = V = 128; an all-zero buffer decodes
+            // to bright green through the BT.601 matrix.
+            let y_size = (self.width * self.height) as usize;
+            let mut black_data = vec![16u8; y_size + y_size / 2];
+            black_data[y_size..].fill(128);
+            DecodedFrame::new_nv12(black_data, self.width, self.height, self.width, self.width)
+        }
+    }
+}
+
+pub struct MFDecoder;
+
+impl MFDecoder {
+    pub fn spawn(
+        name: &'static str,
+        path: PathBuf,
+        fps: u32,
+        rx: mpsc::Receiver<VideoDecoderMessage>,
+        ready_tx: oneshot::Sender<Result<(), String>>,
+    ) -> Result<(), String> {
+        let (continue_tx, continue_rx) = mpsc::channel();
+
+        std::thread::spawn(move || {
+            let mut decoder = match cap_video_decode::MediaFoundationDecoder::new(&path) {
+                Err(e) => {
+                    let _ = continue_tx.send(Err(e));
+                    return;
+                }
+                Ok(v) => {
+                    info!(
+                        "MediaFoundation decoder created for '{}': {}x{} @ {:?}fps",
+                        name,
+                        v.width(),
+                        v.height(),
+                        v.frame_rate()
+                    );
+                    let _ = continue_tx.send(Ok(()));
+                    v
+                }
+            };
+
+            let video_width = decoder.width();
+            let video_height = decoder.height();
+
+            let mut cache = BTreeMap::<u32, CachedFrame>::new();
+            let mut last_decoded_frame: Option<u32> = None;
+
+            let _ = ready_tx.send(Ok(()));
+
+            while let Ok(r) = rx.recv() {
+                match r {
+                    VideoDecoderMessage::GetFrame(requested_time, sender) => {
+                        if sender.is_closed() {
+                            continue;
+                        }
+
+                        let requested_frame = (requested_time * fps as f32).floor() as u32;
+
+                        if let Some(cached) = cache.get(&requested_frame) {
+                            if sender.send(cached.to_decoded_frame()).is_err() {
+                                warn!(
+                                    "Failed to send cached frame {requested_frame}: receiver dropped"
+                                );
+                            }
+                            continue;
+                        }
+
+                        let cache_min = requested_frame.saturating_sub(FRAME_CACHE_SIZE as u32 / 2);
+                        let cache_max = requested_frame + FRAME_CACHE_SIZE as u32 / 2;
+
+                        let needs_seek = last_decoded_frame
+                            .map(|last| {
+                                requested_frame < last
+                                    || requested_frame.saturating_sub(last)
+                                        > FRAME_CACHE_SIZE as u32
+                            })
+                            .unwrap_or(true);
+
+                        if needs_seek {
+                            debug!("MediaFoundation seeking to frame {requested_frame}");
+                            let time_100ns = frame_to_100ns(requested_frame, fps);
+                            if let Err(e) = decoder.seek(time_100ns) {
+                                warn!("MediaFoundation seek failed: {e}");
+                            }
+                            cache.clear();
+                            last_decoded_frame = None;
+                        }
+
+                        let mut sender = Some(sender);
+                        let mut last_valid_frame: Option<CachedFrame> = None;
+
+                        loop {
+                            match decoder.read_sample() {
+                                Ok(Some(mf_frame)) => {
+                                    let frame_number = pts_100ns_to_frame(mf_frame.pts, fps);
+
+                                    let nv12_data = match decoder.read_texture_to_cpu(
+                                        &mf_frame.texture,
+                                        mf_frame.width,
+                                        mf_frame.height,
+                                    ) {
+                                        Ok(data) => {
+                                            debug!(
+                                                frame = frame_number,
+                                                data_len = data.data.len(),
+                                                y_stride = data.y_stride,
+                                                uv_stride = data.uv_stride,
+                                                width = mf_frame.width,
+                                                height = mf_frame.height,
+                                                "read_texture_to_cpu succeeded"
+                                            );
+                                            Some(Arc::new(data))
+                                        }
+                                        Err(e) => {
+                                            warn!(
+                                                "Failed to read texture to CPU for frame {frame_number}: {e}"
+                                            );
+                                            None
+                                        }
+                                    };
+
+                                    let cached = CachedFrame {
+                                        number: frame_number,
+                                        _texture: mf_frame.texture,
+                                        _shared_handle: mf_frame.shared_handle,
+                                        _y_handle: mf_frame.y_handle,
+                                        _uv_handle: mf_frame.uv_handle,
+                                        nv12_data,
+                                        width: mf_frame.width,
+                                        height: mf_frame.height,
+                                    };
+
+                                    last_decoded_frame = Some(frame_number);
+
+                                    if frame_number >= cache_min && frame_number <= cache_max {
+                                        if cache.len() >= FRAME_CACHE_SIZE {
+                                            // Evict from the far end of the cache window.
+                                            let key_to_remove = if frame_number > requested_frame {
+                                                *cache.keys().next().unwrap()
+                                            } else {
+                                                *cache.keys().next_back().unwrap()
+                                            };
+                                            cache.remove(&key_to_remove);
+                                        }
+                                        cache.insert(frame_number, cached.clone());
+                                    }
+
+                                    if frame_number <= requested_frame {
+                                        last_valid_frame = Some(cached);
+                                    }
+
+                                    if frame_number >= requested_frame {
+                                        let frame_to_send = if frame_number == requested_frame {
+                                            cache.get(&requested_frame)
+                                        } else {
+                                            last_valid_frame
+                                                .as_ref()
+                                                .or_else(|| cache.get(&frame_number))
+                                        };
+
+                                        if let Some(frame) = frame_to_send
+                                            && let Some(s) = sender.take()
+                                            && s.send(frame.to_decoded_frame()).is_err()
+                                        {
+                                            warn!(
+                                                "Failed to send frame {}: receiver dropped",
+                                                frame.number
+                                            );
+                                        }
+                                        break;
+                                    }
+
+                                    if frame_number > cache_max {
+                                        break;
+                                    }
+                                }
+                                Ok(None) => {
+                                    debug!("MediaFoundation end of stream");
+                                    break;
+                                }
+                                Err(e) => {
+                                    warn!("MediaFoundation read_sample error: {e}");
+                                    break;
+                                }
+                            }
+                        }
+
+                        if let Some(s) = sender.take() {
+                            if let Some(frame) = last_valid_frame
+                                .or_else(|| cache.values().max_by_key(|f| f.number).cloned())
+                            {
+                                if s.send(frame.to_decoded_frame()).is_err() {
+                                    warn!("Failed to send fallback frame: receiver dropped");
+                                }
+                            } else {
+                                debug!(
+                                    "No frames available for request {requested_frame}, sending black frame"
+                                );
+                                // Opaque black: leave RGB at zero but set alpha, otherwise
+                                // the RGBA frame composites as fully transparent.
+                                let mut black =
+                                    vec![0u8; (video_width * video_height * 4) as usize];
+                                for px in black.chunks_exact_mut(4) {
+                                    px[3] = 255;
+                                }
+                                let black_frame =
+                                    DecodedFrame::new(black, video_width, video_height);
+                                if s.send(black_frame).is_err() {
+                                    warn!("Failed to send black frame: receiver dropped");
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        });
+
+        continue_rx.recv().map_err(|e| e.to_string())??;
+
+        Ok(())
+    }
+}
+
+fn frame_to_100ns(frame: u32, fps: u32) -> i64 {
+    ((frame as i64) * 10_000_000) / (fps as i64)
+}
+
+fn pts_100ns_to_frame(pts_100ns: i64, fps: u32) -> u32 {
+    // Round to the nearest frame: sample times are typically truncated
+    // multiples of 10_000_000 / fps, so flooring here numbers every frame
+    // one too low at rates that do not divide 10_000_000 evenly.
+    ((pts_100ns * fps as i64 + 5_000_000) / 10_000_000) as u32
+}
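+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Sanity check for the 100ns-tick helpers: converting a frame index to a
+    // timestamp and back should be the identity at common frame rates.
+    #[test]
+    fn frame_pts_round_trip() {
+        for fps in [24u32, 30, 60] {
+            for frame in [0u32, 1, 29, 30, 59, 1000] {
+                assert_eq!(pts_100ns_to_frame(frame_to_100ns(frame, fps), fps), frame);
+            }
+        }
+    }
+}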
diff --git a/crates/rendering/src/decoder/mod.rs b/crates/rendering/src/decoder/mod.rs
index c0ebef38a1..ca83553cfc 100644
--- a/crates/rendering/src/decoder/mod.rs
+++ b/crates/rendering/src/decoder/mod.rs
@@ -11,10 +11,15 @@ use tracing::debug;
 mod avassetreader;
 mod ffmpeg;
 mod frame_converter;
+#[cfg(target_os = "windows")]
+mod media_foundation;
 
 #[cfg(target_os = "macos")]
 use cidre::{arc::R, cv};
 
+#[cfg(target_os = "windows")]
+use windows::Win32::{Foundation::HANDLE, Graphics::Direct3D11::ID3D11Texture2D};
+
 #[cfg(target_os = "macos")]
 pub struct SendableImageBuf(R<cv::ImageBuf>);
 
@@ -34,6 +39,70 @@ impl SendableImageBuf {
     }
 }
 
+#[cfg(target_os = "windows")]
+pub struct SendableD3D11Texture {
+    texture: ID3D11Texture2D,
+    shared_handle: Option<HANDLE>,
+    y_handle: Option<HANDLE>,
+    uv_handle: Option<HANDLE>,
+}
+
+#[cfg(target_os = "windows")]
+unsafe impl Send for SendableD3D11Texture {}
+#[cfg(target_os = "windows")]
+unsafe impl Sync for SendableD3D11Texture {}
+
+#[cfg(target_os = "windows")]
+impl SendableD3D11Texture {
+    pub fn new(texture: ID3D11Texture2D) -> Self {
+        Self {
+            texture,
+            shared_handle: None,
+            y_handle: None,
+            uv_handle: None,
+        }
+    }
+
+    pub fn new_with_handle(texture: ID3D11Texture2D, shared_handle: Option<HANDLE>) -> Self {
+        Self {
+            texture,
+            shared_handle,
+            y_handle: None,
+            uv_handle: None,
+        }
+    }
+
+    pub fn new_with_yuv_handles(
+        texture: ID3D11Texture2D,
+        shared_handle: Option<HANDLE>,
+        y_handle: Option<HANDLE>,
+        uv_handle: Option<HANDLE>,
+    ) -> Self {
+        Self {
+            texture,
+            shared_handle,
+            y_handle,
+            uv_handle,
+        }
+    }
+
+    pub fn inner(&self) -> &ID3D11Texture2D {
+        &self.texture
+    }
+
+    pub fn shared_handle(&self) -> Option<HANDLE> {
+        self.shared_handle
+    }
+
+    pub fn y_handle(&self) -> Option<HANDLE> {
+        self.y_handle
+    }
+
+    pub fn uv_handle(&self) -> Option<HANDLE> {
+        self.uv_handle
+    }
+}
+
 #[derive(Clone, Copy, 
Debug, PartialEq, Eq)] pub enum PixelFormat { Rgba, @@ -51,6 +120,8 @@ pub struct DecodedFrame { uv_stride: u32, #[cfg(target_os = "macos")] iosurface_backing: Option>, + #[cfg(target_os = "windows")] + d3d11_texture_backing: Option>, } impl fmt::Debug for DecodedFrame { @@ -77,6 +148,8 @@ impl DecodedFrame { uv_stride: 0, #[cfg(target_os = "macos")] iosurface_backing: None, + #[cfg(target_os = "windows")] + d3d11_texture_backing: None, } } @@ -90,6 +163,8 @@ impl DecodedFrame { uv_stride, #[cfg(target_os = "macos")] iosurface_backing: None, + #[cfg(target_os = "windows")] + d3d11_texture_backing: None, } } @@ -109,6 +184,8 @@ impl DecodedFrame { uv_stride, #[cfg(target_os = "macos")] iosurface_backing: None, + #[cfg(target_os = "windows")] + d3d11_texture_backing: None, } } @@ -137,6 +214,91 @@ impl DecodedFrame { self.iosurface_backing.as_ref().map(|b| b.inner()) } + #[cfg(target_os = "windows")] + pub fn new_nv12_with_d3d11_texture(width: u32, height: u32, texture: ID3D11Texture2D) -> Self { + Self { + data: Arc::new(Vec::new()), + width, + height, + format: PixelFormat::Nv12, + y_stride: width, + uv_stride: width, + d3d11_texture_backing: Some(Arc::new(SendableD3D11Texture::new(texture))), + } + } + + #[cfg(target_os = "windows")] + pub fn new_nv12_with_d3d11_texture_and_handle( + width: u32, + height: u32, + texture: ID3D11Texture2D, + shared_handle: Option, + ) -> Self { + Self { + data: Arc::new(Vec::new()), + width, + height, + format: PixelFormat::Nv12, + y_stride: width, + uv_stride: width, + d3d11_texture_backing: Some(Arc::new(SendableD3D11Texture::new_with_handle( + texture, + shared_handle, + ))), + } + } + + #[cfg(target_os = "windows")] + pub fn new_nv12_with_d3d11_texture_and_yuv_handles( + width: u32, + height: u32, + texture: ID3D11Texture2D, + shared_handle: Option, + y_handle: Option, + uv_handle: Option, + ) -> Self { + Self { + data: Arc::new(Vec::new()), + width, + height, + format: PixelFormat::Nv12, + y_stride: width, + uv_stride: width, + d3d11_texture_backing: Some(Arc::new(SendableD3D11Texture::new_with_yuv_handles( + texture, + shared_handle, + y_handle, + uv_handle, + ))), + } + } + + #[cfg(target_os = "windows")] + pub fn d3d11_texture_backing(&self) -> Option<&ID3D11Texture2D> { + self.d3d11_texture_backing.as_ref().map(|b| b.inner()) + } + + #[cfg(target_os = "windows")] + pub fn d3d11_shared_handle(&self) -> Option { + self.d3d11_texture_backing + .as_ref() + .and_then(|b| b.shared_handle()) + } + + #[cfg(target_os = "windows")] + pub fn d3d11_y_handle(&self) -> Option { + self.d3d11_texture_backing + .as_ref() + .and_then(|b| b.y_handle()) + } + + #[cfg(target_os = "windows")] + pub fn d3d11_uv_handle(&self) -> Option { + self.d3d11_texture_backing + .as_ref() + .and_then(|b| b.uv_handle()) + } + pub fn data(&self) -> &[u8] { &self.data } @@ -276,10 +438,44 @@ pub async fn spawn_decoder( let path_display = path.display().to_string(); - if cfg!(target_os = "macos") { - #[cfg(target_os = "macos")] + #[cfg(target_os = "macos")] + { avassetreader::AVAssetReaderDecoder::spawn(name, path, fps, rx, ready_tx); - } else { + } + + #[cfg(target_os = "windows")] + { + match media_foundation::MFDecoder::spawn(name, path.clone(), fps, rx, ready_tx) { + Ok(()) => { + debug!("Using MediaFoundation decoder for '{name}'"); + } + Err(mf_err) => { + debug!( + "MediaFoundation decoder failed for '{name}': {mf_err}, falling back to FFmpeg" + ); + let (ready_tx, ready_rx_new) = oneshot::channel::>(); + let (tx, rx) = mpsc::channel(); + let handle = AsyncVideoDecoderHandle { 
sender: tx, offset }; + + ffmpeg::FfmpegDecoder::spawn(name, path, fps, rx, ready_tx) + .map_err(|e| format!("'{name}' decoder / {e}"))?; + + return match tokio::time::timeout(std::time::Duration::from_secs(30), ready_rx_new) + .await + { + Ok(result) => result + .map_err(|e| format!("'{name}' decoder channel closed: {e}"))? + .map(|()| handle), + Err(_) => Err(format!( + "'{name}' decoder timed out after 30s initializing: {path_display}" + )), + }; + } + } + } + + #[cfg(not(any(target_os = "macos", target_os = "windows")))] + { ffmpeg::FfmpegDecoder::spawn(name, path, fps, rx, ready_tx) .map_err(|e| format!("'{name}' decoder / {e}"))?; } diff --git a/crates/rendering/src/frame_pipeline.rs b/crates/rendering/src/frame_pipeline.rs index bbeeef8c3f..49084728ee 100644 --- a/crates/rendering/src/frame_pipeline.rs +++ b/crates/rendering/src/frame_pipeline.rs @@ -195,7 +195,7 @@ impl RenderSession { mip_level_count: 1, sample_count: 1, dimension: wgpu::TextureDimension::D2, - format: wgpu::TextureFormat::Rgba8UnormSrgb, + format: wgpu::TextureFormat::Rgba8Unorm, usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::COPY_SRC, @@ -230,7 +230,7 @@ impl RenderSession { mip_level_count: 1, sample_count: 1, dimension: wgpu::TextureDimension::D2, - format: wgpu::TextureFormat::Rgba8UnormSrgb, + format: wgpu::TextureFormat::Rgba8Unorm, usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::COPY_SRC, diff --git a/crates/rendering/src/layers/background.rs b/crates/rendering/src/layers/background.rs index 199245f58e..7413117bfb 100644 --- a/crates/rendering/src/layers/background.rs +++ b/crates/rendering/src/layers/background.rs @@ -136,7 +136,7 @@ impl BackgroundLayer { mip_level_count: 1, sample_count: 1, dimension: wgpu::TextureDimension::D2, - format: wgpu::TextureFormat::Rgba8UnormSrgb, + format: wgpu::TextureFormat::Rgba8Unorm, usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST, view_formats: &[], @@ -337,7 +337,7 @@ impl ImageBackgroundPipeline { module: &shader, entry_point: Some("fs_main"), targets: &[Some(wgpu::ColorTargetState { - format: wgpu::TextureFormat::Rgba8UnormSrgb, + format: wgpu::TextureFormat::Rgba8Unorm, blend: Some(wgpu::BlendState::REPLACE), write_mask: wgpu::ColorWrites::ALL, })], diff --git a/crates/rendering/src/layers/blur.rs b/crates/rendering/src/layers/blur.rs index 2aa75c82ca..59e202ac5f 100644 --- a/crates/rendering/src/layers/blur.rs +++ b/crates/rendering/src/layers/blur.rs @@ -148,7 +148,7 @@ impl BlurPipeline { module: &shader, entry_point: Some("fs_main"), targets: &[Some(wgpu::ColorTargetState { - format: wgpu::TextureFormat::Rgba8UnormSrgb, + format: wgpu::TextureFormat::Rgba8Unorm, blend: Some(wgpu::BlendState::REPLACE), write_mask: wgpu::ColorWrites::ALL, })], diff --git a/crates/rendering/src/layers/camera.rs b/crates/rendering/src/layers/camera.rs index 85ec8a4bab..17754734e0 100644 --- a/crates/rendering/src/layers/camera.rs +++ b/crates/rendering/src/layers/camera.rs @@ -213,7 +213,7 @@ impl CameraLayer { { pass.set_pipeline(&self.pipeline.render_pipeline); pass.set_bind_group(0, bind_group, &[]); - pass.draw(0..4, 0..1); + pass.draw(0..3, 0..1); } } } diff --git a/crates/rendering/src/layers/captions.rs b/crates/rendering/src/layers/captions.rs index 667b1a3f3a..b17b451bba 100644 --- a/crates/rendering/src/layers/captions.rs +++ b/crates/rendering/src/layers/captions.rs @@ -265,8 +265,7 @@ impl CaptionsLayer { let swash_cache = 
SwashCache::new(); let cache = Cache::new(device); let viewport = Viewport::new(device, &cache); - let mut text_atlas = - TextAtlas::new(device, queue, &cache, wgpu::TextureFormat::Rgba8UnormSrgb); + let mut text_atlas = TextAtlas::new(device, queue, &cache, wgpu::TextureFormat::Rgba8Unorm); let text_renderer = TextRenderer::new( &mut text_atlas, device, @@ -339,7 +338,7 @@ impl CaptionsLayer { module: &background_shader, entry_point: Some("fs_main"), targets: &[Some(wgpu::ColorTargetState { - format: wgpu::TextureFormat::Rgba8UnormSrgb, + format: wgpu::TextureFormat::Rgba8Unorm, blend: Some(wgpu::BlendState::ALPHA_BLENDING), write_mask: wgpu::ColorWrites::ALL, })], diff --git a/crates/rendering/src/layers/cursor.rs b/crates/rendering/src/layers/cursor.rs index 59ce163597..2fa10cbbdd 100644 --- a/crates/rendering/src/layers/cursor.rs +++ b/crates/rendering/src/layers/cursor.rs @@ -102,7 +102,7 @@ impl Statics { module: &shader, entry_point: Some("fs_main"), targets: &[Some(wgpu::ColorTargetState { - format: wgpu::TextureFormat::Rgba8UnormSrgb, + format: wgpu::TextureFormat::Rgba8Unorm, blend: Some(wgpu::BlendState { color: wgpu::BlendComponent { src_factor: wgpu::BlendFactor::One, @@ -677,7 +677,7 @@ impl CursorTexture { mip_level_count: 1, sample_count: 1, dimension: wgpu::TextureDimension::D2, - format: wgpu::TextureFormat::Rgba8UnormSrgb, + format: wgpu::TextureFormat::Rgba8Unorm, usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST, view_formats: &[], }); diff --git a/crates/rendering/src/layers/display.rs b/crates/rendering/src/layers/display.rs index f6aee5362e..78e3dd9fce 100644 --- a/crates/rendering/src/layers/display.rs +++ b/crates/rendering/src/layers/display.rs @@ -22,10 +22,16 @@ pub struct DisplayLayer { last_recording_time: Option, yuv_converter: YuvToRgbaConverter, pending_copy: Option, + prefer_cpu_conversion: bool, } impl DisplayLayer { + #[allow(dead_code)] pub fn new(device: &wgpu::Device) -> Self { + Self::new_with_options(device, false) + } + + pub fn new_with_options(device: &wgpu::Device, prefer_cpu_conversion: bool) -> Self { let frame_texture_0 = CompositeVideoFramePipeline::create_frame_texture(device, 1920, 1080); let frame_texture_1 = CompositeVideoFramePipeline::create_frame_texture(device, 1920, 1080); let frame_texture_view_0 = frame_texture_0.create_view(&Default::default()); @@ -40,6 +46,10 @@ impl DisplayLayer { let yuv_converter = YuvToRgbaConverter::new(device); + if prefer_cpu_conversion { + tracing::info!("DisplayLayer initialized with CPU YUV conversion preference"); + } + Self { frame_textures: [frame_texture_0, frame_texture_1], frame_texture_views: [frame_texture_view_0, frame_texture_view_1], @@ -50,6 +60,7 @@ impl DisplayLayer { last_recording_time: None, yuv_converter, pending_copy: None, + prefer_cpu_conversion, } } @@ -69,6 +80,15 @@ impl DisplayLayer { let format = segment_frames.screen_frame.format(); let current_recording_time = segment_frames.recording_time; + tracing::trace!( + format = ?format, + actual_width, + actual_height, + frame_data_len = frame_data.len(), + recording_time = current_recording_time, + "DisplayLayer::prepare - frame info" + ); + let skipped = self .last_recording_time .is_some_and(|last| (last - current_recording_time).abs() < f32::EPSILON); @@ -130,14 +150,64 @@ impl DisplayLayer { }); #[cfg(target_os = "macos")] - if let Some(Ok(_)) = iosurface_result { - if self.yuv_converter.output_texture().is_some() { - self.pending_copy = Some(PendingTextureCopy { - width: frame_size.x, - height: 
frame_size.y, - dst_texture_index: next_texture, - }); - true + if !self.prefer_cpu_conversion { + if let Some(Ok(_)) = iosurface_result { + if self.yuv_converter.output_texture().is_some() { + self.pending_copy = Some(PendingTextureCopy { + width: frame_size.x, + height: frame_size.y, + dst_texture_index: next_texture, + }); + true + } else { + false + } + } else if let (Some(y_data), Some(uv_data)) = + (screen_frame.y_plane(), screen_frame.uv_plane()) + { + let y_stride = screen_frame.y_stride(); + let uv_stride = screen_frame.uv_stride(); + let convert_result = self.yuv_converter.convert_nv12( + device, + queue, + y_data, + uv_data, + frame_size.x, + frame_size.y, + y_stride, + uv_stride, + ); + + match convert_result { + Ok(_) => { + if self.yuv_converter.output_texture().is_some() { + self.pending_copy = Some(PendingTextureCopy { + width: frame_size.x, + height: frame_size.y, + dst_texture_index: next_texture, + }); + true + } else { + tracing::debug!( + width = frame_size.x, + height = frame_size.y, + y_stride, + "NV12 conversion succeeded but output texture is None, skipping copy" + ); + false + } + } + Err(e) => { + tracing::debug!( + error = ?e, + width = frame_size.x, + height = frame_size.y, + y_stride, + "NV12 to RGBA conversion failed" + ); + false + } + } } else { false } @@ -146,7 +216,7 @@ impl DisplayLayer { { let y_stride = screen_frame.y_stride(); let uv_stride = screen_frame.uv_stride(); - let convert_result = self.yuv_converter.convert_nv12( + let convert_result = self.yuv_converter.convert_nv12_cpu( device, queue, y_data, @@ -167,23 +237,11 @@ impl DisplayLayer { }); true } else { - tracing::debug!( - width = frame_size.x, - height = frame_size.y, - y_stride, - "NV12 conversion succeeded but output texture is None, skipping copy" - ); false } } Err(e) => { - tracing::debug!( - error = ?e, - width = frame_size.x, - height = frame_size.y, - y_stride, - "NV12 to RGBA conversion failed" - ); + tracing::debug!(error = ?e, "CPU NV12 conversion failed"); false } } @@ -191,22 +249,162 @@ impl DisplayLayer { false } - #[cfg(not(target_os = "macos"))] + #[cfg(target_os = "windows")] + { + let mut d3d11_succeeded = false; + + let has_y_handle = screen_frame.d3d11_y_handle().is_some(); + let has_uv_handle = screen_frame.d3d11_uv_handle().is_some(); + let has_y_plane = screen_frame.y_plane().is_some(); + let has_uv_plane = screen_frame.uv_plane().is_some(); + + tracing::debug!( + has_y_handle, + has_uv_handle, + has_y_plane, + has_uv_plane, + data_len = screen_frame.data().len(), + y_stride = screen_frame.y_stride(), + uv_stride = screen_frame.uv_stride(), + actual_width, + actual_height, + frame_size_x = frame_size.x, + frame_size_y = frame_size.y, + "Windows NV12 frame info" + ); + + if let (Some(y_handle), Some(uv_handle)) = ( + screen_frame.d3d11_y_handle(), + screen_frame.d3d11_uv_handle(), + ) { + tracing::trace!("Using D3D11 zero-copy path for NV12 conversion"); + match self.yuv_converter.convert_nv12_from_d3d11_shared_handles( + device, + queue, + y_handle, + uv_handle, + actual_width, + actual_height, + ) { + Ok(_) => { + if self.yuv_converter.output_texture().is_some() { + self.pending_copy = Some(PendingTextureCopy { + width: actual_width, + height: actual_height, + dst_texture_index: next_texture, + }); + d3d11_succeeded = true; + } + } + Err(e) => { + tracing::debug!(error = ?e, "D3D11 zero-copy conversion failed, falling back to CPU path"); + } + } + } + + if d3d11_succeeded { + true + } else if let (Some(y_data), Some(uv_data)) = + (screen_frame.y_plane(), 
screen_frame.uv_plane()) + { + let y_stride = screen_frame.y_stride(); + let uv_stride = screen_frame.uv_stride(); + + tracing::debug!( + y_data_len = y_data.len(), + uv_data_len = uv_data.len(), + y_stride, + uv_stride, + actual_width, + actual_height, + prefer_cpu = self.prefer_cpu_conversion, + "Attempting NV12 conversion" + ); + + let convert_result = if self.prefer_cpu_conversion { + self.yuv_converter.convert_nv12_cpu( + device, + queue, + y_data, + uv_data, + actual_width, + actual_height, + y_stride, + uv_stride, + ) + } else { + self.yuv_converter.convert_nv12( + device, + queue, + y_data, + uv_data, + actual_width, + actual_height, + y_stride, + uv_stride, + ) + }; + + match convert_result { + Ok(_) => { + tracing::debug!("NV12 conversion succeeded"); + if self.yuv_converter.output_texture().is_some() { + self.pending_copy = Some(PendingTextureCopy { + width: actual_width, + height: actual_height, + dst_texture_index: next_texture, + }); + true + } else { + tracing::warn!( + "NV12 conversion succeeded but output texture is None" + ); + false + } + } + Err(e) => { + tracing::warn!(error = ?e, "NV12 conversion failed"); + false + } + } + } else { + tracing::warn!( + "No D3D11 handles and no CPU data available for NV12 frame" + ); + false + } + } + + #[cfg(not(any(target_os = "macos", target_os = "windows")))] if let (Some(y_data), Some(uv_data)) = (screen_frame.y_plane(), screen_frame.uv_plane()) { let y_stride = screen_frame.y_stride(); let uv_stride = screen_frame.uv_stride(); - let convert_result = self.yuv_converter.convert_nv12( - device, - queue, - y_data, - uv_data, - frame_size.x, - frame_size.y, - y_stride, - uv_stride, - ); + + let convert_result = if self.prefer_cpu_conversion { + self.yuv_converter.convert_nv12_cpu( + device, + queue, + y_data, + uv_data, + frame_size.x, + frame_size.y, + y_stride, + uv_stride, + ) + } else { + self.yuv_converter.convert_nv12( + device, + queue, + y_data, + uv_data, + frame_size.x, + frame_size.y, + y_stride, + uv_stride, + ) + }; match convert_result { Ok(_) => { @@ -218,25 +416,10 @@ impl DisplayLayer { }); true } else { - tracing::debug!( - width = frame_size.x, - height = frame_size.y, - y_stride, - "NV12 conversion succeeded but output texture is None, skipping copy" - ); false } } - Err(e) => { - tracing::debug!( - error = ?e, - width = frame_size.x, - height = frame_size.y, - y_stride, - "NV12 to RGBA conversion failed" - ); - false - } + Err(_) => false, } } else { false @@ -244,32 +427,53 @@ impl DisplayLayer { } PixelFormat::Yuv420p => { let screen_frame = &segment_frames.screen_frame; - if let (Some(y_data), Some(u_data), Some(v_data)) = ( - screen_frame.y_plane(), - screen_frame.u_plane(), - screen_frame.v_plane(), - ) && self - .yuv_converter - .convert_yuv420p( - device, - queue, - y_data, - u_data, - v_data, - frame_size.x, - frame_size.y, - screen_frame.y_stride(), - screen_frame.uv_stride(), - ) - .is_ok() - && self.yuv_converter.output_texture().is_some() + let y_plane = screen_frame.y_plane(); + let u_plane = screen_frame.u_plane(); + let v_plane = screen_frame.v_plane(); + + if let (Some(y_data), Some(u_data), Some(v_data)) = (y_plane, u_plane, v_plane) { - self.pending_copy = Some(PendingTextureCopy { - width: frame_size.x, - height: frame_size.y, - dst_texture_index: next_texture, - }); - true + let convert_result = if self.prefer_cpu_conversion { + self.yuv_converter.convert_yuv420p_cpu( + device, + queue, + y_data, + u_data, + v_data, + frame_size.x, + frame_size.y, + screen_frame.y_stride(), + 
screen_frame.uv_stride(), + ) + } else { + self.yuv_converter.convert_yuv420p( + device, + queue, + y_data, + u_data, + v_data, + frame_size.x, + frame_size.y, + screen_frame.y_stride(), + screen_frame.uv_stride(), + ) + }; + + match convert_result { + Ok(_) => { + if self.yuv_converter.output_texture().is_some() { + self.pending_copy = Some(PendingTextureCopy { + width: frame_size.x, + height: frame_size.y, + dst_texture_index: next_texture, + }); + true + } else { + false + } + } + Err(_) => false, + } } else { false } @@ -288,10 +492,12 @@ impl DisplayLayer { pub fn copy_to_texture(&mut self, encoder: &mut wgpu::CommandEncoder) { let Some(pending) = self.pending_copy.take() else { + tracing::trace!("copy_to_texture: no pending copy"); return; }; let Some(src_texture) = self.yuv_converter.output_texture() else { + tracing::warn!("copy_to_texture: no source texture from YUV converter"); return; }; @@ -318,9 +524,18 @@ impl DisplayLayer { pub fn render(&self, pass: &mut wgpu::RenderPass<'_>) { if let Some(bind_group) = &self.bind_groups[self.current_texture] { + tracing::trace!( + current_texture_index = self.current_texture, + "DisplayLayer::render - rendering with bind group" + ); pass.set_pipeline(&self.pipeline.render_pipeline); pass.set_bind_group(0, bind_group, &[]); - pass.draw(0..4, 0..1); + pass.draw(0..3, 0..1); + } else { + tracing::warn!( + current_texture_index = self.current_texture, + "DisplayLayer::render - no bind group available" + ); } } } diff --git a/crates/rendering/src/layers/mask.rs b/crates/rendering/src/layers/mask.rs index daf62fac40..58192660b8 100644 --- a/crates/rendering/src/layers/mask.rs +++ b/crates/rendering/src/layers/mask.rs @@ -176,7 +176,7 @@ impl MaskPipeline { module: &shader, entry_point: Some("fs_main"), targets: &[Some(wgpu::ColorTargetState { - format: wgpu::TextureFormat::Rgba8UnormSrgb, + format: wgpu::TextureFormat::Rgba8Unorm, blend: Some(wgpu::BlendState::REPLACE), write_mask: wgpu::ColorWrites::ALL, })], diff --git a/crates/rendering/src/layers/text.rs b/crates/rendering/src/layers/text.rs index 5891efbe05..59265eeb15 100644 --- a/crates/rendering/src/layers/text.rs +++ b/crates/rendering/src/layers/text.rs @@ -23,8 +23,7 @@ impl TextLayer { let swash_cache = SwashCache::new(); let cache = Cache::new(device); let viewport = Viewport::new(device, &cache); - let mut text_atlas = - TextAtlas::new(device, queue, &cache, wgpu::TextureFormat::Rgba8UnormSrgb); + let mut text_atlas = TextAtlas::new(device, queue, &cache, wgpu::TextureFormat::Rgba8Unorm); let text_renderer = TextRenderer::new( &mut text_atlas, device, diff --git a/crates/rendering/src/lib.rs b/crates/rendering/src/lib.rs index 7aa34d19a4..b781874bf5 100644 --- a/crates/rendering/src/lib.rs +++ b/crates/rendering/src/lib.rs @@ -22,7 +22,10 @@ use tokio::sync::mpsc; mod composite_frame; mod coord; +pub mod cpu_yuv; mod cursor_interpolation; +#[cfg(target_os = "windows")] +pub mod d3d_texture; pub mod decoder; mod frame_pipeline; #[cfg(target_os = "macos")] @@ -268,7 +271,11 @@ pub async fn render_video_to_channel( let mut frame_renderer = FrameRenderer::new(constants); - let mut layers = RendererLayers::new(&constants.device, &constants.queue); + let mut layers = RendererLayers::new_with_options( + &constants.device, + &constants.queue, + constants.is_software_adapter, + ); loop { if frame_number >= total_frames { @@ -372,6 +379,7 @@ pub struct RenderVideoConstants { pub meta: StudioRecordingMeta, pub recording_meta: RecordingMeta, pub background_textures: std::sync::Arc>>, + pub 
is_software_adapter: bool, } impl RenderVideoConstants { @@ -391,14 +399,41 @@ impl RenderVideoConstants { }; let instance = wgpu::Instance::new(&wgpu::InstanceDescriptor::default()); - let adapter = instance + + let hardware_adapter = instance .request_adapter(&wgpu::RequestAdapterOptions { power_preference: wgpu::PowerPreference::HighPerformance, force_fallback_adapter: false, compatible_surface: None, }) .await - .map_err(|_| RenderingError::NoAdapter)?; + .ok(); + + let (adapter, is_software_adapter) = if let Some(adapter) = hardware_adapter { + tracing::info!( + adapter_name = adapter.get_info().name, + adapter_backend = ?adapter.get_info().backend, + "Using hardware GPU adapter" + ); + (adapter, false) + } else { + tracing::warn!("No hardware GPU adapter found, attempting software fallback"); + let software_adapter = instance + .request_adapter(&wgpu::RequestAdapterOptions { + power_preference: wgpu::PowerPreference::LowPower, + force_fallback_adapter: true, + compatible_surface: None, + }) + .await + .map_err(|_| RenderingError::NoAdapter)?; + + tracing::info!( + adapter_name = software_adapter.get_info().name, + adapter_backend = ?software_adapter.get_info().backend, + "Using software adapter (CPU rendering - performance may be reduced)" + ); + (software_adapter, true) + }; let device_descriptor = wgpu::DeviceDescriptor { label: Some("cap-rendering-device"), @@ -419,6 +454,7 @@ impl RenderVideoConstants { background_textures, meta, recording_meta, + is_software_adapter, }) } } @@ -1598,10 +1634,18 @@ pub struct RendererLayers { impl RendererLayers { pub fn new(device: &wgpu::Device, queue: &wgpu::Queue) -> Self { + Self::new_with_options(device, queue, false) + } + + pub fn new_with_options( + device: &wgpu::Device, + queue: &wgpu::Queue, + prefer_cpu_conversion: bool, + ) -> Self { Self { background: BackgroundLayer::new(device), background_blur: BlurLayer::new(device), - display: DisplayLayer::new(device), + display: DisplayLayer::new_with_options(device, prefer_cpu_conversion), cursor: CursorLayer::new(device), camera: CameraLayer::new(device), camera_only: CameraLayer::new(device), @@ -1735,12 +1779,20 @@ impl RendererLayers { session.swap_textures(); } - if uniforms.scene.should_render_screen() { + let should_render = uniforms.scene.should_render_screen(); + tracing::trace!( + should_render_screen = should_render, + screen_opacity = uniforms.scene.screen_opacity, + screen_blur = uniforms.scene.screen_blur, + "RendererLayers::render - checking should_render_screen" + ); + + if should_render { let mut pass = render_pass!(session.current_texture_view(), wgpu::LoadOp::Load); self.display.render(&mut pass); } - if uniforms.scene.should_render_screen() { + if should_render { let mut pass = render_pass!(session.current_texture_view(), wgpu::LoadOp::Load); self.cursor.render(&mut pass); } @@ -1860,7 +1912,7 @@ pub fn create_shader_render_pipeline( module: &shader, entry_point: Some("fs_main"), targets: &[Some(wgpu::ColorTargetState { - format: wgpu::TextureFormat::Rgba8UnormSrgb, + format: wgpu::TextureFormat::Rgba8Unorm, blend: Some(wgpu::BlendState::ALPHA_BLENDING), write_mask: wgpu::ColorWrites::ALL, })], diff --git a/crates/rendering/src/yuv_converter.rs b/crates/rendering/src/yuv_converter.rs index cb5786bda1..f80699b66b 100644 --- a/crates/rendering/src/yuv_converter.rs +++ b/crates/rendering/src/yuv_converter.rs @@ -1,4 +1,4 @@ -use crate::decoder::PixelFormat; +use crate::cpu_yuv; #[cfg(target_os = "macos")] use crate::iosurface_texture::{ @@ -8,6 +8,15 @@ use 
crate::iosurface_texture::{ #[cfg(target_os = "macos")] use cidre::cv; +#[cfg(target_os = "windows")] +use crate::d3d_texture::D3DTextureError; + +#[cfg(target_os = "windows")] +use windows::Win32::Graphics::Direct3D11::{ + D3D11_CPU_ACCESS_READ, D3D11_MAP_READ, D3D11_MAPPED_SUBRESOURCE, D3D11_TEXTURE2D_DESC, + D3D11_USAGE_STAGING, ID3D11Device, ID3D11DeviceContext, ID3D11Texture2D, +}; + #[derive(Debug, thiserror::Error)] pub enum YuvConversionError { #[error("{plane} plane size mismatch: expected {expected}, got {actual}")] @@ -16,9 +25,21 @@ pub enum YuvConversionError { expected: usize, actual: usize, }, + #[error("{dimension} dimension ({value}) exceeds maximum allowed ({max})")] + DimensionExceedsLimit { + dimension: &'static str, + value: u32, + max: u32, + }, #[cfg(target_os = "macos")] #[error("IOSurface error: {0}")] IOSurfaceError(#[from] IOSurfaceTextureError), + #[cfg(target_os = "windows")] + #[error("D3D texture error: {0}")] + D3DTextureError(#[from] D3DTextureError), + #[cfg(target_os = "windows")] + #[error("D3D11 error: {0}")] + D3D11Error(String), } fn upload_plane_with_stride( @@ -61,26 +82,51 @@ fn upload_plane_with_stride( Ok(()) } +const MAX_TEXTURE_WIDTH: u32 = 3840; +const MAX_TEXTURE_HEIGHT: u32 = 2160; + +fn validate_dimensions(width: u32, height: u32) -> Result<(), YuvConversionError> { + if width > MAX_TEXTURE_WIDTH { + return Err(YuvConversionError::DimensionExceedsLimit { + dimension: "width", + value: width, + max: MAX_TEXTURE_WIDTH, + }); + } + if height > MAX_TEXTURE_HEIGHT { + return Err(YuvConversionError::DimensionExceedsLimit { + dimension: "height", + value: height, + max: MAX_TEXTURE_HEIGHT, + }); + } + Ok(()) +} + pub struct YuvToRgbaConverter { nv12_pipeline: wgpu::ComputePipeline, yuv420p_pipeline: wgpu::ComputePipeline, nv12_bind_group_layout: wgpu::BindGroupLayout, yuv420p_bind_group_layout: wgpu::BindGroupLayout, - y_texture: Option, - uv_texture: Option, - u_texture: Option, - v_texture: Option, - output_texture: Option, - _y_view: Option, - _uv_view: Option, - _u_view: Option, - _v_view: Option, - output_view: Option, - cached_width: u32, - cached_height: u32, - cached_format: Option, + y_texture: wgpu::Texture, + y_view: wgpu::TextureView, + uv_texture: wgpu::Texture, + uv_view: wgpu::TextureView, + u_texture: wgpu::Texture, + u_view: wgpu::TextureView, + v_texture: wgpu::Texture, + v_view: wgpu::TextureView, + output_textures: [wgpu::Texture; 2], + output_views: [wgpu::TextureView; 2], + current_output: usize, #[cfg(target_os = "macos")] iosurface_cache: Option, + #[cfg(target_os = "windows")] + d3d11_staging_texture: Option, + #[cfg(target_os = "windows")] + d3d11_staging_width: u32, + #[cfg(target_os = "windows")] + d3d11_staging_height: u32, } impl YuvToRgbaConverter { @@ -214,48 +260,43 @@ impl YuvToRgbaConverter { cache: None, }); - Self { - nv12_pipeline, - yuv420p_pipeline, - nv12_bind_group_layout, - yuv420p_bind_group_layout, - y_texture: None, - uv_texture: None, - u_texture: None, - v_texture: None, - output_texture: None, - _y_view: None, - _uv_view: None, - _u_view: None, - _v_view: None, - output_view: None, - cached_width: 0, - cached_height: 0, - cached_format: None, - #[cfg(target_os = "macos")] - iosurface_cache: IOSurfaceTextureCache::new(), - } - } + let y_texture = device.create_texture(&wgpu::TextureDescriptor { + label: Some("Y Plane Texture (Pre-allocated)"), + size: wgpu::Extent3d { + width: MAX_TEXTURE_WIDTH, + height: MAX_TEXTURE_HEIGHT, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + 
dimension: wgpu::TextureDimension::D2, + format: wgpu::TextureFormat::R8Unorm, + usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST, + view_formats: &[], + }); + let y_view = y_texture.create_view(&Default::default()); - fn ensure_textures( - &mut self, - device: &wgpu::Device, - width: u32, - height: u32, - format: PixelFormat, - ) { - if self.cached_width == width - && self.cached_height == height - && self.cached_format == Some(format) - { - return; - } + let uv_texture = device.create_texture(&wgpu::TextureDescriptor { + label: Some("UV Plane Texture (Pre-allocated)"), + size: wgpu::Extent3d { + width: MAX_TEXTURE_WIDTH / 2, + height: MAX_TEXTURE_HEIGHT / 2, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: wgpu::TextureFormat::Rg8Unorm, + usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST, + view_formats: &[], + }); + let uv_view = uv_texture.create_view(&Default::default()); - self.y_texture = Some(device.create_texture(&wgpu::TextureDescriptor { - label: Some("Y Plane Texture"), + let u_texture = device.create_texture(&wgpu::TextureDescriptor { + label: Some("U Plane Texture (Pre-allocated)"), size: wgpu::Extent3d { - width, - height, + width: MAX_TEXTURE_WIDTH / 2, + height: MAX_TEXTURE_HEIGHT / 2, depth_or_array_layers: 1, }, mip_level_count: 1, @@ -264,88 +305,87 @@ impl YuvToRgbaConverter { format: wgpu::TextureFormat::R8Unorm, usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST, view_formats: &[], - })); - - match format { - PixelFormat::Nv12 => { - self.uv_texture = Some(device.create_texture(&wgpu::TextureDescriptor { - label: Some("UV Plane Texture"), - size: wgpu::Extent3d { - width: width / 2, - height: height / 2, - depth_or_array_layers: 1, - }, - mip_level_count: 1, - sample_count: 1, - dimension: wgpu::TextureDimension::D2, - format: wgpu::TextureFormat::Rg8Unorm, - usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST, - view_formats: &[], - })); - self.u_texture = None; - self.v_texture = None; - } - PixelFormat::Yuv420p => { - self.u_texture = Some(device.create_texture(&wgpu::TextureDescriptor { - label: Some("U Plane Texture"), - size: wgpu::Extent3d { - width: width / 2, - height: height / 2, - depth_or_array_layers: 1, - }, - mip_level_count: 1, - sample_count: 1, - dimension: wgpu::TextureDimension::D2, - format: wgpu::TextureFormat::R8Unorm, - usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST, - view_formats: &[], - })); - self.v_texture = Some(device.create_texture(&wgpu::TextureDescriptor { - label: Some("V Plane Texture"), - size: wgpu::Extent3d { - width: width / 2, - height: height / 2, - depth_or_array_layers: 1, - }, - mip_level_count: 1, - sample_count: 1, - dimension: wgpu::TextureDimension::D2, - format: wgpu::TextureFormat::R8Unorm, - usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST, - view_formats: &[], - })); - self.uv_texture = None; - } - PixelFormat::Rgba => {} - } + }); + let u_view = u_texture.create_view(&Default::default()); - self.output_texture = Some(device.create_texture(&wgpu::TextureDescriptor { - label: Some("RGBA Output Texture"), + let v_texture = device.create_texture(&wgpu::TextureDescriptor { + label: Some("V Plane Texture (Pre-allocated)"), size: wgpu::Extent3d { - width, - height, + width: MAX_TEXTURE_WIDTH / 2, + height: MAX_TEXTURE_HEIGHT / 2, depth_or_array_layers: 1, }, mip_level_count: 1, sample_count: 
1, dimension: wgpu::TextureDimension::D2, - format: wgpu::TextureFormat::Rgba8Unorm, - usage: wgpu::TextureUsages::STORAGE_BINDING - | wgpu::TextureUsages::TEXTURE_BINDING - | wgpu::TextureUsages::COPY_SRC, + format: wgpu::TextureFormat::R8Unorm, + usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST, view_formats: &[], - })); + }); + let v_view = v_texture.create_view(&Default::default()); - self.output_view = Some( - self.output_texture - .as_ref() - .unwrap() - .create_view(&Default::default()), - ); + let create_output_texture = |label: &str| { + device.create_texture(&wgpu::TextureDescriptor { + label: Some(label), + size: wgpu::Extent3d { + width: MAX_TEXTURE_WIDTH, + height: MAX_TEXTURE_HEIGHT, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: wgpu::TextureFormat::Rgba8Unorm, + usage: wgpu::TextureUsages::STORAGE_BINDING + | wgpu::TextureUsages::TEXTURE_BINDING + | wgpu::TextureUsages::COPY_SRC + | wgpu::TextureUsages::COPY_DST, + view_formats: &[], + }) + }; - self.cached_width = width; - self.cached_height = height; - self.cached_format = Some(format); + let output_texture_0 = create_output_texture("RGBA Output Texture 0 (Pre-allocated)"); + let output_texture_1 = create_output_texture("RGBA Output Texture 1 (Pre-allocated)"); + let output_view_0 = output_texture_0.create_view(&Default::default()); + let output_view_1 = output_texture_1.create_view(&Default::default()); + + Self { + nv12_pipeline, + yuv420p_pipeline, + nv12_bind_group_layout, + yuv420p_bind_group_layout, + y_texture, + y_view, + uv_texture, + uv_view, + u_texture, + u_view, + v_texture, + v_view, + output_textures: [output_texture_0, output_texture_1], + output_views: [output_view_0, output_view_1], + current_output: 0, + #[cfg(target_os = "macos")] + iosurface_cache: IOSurfaceTextureCache::new(), + #[cfg(target_os = "windows")] + d3d11_staging_texture: None, + #[cfg(target_os = "windows")] + d3d11_staging_width: 0, + #[cfg(target_os = "windows")] + d3d11_staging_height: 0, + } + } + + fn swap_output_buffer(&mut self) { + self.current_output = 1 - self.current_output; + } + + fn current_output_texture(&self) -> &wgpu::Texture { + &self.output_textures[self.current_output] + } + + fn current_output_view(&self) -> &wgpu::TextureView { + &self.output_views[self.current_output] } #[allow(clippy::too_many_arguments)] @@ -360,13 +400,10 @@ impl YuvToRgbaConverter { y_stride: u32, uv_stride: u32, ) -> Result<&wgpu::TextureView, YuvConversionError> { - self.ensure_textures(device, width, height, PixelFormat::Nv12); + validate_dimensions(width, height)?; + self.swap_output_buffer(); - let y_texture = self.y_texture.as_ref().unwrap(); - let uv_texture = self.uv_texture.as_ref().unwrap(); - let output_texture = self.output_texture.as_ref().unwrap(); - - upload_plane_with_stride(queue, y_texture, y_data, width, height, y_stride, "Y")?; + upload_plane_with_stride(queue, &self.y_texture, y_data, width, height, y_stride, "Y")?; let half_height = height / 2; let expected_uv_size = (uv_stride * half_height) as usize; @@ -380,7 +417,7 @@ impl YuvToRgbaConverter { queue.write_texture( wgpu::TexelCopyTextureInfo { - texture: uv_texture, + texture: &self.uv_texture, mip_level: 0, origin: wgpu::Origin3d::ZERO, aspect: wgpu::TextureAspect::All, @@ -404,21 +441,15 @@ impl YuvToRgbaConverter { entries: &[ wgpu::BindGroupEntry { binding: 0, - resource: wgpu::BindingResource::TextureView( - &y_texture.create_view(&Default::default()), - ), + 
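The two output textures plus `current_output` implement a classic ping-pong scheme: the view handed out for frame N keeps its contents while frame N+1 renders into the other texture. Reduced to its essentials (illustrative only):

```rust
// Minimal sketch of the double-buffer rotation used by the converter.
struct DoubleBuffer<T> {
    slots: [T; 2],
    current: usize,
}

impl<T> DoubleBuffer<T> {
    // Flip before producing a new frame, mirroring swap_output_buffer().
    fn swap(&mut self) {
        self.current = 1 - self.current;
    }

    // Borrow the slot that the most recent swap selected.
    fn current(&self) -> &T {
        &self.slots[self.current]
    }
}

fn main() {
    let mut buf = DoubleBuffer { slots: ["frame A", "frame B"], current: 0 };
    buf.swap();
    assert_eq!(*buf.current(), "frame B");
}
```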
resource: wgpu::BindingResource::TextureView(&self.y_view), }, wgpu::BindGroupEntry { binding: 1, - resource: wgpu::BindingResource::TextureView( - &uv_texture.create_view(&Default::default()), - ), + resource: wgpu::BindingResource::TextureView(&self.uv_view), }, wgpu::BindGroupEntry { binding: 2, - resource: wgpu::BindingResource::TextureView( - &output_texture.create_view(&Default::default()), - ), + resource: wgpu::BindingResource::TextureView(self.current_output_view()), }, ], }); @@ -439,7 +470,7 @@ impl YuvToRgbaConverter { queue.submit(std::iter::once(encoder.finish())); - Ok(self.output_view.as_ref().unwrap()) + Ok(self.current_output_view()) } #[cfg(target_os = "macos")] @@ -449,6 +480,8 @@ impl YuvToRgbaConverter { queue: &wgpu::Queue, image_buf: &cv::ImageBuf, ) -> Result<&wgpu::TextureView, YuvConversionError> { + self.swap_output_buffer(); + let cache = self .iosurface_cache .as_ref() @@ -461,6 +494,8 @@ impl YuvToRgbaConverter { let width = image_buf.width() as u32; let height = image_buf.height() as u32; + validate_dimensions(width, height)?; + let y_metal_texture = cache.create_y_texture(io_surface, width, height)?; let uv_metal_texture = cache.create_uv_texture(io_surface, width, height)?; @@ -482,40 +517,8 @@ impl YuvToRgbaConverter { Some("IOSurface UV Plane"), )?; - if self.cached_width != width - || self.cached_height != height - || self.cached_format != Some(PixelFormat::Nv12) - { - self.output_texture = Some(device.create_texture(&wgpu::TextureDescriptor { - label: Some("RGBA Output Texture"), - size: wgpu::Extent3d { - width, - height, - depth_or_array_layers: 1, - }, - mip_level_count: 1, - sample_count: 1, - dimension: wgpu::TextureDimension::D2, - format: wgpu::TextureFormat::Rgba8Unorm, - usage: wgpu::TextureUsages::STORAGE_BINDING - | wgpu::TextureUsages::TEXTURE_BINDING - | wgpu::TextureUsages::COPY_SRC, - view_formats: &[], - })); - - self.output_view = Some( - self.output_texture - .as_ref() - .unwrap() - .create_view(&Default::default()), - ); - - self.cached_width = width; - self.cached_height = height; - self.cached_format = Some(PixelFormat::Nv12); - } - - let output_texture = self.output_texture.as_ref().unwrap(); + let y_view = y_wgpu_texture.create_view(&Default::default()); + let uv_view = uv_wgpu_texture.create_view(&Default::default()); let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { label: Some("NV12 IOSurface Converter Bind Group"), @@ -523,21 +526,15 @@ impl YuvToRgbaConverter { entries: &[ wgpu::BindGroupEntry { binding: 0, - resource: wgpu::BindingResource::TextureView( - &y_wgpu_texture.create_view(&Default::default()), - ), + resource: wgpu::BindingResource::TextureView(&y_view), }, wgpu::BindGroupEntry { binding: 1, - resource: wgpu::BindingResource::TextureView( - &uv_wgpu_texture.create_view(&Default::default()), - ), + resource: wgpu::BindingResource::TextureView(&uv_view), }, wgpu::BindGroupEntry { binding: 2, - resource: wgpu::BindingResource::TextureView( - &output_texture.create_view(&Default::default()), - ), + resource: wgpu::BindingResource::TextureView(self.current_output_view()), }, ], }); @@ -558,7 +555,7 @@ impl YuvToRgbaConverter { queue.submit(std::iter::once(encoder.finish())); - Ok(self.output_view.as_ref().unwrap()) + Ok(self.current_output_view()) } #[allow(clippy::too_many_arguments)] @@ -574,21 +571,17 @@ impl YuvToRgbaConverter { y_stride: u32, uv_stride: u32, ) -> Result<&wgpu::TextureView, YuvConversionError> { - self.ensure_textures(device, width, height, PixelFormat::Yuv420p); - - let 
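`validate_dimensions` is called before every upload now that the textures are fixed-size; its body is outside this hunk. A plausible shape, with the limit values and the simplified error type being assumptions of ours:

```rust
// Hypothetical sketch of validate_dimensions; the real function returns
// YuvConversionError and the limits live elsewhere in the crate.
const MAX_TEXTURE_WIDTH: u32 = 4096;
const MAX_TEXTURE_HEIGHT: u32 = 4096;

fn validate_dimensions(width: u32, height: u32) -> Result<(), String> {
    if width == 0 || height == 0 || width > MAX_TEXTURE_WIDTH || height > MAX_TEXTURE_HEIGHT {
        return Err(format!(
            "frame {width}x{height} exceeds pre-allocated {MAX_TEXTURE_WIDTH}x{MAX_TEXTURE_HEIGHT}"
        ));
    }
    Ok(())
}
```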
y_texture = self.y_texture.as_ref().unwrap(); - let u_texture = self.u_texture.as_ref().unwrap(); - let v_texture = self.v_texture.as_ref().unwrap(); - let output_texture = self.output_texture.as_ref().unwrap(); + validate_dimensions(width, height)?; + self.swap_output_buffer(); - upload_plane_with_stride(queue, y_texture, y_data, width, height, y_stride, "Y")?; + upload_plane_with_stride(queue, &self.y_texture, y_data, width, height, y_stride, "Y")?; let half_width = width / 2; let half_height = height / 2; upload_plane_with_stride( queue, - u_texture, + &self.u_texture, u_data, half_width, half_height, @@ -597,7 +590,7 @@ )?; upload_plane_with_stride( queue, - v_texture, + &self.v_texture, v_data, half_width, half_height, @@ -611,27 +604,19 @@ entries: &[ wgpu::BindGroupEntry { binding: 0, - resource: wgpu::BindingResource::TextureView( - &y_texture.create_view(&Default::default()), - ), + resource: wgpu::BindingResource::TextureView(&self.y_view), }, wgpu::BindGroupEntry { binding: 1, - resource: wgpu::BindingResource::TextureView( - &u_texture.create_view(&Default::default()), - ), + resource: wgpu::BindingResource::TextureView(&self.u_view), }, wgpu::BindGroupEntry { binding: 2, - resource: wgpu::BindingResource::TextureView( - &v_texture.create_view(&Default::default()), - ), + resource: wgpu::BindingResource::TextureView(&self.v_view), }, wgpu::BindGroupEntry { binding: 3, - resource: wgpu::BindingResource::TextureView( - &output_texture.create_view(&Default::default()), - ), + resource: wgpu::BindingResource::TextureView(self.current_output_view()), }, ], }); @@ -652,10 +637,343 @@ impl YuvToRgbaConverter { queue.submit(std::iter::once(encoder.finish())); - Ok(self.output_view.as_ref().unwrap()) + Ok(self.current_output_view()) + } + + #[cfg(target_os = "windows")] + #[allow(clippy::too_many_arguments)] + pub fn convert_nv12_from_d3d11_texture( + &mut self, + wgpu_device: &wgpu::Device, + queue: &wgpu::Queue, + d3d11_device: &ID3D11Device, + d3d11_context: &ID3D11DeviceContext, + nv12_texture: &ID3D11Texture2D, + width: u32, + height: u32, + ) -> Result<&wgpu::TextureView, YuvConversionError> { + validate_dimensions(width, height)?; + + use windows::Win32::Graphics::Dxgi::Common::DXGI_FORMAT_NV12; + + if self.d3d11_staging_width != width + || self.d3d11_staging_height != height + || self.d3d11_staging_texture.is_none() + { + let desc = D3D11_TEXTURE2D_DESC { + Width: width, + Height: height, + MipLevels: 1, + ArraySize: 1, + Format: DXGI_FORMAT_NV12, + SampleDesc: windows::Win32::Graphics::Dxgi::Common::DXGI_SAMPLE_DESC { + Count: 1, + Quality: 0, + }, + Usage: D3D11_USAGE_STAGING, + BindFlags: 0, + CPUAccessFlags: D3D11_CPU_ACCESS_READ.0 as u32, + MiscFlags: 0, + }; + + let staging_texture = unsafe { + let mut texture: Option<ID3D11Texture2D> = None; + d3d11_device + .CreateTexture2D(&desc, None, Some(&mut texture)) + .map_err(|e| { + YuvConversionError::D3D11Error(format!("CreateTexture2D failed: {e:?}")) + })?; + texture.ok_or_else(|| { + YuvConversionError::D3D11Error("CreateTexture2D returned null".to_string()) + })?
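For the 4:2:0 formats handled above, the chroma planes are a quarter of the luma plane; the half_width/half_height math recurs in several places. For reference (illustrative helper, not part of the patch):

```rust
// 4:2:0 subsampling: one U and one V (or one interleaved UV) sample per
// 2x2 block of luma samples, so chroma planes are (w/2) x (h/2).
fn yuv420_plane_dims(width: u32, height: u32) -> ((u32, u32), (u32, u32)) {
    ((width, height), (width / 2, height / 2))
}

fn main() {
    assert_eq!(yuv420_plane_dims(1920, 1080), ((1920, 1080), (960, 540)));
}
```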
+ }; + + self.d3d11_staging_texture = Some(staging_texture); + self.d3d11_staging_width = width; + self.d3d11_staging_height = height; + } + + let staging_texture = self.d3d11_staging_texture.as_ref().ok_or_else(|| { + YuvConversionError::D3D11Error("D3D11 staging texture not initialized".to_string()) + })?; + + unsafe { + d3d11_context.CopyResource(staging_texture, nv12_texture); + } + + let mut mapped = D3D11_MAPPED_SUBRESOURCE::default(); + unsafe { + d3d11_context + .Map(staging_texture, 0, D3D11_MAP_READ, 0, Some(&mut mapped)) + .map_err(|e| YuvConversionError::D3D11Error(format!("Map failed: {e:?}")))?; + } + + let y_stride = mapped.RowPitch; + let uv_stride = mapped.RowPitch; + + let y_size = (y_stride * height) as usize; + let uv_size = (uv_stride * height / 2) as usize; + + let (y_data_vec, uv_data_vec) = unsafe { + let y_data = std::slice::from_raw_parts(mapped.pData as *const u8, y_size); + let uv_data = + std::slice::from_raw_parts((mapped.pData as *const u8).add(y_size), uv_size); + (y_data.to_vec(), uv_data.to_vec()) + }; + + unsafe { + d3d11_context.Unmap(staging_texture, 0); + } + + self.swap_output_buffer(); + + upload_plane_with_stride( + queue, + &self.y_texture, + &y_data_vec, + width, + height, + y_stride, + "Y", + )?; + + let half_height = height / 2; + queue.write_texture( + wgpu::TexelCopyTextureInfo { + texture: &self.uv_texture, + mip_level: 0, + origin: wgpu::Origin3d::ZERO, + aspect: wgpu::TextureAspect::All, + }, + &uv_data_vec, + wgpu::TexelCopyBufferLayout { + offset: 0, + bytes_per_row: Some(uv_stride), + rows_per_image: Some(half_height), + }, + wgpu::Extent3d { + width: width / 2, + height: half_height, + depth_or_array_layers: 1, + }, + ); + + let bind_group = wgpu_device.create_bind_group(&wgpu::BindGroupDescriptor { + label: Some("NV12 D3D11 Converter Bind Group"), + layout: &self.nv12_bind_group_layout, + entries: &[ + wgpu::BindGroupEntry { + binding: 0, + resource: wgpu::BindingResource::TextureView(&self.y_view), + }, + wgpu::BindGroupEntry { + binding: 1, + resource: wgpu::BindingResource::TextureView(&self.uv_view), + }, + wgpu::BindGroupEntry { + binding: 2, + resource: wgpu::BindingResource::TextureView(self.current_output_view()), + }, + ], + }); + + let mut encoder = wgpu_device.create_command_encoder(&wgpu::CommandEncoderDescriptor { + label: Some("NV12 D3D11 Conversion Encoder"), + }); + + { + let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: Some("NV12 D3D11 Conversion Pass"), + ..Default::default() + }); + compute_pass.set_pipeline(&self.nv12_pipeline); + compute_pass.set_bind_group(0, &bind_group, &[]); + compute_pass.dispatch_workgroups(width.div_ceil(8), height.div_ceil(8), 1); + } + + queue.submit(std::iter::once(encoder.finish())); + + Ok(self.current_output_view()) + } + + #[cfg(target_os = "windows")] + pub fn convert_nv12_from_d3d11_shared_handles( + &mut self, + device: &wgpu::Device, + queue: &wgpu::Queue, + y_handle: windows::Win32::Foundation::HANDLE, + uv_handle: windows::Win32::Foundation::HANDLE, + width: u32, + height: u32, + ) -> Result<&wgpu::TextureView, YuvConversionError> { + validate_dimensions(width, height)?; + + use crate::d3d_texture::import_d3d11_texture_to_wgpu; + + self.swap_output_buffer(); + + let y_wgpu_texture = import_d3d11_texture_to_wgpu( + device, + y_handle, + wgpu::TextureFormat::R8Unorm, + width, + height, + Some("D3D11 Y Plane Zero-Copy"), + )?; + + let uv_wgpu_texture = import_d3d11_texture_to_wgpu( + device, + uv_handle, + wgpu::TextureFormat::Rg8Unorm, + 
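The mapped staging texture holds both NV12 planes back to back at the same RowPitch, which is what the y_size/uv_size arithmetic above relies on. A sketch of that split (assumed layout, matching the code's arithmetic):

```rust
// NV12 in a mapped D3D11 staging texture: a Y plane of row_pitch * height
// bytes, immediately followed by an interleaved UV plane with half as many
// rows at the same pitch.
fn split_nv12(data: &[u8], row_pitch: usize, height: usize) -> (&[u8], &[u8]) {
    let y_size = row_pitch * height;
    let uv_size = row_pitch * height / 2;
    (&data[..y_size], &data[y_size..y_size + uv_size])
}
```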
width / 2, + height / 2, + Some("D3D11 UV Plane Zero-Copy"), + )?; + + let y_view = y_wgpu_texture.create_view(&Default::default()); + let uv_view = uv_wgpu_texture.create_view(&Default::default()); + + let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { + label: Some("NV12 D3D11 Zero-Copy Converter Bind Group"), + layout: &self.nv12_bind_group_layout, + entries: &[ + wgpu::BindGroupEntry { + binding: 0, + resource: wgpu::BindingResource::TextureView(&y_view), + }, + wgpu::BindGroupEntry { + binding: 1, + resource: wgpu::BindingResource::TextureView(&uv_view), + }, + wgpu::BindGroupEntry { + binding: 2, + resource: wgpu::BindingResource::TextureView(self.current_output_view()), + }, + ], + }); + + let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { + label: Some("NV12 D3D11 Zero-Copy Conversion Encoder"), + }); + + { + let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: Some("NV12 D3D11 Zero-Copy Conversion Pass"), + ..Default::default() + }); + compute_pass.set_pipeline(&self.nv12_pipeline); + compute_pass.set_bind_group(0, &bind_group, &[]); + compute_pass.dispatch_workgroups(width.div_ceil(8), height.div_ceil(8), 1); + } + + queue.submit(std::iter::once(encoder.finish())); + + Ok(self.current_output_view()) + } + + #[allow(clippy::too_many_arguments)] + pub fn convert_nv12_cpu( + &mut self, + _device: &wgpu::Device, + queue: &wgpu::Queue, + y_data: &[u8], + uv_data: &[u8], + width: u32, + height: u32, + y_stride: u32, + uv_stride: u32, + ) -> Result<&wgpu::TextureView, YuvConversionError> { + validate_dimensions(width, height)?; + self.swap_output_buffer(); + + let mut rgba_data = vec![0u8; (width * height * 4) as usize]; + + cpu_yuv::nv12_to_rgba_simd( + y_data, + uv_data, + width, + height, + y_stride, + uv_stride, + &mut rgba_data, + ); + + queue.write_texture( + wgpu::TexelCopyTextureInfo { + texture: self.current_output_texture(), + mip_level: 0, + origin: wgpu::Origin3d::ZERO, + aspect: wgpu::TextureAspect::All, + }, + &rgba_data, + wgpu::TexelCopyBufferLayout { + offset: 0, + bytes_per_row: Some(width * 4), + rows_per_image: Some(height), + }, + wgpu::Extent3d { + width, + height, + depth_or_array_layers: 1, + }, + ); + + Ok(self.current_output_view()) + } + + #[allow(clippy::too_many_arguments)] + pub fn convert_yuv420p_cpu( + &mut self, + _device: &wgpu::Device, + queue: &wgpu::Queue, + y_data: &[u8], + u_data: &[u8], + v_data: &[u8], + width: u32, + height: u32, + y_stride: u32, + uv_stride: u32, + ) -> Result<&wgpu::TextureView, YuvConversionError> { + validate_dimensions(width, height)?; + self.swap_output_buffer(); + + let mut rgba_data = vec![0u8; (width * height * 4) as usize]; + + cpu_yuv::yuv420p_to_rgba_simd( + y_data, + u_data, + v_data, + width, + height, + y_stride, + uv_stride, + &mut rgba_data, + ); + + queue.write_texture( + wgpu::TexelCopyTextureInfo { + texture: self.current_output_texture(), + mip_level: 0, + origin: wgpu::Origin3d::ZERO, + aspect: wgpu::TextureAspect::All, + }, + &rgba_data, + wgpu::TexelCopyBufferLayout { + offset: 0, + bytes_per_row: Some(width * 4), + rows_per_image: Some(height), + }, + wgpu::Extent3d { + width, + height, + depth_or_array_layers: 1, + }, + ); + + Ok(self.current_output_view()) } pub fn output_texture(&self) -> Option<&wgpu::Texture> { - self.output_texture.as_ref() + Some(self.current_output_texture()) } } diff --git a/crates/scap-direct3d/src/lib.rs b/crates/scap-direct3d/src/lib.rs index 4ed2bdaecc..ac704e934f 100644 --- 
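The `convert_*_cpu` paths lean on `cpu_yuv::*_simd`, whose source is not in this diff. As a scalar reference for what one output pixel looks like (BT.601 limited-range coefficients are our assumption; the crate may use different constants):

```rust
// Scalar reference for one YUV -> RGBA pixel, assuming BT.601 limited range.
fn yuv_to_rgba(y: u8, u: u8, v: u8) -> [u8; 4] {
    let c = y as f32 - 16.0;
    let d = u as f32 - 128.0;
    let e = v as f32 - 128.0;
    let r = (1.164 * c + 1.596 * e).clamp(0.0, 255.0) as u8;
    let g = (1.164 * c - 0.392 * d - 0.813 * e).clamp(0.0, 255.0) as u8;
    let b = (1.164 * c + 2.017 * d).clamp(0.0, 255.0) as u8;
    [r, g, b, 255]
}
```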
a/crates/scap-direct3d/src/lib.rs +++ b/crates/scap-direct3d/src/lib.rs @@ -147,7 +147,9 @@ pub struct Capturer { d3d_device: ID3D11Device, d3d_context: ID3D11DeviceContext, session: GraphicsCaptureSession, - _frame_pool: Direct3D11CaptureFramePool, + frame_pool: Direct3D11CaptureFramePool, + frame_arrived_token: i64, + stop_flag: Arc<AtomicBool>, } impl Capturer { @@ -265,11 +267,12 @@ }) .transpose()?; - frame_pool + let frame_arrived_token = frame_pool .FrameArrived( &TypedEventHandler::<Direct3D11CaptureFramePool, IInspectable>::new({ let d3d_context = d3d_context.clone(); let d3d_device = d3d_device.clone(); + let stop_flag = stop_flag.clone(); move |frame_pool, _| { if stop_flag.load(Ordering::Relaxed) { @@ -337,11 +340,12 @@ Ok(Capturer { settings, - // thread_handle: None, d3d_device, d3d_context, session, - _frame_pool: frame_pool, + frame_pool, + frame_arrived_token, + stop_flag, }) } @@ -380,7 +384,12 @@ pub enum StopCapturerError { impl Capturer { pub fn stop(&mut self) -> windows::core::Result<()> { - self.session.Close() + if self.stop_flag.swap(true, Ordering::SeqCst) { + return Ok(()); + } + let _ = self.frame_pool.RemoveFrameArrived(self.frame_arrived_token); + let _ = self.session.Close(); + self.frame_pool.Close() } } diff --git a/crates/video-decode/Cargo.toml b/crates/video-decode/Cargo.toml index 09bb6c3590..c4396ac6cb 100644 --- a/crates/video-decode/Cargo.toml +++ b/crates/video-decode/Cargo.toml @@ -15,3 +15,19 @@ workspace-hack = { version = "0.1", path = "../workspace-hack" } [target.'cfg(target_os = "macos")'.dependencies] cidre = { workspace = true } + +[target.'cfg(target_os = "windows")'.dependencies] +windows = { workspace = true, features = [ + "Win32_Foundation", + "Win32_Graphics_Direct3D", + "Win32_Graphics_Direct3D11", + "Win32_Graphics_Dxgi", + "Win32_Graphics_Dxgi_Common", + "Win32_Media", + "Win32_Media_MediaFoundation", + "Win32_Security", + "Win32_System_Com", + "Win32_System_Com_StructuredStorage", + "Win32_System_Ole", + "Win32_System_Variant", +] } diff --git a/crates/video-decode/src/lib.rs b/crates/video-decode/src/lib.rs index 65ea1d5a1f..6cd4e82171 100644 --- a/crates/video-decode/src/lib.rs +++ b/crates/video-decode/src/lib.rs @@ -1,7 +1,11 @@ #[cfg(target_os = "macos")] pub mod avassetreader; pub mod ffmpeg; +#[cfg(target_os = "windows")] +pub mod media_foundation; #[cfg(target_os = "macos")] pub use avassetreader::AVAssetReaderDecoder; pub use ffmpeg::FFmpegDecoder; +#[cfg(target_os = "windows")] +pub use media_foundation::{MFDecodedFrame, MediaFoundationDecoder, NV12Data}; diff --git a/crates/video-decode/src/media_foundation.rs b/crates/video-decode/src/media_foundation.rs new file mode 100644 index 0000000000..2b635071d2 --- /dev/null +++ b/crates/video-decode/src/media_foundation.rs @@ -0,0 +1,666 @@ +use std::path::Path; +use tracing::info; +use windows::{ + Win32::{ + Foundation::{HANDLE, HMODULE}, + Graphics::{ + Direct3D::D3D_DRIVER_TYPE_HARDWARE, + Direct3D11::{ + D3D11_BIND_SHADER_RESOURCE, D3D11_CPU_ACCESS_READ, + D3D11_CREATE_DEVICE_BGRA_SUPPORT, D3D11_CREATE_DEVICE_VIDEO_SUPPORT, + D3D11_MAP_READ, D3D11_MAPPED_SUBRESOURCE, D3D11_SDK_VERSION, D3D11_TEXTURE2D_DESC, + D3D11_USAGE_DEFAULT, D3D11_USAGE_STAGING, D3D11CreateDevice, ID3D11Device, + ID3D11DeviceContext, ID3D11Texture2D, + }, + Dxgi::Common::{DXGI_FORMAT_NV12, DXGI_SAMPLE_DESC}, + }, + Media::MediaFoundation::{ + IMFAttributes, IMFDXGIBuffer, IMFDXGIDeviceManager, IMFSample, IMFSourceReader, + MF_API_VERSION, MF_MT_FRAME_RATE, MF_MT_FRAME_SIZE, MF_MT_MAJOR_TYPE, MF_MT_SUBTYPE,
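`stop()` above is now idempotent: `AtomicBool::swap` returns the previous value, so only the first caller unregisters the FrameArrived handler and closes the session and pool. The pattern in isolation:

```rust
use std::sync::atomic::{AtomicBool, Ordering};

// Returns true only for the caller that actually performed the stop.
fn stop_once(stop_flag: &AtomicBool) -> bool {
    if stop_flag.swap(true, Ordering::SeqCst) {
        return false; // someone already stopped; nothing left to do
    }
    // RemoveFrameArrived / session.Close() / frame_pool.Close() go here.
    true
}

fn main() {
    let flag = AtomicBool::new(false);
    assert!(stop_once(&flag));
    assert!(!stop_once(&flag)); // second call is a no-op
}
```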
MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS, MF_SOURCE_READER_D3D_MANAGER, + MF_SOURCE_READER_ENABLE_ADVANCED_VIDEO_PROCESSING, MF_SOURCE_READER_FIRST_VIDEO_STREAM, + MFCreateAttributes, MFCreateDXGIDeviceManager, MFCreateMediaType, + MFCreateSourceReaderFromURL, MFMediaType_Video, MFSTARTUP_NOSOCKET, MFShutdown, + MFStartup, MFVideoFormat_NV12, + }, + System::Com::{COINIT_MULTITHREADED, CoInitializeEx, CoUninitialize}, + }, + core::{Interface, PCWSTR}, +}; + +pub struct MFDecodedFrame { + pub texture: ID3D11Texture2D, + pub shared_handle: Option<HANDLE>, + pub y_texture: Option<ID3D11Texture2D>, + pub y_handle: Option<HANDLE>, + pub uv_texture: Option<ID3D11Texture2D>, + pub uv_handle: Option<HANDLE>, + pub width: u32, + pub height: u32, + pub pts: i64, +} + +pub struct NV12Data { + pub data: Vec<u8>, + pub y_stride: u32, + pub uv_stride: u32, +} + +pub struct MediaFoundationDecoder { + source_reader: IMFSourceReader, + d3d11_device: ID3D11Device, + d3d11_context: ID3D11DeviceContext, + _device_manager: IMFDXGIDeviceManager, + width: u32, + height: u32, + frame_rate_num: u32, + frame_rate_den: u32, + staging_texture: Option<ID3D11Texture2D>, + staging_width: u32, + staging_height: u32, +} + +struct MFInitGuard; + +impl Drop for MFInitGuard { + fn drop(&mut self) { + unsafe { + let _ = MFShutdown(); + CoUninitialize(); + } + } +} + +impl MediaFoundationDecoder { + pub fn new(path: impl AsRef<Path>) -> Result<Self, String> { + unsafe { Self::new_inner(path.as_ref()) } + } + + unsafe fn new_inner(path: &Path) -> Result<Self, String> { + unsafe { + CoInitializeEx(None, COINIT_MULTITHREADED) + .ok() + .map_err(|e| format!("Failed to initialize COM: {e:?}"))?; + + MFStartup(MF_API_VERSION, MFSTARTUP_NOSOCKET) + .map_err(|e| format!("Failed to start Media Foundation: {e:?}"))?; + } + + let guard = MFInitGuard; + + let (d3d11_device, d3d11_context) = unsafe { create_d3d11_device()? }; + let device_manager = unsafe { create_dxgi_device_manager(&d3d11_device)? }; + + let source_reader = unsafe { create_source_reader(path, &device_manager)? }; + + unsafe { configure_output_type(&source_reader)? }; + + let (width, height, frame_rate_num, frame_rate_den) = + unsafe { get_video_info(&source_reader)? 
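`MFInitGuard` exists for the error path of `new_inner`: if any `?` fires after `MFStartup`, the guard's Drop undoes COM/MF initialization; on success the guard is discarded via `std::mem::forget` because `MediaFoundationDecoder::drop` performs the same shutdown exactly once. The shape of the idiom (names are ours):

```rust
// Sketch of the init-guard idiom used by new_inner.
struct InitGuard;

impl Drop for InitGuard {
    fn drop(&mut self) {
        // Undo global init here (MFShutdown + CoUninitialize in the patch).
    }
}

fn construct() -> Result<String, String> {
    let guard = InitGuard; // armed: any early return below triggers cleanup
    let resource = fallible_setup()?;
    std::mem::forget(guard); // success: the owner's Drop takes over cleanup
    Ok(resource)
}

fn fallible_setup() -> Result<String, String> {
    Ok("decoder".to_string())
}
```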
}; + + info!( + "MediaFoundation decoder initialized: {}x{} @ {}/{}fps", + width, height, frame_rate_num, frame_rate_den + ); + + std::mem::forget(guard); + + Ok(Self { + source_reader, + d3d11_device, + d3d11_context, + _device_manager: device_manager, + width, + height, + frame_rate_num, + frame_rate_den, + staging_texture: None, + staging_width: 0, + staging_height: 0, + }) + } + + pub fn width(&self) -> u32 { + self.width + } + + pub fn height(&self) -> u32 { + self.height + } + + pub fn frame_rate(&self) -> (u32, u32) { + (self.frame_rate_num, self.frame_rate_den) + } + + pub fn d3d11_device(&self) -> &ID3D11Device { + &self.d3d11_device + } + + pub fn read_texture_to_cpu( + &mut self, + texture: &ID3D11Texture2D, + width: u32, + height: u32, + ) -> Result<NV12Data, String> { + unsafe { self.read_texture_to_cpu_inner(texture, width, height) } + } + + unsafe fn read_texture_to_cpu_inner( + &mut self, + texture: &ID3D11Texture2D, + width: u32, + height: u32, + ) -> Result<NV12Data, String> { + if self.staging_width != width + || self.staging_height != height + || self.staging_texture.is_none() + { + let desc = D3D11_TEXTURE2D_DESC { + Width: width, + Height: height, + MipLevels: 1, + ArraySize: 1, + Format: DXGI_FORMAT_NV12, + SampleDesc: DXGI_SAMPLE_DESC { + Count: 1, + Quality: 0, + }, + Usage: D3D11_USAGE_STAGING, + BindFlags: 0, + CPUAccessFlags: D3D11_CPU_ACCESS_READ.0 as u32, + MiscFlags: 0, + }; + + let staging_texture = unsafe { + let mut tex: Option<ID3D11Texture2D> = None; + self.d3d11_device + .CreateTexture2D(&desc, None, Some(&mut tex)) + .map_err(|e| format!("CreateTexture2D staging failed: {e:?}"))?; + tex.ok_or("CreateTexture2D staging returned null")? + }; + + self.staging_texture = Some(staging_texture); + self.staging_width = width; + self.staging_height = height; + } + + let staging = self.staging_texture.as_ref().unwrap(); + + unsafe { + self.d3d11_context.CopyResource(staging, texture); + } + + let mut mapped = D3D11_MAPPED_SUBRESOURCE::default(); + unsafe { + self.d3d11_context + .Map(staging, 0, D3D11_MAP_READ, 0, Some(&mut mapped)) + .map_err(|e| format!("Map staging texture failed: {e:?}"))?; + } + + let y_stride = mapped.RowPitch; + let y_height = height; + let uv_height = height / 2; + + let y_size = (y_stride * y_height) as usize; + let uv_size = (y_stride * uv_height) as usize; + let total_size = y_size + uv_size; + + let mut data = vec![0u8; total_size]; + unsafe { + std::ptr::copy_nonoverlapping(mapped.pData as *const u8, data.as_mut_ptr(), total_size); + self.d3d11_context.Unmap(staging, 0); + } + + Ok(NV12Data { + data, + y_stride, + uv_stride: y_stride, + }) + } + + pub fn read_sample(&mut self) -> Result<Option<MFDecodedFrame>, String> { + unsafe { self.read_sample_inner() } + } + + unsafe fn read_sample_inner(&mut self) -> Result<Option<MFDecodedFrame>, String> { + let mut stream_index = 0u32; + let mut flags = 0u32; + let mut timestamp = 0i64; + let mut sample: Option<IMFSample> = None; + + unsafe { + self.source_reader + .ReadSample( + MF_SOURCE_READER_FIRST_VIDEO_STREAM.0 as u32, + 0, + Some(&mut stream_index), + Some(&mut flags), + Some(&mut timestamp), + Some(&mut sample), + ) + .map_err(|e| format!("ReadSample failed: {e:?}"))?; + } + + const MF_SOURCE_READERF_ENDOFSTREAM: u32 = 0x00000001; + const MF_SOURCE_READERF_ERROR: u32 = 0x00000002; + + if flags & MF_SOURCE_READERF_ENDOFSTREAM != 0 { + return Ok(None); + } + + if flags & MF_SOURCE_READERF_ERROR != 0 { + return Err("Stream error".to_string()); + } + + let Some(sample) = sample else { + return Ok(None); + }; + + let buffer = unsafe { + sample + .GetBufferByIndex(0) + .map_err(|e| 
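`read_texture_to_cpu` copies pitch-padded NV12, so the buffer it returns is RowPitch-based rather than width-based. The size bookkeeping, spelled out (illustrative helper):

```rust
// Both NV12 planes share the mapped RowPitch, so the CPU copy totals
// pitch * height * 3 / 2 bytes.
fn nv12_mapped_size(row_pitch: u32, height: u32) -> usize {
    let y = (row_pitch * height) as usize;
    let uv = (row_pitch * height / 2) as usize;
    y + uv
}

fn main() {
    assert_eq!(nv12_mapped_size(2048, 1080), 2048 * 1080 * 3 / 2);
}
```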
format!("GetBufferByIndex failed: {e:?}"))? + }; + + let dxgi_buffer: IMFDXGIBuffer = buffer + .cast() + .map_err(|e| format!("Failed to cast to IMFDXGIBuffer: {e:?}"))?; + + let texture = unsafe { + let mut texture: Option = None; + dxgi_buffer + .GetResource(&ID3D11Texture2D::IID, &mut texture as *mut _ as *mut _) + .map_err(|e| format!("GetResource failed: {e:?}"))?; + texture.ok_or("GetResource returned null texture")? + }; + + let subresource_index = unsafe { + dxgi_buffer + .GetSubresourceIndex() + .map_err(|e| format!("GetSubresourceIndex failed: {e:?}"))? + }; + + let (output_texture, shared_handle) = unsafe { + copy_texture_subresource( + &self.d3d11_device, + &self.d3d11_context, + &texture, + subresource_index, + self.width, + self.height, + )? + }; + + let yuv_planes = unsafe { + create_yuv_plane_textures( + &self.d3d11_device, + &self.d3d11_context, + &output_texture, + self.width, + self.height, + ) + .ok() + }; + + let (y_texture, y_handle, uv_texture, uv_handle) = yuv_planes + .map(|p| { + ( + Some(p.y_texture), + p.y_handle, + Some(p.uv_texture), + p.uv_handle, + ) + }) + .unwrap_or((None, None, None, None)); + + Ok(Some(MFDecodedFrame { + texture: output_texture, + shared_handle, + y_texture, + y_handle, + uv_texture, + uv_handle, + width: self.width, + height: self.height, + pts: timestamp, + })) + } + + pub fn seek(&mut self, time_100ns: i64) -> Result<(), String> { + use std::mem::MaybeUninit; + use windows::Win32::System::Com::StructuredStorage::PROPVARIANT; + + let mut prop = MaybeUninit::::zeroed(); + unsafe { + let prop_ptr = prop.as_mut_ptr(); + let inner_ptr = std::ptr::addr_of_mut!((*prop_ptr).Anonymous.Anonymous); + let inner = &mut *inner_ptr; + inner.vt = windows::Win32::System::Variant::VT_I8; + inner.Anonymous.hVal = time_100ns; + + let prop = prop.assume_init(); + self.source_reader + .SetCurrentPosition(&windows::core::GUID::zeroed(), &prop) + .map_err(|e| format!("Seek failed: {e:?}"))?; + } + + Ok(()) + } +} + +impl Drop for MediaFoundationDecoder { + fn drop(&mut self) { + unsafe { + let _ = MFShutdown(); + CoUninitialize(); + } + } +} + +unsafe fn create_d3d11_device() -> Result<(ID3D11Device, ID3D11DeviceContext), String> { + let flags = D3D11_CREATE_DEVICE_VIDEO_SUPPORT | D3D11_CREATE_DEVICE_BGRA_SUPPORT; + + let feature_levels = [ + windows::Win32::Graphics::Direct3D::D3D_FEATURE_LEVEL_11_1, + windows::Win32::Graphics::Direct3D::D3D_FEATURE_LEVEL_11_0, + windows::Win32::Graphics::Direct3D::D3D_FEATURE_LEVEL_10_1, + windows::Win32::Graphics::Direct3D::D3D_FEATURE_LEVEL_10_0, + ]; + + let mut device: Option = None; + let mut context: Option = None; + + unsafe { + D3D11CreateDevice( + None, + D3D_DRIVER_TYPE_HARDWARE, + HMODULE::default(), + flags, + Some(&feature_levels), + D3D11_SDK_VERSION, + Some(&mut device), + None, + Some(&mut context), + ) + .map_err(|e| format!("D3D11CreateDevice failed: {e:?}"))?; + } + + let device = device.ok_or("D3D11CreateDevice returned null device")?; + let context = context.ok_or("D3D11CreateDevice returned null context")?; + + let multithread: windows::Win32::Graphics::Direct3D11::ID3D11Multithread = device + .cast() + .map_err(|e| format!("Failed to get ID3D11Multithread: {e:?}"))?; + unsafe { + let _ = multithread.SetMultithreadProtected(true); + } + + Ok((device, context)) +} + +unsafe fn create_dxgi_device_manager( + device: &ID3D11Device, +) -> Result { + let mut reset_token = 0u32; + let mut manager: Option = None; + + unsafe { + MFCreateDXGIDeviceManager(&mut reset_token, &mut manager) + .map_err(|e| 
format!("MFCreateDXGIDeviceManager failed: {e:?}"))?; + } + + let manager = manager.ok_or("MFCreateDXGIDeviceManager returned null")?; + + unsafe { + manager + .ResetDevice(device, reset_token) + .map_err(|e| format!("ResetDevice failed: {e:?}"))?; + } + + Ok(manager) +} + +unsafe fn create_source_reader( + path: &Path, + device_manager: &IMFDXGIDeviceManager, +) -> Result { + let mut attributes: Option = None; + unsafe { + MFCreateAttributes(&mut attributes, 4) + .map_err(|e| format!("MFCreateAttributes failed: {e:?}"))?; + } + + let attributes = attributes.ok_or("MFCreateAttributes returned null")?; + + unsafe { + attributes + .SetUnknown(&MF_SOURCE_READER_D3D_MANAGER, device_manager) + .map_err(|e| format!("SetUnknown D3D_MANAGER failed: {e:?}"))?; + + attributes + .SetUINT32(&MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS, 1) + .map_err(|e| format!("SetUINT32 ENABLE_HARDWARE_TRANSFORMS failed: {e:?}"))?; + + attributes + .SetUINT32(&MF_SOURCE_READER_ENABLE_ADVANCED_VIDEO_PROCESSING, 1) + .map_err(|e| format!("SetUINT32 ENABLE_ADVANCED_VIDEO_PROCESSING failed: {e:?}"))?; + } + + let path_wide: Vec = path + .to_string_lossy() + .encode_utf16() + .chain(std::iter::once(0)) + .collect(); + + let source_reader = unsafe { + MFCreateSourceReaderFromURL(PCWSTR(path_wide.as_ptr()), &attributes) + .map_err(|e| format!("MFCreateSourceReaderFromURL failed: {e:?}"))? + }; + + Ok(source_reader) +} + +unsafe fn configure_output_type(source_reader: &IMFSourceReader) -> Result<(), String> { + let media_type = + unsafe { MFCreateMediaType().map_err(|e| format!("MFCreateMediaType failed: {e:?}"))? }; + + unsafe { + media_type + .SetGUID(&MF_MT_MAJOR_TYPE, &MFMediaType_Video) + .map_err(|e| format!("SetGUID MAJOR_TYPE failed: {e:?}"))?; + + media_type + .SetGUID(&MF_MT_SUBTYPE, &MFVideoFormat_NV12) + .map_err(|e| format!("SetGUID SUBTYPE failed: {e:?}"))?; + + source_reader + .SetCurrentMediaType( + MF_SOURCE_READER_FIRST_VIDEO_STREAM.0 as u32, + None, + &media_type, + ) + .map_err(|e| format!("SetCurrentMediaType failed: {e:?}"))?; + } + + Ok(()) +} + +unsafe fn get_video_info(source_reader: &IMFSourceReader) -> Result<(u32, u32, u32, u32), String> { + let media_type = unsafe { + source_reader + .GetCurrentMediaType(MF_SOURCE_READER_FIRST_VIDEO_STREAM.0 as u32) + .map_err(|e| format!("GetCurrentMediaType failed: {e:?}"))? + }; + + let frame_size = unsafe { + media_type + .GetUINT64(&MF_MT_FRAME_SIZE) + .map_err(|e| format!("GetUINT64 FRAME_SIZE failed: {e:?}"))? 
+ }; + + let width = (frame_size >> 32) as u32; + let height = frame_size as u32; + + let frame_rate = unsafe { + media_type + .GetUINT64(&MF_MT_FRAME_RATE) + .unwrap_or((30 << 32) | 1) + }; + + let frame_rate_num = (frame_rate >> 32) as u32; + let frame_rate_den = frame_rate as u32; + + Ok((width, height, frame_rate_num, frame_rate_den.max(1))) +} + +struct YuvPlaneTextures { + y_texture: ID3D11Texture2D, + y_handle: Option<HANDLE>, + uv_texture: ID3D11Texture2D, + uv_handle: Option<HANDLE>, +} + +unsafe fn create_yuv_plane_textures( + device: &ID3D11Device, + context: &ID3D11DeviceContext, + nv12_texture: &ID3D11Texture2D, + width: u32, + height: u32, +) -> Result<YuvPlaneTextures, String> { + use windows::Win32::Graphics::Dxgi::Common::{DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8G8_UNORM}; + + let y_desc = D3D11_TEXTURE2D_DESC { + Width: width, + Height: height, + MipLevels: 1, + ArraySize: 1, + Format: DXGI_FORMAT_R8_UNORM, + SampleDesc: DXGI_SAMPLE_DESC { + Count: 1, + Quality: 0, + }, + Usage: D3D11_USAGE_DEFAULT, + BindFlags: D3D11_BIND_SHADER_RESOURCE.0 as u32, + CPUAccessFlags: 0, + MiscFlags: 0, + }; + + let mut y_texture: Option<ID3D11Texture2D> = None; + unsafe { + device + .CreateTexture2D(&y_desc, None, Some(&mut y_texture)) + .map_err(|e| format!("CreateTexture2D Y failed: {e:?}"))?; + } + let y_texture = y_texture.ok_or("CreateTexture2D Y returned null")?; + + let uv_desc = D3D11_TEXTURE2D_DESC { + Width: width / 2, + Height: height / 2, + MipLevels: 1, + ArraySize: 1, + Format: DXGI_FORMAT_R8G8_UNORM, + SampleDesc: DXGI_SAMPLE_DESC { + Count: 1, + Quality: 0, + }, + Usage: D3D11_USAGE_DEFAULT, + BindFlags: D3D11_BIND_SHADER_RESOURCE.0 as u32, + CPUAccessFlags: 0, + MiscFlags: 0, + }; + + let mut uv_texture: Option<ID3D11Texture2D> = None; + unsafe { + device + .CreateTexture2D(&uv_desc, None, Some(&mut uv_texture)) + .map_err(|e| format!("CreateTexture2D UV failed: {e:?}"))?; + } + let uv_texture = uv_texture.ok_or("CreateTexture2D UV returned null")?; + + unsafe { + context.CopySubresourceRegion( + &y_texture, + 0, + 0, + 0, + 0, + nv12_texture, + 0, + Some(&windows::Win32::Graphics::Direct3D11::D3D11_BOX { + left: 0, + top: 0, + front: 0, + right: width, + bottom: height, + back: 1, + }), + ); + + context.CopySubresourceRegion( + &uv_texture, + 0, + 0, + 0, + 0, + nv12_texture, + 1, + Some(&windows::Win32::Graphics::Direct3D11::D3D11_BOX { + left: 0, + top: 0, + front: 0, + right: width / 2, + bottom: height / 2, + back: 1, + }), + ); + } + + Ok(YuvPlaneTextures { + y_texture, + y_handle: None, + uv_texture, + uv_handle: None, + }) +} + +unsafe fn copy_texture_subresource( + device: &ID3D11Device, + context: &ID3D11DeviceContext, + source: &ID3D11Texture2D, + subresource_index: u32, + width: u32, + height: u32, +) -> Result<(ID3D11Texture2D, Option<HANDLE>), String> { + let desc = D3D11_TEXTURE2D_DESC { + Width: width, + Height: height, + MipLevels: 1, + ArraySize: 1, + Format: DXGI_FORMAT_NV12, + SampleDesc: DXGI_SAMPLE_DESC { + Count: 1, + Quality: 0, + }, + Usage: D3D11_USAGE_DEFAULT, + BindFlags: D3D11_BIND_SHADER_RESOURCE.0 as u32, + CPUAccessFlags: 0, + MiscFlags: 0, + }; + + let mut output_texture: Option<ID3D11Texture2D> = None; + unsafe { + device + .CreateTexture2D(&desc, None, Some(&mut output_texture)) + .map_err(|e| format!("CreateTexture2D failed: {e:?}"))?; + } + + let output_texture = output_texture.ok_or("CreateTexture2D returned null")?; + + unsafe { + context.CopySubresourceRegion(&output_texture, 0, 0, 0, 0, source, subresource_index, None); + } + + Ok((output_texture, None)) +} + +unsafe impl Send for MediaFoundationDecoder {}
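`get_video_info` relies on Media Foundation packing two u32s into one u64 attribute (high word first) for MF_MT_FRAME_SIZE and MF_MT_FRAME_RATE; the `(30 << 32) | 1` fallback encodes 30/1 fps the same way. The unpacking, isolated:

```rust
// MF 64-bit attributes store (hi, lo) pairs: width/height for
// MF_MT_FRAME_SIZE, numerator/denominator for MF_MT_FRAME_RATE.
fn unpack_u64(v: u64) -> (u32, u32) {
    ((v >> 32) as u32, v as u32)
}

fn main() {
    assert_eq!(unpack_u64((1920u64 << 32) | 1080), (1920, 1080));
    assert_eq!(unpack_u64((30u64 << 32) | 1), (30, 1));
}
```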