diff --git a/.claude/settings.local.json b/.claude/settings.local.json
index 2e45d6b647..a9944f0f3d 100644
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -41,7 +41,8 @@
       "Bash(rustfmt:*)",
       "Bash(cargo tree:*)",
       "WebFetch(domain:github.com)",
-      "WebFetch(domain:docs.rs)"
+      "WebFetch(domain:docs.rs)",
+      "WebFetch(domain:gix.github.io)"
     ],
     "deny": [],
     "ask": []
diff --git a/Cargo.lock b/Cargo.lock
index e08cae4f2b..4e294dca50 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1110,6 +1110,7 @@ dependencies = [
  "ffmpeg-next",
  "inquire",
  "thiserror 1.0.69",
+ "tracing",
  "windows 0.60.0",
  "windows-core 0.60.1",
  "workspace-hack",
@@ -1432,6 +1433,7 @@ dependencies = [
 name = "cap-gpu-converters"
 version = "0.1.0"
 dependencies = [
+ "thiserror 1.0.69",
  "wgpu",
  "workspace-hack",
 ]
diff --git a/crates/camera-ffmpeg/Cargo.toml b/crates/camera-ffmpeg/Cargo.toml
index 019697c6b0..6c98a31a48 100644
--- a/crates/camera-ffmpeg/Cargo.toml
+++ b/crates/camera-ffmpeg/Cargo.toml
@@ -7,6 +7,7 @@ license = "MIT"
 [dependencies]
 ffmpeg = { workspace = true }
 thiserror.workspace = true
+tracing.workspace = true
 cap-camera = { path = "../camera" }
 workspace-hack = { version = "0.1", path = "../workspace-hack" }
diff --git a/crates/camera-ffmpeg/src/macos.rs b/crates/camera-ffmpeg/src/macos.rs
index 80f263be10..b3649f67d9 100644
--- a/crates/camera-ffmpeg/src/macos.rs
+++ b/crates/camera-ffmpeg/src/macos.rs
@@ -1,6 +1,8 @@
 use cap_camera::CapturedFrame;
 use cap_camera_avfoundation::ImageBufExt;
 use cidre::*;
+use ffmpeg::{format::Pixel, software::scaling};
+use std::sync::atomic::{AtomicBool, Ordering};
 
 use crate::CapturedFrameExt;
 
@@ -14,10 +16,41 @@ pub enum AsFFmpegError {
         expected: usize,
         found: usize,
     },
+    #[error("Swscale fallback failed for format '{format}': {reason}")]
+    SwscaleFallbackFailed { format: String, reason: String },
     #[error("{0}")]
     Native(#[from] cidre::os::Error),
 }
 
+struct FourccInfo {
+    pixel: Pixel,
+    bytes_per_pixel: usize,
+}
+
+fn fourcc_to_pixel_format(fourcc: &str) -> Option<FourccInfo> {
+    match fourcc {
+        "ABGR" => Some(FourccInfo {
+            pixel: Pixel::ABGR,
+            bytes_per_pixel: 4,
+        }),
+        "b64a" => Some(FourccInfo {
+            pixel: Pixel::RGBA64BE,
+            bytes_per_pixel: 8,
+        }),
+        "b48r" => Some(FourccInfo {
+            pixel: Pixel::RGB48BE,
+            bytes_per_pixel: 6,
+        }),
+        "L016" => Some(FourccInfo {
+            pixel: Pixel::GRAY16LE,
+            bytes_per_pixel: 2,
+        }),
+        _ => None,
+    }
+}
+
+static FALLBACK_WARNING_LOGGED: AtomicBool = AtomicBool::new(false);
+
 impl CapturedFrameExt for CapturedFrame {
     fn as_ffmpeg(&self) -> Result<ffmpeg::frame::Video, AsFFmpegError> {
         let native = self.native().clone();
@@ -162,6 +195,29 @@ impl CapturedFrameExt for CapturedFrame {
 
                 ff_frame
             }
+            "RGBA" => {
+                let mut ff_frame = ffmpeg::frame::Video::new(
+                    ffmpeg::format::Pixel::RGBA,
+                    width as u32,
+                    height as u32,
+                );
+
+                let src_stride = native.image_buf().plane_bytes_per_row(0);
+                let dest_stride = ff_frame.stride(0);
+
+                let src_bytes = bytes_lock.plane_data(0);
+                let dest_bytes = &mut ff_frame.data_mut(0);
+
+                for y in 0..height {
+                    let row_width = width * 4;
+                    let src_row = &src_bytes[y * src_stride..y * src_stride + row_width];
+                    let dest_row = &mut dest_bytes[y * dest_stride..y * dest_stride + row_width];
+
+                    dest_row.copy_from_slice(src_row);
+                }
+
+                ff_frame
+            }
             "24BG" => {
                 let mut ff_frame = ffmpeg::frame::Video::new(
                     ffmpeg::format::Pixel::BGR24,
                     width as u32,
                     height as u32,
                 );
@@ -185,6 +241,29 @@ impl CapturedFrameExt for CapturedFrame {
 
                 ff_frame
             }
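+            // Rows are copied one at a time (here and in the arms above)
+            // because CVPixelBuffer pads each row to `plane_bytes_per_row(0)`,
+            // which may exceed `width * bytes_per_pixel`, and the ffmpeg frame
+            // has its own stride; a single bulk copy would misalign rows.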
+            "24RG" => {
+                let mut ff_frame = ffmpeg::frame::Video::new(
+                    ffmpeg::format::Pixel::RGB24,
+                    width as u32,
+                    height as u32,
+                );
+
+                let src_stride = native.image_buf().plane_bytes_per_row(0);
+                let dest_stride = ff_frame.stride(0);
+
+                let src_bytes = bytes_lock.plane_data(0);
+                let dest_bytes = &mut ff_frame.data_mut(0);
+
+                for y in 0..height {
+                    let row_width = width * 3;
+                    let src_row = &src_bytes[y * src_stride..y * src_stride + row_width];
+                    let dest_row = &mut dest_bytes[y * dest_stride..y * dest_stride + row_width];
+
+                    dest_row.copy_from_slice(src_row);
+                }
+
+                ff_frame
+            }
             "y420" => {
                 let plane_count = native.image_buf().plane_count();
                 if plane_count < 3 {
@@ -220,8 +299,82 @@ impl CapturedFrameExt for CapturedFrame {
 
                 ff_frame
             }
+            "L008" | "GRAY" => {
+                let mut ff_frame = ffmpeg::frame::Video::new(
+                    ffmpeg::format::Pixel::GRAY8,
+                    width as u32,
+                    height as u32,
+                );
+
+                let src_stride = native.image_buf().plane_bytes_per_row(0);
+                let dest_stride = ff_frame.stride(0);
+
+                let src_bytes = bytes_lock.plane_data(0);
+                let dest_bytes = &mut ff_frame.data_mut(0);
+
+                for y in 0..height {
+                    let row_width = width;
+                    let src_row = &src_bytes[y * src_stride..y * src_stride + row_width];
+                    let dest_row = &mut dest_bytes[y * dest_stride..y * dest_stride + row_width];
+
+                    dest_row.copy_from_slice(src_row);
+                }
+
+                ff_frame
+            }
             format => {
-                return Err(AsFFmpegError::UnsupportedSubType(format.to_string()));
+                if let Some(info) = fourcc_to_pixel_format(format) {
+                    if !FALLBACK_WARNING_LOGGED.swap(true, Ordering::Relaxed) {
+                        tracing::warn!(
+                            "Using swscale fallback for camera format '{}' - this may impact performance",
+                            format
+                        );
+                    }
+
+                    let mut src_frame =
+                        ffmpeg::frame::Video::new(info.pixel, width as u32, height as u32);
+
+                    let src_stride = native.image_buf().plane_bytes_per_row(0);
+                    let dest_stride = src_frame.stride(0);
+                    let src_bytes = bytes_lock.plane_data(0);
+                    let dest_bytes = &mut src_frame.data_mut(0);
+
+                    let row_width = width * info.bytes_per_pixel;
+                    for y in 0..height {
+                        let src_row = &src_bytes[y * src_stride..y * src_stride + row_width];
+                        let dest_row =
+                            &mut dest_bytes[y * dest_stride..y * dest_stride + row_width];
+                        dest_row.copy_from_slice(src_row);
+                    }
+
+                    let mut scaler = scaling::Context::get(
+                        info.pixel,
+                        width as u32,
+                        height as u32,
+                        Pixel::RGBA,
+                        width as u32,
+                        height as u32,
+                        scaling::flag::Flags::FAST_BILINEAR,
+                    )
+                    .map_err(|e| AsFFmpegError::SwscaleFallbackFailed {
+                        format: format.to_string(),
+                        reason: format!("Failed to create scaler: {e}"),
+                    })?;
+
+                    let mut output_frame =
+                        ffmpeg::frame::Video::new(Pixel::RGBA, width as u32, height as u32);
+
+                    scaler.run(&src_frame, &mut output_frame).map_err(|e| {
+                        AsFFmpegError::SwscaleFallbackFailed {
+                            format: format.to_string(),
+                            reason: format!("Conversion failed: {e}"),
+                        }
+                    })?;
+
+                    output_frame
+                } else {
+                    return Err(AsFFmpegError::UnsupportedSubType(format.to_string()));
+                }
             }
         };
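For reference, the swscale path the fallback takes can be exercised standalone with the ffmpeg-next crate (`ffmpeg = { workspace = true }` in this workspace maps to it); the dimensions below are arbitrary, and GRAY16LE mirrors the 'L016' table entry above:

```rust
use ffmpeg_next as ffmpeg;
use ffmpeg::{format::Pixel, software::scaling};

fn main() -> Result<(), ffmpeg::Error> {
    ffmpeg::init()?;

    let (w, h) = (64, 48);
    let src = ffmpeg::frame::Video::new(Pixel::GRAY16LE, w, h);
    let mut dst = ffmpeg::frame::Video::new(Pixel::RGBA, w, h);

    // Same converter configuration as the fallback: identical in/out
    // dimensions, FAST_BILINEAR since no actual scaling happens.
    let mut scaler = scaling::Context::get(
        Pixel::GRAY16LE, w, h,
        Pixel::RGBA, w, h,
        scaling::flag::Flags::FAST_BILINEAR,
    )?;
    scaler.run(&src, &mut dst)?;

    assert!(dst.stride(0) >= w as usize * 4);
    Ok(())
}
```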
diff --git a/crates/camera-ffmpeg/src/windows.rs b/crates/camera-ffmpeg/src/windows.rs
index cfcb075f80..8f255269ac 100644
--- a/crates/camera-ffmpeg/src/windows.rs
+++ b/crates/camera-ffmpeg/src/windows.rs
@@ -1,3 +1,5 @@
+use std::{cell::RefCell, collections::VecDeque};
+
 use cap_camera::CapturedFrame;
 use cap_camera_windows::PixelFormat;
 use ffmpeg::{Packet, format::Pixel, frame::Video as FFVideo};
@@ -12,6 +14,10 @@ pub enum AsFFmpegError {
     Empty,
     #[error("MJPEG decode error: {0}")]
     MjpegDecodeError(String),
+    #[error("H264 decode error: {0}")]
+    H264DecodeError(String),
+    #[error("H264 decoder needs more data (non-fatal)")]
+    H264NeedMoreData,
 }
 
 fn decode_mjpeg(bytes: &[u8]) -> Result<FFVideo, AsFFmpegError> {
@@ -38,6 +44,160 @@ fn decode_mjpeg(bytes: &[u8]) -> Result<FFVideo, AsFFmpegError> {
     Ok(decoded_frame)
 }
 
+pub struct H264Decoder {
+    decoder: ffmpeg::codec::decoder::Video,
+    received_keyframe: bool,
+    frame_buffer: VecDeque<FFVideo>,
+}
+
+impl H264Decoder {
+    pub fn new() -> Result<Self, AsFFmpegError> {
+        let codec = ffmpeg::codec::decoder::find(ffmpeg::codec::Id::H264)
+            .ok_or_else(|| AsFFmpegError::H264DecodeError("H264 codec not found".to_string()))?;
+
+        let decoder_context = ffmpeg::codec::context::Context::new_with_codec(codec);
+
+        let decoder = decoder_context.decoder().video().map_err(|e| {
+            AsFFmpegError::H264DecodeError(format!("Failed to create decoder: {e}"))
+        })?;
+
+        Ok(Self {
+            decoder,
+            received_keyframe: false,
+            frame_buffer: VecDeque::new(),
+        })
+    }
+
+    pub fn decode(&mut self, bytes: &[u8]) -> Result<Option<FFVideo>, AsFFmpegError> {
+        if let Some(frame) = self.frame_buffer.pop_front() {
+            return Ok(Some(frame));
+        }
+
+        if !self.received_keyframe && !Self::contains_keyframe(bytes) {
+            return Ok(None);
+        }
+
+        if Self::contains_keyframe(bytes) {
+            self.received_keyframe = true;
+        }
+
+        let packet = Packet::copy(bytes);
+
+        loop {
+            match self.decoder.send_packet(&packet) {
+                Ok(()) => break,
+                Err(ffmpeg::Error::Other { errno }) if errno == ffmpeg::error::EAGAIN => {
+                    self.drain_frames()?;
+                }
+                Err(e) => {
+                    return Err(AsFFmpegError::H264DecodeError(format!(
+                        "Failed to send packet: {e}"
+                    )));
+                }
+            }
+        }
+
+        self.drain_frames()?;
+
+        Ok(self.frame_buffer.pop_front())
+    }
+
+    pub fn flush(&mut self) -> Result<Vec<FFVideo>, AsFFmpegError> {
+        if let Err(e) = self.decoder.send_eof()
+            && !matches!(e, ffmpeg::Error::Eof)
+        {
+            return Err(AsFFmpegError::H264DecodeError(format!(
+                "Failed to send EOF: {e}"
+            )));
+        }
+
+        self.drain_frames()?;
+
+        Ok(self.frame_buffer.drain(..).collect())
+    }
+
+    pub fn reset(&mut self) -> Result<(), AsFFmpegError> {
+        *self = Self::new()?;
+        Ok(())
+    }
+
+    fn drain_frames(&mut self) -> Result<(), AsFFmpegError> {
+        loop {
+            let mut decoded_frame = FFVideo::empty();
+            match self.decoder.receive_frame(&mut decoded_frame) {
+                Ok(()) => {
+                    self.frame_buffer.push_back(decoded_frame);
+                }
+                Err(ffmpeg::Error::Other { errno }) if errno == ffmpeg::error::EAGAIN => {
+                    return Ok(());
+                }
+                Err(ffmpeg::Error::Eof) => {
+                    return Ok(());
+                }
+                Err(e) => {
+                    return Err(AsFFmpegError::H264DecodeError(format!(
+                        "Failed to receive frame: {e}"
+                    )));
+                }
+            }
+        }
+    }
+
+    fn contains_keyframe(bytes: &[u8]) -> bool {
+        let mut i = 0;
+        while i + 4 < bytes.len() {
+            if bytes[i] == 0 && bytes[i + 1] == 0 {
+                let (start_code_len, nal_start) = if bytes[i + 2] == 1 {
+                    (3, i + 3)
+                } else if bytes[i + 2] == 0 && i + 3 < bytes.len() && bytes[i + 3] == 1 {
+                    (4, i + 4)
+                } else {
+                    i += 1;
+                    continue;
+                };
+
+                if nal_start < bytes.len() {
+                    let nal_unit_type = bytes[nal_start] & 0x1F;
+                    match nal_unit_type {
+                        5 | 7 | 8 => return true,
+                        _ => {}
+                    }
+                }
+
+                i += start_code_len;
+            } else {
+                i += 1;
+            }
+        }
+        false
+    }
+}
+
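+// Expected usage: feed each Annex B sample to `decode`; it returns Ok(None)
+// until a keyframe (IDR/SPS/PPS NAL) has been seen, then yields at most one
+// decoded frame per call, buffering any extras. Call `flush` at end of stream.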
+thread_local! {
+    static H264_DECODER: RefCell<Option<H264Decoder>> = const { RefCell::new(None) };
+}
+
+fn decode_h264(bytes: &[u8]) -> Result<FFVideo, AsFFmpegError> {
+    H264_DECODER.with(|decoder_cell| {
+        let mut decoder_opt = decoder_cell.borrow_mut();
+
+        if decoder_opt.is_none() {
+            *decoder_opt = Some(H264Decoder::new()?);
+        }
+
+        let decoder = decoder_opt.as_mut().unwrap();
+        decoder
+            .decode(bytes)?
+            .ok_or(AsFFmpegError::H264NeedMoreData)
+    })
+}
+
+pub fn reset_h264_decoder() {
+    H264_DECODER.with(|decoder_cell| {
+        *decoder_cell.borrow_mut() = None;
+    });
+}
+
 impl CapturedFrameExt for CapturedFrame {
     fn as_ffmpeg(&self) -> Result<FFVideo, AsFFmpegError> {
         let native = self.native();
@@ -232,6 +392,98 @@ impl CapturedFrameExt for CapturedFrame {
 
                 ff_frame
             }
+            PixelFormat::GRAY8 => {
+                let mut ff_frame = FFVideo::new(Pixel::GRAY8, width as u32, height as u32);
+
+                let stride = ff_frame.stride(0);
+
+                for y in 0..height {
+                    let row_width = width;
+                    let src_row = &bytes[y * row_width..];
+                    let dest_row = &mut ff_frame.data_mut(0)[y * stride..];
+                    dest_row[0..row_width].copy_from_slice(&src_row[0..row_width]);
+                }
+
+                ff_frame
+            }
+            PixelFormat::GRAY16 => {
+                let mut ff_frame = FFVideo::new(Pixel::GRAY16LE, width as u32, height as u32);
+
+                let stride = ff_frame.stride(0);
+                let src_stride = width * 2;
+
+                for y in 0..height {
+                    let src_row = &bytes[y * src_stride..];
+                    let dest_row = &mut ff_frame.data_mut(0)[y * stride..];
+                    dest_row[0..src_stride].copy_from_slice(&src_row[0..src_stride]);
+                }
+
+                ff_frame
+            }
+            PixelFormat::NV21 => {
+                let mut ff_frame = FFVideo::new(Pixel::NV12, width as u32, height as u32);
+
+                let stride = ff_frame.stride(0);
+                for y in 0..height {
+                    let src_row = &bytes[y * width..];
+                    let dest_row = &mut ff_frame.data_mut(0)[y * stride..];
+                    dest_row[0..width].copy_from_slice(&src_row[0..width]);
+                }
+
+                let stride = ff_frame.stride(1);
+                let src_uv = &bytes[width * height..];
+
+                for y in 0..height / 2 {
+                    let row_width = width;
+                    let src_row = &src_uv[y * row_width..];
+                    let dest_row = &mut ff_frame.data_mut(1)[y * stride..];
+                    for x in 0..width / 2 {
+                        dest_row[x * 2] = src_row[x * 2 + 1];
+                        dest_row[x * 2 + 1] = src_row[x * 2];
+                    }
+                }
+
+                ff_frame
+            }
+            PixelFormat::RGB565 => {
+                let mut ff_frame = FFVideo::new(Pixel::RGB565LE, width as u32, height as u32);
+
+                let stride = ff_frame.stride(0);
+                let src_stride = width * 2;
+
+                for y in 0..height {
+                    let src_row = &bytes[(height - y - 1) * src_stride..];
+                    let dest_row = &mut ff_frame.data_mut(0)[y * stride..];
+                    dest_row[0..src_stride].copy_from_slice(&src_row[0..src_stride]);
+                }
+
+                ff_frame
+            }
+            PixelFormat::P010 => {
+                let mut ff_frame = FFVideo::new(Pixel::P010LE, width as u32, height as u32);
+
+                let stride = ff_frame.stride(0);
+                let src_stride = width * 2;
+
+                for y in 0..height {
+                    let src_row = &bytes[y * src_stride..];
+                    let dest_row = &mut ff_frame.data_mut(0)[y * stride..];
+                    dest_row[0..src_stride].copy_from_slice(&src_row[0..src_stride]);
+                }
+
+                let stride = ff_frame.stride(1);
+                let uv_offset = width * height * 2;
+                let src_stride = width * 2;
+
+                for y in 0..height / 2 {
+                    let src_row = &bytes[uv_offset + y * src_stride..];
+                    let dest_row = &mut ff_frame.data_mut(1)[y * stride..];
+                    dest_row[0..src_stride].copy_from_slice(&src_row[0..src_stride]);
+                }
+
+                ff_frame
+            }
+            PixelFormat::H264 => decode_h264(&bytes)?,
         })
     }
 }
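A minimal sketch of how the new decoder is meant to be driven; `annex_b_samples` and `drain_camera` are illustrative stand-ins for the capture callback, not part of the crate:

```rust
// Hypothetical driver loop over H.264 camera samples in Annex B framing.
fn drain_camera(annex_b_samples: &[Vec<u8>]) -> Result<usize, AsFFmpegError> {
    let mut decoder = H264Decoder::new()?;
    let mut decoded = 0;

    for sample in annex_b_samples {
        // Returns Ok(None) until the first keyframe has been seen.
        if let Some(_frame) = decoder.decode(sample)? {
            decoded += 1;
        }
    }

    // End of stream: recover frames still buffered inside libavcodec.
    decoded += decoder.flush()?.len();
    Ok(decoded)
}
```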
diff --git a/crates/camera-windows/src/lib.rs b/crates/camera-windows/src/lib.rs
index fb8b41eb4d..58aa97349e 100644
--- a/crates/camera-windows/src/lib.rs
+++ b/crates/camera-windows/src/lib.rs
@@ -12,6 +12,15 @@ use std::{
 use windows::Win32::Media::{DirectShow::*, KernelStreaming::*, MediaFoundation::*};
 use windows_core::GUID;
 
+// D3DFMT_L8 = 50 (0x32), D3DFMT_L16 = 81 (0x51); MFVideoFormat GUIDs embed the
+// D3DFMT/FOURCC code in the first 32 bits.
+const MF_VIDEO_FORMAT_L8: GUID = GUID::from_u128(0x00000032_0000_0010_8000_00aa00389b71);
+const MF_VIDEO_FORMAT_L16: GUID = GUID::from_u128(0x00000051_0000_0010_8000_00aa00389b71);
+// FOURCCMap GUID for 'NV21' - identical for both DirectShow MEDIASUBTYPE and Media Foundation
+const MEDIASUBTYPE_NV21: GUID = GUID::from_u128(0x3132564e_0000_0010_8000_00aa00389b71);
+const MF_VIDEO_FORMAT_RGB565: GUID = GUID::from_u128(0x00000017_0000_0010_8000_00aa00389b71);
+const MF_VIDEO_FORMAT_P010: GUID = GUID::from_u128(0x30313050_0000_0010_8000_00aa00389b71);
+
+const MEDIASUBTYPE_Y800: GUID = GUID::from_u128(0x30303859_0000_0010_8000_00aa00389b71);
+
 #[derive(Clone)]
 pub struct VideoDeviceInfo {
     id: OsString,
@@ -221,18 +230,24 @@ pub enum FrameInner {
     DirectShow(IMediaSample),
 }
 
-#[derive(Debug, Clone, Copy)]
+#[derive(Debug, Clone, Copy, PartialEq)]
 pub enum PixelFormat {
     ARGB,
     RGB24,
     RGB32,
     YUV420P,
     NV12,
+    NV21,
     YUYV422,
     UYVY422,
     MJPEG,
     YV12,
     BGR24,
+    GRAY8,
+    GRAY16,
+    RGB565,
+    P010,
+    H264,
 }
 
 #[derive(Clone)]
@@ -462,6 +477,12 @@ impl MFPixelFormat {
             t if t == MFVideoFormat_NV12 => PixelFormat::NV12,
             t if t == MFVideoFormat_MJPG => PixelFormat::MJPEG,
             t if t == MFVideoFormat_YV12 => PixelFormat::YV12,
+            t if t == MF_VIDEO_FORMAT_L8 => PixelFormat::GRAY8,
+            t if t == MF_VIDEO_FORMAT_L16 => PixelFormat::GRAY16,
+            t if t == MEDIASUBTYPE_NV21 => PixelFormat::NV21,
+            t if t == MF_VIDEO_FORMAT_RGB565 => PixelFormat::RGB565,
+            t if t == MF_VIDEO_FORMAT_P010 => PixelFormat::P010,
+            t if t == MFVideoFormat_H264 => PixelFormat::H264,
             _ => return None,
         })
     };
@@ -510,6 +531,9 @@ impl DSPixelFormat {
             t if t == MEDIASUBTYPE_NV12 => PixelFormat::NV12,
             t if t == MEDIASUBTYPE_MJPG => PixelFormat::MJPEG,
             t if t == MEDIASUBTYPE_YV12 => PixelFormat::YV12,
+            t if t == MEDIASUBTYPE_Y800 || t == MEDIASUBTYPE_RGB8 => PixelFormat::GRAY8,
+            t if t == MEDIASUBTYPE_NV21 => PixelFormat::NV21,
+            t if t == MEDIASUBTYPE_RGB565 => PixelFormat::RGB565,
             _ => return None,
         })
     };
diff --git a/crates/enc-ffmpeg/src/mux/mp4.rs b/crates/enc-ffmpeg/src/mux/mp4.rs
index 3c9e31f055..023564b8e1 100644
--- a/crates/enc-ffmpeg/src/mux/mp4.rs
+++ b/crates/enc-ffmpeg/src/mux/mp4.rs
@@ -81,7 +81,7 @@ impl MP4File {
     }
 
     pub fn video_format() -> RawVideoFormat {
-        RawVideoFormat::YUYV420
+        RawVideoFormat::Yuv420p
     }
 
     pub fn queue_video_frame(
diff --git a/crates/frame-converter/src/d3d11.rs b/crates/frame-converter/src/d3d11.rs
index e3617939d3..694c4868a3 100644
--- a/crates/frame-converter/src/d3d11.rs
+++ b/crates/frame-converter/src/d3d11.rs
@@ -567,7 +567,7 @@ unsafe fn copy_frame_to_mapped(frame: &frame::Video, dst: *mut u8, dst_stride: u
                 }
             }
         }
-        Pixel::YUYV422 | Pixel::UYVY422 => {
+        Pixel::YUYV422 => {
             let row_bytes = frame.width() as usize * 2;
             for y in 0..height {
                 unsafe {
diff --git a/crates/gpu-converters/Cargo.toml b/crates/gpu-converters/Cargo.toml
index c5970f8406..6de3a68e61 100644
--- a/crates/gpu-converters/Cargo.toml
+++ b/crates/gpu-converters/Cargo.toml
@@ -4,6 +4,7 @@ version = "0.1.0"
 edition = "2024"
 
 [dependencies]
+thiserror.workspace = true
 wgpu.workspace = true
 workspace-hack = { version = "0.1", path = "../workspace-hack" }
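The GUID constants above all follow the FOURCC media-subtype pattern `{XXXXXXXX-0000-0010-8000-00AA00389B71}`, where the first field is the little-endian FOURCC (or D3DFMT code). A quick standalone check of that encoding:

```rust
// FOURCC-style media subtype GUIDs store the four characters little-endian
// in the first 32 bits of the base GUID.
fn fourcc_guid_high32(fourcc: [u8; 4]) -> u32 {
    u32::from_le_bytes(fourcc)
}

fn main() {
    assert_eq!(fourcc_guid_high32(*b"NV21"), 0x3132564e); // MEDIASUBTYPE_NV21
    assert_eq!(fourcc_guid_high32(*b"Y800"), 0x30303859); // MEDIASUBTYPE_Y800
    assert_eq!(fourcc_guid_high32(*b"P010"), 0x30313050); // MF_VIDEO_FORMAT_P010
}
```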
diff --git a/crates/gpu-converters/src/bgra_rgba/mod.rs b/crates/gpu-converters/src/bgra_rgba/mod.rs
new file mode 100644
index 0000000000..52e11dcca4
--- /dev/null
+++ b/crates/gpu-converters/src/bgra_rgba/mod.rs
@@ -0,0 +1,170 @@
+use wgpu::{self, util::DeviceExt};
+
+use crate::{
+    GpuConverterError,
+    util::{copy_texture_to_buffer_command, read_buffer_to_vec},
+};
+
+pub struct BGRAToRGBA {
+    device: wgpu::Device,
+    queue: wgpu::Queue,
+    pipeline: wgpu::ComputePipeline,
+    bind_group_layout: wgpu::BindGroupLayout,
+}
+
+impl BGRAToRGBA {
+    pub async fn new() -> Result<Self, GpuConverterError> {
+        let instance = wgpu::Instance::new(&wgpu::InstanceDescriptor::default());
+
+        let adapter = instance
+            .request_adapter(&wgpu::RequestAdapterOptions {
+                power_preference: wgpu::PowerPreference::HighPerformance,
+                force_fallback_adapter: false,
+                compatible_surface: None,
+            })
+            .await?;
+
+        let (device, queue) = adapter
+            .request_device(&wgpu::DeviceDescriptor::default())
+            .await?;
+
+        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
+            label: Some("BGRA to RGBA Converter"),
+            source: wgpu::ShaderSource::Wgsl(std::borrow::Cow::Borrowed(include_str!(
+                "./shader.wgsl"
+            ))),
+        });
+
+        let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
+            label: Some("BGRA to RGBA Bind Group Layout"),
+            entries: &[
+                wgpu::BindGroupLayoutEntry {
+                    binding: 0,
+                    visibility: wgpu::ShaderStages::COMPUTE,
+                    ty: wgpu::BindingType::Texture {
+                        sample_type: wgpu::TextureSampleType::Float { filterable: false },
+                        view_dimension: wgpu::TextureViewDimension::D2,
+                        multisampled: false,
+                    },
+                    count: None,
+                },
+                wgpu::BindGroupLayoutEntry {
+                    binding: 1,
+                    visibility: wgpu::ShaderStages::COMPUTE,
+                    ty: wgpu::BindingType::StorageTexture {
+                        access: wgpu::StorageTextureAccess::WriteOnly,
+                        format: wgpu::TextureFormat::Rgba8Unorm,
+                        view_dimension: wgpu::TextureViewDimension::D2,
+                    },
+                    count: None,
+                },
+            ],
+        });
+
+        let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
+            label: Some("BGRA to RGBA Pipeline Layout"),
+            bind_group_layouts: &[&bind_group_layout],
+            push_constant_ranges: &[],
+        });
+
+        let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
+            label: Some("BGRA to RGBA Pipeline"),
+            layout: Some(&pipeline_layout),
+            module: &shader,
+            entry_point: Some("main"),
+            compilation_options: Default::default(),
+            cache: None,
+        });
+
+        Ok(Self {
+            device,
+            queue,
+            pipeline,
+            bind_group_layout,
+        })
+    }
+
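+    /// Uploads a tightly packed BGRA buffer, swizzles it to RGBA in a compute
+    /// pass, and reads the result back; expects `bgra_data` to be exactly
+    /// `width * height * 4` bytes.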
+    pub fn convert(
+        &self,
+        bgra_data: &[u8],
+        width: u32,
+        height: u32,
+    ) -> Result<Vec<u8>, wgpu::PollError> {
+        let input_texture = self.device.create_texture_with_data(
+            &self.queue,
+            &wgpu::TextureDescriptor {
+                label: Some("BGRA Input Texture"),
+                size: wgpu::Extent3d {
+                    width,
+                    height,
+                    depth_or_array_layers: 1,
+                },
+                mip_level_count: 1,
+                sample_count: 1,
+                dimension: wgpu::TextureDimension::D2,
+                format: wgpu::TextureFormat::Bgra8Unorm,
+                usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
+                view_formats: &[],
+            },
+            wgpu::util::TextureDataOrder::MipMajor,
+            bgra_data,
+        );
+
+        let output_texture = self.device.create_texture(&wgpu::TextureDescriptor {
+            label: Some("BGRA to RGBA Output Texture"),
+            size: wgpu::Extent3d {
+                width,
+                height,
+                depth_or_array_layers: 1,
+            },
+            mip_level_count: 1,
+            sample_count: 1,
+            dimension: wgpu::TextureDimension::D2,
+            format: wgpu::TextureFormat::Rgba8Unorm,
+            usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::COPY_SRC,
+            view_formats: &[],
+        });
+
+        let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
+            label: Some("BGRA to RGBA Bind Group"),
+            layout: &self.bind_group_layout,
+            entries: &[
+                wgpu::BindGroupEntry {
+                    binding: 0,
+                    resource: wgpu::BindingResource::TextureView(
+                        &input_texture.create_view(&Default::default()),
+                    ),
+                },
+                wgpu::BindGroupEntry {
+                    binding: 1,
+                    resource: wgpu::BindingResource::TextureView(
+                        &output_texture.create_view(&Default::default()),
+                    ),
+                },
+            ],
+        });
+
+        let mut encoder = self
+            .device
+            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
+                label: Some("BGRA to RGBA Encoder"),
+            });
+
+        {
+            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
+                label: Some("BGRA to RGBA Pass"),
+                ..Default::default()
+            });
+            compute_pass.set_pipeline(&self.pipeline);
+            compute_pass.set_bind_group(0, &bind_group, &[]);
+            compute_pass.dispatch_workgroups(width.div_ceil(8), height.div_ceil(8), 1);
+        }
+
+        let output_buffer =
+            copy_texture_to_buffer_command(&self.device, &output_texture, &mut encoder);
+
+        let _submission = self.queue.submit(std::iter::once(encoder.finish()));
+
+        read_buffer_to_vec(&output_buffer, &self.device)
+    }
+}
diff --git a/crates/gpu-converters/src/bgra_rgba/shader.wgsl b/crates/gpu-converters/src/bgra_rgba/shader.wgsl
new file mode 100644
index 0000000000..43aa9874be
--- /dev/null
+++ b/crates/gpu-converters/src/bgra_rgba/shader.wgsl
@@ -0,0 +1,20 @@
+@group(0) @binding(0)
+var bgra_input: texture_2d<f32>;
+
+@group(0) @binding(1)
+var rgba_output: texture_storage_2d<rgba8unorm, write>;
+
+@compute
+@workgroup_size(8, 8)
+fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
+    let dims = textureDimensions(bgra_input);
+    if (global_id.x >= dims.x || global_id.y >= dims.y) {
+        return;
+    }
+
+    let bgra = textureLoad(bgra_input, global_id.xy, 0);
+
+    let rgba = vec4<f32>(bgra.b, bgra.g, bgra.r, bgra.a);
+
+    textureStore(rgba_output, global_id.xy, rgba);
+}
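A hypothetical call site for the converter; `pollster` for blocking on the async constructor and the `cap_gpu_converters` lib name (from the Cargo.lock entry above) are assumptions:

```rust
// Sketch: drive the async constructor from synchronous code, then convert
// one tightly packed BGRA frame into RGBA bytes.
fn bgra_frame_to_rgba(frame: &[u8], width: u32, height: u32) -> Vec<u8> {
    let converter = pollster::block_on(cap_gpu_converters::BGRAToRGBA::new())
        .expect("no suitable GPU adapter/device");
    converter
        .convert(frame, width, height)
        .expect("GPU readback failed")
}
```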
diff --git a/crates/gpu-converters/src/lib.rs b/crates/gpu-converters/src/lib.rs
index ba5fd28255..fe71fae6b1 100644
--- a/crates/gpu-converters/src/lib.rs
+++ b/crates/gpu-converters/src/lib.rs
@@ -1,12 +1,39 @@
+mod bgra_rgba;
 mod nv12_rgba;
 mod util;
 mod uyvy;
 mod uyvy_nv12;
 mod uyvy_rgba;
+mod yuyv;
+mod yuyv_nv12;
+mod yuyv_rgba;
 
+pub use bgra_rgba::BGRAToRGBA;
 pub use nv12_rgba::NV12ToRGBA;
 pub use uyvy_nv12::UYVYToNV12;
 pub use uyvy_rgba::UYVYToRGBA;
+pub use yuyv_nv12::YUYVToNV12;
+pub use yuyv_rgba::YUYVToRGBA;
+
+#[derive(Debug, thiserror::Error)]
+pub enum GpuConverterError {
+    #[error("Failed to request GPU adapter: {0}")]
+    RequestAdapterFailed(#[from] wgpu::RequestAdapterError),
+    #[error("Failed to request GPU device: {0}")]
+    RequestDeviceFailed(#[from] wgpu::RequestDeviceError),
+}
+
+#[derive(Debug, thiserror::Error)]
+pub enum ConvertError {
+    #[error("YUYV format requires even width, got {width}")]
+    OddWidth { width: u32 },
+    #[error("buffer size mismatch: expected {expected} bytes, got {actual}")]
+    BufferSizeMismatch { expected: usize, actual: usize },
+    #[error("failed to create input texture: {0}")]
+    TextureCreation(String),
+    #[error("GPU poll error: {0}")]
+    Poll(#[from] wgpu::PollError),
+}
 
 pub struct NV12Input<'a> {
     y_data: &'a [u8],
diff --git a/crates/gpu-converters/src/yuyv.rs b/crates/gpu-converters/src/yuyv.rs
new file mode 100644
index 0000000000..6ddc92fa78
--- /dev/null
+++ b/crates/gpu-converters/src/yuyv.rs
@@ -0,0 +1,53 @@
+use wgpu::util::DeviceExt;
+
+pub fn create_input_texture(
+    device: &wgpu::Device,
+    queue: &wgpu::Queue,
+    data: &[u8],
+    width: u32,
+    height: u32,
+) -> Result<wgpu::Texture, String> {
+    if width == 0 {
+        return Err("YUYV texture width must be non-zero".to_string());
+    }
+    if height == 0 {
+        return Err("YUYV texture height must be non-zero".to_string());
+    }
+    if !width.is_multiple_of(2) {
+        return Err(format!(
+            "YUYV texture width must be even (got {width}), as YUYV encodes pairs of pixels"
+        ));
+    }
+    let expected_len = (width as usize)
+        .checked_mul(height as usize)
+        .and_then(|v| v.checked_mul(2))
+        .ok_or_else(|| {
+            format!("YUYV texture dimensions overflow: {width}x{height} is too large")
+        })?;
+    let actual_len = data.len();
+    if actual_len != expected_len {
+        return Err(format!(
+            "YUYV data length mismatch: expected {expected_len} bytes ({width}x{height}x2), got {actual_len} bytes"
+        ));
+    }
+
+    Ok(device.create_texture_with_data(
+        queue,
+        &wgpu::TextureDescriptor {
+            label: Some("YUYV Texture"),
+            size: wgpu::Extent3d {
+                width: width / 2,
+                height,
+                depth_or_array_layers: 1,
+            },
+            mip_level_count: 1,
+            sample_count: 1,
+            dimension: wgpu::TextureDimension::D2,
+            format: wgpu::TextureFormat::Rgba8Uint,
+            usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
+            view_formats: &[],
+        },
+        wgpu::util::TextureDataOrder::MipMajor,
+        data,
+    ))
+}
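Why `width / 2`: each Rgba8Uint texel of the input texture carries one YUYV macropixel, i.e. two horizontal luma samples sharing a chroma pair. A tiny illustration of that packing:

```rust
// One YUYV macropixel covers two pixels: r=Y0, g=U, b=Y1, a=V.
fn unpack_macropixel(yuyv: [u8; 4]) -> ((u8, u8, u8), (u8, u8, u8)) {
    let [y0, u, y1, v] = yuyv;
    ((y0, u, v), (y1, u, v)) // two (Y, U, V) samples sharing chroma
}

fn main() {
    let ((y0, u, v), (y1, ..)) = unpack_macropixel([0x10, 0x80, 0x20, 0x80]);
    assert_eq!((y0, y1, u, v), (0x10, 0x20, 0x80, 0x80));
}
```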
diff --git a/crates/gpu-converters/src/yuyv_nv12/mod.rs b/crates/gpu-converters/src/yuyv_nv12/mod.rs
new file mode 100644
index 0000000000..62dcbb0bf5
--- /dev/null
+++ b/crates/gpu-converters/src/yuyv_nv12/mod.rs
@@ -0,0 +1,230 @@
+use wgpu::{self, util::DeviceExt};
+
+use crate::{ConvertError, GpuConverterError, util::read_buffer_to_vec, yuyv};
+
+pub struct YUYVToNV12 {
+    device: wgpu::Device,
+    queue: wgpu::Queue,
+    pipeline: wgpu::ComputePipeline,
+    bind_group_layout: wgpu::BindGroupLayout,
+}
+
+impl YUYVToNV12 {
+    pub async fn new() -> Result<Self, GpuConverterError> {
+        let instance = wgpu::Instance::new(&wgpu::InstanceDescriptor::default());
+
+        let adapter = instance
+            .request_adapter(&wgpu::RequestAdapterOptions {
+                power_preference: wgpu::PowerPreference::HighPerformance,
+                force_fallback_adapter: false,
+                compatible_surface: None,
+            })
+            .await?;
+
+        let (device, queue) = adapter
+            .request_device(&wgpu::DeviceDescriptor::default())
+            .await?;
+
+        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
+            label: Some("YUYV to NV12 Converter"),
+            source: wgpu::ShaderSource::Wgsl(std::borrow::Cow::Borrowed(include_str!(
+                "./shader.wgsl"
+            ))),
+        });
+
+        let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
+            label: Some("YUYV to NV12 Bind Group Layout"),
+            entries: &[
+                wgpu::BindGroupLayoutEntry {
+                    binding: 0,
+                    visibility: wgpu::ShaderStages::COMPUTE,
+                    ty: wgpu::BindingType::Texture {
+                        sample_type: wgpu::TextureSampleType::Uint,
+                        view_dimension: wgpu::TextureViewDimension::D2,
+                        multisampled: false,
+                    },
+                    count: None,
+                },
+                wgpu::BindGroupLayoutEntry {
+                    binding: 1,
+                    visibility: wgpu::ShaderStages::COMPUTE,
+                    ty: wgpu::BindingType::Buffer {
+                        ty: wgpu::BufferBindingType::Storage { read_only: false },
+                        has_dynamic_offset: false,
+                        min_binding_size: None,
+                    },
+                    count: None,
+                },
+                wgpu::BindGroupLayoutEntry {
+                    binding: 2,
+                    visibility: wgpu::ShaderStages::COMPUTE,
+                    ty: wgpu::BindingType::Buffer {
+                        ty: wgpu::BufferBindingType::Storage { read_only: false },
+                        has_dynamic_offset: false,
+                        min_binding_size: None,
+                    },
+                    count: None,
+                },
+                wgpu::BindGroupLayoutEntry {
+                    binding: 3,
+                    visibility: wgpu::ShaderStages::COMPUTE,
+                    ty: wgpu::BindingType::Buffer {
+                        ty: wgpu::BufferBindingType::Uniform,
+                        has_dynamic_offset: false,
+                        min_binding_size: None,
+                    },
+                    count: None,
+                },
+            ],
+        });
+
+        let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
+            label: Some("YUYV to NV12 Pipeline Layout"),
+            bind_group_layouts: &[&bind_group_layout],
+            push_constant_ranges: &[],
+        });
+
+        let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
+            label: Some("YUYV to NV12 Pipeline"),
+            layout: Some(&pipeline_layout),
+            module: &shader,
+            entry_point: Some("main"),
+            compilation_options: Default::default(),
+            cache: None,
+        });
+
+        Ok(Self {
+            device,
+            queue,
+            pipeline,
+            bind_group_layout,
+        })
+    }
+
+    pub fn convert(
+        &self,
+        yuyv_data: &[u8],
+        width: u32,
+        height: u32,
+    ) -> Result<(Vec<u8>, Vec<u8>), ConvertError> {
+        if !width.is_multiple_of(2) {
+            return Err(ConvertError::OddWidth { width });
+        }
+
+        let expected_size = (width as usize) * (height as usize) * 2;
+        if yuyv_data.len() != expected_size {
+            return Err(ConvertError::BufferSizeMismatch {
+                expected: expected_size,
+                actual: yuyv_data.len(),
+            });
+        }
+
+        let yuyv_texture =
+            yuyv::create_input_texture(&self.device, &self.queue, yuyv_data, width, height)
+                .map_err(ConvertError::TextureCreation)?;
+
+        let width_u64 = u64::from(width);
+        let height_u64 = u64::from(height);
+        let y_plane_size = width_u64 * height_u64;
+        let uv_plane_size = (width_u64 * height_u64) / 2;
+
+        let y_write_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
+            label: Some("YUYV to NV12 Y Plane Buffer"),
+            size: y_plane_size,
+            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
+            mapped_at_creation: false,
+        });
+
+        let uv_write_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
+            label: Some("YUYV to NV12 UV Plane Buffer"),
+            size: uv_plane_size,
+            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
+            mapped_at_creation: false,
+        });
+
+        let dimensions_buffer = self
+            .device
+            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
+                label: Some("YUYV to NV12 Dimensions Buffer"),
+                contents: [width.to_ne_bytes(), height.to_ne_bytes()].as_flattened(),
+                usage: wgpu::BufferUsages::UNIFORM,
+            });
+
+        let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
+            label: Some("YUYV to NV12 Bind Group"),
+            layout: &self.bind_group_layout,
+            entries: &[
+                wgpu::BindGroupEntry {
+                    binding: 0,
+                    resource: wgpu::BindingResource::TextureView(
+                        &yuyv_texture.create_view(&Default::default()),
+                    ),
+                },
+                wgpu::BindGroupEntry {
+                    binding: 1,
+                    resource: wgpu::BindingResource::Buffer(
+                        y_write_buffer.as_entire_buffer_binding(),
+                    ),
+                },
+                wgpu::BindGroupEntry {
+                    binding: 2,
+                    resource: wgpu::BindingResource::Buffer(
+                        uv_write_buffer.as_entire_buffer_binding(),
+                    ),
+                },
+                wgpu::BindGroupEntry {
+                    binding: 3,
+                    resource: wgpu::BindingResource::Buffer(
+                        dimensions_buffer.as_entire_buffer_binding(),
+                    ),
+                },
+            ],
+        });
+
+        let mut encoder = self
+            .device
+            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
+                label: Some("YUYV to NV12 Encoder"),
+            });
+
+        {
+            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
+                label: Some("YUYV to NV12 Pass"),
+                ..Default::default()
+            });
+            compute_pass.set_pipeline(&self.pipeline);
+            compute_pass.set_bind_group(0, &bind_group, &[]);
+            compute_pass.dispatch_workgroups((width / 2).div_ceil(8), height.div_ceil(8), 1);
+        }
+
+        let y_read_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
+            label: Some("YUYV to NV12 Y Read Buffer"),
+            size: y_write_buffer.size(),
+            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
+            mapped_at_creation: false,
+        });
+
+        let uv_read_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
+            label: Some("YUYV to NV12 UV Read Buffer"),
+            size: uv_write_buffer.size(),
+            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
+            mapped_at_creation: false,
+        });
+
+        encoder.copy_buffer_to_buffer(&y_write_buffer, 0, &y_read_buffer, 0, y_write_buffer.size());
+        encoder.copy_buffer_to_buffer(
+            &uv_write_buffer,
+            0,
+            &uv_read_buffer,
+            0,
+            uv_write_buffer.size(),
+        );
+
+        let _submission = self.queue.submit(std::iter::once(encoder.finish()));
+
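+        // Read-back goes through the MAP_READ staging buffers created above;
+        // read_buffer_to_vec maps each one and polls the device until the
+        // copy completes.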
+        Ok((
+            read_buffer_to_vec(&y_read_buffer, &self.device).map_err(ConvertError::Poll)?,
+            read_buffer_to_vec(&uv_read_buffer, &self.device).map_err(ConvertError::Poll)?,
+        ))
+    }
+}
diff --git a/crates/gpu-converters/src/yuyv_nv12/shader.wgsl b/crates/gpu-converters/src/yuyv_nv12/shader.wgsl
new file mode 100644
index 0000000000..f9eb3dbf44
--- /dev/null
+++ b/crates/gpu-converters/src/yuyv_nv12/shader.wgsl
@@ -0,0 +1,37 @@
+@group(0) @binding(0) var yuyv_input: texture_2d<u32>;
+
+@group(0) @binding(1) var<storage, read_write> y_plane: array<u32>;
+@group(0) @binding(2) var<storage, read_write> uv_plane: array<u32>;
+
+@group(0) @binding(3) var<uniform> dimensions: vec2<u32>;
+
+@compute
+@workgroup_size(8, 8)
+fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
+    let dims = textureDimensions(yuyv_input);
+    if (global_id.x >= dims.x || global_id.y >= dims.y) {
+        return;
+    }
+
+    let yuyv = textureLoad(yuyv_input, global_id.xy, 0).rgba;
+
+    let y0 = yuyv.r;
+    let u = yuyv.g;
+    let y1 = yuyv.b;
+    let v = yuyv.a;
+
+    let width = dimensions.x;
+    let x = global_id.x;
+    let y = global_id.y;
+
+    let y_base = y * width + x * 2u;
+    y_plane[y_base] = y0;
+    y_plane[y_base + 1u] = y1;
+
+    if ((y & 1u) == 0u) {
+        let uv_row = y / 2u;
+        let uv_base = uv_row * width + x * 2u;
+        uv_plane[uv_base] = u;
+        uv_plane[uv_base + 1u] = v;
+    }
+}
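A usage sketch (same `pollster` and crate-name assumptions as the BGRA example above); the plane-size assertions follow from NV12's 4:2:0 layout and the buffer sizes in `convert`:

```rust
// Sketch: YUYV frame in, separate NV12 Y and interleaved UV planes out.
fn yuyv_to_nv12(frame: &[u8], width: u32, height: u32) -> (Vec<u8>, Vec<u8>) {
    let converter = pollster::block_on(cap_gpu_converters::YUYVToNV12::new())
        .expect("GPU unavailable");
    let (y_plane, uv_plane) = converter
        .convert(frame, width, height)
        .expect("convert failed");
    assert_eq!(y_plane.len(), (width * height) as usize);
    assert_eq!(uv_plane.len(), (width * height / 2) as usize);
    (y_plane, uv_plane)
}
```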
diff --git a/crates/gpu-converters/src/yuyv_rgba/mod.rs b/crates/gpu-converters/src/yuyv_rgba/mod.rs
new file mode 100644
index 0000000000..30d9c3d86c
--- /dev/null
+++ b/crates/gpu-converters/src/yuyv_rgba/mod.rs
@@ -0,0 +1,165 @@
+use crate::{
+    ConvertError, GpuConverterError,
+    util::{copy_texture_to_buffer_command, read_buffer_to_vec},
+    yuyv,
+};
+
+pub struct YUYVToRGBA {
+    device: wgpu::Device,
+    queue: wgpu::Queue,
+    pipeline: wgpu::ComputePipeline,
+    bind_group_layout: wgpu::BindGroupLayout,
+}
+
+impl YUYVToRGBA {
+    pub async fn new() -> Result<Self, GpuConverterError> {
+        let instance = wgpu::Instance::new(&wgpu::InstanceDescriptor::default());
+
+        let adapter = instance
+            .request_adapter(&wgpu::RequestAdapterOptions {
+                power_preference: wgpu::PowerPreference::HighPerformance,
+                force_fallback_adapter: false,
+                compatible_surface: None,
+            })
+            .await?;
+
+        let (device, queue) = adapter
+            .request_device(&wgpu::DeviceDescriptor::default())
+            .await?;
+
+        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
+            label: Some("YUYV to RGBA Converter"),
+            source: wgpu::ShaderSource::Wgsl(std::borrow::Cow::Borrowed(include_str!(
+                "./shader.wgsl"
+            ))),
+        });
+
+        let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
+            label: Some("YUYV to RGBA Bind Group Layout"),
+            entries: &[
+                wgpu::BindGroupLayoutEntry {
+                    binding: 0,
+                    visibility: wgpu::ShaderStages::COMPUTE,
+                    ty: wgpu::BindingType::Texture {
+                        sample_type: wgpu::TextureSampleType::Uint,
+                        view_dimension: wgpu::TextureViewDimension::D2,
+                        multisampled: false,
+                    },
+                    count: None,
+                },
+                wgpu::BindGroupLayoutEntry {
+                    binding: 1,
+                    visibility: wgpu::ShaderStages::COMPUTE,
+                    ty: wgpu::BindingType::StorageTexture {
+                        access: wgpu::StorageTextureAccess::WriteOnly,
+                        format: wgpu::TextureFormat::Rgba8Unorm,
+                        view_dimension: wgpu::TextureViewDimension::D2,
+                    },
+                    count: None,
+                },
+            ],
+        });
+
+        let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
+            label: Some("YUYV to RGBA Pipeline Layout"),
+            bind_group_layouts: &[&bind_group_layout],
+            push_constant_ranges: &[],
+        });
+
+        let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
+            label: Some("YUYV to RGBA Pipeline"),
+            layout: Some(&pipeline_layout),
+            module: &shader,
+            entry_point: Some("main"),
+            compilation_options: Default::default(),
+            cache: None,
+        });
+
+        Ok(Self {
+            device,
+            queue,
+            pipeline,
+            bind_group_layout,
+        })
+    }
+
+    pub fn convert(
+        &self,
+        yuyv_data: &[u8],
+        width: u32,
+        height: u32,
+    ) -> Result<Vec<u8>, ConvertError> {
+        if !width.is_multiple_of(2) {
+            return Err(ConvertError::OddWidth { width });
+        }
+
+        let expected_size = (width as usize) * (height as usize) * 2;
+        if yuyv_data.len() != expected_size {
+            return Err(ConvertError::BufferSizeMismatch {
+                expected: expected_size,
+                actual: yuyv_data.len(),
+            });
+        }
+
+        let yuyv_texture =
+            yuyv::create_input_texture(&self.device, &self.queue, yuyv_data, width, height)
+                .map_err(ConvertError::TextureCreation)?;
+
+        let output_texture = self.device.create_texture(&wgpu::TextureDescriptor {
+            label: Some("YUYV to RGBA Output Texture"),
+            size: wgpu::Extent3d {
+                width,
+                height,
+                depth_or_array_layers: 1,
+            },
+            mip_level_count: 1,
+            sample_count: 1,
+            dimension: wgpu::TextureDimension::D2,
+            format: wgpu::TextureFormat::Rgba8Unorm,
+            usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::COPY_SRC,
+            view_formats: &[],
+        });
+
+        let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
+            label: Some("YUYV to RGBA Bind Group"),
+            layout: &self.bind_group_layout,
+            entries: &[
+                wgpu::BindGroupEntry {
+                    binding: 0,
+                    resource: wgpu::BindingResource::TextureView(
+                        &yuyv_texture.create_view(&Default::default()),
+                    ),
+                },
+                wgpu::BindGroupEntry {
+                    binding: 1,
+                    resource: wgpu::BindingResource::TextureView(
+                        &output_texture.create_view(&Default::default()),
+                    ),
+                },
+            ],
+        });
+
+        let mut encoder = self
+            .device
+            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
+                label: Some("YUYV to RGBA Encoder"),
+            });
+
+        {
+            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
+                label: Some("YUYV to RGBA Pass"),
+                ..Default::default()
+            });
+            compute_pass.set_pipeline(&self.pipeline);
+            compute_pass.set_bind_group(0, &bind_group, &[]);
+            compute_pass.dispatch_workgroups((width / 2).div_ceil(8), height.div_ceil(8), 1);
+        }
+
+        let output_buffer =
+            copy_texture_to_buffer_command(&self.device, &output_texture, &mut encoder);
+
+        self.queue.submit(std::iter::once(encoder.finish()));
+
+        Ok(read_buffer_to_vec(&output_buffer, &self.device)?)
+    }
+}
diff --git a/crates/media-info/src/lib.rs b/crates/media-info/src/lib.rs
index 5b1e55d635..71b1682999 100644
--- a/crates/media-info/src/lib.rs
+++ b/crates/media-info/src/lib.rs
@@ -208,9 +208,14 @@ pub enum RawVideoFormat {
     Uyvy,
     RawRgb,
     Nv12,
+    Nv21,
     Gray,
-    YUYV420,
+    Gray16,
+    Yuyv422,
+    Yuv420p,
     Rgba,
+    Rgb565,
+    P010,
 }
 
 #[derive(Debug, Copy, Clone)]
@@ -231,9 +236,14 @@ impl VideoInfo {
             RawVideoFormat::Uyvy => Pixel::UYVY422,
             RawVideoFormat::RawRgb => Pixel::RGB24,
             RawVideoFormat::Nv12 => Pixel::NV12,
+            RawVideoFormat::Nv21 => Pixel::NV21,
             RawVideoFormat::Gray => Pixel::GRAY8,
-            RawVideoFormat::YUYV420 => Pixel::YUV420P,
+            RawVideoFormat::Gray16 => Pixel::GRAY16LE,
+            RawVideoFormat::Yuyv422 => Pixel::YUYV422,
+            RawVideoFormat::Yuv420p => Pixel::YUV420P,
             RawVideoFormat::Rgba => Pixel::RGBA,
+            RawVideoFormat::Rgb565 => Pixel::RGB565LE,
+            RawVideoFormat::P010 => Pixel::P010LE,
         },
         width,
         height,
diff --git a/crates/recording/src/output_pipeline/win.rs b/crates/recording/src/output_pipeline/win.rs
index 15a3842c19..9a801344ef 100644
--- a/crates/recording/src/output_pipeline/win.rs
+++ b/crates/recording/src/output_pipeline/win.rs
@@ -721,6 +721,22 @@ impl AudioMuxer for WindowsCameraMuxer {
     }
 }
 
+fn convert_uyvy_to_yuyv(src: &[u8], width: u32, height: u32) -> Vec<u8> {
+    let total_bytes = (width * height * 2) as usize;
+    let mut dst = vec![0u8; total_bytes];
+
+    for i in (0..src.len().min(total_bytes)).step_by(4) {
+        if i + 3 < src.len() && i + 3 < total_bytes {
+            dst[i] = src[i + 1];
+            dst[i + 1] = src[i];
+            dst[i + 2] = src[i + 3];
+            dst[i + 3] = src[i + 2];
+        }
+    }
+
+    dst
+}
+
 pub fn upload_mf_buffer_to_texture(
     device: &ID3D11Device,
     frame: &NativeCameraFrame,
@@ -744,7 +760,19 @@ pub fn upload_mf_buffer_to_texture(
         .lock()
         .map_err(|_| windows::core::Error::from(windows::core::HRESULT(-1)))?;
     let lock = buffer_guard.lock()?;
-    let data = &*lock;
+    let original_data = &*lock;
+
+    let converted_buffer: Option<Vec<u8>>;
+    let data: &[u8] = if frame.pixel_format == cap_camera_windows::PixelFormat::UYVY422 {
+        converted_buffer = Some(convert_uyvy_to_yuyv(
+            original_data,
+            frame.width,
+            frame.height,
+        ));
+        converted_buffer.as_ref().unwrap()
+    } else {
+        original_data
+    };
 
     let row_pitch = frame.width * bytes_per_pixel;
diff --git a/crates/rendering/src/decoder/avassetreader.rs b/crates/rendering/src/decoder/avassetreader.rs
index 0503c474be..8192e0c2ce 100644
--- a/crates/rendering/src/decoder/avassetreader.rs
+++ b/crates/rendering/src/decoder/avassetreader.rs
@@ -17,7 +17,7 @@ use tokio::{runtime::Handle as TokioHandle, sync::oneshot};
 
 use crate::{DecodedFrame, PixelFormat};
 
-use super::frame_converter::copy_rgba_plane;
+use super::frame_converter::{copy_bgra_to_rgba, copy_rgba_plane};
 use super::{FRAME_CACHE_SIZE, VideoDecoderMessage, pts_to_frame};
 
 #[derive(Clone)]
@@ -104,6 +104,21 @@ impl ImageBufProcessor {
                 let bytes = copy_rgba_plane(slice, bytes_per_row, width, height);
                 (bytes, PixelFormat::Rgba, width as u32 * 4, 0)
             }
+            format::Pixel::BGRA => {
+                let bytes_per_row = image_buf.plane_bytes_per_row(0);
+                let width = image_buf.width();
+                let height = image_buf.height();
+
+                let slice = unsafe {
+                    std::slice::from_raw_parts::<'static, _>(
+                        image_buf.plane_base_address(0),
+                        bytes_per_row * height,
+                    )
+                };
+
+                let bytes = copy_bgra_to_rgba(slice, bytes_per_row, width, height);
+                (bytes, PixelFormat::Rgba, width as u32 * 4, 0)
+            }
             format::Pixel::NV12 => {
                 let y_stride = image_buf.plane_bytes_per_row(0);
                 let uv_stride = image_buf.plane_bytes_per_row(1);
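A unit-test sketch for `convert_uyvy_to_yuyv`; it would sit in the same module, since the helper is private:

```rust
// Byte-order check: UYVY stores [U0, Y0, V0, Y1] per macropixel,
// YUYV stores [Y0, U0, Y1, V0].
#[test]
fn uyvy_swizzle_one_macropixel() {
    let uyvy = [0x80, 0x10, 0x90, 0x20]; // U, Y0, V, Y1
    let yuyv = convert_uyvy_to_yuyv(&uyvy, 2, 1);
    assert_eq!(yuyv, vec![0x10, 0x80, 0x20, 0x90]); // Y0, U, Y1, V
}
```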
diff --git a/crates/rendering/src/decoder/frame_converter.rs b/crates/rendering/src/decoder/frame_converter.rs
index 27a24fcaca..045af998f9 100644
--- a/crates/rendering/src/decoder/frame_converter.rs
+++ b/crates/rendering/src/decoder/frame_converter.rs
@@ -92,3 +92,22 @@ pub fn copy_rgba_plane(data: &[u8], stride: usize, width: usize, height: usize)
 
     frame_buffer
 }
+
+#[cfg(target_os = "macos")]
+pub fn copy_bgra_to_rgba(data: &[u8], stride: usize, width: usize, height: usize) -> Vec<u8> {
+    debug_assert!(stride >= width * 4, "stride too small for BGRA frame");
+
+    let row_len = width * 4;
+    let mut frame_buffer = Vec::with_capacity(row_len * height);
+
+    for row in data.chunks(stride).take(height) {
+        for pixel in row[..row_len].chunks_exact(4) {
+            frame_buffer.push(pixel[2]);
+            frame_buffer.push(pixel[1]);
+            frame_buffer.push(pixel[0]);
+            frame_buffer.push(pixel[3]);
+        }
+    }
+
+    frame_buffer
+}
diff --git a/crates/video-decode/src/avassetreader.rs b/crates/video-decode/src/avassetreader.rs
index 8d2c72cd9f..d7e4d1e48a 100644
--- a/crates/video-decode/src/avassetreader.rs
+++ b/crates/video-decode/src/avassetreader.rs
@@ -38,7 +38,7 @@ impl AVAssetReaderDecoder {
             .map_err(|e| format!("video decoder / {e}"))?;
 
         (
-            pixel_to_pixel_format(decoder.format()),
+            pixel_to_pixel_format(decoder.format())?,
             decoder.width(),
             decoder.height(),
         )
@@ -159,13 +159,22 @@ impl<'a> Iterator for FramesIter<'a> {
     }
 }
 
-pub fn pixel_to_pixel_format(pixel: avformat::Pixel) -> cv::PixelFormat {
+pub fn pixel_to_pixel_format(pixel: avformat::Pixel) -> Result<cv::PixelFormat, String> {
     match pixel {
-        avformat::Pixel::NV12 => cv::PixelFormat::_420V,
-        // this is intentional, it works and is faster /shrug
-        avformat::Pixel::YUV420P => cv::PixelFormat::_420V,
-        avformat::Pixel::RGBA => cv::PixelFormat::_32_RGBA,
-        _ => todo!(),
+        avformat::Pixel::NV12 => Ok(cv::PixelFormat::_420V),
+        avformat::Pixel::YUV420P => Ok(cv::PixelFormat::_420V),
+        avformat::Pixel::RGBA => Ok(cv::PixelFormat::_32_RGBA),
+        avformat::Pixel::BGRA => Ok(cv::PixelFormat::_32_BGRA),
+        other => {
+            tracing::error!(
+                pixel_format = ?other,
+                "Unhandled pixel format encountered - no mapping to cv::PixelFormat available"
+            );
+            Err(format!(
+                "Unsupported pixel format: {:?}. Supported formats: NV12, YUV420P, RGBA, BGRA",
+                other
+            ))
+        }
     }
 }
 
@@ -173,6 +182,7 @@ pub fn pixel_format_to_pixel(format: cv::PixelFormat) -> avformat::Pixel {
     match format {
         cv::PixelFormat::_420V => avformat::Pixel::NV12,
         cv::PixelFormat::_32_RGBA => avformat::Pixel::RGBA,
-        _ => todo!(),
+        cv::PixelFormat::_32_BGRA => avformat::Pixel::BGRA,
+        _ => avformat::Pixel::RGBA,
     }
 }
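And a matching test sketch for `copy_bgra_to_rgba`, exercising the stride handling:

```rust
// One 1x1 BGRA pixel padded to an 8-byte row: the swizzle should emit RGBA
// and drop the padding bytes.
#[test]
fn bgra_to_rgba_honors_stride() {
    let bgra = [0x01, 0x02, 0x03, 0x04, 0xAA, 0xAA, 0xAA, 0xAA]; // B,G,R,A + pad
    let rgba = copy_bgra_to_rgba(&bgra, 8, 1, 1);
    assert_eq!(rgba, vec![0x03, 0x02, 0x01, 0x04]); // R,G,B,A
}
```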