/* * Copyright (C) 2025 Niklas Haas * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include "utils.h" #include "ops.h" #ifndef BIT_DEPTH # define BIT_DEPTH 8 #endif #if BIT_DEPTH > 8 # define fn(name) name ## _16bpc # define VALUE_BITS 19 # define VALUE_MAX 0x7FFFF # define PIXEL_MAX 0xFFFF # define pixel_t uint16_t # define value_t int32_t # define value2_t v2i32_t # define value3_t v3i32_t # define value4_t v4i32_t #else # define fn(name) name ## _8bpc # define VALUE_BITS 15 # define VALUE_MAX 0x7FFF # define PIXEL_MAX 0xFF # define pixel_t uint8_t # define value_t int16_t # define value2_t v2i16_t # define value3_t v3i16_t # define value4_t v4i16_t #endif union fn(chunk_t) { /* Universal accessors */ uint8_t u8[SWS_CHUNK_SIZE]; uint16_t u16[SWS_CHUNK_SIZE]; /* Per-bit depth accessors */ pixel_t p[SWS_CHUNK_SIZE]; value_t v[SWS_CHUNK_SIZE]; }; #define chunk_t union fn(chunk_t) /* Extend the border of a chunk */ static av_always_inline void fn(pad_input)(chunk_t *restrict x, chunk_t *restrict y, chunk_t *restrict z, chunk_t *restrict w, const int pixels) { const pixel_t edge_x = x->p[pixels - 1]; const pixel_t edge_y = y->p[pixels - 1]; const pixel_t edge_z = z->p[pixels - 1]; const pixel_t edge_w = w->p[pixels - 1]; for (int i = pixels; i < SWS_CHUNK_SIZE; i++) { x->p[i] = edge_x; y->p[i] = edge_y; z->p[i] = edge_z; w->p[i] = edge_w; } } static av_always_inline void fn(read_planar)(const pixel_t *restrict in0, const pixel_t *restrict in1, const pixel_t *restrict in2, const pixel_t *restrict in3, chunk_t *restrict x, chunk_t *restrict y, chunk_t *restrict z, chunk_t *restrict w, const int pixels, const int comps) { for (int i = 0; i < pixels; i++) { x->p[i] = in0[i]; y->p[i] = (comps > 1) ? in1[i] : 0; z->p[i] = (comps > 2) ? in2[i] : 0; w->p[i] = (comps > 3) ? in3[i] : PIXEL_MAX; } fn(pad_input)(x, y, z, w, pixels); } static av_always_inline void fn(read_packed)(const pixel_t *restrict in0, const pixel_t *restrict in1, const pixel_t *restrict in2, const pixel_t *restrict in3, chunk_t *restrict x, chunk_t *restrict y, chunk_t *restrict z, chunk_t *restrict w, const int pixels, const int comps) { for (int i = 0; i < pixels; i++) { x->p[i] = in0[comps * i + 0]; y->p[i] = (comps > 1) ? in0[comps * i + 1] : 0; z->p[i] = (comps > 2) ? in0[comps * i + 2] : 0; w->p[i] = (comps > 3) ? in0[comps * i + 3] : 0xFF; } fn(pad_input)(x, y, z, w, pixels); } static av_always_inline void fn(write_planar)(pixel_t *restrict out0, pixel_t *restrict out1, pixel_t *restrict out2, pixel_t *restrict out3, const chunk_t *restrict x, const chunk_t *restrict y, const chunk_t *restrict z, const chunk_t *restrict w, const int pixels, const int comps) { for (int i = 0; i < pixels; i++) { out0[i] = x->p[i]; if (comps > 1) out1[i] = y->p[i]; if (comps > 2) out2[i] = z->p[i]; if (comps > 3) out3[i] = w->p[i]; } } static av_always_inline void fn(write_packed)(pixel_t *restrict out0, pixel_t *restrict out1, pixel_t *restrict out2, pixel_t *restrict out3, const chunk_t *restrict x, const chunk_t *restrict y, const chunk_t *restrict z, const chunk_t *restrict w, const int pixels, const int comps) { for (int i = 0; i < pixels; i++) { out0[comps * i + 0] = x->p[i]; if (comps > 1) out0[comps * i + 1] = y->p[i]; if (comps > 2) out0[comps * i + 2] = z->p[i]; if (comps > 3) out0[comps * i + 3] = w->p[i]; } } static av_always_inline void fn(swizzle)(chunk_t *restrict x, chunk_t *restrict y, chunk_t *restrict z, chunk_t *restrict w, const SwsSwizzleOp swizzle) { const chunk_t in[4] = { *x, *y, *z, *w }; *x = in[swizzle.x]; *y = in[swizzle.y]; *z = in[swizzle.z]; *w = in[swizzle.w]; } /** * We need all 2x2 combinations of (input depth, output depth) for these * functions, so we template them by value depth and make the pixel depth * non-generic; e.g. shift_chunk_8_to_16 expands 8 bit input to HBD value_t */ static av_always_inline void fn(expand_chunk_8_to)(chunk_t *restrict out, const chunk_t *restrict in, const bool full) { static_assert(VALUE_BITS >= 8 && VALUE_BITS <= 24, "VALUE_BITS out of range"); const int shift = VALUE_BITS - 8; #if 0 if (full) { if (VALUE_BITS > 16) { /* Three copies needed */ const int rshift = 24 - VALUE_BITS; for (int i = 0; i < SWS_CHUNK_SIZE; i++) out->v[i] = (in->u8[i] * 0x10101LU) >> rshift; } else { const int rshift = VALUE_BITS - shift; for (int i = 0; i < SWS_CHUNK_SIZE; i++) out->v[i] = (in->u8[i] << shift) | (in->u8[i] >> rshift); } } else { for (int i = 0; i < SWS_CHUNK_SIZE; i++) out->v[i] = in->u8[i] << shift; } #else for (int i = 0; i < SWS_CHUNK_SIZE; i++) { const value_t x = in->u8[i] << shift; out->v[i] = full ? x | (x >> 8) | (x >> 16) : x; } #endif } static av_always_inline void fn(expand_chunk_16_to)(chunk_t *restrict out, const chunk_t *restrict in, const int depth, const bool msb, const bool full) { const int shift = VALUE_BITS - (msb ? 16 : depth); av_assert2(VALUE_BITS <= 2 * depth); for (int i = 0; i < SWS_CHUNK_SIZE; i++) { const uint16_t x16 = in->u16[i]; const value_t x = shift >= 0 ? x16 << shift : x16 >> -shift; out->v[i] = full ? x | (x >> depth) : x; } } static av_always_inline void fn(expand_8_to)(chunk_t *restrict out_x, chunk_t *restrict out_y, chunk_t *restrict out_z, chunk_t *restrict out_w, const chunk_t *restrict in_x, const chunk_t *restrict in_y, const chunk_t *restrict in_z, const chunk_t *restrict in_w, const SwsExpandOp op) { av_assert2(op.depth == 8); fn(expand_chunk_8_to)(out_x, in_x, op.full.luma); fn(expand_chunk_8_to)(out_y, in_y, op.full.chroma); fn(expand_chunk_8_to)(out_z, in_z, op.full.chroma); fn(expand_chunk_8_to)(out_w, in_w, true); } static av_always_inline void fn(expand_16_to)(chunk_t *restrict out_x, chunk_t *restrict out_y, chunk_t *restrict out_z, chunk_t *restrict out_w, const chunk_t *restrict in_x, const chunk_t *restrict in_y, const chunk_t *restrict in_z, const chunk_t *restrict in_w, const SwsExpandOp op) { fn(expand_chunk_16_to)(out_x, in_x, op.depth, op.msb, op.full.luma); fn(expand_chunk_16_to)(out_y, in_y, op.depth, op.msb, op.full.chroma); fn(expand_chunk_16_to)(out_z, in_z, op.depth, op.msb, op.full.chroma); fn(expand_chunk_16_to)(out_w, in_w, op.depth, op.msb, true); } static av_always_inline void fn(compress_8_from)(chunk_t *restrict out_x, chunk_t *restrict out_y, chunk_t *restrict out_z, chunk_t *restrict out_w, const chunk_t *restrict in_x, const chunk_t *restrict in_y, const chunk_t *restrict in_z, const chunk_t *restrict in_w, const SwsCompressOp op) { av_assert2(op.depth == 8); const int shift = VALUE_BITS - 8; for (int i = 0; i < SWS_CHUNK_SIZE; i++) { out_x->u8[i] = av_clip_uint8(in_x->v[i] >> shift); out_y->u8[i] = av_clip_uint8(in_y->v[i] >> shift); out_z->u8[i] = av_clip_uint8(in_z->v[i] >> shift); out_w->u8[i] = av_clip_uint8(in_w->v[i] >> shift); } } #if BIT_DEPTH > 8 /* Never output 16 bit from low bit depth intermediates */ static av_always_inline void fn(compress_16_from)(chunk_t *restrict out_x, chunk_t *restrict out_y, chunk_t *restrict out_z, chunk_t *restrict out_w, const chunk_t *restrict in_x, const chunk_t *restrict in_y, const chunk_t *restrict in_z, const chunk_t *restrict in_w, const SwsCompressOp op) { const int shift16 = VALUE_BITS - 16; const int shift = 16 - op.depth; const int mask = ((1 << op.depth) - 1) << shift; for (int i = 0; i < SWS_CHUNK_SIZE; i++) { const uint16_t x16 = av_clip_uint16(in_x->v[i] >> shift16); const uint16_t y16 = av_clip_uint16(in_y->v[i] >> shift16); const uint16_t z16 = av_clip_uint16(in_z->v[i] >> shift16); const uint16_t w16 = av_clip_uint16(in_w->v[i] >> shift16); if (op.msb) { out_x->u16[i] = x16 & mask; out_y->u16[i] = y16 & mask; out_z->u16[i] = z16 & mask; out_w->u16[i] = w16 & mask; } else { out_x->u16[i] = x16 >> shift; out_y->u16[i] = y16 >> shift; out_z->u16[i] = z16 >> shift; out_w->u16[i] = w16 >> shift; } } } #endif #if 0 #define DEF_READ_WRAPPER(name, planar, bits, comps, swizzle, decode) \ static void fn(op_##name)(chunk4_t *restrict out, int pixels, \ const uint8_t *restrict in0, \ const uint8_t *restrict in1, \ const uint8_t *restrict in2, \ const uint8_t *restrict in3) \ { \ fn(read_##planar##_##bits)(out, pixels, in0, in1, in2, in3, comps); \ fn(swizzle)(out, swizzle); \ fn(decode)(out, decode); \ } DEF_READ_WRAPPER(test, planar, 4, SWS_FROM_RGBA, SWS_DEC_RGB8) static const SwsOpEntry fn(sws_op_table_c)[] = { /* Atomic read wrappers */ //{{ { SWS_OP_READ_BYTES, .rw_bytes = { 1, 1, false }} }, .read = op_read_planar_1 }, /* Atomic write wrappers */ }; #endif #undef fn #undef VALUE_BITS #undef VALUE_MAX #undef PIXEL_MAX #undef pixel_t #undef value_t #undef value2_t #undef value3_t #undef value4_t #undef chunk_t