typedef struct v3u16_t { uint16_t x, y, z; } v3u16_t; /** * v0 and v1 are 'black' and 'white' * v1 and v2 are closest RGB/CMY vertices * x >= y >= z are relative weights */ static av_always_inline v3u16_t barycentric(int shift, int x, int y, int z, v3u16_t v0, v3u16_t v1, v3u16_t v2, v3u16_t v3) { const int a = (1 << shift) - x; const int b = x - y; const int c = y - z; const int d = z; av_assert2(x >= y); av_assert2(y >= z); return (v3u16_t) { (a * v0.x + b * v1.x + c * v2.x + d * v3.x) >> shift, (a * v0.y + b * v1.y + c * v2.y + d * v3.y) >> shift, (a * v0.z + b * v1.z + c * v2.z + d * v3.z) >> shift, }; } static av_always_inline v3u16_t tetrahedral(const SwsLut3D *lut3d, int Rx, int Gx, int Bx, int Rf, int Gf, int Bf) { const int shift = 16 - INPUT_LUT_BITS; const int Rn = FFMIN(Rx + 1, INPUT_LUT_SIZE - 1); const int Gn = FFMIN(Gx + 1, INPUT_LUT_SIZE - 1); const int Bn = FFMIN(Bx + 1, INPUT_LUT_SIZE - 1); const v3u16_t c000 = lut3d->input[Bx][Gx][Rx]; const v3u16_t c111 = lut3d->input[Bn][Gn][Rn]; if (Rf > Gf) { if (Gf > Bf) { const v3u16_t c100 = lut3d->input[Bx][Gx][Rn]; const v3u16_t c110 = lut3d->input[Bx][Gn][Rn]; return barycentric(shift, Rf, Gf, Bf, c000, c100, c110, c111); } else if (Rf > Bf) { const v3u16_t c100 = lut3d->input[Bx][Gx][Rn]; const v3u16_t c101 = lut3d->input[Bn][Gx][Rn]; return barycentric(shift, Rf, Bf, Gf, c000, c100, c101, c111); } else { const v3u16_t c001 = lut3d->input[Bn][Gx][Rx]; const v3u16_t c101 = lut3d->input[Bn][Gx][Rn]; return barycentric(shift, Bf, Rf, Gf, c000, c001, c101, c111); } } else { if (Bf > Gf) { const v3u16_t c001 = lut3d->input[Bn][Gx][Rx]; const v3u16_t c011 = lut3d->input[Bn][Gn][Rx]; return barycentric(shift, Bf, Gf, Rf, c000, c001, c011, c111); } else if (Bf > Rf) { const v3u16_t c010 = lut3d->input[Bx][Gn][Rx]; const v3u16_t c011 = lut3d->input[Bn][Gn][Rx]; return barycentric(shift, Gf, Bf, Rf, c000, c010, c011, c111); } else { const v3u16_t c010 = lut3d->input[Bx][Gn][Rx]; const v3u16_t c110 = lut3d->input[Bx][Gn][Rn]; return barycentric(shift, Gf, Rf, Bf, c000, c010, c110, c111); } } } static av_always_inline v3u16_t lookup_input16(const SwsLut3D *lut3d, v3u16_t rgb) { const int shift = 16 - INPUT_LUT_BITS; const int Rx = rgb.x >> shift; const int Gx = rgb.y >> shift; const int Bx = rgb.z >> shift; const int Rf = rgb.x & ((1 << shift) - 1); const int Gf = rgb.y & ((1 << shift) - 1); const int Bf = rgb.z & ((1 << shift) - 1); return tetrahedral(lut3d, Rx, Gx, Bx, Rf, Gf, Bf); } void sws_lut3d_apply(const SwsLut3D *lut3d, const uint8_t *in, int in_stride, uint8_t *out, int out_stride, int w, int h) { while (h--) { const uint16_t *in16 = (const uint16_t *) in; uint16_t *out16 = (uint16_t *) out; for (int x = 0; x < w; x++) { v3u16_t c = { in16[0], in16[1], in16[2] }; c = lookup_input16(lut3d, c); out16[0] = c.x; out16[1] = c.y; out16[2] = c.z; out16[3] = in16[3]; in16 += 4; out16 += 4; } in += in_stride; out += out_stride; } }