108#ifndef BC6H_ENC_IMPLEMENTATION
110#ifndef BC6H_ENC_INCLUDED
111#define BC6H_ENC_INCLUDED
// Public entry points of the BC6H codec. Every function takes an untyped
// destination and an untyped source pointer and is declared noexcept.
// The definitions are not visible from this part of the file, so the notes
// below are assumptions derived from the names only.

// NOTE(review): presumably decodes one BC6H block in the unsigned ("U")
// variant from pSrc into pDest — confirm buffer sizes against the definition.
115void DecodeBC6HU(
void* pDest,
const void* pSrc)
noexcept;
// NOTE(review): presumably the signed ("S") counterpart of DecodeBC6HU — confirm.
116void DecodeBC6HS(
void* pDest,
const void* pSrc)
noexcept;
// NOTE(review): presumably encodes pixel data at pSrc into one unsigned BC6H
// block at pDest — confirm the expected pixel layout against the definition.
117void EncodeBC6HU(
void* pDest,
const void* pSrc)
noexcept;
// NOTE(review): presumably the signed ("S") counterpart of EncodeBC6HU — confirm.
118void EncodeBC6HS(
void* pDest,
const void* pSrc)
noexcept;
127# ifdef BC6H_SSE_INTRINSICS
128# include <immintrin.h>
131# ifdef BC6H_ARM_NEON_INTRINSICS
132# include <arm_neon.h>
136# define BC6H_ASSERT(expression)
137# define BC6H_ASSERT_UNDEF
141#define FLT_MAX 3.402823466e+38F
145#define FLT_MIN 1.175494351e-38F
148#define BC6H_INLINE inline
156using HALF = uint16_t;
158#if !defined(BC6H_HALF_TO_FLOAT)
161struct FastHalfToFloat
165 m_MantissaTable[0] = 0;
166 for (
int i = 1; i < 1024; i++)
168 uint32_t m = i << 13;
170 while ((m & 0x00800000) == 0)
177 m_MantissaTable[i] = m | e;
179 for (
int i = 1024; i < 2048; i++)
180 m_MantissaTable[i] = (i - 1024) << 13;
181 m_ExponentTable[0] = 0;
182 for (
int i = 1; i < 31; i++)
183 m_ExponentTable[i] = 0x38000000 + (i << 23);
184 m_ExponentTable[31] = 0x7f800000;
185 m_ExponentTable[32] = 0x80000000;
186 for (
int i = 33; i < 63; i++)
187 m_ExponentTable[i] = 0xb8000000 + ((i - 32) << 23);
188 m_ExponentTable[63] = 0xff800000;
189 m_OffsetTable[0] = 0;
190 for (
int i = 1; i < 32; i++)
191 m_OffsetTable[i] = 1024;
192 m_OffsetTable[32] = 0;
193 for (
int i = 33; i < 64; i++)
194 m_OffsetTable[i] = 1024;
196 uint32_t m_MantissaTable[2048];
197 uint32_t m_ExponentTable[64];
198 uint32_t m_OffsetTable[64];
200 BC6H_INLINE uint32_t Convert(uint16_t h)
const
202 uint32_t exp = h >> 10;
203 return m_MantissaTable[m_OffsetTable[exp] + (h & 0x3ff)] + m_ExponentTable[exp];
206FastHalfToFloat g_FastHalfToFloat;
208# define BC6H_HALF_TO_FLOAT g_FastHalfToFloat.Convert
209# define BC6H_HALF_TO_FLOAT_UNDEF
213#if !defined(BC6H_FLOAT_TO_HALF)
216# pragma warning(push)
217# pragma warning(disable : 4146)
/// "Load immediate": returns @p a unchanged. Exists so that 32-bit constants
/// in the branch-free float-to-half code are introduced through a named helper.
BC6H_INLINE uint32_t _uint32_li(uint32_t a)
{
    return a;
}
/// Returns @p a minus one with unsigned wrap-around, so 0 becomes 0xFFFFFFFF.
/// The wrap is what lets callers use the top bit of the result as an
/// "input was zero" flag.
BC6H_INLINE uint32_t _uint32_dec(uint32_t a)
{
    return a - 1u;
}
/// Returns @p a plus one with unsigned wrap-around (0xFFFFFFFF becomes 0).
BC6H_INLINE uint32_t _uint32_inc(uint32_t a)
{
    return a + 1u;
}
/// Returns the bitwise complement of @p a.
BC6H_INLINE uint32_t _uint32_not(uint32_t a)
{
    return ~a;
}
/// Returns the two's-complement negation of @p a (modulo 2^32).
/// Written as `0u - a` rather than unary minus so that it does not trigger
/// "unary minus applied to unsigned type" diagnostics; the value is the same.
BC6H_INLINE uint32_t _uint32_neg(uint32_t a)
{
    return 0u - a;
}
/// Broadcasts the most significant bit of @p a to all 32 bits:
/// returns 0xFFFFFFFF when bit 31 is set and 0 otherwise.
///
/// Implemented as an explicit sign-bit test instead of an arithmetic right
/// shift of a signed value, whose result for negative operands was
/// implementation-defined before C++20.
BC6H_INLINE uint32_t _uint32_ext(uint32_t a)
{
    return (a & 0x80000000u) != 0u ? 0xFFFFFFFFu : 0u;
}
/// Returns the bitwise AND of @p a and @p b.
BC6H_INLINE uint32_t _uint32_and(uint32_t a, uint32_t b)
{
    return a & b;
}
/// "AND with complement": returns the bits of @p a that are NOT set in @p b.
BC6H_INLINE uint32_t _uint32_andc(uint32_t a, uint32_t b)
{
    return a & ~b;
}
/// Returns the bitwise OR of @p a and @p b.
BC6H_INLINE uint32_t _uint32_or(uint32_t a, uint32_t b)
{
    return a | b;
}
/// Logical right shift of @p a by @p sa bits.
///
/// @param a   Value to shift.
/// @param sa  Shift count. Counts outside 0..31 (including negative counts,
///            which arise when callers pass a wrapped unsigned difference)
///            produce 0 instead of invoking undefined behaviour.
/// @return    `a >> sa` for 0 <= sa <= 31, otherwise 0.
BC6H_INLINE uint32_t _uint32_srl(uint32_t a, int sa)
{
    // A built-in shift by a negative count or by >= 32 is undefined in C++,
    // and hardware that masks the count would return stale bits. Shifting
    // everything out is the mathematically expected result.
    return (static_cast<uint32_t>(sa) < 32u) ? (a >> sa) : 0u;
}
/// Logical left shift of @p a by @p sa bits.
///
/// @param a   Value to shift.
/// @param sa  Shift count. Counts outside 0..31 (including negative counts)
///            produce 0 instead of invoking undefined behaviour.
/// @return    `a << sa` for 0 <= sa <= 31, otherwise 0.
BC6H_INLINE uint32_t _uint32_sll(uint32_t a, int sa)
{
    // Same guard as the right-shift helper: an out-of-range count is
    // undefined for the built-in operator, so treat it as "all bits gone".
    return (static_cast<uint32_t>(sa) < 32u) ? (a << sa) : 0u;
}
/// Returns @p a + @p b with unsigned wrap-around (modulo 2^32).
BC6H_INLINE uint32_t _uint32_add(uint32_t a, uint32_t b)
{
    return a + b;
}
/// Returns @p a - @p b with unsigned wrap-around (modulo 2^32).
/// Callers rely on the wrap: when a < b the top bit of the result is set,
/// which they use as a comparison flag.
BC6H_INLINE uint32_t _uint32_sub(uint32_t a, uint32_t b)
{
    return a - b;
}
/// Branch-free select on the sign bit of @p test.
///
/// @param test  Only bit 31 is inspected.
/// @param a     Returned when bit 31 of @p test is set.
/// @param b     Returned when bit 31 of @p test is clear.
/// @return      @p a or @p b as described above.
BC6H_INLINE uint32_t _uint32_sels(uint32_t test, uint32_t a, uint32_t b)
{
    // Expand the sign bit to an all-ones / all-zeros mask, then blend:
    // the mask keeps a's bits, its complement keeps b's bits.
    const uint32_t mask = (test & 0x80000000u) != 0u ? 0xFFFFFFFFu : 0u;
    return (a & mask) | (b & ~mask);
}
// Converts the raw bit pattern of a 32-bit float (sign in bit 31, exponent in
// bits 30..23, mantissa in bits 22..0, per the masks below) into a 16-bit
// half bit pattern (sign in bit 15, exponent mask 0x7c00).
//
// The routine is branch-free: every candidate result (normal, normal with
// mantissa carry, NaN, infinity, denormal) is computed unconditionally, a set
// of "*_msb" flags is derived whose bit 31 encodes each condition, and the
// final value is picked by a chain of _uint32_sels calls. Later selects in
// the chain take priority over earlier ones.
//
// f: raw bits of the float to convert.
// Returns the half-precision bit pattern, sign included.
241BC6H_INLINE uint16_t half_from_float(uint32_t f)
// --- constants: masks, bit positions and bias offsets ---
243 const uint32_t one = _uint32_li(0x00000001);
244 const uint32_t f_s_mask = _uint32_li(0x80000000);
245 const uint32_t f_e_mask = _uint32_li(0x7f800000);
246 const uint32_t f_m_mask = _uint32_li(0x007fffff);
247 const uint32_t f_m_hidden_bit = _uint32_li(0x00800000);
248 const uint32_t f_m_round_bit = _uint32_li(0x00001000);
249 const uint32_t f_snan_mask = _uint32_li(0x7fc00000);
250 const uint32_t f_e_pos = _uint32_li(0x00000017);
251 const uint32_t h_e_pos = _uint32_li(0x0000000a);
252 const uint32_t h_e_mask = _uint32_li(0x00007c00);
253 const uint32_t h_snan_mask = _uint32_li(0x00007e00);
254 const uint32_t h_e_mask_value = _uint32_li(0x0000001f);
255 const uint32_t f_h_s_pos_offset = _uint32_li(0x00000010);
256 const uint32_t f_h_bias_offset = _uint32_li(0x00000070);
257 const uint32_t f_h_m_pos_offset = _uint32_li(0x0000000d);
258 const uint32_t h_nan_min = _uint32_li(0x00007c01);
259 const uint32_t f_h_e_biased_flag = _uint32_li(0x0000008f);
// --- field extraction ---
260 const uint32_t f_s = _uint32_and(f, f_s_mask);
261 const uint32_t f_e = _uint32_and(f, f_e_mask);
// Sign moves from bit 31 to bit 15.
262 const uint16_t h_s = _uint32_srl(f_s, f_h_s_pos_offset);
263 const uint32_t f_m = _uint32_and(f, f_m_mask);
264 const uint16_t f_e_amount = _uint32_srl(f_e, f_e_pos);
// Exponent minus 0x70; wraps to a large unsigned value when the float
// exponent is below 0x70.
265 const uint32_t f_e_half_bias = _uint32_sub(f_e_amount, f_h_bias_offset);
266 const uint32_t f_snan = _uint32_and(f, f_snan_mask);
// --- mantissa rounding: bit 12 of the mantissa, shifted up one place, is
// added back onto the mantissa before the low 13 bits are dropped ---
267 const uint32_t f_m_round_mask = _uint32_and(f_m, f_m_round_bit);
268 const uint32_t f_m_round_offset = _uint32_sll(f_m_round_mask, one);
269 const uint32_t f_m_rounded = _uint32_add(f_m, f_m_round_offset);
// --- denormal candidate ---
// NOTE(review): f_m_denorm_sa is 1 - f_e_half_bias computed with unsigned
// wrap-around and is handed to _uint32_srl as an int shift count; for many
// inputs it lies outside 0..31. Confirm that the shift helper tolerates such
// counts, since a built-in shift by an out-of-range count is undefined.
270 const uint32_t f_m_denorm_sa = _uint32_sub(one, f_e_half_bias);
271 const uint32_t f_m_with_hidden = _uint32_or(f_m_rounded, f_m_hidden_bit);
272 const uint32_t f_m_denorm = _uint32_srl(f_m_with_hidden, f_m_denorm_sa);
273 const uint32_t h_m_denorm = _uint32_srl(f_m_denorm, f_h_m_pos_offset);
// Non-zero when rounding carried out of the 23-bit mantissa.
274 const uint32_t f_m_rounded_overflow = _uint32_and(f_m_rounded, f_m_hidden_bit);
// --- NaN candidate: all-ones half exponent plus the truncated mantissa ---
275 const uint32_t m_nan = _uint32_srl(f_m, f_h_m_pos_offset);
276 const uint32_t h_em_nan = _uint32_or(h_e_mask, m_nan);
// --- normal candidates, with and without the mantissa carry ---
277 const uint32_t h_e_norm_overflow_offset = _uint32_inc(f_e_half_bias);
278 const uint32_t h_e_norm_overflow = _uint32_sll(h_e_norm_overflow_offset, h_e_pos);
279 const uint32_t h_e_norm = _uint32_sll(f_e_half_bias, h_e_pos);
280 const uint32_t h_m_norm = _uint32_srl(f_m_rounded, f_h_m_pos_offset);
281 const uint32_t h_em_norm = _uint32_or(h_e_norm, h_m_norm);
// --- condition flags: only bit 31 of each value is meaningful ---
282 const uint32_t is_h_ndenorm_msb = _uint32_sub(f_h_bias_offset, f_e_amount);
283 const uint32_t is_f_e_flagged_msb = _uint32_sub(f_h_e_biased_flag, f_e_half_bias);
// Set when the float exponent is <= 0x70 (complement of the flag above).
284 const uint32_t is_h_denorm_msb = _uint32_not(is_h_ndenorm_msb);
// Decrementing wraps 0 to 0xFFFFFFFF, so bit 31 is set exactly when the
// operand was zero (the operands here are at most 23 bits wide).
285 const uint32_t is_f_m_eqz_msb = _uint32_dec(f_m);
286 const uint32_t is_h_nan_eqz_msb = _uint32_dec(m_nan);
287 const uint32_t is_f_inf_msb = _uint32_and(is_f_e_flagged_msb, is_f_m_eqz_msb);
288 const uint32_t is_f_nan_underflow_msb = _uint32_and(is_f_e_flagged_msb, is_h_nan_eqz_msb);
289 const uint32_t is_e_overflow_msb = _uint32_sub(h_e_mask_value, f_e_half_bias);
290 const uint32_t is_h_inf_msb = _uint32_or(is_e_overflow_msb, is_f_inf_msb);
// f_snan - f_snan_mask is zero only when every bit of 0x7fc00000 is set in f;
// the complement below therefore has bit 31 set exactly in that case.
291 const uint32_t is_f_nsnan_msb = _uint32_sub(f_snan, f_snan_mask);
292 const uint32_t is_m_norm_overflow_msb = _uint32_neg(f_m_rounded_overflow);
293 const uint32_t is_f_snan_msb = _uint32_not(is_f_nsnan_msb);
// --- selection chain: each step either keeps the previous result or
// replaces it with the candidate for its condition ---
294 const uint32_t h_em_overflow_result = _uint32_sels(is_m_norm_overflow_msb, h_e_norm_overflow, h_em_norm);
295 const uint32_t h_em_nan_result = _uint32_sels(is_f_e_flagged_msb, h_em_nan, h_em_overflow_result);
296 const uint32_t h_em_nan_underflow_result = _uint32_sels(is_f_nan_underflow_msb, h_nan_min, h_em_nan_result);
297 const uint32_t h_em_inf_result = _uint32_sels(is_h_inf_msb, h_e_mask, h_em_nan_underflow_result);
298 const uint32_t h_em_denorm_result = _uint32_sels(is_h_denorm_msb, h_m_denorm, h_em_inf_result);
299 const uint32_t h_em_snan_result = _uint32_sels(is_f_snan_msb, h_snan_mask, h_em_denorm_result);
// Re-attach the sign bit and truncate to 16 bits.
300 const uint32_t h_result = _uint32_or(h_s, h_em_snan_result);
301 return (uint16_t)(h_result);
307# define BC6H_FLOAT_TO_HALF half_from_float
308# define BC6H_FLOAT_TO_HALF_UNDEF
312# if defined(BC6H_SSE_INTRINSICS)
313using XMVECTOR = __m128;
314# elif defined(BC6H_ARM_NEON_INTRINSICS)
315using XMVECTOR = float32x4_t;
327# if defined(BC6H_SSE_INTRINSICS) || defined(BC6H_NEON_INTRINSICS)
328typedef const XMVECTOR FXMVECTOR;
330typedef const XMVECTOR& FXMVECTOR;
// Converts one 16-bit half value to a 32-bit float.
// Three implementations are selected at compile time:
//  - BC6H_SSE_INTRINSICS: places h in an SSE register and converts with
//    _mm_cvtph_ps, returning lane 0.
//  - BC6H_ARM_NEON_INTRINSICS: duplicates h across a 4-lane vector, converts
//    with vcvt_f32_f16 and returns lane 0.
//  - otherwise: obtains the float bit pattern from BC6H_HALF_TO_FLOAT and
//    reinterprets those bits as a float.
// NOTE(review): the final return reads a uint32_t object through a float
// lvalue obtained with reinterpret_cast, which the strict-aliasing rule does
// not permit — confirm the build disables strict-aliasing optimisations or
// switch to a byte copy.
354BC6H_INLINE
float XMConvertHalfToFloat(HALF h)
noexcept
356# if defined(BC6H_SSE_INTRINSICS)
357 __m128i V1 = _mm_cvtsi32_si128(
static_cast<int>(h));
358 __m128 V2 = _mm_cvtph_ps(V1);
359 return _mm_cvtss_f32(V2);
360# elif defined(BC6H_ARM_NEON_INTRINSICS)
361 uint16x4_t vHalf = vdup_n_u16(h);
362 float32x4_t vFloat = vcvt_f32_f16(vreinterpret_f16_u16(vHalf));
363 return vgetq_lane_f32(vFloat, 0);
365 uint32_t f = BC6H_HALF_TO_FLOAT(h);
366 return *
reinterpret_cast<float*
>(&f);
369BC6H_INLINE HALF XMConvertFloatToHalf(
float f)
noexcept
371 return BC6H_FLOAT_TO_HALF(*
reinterpret_cast<uint32_t*
>(&f));
374 struct alignas(16) XMFLOAT4A :
public XMFLOAT4
376 using XMFLOAT4::XMFLOAT4;
379 BC6H_INLINE
void XMStoreFloat4A(XMFLOAT4A* pDestination, FXMVECTOR V)
noexcept
381 BC6H_ASSERT((
reinterpret_cast<uintptr_t
>(pDestination) & 0xF) == 0);
383# if defined(BC6H_SSE_INTRINSICS)
384 _mm_store_ps(&pDestination->x, V);
385# elif defined(BC6H_ARM_NEON_INTRINSICS)
386# if defined(_MSC_VER) && !defined(__clang__)
387 vst1q_f32_ex(
reinterpret_cast<float*
>(pDestination), V, 128);
389 vst1q_f32(
reinterpret_cast<float*
>(pDestination), V);
392 pDestination->x = V.x;
393 pDestination->y = V.y;
394 pDestination->z = V.z;
395 pDestination->w = V.w;
398BC6H_INLINE
void XMStoreHalf4(XMHALF4* pDestination, FXMVECTOR V)
noexcept
401# if defined(BC6H_SSE_INTRINSICS)
402 __m128i V1 = _mm_cvtps_ph(V, _MM_FROUND_TO_NEAREST_INT);
403 _mm_storel_epi64(
reinterpret_cast<__m128i*
>(pDestination), V1);
405 pDestination->x = XMConvertFloatToHalf(V.x);
406 pDestination->y = XMConvertFloatToHalf(V.y);
407 pDestination->z = XMConvertFloatToHalf(V.z);
408 pDestination->w = XMConvertFloatToHalf(V.w);
411BC6H_INLINE XMVECTOR XMLoadFloat4(
const XMFLOAT4* pSource)
noexcept
413# if defined BC6H_SSE_INTRINSICS
414 return _mm_loadu_ps(&pSource->x);
415# elif defined BC6H_ARM_NEON_INTRINSIC
416 return vld1q_f32(
reinterpret_cast<const float*
>(pSource));
426BC6H_INLINE XMVECTOR XMVectorSubtract(FXMVECTOR V1, FXMVECTOR V2)
noexcept
428#if defined(BC6H_SSE_INTRINSICS)
429 return _mm_sub_ps(V1, V2);
430#elif defined(BC6H_ARM_NEON_INTRINSIC)
431 return vsubq_f32(V1, V2);
439struct alignas(16) XMVECTORU32
447 BC6H_INLINE
operator XMVECTOR() const noexcept {
return v; }
449# if defined(BC6H_SSE_INTRINSICS)
450 BC6H_INLINE
operator __m128i() const noexcept
452 return _mm_castps_si128(v);
454 BC6H_INLINE
operator __m128d() const noexcept {
return _mm_castps_pd(v); }
455# elif defined(BC6H_ARM_NEON_INTRINSIC) && defined(__GNUC__)
456 BC6H_INLINE
operator int32x4_t() const noexcept
458 return vreinterpretq_s32_f32(v);
460 BC6H_INLINE
operator uint32x4_t() const noexcept {
return vreinterpretq_u32_f32(v); }
464const XMVECTORU32 g_XMMask3 = {{{0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000}}};
466# if defined(BC6H_SSE_INTRINSICS) || defined(BC6H_ARM_NEON_INTRINSIC)
477BC6H_INLINE XMVECTOR XMVector3Dot(FXMVECTOR V1, FXMVECTOR V2)
479 return _mm_dp_ps(V1, V2, 0x7f);
482BC6H_INLINE
float XMVectorGetX(FXMVECTOR V)
noexcept
484# if defined(BC6H_SSE_INTRINSICS)
485 return _mm_cvtss_f32(V);
486# elif defined(BC6H_ARM_NEON_INTRINSIC)
487 return vgetq_lane_f32(V, 0);
490BC6H_INLINE
float XMVectorDot(FXMVECTOR a, FXMVECTOR b)
492 return XMVectorGetX(XMVector3Dot(a, b));
495BC6H_INLINE
float XMVectorDot(FXMVECTOR a, FXMVECTOR b)
498 return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
502BC6H_INLINE XMVECTOR XMLoadSInt4(
const XMINT4* pSource)
noexcept
504# if defined(BC6H_SSE_INTRINSICS)
505 __m128i V = _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(pSource));
506 return _mm_cvtepi32_ps(V);
507# elif defined(BC6H_ARM_NEON_INTRINSICS_)
508 int32x4_t v = vld1q_s32(
reinterpret_cast<const int32_t*
>(pSource));
509 return vcvtq_f32_s32(v);
512 V.x =
static_cast<float>(pSource->x);
513 V.y =
static_cast<float>(pSource->y);
514 V.z =
static_cast<float>(pSource->z);
515 V.w =
static_cast<float>(pSource->w);
520template <
typename _Tp,
size_t _Nm>
521constexpr size_t std__size(
const _Tp (&)[_Nm])
noexcept
527const T& std__max(
const T& a,
const T& b)
noexcept
529 return (a < b) ? b : a;
533const T& std__min(
const T& a,
const T& b)
noexcept
535 return (b < a) ? b : a;
539void std__swap(T& a, T& b)
noexcept
554 HDRColorA() =
default;
555 HDRColorA(
float _r,
float _g,
float _b,
float _a) noexcept :
556 r(_r), g(_g), b(_b), a(_a) {}
557 HDRColorA(
const HDRColorA& c) noexcept :
558 r(c.r), g(c.g), b(c.b), a(c.a) {}
561 HDRColorA operator+(
const HDRColorA& c)
const noexcept
563 return HDRColorA(r + c.r, g + c.g, b + c.b, a + c.a);
566 HDRColorA operator-(
const HDRColorA& c)
const noexcept
568 return HDRColorA(r - c.r, g - c.g, b - c.b, a - c.a);
571 HDRColorA operator*(
float f)
const noexcept
573 return HDRColorA(r * f, g * f, b * f, a * f);
576 HDRColorA operator/(
float f)
const noexcept
578 const float fInv = 1.0f / f;
579 return HDRColorA(r * fInv, g * fInv, b * fInv, a * fInv);
582 float operator*(
const HDRColorA& c)
const noexcept
584 return r * c.r + g * c.g + b * c.b + a * c.a;
588 HDRColorA& operator+=(
const HDRColorA& c)
noexcept
597 HDRColorA& operator-=(
const HDRColorA& c)
noexcept
606 HDRColorA& operator*=(
float f)
noexcept
615 HDRColorA& operator/=(
float f)
noexcept
617 const float fInv = 1.0f / f;
625 HDRColorA& Clamp(
float fMin,
float fMax)
noexcept
627 r = std__min<float>(fMax, std__max<float>(fMin, r));
628 g = std__min<float>(fMax, std__max<float>(fMin, g));
629 b = std__min<float>(fMax, std__max<float>(fMin, b));
630 a = std__min<float>(fMax, std__max<float>(fMin, a));
634 HDRColorA(
const LDRColorA& c)
noexcept;
635 HDRColorA& operator=(
const LDRColorA& c)
noexcept;
// Bit masks and limits for 16-bit half values handled as integers.
// F16S_MASK selects bit 15; F16ToINT uses it to pull out the sign.
642constexpr uint16_t F16S_MASK = 0x8000;
// The remaining 15 bits (complement of F16S_MASK).
643constexpr uint16_t F16EM_MASK = 0x7fff;
// Upper clamp applied by F16ToINT to the magnitude bits.
644constexpr uint16_t F16MAX = 0x7bff;
// Block-layout limits; several are used as array dimensions in the encoder.
646constexpr size_t BC6H_NUM_PIXELS_PER_BLOCK = 16;
647constexpr size_t BC6H_MAX_REGIONS = 2;
648constexpr size_t BC6H_MAX_INDICES = 16;
649constexpr size_t BC6H_NUM_CHANNELS = 3;
650constexpr size_t BC6H_MAX_SHAPES = 32;
// NOTE(review): the three weight constants are not referenced in the visible
// part of the file; presumably MAX = 1 << SHIFT and ROUND = MAX / 2 are used
// for fixed-point interpolation — confirm at the use sites.
651constexpr int32_t BC6H_WEIGHT_MAX = 64;
652constexpr uint32_t BC6H_WEIGHT_SHIFT = 6;
653constexpr int32_t BC6H_WEIGHT_ROUND = 32;
// Squared threshold, (0.25 / 64)^2, compared against squared endpoint
// gradients to decide when the endpoint optimisation has converged.
655constexpr float fEpsilon = (0.25f / 64.0f) * (0.25f / 64.0f);
// Blend coefficients for 3-step and 4-step palettes: step i of a palette is
// X * pC[i] + Y * pD[i], so pC runs from 1 down to 0 and pD from 0 up to 1.
656constexpr float pC3[] = {2.0f / 2.0f, 1.0f / 2.0f, 0.0f / 2.0f};
657constexpr float pD3[] = {0.0f / 2.0f, 1.0f / 2.0f, 2.0f / 2.0f};
658constexpr float pC4[] = {3.0f / 3.0f, 2.0f / 3.0f, 1.0f / 3.0f, 0.0f / 3.0f};
659constexpr float pD4[] = {0.0f / 3.0f, 1.0f / 3.0f, 2.0f / 3.0f, 3.0f / 3.0f};
662const uint8_t g_aPartitionTable[2][32][16] =
666 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
667 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
668 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
669 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
670 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
671 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
672 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
673 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
674 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
675 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
676 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
677 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
678 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
679 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
680 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
681 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
682 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
683 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
684 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
685 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
686 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
687 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
688 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
689 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
690 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
691 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
692 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
693 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
694 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
695 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
696 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
697 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
702 {0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1},
703 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1},
704 {0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1},
705 {0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1},
706 {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1},
707 {0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1},
708 {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1},
709 {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1},
710 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1},
711 {0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
712 {0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1},
713 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1},
714 {0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
715 {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1},
716 {0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
717 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1},
718 {0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1},
719 {0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
720 {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0},
721 {0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0},
722 {0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
723 {0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0},
724 {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0},
725 {0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1},
726 {0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0},
727 {0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0},
728 {0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0},
729 {0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0},
730 {0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0},
731 {0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0},
732 {0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0},
733 {0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0}
738const uint8_t g_aFixUp[2][32][3] =
// Integer weight tables running from 0 to 64: g_aWeights3 has 8 entries and
// g_aWeights4 has 16.
// NOTE(review): the use sites are not visible here; presumably the suffix is
// the index bit count (2^3 = 8, 2^4 = 16 entries) and 64 corresponds to
// BC6H_WEIGHT_MAX — confirm where the palette is generated.
813const int g_aWeights3[] = {0, 9, 18, 27, 37, 46, 55, 64};
814const int g_aWeights4[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64};
821 LDRColorA() =
default;
822 LDRColorA(uint8_t _r, uint8_t _g, uint8_t _b, uint8_t _a) noexcept :
823 r(_r), g(_g), b(_b), a(_a) {}
825 const uint8_t& operator[](
size_t uElement)
const noexcept
833 default: BC6H_ASSERT(
false);
return r;
837 uint8_t& operator[](
size_t uElement)
noexcept
845 default: BC6H_ASSERT(
false);
return r;
849 LDRColorA operator=(
const HDRColorA& c)
noexcept
853 tmp = tmp.Clamp(0.0f, 1.0f) * 255.0f;
854 ret.r = uint8_t(tmp.r + 0.001f);
855 ret.g = uint8_t(tmp.g + 0.001f);
856 ret.b = uint8_t(tmp.b + 0.001f);
857 ret.a = uint8_t(tmp.a + 0.001f);
862static_assert(
sizeof(LDRColorA) == 4,
"Unexpected packing");
870BC6H_INLINE HDRColorA::HDRColorA(
const LDRColorA& c)
noexcept
872 r = float(c.r) * (1.0f / 255.0f);
873 g = float(c.g) * (1.0f / 255.0f);
874 b = float(c.b) * (1.0f / 255.0f);
875 a = float(c.a) * (1.0f / 255.0f);
878BC6H_INLINE HDRColorA& HDRColorA::operator=(
const LDRColorA& c)
noexcept
880 r =
static_cast<float>(c.r);
881 g =
static_cast<float>(c.g);
882 b =
static_cast<float>(c.b);
883 a =
static_cast<float>(c.a);
894 INTColor() =
default;
895 INTColor(
int nr,
int ng,
int nb) noexcept :
896 r(nr), g(ng), b(nb), pad(0) {}
897 INTColor(
const INTColor& c) noexcept :
898 r(c.r), g(c.g), b(c.b), pad(0) {}
900 INTColor& operator+=(
const INTColor& c)
noexcept
908 INTColor& operator-=(
const INTColor& c)
noexcept
916 INTColor& operator&=(
const INTColor& c)
noexcept
924 int& operator[](uint8_t i)
noexcept
926 BC6H_ASSERT(i <
sizeof(INTColor) /
sizeof(
int));
927 return reinterpret_cast<int*
>(
this)[i];
930 void Set(
const HDRColorA& c,
bool bSigned)
noexcept
934 const XMVECTOR v = XMLoadFloat4(
reinterpret_cast<const XMFLOAT4*
>(&c));
935 XMStoreHalf4(&aF16, v);
937 r = F16ToINT(aF16.x, bSigned);
938 g = F16ToINT(aF16.y, bSigned);
939 b = F16ToINT(aF16.z, bSigned);
942 INTColor& Clamp(
int iMin,
int iMax)
noexcept
944 r = std__min<int>(iMax, std__max<int>(iMin, r));
945 g = std__min<int>(iMax, std__max<int>(iMin, g));
946 b = std__min<int>(iMax, std__max<int>(iMin, b));
950 INTColor& SignExtend(
const LDRColorA& Prec)
noexcept
952#define BC6H_SIGN_EXTEND(x, nb) ((((x) & (1 << ((nb)-1))) ? ((~0) ^ ((1 << (nb)) - 1)) : 0) | (x))
953 r = BC6H_SIGN_EXTEND(r,
int(Prec.r));
954 g = BC6H_SIGN_EXTEND(g,
int(Prec.g));
955 b = BC6H_SIGN_EXTEND(b,
int(Prec.b));
956#undef BC6H_SIGN_EXTEND
960 void ToF16(HALF aF16[3],
bool bSigned)
const noexcept
962 aF16[0] = INT2F16(r, bSigned);
963 aF16[1] = INT2F16(g, bSigned);
964 aF16[2] = INT2F16(b, bSigned);
968 static int F16ToINT(
const HALF& f,
bool bSigned)
noexcept
970 uint16_t input = *
reinterpret_cast<const uint16_t*
>(&f);
974 s = input & F16S_MASK;
976 if (input > F16MAX) out = F16MAX;
979 out = s ? -out : out;
983 if (input & F16S_MASK) out = 0;
990 static HALF INT2F16(
int input,
bool bSigned)
noexcept
1002 out = uint16_t(s | input);
1006 BC6H_ASSERT(input >= 0 && input <= F16MAX);
1007 out =
static_cast<uint16_t
>(input);
1010 *
reinterpret_cast<uint16_t*
>(&h) = out;
1015static_assert(
sizeof(INTColor) == 16,
"Unexpected packing");
1023template <
size_t SizeInBytes>
1027 uint8_t GetBit(
size_t& uStartBit)
const noexcept
1029 BC6H_ASSERT(uStartBit < 128);
1030 const size_t uIndex = uStartBit >> 3;
1031 auto const ret =
static_cast<uint8_t
>((m_uBits[uIndex] >> (uStartBit - (uIndex << 3))) & 0x01);
1036 uint8_t GetBits(
size_t& uStartBit,
size_t uNumBits)
const noexcept
1038 if (uNumBits == 0)
return 0;
1039 BC6H_ASSERT(uStartBit + uNumBits <= 128 && uNumBits <= 8);
1041 const size_t uIndex = uStartBit >> 3;
1042 const size_t uBase = uStartBit - (uIndex << 3);
1043 if (uBase + uNumBits > 8)
1045 const size_t uFirstIndexBits = 8 - uBase;
1046 const size_t uNextIndexBits = uNumBits - uFirstIndexBits;
1047 ret =
static_cast<uint8_t
>((unsigned(m_uBits[uIndex]) >> uBase) | ((unsigned(m_uBits[uIndex + 1]) & ((1u << uNextIndexBits) - 1)) << uFirstIndexBits));
1051 ret =
static_cast<uint8_t
>((m_uBits[uIndex] >> uBase) & ((1 << uNumBits) - 1));
1053 BC6H_ASSERT(ret < (1 << uNumBits));
1054 uStartBit += uNumBits;
1058 void SetBit(
size_t& uStartBit, uint8_t uValue)
noexcept
1060 BC6H_ASSERT(uStartBit < 128 && uValue < 2);
1061 size_t uIndex = uStartBit >> 3;
1062 const size_t uBase = uStartBit - (uIndex << 3);
1063 m_uBits[uIndex] &= ~(1 << uBase);
1064 m_uBits[uIndex] |= uValue << uBase;
1068 void SetBits(
size_t& uStartBit,
size_t uNumBits, uint8_t uValue)
noexcept
1072 BC6H_ASSERT(uStartBit + uNumBits <= 128 && uNumBits <= 8);
1073 BC6H_ASSERT(uValue < (1 << uNumBits));
1074 size_t uIndex = uStartBit >> 3;
1075 const size_t uBase = uStartBit - (uIndex << 3);
1076 if (uBase + uNumBits > 8)
1078 const size_t uFirstIndexBits = 8 - uBase;
1079 const size_t uNextIndexBits = uNumBits - uFirstIndexBits;
1080 m_uBits[uIndex] &= ~(((1 << uFirstIndexBits) - 1) << uBase);
1081 m_uBits[uIndex] |= uValue << uBase;
1082 m_uBits[uIndex + 1] &= ~((1 << uNextIndexBits) - 1);
1083 m_uBits[uIndex + 1] |= uValue >> uFirstIndexBits;
1087 m_uBits[uIndex] &= ~(((1 << uNumBits) - 1) << uBase);
1088 m_uBits[uIndex] |= uValue << uBase;
1090 uStartBit += uNumBits;
1094 uint8_t m_uBits[SizeInBytes];
1098class D3DX_BC6H :
private CBits<16>
1101 void Decode(
bool bSigned, HDRColorA* pOut)
const noexcept;
1102 void Encode(
bool bSigned,
const HDRColorA*
const pIn)
noexcept;
1106#pragma warning(push)
1107#pragma warning(disable : 4480)
1109 enum EField : uint8_t
1131 struct ModeDescriptor
1140 uint8_t uPartitions;
1143 LDRColorA RGBAPrec[BC6H_MAX_REGIONS][2];
1147#pragma warning(push)
1148#pragma warning(disable : 4512)
1156 const HDRColorA*
const aHDRPixels;
1157 INTEndPntPair aUnqEndPts[BC6H_MAX_SHAPES][BC6H_MAX_REGIONS];
1158 INTColor aIPixels[BC6H_NUM_PIXELS_PER_BLOCK];
1160 EncodeParams(
const HDRColorA*
const aOriginal,
bool bSignedFormat) noexcept :
1161 fBestErr(FLT_MAX), bSigned(bSignedFormat), uMode(0), uShape(0), aHDRPixels(aOriginal), aUnqEndPts{}, aIPixels{}
1163 for (
size_t i = 0; i < BC6H_NUM_PIXELS_PER_BLOCK; ++i)
1165 aIPixels[i].Set(aOriginal[i], bSigned);
1173 static int Quantize(
int iValue,
int prec,
bool bSigned)
noexcept;
1174 static int Unquantize(
int comp, uint8_t uBitsPerComp,
bool bSigned)
noexcept;
1175 static int FinishUnquantize(
int comp,
bool bSigned)
noexcept;
1177 static bool EndPointsFit(
const EncodeParams* pEP,
const INTEndPntPair aEndPts[])
noexcept;
1179 void GeneratePaletteQuantized(
const EncodeParams* pEP,
const INTEndPntPair& endPts, INTColor aPalette[])
const noexcept;
1180 float MapColorsQuantized(
const EncodeParams* pEP,
const INTColor aColors[],
size_t np,
const INTEndPntPair& endPts)
const noexcept;
1181 float PerturbOne(
const EncodeParams* pEP,
const INTColor aColors[],
size_t np, uint8_t ch,
const INTEndPntPair& oldEndPts, INTEndPntPair& newEndPts,
float fOldErr,
int do_b)
const noexcept;
1182 void OptimizeOne(
const EncodeParams* pEP,
const INTColor aColors[],
size_t np,
float aOrgErr,
const INTEndPntPair& aOrgEndPts, INTEndPntPair& aOptEndPts)
const noexcept;
1183 void OptimizeEndPoints(
const EncodeParams* pEP,
const float aOrgErr[],
const INTEndPntPair aOrgEndPts[], INTEndPntPair aOptEndPts[])
const noexcept;
1184 static void SwapIndices(
const EncodeParams* pEP, INTEndPntPair aEndPts[],
size_t aIndices[])
noexcept;
1185 void AssignIndices(
const EncodeParams* pEP,
const INTEndPntPair aEndPts[],
size_t aIndices[],
float aTotErr[])
const noexcept;
1186 void QuantizeEndPts(
const EncodeParams* pEP, INTEndPntPair* qQntEndPts)
const noexcept;
1187 void EmitBlock(
const EncodeParams* pEP,
const INTEndPntPair aEndPts[],
const size_t aIndices[])
noexcept;
1188 void Refine(EncodeParams* pEP)
noexcept;
1190 static void GeneratePaletteUnquantized(
const EncodeParams* pEP,
size_t uRegion, INTColor aPalette[])
noexcept;
1191 float MapColors(
const EncodeParams* pEP,
size_t uRegion,
size_t np,
const size_t* auIndex)
const noexcept;
1192 float RoughMSE(EncodeParams* pEP)
const noexcept;
1195 static const ModeDescriptor ms_aDesc[][82];
1196 static const ModeInfo ms_aInfo[];
1197 static const int ms_aModeToInfo[];
1201const D3DX_BC6H::ModeDescriptor D3DX_BC6H::ms_aDesc[14][82] =
2409const D3DX_BC6H::ModeInfo D3DX_BC6H::ms_aInfo[] =
2411 {0x00, 1,
true, 3, {{LDRColorA(10, 10, 10, 0), LDRColorA(5, 5, 5, 0)}, {LDRColorA(5, 5, 5, 0), LDRColorA(5, 5, 5, 0)}}},
2412 {0x01, 1,
true, 3, {{LDRColorA(7, 7, 7, 0), LDRColorA(6, 6, 6, 0)}, {LDRColorA(6, 6, 6, 0), LDRColorA(6, 6, 6, 0)}}},
2413 {0x02, 1,
true, 3, {{LDRColorA(11, 11, 11, 0), LDRColorA(5, 4, 4, 0)}, {LDRColorA(5, 4, 4, 0), LDRColorA(5, 4, 4, 0)}}},
2414 {0x06, 1,
true, 3, {{LDRColorA(11, 11, 11, 0), LDRColorA(4, 5, 4, 0)}, {LDRColorA(4, 5, 4, 0), LDRColorA(4, 5, 4, 0)}}},
2415 {0x0a, 1,
true, 3, {{LDRColorA(11, 11, 11, 0), LDRColorA(4, 4, 5, 0)}, {LDRColorA(4, 4, 5, 0), LDRColorA(4, 4, 5, 0)}}},
2416 {0x0e, 1,
true, 3, {{LDRColorA(9, 9, 9, 0), LDRColorA(5, 5, 5, 0)}, {LDRColorA(5, 5, 5, 0), LDRColorA(5, 5, 5, 0)}}},
2417 {0x12, 1,
true, 3, {{LDRColorA(8, 8, 8, 0), LDRColorA(6, 5, 5, 0)}, {LDRColorA(6, 5, 5, 0), LDRColorA(6, 5, 5, 0)}}},
2418 {0x16, 1,
true, 3, {{LDRColorA(8, 8, 8, 0), LDRColorA(5, 6, 5, 0)}, {LDRColorA(5, 6, 5, 0), LDRColorA(5, 6, 5, 0)}}},
2419 {0x1a, 1,
true, 3, {{LDRColorA(8, 8, 8, 0), LDRColorA(5, 5, 6, 0)}, {LDRColorA(5, 5, 6, 0), LDRColorA(5, 5, 6, 0)}}},
2420 {0x1e, 1,
false, 3, {{LDRColorA(6, 6, 6, 0), LDRColorA(6, 6, 6, 0)}, {LDRColorA(6, 6, 6, 0), LDRColorA(6, 6, 6, 0)}}},
2421 {0x03, 0,
false, 4, {{LDRColorA(10, 10, 10, 0), LDRColorA(10, 10, 10, 0)}, {LDRColorA(0, 0, 0, 0), LDRColorA(0, 0, 0, 0)}}},
2422 {0x07, 0,
true, 4, {{LDRColorA(11, 11, 11, 0), LDRColorA(9, 9, 9, 0)}, {LDRColorA(0, 0, 0, 0), LDRColorA(0, 0, 0, 0)}}},
2423 {0x0b, 0,
true, 4, {{LDRColorA(12, 12, 12, 0), LDRColorA(8, 8, 8, 0)}, {LDRColorA(0, 0, 0, 0), LDRColorA(0, 0, 0, 0)}}},
2424 {0x0f, 0,
true, 4, {{LDRColorA(16, 16, 16, 0), LDRColorA(4, 4, 4, 0)}, {LDRColorA(0, 0, 0, 0), LDRColorA(0, 0, 0, 0)}}},
2427const int D3DX_BC6H::ms_aModeToInfo[] =
// Checks whether uOffset matches one of the first (uPartitions + 1) entries
// of g_aFixUp[uPartitions][uShape].
// The statements executed on a match and after the loop are not visible in
// this part of the file; presumably they return true and false respectively.
//
// uPartitions: first index into g_aFixUp; also the highest entry compared.
// uShape:      second index into g_aFixUp, asserted < BC6H_MAX_SHAPES.
// uOffset:     value to look for, asserted < 16.
2466BC6H_INLINE
bool IsFixUpOffset(
size_t uPartitions,
size_t uShape,
size_t uOffset)
noexcept
// NOTE(review): the assertion admits uPartitions == 2, but g_aFixUp is
// declared with a first dimension of 2, so index 2 would be out of range —
// confirm whether the bound should be "< 2".
2468 BC6H_ASSERT(uPartitions < 3 && uShape < BC6H_MAX_SHAPES && uOffset < 16);
2469 for (
size_t p = 0; p <= uPartitions; p++)
2471 if (uOffset == g_aFixUp[uPartitions][uShape][p])
2479BC6H_INLINE
void TransformForward(INTEndPntPair aEndPts[])
noexcept
2481 aEndPts[0].B -= aEndPts[0].A;
2482 aEndPts[1].A -= aEndPts[0].A;
2483 aEndPts[1].B -= aEndPts[0].A;
2486BC6H_INLINE
void TransformInverse(INTEndPntPair aEndPts[],
const LDRColorA& Prec,
bool bSigned)
noexcept
2488 const INTColor WrapMask((1 << Prec.r) - 1, (1 << Prec.g) - 1, (1 << Prec.b) - 1);
2489 aEndPts[0].B += aEndPts[0].A;
2490 aEndPts[0].B &= WrapMask;
2491 aEndPts[1].A += aEndPts[0].A;
2492 aEndPts[1].A &= WrapMask;
2493 aEndPts[1].B += aEndPts[0].A;
2494 aEndPts[1].B &= WrapMask;
2497 aEndPts[0].B.SignExtend(Prec);
2498 aEndPts[1].A.SignExtend(Prec);
2499 aEndPts[1].B.SignExtend(Prec);
2503BC6H_INLINE
float Norm(
const INTColor& a,
const INTColor& b)
noexcept
2505 const float dr = float(a.r) - float(b.r);
2506 const float dg = float(a.g) - float(b.g);
2507 const float db = float(a.b) - float(b.b);
2508 return dr * dr + dg * dg + db * db;
2512BC6H_INLINE
int NBits(
int n,
bool bIsSigned)
noexcept
2521 for (nb = 0; n; ++nb, n >>= 1)
2523 return nb + (bIsSigned ? 1 : 0);
2527 BC6H_ASSERT(bIsSigned);
2528 for (nb = 0; n < -1; ++nb, n >>= 1)
2535 const HDRColorA*
const pPoints,
2540 const size_t* pIndex)
noexcept
2542 constexpr float fError = FLT_MAX;
2543 const float* pC = (3 == cSteps) ? pC3 : pC4;
2544 const float* pD = (3 == cSteps) ? pD3 : pD4;
2547 HDRColorA X(FLT_MAX, FLT_MAX, FLT_MAX, 0.0f);
2548 HDRColorA Y(-FLT_MAX, -FLT_MAX, -FLT_MAX, 0.0f);
2550 for (
size_t iPoint = 0; iPoint < cPixels; iPoint++)
2552 if (pPoints[pIndex[iPoint]].r < X.r) X.r = pPoints[pIndex[iPoint]].r;
2553 if (pPoints[pIndex[iPoint]].g < X.g) X.g = pPoints[pIndex[iPoint]].g;
2554 if (pPoints[pIndex[iPoint]].b < X.b) X.b = pPoints[pIndex[iPoint]].b;
2555 if (pPoints[pIndex[iPoint]].r > Y.r) Y.r = pPoints[pIndex[iPoint]].r;
2556 if (pPoints[pIndex[iPoint]].g > Y.g) Y.g = pPoints[pIndex[iPoint]].g;
2557 if (pPoints[pIndex[iPoint]].b > Y.b) Y.b = pPoints[pIndex[iPoint]].b;
2566 const float fAB = AB.r * AB.r + AB.g * AB.g + AB.b * AB.b;
2581 const float fABInv = 1.0f / fAB;
2584 Dir.r = AB.r * fABInv;
2585 Dir.g = AB.g * fABInv;
2586 Dir.b = AB.b * fABInv;
2589 Mid.r = (X.r + Y.r) * 0.5f;
2590 Mid.g = (X.g + Y.g) * 0.5f;
2591 Mid.b = (X.b + Y.b) * 0.5f;
2594 fDir[0] = fDir[1] = fDir[2] = fDir[3] = 0.0f;
2596 for (
size_t iPoint = 0; iPoint < cPixels; iPoint++)
2599 Pt.r = (pPoints[pIndex[iPoint]].r - Mid.r) * Dir.r;
2600 Pt.g = (pPoints[pIndex[iPoint]].g - Mid.g) * Dir.g;
2601 Pt.b = (pPoints[pIndex[iPoint]].b - Mid.b) * Dir.b;
2604 f = Pt.r + Pt.g + Pt.b;
2606 f = Pt.r + Pt.g - Pt.b;
2608 f = Pt.r - Pt.g + Pt.b;
2610 f = Pt.r - Pt.g - Pt.b;
2614 float fDirMax = fDir[0];
2617 for (
size_t iDir = 1; iDir < 4; iDir++)
2619 if (fDir[iDir] > fDirMax)
2621 fDirMax = fDir[iDir];
2626 if (iDirMax & 2) std__swap(X.g, Y.g);
2627 if (iDirMax & 1) std__swap(X.b, Y.b);
2630 if (fAB < 1.0f / 4096.0f)
2642 auto const fSteps =
static_cast<float>(cSteps - 1);
2644 for (
size_t iIteration = 0; iIteration < 8; iIteration++)
2647 HDRColorA pSteps[4] = {};
2649 for (
size_t iStep = 0; iStep < cSteps; iStep++)
2651 pSteps[iStep].r = X.r * pC[iStep] + Y.r * pD[iStep];
2652 pSteps[iStep].g = X.g * pC[iStep] + Y.g * pD[iStep];
2653 pSteps[iStep].b = X.b * pC[iStep] + Y.b * pD[iStep];
2661 const float fLen = (Dir.r * Dir.r + Dir.g * Dir.g + Dir.b * Dir.b);
2663 if (fLen < (1.0f / 4096.0f))
2666 const float fScale = fSteps / fLen;
2673 float d2X = 0.0f, d2Y = 0.0f;
2674 HDRColorA dX(0.0f, 0.0f, 0.0f, 0.0f), dY(0.0f, 0.0f, 0.0f, 0.0f);
2676 for (
size_t iPoint = 0; iPoint < cPixels; iPoint++)
2678 const float fDot = (pPoints[pIndex[iPoint]].r - X.r) * Dir.r +
2679 (pPoints[pIndex[iPoint]].g - X.g) * Dir.g +
2680 (pPoints[pIndex[iPoint]].b - X.b) * Dir.b;
2685 else if (fDot >= fSteps)
2688 iStep = uint32_t(fDot + 0.5f);
2691 Diff.r = pSteps[iStep].r - pPoints[pIndex[iPoint]].r;
2692 Diff.g = pSteps[iStep].g - pPoints[pIndex[iPoint]].g;
2693 Diff.b = pSteps[iStep].b - pPoints[pIndex[iPoint]].b;
2695 const float fC = pC[iStep] * (1.0f / 8.0f);
2696 const float fD = pD[iStep] * (1.0f / 8.0f);
2698 d2X += fC * pC[iStep];
2699 dX.r += fC * Diff.r;
2700 dX.g += fC * Diff.g;
2701 dX.b += fC * Diff.b;
2703 d2Y += fD * pD[iStep];
2704 dY.r += fD * Diff.r;
2705 dY.g += fD * Diff.g;
2706 dY.b += fD * Diff.b;
2712 const float f = -1.0f / d2X;
2721 const float f = -1.0f / d2Y;
2728 if ((dX.r * dX.r < fEpsilon) && (dX.g * dX.g < fEpsilon) && (dX.b * dX.b < fEpsilon) &&
2729 (dY.r * dY.r < fEpsilon) && (dY.g * dY.g < fEpsilon) && (dY.b * dY.b < fEpsilon))
2744void FillWithErrorColors(HDRColorA* pOut)
noexcept
2746 for (
size_t i = 0; i < BC6H_NUM_PIXELS_PER_BLOCK; ++i)
2750 pOut[i] = HDRColorA(1.0f, 0.0f, 1.0f, 1.0f);
2753 pOut[i] = HDRColorA(0.0f, 0.0f, 0.0f, 1.0f);
// Decodes this BC6H block into BC6H_NUM_PIXELS_PER_BLOCK colors written to pOut.
//   bSigned - forwarded to TransformInverse, Unquantize, FinishUnquantize and ToF16, and
//             used (together with info.bTransformed) to decide on endpoint sign extension.
//   pOut    - destination pixel array; the r, g and b members are assigned per pixel below.
// Error handling visible here: an unexpected header field, an index read that would run
// past bit 128, or an out-of-range index all log a message and call FillWithErrorColors.
// For a mode with no ms_aModeToInfo entry, a warning string is selected and every pixel is
// set to HDRColorA(0, 0, 0, 1).
2758void D3DX_BC6H::Decode(
bool bSigned, HDRColorA* pOut)
const noexcept
2762 size_t uStartBit = 0;
// The mode starts as 2 bits; unless those are 00 or 01, 3 more bits are read and placed
// above them, giving a 5-bit mode value.
2763 uint8_t uMode = GetBits(uStartBit, 2u);
2764 if (uMode != 0x00 && uMode != 0x01)
2766 uMode =
static_cast<uint8_t
>((unsigned(GetBits(uStartBit, 3)) << 2) | uMode);
2769 BC6H_ASSERT(uMode < 32);
// A non-negative ms_aModeToInfo entry is the index of the mode's descriptor/info tables.
2771 if (ms_aModeToInfo[uMode] >= 0)
// NOTE(review): this assert bounds-checks against ms_aInfo while the next statement
// indexes ms_aDesc, and the assert further down checks ms_aDesc before indexing ms_aInfo.
// The two asserts look swapped — confirm.
2773 BC6H_ASSERT(
static_cast<unsigned int>(ms_aModeToInfo[uMode]) < std__size(ms_aInfo));
2774 const ModeDescriptor* desc = ms_aDesc[ms_aModeToInfo[uMode]];
2776 BC6H_ASSERT(
static_cast<unsigned int>(ms_aModeToInfo[uMode]) < std__size(ms_aDesc));
2777 const ModeInfo& info = ms_aInfo[ms_aModeToInfo[uMode]];
2779 INTEndPntPair aEndPts[BC6H_MAX_REGIONS] = {};
2780 uint32_t uShape = 0;
// Header length in bits: 82 when the mode has partitions, 65 otherwise.
2783 const size_t uHeaderBits = info.uPartitions > 0 ? 82u : 65u;
// Walk the header one bit at a time. uCurBit records the bit position before GetBit is
// called (presumably GetBit advances uStartBit — confirm). Each set bit is OR-ed into the
// field and bit position that the mode descriptor names for that header bit.
2784 while (uStartBit < uHeaderBits)
2786 const size_t uCurBit = uStartBit;
2787 if (GetBit(uStartBit))
2789 switch (desc[uCurBit].m_eField)
// D accumulates the shape; the R*/G*/B* fields accumulate the red/green/blue channel of
// endpoint A or B of region 0 (W, X) or region 1 (Y, Z).
2791 case D: uShape |= 1 << uint32_t(desc[uCurBit].m_uBit);
break;
2792 case RW: aEndPts[0].A.r |= 1 << uint32_t(desc[uCurBit].m_uBit);
break;
2793 case RX: aEndPts[0].B.r |= 1 << uint32_t(desc[uCurBit].m_uBit);
break;
2794 case RY: aEndPts[1].A.r |= 1 << uint32_t(desc[uCurBit].m_uBit);
break;
2795 case RZ: aEndPts[1].B.r |= 1 << uint32_t(desc[uCurBit].m_uBit);
break;
2796 case GW: aEndPts[0].A.g |= 1 << uint32_t(desc[uCurBit].m_uBit);
break;
2797 case GX: aEndPts[0].B.g |= 1 << uint32_t(desc[uCurBit].m_uBit);
break;
2798 case GY: aEndPts[1].A.g |= 1 << uint32_t(desc[uCurBit].m_uBit);
break;
2799 case GZ: aEndPts[1].B.g |= 1 << uint32_t(desc[uCurBit].m_uBit);
break;
2800 case BW: aEndPts[0].A.b |= 1 << uint32_t(desc[uCurBit].m_uBit);
break;
2801 case BX: aEndPts[0].B.b |= 1 << uint32_t(desc[uCurBit].m_uBit);
break;
2802 case BY: aEndPts[1].A.b |= 1 << uint32_t(desc[uCurBit].m_uBit);
break;
2803 case BZ: aEndPts[1].B.b |= 1 << uint32_t(desc[uCurBit].m_uBit);
break;
// Any other field value in the header is treated as a corrupt block.
2806 BC6H_LOG(
"BC6H: Invalid header bits encountered during decoding\n");
2808 FillWithErrorColors(pOut);
2815 BC6H_ASSERT(uShape < 64);
// Sign-extend region 0's A endpoint at the mode's base precision.
// NOTE(review): the condition guarding this call, if any, is not visible here.
2820 aEndPts[0].A.SignExtend(info.RGBAPrec[0][0]);
// For signed or transformed modes, sign-extend the endpoints of every region using that
// endpoint's own precision entry.
2822 if (bSigned || info.bTransformed)
2824 BC6H_ASSERT(info.uPartitions < BC6H_MAX_REGIONS);
2825 for (
size_t p = 0; p <= info.uPartitions; ++p)
2829 aEndPts[p].A.SignExtend(info.RGBAPrec[p][0]);
2831 aEndPts[p].B.SignExtend(info.RGBAPrec[p][1]);
// Transformed modes need the inverse transform applied to the endpoints before use.
2836 if (info.bTransformed)
2838 TransformInverse(aEndPts, info.RGBAPrec[0][0], bSigned);
// Per-pixel pass: read the palette index, then interpolate between the region's endpoints.
2842 for (
size_t i = 0; i < BC6H_NUM_PIXELS_PER_BLOCK; ++i)
// Pixels reported by IsFixUpOffset store one fewer index bit.
2844 const size_t uNumBits = IsFixUpOffset(info.uPartitions, uShape, i) ? info.uIndexPrec - 1u : info.uIndexPrec;
// Refuse to read past the end of the 128-bit block.
2845 if (uStartBit + uNumBits > 128)
2848 BC6H_LOG(
"BC6H: Invalid block encountered during decoding\n");
2850 FillWithErrorColors(pOut);
2853 const uint8_t uIndex = GetBits(uStartBit, uNumBits);
// Valid indices are below 8 for partitioned modes and below 16 otherwise.
2855 if (uIndex >= ((info.uPartitions > 0) ? 8 : 16))
2858 BC6H_LOG(
"BC6H: Invalid index encountered during decoding\n");
2860 FillWithErrorColors(pOut);
2864 const size_t uRegion = g_aPartitionTable[info.uPartitions][uShape][i];
2865 BC6H_ASSERT(uRegion < BC6H_MAX_REGIONS);
// Unquantize both endpoints of this pixel's region using the RGBAPrec[0][0] precision.
2868 const int r1 = Unquantize(aEndPts[uRegion].A.r, info.RGBAPrec[0][0].r, bSigned);
2869 const int g1 = Unquantize(aEndPts[uRegion].A.g, info.RGBAPrec[0][0].g, bSigned);
2870 const int b1 = Unquantize(aEndPts[uRegion].A.b, info.RGBAPrec[0][0].b, bSigned);
2871 const int r2 = Unquantize(aEndPts[uRegion].B.r, info.RGBAPrec[0][0].r, bSigned);
2872 const int g2 = Unquantize(aEndPts[uRegion].B.g, info.RGBAPrec[0][0].g, bSigned);
2873 const int b2 = Unquantize(aEndPts[uRegion].B.b, info.RGBAPrec[0][0].b, bSigned);
// Partitioned modes use the g_aWeights3 table, single-region modes use g_aWeights4.
2874 const int* aWeights = info.uPartitions > 0 ? g_aWeights3 : g_aWeights4;
// Weighted blend of endpoint A and B, rounded and shifted, followed by the final scaling.
2876 fc.r = FinishUnquantize((r1 * (BC6H_WEIGHT_MAX - aWeights[uIndex]) + r2 * aWeights[uIndex] + BC6H_WEIGHT_ROUND) >> BC6H_WEIGHT_SHIFT, bSigned);
2877 fc.g = FinishUnquantize((g1 * (BC6H_WEIGHT_MAX - aWeights[uIndex]) + g2 * aWeights[uIndex] + BC6H_WEIGHT_ROUND) >> BC6H_WEIGHT_SHIFT, bSigned);
2878 fc.b = FinishUnquantize((b1 * (BC6H_WEIGHT_MAX - aWeights[uIndex]) + b2 * aWeights[uIndex] + BC6H_WEIGHT_ROUND) >> BC6H_WEIGHT_SHIFT, bSigned);
// Convert the integer color to three half values, then widen each to float for output.
2881 fc.ToF16(rgb, bSigned);
2883 pOut[i].r = XMConvertHalfToFloat(rgb[0]);
2884 pOut[i].g = XMConvertHalfToFloat(rgb[1]);
2885 pOut[i].b = XMConvertHalfToFloat(rgb[2]);
// Unknown-mode path: pick a warning message (a more specific one for the four reserved
// mode values below), then output HDRColorA(0, 0, 0, 1) for every pixel.
2892 const char* warnstr =
"BC6H: Invalid mode encountered during decoding\n";
2895 case 0x13: warnstr =
"BC6H: Reserved mode 10011 encountered during decoding\n";
break;
2896 case 0x17: warnstr =
"BC6H: Reserved mode 10111 encountered during decoding\n";
break;
2897 case 0x1B: warnstr =
"BC6H: Reserved mode 11011 encountered during decoding\n";
break;
2898 case 0x1F: warnstr =
"BC6H: Reserved mode 11111 encountered during decoding\n";
break;
2903 for (
size_t i = 0; i < BC6H_NUM_PIXELS_PER_BLOCK; ++i)
2905 pOut[i] = HDRColorA(0.0f, 0.0f, 0.0f, 1.0f);
// Encodes the pixels in pIn into this block by searching over modes and shapes.
//   bSigned - stored in the EncodeParams used for the whole search.
//   pIn     - source pixels, handed to the EncodeParams constructor.
// The mode loop visits every ms_aInfo entry and stops early once EP.fBestErr reaches 0.
// For each mode, every candidate shape gets a rough error estimate (RoughMSE); only the
// best quarter of the shapes (at least one) is then visited in detail.
// NOTE(review): the detailed per-shape work after EP.uShape is assigned is not visible here.
2910void D3DX_BC6H::Encode(
bool bSigned,
const HDRColorA*
const pIn)
noexcept
2914 EncodeParams EP(pIn, bSigned);
2916 for (EP.uMode = 0; EP.uMode < std__size(ms_aInfo) && EP.fBestErr > 0; ++EP.uMode)
// Partitioned modes have 32 candidate shapes; single-region modes have one.
2918 const uint8_t uShapes = ms_aInfo[EP.uMode].uPartitions ? 32u : 1u;
// Number of shapes to examine in detail: a quarter of the candidates, but never zero.
2921 const size_t uItems = std__max<size_t>(1u,
size_t(uShapes >> 2));
2922 float afRoughMSE[BC6H_MAX_SHAPES];
2923 uint8_t auShape[BC6H_MAX_SHAPES];
// Rough error per shape; auShape starts as the identity permutation.
2926 for (EP.uShape = 0; EP.uShape < uShapes; ++EP.uShape)
2928 size_t uShape = EP.uShape;
2929 afRoughMSE[uShape] = RoughMSE(&EP);
2930 auShape[uShape] =
static_cast<uint8_t
>(uShape);
// Partial selection sort: afterwards the first uItems slots hold the uItems smallest
// rough errors in ascending order, with auShape permuted to match.
2934 for (
size_t i = 0; i < uItems; i++)
2936 for (
size_t j = i + 1; j < uShapes; j++)
2938 if (afRoughMSE[i] > afRoughMSE[j])
2940 std__swap(afRoughMSE[i], afRoughMSE[j]);
2941 std__swap(auShape[i], auShape[j]);
// Visit the selected shapes best-first, stopping as soon as the best error reaches 0.
2946 for (
size_t i = 0; i < uItems && EP.fBestErr > 0; i++)
2948 EP.uShape = auShape[i];
// Quantizes iValue to prec bits.
// Two scaling paths are visible:
//   - values asserted to lie in [-F16MAX, F16MAX] are scaled by 2^(prec-1) / (F16MAX + 1),
//     or passed through unchanged when prec >= 16;
//   - values asserted to lie in [0, F16MAX] are scaled by 2^prec / (F16MAX + 1), or passed
//     through unchanged when prec >= 15.
// NOTE(review): the branch choosing between the paths (presumably on bSigned), any sign
// handling around the first path, and the return statement are not visible here.
2954int D3DX_BC6H::Quantize(
int iValue,
int prec,
bool bSigned)
noexcept
// A precision of 1 is not supported.
2956 BC6H_ASSERT(prec > 1);
2960 BC6H_ASSERT(iValue >= -F16MAX && iValue <= F16MAX);
2966 q = (prec >= 16) ? iValue : (iValue << (prec - 1)) / (F16MAX + 1);
// The result must fit in prec bits including a sign bit.
2969 BC6H_ASSERT(q > -(1 << (prec - 1)) && q < (1 << (prec - 1)));
2973 BC6H_ASSERT(iValue >= 0 && iValue <= F16MAX);
2974 q = (prec >= 15) ? iValue : (iValue << prec) / (F16MAX + 1);
// The result must fit in prec unsigned bits.
2975 BC6H_ASSERT(q >= 0 && q < (1 << prec));
// Expands a quantized endpoint component (comp, stored in uBitsPerComp bits) back to the
// wider working range.
// Two groups of cases are visible, presumably selected by bSigned (the selecting branch
// and several branch bodies are not visible here — confirm):
//   - first group: full-width check at 16 bits, zero maps to zero, a saturation check at
//     (1 << (bits - 1)) - 1, otherwise ((comp << 15) + 0x4000) >> (bits - 1);
//   - second group: values of 15 or more bits pass through, a saturation check at
//     (1 << bits) - 1, otherwise ((comp << 16) + 0x8000) >> bits.
2981int D3DX_BC6H::Unquantize(
int comp, uint8_t uBitsPerComp,
bool bSigned)
noexcept
2986 if (uBitsPerComp >= 16)
2998 if (comp == 0) unq = 0;
2999 else if (comp >= ((1 << (uBitsPerComp - 1)) - 1))
// General case: scale up with rounding (0x4000 is half of 1 << 15).
3002 unq = ((comp << 15) + 0x4000) >> (uBitsPerComp - 1);
3009 if (uBitsPerComp >= 15) unq = comp;
3012 else if (comp == ((1 << uBitsPerComp) - 1))
// General case: scale up with rounding (0x8000 is half of 1 << 16).
3015 unq = ((comp << 16) + 0x8000) >> uBitsPerComp;
// Applies the final scaling to an interpolated component.
// Two return expressions are visible; presumably bSigned selects between them (the branch
// itself is not visible here — confirm).
3021int D3DX_BC6H::FinishUnquantize(
int comp,
bool bSigned)
noexcept
// Scales the magnitude by 31/32 and restores the sign afterwards, so negative values are
// never right-shifted directly.
3025 return (comp < 0) ? -(((-comp) * 31) >> 5) : (comp * 31) >> 5;
// Scales by 31/64.
3029 return (comp * 31) >> 6;
// Checks whether the endpoints in aEndPts can be stored at the precisions of the current
// mode (pEP->uMode).
// For each endpoint channel the required bit count (NBits) is compared with the matching
// RGBAPrec entry: RGBAPrec[0][0]/[0][1] for region 0's A/B and RGBAPrec[1][0]/[1][1] for
// region 1's A/B. Region 1 is only examined when the mode has partitions.
// NOTE(review): the return statements are not visible here; presumably any exceeded
// precision yields false and falling through yields true — confirm.
3033bool D3DX_BC6H::EndPointsFit(
const EncodeParams* pEP,
const INTEndPntPair aEndPts[])
noexcept
3036 const bool bTransformed = ms_aInfo[pEP->uMode].bTransformed;
3037 const bool bIsSigned = pEP->bSigned;
3038 const LDRColorA& Prec0 = ms_aInfo[pEP->uMode].RGBAPrec[0][0];
3039 const LDRColorA& Prec1 = ms_aInfo[pEP->uMode].RGBAPrec[0][1];
3040 const LDRColorA& Prec2 = ms_aInfo[pEP->uMode].RGBAPrec[1][0];
3041 const LDRColorA& Prec3 = ms_aInfo[pEP->uMode].RGBAPrec[1][1];
// Region 0's A endpoint is measured as signed only when the format is signed; every other
// endpoint is also measured as signed when the mode is transformed.
3044 aBits[0].r = NBits(aEndPts[0].A.r, bIsSigned);
3045 aBits[0].g = NBits(aEndPts[0].A.g, bIsSigned);
3046 aBits[0].b = NBits(aEndPts[0].A.b, bIsSigned);
3047 aBits[1].r = NBits(aEndPts[0].B.r, bTransformed || bIsSigned);
3048 aBits[1].g = NBits(aEndPts[0].B.g, bTransformed || bIsSigned);
3049 aBits[1].b = NBits(aEndPts[0].B.b, bTransformed || bIsSigned);
3050 if (aBits[0].r > Prec0.r || aBits[1].r > Prec1.r ||
3051 aBits[0].g > Prec0.g || aBits[1].g > Prec1.g ||
3052 aBits[0].b > Prec0.b || aBits[1].b > Prec1.b)
// Second region, present only in partitioned modes.
3055 if (ms_aInfo[pEP->uMode].uPartitions)
3057 aBits[2].r = NBits(aEndPts[1].A.r, bTransformed || bIsSigned);
3058 aBits[2].g = NBits(aEndPts[1].A.g, bTransformed || bIsSigned);
3059 aBits[2].b = NBits(aEndPts[1].A.b, bTransformed || bIsSigned);
3060 aBits[3].r = NBits(aEndPts[1].B.r, bTransformed || bIsSigned);
3061 aBits[3].g = NBits(aEndPts[1].B.g, bTransformed || bIsSigned);
3062 aBits[3].b = NBits(aEndPts[1].B.b, bTransformed || bIsSigned);
3064 if (aBits[2].r > Prec2.r || aBits[3].r > Prec3.r ||
3065 aBits[2].g > Prec2.g || aBits[3].g > Prec3.g ||
3066 aBits[2].b > Prec2.b || aBits[3].b > Prec3.b)
// Builds the palette that a decoder would produce from a pair of quantized endpoints.
//   pEP      - supplies the mode (index precision, RGBAPrec[0][0]) and the signed flag.
//   endPts   - quantized endpoints A and B.
//   aPalette - output; 1 << uIndexPrec entries are written.
// The endpoints are unquantized, then each palette entry is the weighted blend of A and B
// passed through FinishUnquantize, mirroring the per-pixel math in Decode.
// NOTE(review): the conditions selecting the weight table and choosing between the
// "zero the palette" loop and the interpolation loop are not visible here.
3073void D3DX_BC6H::GeneratePaletteQuantized(
const EncodeParams* pEP,
const INTEndPntPair& endPts, INTColor aPalette[])
const noexcept
3076 const size_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
3077 const size_t uNumIndices = size_t(1) << uIndexPrec;
3078 BC6H_ASSERT(uNumIndices > 0);
3079 const LDRColorA& Prec = ms_aInfo[pEP->uMode].RGBAPrec[0][0];
// Scale the endpoints up to the working range first.
3082 INTEndPntPair unqEndPts;
3083 unqEndPts.A.r = Unquantize(endPts.A.r, Prec.r, pEP->bSigned);
3084 unqEndPts.A.g = Unquantize(endPts.A.g, Prec.g, pEP->bSigned);
3085 unqEndPts.A.b = Unquantize(endPts.A.b, Prec.b, pEP->bSigned);
3086 unqEndPts.B.r = Unquantize(endPts.B.r, Prec.r, pEP->bSigned);
3087 unqEndPts.B.g = Unquantize(endPts.B.g, Prec.g, pEP->bSigned);
3088 unqEndPts.B.b = Unquantize(endPts.B.b, Prec.b, pEP->bSigned);
// Weight table: g_aWeights3 is paired with at most 8 indices, g_aWeights4 with at most 16.
3091 const int* aWeights =
nullptr;
3095 aWeights = g_aWeights3;
3096 BC6H_ASSERT(uNumIndices <= 8);
3099 aWeights = g_aWeights4;
3100 BC6H_ASSERT(uNumIndices <= 16);
// Fallback loop that zeroes the palette (presumably when no weight table applies — confirm).
3104 for (
size_t i = 0; i < uNumIndices; ++i)
3107 aPalette[i] = INTColor(0, 0, 0);
// Interpolation loop: blend A and B by the entry's weight, round, shift, then finish.
3112 for (
size_t i = 0; i < uNumIndices; ++i)
3114 aPalette[i].r = FinishUnquantize(
3115 (unqEndPts.A.r * (BC6H_WEIGHT_MAX - aWeights[i]) + unqEndPts.B.r * aWeights[i] + BC6H_WEIGHT_ROUND) >> BC6H_WEIGHT_SHIFT,
3117 aPalette[i].g = FinishUnquantize(
3118 (unqEndPts.A.g * (BC6H_WEIGHT_MAX - aWeights[i]) + unqEndPts.B.g * aWeights[i] + BC6H_WEIGHT_ROUND) >> BC6H_WEIGHT_SHIFT,
3120 aPalette[i].b = FinishUnquantize(
3121 (unqEndPts.A.b * (BC6H_WEIGHT_MAX - aWeights[i]) + unqEndPts.B.b * aWeights[i] + BC6H_WEIGHT_ROUND) >> BC6H_WEIGHT_SHIFT,
// Measures how well a pair of quantized endpoints represents a set of colors.
//   pEP     - supplies the mode (index precision) and is forwarded to palette generation.
//   aColors - the np colors to evaluate.
//   endPts  - quantized endpoints from which the palette is generated.
// For every color the smallest squared distance to a palette entry is found and added to
// fTotErr.
// NOTE(review): the declaration of fTotErr and the return statement are not visible here;
// presumably the accumulated total is returned.
3127float D3DX_BC6H::MapColorsQuantized(
const EncodeParams* pEP,
const INTColor aColors[],
size_t np,
const INTEndPntPair& endPts)
const noexcept
3131 const uint8_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
3132 auto const uNumIndices =
static_cast<const uint8_t
>(1u << uIndexPrec);
3133 INTColor aPalette[BC6H_MAX_INDICES];
3134 GeneratePaletteQuantized(pEP, endPts, aPalette);
3137 for (
size_t i = 0; i < np; ++i)
// NOTE(review): INTColor is reinterpreted as XMINT4 for the loads below; this assumes the
// two types have a compatible layout — confirm.
3139 const XMVECTOR vcolors = XMLoadSInt4(
reinterpret_cast<const XMINT4*
>(&aColors[i]));
// Start with the distance to palette entry 0.
3142 XMVECTOR tpal = XMLoadSInt4(
reinterpret_cast<const XMINT4*
>(&aPalette[0]));
3143 tpal = XMVectorSubtract(vcolors, tpal);
3144 float fBestErr = XMVectorDot(tpal, tpal);
// Scan the remaining entries; stop on an exact match or as soon as the error rises again.
3146 for (
int j = 1; j < uNumIndices && fBestErr > 0; ++j)
3149 tpal = XMLoadSInt4(
reinterpret_cast<const XMINT4*
>(&aPalette[j]));
3150 tpal = XMVectorSubtract(vcolors, tpal);
3151 const float fErr = XMVectorDot(tpal, tpal);
3152 if (fErr > fBestErr)
break;
3153 if (fErr < fBestErr) fBestErr = fErr;
3155 fTotErr += fBestErr;
// Searches for a better value of one channel of one endpoint by stepping it up and down.
//   pEP       - supplies the mode (channel precision) and is forwarded to MapColorsQuantized.
//   aColors   - the np colors the endpoints must represent.
//   ch        - channel to perturb (0, 1, 2 select the r, g, b precision below).
//   oldEndPts - starting endpoints.
//   newEndPts - receives the perturbed endpoints (initialised from oldEndPts).
//   fOldErr   - error of oldEndPts; used as the initial minimum.
//   do_b      - presumably selects endpoint B (non-zero) over endpoint A — confirm.
// The step size starts at half the channel range and halves each pass; for each step both
// directions are tried and candidates outside [0, 1 << uPrec) are rejected.
// NOTE(review): the branches on do_b, the improvement test and the return statement are
// not visible here.
3160float D3DX_BC6H::PerturbOne(
const EncodeParams* pEP,
const INTColor aColors[],
size_t np, uint8_t ch,
const INTEndPntPair& oldEndPts, INTEndPntPair& newEndPts,
float fOldErr,
int do_b)
const noexcept
3166 case 0: uPrec = ms_aInfo[pEP->uMode].RGBAPrec[0][0].r;
break;
3167 case 1: uPrec = ms_aInfo[pEP->uMode].RGBAPrec[0][0].g;
break;
3168 case 2: uPrec = ms_aInfo[pEP->uMode].RGBAPrec[0][0].b;
break;
3171 newEndPts = oldEndPts;
3174 INTEndPntPair tmpEndPts;
3175 float fMinErr = fOldErr;
3179 tmpEndPts = newEndPts = oldEndPts;
// Halving step search over the chosen channel.
3182 for (
int step = 1 << (uPrec - 1); step; step >>= 1)
3184 bool bImproved =
false;
// sign takes the values -1 and +1.
3185 for (
int sign = -1; sign <= 1; sign += 2)
// Candidate for endpoint A, with its range check.
3189 tmpEndPts.A[ch] = newEndPts.A[ch] + sign * step;
3190 if (tmpEndPts.A[ch] < 0 || tmpEndPts.A[ch] >= (1 << uPrec))
// Candidate for endpoint B, with its range check.
3195 tmpEndPts.B[ch] = newEndPts.B[ch] + sign * step;
3196 if (tmpEndPts.B[ch] < 0 || tmpEndPts.B[ch] >= (1 << uPrec))
// Score the candidate against all colors.
3200 const float fErr = MapColorsQuantized(pEP, aColors, np, tmpEndPts);
3206 beststep = sign * step;
// Commit the best step found for this step size to the selected endpoint.
3213 newEndPts.A[ch] += beststep;
3215 newEndPts.B[ch] += beststep;
// Refines one region's endpoints channel by channel using PerturbOne.
//   pEP        - encode state forwarded to PerturbOne.
//   aColors    - the np colors belonging to the region.
//   aOrgErr    - error of the starting endpoints.
//   aOrgEndPts - starting endpoints.
//   aOptEndPts - output; initialised from aOrgEndPts and updated when an improvement is found.
// For each channel, A and B are first perturbed independently from the same starting
// point; a later PerturbOne call takes a do_b argument chosen at run time.
// NOTE(review): the logic choosing which result to adopt, how aOptErr is updated and the
// loop around the final PerturbOne call are not visible here.
3221void D3DX_BC6H::OptimizeOne(
const EncodeParams* pEP,
const INTColor aColors[],
size_t np,
float aOrgErr,
const INTEndPntPair& aOrgEndPts, INTEndPntPair& aOptEndPts)
const noexcept
3224 float aOptErr = aOrgErr;
3225 aOptEndPts.A = aOrgEndPts.A;
3226 aOptEndPts.B = aOrgEndPts.B;
3228 INTEndPntPair new_a, new_b;
3229 INTEndPntPair newEndPts;
3233 for (uint8_t ch = 0; ch < BC6H_NUM_CHANNELS; ++ch)
// Try moving A alone (do_b = 0) and B alone (do_b = 1) from the current best endpoints.
3237 const float fErr0 = PerturbOne(pEP, aColors, np, ch, aOptEndPts, new_a, aOptErr, 0);
3238 const float fErr1 = PerturbOne(pEP, aColors, np, ch, aOptEndPts, new_b, aOptErr, 1);
// Adopt the A result only if it beats the current error; otherwise move to the next channel.
3242 if (fErr0 >= aOptErr)
continue;
3243 aOptEndPts.A[ch] = new_a.A[ch];
// Same for the B result.
3249 if (fErr1 >= aOptErr)
continue;
3250 aOptEndPts.B[ch] = new_b.B[ch];
// Follow-up perturbation of the endpoint selected by do_b; results that do not improve
// on aOptErr are not adopted.
3258 const float fErr = PerturbOne(pEP, aColors, np, ch, aOptEndPts, newEndPts, aOptErr, do_b);
3259 if (fErr >= aOptErr)
3262 aOptEndPts.A[ch] = newEndPts.A[ch];
3264 aOptEndPts.B[ch] = newEndPts.B[ch];
3271void D3DX_BC6H::OptimizeEndPoints(
const EncodeParams* pEP,
const float aOrgErr[],
const INTEndPntPair aOrgEndPts[], INTEndPntPair aOptEndPts[])
const noexcept
3274 const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
3275 BC6H_ASSERT(uPartitions < BC6H_MAX_REGIONS);
3276 INTColor aPixels[BC6H_NUM_PIXELS_PER_BLOCK];
3278 for (
size_t p = 0; p <= uPartitions; ++p)
3282 for (
size_t i = 0; i < BC6H_NUM_PIXELS_PER_BLOCK; ++i)
3284 if (g_aPartitionTable[p][pEP->uShape][i] == p)
3286 aPixels[np++] = pEP->aIPixels[i];
3290 OptimizeOne(pEP, aPixels, np, aOrgErr[p], aOrgEndPts[p], aOptEndPts[p]);
// Normalises each region so that the index of its fix-up pixel has a clear high bit.
//   pEP      - supplies the mode (partition count, index precision) and the shape.
//   aEndPts  - per-region endpoints; A and B are swapped in place for regions that need it.
//   aIndices - per-pixel palette indices; inverted in place for regions that are swapped.
// The fix-up pixel is the one reported by g_aFixUp for the region. When its index has the
// top bit set, swapping the endpoints and replacing every index in the region by
// (uNumIndices - 1 - index) clears that bit.
3295void D3DX_BC6H::SwapIndices(
const EncodeParams* pEP, INTEndPntPair aEndPts[],
size_t aIndices[])
noexcept
3298 const size_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
3299 const size_t uNumIndices = size_t(1) << ms_aInfo[pEP->uMode].uIndexPrec;
// Mask for the most significant index bit.
3300 const size_t uHighIndexBit = uNumIndices >> 1;
3302 BC6H_ASSERT(uPartitions < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES);
3304 for (
size_t p = 0; p <= uPartitions; ++p)
// i is the fix-up pixel of region p; it must itself belong to region p.
3306 const size_t i = g_aFixUp[uPartitions][pEP->uShape][p];
3307 BC6H_ASSERT(g_aPartitionTable[uPartitions][pEP->uShape][i] == p);
3308 if (aIndices[i] & uHighIndexBit)
// High bit set: swap the endpoints and mirror every index of this region.
3311 std__swap(aEndPts[p].A, aEndPts[p].B);
3313 for (
size_t j = 0; j < BC6H_NUM_PIXELS_PER_BLOCK; ++j)
3314 if (g_aPartitionTable[uPartitions][pEP->uShape][j] == p)
3315 aIndices[j] = uNumIndices - 1 - aIndices[j];
// Picks the best palette index for every pixel and accumulates the error per region.
//   pEP      - supplies the mode, the shape and the integer pixels (aIPixels).
//   aEndPts  - per-region quantized endpoints from which the palettes are generated.
//   aIndices - output; per-pixel palette index.
//   aTotErr  - output; per-region sum of the best per-pixel errors.
// NOTE(review): the statements that reset aTotErr and that record the chosen index are not
// visible here; aTotErr is only seen being accumulated with +=.
3321void D3DX_BC6H::AssignIndices(
const EncodeParams* pEP,
const INTEndPntPair aEndPts[],
size_t aIndices[],
float aTotErr[])
const noexcept
3324 const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
3325 auto const uNumIndices =
static_cast<const uint8_t
>(1u << ms_aInfo[pEP->uMode].uIndexPrec);
3327 BC6H_ASSERT(uPartitions < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES);
// One palette per region.
3330 INTColor aPalette[BC6H_MAX_REGIONS][BC6H_MAX_INDICES];
3332 for (
size_t p = 0; p <= uPartitions; ++p)
3334 GeneratePaletteQuantized(pEP, aEndPts[p], aPalette[p]);
3338 for (
size_t i = 0; i < BC6H_NUM_PIXELS_PER_BLOCK; ++i)
3340 const uint8_t uRegion = g_aPartitionTable[uPartitions][pEP->uShape][i];
3341 BC6H_ASSERT(uRegion < BC6H_MAX_REGIONS);
// Start with palette entry 0 of the pixel's region.
3342 float fBestErr = Norm(pEP->aIPixels[i], aPalette[uRegion][0]);
// Scan the remaining entries; stop on an exact match or as soon as the error rises again.
3345 for (uint8_t j = 1; j < uNumIndices && fBestErr > 0; ++j)
3347 const float fErr = Norm(pEP->aIPixels[i], aPalette[uRegion][j]);
3348 if (fErr > fBestErr)
break;
3349 if (fErr < fBestErr)
3355 aTotErr[uRegion] += fBestErr;
3359void D3DX_BC6H::QuantizeEndPts(
const EncodeParams* pEP, INTEndPntPair* aQntEndPts)
const noexcept
3361 BC6H_ASSERT(pEP && aQntEndPts);
3362 const INTEndPntPair* aUnqEndPts = pEP->aUnqEndPts[pEP->uShape];
3363 const LDRColorA& Prec = ms_aInfo[pEP->uMode].RGBAPrec[0][0];
3364 const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
3365 BC6H_ASSERT(uPartitions < BC6H_MAX_REGIONS);
3367 for (
size_t p = 0; p <= uPartitions; ++p)
3369 aQntEndPts[p].A.r = Quantize(aUnqEndPts[p].A.r, Prec.r, pEP->bSigned);
3370 aQntEndPts[p].A.g = Quantize(aUnqEndPts[p].A.g, Prec.g, pEP->bSigned);
3371 aQntEndPts[p].A.b = Quantize(aUnqEndPts[p].A.b, Prec.b, pEP->bSigned);
3372 aQntEndPts[p].B.r = Quantize(aUnqEndPts[p].B.r, Prec.r, pEP->bSigned);
3373 aQntEndPts[p].B.g = Quantize(aUnqEndPts[p].B.g, Prec.g, pEP->bSigned);
3374 aQntEndPts[p].B.b = Quantize(aUnqEndPts[p].B.b, Prec.b, pEP->bSigned);
// Serialises the chosen mode, shape, endpoints and indices into this block.
//   pEP      - supplies the mode table index (uMode) and the shape.
//   aEndPts  - endpoints to store (region 0, and region 1 for partitioned modes).
//   aIndices - per-pixel palette indices.
// The header (82 bits for partitioned modes, 65 otherwise) is written bit by bit following
// the mode descriptor; the index bits follow, and the total must come to exactly 128 bits.
// NOTE(review): uStartBit is not advanced anywhere visible, so SetBit/SetBits presumably
// advance it by reference — confirm.
3378void D3DX_BC6H::EmitBlock(
const EncodeParams* pEP,
const INTEndPntPair aEndPts[],
const size_t aIndices[])
noexcept
// uRealMode is the mode value stored in the block, as opposed to the table index pEP->uMode.
3381 const uint8_t uRealMode = ms_aInfo[pEP->uMode].uMode;
3382 const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
3383 const uint8_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
3384 const size_t uHeaderBits = uPartitions > 0 ? 82u : 65u;
3385 const ModeDescriptor* desc = ms_aDesc[pEP->uMode];
3386 size_t uStartBit = 0;
// For each header bit, the descriptor names the source field and which bit of it to emit.
3388 while (uStartBit < uHeaderBits)
3390 switch (desc[uStartBit].m_eField)
3392 case M: SetBit(uStartBit, uint8_t(uRealMode >> desc[uStartBit].m_uBit) & 0x01u);
break;
3393 case D: SetBit(uStartBit, uint8_t(pEP->uShape >> desc[uStartBit].m_uBit) & 0x01u);
break;
3394 case RW: SetBit(uStartBit, uint8_t(aEndPts[0].A.r >> desc[uStartBit].m_uBit) & 0x01u);
break;
3395 case RX: SetBit(uStartBit, uint8_t(aEndPts[0].B.r >> desc[uStartBit].m_uBit) & 0x01u);
break;
3396 case RY: SetBit(uStartBit, uint8_t(aEndPts[1].A.r >> desc[uStartBit].m_uBit) & 0x01u);
break;
3397 case RZ: SetBit(uStartBit, uint8_t(aEndPts[1].B.r >> desc[uStartBit].m_uBit) & 0x01u);
break;
3398 case GW: SetBit(uStartBit, uint8_t(aEndPts[0].A.g >> desc[uStartBit].m_uBit) & 0x01u);
break;
3399 case GX: SetBit(uStartBit, uint8_t(aEndPts[0].B.g >> desc[uStartBit].m_uBit) & 0x01u);
break;
3400 case GY: SetBit(uStartBit, uint8_t(aEndPts[1].A.g >> desc[uStartBit].m_uBit) & 0x01u);
break;
3401 case GZ: SetBit(uStartBit, uint8_t(aEndPts[1].B.g >> desc[uStartBit].m_uBit) & 0x01u);
break;
3402 case BW: SetBit(uStartBit, uint8_t(aEndPts[0].A.b >> desc[uStartBit].m_uBit) & 0x01u);
break;
3403 case BX: SetBit(uStartBit, uint8_t(aEndPts[0].B.b >> desc[uStartBit].m_uBit) & 0x01u);
break;
3404 case BY: SetBit(uStartBit, uint8_t(aEndPts[1].A.b >> desc[uStartBit].m_uBit) & 0x01u);
break;
3405 case BZ: SetBit(uStartBit, uint8_t(aEndPts[1].B.b >> desc[uStartBit].m_uBit) & 0x01u);
break;
// The descriptor tables should never contain any other field.
3406 default: BC6H_ASSERT(
false);
// Index payload: pixels reported by IsFixUpOffset are stored with one fewer bit.
3410 for (
size_t i = 0; i < BC6H_NUM_PIXELS_PER_BLOCK; ++i)
3412 if (IsFixUpOffset(ms_aInfo[pEP->uMode].uPartitions, pEP->uShape, i))
3413 SetBits(uStartBit, uIndexPrec - 1u,
static_cast<uint8_t
>(aIndices[i]));
3415 SetBits(uStartBit, uIndexPrec,
static_cast<uint8_t
>(aIndices[i]));
// Header plus indices must fill the block exactly.
3417 BC6H_ASSERT(uStartBit == 128);
// Tries the current mode/shape in pEP and emits the block if it beats the best error so far.
// Steps visible here:
//   1. Quantize the shape's endpoints, assign indices and normalise them (SwapIndices).
//   2. For transformed modes, apply the forward transform and check the endpoints still fit.
//   3. If they fit, undo the transform, optimise the endpoints, and re-assign/normalise
//      the indices for the optimised endpoints.
//   4. Emit the optimised result when it fits, beats the unoptimised error and beats
//      pEP->fBestErr; otherwise emit the unoptimised result if that beats pEP->fBestErr.
// pEP->fBestErr is updated whenever a block is emitted.
3420void D3DX_BC6H::Refine(EncodeParams* pEP)
noexcept
3423 const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
3424 BC6H_ASSERT(uPartitions < BC6H_MAX_REGIONS);
3426 const bool bTransformed = ms_aInfo[pEP->uMode].bTransformed;
3427 float aOrgErr[BC6H_MAX_REGIONS], aOptErr[BC6H_MAX_REGIONS];
3428 INTEndPntPair aOrgEndPts[BC6H_MAX_REGIONS], aOptEndPts[BC6H_MAX_REGIONS];
3429 size_t aOrgIdx[BC6H_NUM_PIXELS_PER_BLOCK], aOptIdx[BC6H_NUM_PIXELS_PER_BLOCK];
// Baseline: straight quantization of the shape's endpoints.
3431 QuantizeEndPts(pEP, aOrgEndPts);
3432 AssignIndices(pEP, aOrgEndPts, aOrgIdx, aOrgErr);
3433 SwapIndices(pEP, aOrgEndPts, aOrgIdx);
// The fit check is done on the transformed endpoints for transformed modes.
3435 if (bTransformed) TransformForward(aOrgEndPts);
3436 if (EndPointsFit(pEP, aOrgEndPts))
// Optimisation works on untransformed endpoints, so undo the transform first.
3438 if (bTransformed) TransformInverse(aOrgEndPts, ms_aInfo[pEP->uMode].RGBAPrec[0][0], pEP->bSigned);
3439 OptimizeEndPoints(pEP, aOrgErr, aOrgEndPts, aOptEndPts);
3440 AssignIndices(pEP, aOptEndPts, aOptIdx, aOptErr);
3441 SwapIndices(pEP, aOptEndPts, aOptIdx);
// Total error over all regions, before and after optimisation.
3443 float fOrgTotErr = 0.0f, fOptTotErr = 0.0f;
3444 for (
size_t p = 0; p <= uPartitions; ++p)
3446 fOrgTotErr += aOrgErr[p];
3447 fOptTotErr += aOptErr[p];
3450 if (bTransformed) TransformForward(aOptEndPts);
3451 if (EndPointsFit(pEP, aOptEndPts) && fOptTotErr < fOrgTotErr && fOptTotErr < pEP->fBestErr)
3453 pEP->fBestErr = fOptTotErr;
3454 EmitBlock(pEP, aOptEndPts, aOptIdx);
// Fallback: the optimised endpoints no longer fit or did not improve, so use the
// unoptimised ones, which are already known to fit.
3456 else if (fOrgTotErr < pEP->fBestErr)
// aOrgEndPts were inverse-transformed above, so transform them again before emitting.
3460 if (bTransformed) TransformForward(aOrgEndPts);
3461 pEP->fBestErr = fOrgTotErr;
3462 EmitBlock(pEP, aOrgEndPts, aOrgIdx);
// Builds a palette directly from the unquantized endpoints of one region.
//   pEP      - supplies the mode (index precision), the shape and aUnqEndPts.
//   uRegion  - region whose endpoints are used.
//   aPalette - output; 1 << uIndexPrec entries are written.
// Unlike the quantized variant, the endpoints are blended as they are: no Unquantize or
// FinishUnquantize step is applied.
// NOTE(review): the conditions selecting the weight table and choosing between the
// "zero the palette" loop and the interpolation loop are not visible here.
3467void D3DX_BC6H::GeneratePaletteUnquantized(
const EncodeParams* pEP,
size_t uRegion, INTColor aPalette[])
noexcept
3470 BC6H_ASSERT(uRegion < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES);
3471 const INTEndPntPair& endPts = pEP->aUnqEndPts[pEP->uShape][uRegion];
3472 const uint8_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
3473 auto const uNumIndices =
static_cast<const uint8_t
>(1u << uIndexPrec);
3474 BC6H_ASSERT(uNumIndices > 0);
// Weight table: g_aWeights3 is paired with at most 8 indices, g_aWeights4 with at most 16.
3476 const int* aWeights =
nullptr;
3480 aWeights = g_aWeights3;
3481 BC6H_ASSERT(uNumIndices <= 8);
3484 aWeights = g_aWeights4;
3485 BC6H_ASSERT(uNumIndices <= 16);
// Fallback loop that zeroes the palette (presumably when no weight table applies — confirm).
3489 for (
size_t i = 0; i < uNumIndices; ++i)
3492 aPalette[i] = INTColor(0, 0, 0);
// Interpolation loop: blend A and B by the entry's weight, round and shift.
3497 for (
size_t i = 0; i < uNumIndices; ++i)
3499 aPalette[i].r = (endPts.A.r * (BC6H_WEIGHT_MAX - aWeights[i]) + endPts.B.r * aWeights[i] + BC6H_WEIGHT_ROUND) >> BC6H_WEIGHT_SHIFT;
3500 aPalette[i].g = (endPts.A.g * (BC6H_WEIGHT_MAX - aWeights[i]) + endPts.B.g * aWeights[i] + BC6H_WEIGHT_ROUND) >> BC6H_WEIGHT_SHIFT;
3501 aPalette[i].b = (endPts.A.b * (BC6H_WEIGHT_MAX - aWeights[i]) + endPts.B.b * aWeights[i] + BC6H_WEIGHT_ROUND) >> BC6H_WEIGHT_SHIFT;
// Estimates the error of a region against the palette built from its unquantized endpoints.
//   pEP     - supplies the mode, the shape and the integer pixels (aIPixels).
//   uRegion - region whose unquantized palette is generated.
//   np      - number of entries in auIndex.
//   auIndex - indices into pEP->aIPixels of the pixels to evaluate.
// For every listed pixel the smallest Norm to a palette entry is added to fTotalErr.
// NOTE(review): the return statement is not visible here; presumably fTotalErr is returned.
3505float D3DX_BC6H::MapColors(
const EncodeParams* pEP,
size_t uRegion,
size_t np,
const size_t* auIndex)
const noexcept
3508 const uint8_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
3509 auto const uNumIndices =
static_cast<const uint8_t
>(1u << uIndexPrec);
3510 INTColor aPalette[BC6H_MAX_INDICES];
3511 GeneratePaletteUnquantized(pEP, uRegion, aPalette);
3513 float fTotalErr = 0.0f;
3514 for (
size_t i = 0; i < np; ++i)
// Start with palette entry 0, then scan the rest; stop on an exact match or as soon as
// the error rises again.
3516 float fBestErr = Norm(pEP->aIPixels[auIndex[i]], aPalette[0]);
3517 for (uint8_t j = 1; j < uNumIndices && fBestErr > 0.0f; ++j)
3519 const float fErr = Norm(pEP->aIPixels[auIndex[i]], aPalette[j]);
3520 if (fErr > fBestErr)
break;
3521 if (fErr < fBestErr) fBestErr = fErr;
3523 fTotalErr += fBestErr;
// Optional precomputed lookup, compiled only when BC6H_USE_AU_PIX_TABLE is defined.
// For every shape, partition count and region:
//   g_auPixIdx[shape][uPartitions][p] - the pixel indices that g_aPartitionTable assigns to
//                                       region p;
//   g_np[shape][uPartitions][p]       - how many such pixels there are.
// RoughMSE reads these tables instead of rescanning the partition table for each call.
// NOTE(review): the loops below presumably form the constructor of InitTable, so that the
// static instance at the end fills the tables during static initialisation — the
// enclosing struct and constructor lines are not visible here; confirm.
3531# ifdef BC6H_USE_AU_PIX_TABLE
3532size_t g_auPixIdx[BC6H_MAX_SHAPES][BC6H_MAX_REGIONS][BC6H_MAX_REGIONS][BC6H_NUM_PIXELS_PER_BLOCK];
3533size_t g_np[BC6H_MAX_SHAPES][BC6H_MAX_REGIONS][BC6H_MAX_REGIONS];
3539 for (
size_t shape = 0; shape < BC6H_MAX_SHAPES; shape++)
3541 for (
size_t uPartitions = 0; uPartitions < BC6H_MAX_REGIONS; uPartitions++)
3543 for (
size_t p = 0; p < BC6H_MAX_REGIONS; ++p)
// Gather the pixels of region p in block order.
3546 for (
size_t i = 0; i < BC6H_NUM_PIXELS_PER_BLOCK; ++i)
3548 if (g_aPartitionTable[uPartitions][shape][i] == p)
3550 g_auPixIdx[shape][uPartitions][p][np++] = i;
3555 g_np[shape][uPartitions][p] = np;
3562static InitTable init_au_pix_table;
// Produces initial unquantized endpoints for every region of the current mode/shape and
// returns a rough error estimate for that choice.
//   pEP - encode state; pEP->aUnqEndPts[pEP->uShape] is overwritten with the chosen endpoints.
// For each region the member pixels are listed (from the precomputed table when
// BC6H_USE_AU_PIX_TABLE is defined, otherwise by scanning g_aPartitionTable), endpoints
// are chosen, clamped to the representable range, and the MapColors error is added to fError.
// NOTE(review): the conditions selecting between the three endpoint choices (presumably
// on np), between the two clamp ranges (presumably on pEP->bSigned) and the return
// statement are not visible here.
3565float D3DX_BC6H::RoughMSE(EncodeParams* pEP)
const noexcept
3568 BC6H_ASSERT(pEP->uShape < BC6H_MAX_SHAPES);
3570 INTEndPntPair* aEndPts = pEP->aUnqEndPts[pEP->uShape];
3572 const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
3573 BC6H_ASSERT(uPartitions < BC6H_MAX_REGIONS);
// Without the lookup table, a local scratch list of pixel indices is used.
3575 #ifndef BC6H_USE_AU_PIX_TABLE
3576 size_t auPixIdx[BC6H_NUM_PIXELS_PER_BLOCK];
3579 float fError = 0.0f;
3580 for (
size_t p = 0; p <= uPartitions; ++p)
3582 #ifdef BC6H_USE_AU_PIX_TABLE
3583 const size_t* auPixIdx = g_auPixIdx[pEP->uShape][uPartitions][p];
3584 size_t np = g_np[pEP->uShape][uPartitions][p];
3587 for (
size_t i = 0; i < BC6H_NUM_PIXELS_PER_BLOCK; ++i)
3589 if (g_aPartitionTable[uPartitions][pEP->uShape][i] == p)
// Every region must contain at least one pixel.
3597 BC6H_ASSERT(np > 0);
// Choice 1: both endpoints are the first pixel of the region.
3600 aEndPts[p].A = pEP->aIPixels[auPixIdx[0]];
3601 aEndPts[p].B = pEP->aIPixels[auPixIdx[0]];
// Choice 2: the endpoints are the first two pixels of the region.
3606 aEndPts[p].A = pEP->aIPixels[auPixIdx[0]];
3607 aEndPts[p].B = pEP->aIPixels[auPixIdx[1]];
// Choice 3: fit endpoints to the region's HDR pixels with OptimizeRGB (4 steps).
3612 OptimizeRGB(pEP->aHDRPixels, &epA, &epB, 4, np, auPixIdx);
3613 aEndPts[p].A.Set(epA, pEP->bSigned);
3614 aEndPts[p].B.Set(epB, pEP->bSigned);
// Clamp to [-F16MAX, F16MAX] ...
3617 aEndPts[p].A.Clamp(-F16MAX, F16MAX);
3618 aEndPts[p].B.Clamp(-F16MAX, F16MAX);
// ... or to [0, F16MAX].
3622 aEndPts[p].A.Clamp(0, F16MAX);
3623 aEndPts[p].B.Clamp(0, F16MAX);
3626 fError += MapColors(pEP, p, np, auPixIdx);
3638void DecodeBC6HU(
void* pDest,
const void* pSrc)
noexcept
3640 static_assert(
sizeof(Impl::D3DX_BC6H) == 16,
"D3DX_BC6H should be 16 bytes");
3641 reinterpret_cast<const Impl::D3DX_BC6H*
>(pSrc)->Decode(
false,
reinterpret_cast<Impl::HDRColorA*
>(pDest));
3644void DecodeBC6HS(
void* pDest,
const void* pSrc)
noexcept
3646 static_assert(
sizeof(Impl::D3DX_BC6H) == 16,
"D3DX_BC6H should be 16 bytes");
3647 reinterpret_cast<const Impl::D3DX_BC6H*
>(pSrc)->Decode(
true,
reinterpret_cast<Impl::HDRColorA*
>(pDest));
3650void EncodeBC6HU(
void* pDest,
const void* pSrc)
noexcept
3652 static_assert(
sizeof(Impl::D3DX_BC6H) == 16,
"D3DX_BC6H should be 16 bytes");
3653 reinterpret_cast<Impl::D3DX_BC6H*
>(pDest)->Encode(
false,
reinterpret_cast<const Impl::HDRColorA*
>(pSrc));
3656void EncodeBC6HS(
void* pDest,
const void* pSrc)
noexcept
3658 static_assert(
sizeof(Impl::D3DX_BC6H) == 16,
"D3DX_BC6H should be 16 bytes");
3659 reinterpret_cast<Impl::D3DX_BC6H*
>(pDest)->Encode(
true,
reinterpret_cast<const Impl::HDRColorA*
>(pSrc));
3664# ifdef BC6H_ASSERT_UNDEF
3665# undef BC6H_ASSERT_UNDEF
3668# ifdef BC6H_HALF_TO_FLOAT_UNDEF
3669# undef BC6H_HALF_TO_FLOAT_UNDEF
3670# undef BC6H_HALF_TO_FLOAT
3673# ifdef BC6H_FLOAT_TO_HALF_UNDEF
3674# undef BC6H_FLOAT_TO_HALF_UNDEF
3675# undef BC6H_FLOAT_TO_HALF