Grindstone Game Engine v0.2.0
An open source game engine and toolkit.
bc6h_enc.h
1/*
2bc6h_enc -- https://github.com/0xc0de/bc6h_enc
3
4 Single file library for BC6H compression with no external dependencies.
5
 6 The code is based on BC6HBC7.cpp from DirectXTex, consolidated into a single-header
 7 library with no external dependencies.
8
9 CREDITS:
10 Alexander Samusev (0xc0de)
11
12 Do this:
13 #define BC6H_ENC_IMPLEMENTATION
14 before you include this file in *one* C++ file to create the implementation.
15
16 // i.e. it should look like this:
17 #include ...
18 #include ...
19 #include ...
20 #define BC6H_ENC_IMPLEMENTATION
21 #include "bc6h_enc.h"
22
23 You can define:
24 - for debug logging:
25 #define BC6H_LOG(s) YourPrint(s)
26 - for asserts:
27 #define BC6H_ASSERT(expression) YourAssert(expression)
28 - to override float<->half packing:
29 #define BC6H_HALF_TO_FLOAT(h) YourImpl(h)
30 #define BC6H_FLOAT_TO_HALF(f) YourImpl(f)
31
32
33 Public interface:
34 bc6h_enc::DecodeBC6HU(void* pDest, const void* pSrc)
35 bc6h_enc::DecodeBC6HS(void* pDest, const void* pSrc)
36 bc6h_enc::EncodeBC6HU(void* pDest, const void* pSrc)
37 bc6h_enc::EncodeBC6HS(void* pDest, const void* pSrc)
38
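 Example: compressing and decompressing a single 4x4 block (a sketch; the texel
 layout behind the void* parameters is assumed here to be 16 RGBA float texels in
 row-major order, matching the internal HDRColorA representation, so check the
 implementation if your data is laid out differently):

     float texels[16][4]; // one 4x4 block of HDR texel data, filled by the caller
     uint8_t block[16];   // a BC6H block is 128 bits (8 bits per texel)
     bc6h_enc::EncodeBC6HU(block, texels);
     bc6h_enc::DecodeBC6HU(texels, block);
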
39 LICENSE
40 -----------------------------------------------------------------------------------
41 MIT License
42
43 Copyright (c) 2022 Alexander Samusev
44
45 Permission is hereby granted, free of charge, to any person obtaining a copy
46 of this software and associated documentation files (the "Software"), to deal
47 in the Software without restriction, including without limitation the rights
48 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
49 copies of the Software, and to permit persons to whom the Software is
50 furnished to do so, subject to the following conditions:
51
52 The above copyright notice and this permission notice shall be included in all
53 copies or substantial portions of the Software.
54
55 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
56 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
57 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
58 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
59 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
60 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
61 SOFTWARE.
62 -----------------------------------------------------------------------------------
63 DirectXTex
64 The MIT License (MIT)
65
66 Copyright (c) 2011-2022 Microsoft Corp
67
68 Permission is hereby granted, free of charge, to any person obtaining a copy of this
69 software and associated documentation files (the "Software"), to deal in the Software
70 without restriction, including without limitation the rights to use, copy, modify,
71 merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
72 permit persons to whom the Software is furnished to do so, subject to the following
73 conditions:
74
75 The above copyright notice and this permission notice shall be included in all copies
76 or substantial portions of the Software.
77
78 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
79 INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
80 PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
81 HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
82 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
83 OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
84-----------------------------------------------------------------------------------
85 Branch-free implementation of half-precision (16 bit) floating point
86 Copyright 2006 Mike Acton <macton@gmail.com>
87
88 Permission is hereby granted, free of charge, to any person obtaining a
89 copy of this software and associated documentation files (the "Software"),
90 to deal in the Software without restriction, including without limitation
91 the rights to use, copy, modify, merge, publish, distribute, sublicense,
92 and/or sell copies of the Software, and to permit persons to whom the
93 Software is furnished to do so, subject to the following conditions:
94
95 The above copyright notice and this permission notice shall be included
96 in all copies or substantial portions of the Software.
97
98 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
99 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
100 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
101 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
102 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
103 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 104 THE SOFTWARE.
105 -----------------------------------------------------------------------------------
106*/
107
108#ifndef BC6H_ENC_IMPLEMENTATION
109
110#ifndef BC6H_ENC_INCLUDED
111#define BC6H_ENC_INCLUDED
112
113namespace bc6h_enc
114{
115void DecodeBC6HU(void* pDest, const void* pSrc) noexcept;
116void DecodeBC6HS(void* pDest, const void* pSrc) noexcept;
117void EncodeBC6HU(void* pDest, const void* pSrc) noexcept;
118void EncodeBC6HS(void* pDest, const void* pSrc) noexcept;
119} // namespace bc6h_enc
120
121#endif
122
123#else
124
125#include <stdint.h>
126
127# ifdef BC6H_SSE_INTRINSICS
128# include <immintrin.h>
129# endif
130
131# ifdef BC6H_ARM_NEON_INTRINSICS
132# include <arm_neon.h>
133# endif
134
135#ifndef BC6H_ASSERT
136# define BC6H_ASSERT(expression)
137# define BC6H_ASSERT_UNDEF
138#endif
139
140#ifndef FLT_MAX
141#define FLT_MAX 3.402823466e+38F // max value
142#endif
143
144#ifndef FLT_MIN
145#define FLT_MIN 1.175494351e-38F // min normalized positive value
146#endif
147
148#define BC6H_INLINE inline
149
150namespace bc6h_enc
151{
152
153namespace Impl
154{
155
156using HALF = uint16_t;
157
158#if !defined(BC6H_HALF_TO_FLOAT)
159// Fast half to float conversion based on:
160// http://www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
161struct FastHalfToFloat
162{
163 FastHalfToFloat()
164 {
165 m_MantissaTable[0] = 0;
166 for (int i = 1; i < 1024; i++)
167 {
168 uint32_t m = i << 13;
169 uint32_t e = 0;
170 while ((m & 0x00800000) == 0)
171 {
172 e -= 0x00800000;
173 m <<= 1;
174 }
175 m &= ~0x00800000;
176 e += 0x38800000;
177 m_MantissaTable[i] = m | e;
178 }
179 for (int i = 1024; i < 2048; i++)
180 m_MantissaTable[i] = (i - 1024) << 13;
181 m_ExponentTable[0] = 0;
182 for (int i = 1; i < 31; i++)
183 m_ExponentTable[i] = 0x38000000 + (i << 23);
184 m_ExponentTable[31] = 0x7f800000;
185 m_ExponentTable[32] = 0x80000000;
186 for (int i = 33; i < 63; i++)
187 m_ExponentTable[i] = 0xb8000000 + ((i - 32) << 23);
188 m_ExponentTable[63] = 0xff800000;
189 m_OffsetTable[0] = 0;
190 for (int i = 1; i < 32; i++)
191 m_OffsetTable[i] = 1024;
192 m_OffsetTable[32] = 0;
193 for (int i = 33; i < 64; i++)
194 m_OffsetTable[i] = 1024;
195 }
196 uint32_t m_MantissaTable[2048];
197 uint32_t m_ExponentTable[64];
198 uint32_t m_OffsetTable[64];
199
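    // Convert() splits the half into a 6-bit sign+exponent index (h >> 10) and a
    // 10-bit mantissa. m_OffsetTable picks the denormal half of the mantissa table
    // when the exponent field is zero, and m_ExponentTable re-biases the exponent
    // into single-precision range.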
200 BC6H_INLINE uint32_t Convert(uint16_t h) const
201 {
202 uint32_t exp = h >> 10;
203 return m_MantissaTable[m_OffsetTable[exp] + (h & 0x3ff)] + m_ExponentTable[exp];
204 }
205};
206FastHalfToFloat g_FastHalfToFloat;
207
208# define BC6H_HALF_TO_FLOAT g_FastHalfToFloat.Convert
209# define BC6H_HALF_TO_FLOAT_UNDEF
210
211#endif
212
213#if !defined(BC6H_FLOAT_TO_HALF)
214
215# ifdef _MSC_VER
216# pragma warning(push)
217# pragma warning(disable : 4146) // unary minus operator applied to unsigned type, result still unsigned
218# endif
219
220BC6H_INLINE uint32_t _uint32_li(uint32_t a) { return (a); }
221BC6H_INLINE uint32_t _uint32_dec(uint32_t a) { return (a - 1); }
222BC6H_INLINE uint32_t _uint32_inc(uint32_t a) { return (a + 1); }
223BC6H_INLINE uint32_t _uint32_not(uint32_t a) { return (~a); }
224BC6H_INLINE uint32_t _uint32_neg(uint32_t a) { return (-a); }
225BC6H_INLINE uint32_t _uint32_ext(uint32_t a) { return (((int32_t)a) >> 31); }
226BC6H_INLINE uint32_t _uint32_and(uint32_t a, uint32_t b) { return (a & b); }
227BC6H_INLINE uint32_t _uint32_andc(uint32_t a, uint32_t b) { return (a & ~b); }
228BC6H_INLINE uint32_t _uint32_or(uint32_t a, uint32_t b) { return (a | b); }
229BC6H_INLINE uint32_t _uint32_srl(uint32_t a, int sa) { return (a >> sa); }
230BC6H_INLINE uint32_t _uint32_sll(uint32_t a, int sa) { return (a << sa); }
231BC6H_INLINE uint32_t _uint32_add(uint32_t a, uint32_t b) { return (a + b); }
232BC6H_INLINE uint32_t _uint32_sub(uint32_t a, uint32_t b) { return (a - b); }
233BC6H_INLINE uint32_t _uint32_sels(uint32_t test, uint32_t a, uint32_t b)
234{
235 const uint32_t mask = _uint32_ext(test);
236 const uint32_t sel_a = _uint32_and(a, mask);
237 const uint32_t sel_b = _uint32_andc(b, mask);
238 const uint32_t result = _uint32_or(sel_a, sel_b);
239 return (result);
240}
241BC6H_INLINE uint16_t half_from_float(uint32_t f)
242{
243 const uint32_t one = _uint32_li(0x00000001);
244 const uint32_t f_s_mask = _uint32_li(0x80000000);
245 const uint32_t f_e_mask = _uint32_li(0x7f800000);
246 const uint32_t f_m_mask = _uint32_li(0x007fffff);
247 const uint32_t f_m_hidden_bit = _uint32_li(0x00800000);
248 const uint32_t f_m_round_bit = _uint32_li(0x00001000);
249 const uint32_t f_snan_mask = _uint32_li(0x7fc00000);
250 const uint32_t f_e_pos = _uint32_li(0x00000017);
251 const uint32_t h_e_pos = _uint32_li(0x0000000a);
252 const uint32_t h_e_mask = _uint32_li(0x00007c00);
253 const uint32_t h_snan_mask = _uint32_li(0x00007e00);
254 const uint32_t h_e_mask_value = _uint32_li(0x0000001f);
255 const uint32_t f_h_s_pos_offset = _uint32_li(0x00000010);
256 const uint32_t f_h_bias_offset = _uint32_li(0x00000070);
257 const uint32_t f_h_m_pos_offset = _uint32_li(0x0000000d);
258 const uint32_t h_nan_min = _uint32_li(0x00007c01);
259 const uint32_t f_h_e_biased_flag = _uint32_li(0x0000008f);
260 const uint32_t f_s = _uint32_and(f, f_s_mask);
261 const uint32_t f_e = _uint32_and(f, f_e_mask);
262 const uint16_t h_s = _uint32_srl(f_s, f_h_s_pos_offset);
263 const uint32_t f_m = _uint32_and(f, f_m_mask);
264 const uint16_t f_e_amount = _uint32_srl(f_e, f_e_pos);
265 const uint32_t f_e_half_bias = _uint32_sub(f_e_amount, f_h_bias_offset);
266 const uint32_t f_snan = _uint32_and(f, f_snan_mask);
267 const uint32_t f_m_round_mask = _uint32_and(f_m, f_m_round_bit);
268 const uint32_t f_m_round_offset = _uint32_sll(f_m_round_mask, one);
269 const uint32_t f_m_rounded = _uint32_add(f_m, f_m_round_offset);
270 const uint32_t f_m_denorm_sa = _uint32_sub(one, f_e_half_bias);
271 const uint32_t f_m_with_hidden = _uint32_or(f_m_rounded, f_m_hidden_bit);
272 const uint32_t f_m_denorm = _uint32_srl(f_m_with_hidden, f_m_denorm_sa);
273 const uint32_t h_m_denorm = _uint32_srl(f_m_denorm, f_h_m_pos_offset);
274 const uint32_t f_m_rounded_overflow = _uint32_and(f_m_rounded, f_m_hidden_bit);
275 const uint32_t m_nan = _uint32_srl(f_m, f_h_m_pos_offset);
276 const uint32_t h_em_nan = _uint32_or(h_e_mask, m_nan);
277 const uint32_t h_e_norm_overflow_offset = _uint32_inc(f_e_half_bias);
278 const uint32_t h_e_norm_overflow = _uint32_sll(h_e_norm_overflow_offset, h_e_pos);
279 const uint32_t h_e_norm = _uint32_sll(f_e_half_bias, h_e_pos);
280 const uint32_t h_m_norm = _uint32_srl(f_m_rounded, f_h_m_pos_offset);
281 const uint32_t h_em_norm = _uint32_or(h_e_norm, h_m_norm);
282 const uint32_t is_h_ndenorm_msb = _uint32_sub(f_h_bias_offset, f_e_amount);
283 const uint32_t is_f_e_flagged_msb = _uint32_sub(f_h_e_biased_flag, f_e_half_bias);
284 const uint32_t is_h_denorm_msb = _uint32_not(is_h_ndenorm_msb);
285 const uint32_t is_f_m_eqz_msb = _uint32_dec(f_m);
286 const uint32_t is_h_nan_eqz_msb = _uint32_dec(m_nan);
287 const uint32_t is_f_inf_msb = _uint32_and(is_f_e_flagged_msb, is_f_m_eqz_msb);
288 const uint32_t is_f_nan_underflow_msb = _uint32_and(is_f_e_flagged_msb, is_h_nan_eqz_msb);
289 const uint32_t is_e_overflow_msb = _uint32_sub(h_e_mask_value, f_e_half_bias);
290 const uint32_t is_h_inf_msb = _uint32_or(is_e_overflow_msb, is_f_inf_msb);
291 const uint32_t is_f_nsnan_msb = _uint32_sub(f_snan, f_snan_mask);
292 const uint32_t is_m_norm_overflow_msb = _uint32_neg(f_m_rounded_overflow);
293 const uint32_t is_f_snan_msb = _uint32_not(is_f_nsnan_msb);
294 const uint32_t h_em_overflow_result = _uint32_sels(is_m_norm_overflow_msb, h_e_norm_overflow, h_em_norm);
295 const uint32_t h_em_nan_result = _uint32_sels(is_f_e_flagged_msb, h_em_nan, h_em_overflow_result);
296 const uint32_t h_em_nan_underflow_result = _uint32_sels(is_f_nan_underflow_msb, h_nan_min, h_em_nan_result);
297 const uint32_t h_em_inf_result = _uint32_sels(is_h_inf_msb, h_e_mask, h_em_nan_underflow_result);
298 const uint32_t h_em_denorm_result = _uint32_sels(is_h_denorm_msb, h_m_denorm, h_em_inf_result);
299 const uint32_t h_em_snan_result = _uint32_sels(is_f_snan_msb, h_snan_mask, h_em_denorm_result);
300 const uint32_t h_result = _uint32_or(h_s, h_em_snan_result);
301 return (uint16_t)(h_result);
302}
303# ifdef _MSC_VER
304# pragma warning(pop)
305# endif
306
307# define BC6H_FLOAT_TO_HALF half_from_float
308# define BC6H_FLOAT_TO_HALF_UNDEF
309
310#endif
311
312# if defined(BC6H_SSE_INTRINSICS)
313using XMVECTOR = __m128;
314# elif defined(BC6H_ARM_NEON_INTRINSICS)
315using XMVECTOR = float32x4_t;
316# else
317struct XMVECTOR
318{
319 float x;
320 float y;
321 float z;
322 float w;
323};
324# endif
325
326// Fix-up for (1st-3rd) XMVECTOR parameters that are pass-in-register for x86, ARM, ARM64, and vector call; by reference otherwise
327# if defined(BC6H_SSE_INTRINSICS) || defined(BC6H_NEON_INTRINSICS)
328typedef const XMVECTOR FXMVECTOR;
329# else
330typedef const XMVECTOR& FXMVECTOR;
331# endif
332
333struct XMINT4
334{
335 int32_t x;
336 int32_t y;
337 int32_t z;
338 int32_t w;
339};
340struct XMHALF4
341{
342 HALF x;
343 HALF y;
344 HALF z;
345 HALF w;
346};
347struct XMFLOAT4
348{
349 float x;
350 float y;
351 float z;
352 float w;
353};
354BC6H_INLINE float XMConvertHalfToFloat(HALF h) noexcept
355{
356# if defined(BC6H_SSE_INTRINSICS)
357 __m128i V1 = _mm_cvtsi32_si128(static_cast<int>(h));
358 __m128 V2 = _mm_cvtph_ps(V1);
359 return _mm_cvtss_f32(V2);
360# elif defined(BC6H_ARM_NEON_INTRINSICS)
361 uint16x4_t vHalf = vdup_n_u16(h);
362 float32x4_t vFloat = vcvt_f32_f16(vreinterpret_f16_u16(vHalf));
363 return vgetq_lane_f32(vFloat, 0);
364# else
365 uint32_t f = BC6H_HALF_TO_FLOAT(h);
366 return *reinterpret_cast<float*>(&f);
367# endif
368}
369BC6H_INLINE HALF XMConvertFloatToHalf(float f) noexcept
370{
371 return BC6H_FLOAT_TO_HALF(*reinterpret_cast<uint32_t*>(&f));
372}
373
374 struct alignas(16) XMFLOAT4A : public XMFLOAT4
375{
376 using XMFLOAT4::XMFLOAT4;
377};
378
379 BC6H_INLINE void XMStoreFloat4A(XMFLOAT4A* pDestination, FXMVECTOR V) noexcept
380{
381 BC6H_ASSERT((reinterpret_cast<uintptr_t>(pDestination) & 0xF) == 0);
382
383# if defined(BC6H_SSE_INTRINSICS)
384 _mm_store_ps(&pDestination->x, V);
385# elif defined(BC6H_ARM_NEON_INTRINSICS)
386# if defined(_MSC_VER) && !defined(__clang__)
387 vst1q_f32_ex(reinterpret_cast<float*>(pDestination), V, 128);
388# else
389 vst1q_f32(reinterpret_cast<float*>(pDestination), V);
390# endif
391# else
392 pDestination->x = V.x;
393 pDestination->y = V.y;
394 pDestination->z = V.z;
395 pDestination->w = V.w;
396# endif
397}
398BC6H_INLINE void XMStoreHalf4(XMHALF4* pDestination, FXMVECTOR V) noexcept
399{
400//# if defined(BC6H_SSE_INTRINSICS /* _XM_F16C_INTRINSICS_*/)// && !defined(_XM_NO_INTRINSICS_)
401# if defined(BC6H_SSE_INTRINSICS)
402 __m128i V1 = _mm_cvtps_ph(V, _MM_FROUND_TO_NEAREST_INT);
403 _mm_storel_epi64(reinterpret_cast<__m128i*>(pDestination), V1);
404# else
405 pDestination->x = XMConvertFloatToHalf(V.x);
406 pDestination->y = XMConvertFloatToHalf(V.y);
407 pDestination->z = XMConvertFloatToHalf(V.z);
408 pDestination->w = XMConvertFloatToHalf(V.w);
409# endif
410}
411BC6H_INLINE XMVECTOR XMLoadFloat4(const XMFLOAT4* pSource) noexcept
412{
413# if defined BC6H_SSE_INTRINSICS
414 return _mm_loadu_ps(&pSource->x);
415# elif defined BC6H_ARM_NEON_INTRINSIC
416 return vld1q_f32(reinterpret_cast<const float*>(pSource));
417# else
418 XMVECTOR V;
419 V.x = pSource->x;
420 V.y = pSource->y;
421 V.z = pSource->z;
422 V.w = pSource->w;
423 return V;
424# endif
425}
426BC6H_INLINE XMVECTOR XMVectorSubtract(FXMVECTOR V1, FXMVECTOR V2) noexcept
427{
428#if defined(BC6H_SSE_INTRINSICS)
429 return _mm_sub_ps(V1, V2);
430#elif defined(BC6H_ARM_NEON_INTRINSIC)
431 return vsubq_f32(V1, V2);
432#else
433 return {V1.x - V2.x,
434 V1.y - V2.y,
435 V1.z - V2.z,
436 V1.w - V2.w};
437#endif
438}
439struct alignas(16) XMVECTORU32
440{
441 union
442 {
443 uint32_t u[4];
444 XMVECTOR v;
445 };
446
447 BC6H_INLINE operator XMVECTOR() const noexcept { return v; }
448
449# if defined(BC6H_SSE_INTRINSICS)
450 BC6H_INLINE operator __m128i() const noexcept
451 {
452 return _mm_castps_si128(v);
453 }
454 BC6H_INLINE operator __m128d() const noexcept { return _mm_castps_pd(v); }
455# elif defined(BC6H_ARM_NEON_INTRINSIC) && defined(__GNUC__)
456 BC6H_INLINE operator int32x4_t() const noexcept
457 {
458 return vreinterpretq_s32_f32(v);
459 }
460 BC6H_INLINE operator uint32x4_t() const noexcept { return vreinterpretq_u32_f32(v); }
461# endif
462};
463
464const XMVECTORU32 g_XMMask3 = {{{0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000}}};
465
466# if defined(BC6H_SSE_INTRINSICS) || defined(BC6H_ARM_NEON_INTRINSIC)
467
 468// SSE3 variant (uses _mm_hadd_ps):
469//BC6H_INLINE XMVECTOR XMVector3Dot(FXMVECTOR V1, FXMVECTOR V2)
470//{
471// XMVECTOR vTemp = _mm_mul_ps(V1, V2);
472// vTemp = _mm_and_ps(vTemp, g_XMMask3);
473// vTemp = _mm_hadd_ps(vTemp, vTemp);
474// return _mm_hadd_ps(vTemp, vTemp);
475//}
 476// SSE4.1 variant (uses _mm_dp_ps)
477BC6H_INLINE XMVECTOR XMVector3Dot(FXMVECTOR V1, FXMVECTOR V2)
478{
479 return _mm_dp_ps(V1, V2, 0x7f);
480}
481
482BC6H_INLINE float XMVectorGetX(FXMVECTOR V) noexcept
483{
484# if defined(BC6H_SSE_INTRINSICS)
485 return _mm_cvtss_f32(V);
486# elif defined(BC6H_ARM_NEON_INTRINSIC)
487 return vgetq_lane_f32(V, 0);
488# endif
489}
490BC6H_INLINE float XMVectorDot(FXMVECTOR a, FXMVECTOR b)
491{
492 return XMVectorGetX(XMVector3Dot(a, b));
493}
494#else
495BC6H_INLINE float XMVectorDot(FXMVECTOR a, FXMVECTOR b)
496{
497 // XMVectorGetX(XMVector3Dot(a, b))
498 return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
499}
500#endif
501
502BC6H_INLINE XMVECTOR XMLoadSInt4(const XMINT4* pSource) noexcept
503{
504# if defined(BC6H_SSE_INTRINSICS)
505 __m128i V = _mm_loadu_si128(reinterpret_cast<const __m128i*>(pSource));
506 return _mm_cvtepi32_ps(V);
507# elif defined(BC6H_ARM_NEON_INTRINSICS_)
508 int32x4_t v = vld1q_s32(reinterpret_cast<const int32_t*>(pSource));
509 return vcvtq_f32_s32(v);
510# else
511 XMVECTOR V;
512 V.x = static_cast<float>(pSource->x);
513 V.y = static_cast<float>(pSource->y);
514 V.z = static_cast<float>(pSource->z);
515 V.w = static_cast<float>(pSource->w);
516 return V;
517# endif
518}
519
520template <typename _Tp, size_t _Nm>
521constexpr size_t std__size(const _Tp (&)[_Nm]) noexcept
522{
523 return _Nm;
524}
525
526template <class T>
527const T& std__max(const T& a, const T& b) noexcept
528{
529 return (a < b) ? b : a;
530}
531
532template <class T>
533const T& std__min(const T& a, const T& b) noexcept
534{
535 return (b < a) ? b : a;
536}
537
538template <class T>
539void std__swap(T& a, T& b) noexcept
540{
541 T temp(a);
542 a = b;
543 b = temp;
544}
545
546class LDRColorA;
547
548class HDRColorA
549{
550public:
551 float r, g, b, a;
552
553public:
554 HDRColorA() = default;
555 HDRColorA(float _r, float _g, float _b, float _a) noexcept :
556 r(_r), g(_g), b(_b), a(_a) {}
557 HDRColorA(const HDRColorA& c) noexcept :
558 r(c.r), g(c.g), b(c.b), a(c.a) {}
559
560 // binary operators
561 HDRColorA operator+(const HDRColorA& c) const noexcept
562 {
563 return HDRColorA(r + c.r, g + c.g, b + c.b, a + c.a);
564 }
565
566 HDRColorA operator-(const HDRColorA& c) const noexcept
567 {
568 return HDRColorA(r - c.r, g - c.g, b - c.b, a - c.a);
569 }
570
571 HDRColorA operator*(float f) const noexcept
572 {
573 return HDRColorA(r * f, g * f, b * f, a * f);
574 }
575
576 HDRColorA operator/(float f) const noexcept
577 {
578 const float fInv = 1.0f / f;
579 return HDRColorA(r * fInv, g * fInv, b * fInv, a * fInv);
580 }
581
582 float operator*(const HDRColorA& c) const noexcept
583 {
584 return r * c.r + g * c.g + b * c.b + a * c.a;
585 }
586
587 // assignment operators
588 HDRColorA& operator+=(const HDRColorA& c) noexcept
589 {
590 r += c.r;
591 g += c.g;
592 b += c.b;
593 a += c.a;
594 return *this;
595 }
596
597 HDRColorA& operator-=(const HDRColorA& c) noexcept
598 {
599 r -= c.r;
600 g -= c.g;
601 b -= c.b;
602 a -= c.a;
603 return *this;
604 }
605
606 HDRColorA& operator*=(float f) noexcept
607 {
608 r *= f;
609 g *= f;
610 b *= f;
611 a *= f;
612 return *this;
613 }
614
615 HDRColorA& operator/=(float f) noexcept
616 {
617 const float fInv = 1.0f / f;
618 r *= fInv;
619 g *= fInv;
620 b *= fInv;
621 a *= fInv;
622 return *this;
623 }
624
625 HDRColorA& Clamp(float fMin, float fMax) noexcept
626 {
627 r = std__min<float>(fMax, std__max<float>(fMin, r));
628 g = std__min<float>(fMax, std__max<float>(fMin, g));
629 b = std__min<float>(fMax, std__max<float>(fMin, b));
630 a = std__min<float>(fMax, std__max<float>(fMin, a));
631 return *this;
632 }
633
634 HDRColorA(const LDRColorA& c) noexcept;
635 HDRColorA& operator=(const LDRColorA& c) noexcept;
636};
637
638//-------------------------------------------------------------------------------------
639// Constants
640//-------------------------------------------------------------------------------------
641
642constexpr uint16_t F16S_MASK = 0x8000; // f16 sign mask
643constexpr uint16_t F16EM_MASK = 0x7fff; // f16 exp & mantissa mask
644constexpr uint16_t F16MAX = 0x7bff; // MAXFLT bit pattern for XMHALF
645
646constexpr size_t BC6H_NUM_PIXELS_PER_BLOCK = 16;
647constexpr size_t BC6H_MAX_REGIONS = 2;
648constexpr size_t BC6H_MAX_INDICES = 16;
649constexpr size_t BC6H_NUM_CHANNELS = 3;
650constexpr size_t BC6H_MAX_SHAPES = 32;
651constexpr int32_t BC6H_WEIGHT_MAX = 64;
652constexpr uint32_t BC6H_WEIGHT_SHIFT = 6;
653constexpr int32_t BC6H_WEIGHT_ROUND = 32;
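
// These three constants implement the standard BC6H fixed-point palette
// interpolation between the two endpoints of a region:
//   interpolated = (e0 * (BC6H_WEIGHT_MAX - w) + e1 * w + BC6H_WEIGHT_ROUND) >> BC6H_WEIGHT_SHIFT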
654
655constexpr float fEpsilon = (0.25f / 64.0f) * (0.25f / 64.0f);
656constexpr float pC3[] = {2.0f / 2.0f, 1.0f / 2.0f, 0.0f / 2.0f};
657constexpr float pD3[] = {0.0f / 2.0f, 1.0f / 2.0f, 2.0f / 2.0f};
658constexpr float pC4[] = {3.0f / 3.0f, 2.0f / 3.0f, 1.0f / 3.0f, 0.0f / 3.0f};
659constexpr float pD4[] = {0.0f / 3.0f, 1.0f / 3.0f, 2.0f / 3.0f, 3.0f / 3.0f};
660
661// Partition, Shape, Pixel (index into 4x4 block)
662const uint8_t g_aPartitionTable[2][32][16] =
663 {
664 {
665 // 1 Region case has no subsets (all 0)
666 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
667 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
668 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
669 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
670 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
671 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
672 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
673 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
674 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
675 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
676 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
677 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
678 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
679 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
680 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
681 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
682 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
683 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
684 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
685 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
686 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
687 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
688 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
689 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
690 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
691 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
692 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
693 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
694 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
695 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
696 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
697 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
698 },
699
700 {
701 // BC6H/BC7 Partition Set for 2 Subsets
702 {0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1}, // Shape 0
703 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1}, // Shape 1
704 {0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1}, // Shape 2
705 {0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1}, // Shape 3
706 {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1}, // Shape 4
707 {0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1}, // Shape 5
708 {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1}, // Shape 6
709 {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1}, // Shape 7
710 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1}, // Shape 8
711 {0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, // Shape 9
712 {0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1}, // Shape 10
713 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1}, // Shape 11
714 {0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, // Shape 12
715 {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1}, // Shape 13
716 {0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, // Shape 14
717 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1}, // Shape 15
718 {0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1}, // Shape 16
719 {0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0}, // Shape 17
720 {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0}, // Shape 18
721 {0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0}, // Shape 19
722 {0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0}, // Shape 20
723 {0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0}, // Shape 21
724 {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0}, // Shape 22
725 {0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1}, // Shape 23
726 {0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0}, // Shape 24
727 {0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0}, // Shape 25
728 {0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0}, // Shape 26
729 {0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0}, // Shape 27
730 {0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0}, // Shape 28
731 {0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0}, // Shape 29
732 {0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0}, // Shape 30
733 {0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0} // Shape 31
734 }
735};
736
737// Partition, Shape, Fixup
738const uint8_t g_aFixUp[2][32][3] =
739 {
740 {
741 // No fix-ups for 1st subset for BC6H or BC7
742 {0, 0, 0},
743 {0, 0, 0},
744 {0, 0, 0},
745 {0, 0, 0},
746 {0, 0, 0},
747 {0, 0, 0},
748 {0, 0, 0},
749 {0, 0, 0},
750 {0, 0, 0},
751 {0, 0, 0},
752 {0, 0, 0},
753 {0, 0, 0},
754 {0, 0, 0},
755 {0, 0, 0},
756 {0, 0, 0},
757 {0, 0, 0},
758 {0, 0, 0},
759 {0, 0, 0},
760 {0, 0, 0},
761 {0, 0, 0},
762 {0, 0, 0},
763 {0, 0, 0},
764 {0, 0, 0},
765 {0, 0, 0},
766 {0, 0, 0},
767 {0, 0, 0},
768 {0, 0, 0},
769 {0, 0, 0},
770 {0, 0, 0},
771 {0, 0, 0},
772 {0, 0, 0},
773 {0, 0, 0}
774 },
775
776 {
777 // BC6H/BC7 Partition Set Fixups for 2 Subsets
778 {0, 15, 0},
779 {0, 15, 0},
780 {0, 15, 0},
781 {0, 15, 0},
782 {0, 15, 0},
783 {0, 15, 0},
784 {0, 15, 0},
785 {0, 15, 0},
786 {0, 15, 0},
787 {0, 15, 0},
788 {0, 15, 0},
789 {0, 15, 0},
790 {0, 15, 0},
791 {0, 15, 0},
792 {0, 15, 0},
793 {0, 15, 0},
794 {0, 15, 0},
795 {0, 2, 0},
796 {0, 8, 0},
797 {0, 2, 0},
798 {0, 2, 0},
799 {0, 8, 0},
800 {0, 8, 0},
801 {0, 15, 0},
802 {0, 2, 0},
803 {0, 8, 0},
804 {0, 2, 0},
805 {0, 2, 0},
806 {0, 8, 0},
807 {0, 8, 0},
808 {0, 2, 0},
809 {0, 2, 0}
810 }
811};
812
813const int g_aWeights3[] = {0, 9, 18, 27, 37, 46, 55, 64};
814const int g_aWeights4[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64};
815
816class LDRColorA
817{
818public:
819 uint8_t r, g, b, a;
820
821 LDRColorA() = default;
822 LDRColorA(uint8_t _r, uint8_t _g, uint8_t _b, uint8_t _a) noexcept :
823 r(_r), g(_g), b(_b), a(_a) {}
824
825 const uint8_t& operator[](size_t uElement) const noexcept
826 {
827 switch (uElement)
828 {
829 case 0: return r;
830 case 1: return g;
831 case 2: return b;
832 case 3: return a;
833 default: BC6H_ASSERT(false); return r;
834 }
835 }
836
837 uint8_t& operator[](size_t uElement) noexcept
838 {
839 switch (uElement)
840 {
841 case 0: return r;
842 case 1: return g;
843 case 2: return b;
844 case 3: return a;
845 default: BC6H_ASSERT(false); return r;
846 }
847 }
848
849 LDRColorA operator=(const HDRColorA& c) noexcept
850 {
851 LDRColorA ret;
852 HDRColorA tmp(c);
853 tmp = tmp.Clamp(0.0f, 1.0f) * 255.0f;
854 ret.r = uint8_t(tmp.r + 0.001f);
855 ret.g = uint8_t(tmp.g + 0.001f);
856 ret.b = uint8_t(tmp.b + 0.001f);
857 ret.a = uint8_t(tmp.a + 0.001f);
858 return ret;
859 }
860};
861
862static_assert(sizeof(LDRColorA) == 4, "Unexpected packing");
863
864struct LDREndPntPair
865{
866 LDRColorA A;
867 LDRColorA B;
868};
869
870BC6H_INLINE HDRColorA::HDRColorA(const LDRColorA& c) noexcept
871{
872 r = float(c.r) * (1.0f / 255.0f);
873 g = float(c.g) * (1.0f / 255.0f);
874 b = float(c.b) * (1.0f / 255.0f);
875 a = float(c.a) * (1.0f / 255.0f);
876}
877
878BC6H_INLINE HDRColorA& HDRColorA::operator=(const LDRColorA& c) noexcept
879{
880 r = static_cast<float>(c.r);
881 g = static_cast<float>(c.g);
882 b = static_cast<float>(c.b);
883 a = static_cast<float>(c.a);
884 return *this;
885}
886
887class INTColor
888{
889public:
890 int r, g, b;
891 int pad;
892
893public:
894 INTColor() = default;
895 INTColor(int nr, int ng, int nb) noexcept :
896 r(nr), g(ng), b(nb), pad(0) {}
897 INTColor(const INTColor& c) noexcept :
898 r(c.r), g(c.g), b(c.b), pad(0) {}
899
900 INTColor& operator+=(const INTColor& c) noexcept
901 {
902 r += c.r;
903 g += c.g;
904 b += c.b;
905 return *this;
906 }
907
908 INTColor& operator-=(const INTColor& c) noexcept
909 {
910 r -= c.r;
911 g -= c.g;
912 b -= c.b;
913 return *this;
914 }
915
916 INTColor& operator&=(const INTColor& c) noexcept
917 {
918 r &= c.r;
919 g &= c.g;
920 b &= c.b;
921 return *this;
922 }
923
924 int& operator[](uint8_t i) noexcept
925 {
926 BC6H_ASSERT(i < sizeof(INTColor) / sizeof(int));
927 return reinterpret_cast<int*>(this)[i];
928 }
929
930 void Set(const HDRColorA& c, bool bSigned) noexcept
931 {
932 XMHALF4 aF16;
933
934 const XMVECTOR v = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&c));
935 XMStoreHalf4(&aF16, v);
936
937 r = F16ToINT(aF16.x, bSigned);
938 g = F16ToINT(aF16.y, bSigned);
939 b = F16ToINT(aF16.z, bSigned);
940 }
941
942 INTColor& Clamp(int iMin, int iMax) noexcept
943 {
944 r = std__min<int>(iMax, std__max<int>(iMin, r));
945 g = std__min<int>(iMax, std__max<int>(iMin, g));
946 b = std__min<int>(iMax, std__max<int>(iMin, b));
947 return *this;
948 }
949
950 INTColor& SignExtend(const LDRColorA& Prec) noexcept
951 {
952#define BC6H_SIGN_EXTEND(x, nb) ((((x) & (1 << ((nb)-1))) ? ((~0) ^ ((1 << (nb)) - 1)) : 0) | (x))
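        // Example: with nb = 5, a raw field of 0x1F sign-extends to -1 while 0x0F stays 15.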
953 r = BC6H_SIGN_EXTEND(r, int(Prec.r));
954 g = BC6H_SIGN_EXTEND(g, int(Prec.g));
955 b = BC6H_SIGN_EXTEND(b, int(Prec.b));
956#undef BC6H_SIGN_EXTEND
957 return *this;
958 }
959
960 void ToF16(HALF aF16[3], bool bSigned) const noexcept
961 {
962 aF16[0] = INT2F16(r, bSigned);
963 aF16[1] = INT2F16(g, bSigned);
964 aF16[2] = INT2F16(b, bSigned);
965 }
966
967private:
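    // F16ToINT / INT2F16 map half-float bit patterns to and from the plain integer
    // domain the encoder quantizes in: for signed formats the value becomes a
    // +/- magnitude clamped to F16MAX; for unsigned formats negative halves
    // collapse to 0 and the raw bits are used directly.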
968 static int F16ToINT(const HALF& f, bool bSigned) noexcept
969 {
970 uint16_t input = *reinterpret_cast<const uint16_t*>(&f);
971 int out, s;
972 if (bSigned)
973 {
974 s = input & F16S_MASK;
975 input &= F16EM_MASK;
976 if (input > F16MAX) out = F16MAX;
977 else
978 out = input;
979 out = s ? -out : out;
980 }
981 else
982 {
983 if (input & F16S_MASK) out = 0;
984 else
985 out = input;
986 }
987 return out;
988 }
989
990 static HALF INT2F16(int input, bool bSigned) noexcept
991 {
992 HALF h;
993 uint16_t out;
994 if (bSigned)
995 {
996 int s = 0;
997 if (input < 0)
998 {
999 s = F16S_MASK;
1000 input = -input;
1001 }
1002 out = uint16_t(s | input);
1003 }
1004 else
1005 {
1006 BC6H_ASSERT(input >= 0 && input <= F16MAX);
1007 out = static_cast<uint16_t>(input);
1008 }
1009
1010 *reinterpret_cast<uint16_t*>(&h) = out;
1011 return h;
1012 }
1013};
1014
1015static_assert(sizeof(INTColor) == 16, "Unexpected packing");
1016
1017struct INTEndPntPair
1018{
1019 INTColor A;
1020 INTColor B;
1021};
1022
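// CBits is a little-endian bit reader/writer over the 128-bit block: bit index 0 is
// the least significant bit of byte 0, and the Get*/Set* helpers advance uStartBit
// as they consume or emit bits.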
1023template <size_t SizeInBytes>
1024class CBits
1025{
1026public:
1027 uint8_t GetBit(size_t& uStartBit) const noexcept
1028 {
1029 BC6H_ASSERT(uStartBit < 128);
1030 const size_t uIndex = uStartBit >> 3;
1031 auto const ret = static_cast<uint8_t>((m_uBits[uIndex] >> (uStartBit - (uIndex << 3))) & 0x01);
1032 uStartBit++;
1033 return ret;
1034 }
1035
1036 uint8_t GetBits(size_t& uStartBit, size_t uNumBits) const noexcept
1037 {
1038 if (uNumBits == 0) return 0;
1039 BC6H_ASSERT(uStartBit + uNumBits <= 128 && uNumBits <= 8);
1040 uint8_t ret;
1041 const size_t uIndex = uStartBit >> 3;
1042 const size_t uBase = uStartBit - (uIndex << 3);
1043 if (uBase + uNumBits > 8)
1044 {
1045 const size_t uFirstIndexBits = 8 - uBase;
1046 const size_t uNextIndexBits = uNumBits - uFirstIndexBits;
1047 ret = static_cast<uint8_t>((unsigned(m_uBits[uIndex]) >> uBase) | ((unsigned(m_uBits[uIndex + 1]) & ((1u << uNextIndexBits) - 1)) << uFirstIndexBits));
1048 }
1049 else
1050 {
1051 ret = static_cast<uint8_t>((m_uBits[uIndex] >> uBase) & ((1 << uNumBits) - 1));
1052 }
1053 BC6H_ASSERT(ret < (1 << uNumBits));
1054 uStartBit += uNumBits;
1055 return ret;
1056 }
1057
1058 void SetBit(size_t& uStartBit, uint8_t uValue) noexcept
1059 {
1060 BC6H_ASSERT(uStartBit < 128 && uValue < 2);
1061 size_t uIndex = uStartBit >> 3;
1062 const size_t uBase = uStartBit - (uIndex << 3);
1063 m_uBits[uIndex] &= ~(1 << uBase);
1064 m_uBits[uIndex] |= uValue << uBase;
1065 uStartBit++;
1066 }
1067
1068 void SetBits(size_t& uStartBit, size_t uNumBits, uint8_t uValue) noexcept
1069 {
1070 if (uNumBits == 0)
1071 return;
1072 BC6H_ASSERT(uStartBit + uNumBits <= 128 && uNumBits <= 8);
1073 BC6H_ASSERT(uValue < (1 << uNumBits));
1074 size_t uIndex = uStartBit >> 3;
1075 const size_t uBase = uStartBit - (uIndex << 3);
1076 if (uBase + uNumBits > 8)
1077 {
1078 const size_t uFirstIndexBits = 8 - uBase;
1079 const size_t uNextIndexBits = uNumBits - uFirstIndexBits;
1080 m_uBits[uIndex] &= ~(((1 << uFirstIndexBits) - 1) << uBase);
1081 m_uBits[uIndex] |= uValue << uBase;
1082 m_uBits[uIndex + 1] &= ~((1 << uNextIndexBits) - 1);
1083 m_uBits[uIndex + 1] |= uValue >> uFirstIndexBits;
1084 }
1085 else
1086 {
1087 m_uBits[uIndex] &= ~(((1 << uNumBits) - 1) << uBase);
1088 m_uBits[uIndex] |= uValue << uBase;
1089 }
1090 uStartBit += uNumBits;
1091 }
1092
1093private:
1094 uint8_t m_uBits[SizeInBytes];
1095};
1096
 1097// BC6H compression (4x4 texel blocks, 128 bits per block)
1098class D3DX_BC6H : private CBits<16>
1099{
1100public:
1101 void Decode(bool bSigned, HDRColorA* pOut) const noexcept;
1102 void Encode(bool bSigned, const HDRColorA* const pIn) noexcept;
1103
1104private:
1105#ifdef _MSC_VER
1106#pragma warning(push)
1107#pragma warning(disable : 4480)
1108#endif
1109 enum EField : uint8_t
1110 {
1111 NA, // N/A
1112 M, // Mode
1113 D, // Shape
1114 RW,
1115 RX,
1116 RY,
1117 RZ,
1118 GW,
1119 GX,
1120 GY,
1121 GZ,
1122 BW,
1123 BX,
1124 BY,
1125 BZ,
1126 };
1127#ifdef _MSC_VER
1128#pragma warning(pop)
1129#endif
1130
1131 struct ModeDescriptor
1132 {
1133 EField m_eField;
1134 uint8_t m_uBit;
1135 };
1136
1137 struct ModeInfo
1138 {
1139 uint8_t uMode;
1140 uint8_t uPartitions;
1141 bool bTransformed;
1142 uint8_t uIndexPrec;
1143 LDRColorA RGBAPrec[BC6H_MAX_REGIONS][2];
1144 };
1145
1146#ifdef _MSC_VER
1147#pragma warning(push)
1148#pragma warning(disable : 4512)
1149#endif
1150 struct EncodeParams
1151 {
1152 float fBestErr;
1153 const bool bSigned;
1154 uint8_t uMode;
1155 uint8_t uShape;
1156 const HDRColorA* const aHDRPixels;
1157 INTEndPntPair aUnqEndPts[BC6H_MAX_SHAPES][BC6H_MAX_REGIONS];
1158 INTColor aIPixels[BC6H_NUM_PIXELS_PER_BLOCK];
1159
1160 EncodeParams(const HDRColorA* const aOriginal, bool bSignedFormat) noexcept :
1161 fBestErr(FLT_MAX), bSigned(bSignedFormat), uMode(0), uShape(0), aHDRPixels(aOriginal), aUnqEndPts{}, aIPixels{}
1162 {
1163 for (size_t i = 0; i < BC6H_NUM_PIXELS_PER_BLOCK; ++i)
1164 {
1165 aIPixels[i].Set(aOriginal[i], bSigned);
1166 }
1167 }
1168 };
1169#ifdef _MSC_VER
1170#pragma warning(pop)
1171#endif
1172
1173 static int Quantize(int iValue, int prec, bool bSigned) noexcept;
1174 static int Unquantize(int comp, uint8_t uBitsPerComp, bool bSigned) noexcept;
1175 static int FinishUnquantize(int comp, bool bSigned) noexcept;
1176
1177 static bool EndPointsFit(const EncodeParams* pEP, const INTEndPntPair aEndPts[]) noexcept;
1178
1179 void GeneratePaletteQuantized(const EncodeParams* pEP, const INTEndPntPair& endPts, INTColor aPalette[]) const noexcept;
1180 float MapColorsQuantized(const EncodeParams* pEP, const INTColor aColors[], size_t np, const INTEndPntPair& endPts) const noexcept;
1181 float PerturbOne(const EncodeParams* pEP, const INTColor aColors[], size_t np, uint8_t ch, const INTEndPntPair& oldEndPts, INTEndPntPair& newEndPts, float fOldErr, int do_b) const noexcept;
1182 void OptimizeOne(const EncodeParams* pEP, const INTColor aColors[], size_t np, float aOrgErr, const INTEndPntPair& aOrgEndPts, INTEndPntPair& aOptEndPts) const noexcept;
1183 void OptimizeEndPoints(const EncodeParams* pEP, const float aOrgErr[], const INTEndPntPair aOrgEndPts[], INTEndPntPair aOptEndPts[]) const noexcept;
1184 static void SwapIndices(const EncodeParams* pEP, INTEndPntPair aEndPts[], size_t aIndices[]) noexcept;
1185 void AssignIndices(const EncodeParams* pEP, const INTEndPntPair aEndPts[], size_t aIndices[], float aTotErr[]) const noexcept;
1186 void QuantizeEndPts(const EncodeParams* pEP, INTEndPntPair* qQntEndPts) const noexcept;
1187 void EmitBlock(const EncodeParams* pEP, const INTEndPntPair aEndPts[], const size_t aIndices[]) noexcept;
1188 void Refine(EncodeParams* pEP) noexcept;
1189
1190 static void GeneratePaletteUnquantized(const EncodeParams* pEP, size_t uRegion, INTColor aPalette[]) noexcept;
1191 float MapColors(const EncodeParams* pEP, size_t uRegion, size_t np, const size_t* auIndex) const noexcept;
1192 float RoughMSE(EncodeParams* pEP) const noexcept;
1193
1194private:
1195 static const ModeDescriptor ms_aDesc[][82];
1196 static const ModeInfo ms_aInfo[];
1197 static const int ms_aModeToInfo[];
1198};
1199
1200// BC6H Compression
1201const D3DX_BC6H::ModeDescriptor D3DX_BC6H::ms_aDesc[14][82] =
1202 {
1203 {
1204 // Mode 1 (0x00) - 10 5 5 5
1205 {M, 0},
1206 {M, 1},
1207 {GY, 4},
1208 {BY, 4},
1209 {BZ, 4},
1210 {RW, 0},
1211 {RW, 1},
1212 {RW, 2},
1213 {RW, 3},
1214 {RW, 4},
1215 {RW, 5},
1216 {RW, 6},
1217 {RW, 7},
1218 {RW, 8},
1219 {RW, 9},
1220 {GW, 0},
1221 {GW, 1},
1222 {GW, 2},
1223 {GW, 3},
1224 {GW, 4},
1225 {GW, 5},
1226 {GW, 6},
1227 {GW, 7},
1228 {GW, 8},
1229 {GW, 9},
1230 {BW, 0},
1231 {BW, 1},
1232 {BW, 2},
1233 {BW, 3},
1234 {BW, 4},
1235 {BW, 5},
1236 {BW, 6},
1237 {BW, 7},
1238 {BW, 8},
1239 {BW, 9},
1240 {RX, 0},
1241 {RX, 1},
1242 {RX, 2},
1243 {RX, 3},
1244 {RX, 4},
1245 {GZ, 4},
1246 {GY, 0},
1247 {GY, 1},
1248 {GY, 2},
1249 {GY, 3},
1250 {GX, 0},
1251 {GX, 1},
1252 {GX, 2},
1253 {GX, 3},
1254 {GX, 4},
1255 {BZ, 0},
1256 {GZ, 0},
1257 {GZ, 1},
1258 {GZ, 2},
1259 {GZ, 3},
1260 {BX, 0},
1261 {BX, 1},
1262 {BX, 2},
1263 {BX, 3},
1264 {BX, 4},
1265 {BZ, 1},
1266 {BY, 0},
1267 {BY, 1},
1268 {BY, 2},
1269 {BY, 3},
1270 {RY, 0},
1271 {RY, 1},
1272 {RY, 2},
1273 {RY, 3},
1274 {RY, 4},
1275 {BZ, 2},
1276 {RZ, 0},
1277 {RZ, 1},
1278 {RZ, 2},
1279 {RZ, 3},
1280 {RZ, 4},
1281 {BZ, 3},
1282 {D, 0},
1283 {D, 1},
1284 {D, 2},
1285 {D, 3},
1286 {D, 4},
1287 },
1288
1289 {
1290 // Mode 2 (0x01) - 7 6 6 6
1291 {M, 0},
1292 {M, 1},
1293 {GY, 5},
1294 {GZ, 4},
1295 {GZ, 5},
1296 {RW, 0},
1297 {RW, 1},
1298 {RW, 2},
1299 {RW, 3},
1300 {RW, 4},
1301 {RW, 5},
1302 {RW, 6},
1303 {BZ, 0},
1304 {BZ, 1},
1305 {BY, 4},
1306 {GW, 0},
1307 {GW, 1},
1308 {GW, 2},
1309 {GW, 3},
1310 {GW, 4},
1311 {GW, 5},
1312 {GW, 6},
1313 {BY, 5},
1314 {BZ, 2},
1315 {GY, 4},
1316 {BW, 0},
1317 {BW, 1},
1318 {BW, 2},
1319 {BW, 3},
1320 {BW, 4},
1321 {BW, 5},
1322 {BW, 6},
1323 {BZ, 3},
1324 {BZ, 5},
1325 {BZ, 4},
1326 {RX, 0},
1327 {RX, 1},
1328 {RX, 2},
1329 {RX, 3},
1330 {RX, 4},
1331 {RX, 5},
1332 {GY, 0},
1333 {GY, 1},
1334 {GY, 2},
1335 {GY, 3},
1336 {GX, 0},
1337 {GX, 1},
1338 {GX, 2},
1339 {GX, 3},
1340 {GX, 4},
1341 {GX, 5},
1342 {GZ, 0},
1343 {GZ, 1},
1344 {GZ, 2},
1345 {GZ, 3},
1346 {BX, 0},
1347 {BX, 1},
1348 {BX, 2},
1349 {BX, 3},
1350 {BX, 4},
1351 {BX, 5},
1352 {BY, 0},
1353 {BY, 1},
1354 {BY, 2},
1355 {BY, 3},
1356 {RY, 0},
1357 {RY, 1},
1358 {RY, 2},
1359 {RY, 3},
1360 {RY, 4},
1361 {RY, 5},
1362 {RZ, 0},
1363 {RZ, 1},
1364 {RZ, 2},
1365 {RZ, 3},
1366 {RZ, 4},
1367 {RZ, 5},
1368 {D, 0},
1369 {D, 1},
1370 {D, 2},
1371 {D, 3},
1372 {D, 4},
1373 },
1374
1375 {
1376 // Mode 3 (0x02) - 11 5 4 4
1377 {M, 0},
1378 {M, 1},
1379 {M, 2},
1380 {M, 3},
1381 {M, 4},
1382 {RW, 0},
1383 {RW, 1},
1384 {RW, 2},
1385 {RW, 3},
1386 {RW, 4},
1387 {RW, 5},
1388 {RW, 6},
1389 {RW, 7},
1390 {RW, 8},
1391 {RW, 9},
1392 {GW, 0},
1393 {GW, 1},
1394 {GW, 2},
1395 {GW, 3},
1396 {GW, 4},
1397 {GW, 5},
1398 {GW, 6},
1399 {GW, 7},
1400 {GW, 8},
1401 {GW, 9},
1402 {BW, 0},
1403 {BW, 1},
1404 {BW, 2},
1405 {BW, 3},
1406 {BW, 4},
1407 {BW, 5},
1408 {BW, 6},
1409 {BW, 7},
1410 {BW, 8},
1411 {BW, 9},
1412 {RX, 0},
1413 {RX, 1},
1414 {RX, 2},
1415 {RX, 3},
1416 {RX, 4},
1417 {RW, 10},
1418 {GY, 0},
1419 {GY, 1},
1420 {GY, 2},
1421 {GY, 3},
1422 {GX, 0},
1423 {GX, 1},
1424 {GX, 2},
1425 {GX, 3},
1426 {GW, 10},
1427 {BZ, 0},
1428 {GZ, 0},
1429 {GZ, 1},
1430 {GZ, 2},
1431 {GZ, 3},
1432 {BX, 0},
1433 {BX, 1},
1434 {BX, 2},
1435 {BX, 3},
1436 {BW, 10},
1437 {BZ, 1},
1438 {BY, 0},
1439 {BY, 1},
1440 {BY, 2},
1441 {BY, 3},
1442 {RY, 0},
1443 {RY, 1},
1444 {RY, 2},
1445 {RY, 3},
1446 {RY, 4},
1447 {BZ, 2},
1448 {RZ, 0},
1449 {RZ, 1},
1450 {RZ, 2},
1451 {RZ, 3},
1452 {RZ, 4},
1453 {BZ, 3},
1454 {D, 0},
1455 {D, 1},
1456 {D, 2},
1457 {D, 3},
1458 {D, 4},
1459 },
1460
1461 {
1462 // Mode 4 (0x06) - 11 4 5 4
1463 {M, 0},
1464 {M, 1},
1465 {M, 2},
1466 {M, 3},
1467 {M, 4},
1468 {RW, 0},
1469 {RW, 1},
1470 {RW, 2},
1471 {RW, 3},
1472 {RW, 4},
1473 {RW, 5},
1474 {RW, 6},
1475 {RW, 7},
1476 {RW, 8},
1477 {RW, 9},
1478 {GW, 0},
1479 {GW, 1},
1480 {GW, 2},
1481 {GW, 3},
1482 {GW, 4},
1483 {GW, 5},
1484 {GW, 6},
1485 {GW, 7},
1486 {GW, 8},
1487 {GW, 9},
1488 {BW, 0},
1489 {BW, 1},
1490 {BW, 2},
1491 {BW, 3},
1492 {BW, 4},
1493 {BW, 5},
1494 {BW, 6},
1495 {BW, 7},
1496 {BW, 8},
1497 {BW, 9},
1498 {RX, 0},
1499 {RX, 1},
1500 {RX, 2},
1501 {RX, 3},
1502 {RW, 10},
1503 {GZ, 4},
1504 {GY, 0},
1505 {GY, 1},
1506 {GY, 2},
1507 {GY, 3},
1508 {GX, 0},
1509 {GX, 1},
1510 {GX, 2},
1511 {GX, 3},
1512 {GX, 4},
1513 {GW, 10},
1514 {GZ, 0},
1515 {GZ, 1},
1516 {GZ, 2},
1517 {GZ, 3},
1518 {BX, 0},
1519 {BX, 1},
1520 {BX, 2},
1521 {BX, 3},
1522 {BW, 10},
1523 {BZ, 1},
1524 {BY, 0},
1525 {BY, 1},
1526 {BY, 2},
1527 {BY, 3},
1528 {RY, 0},
1529 {RY, 1},
1530 {RY, 2},
1531 {RY, 3},
1532 {BZ, 0},
1533 {BZ, 2},
1534 {RZ, 0},
1535 {RZ, 1},
1536 {RZ, 2},
1537 {RZ, 3},
1538 {GY, 4},
1539 {BZ, 3},
1540 {D, 0},
1541 {D, 1},
1542 {D, 2},
1543 {D, 3},
1544 {D, 4},
1545 },
1546
1547 {
1548 // Mode 5 (0x0a) - 11 4 4 5
1549 {M, 0},
1550 {M, 1},
1551 {M, 2},
1552 {M, 3},
1553 {M, 4},
1554 {RW, 0},
1555 {RW, 1},
1556 {RW, 2},
1557 {RW, 3},
1558 {RW, 4},
1559 {RW, 5},
1560 {RW, 6},
1561 {RW, 7},
1562 {RW, 8},
1563 {RW, 9},
1564 {GW, 0},
1565 {GW, 1},
1566 {GW, 2},
1567 {GW, 3},
1568 {GW, 4},
1569 {GW, 5},
1570 {GW, 6},
1571 {GW, 7},
1572 {GW, 8},
1573 {GW, 9},
1574 {BW, 0},
1575 {BW, 1},
1576 {BW, 2},
1577 {BW, 3},
1578 {BW, 4},
1579 {BW, 5},
1580 {BW, 6},
1581 {BW, 7},
1582 {BW, 8},
1583 {BW, 9},
1584 {RX, 0},
1585 {RX, 1},
1586 {RX, 2},
1587 {RX, 3},
1588 {RW, 10},
1589 {BY, 4},
1590 {GY, 0},
1591 {GY, 1},
1592 {GY, 2},
1593 {GY, 3},
1594 {GX, 0},
1595 {GX, 1},
1596 {GX, 2},
1597 {GX, 3},
1598 {GW, 10},
1599 {BZ, 0},
1600 {GZ, 0},
1601 {GZ, 1},
1602 {GZ, 2},
1603 {GZ, 3},
1604 {BX, 0},
1605 {BX, 1},
1606 {BX, 2},
1607 {BX, 3},
1608 {BX, 4},
1609 {BW, 10},
1610 {BY, 0},
1611 {BY, 1},
1612 {BY, 2},
1613 {BY, 3},
1614 {RY, 0},
1615 {RY, 1},
1616 {RY, 2},
1617 {RY, 3},
1618 {BZ, 1},
1619 {BZ, 2},
1620 {RZ, 0},
1621 {RZ, 1},
1622 {RZ, 2},
1623 {RZ, 3},
1624 {BZ, 4},
1625 {BZ, 3},
1626 {D, 0},
1627 {D, 1},
1628 {D, 2},
1629 {D, 3},
1630 {D, 4},
1631 },
1632
1633 {
1634 // Mode 6 (0x0e) - 9 5 5 5
1635 {M, 0},
1636 {M, 1},
1637 {M, 2},
1638 {M, 3},
1639 {M, 4},
1640 {RW, 0},
1641 {RW, 1},
1642 {RW, 2},
1643 {RW, 3},
1644 {RW, 4},
1645 {RW, 5},
1646 {RW, 6},
1647 {RW, 7},
1648 {RW, 8},
1649 {BY, 4},
1650 {GW, 0},
1651 {GW, 1},
1652 {GW, 2},
1653 {GW, 3},
1654 {GW, 4},
1655 {GW, 5},
1656 {GW, 6},
1657 {GW, 7},
1658 {GW, 8},
1659 {GY, 4},
1660 {BW, 0},
1661 {BW, 1},
1662 {BW, 2},
1663 {BW, 3},
1664 {BW, 4},
1665 {BW, 5},
1666 {BW, 6},
1667 {BW, 7},
1668 {BW, 8},
1669 {BZ, 4},
1670 {RX, 0},
1671 {RX, 1},
1672 {RX, 2},
1673 {RX, 3},
1674 {RX, 4},
1675 {GZ, 4},
1676 {GY, 0},
1677 {GY, 1},
1678 {GY, 2},
1679 {GY, 3},
1680 {GX, 0},
1681 {GX, 1},
1682 {GX, 2},
1683 {GX, 3},
1684 {GX, 4},
1685 {BZ, 0},
1686 {GZ, 0},
1687 {GZ, 1},
1688 {GZ, 2},
1689 {GZ, 3},
1690 {BX, 0},
1691 {BX, 1},
1692 {BX, 2},
1693 {BX, 3},
1694 {BX, 4},
1695 {BZ, 1},
1696 {BY, 0},
1697 {BY, 1},
1698 {BY, 2},
1699 {BY, 3},
1700 {RY, 0},
1701 {RY, 1},
1702 {RY, 2},
1703 {RY, 3},
1704 {RY, 4},
1705 {BZ, 2},
1706 {RZ, 0},
1707 {RZ, 1},
1708 {RZ, 2},
1709 {RZ, 3},
1710 {RZ, 4},
1711 {BZ, 3},
1712 {D, 0},
1713 {D, 1},
1714 {D, 2},
1715 {D, 3},
1716 {D, 4},
1717 },
1718
1719 {
1720 // Mode 7 (0x12) - 8 6 5 5
1721 {M, 0},
1722 {M, 1},
1723 {M, 2},
1724 {M, 3},
1725 {M, 4},
1726 {RW, 0},
1727 {RW, 1},
1728 {RW, 2},
1729 {RW, 3},
1730 {RW, 4},
1731 {RW, 5},
1732 {RW, 6},
1733 {RW, 7},
1734 {GZ, 4},
1735 {BY, 4},
1736 {GW, 0},
1737 {GW, 1},
1738 {GW, 2},
1739 {GW, 3},
1740 {GW, 4},
1741 {GW, 5},
1742 {GW, 6},
1743 {GW, 7},
1744 {BZ, 2},
1745 {GY, 4},
1746 {BW, 0},
1747 {BW, 1},
1748 {BW, 2},
1749 {BW, 3},
1750 {BW, 4},
1751 {BW, 5},
1752 {BW, 6},
1753 {BW, 7},
1754 {BZ, 3},
1755 {BZ, 4},
1756 {RX, 0},
1757 {RX, 1},
1758 {RX, 2},
1759 {RX, 3},
1760 {RX, 4},
1761 {RX, 5},
1762 {GY, 0},
1763 {GY, 1},
1764 {GY, 2},
1765 {GY, 3},
1766 {GX, 0},
1767 {GX, 1},
1768 {GX, 2},
1769 {GX, 3},
1770 {GX, 4},
1771 {BZ, 0},
1772 {GZ, 0},
1773 {GZ, 1},
1774 {GZ, 2},
1775 {GZ, 3},
1776 {BX, 0},
1777 {BX, 1},
1778 {BX, 2},
1779 {BX, 3},
1780 {BX, 4},
1781 {BZ, 1},
1782 {BY, 0},
1783 {BY, 1},
1784 {BY, 2},
1785 {BY, 3},
1786 {RY, 0},
1787 {RY, 1},
1788 {RY, 2},
1789 {RY, 3},
1790 {RY, 4},
1791 {RY, 5},
1792 {RZ, 0},
1793 {RZ, 1},
1794 {RZ, 2},
1795 {RZ, 3},
1796 {RZ, 4},
1797 {RZ, 5},
1798 {D, 0},
1799 {D, 1},
1800 {D, 2},
1801 {D, 3},
1802 {D, 4},
1803 },
1804
1805 {
1806 // Mode 8 (0x16) - 8 5 6 5
1807 {M, 0},
1808 {M, 1},
1809 {M, 2},
1810 {M, 3},
1811 {M, 4},
1812 {RW, 0},
1813 {RW, 1},
1814 {RW, 2},
1815 {RW, 3},
1816 {RW, 4},
1817 {RW, 5},
1818 {RW, 6},
1819 {RW, 7},
1820 {BZ, 0},
1821 {BY, 4},
1822 {GW, 0},
1823 {GW, 1},
1824 {GW, 2},
1825 {GW, 3},
1826 {GW, 4},
1827 {GW, 5},
1828 {GW, 6},
1829 {GW, 7},
1830 {GY, 5},
1831 {GY, 4},
1832 {BW, 0},
1833 {BW, 1},
1834 {BW, 2},
1835 {BW, 3},
1836 {BW, 4},
1837 {BW, 5},
1838 {BW, 6},
1839 {BW, 7},
1840 {GZ, 5},
1841 {BZ, 4},
1842 {RX, 0},
1843 {RX, 1},
1844 {RX, 2},
1845 {RX, 3},
1846 {RX, 4},
1847 {GZ, 4},
1848 {GY, 0},
1849 {GY, 1},
1850 {GY, 2},
1851 {GY, 3},
1852 {GX, 0},
1853 {GX, 1},
1854 {GX, 2},
1855 {GX, 3},
1856 {GX, 4},
1857 {GX, 5},
1858 {GZ, 0},
1859 {GZ, 1},
1860 {GZ, 2},
1861 {GZ, 3},
1862 {BX, 0},
1863 {BX, 1},
1864 {BX, 2},
1865 {BX, 3},
1866 {BX, 4},
1867 {BZ, 1},
1868 {BY, 0},
1869 {BY, 1},
1870 {BY, 2},
1871 {BY, 3},
1872 {RY, 0},
1873 {RY, 1},
1874 {RY, 2},
1875 {RY, 3},
1876 {RY, 4},
1877 {BZ, 2},
1878 {RZ, 0},
1879 {RZ, 1},
1880 {RZ, 2},
1881 {RZ, 3},
1882 {RZ, 4},
1883 {BZ, 3},
1884 {D, 0},
1885 {D, 1},
1886 {D, 2},
1887 {D, 3},
1888 {D, 4},
1889 },
1890
1891 {
1892 // Mode 9 (0x1a) - 8 5 5 6
1893 {M, 0},
1894 {M, 1},
1895 {M, 2},
1896 {M, 3},
1897 {M, 4},
1898 {RW, 0},
1899 {RW, 1},
1900 {RW, 2},
1901 {RW, 3},
1902 {RW, 4},
1903 {RW, 5},
1904 {RW, 6},
1905 {RW, 7},
1906 {BZ, 1},
1907 {BY, 4},
1908 {GW, 0},
1909 {GW, 1},
1910 {GW, 2},
1911 {GW, 3},
1912 {GW, 4},
1913 {GW, 5},
1914 {GW, 6},
1915 {GW, 7},
1916 {BY, 5},
1917 {GY, 4},
1918 {BW, 0},
1919 {BW, 1},
1920 {BW, 2},
1921 {BW, 3},
1922 {BW, 4},
1923 {BW, 5},
1924 {BW, 6},
1925 {BW, 7},
1926 {BZ, 5},
1927 {BZ, 4},
1928 {RX, 0},
1929 {RX, 1},
1930 {RX, 2},
1931 {RX, 3},
1932 {RX, 4},
1933 {GZ, 4},
1934 {GY, 0},
1935 {GY, 1},
1936 {GY, 2},
1937 {GY, 3},
1938 {GX, 0},
1939 {GX, 1},
1940 {GX, 2},
1941 {GX, 3},
1942 {GX, 4},
1943 {BZ, 0},
1944 {GZ, 0},
1945 {GZ, 1},
1946 {GZ, 2},
1947 {GZ, 3},
1948 {BX, 0},
1949 {BX, 1},
1950 {BX, 2},
1951 {BX, 3},
1952 {BX, 4},
1953 {BX, 5},
1954 {BY, 0},
1955 {BY, 1},
1956 {BY, 2},
1957 {BY, 3},
1958 {RY, 0},
1959 {RY, 1},
1960 {RY, 2},
1961 {RY, 3},
1962 {RY, 4},
1963 {BZ, 2},
1964 {RZ, 0},
1965 {RZ, 1},
1966 {RZ, 2},
1967 {RZ, 3},
1968 {RZ, 4},
1969 {BZ, 3},
1970 {D, 0},
1971 {D, 1},
1972 {D, 2},
1973 {D, 3},
1974 {D, 4},
1975 },
1976
1977 {
1978 // Mode 10 (0x1e) - 6 6 6 6
1979 {M, 0},
1980 {M, 1},
1981 {M, 2},
1982 {M, 3},
1983 {M, 4},
1984 {RW, 0},
1985 {RW, 1},
1986 {RW, 2},
1987 {RW, 3},
1988 {RW, 4},
1989 {RW, 5},
1990 {GZ, 4},
1991 {BZ, 0},
1992 {BZ, 1},
1993 {BY, 4},
1994 {GW, 0},
1995 {GW, 1},
1996 {GW, 2},
1997 {GW, 3},
1998 {GW, 4},
1999 {GW, 5},
2000 {GY, 5},
2001 {BY, 5},
2002 {BZ, 2},
2003 {GY, 4},
2004 {BW, 0},
2005 {BW, 1},
2006 {BW, 2},
2007 {BW, 3},
2008 {BW, 4},
2009 {BW, 5},
2010 {GZ, 5},
2011 {BZ, 3},
2012 {BZ, 5},
2013 {BZ, 4},
2014 {RX, 0},
2015 {RX, 1},
2016 {RX, 2},
2017 {RX, 3},
2018 {RX, 4},
2019 {RX, 5},
2020 {GY, 0},
2021 {GY, 1},
2022 {GY, 2},
2023 {GY, 3},
2024 {GX, 0},
2025 {GX, 1},
2026 {GX, 2},
2027 {GX, 3},
2028 {GX, 4},
2029 {GX, 5},
2030 {GZ, 0},
2031 {GZ, 1},
2032 {GZ, 2},
2033 {GZ, 3},
2034 {BX, 0},
2035 {BX, 1},
2036 {BX, 2},
2037 {BX, 3},
2038 {BX, 4},
2039 {BX, 5},
2040 {BY, 0},
2041 {BY, 1},
2042 {BY, 2},
2043 {BY, 3},
2044 {RY, 0},
2045 {RY, 1},
2046 {RY, 2},
2047 {RY, 3},
2048 {RY, 4},
2049 {RY, 5},
2050 {RZ, 0},
2051 {RZ, 1},
2052 {RZ, 2},
2053 {RZ, 3},
2054 {RZ, 4},
2055 {RZ, 5},
2056 {D, 0},
2057 {D, 1},
2058 {D, 2},
2059 {D, 3},
2060 {D, 4},
2061 },
2062
2063 {
2064 // Mode 11 (0x03) - 10 10
2065 {M, 0},
2066 {M, 1},
2067 {M, 2},
2068 {M, 3},
2069 {M, 4},
2070 {RW, 0},
2071 {RW, 1},
2072 {RW, 2},
2073 {RW, 3},
2074 {RW, 4},
2075 {RW, 5},
2076 {RW, 6},
2077 {RW, 7},
2078 {RW, 8},
2079 {RW, 9},
2080 {GW, 0},
2081 {GW, 1},
2082 {GW, 2},
2083 {GW, 3},
2084 {GW, 4},
2085 {GW, 5},
2086 {GW, 6},
2087 {GW, 7},
2088 {GW, 8},
2089 {GW, 9},
2090 {BW, 0},
2091 {BW, 1},
2092 {BW, 2},
2093 {BW, 3},
2094 {BW, 4},
2095 {BW, 5},
2096 {BW, 6},
2097 {BW, 7},
2098 {BW, 8},
2099 {BW, 9},
2100 {RX, 0},
2101 {RX, 1},
2102 {RX, 2},
2103 {RX, 3},
2104 {RX, 4},
2105 {RX, 5},
2106 {RX, 6},
2107 {RX, 7},
2108 {RX, 8},
2109 {RX, 9},
2110 {GX, 0},
2111 {GX, 1},
2112 {GX, 2},
2113 {GX, 3},
2114 {GX, 4},
2115 {GX, 5},
2116 {GX, 6},
2117 {GX, 7},
2118 {GX, 8},
2119 {GX, 9},
2120 {BX, 0},
2121 {BX, 1},
2122 {BX, 2},
2123 {BX, 3},
2124 {BX, 4},
2125 {BX, 5},
2126 {BX, 6},
2127 {BX, 7},
2128 {BX, 8},
2129 {BX, 9},
2130 {NA, 0},
2131 {NA, 0},
2132 {NA, 0},
2133 {NA, 0},
2134 {NA, 0},
2135 {NA, 0},
2136 {NA, 0},
2137 {NA, 0},
2138 {NA, 0},
2139 {NA, 0},
2140 {NA, 0},
2141 {NA, 0},
2142 {NA, 0},
2143 {NA, 0},
2144 {NA, 0},
2145 {NA, 0},
2146 {NA, 0},
2147 },
2148
2149 {
2150 // Mode 12 (0x07) - 11 9
2151 {M, 0},
2152 {M, 1},
2153 {M, 2},
2154 {M, 3},
2155 {M, 4},
2156 {RW, 0},
2157 {RW, 1},
2158 {RW, 2},
2159 {RW, 3},
2160 {RW, 4},
2161 {RW, 5},
2162 {RW, 6},
2163 {RW, 7},
2164 {RW, 8},
2165 {RW, 9},
2166 {GW, 0},
2167 {GW, 1},
2168 {GW, 2},
2169 {GW, 3},
2170 {GW, 4},
2171 {GW, 5},
2172 {GW, 6},
2173 {GW, 7},
2174 {GW, 8},
2175 {GW, 9},
2176 {BW, 0},
2177 {BW, 1},
2178 {BW, 2},
2179 {BW, 3},
2180 {BW, 4},
2181 {BW, 5},
2182 {BW, 6},
2183 {BW, 7},
2184 {BW, 8},
2185 {BW, 9},
2186 {RX, 0},
2187 {RX, 1},
2188 {RX, 2},
2189 {RX, 3},
2190 {RX, 4},
2191 {RX, 5},
2192 {RX, 6},
2193 {RX, 7},
2194 {RX, 8},
2195 {RW, 10},
2196 {GX, 0},
2197 {GX, 1},
2198 {GX, 2},
2199 {GX, 3},
2200 {GX, 4},
2201 {GX, 5},
2202 {GX, 6},
2203 {GX, 7},
2204 {GX, 8},
2205 {GW, 10},
2206 {BX, 0},
2207 {BX, 1},
2208 {BX, 2},
2209 {BX, 3},
2210 {BX, 4},
2211 {BX, 5},
2212 {BX, 6},
2213 {BX, 7},
2214 {BX, 8},
2215 {BW, 10},
2216 {NA, 0},
2217 {NA, 0},
2218 {NA, 0},
2219 {NA, 0},
2220 {NA, 0},
2221 {NA, 0},
2222 {NA, 0},
2223 {NA, 0},
2224 {NA, 0},
2225 {NA, 0},
2226 {NA, 0},
2227 {NA, 0},
2228 {NA, 0},
2229 {NA, 0},
2230 {NA, 0},
2231 {NA, 0},
2232 {NA, 0},
2233 },
2234
2235 {
2236 // Mode 13 (0x0b) - 12 8
2237 {M, 0},
2238 {M, 1},
2239 {M, 2},
2240 {M, 3},
2241 {M, 4},
2242 {RW, 0},
2243 {RW, 1},
2244 {RW, 2},
2245 {RW, 3},
2246 {RW, 4},
2247 {RW, 5},
2248 {RW, 6},
2249 {RW, 7},
2250 {RW, 8},
2251 {RW, 9},
2252 {GW, 0},
2253 {GW, 1},
2254 {GW, 2},
2255 {GW, 3},
2256 {GW, 4},
2257 {GW, 5},
2258 {GW, 6},
2259 {GW, 7},
2260 {GW, 8},
2261 {GW, 9},
2262 {BW, 0},
2263 {BW, 1},
2264 {BW, 2},
2265 {BW, 3},
2266 {BW, 4},
2267 {BW, 5},
2268 {BW, 6},
2269 {BW, 7},
2270 {BW, 8},
2271 {BW, 9},
2272 {RX, 0},
2273 {RX, 1},
2274 {RX, 2},
2275 {RX, 3},
2276 {RX, 4},
2277 {RX, 5},
2278 {RX, 6},
2279 {RX, 7},
2280 {RW, 11},
2281 {RW, 10},
2282 {GX, 0},
2283 {GX, 1},
2284 {GX, 2},
2285 {GX, 3},
2286 {GX, 4},
2287 {GX, 5},
2288 {GX, 6},
2289 {GX, 7},
2290 {GW, 11},
2291 {GW, 10},
2292 {BX, 0},
2293 {BX, 1},
2294 {BX, 2},
2295 {BX, 3},
2296 {BX, 4},
2297 {BX, 5},
2298 {BX, 6},
2299 {BX, 7},
2300 {BW, 11},
2301 {BW, 10},
2302 {NA, 0},
2303 {NA, 0},
2304 {NA, 0},
2305 {NA, 0},
2306 {NA, 0},
2307 {NA, 0},
2308 {NA, 0},
2309 {NA, 0},
2310 {NA, 0},
2311 {NA, 0},
2312 {NA, 0},
2313 {NA, 0},
2314 {NA, 0},
2315 {NA, 0},
2316 {NA, 0},
2317 {NA, 0},
2318 {NA, 0},
2319 },
2320
2321 {
2322 // Mode 14 (0x0f) - 16 4
2323 {M, 0},
2324 {M, 1},
2325 {M, 2},
2326 {M, 3},
2327 {M, 4},
2328 {RW, 0},
2329 {RW, 1},
2330 {RW, 2},
2331 {RW, 3},
2332 {RW, 4},
2333 {RW, 5},
2334 {RW, 6},
2335 {RW, 7},
2336 {RW, 8},
2337 {RW, 9},
2338 {GW, 0},
2339 {GW, 1},
2340 {GW, 2},
2341 {GW, 3},
2342 {GW, 4},
2343 {GW, 5},
2344 {GW, 6},
2345 {GW, 7},
2346 {GW, 8},
2347 {GW, 9},
2348 {BW, 0},
2349 {BW, 1},
2350 {BW, 2},
2351 {BW, 3},
2352 {BW, 4},
2353 {BW, 5},
2354 {BW, 6},
2355 {BW, 7},
2356 {BW, 8},
2357 {BW, 9},
2358 {RX, 0},
2359 {RX, 1},
2360 {RX, 2},
2361 {RX, 3},
2362 {RW, 15},
2363 {RW, 14},
2364 {RW, 13},
2365 {RW, 12},
2366 {RW, 11},
2367 {RW, 10},
2368 {GX, 0},
2369 {GX, 1},
2370 {GX, 2},
2371 {GX, 3},
2372 {GW, 15},
2373 {GW, 14},
2374 {GW, 13},
2375 {GW, 12},
2376 {GW, 11},
2377 {GW, 10},
2378 {BX, 0},
2379 {BX, 1},
2380 {BX, 2},
2381 {BX, 3},
2382 {BW, 15},
2383 {BW, 14},
2384 {BW, 13},
2385 {BW, 12},
2386 {BW, 11},
2387 {BW, 10},
2388 {NA, 0},
2389 {NA, 0},
2390 {NA, 0},
2391 {NA, 0},
2392 {NA, 0},
2393 {NA, 0},
2394 {NA, 0},
2395 {NA, 0},
2396 {NA, 0},
2397 {NA, 0},
2398 {NA, 0},
2399 {NA, 0},
2400 {NA, 0},
2401 {NA, 0},
2402 {NA, 0},
2403 {NA, 0},
2404 {NA, 0},
2405 },
2406};
2407
2408// Mode, Partitions, Transformed, IndexPrec, RGBAPrec
2409const D3DX_BC6H::ModeInfo D3DX_BC6H::ms_aInfo[] =
2410 {
2411 {0x00, 1, true, 3, {{LDRColorA(10, 10, 10, 0), LDRColorA(5, 5, 5, 0)}, {LDRColorA(5, 5, 5, 0), LDRColorA(5, 5, 5, 0)}}}, // Mode 1
2412 {0x01, 1, true, 3, {{LDRColorA(7, 7, 7, 0), LDRColorA(6, 6, 6, 0)}, {LDRColorA(6, 6, 6, 0), LDRColorA(6, 6, 6, 0)}}}, // Mode 2
2413 {0x02, 1, true, 3, {{LDRColorA(11, 11, 11, 0), LDRColorA(5, 4, 4, 0)}, {LDRColorA(5, 4, 4, 0), LDRColorA(5, 4, 4, 0)}}}, // Mode 3
2414 {0x06, 1, true, 3, {{LDRColorA(11, 11, 11, 0), LDRColorA(4, 5, 4, 0)}, {LDRColorA(4, 5, 4, 0), LDRColorA(4, 5, 4, 0)}}}, // Mode 4
2415 {0x0a, 1, true, 3, {{LDRColorA(11, 11, 11, 0), LDRColorA(4, 4, 5, 0)}, {LDRColorA(4, 4, 5, 0), LDRColorA(4, 4, 5, 0)}}}, // Mode 5
2416 {0x0e, 1, true, 3, {{LDRColorA(9, 9, 9, 0), LDRColorA(5, 5, 5, 0)}, {LDRColorA(5, 5, 5, 0), LDRColorA(5, 5, 5, 0)}}}, // Mode 6
2417 {0x12, 1, true, 3, {{LDRColorA(8, 8, 8, 0), LDRColorA(6, 5, 5, 0)}, {LDRColorA(6, 5, 5, 0), LDRColorA(6, 5, 5, 0)}}}, // Mode 7
2418 {0x16, 1, true, 3, {{LDRColorA(8, 8, 8, 0), LDRColorA(5, 6, 5, 0)}, {LDRColorA(5, 6, 5, 0), LDRColorA(5, 6, 5, 0)}}}, // Mode 8
2419 {0x1a, 1, true, 3, {{LDRColorA(8, 8, 8, 0), LDRColorA(5, 5, 6, 0)}, {LDRColorA(5, 5, 6, 0), LDRColorA(5, 5, 6, 0)}}}, // Mode 9
2420 {0x1e, 1, false, 3, {{LDRColorA(6, 6, 6, 0), LDRColorA(6, 6, 6, 0)}, {LDRColorA(6, 6, 6, 0), LDRColorA(6, 6, 6, 0)}}}, // Mode 10
2421 {0x03, 0, false, 4, {{LDRColorA(10, 10, 10, 0), LDRColorA(10, 10, 10, 0)}, {LDRColorA(0, 0, 0, 0), LDRColorA(0, 0, 0, 0)}}}, // Mode 11
2422 {0x07, 0, true, 4, {{LDRColorA(11, 11, 11, 0), LDRColorA(9, 9, 9, 0)}, {LDRColorA(0, 0, 0, 0), LDRColorA(0, 0, 0, 0)}}}, // Mode 12
2423 {0x0b, 0, true, 4, {{LDRColorA(12, 12, 12, 0), LDRColorA(8, 8, 8, 0)}, {LDRColorA(0, 0, 0, 0), LDRColorA(0, 0, 0, 0)}}}, // Mode 13
2424 {0x0f, 0, true, 4, {{LDRColorA(16, 16, 16, 0), LDRColorA(4, 4, 4, 0)}, {LDRColorA(0, 0, 0, 0), LDRColorA(0, 0, 0, 0)}}}, // Mode 14
2425};
2426
2427const int D3DX_BC6H::ms_aModeToInfo[] =
2428 {
2429 0, // Mode 1 - 0x00
2430 1, // Mode 2 - 0x01
2431 2, // Mode 3 - 0x02
2432 10, // Mode 11 - 0x03
2433 -1, // Invalid - 0x04
2434 -1, // Invalid - 0x05
2435 3, // Mode 4 - 0x06
2436 11, // Mode 12 - 0x07
2437 -1, // Invalid - 0x08
2438 -1, // Invalid - 0x09
2439 4, // Mode 5 - 0x0a
2440 12, // Mode 13 - 0x0b
2441 -1, // Invalid - 0x0c
2442 -1, // Invalid - 0x0d
2443 5, // Mode 6 - 0x0e
2444 13, // Mode 14 - 0x0f
2445 -1, // Invalid - 0x10
2446 -1, // Invalid - 0x11
2447 6, // Mode 7 - 0x12
2448 -1, // Reserved - 0x13
2449 -1, // Invalid - 0x14
2450 -1, // Invalid - 0x15
2451 7, // Mode 8 - 0x16
2452 -1, // Reserved - 0x17
2453 -1, // Invalid - 0x18
2454 -1, // Invalid - 0x19
2455 8, // Mode 9 - 0x1a
2456 -1, // Reserved - 0x1b
2457 -1, // Invalid - 0x1c
2458 -1, // Invalid - 0x1d
2459 9, // Mode 10 - 0x1e
2460 -1, // Reserved - 0x1f
2461};
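// Worked example of the two tables above: a 5-bit mode field of 0x03 gives
// ms_aModeToInfo[0x03] == 10, so the block is decoded with ms_aInfo[10], i.e.
// "Mode 11": one region, untransformed 10.10.10 endpoints, 4-bit indices.
// An entry of -1 marks a mode field that is invalid or reserved.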
2462
2463//-------------------------------------------------------------------------------------
2464// Helper functions
2465//-------------------------------------------------------------------------------------
2466BC6H_INLINE bool IsFixUpOffset(size_t uPartitions, size_t uShape, size_t uOffset) noexcept
2467{
2468 BC6H_ASSERT(uPartitions < 3 && uShape < BC6H_MAX_SHAPES && uOffset < 16);
2469 for (size_t p = 0; p <= uPartitions; p++)
2470 {
2471 if (uOffset == g_aFixUp[uPartitions][uShape][p])
2472 {
2473 return true;
2474 }
2475 }
2476 return false;
2477}
2478
2479BC6H_INLINE void TransformForward(INTEndPntPair aEndPts[]) noexcept
2480{
2481 aEndPts[0].B -= aEndPts[0].A;
2482 aEndPts[1].A -= aEndPts[0].A;
2483 aEndPts[1].B -= aEndPts[0].A;
2484}
2485
2486BC6H_INLINE void TransformInverse(INTEndPntPair aEndPts[], const LDRColorA& Prec, bool bSigned) noexcept
2487{
2488 const INTColor WrapMask((1 << Prec.r) - 1, (1 << Prec.g) - 1, (1 << Prec.b) - 1);
2489 aEndPts[0].B += aEndPts[0].A;
2490 aEndPts[0].B &= WrapMask;
2491 aEndPts[1].A += aEndPts[0].A;
2492 aEndPts[1].A &= WrapMask;
2493 aEndPts[1].B += aEndPts[0].A;
2494 aEndPts[1].B &= WrapMask;
2495 if (bSigned)
2496 {
2497 aEndPts[0].B.SignExtend(Prec);
2498 aEndPts[1].A.SignExtend(Prec);
2499 aEndPts[1].B.SignExtend(Prec);
2500 }
2501}
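// Worked example of the delta coding above, with 5-bit precision and
// A0.r = 20, B0.r = 25, A1.r = 18, B1.r = 22: TransformForward stores the
// deltas 5, -2 and 2 relative to A0.r; TransformInverse adds A0.r back and
// masks with (1 << 5) - 1, recovering 25, 18 and 22 (then sign-extends when
// bSigned is set).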
2502
2503BC6H_INLINE float Norm(const INTColor& a, const INTColor& b) noexcept
2504{
2505 const float dr = float(a.r) - float(b.r);
2506 const float dg = float(a.g) - float(b.g);
2507 const float db = float(a.b) - float(b.b);
2508 return dr * dr + dg * dg + db * db;
2509}
2510
2511// Return the number of bits needed to store n; handles signed and unsigned cases properly
2512BC6H_INLINE int NBits(int n, bool bIsSigned) noexcept
2513{
2514 int nb;
2515 if (n == 0)
2516 {
2517 return 0; // no bits needed for 0, signed or not
2518 }
2519 else if (n > 0)
2520 {
2521 for (nb = 0; n; ++nb, n >>= 1)
2522 ;
2523 return nb + (bIsSigned ? 1 : 0);
2524 }
2525 else
2526 {
2527 BC6H_ASSERT(bIsSigned);
2528 for (nb = 0; n < -1; ++nb, n >>= 1)
2529 ;
2530 return nb + 1;
2531 }
2532}
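// Worked examples: NBits(0, x) == 0; NBits(5, false) == 3 (101b);
// NBits(5, true) == 4 (one extra sign bit); NBits(-5, true) == 4 (1011b in
// two's complement). Negative inputs are only valid with bIsSigned == true.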
2533
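// Fit an endpoint pair (pX, pY) to the cPixels colors selected by pIndex:
// start from the per-channel min/max box, pick the best of the four diagonal
// orientations, then refine both endpoints with a few Newton iterations
// against the cSteps-entry palette weights (pC3/pD3 or pC4/pD4).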
2534float OptimizeRGB(
2535 const HDRColorA* const pPoints,
2536 HDRColorA* pX,
2537 HDRColorA* pY,
2538 uint32_t cSteps,
2539 size_t cPixels,
2540 const size_t* pIndex) noexcept
2541{
2542 constexpr float fError = FLT_MAX;
2543 const float* pC = (3 == cSteps) ? pC3 : pC4;
2544 const float* pD = (3 == cSteps) ? pD3 : pD4;
2545
2546 // Find Min and Max points, as starting point
2547 HDRColorA X(FLT_MAX, FLT_MAX, FLT_MAX, 0.0f);
2548 HDRColorA Y(-FLT_MAX, -FLT_MAX, -FLT_MAX, 0.0f);
2549
2550 for (size_t iPoint = 0; iPoint < cPixels; iPoint++)
2551 {
2552 if (pPoints[pIndex[iPoint]].r < X.r) X.r = pPoints[pIndex[iPoint]].r;
2553 if (pPoints[pIndex[iPoint]].g < X.g) X.g = pPoints[pIndex[iPoint]].g;
2554 if (pPoints[pIndex[iPoint]].b < X.b) X.b = pPoints[pIndex[iPoint]].b;
2555 if (pPoints[pIndex[iPoint]].r > Y.r) Y.r = pPoints[pIndex[iPoint]].r;
2556 if (pPoints[pIndex[iPoint]].g > Y.g) Y.g = pPoints[pIndex[iPoint]].g;
2557 if (pPoints[pIndex[iPoint]].b > Y.b) Y.b = pPoints[pIndex[iPoint]].b;
2558 }
2559
2560 // Diagonal axis
2561 HDRColorA AB;
2562 AB.r = Y.r - X.r;
2563 AB.g = Y.g - X.g;
2564 AB.b = Y.b - X.b;
2565
2566 const float fAB = AB.r * AB.r + AB.g * AB.g + AB.b * AB.b;
2567
2568 // Single color block.. no need to root-find
2569 if (fAB < FLT_MIN)
2570 {
2571 pX->r = X.r;
2572 pX->g = X.g;
2573 pX->b = X.b;
2574 pY->r = Y.r;
2575 pY->g = Y.g;
2576 pY->b = Y.b;
2577 return 0.0f;
2578 }
2579
2580 // Try all four axis directions, to determine which diagonal best fits data
2581 const float fABInv = 1.0f / fAB;
2582
2583 HDRColorA Dir;
2584 Dir.r = AB.r * fABInv;
2585 Dir.g = AB.g * fABInv;
2586 Dir.b = AB.b * fABInv;
2587
2588 HDRColorA Mid;
2589 Mid.r = (X.r + Y.r) * 0.5f;
2590 Mid.g = (X.g + Y.g) * 0.5f;
2591 Mid.b = (X.b + Y.b) * 0.5f;
2592
2593 float fDir[4];
2594 fDir[0] = fDir[1] = fDir[2] = fDir[3] = 0.0f;
2595
2596 for (size_t iPoint = 0; iPoint < cPixels; iPoint++)
2597 {
2598 HDRColorA Pt;
2599 Pt.r = (pPoints[pIndex[iPoint]].r - Mid.r) * Dir.r;
2600 Pt.g = (pPoints[pIndex[iPoint]].g - Mid.g) * Dir.g;
2601 Pt.b = (pPoints[pIndex[iPoint]].b - Mid.b) * Dir.b;
2602
2603 float f;
2604 f = Pt.r + Pt.g + Pt.b;
2605 fDir[0] += f * f;
2606 f = Pt.r + Pt.g - Pt.b;
2607 fDir[1] += f * f;
2608 f = Pt.r - Pt.g + Pt.b;
2609 fDir[2] += f * f;
2610 f = Pt.r - Pt.g - Pt.b;
2611 fDir[3] += f * f;
2612 }
2613
2614 float fDirMax = fDir[0];
2615 size_t iDirMax = 0;
2616
2617 for (size_t iDir = 1; iDir < 4; iDir++)
2618 {
2619 if (fDir[iDir] > fDirMax)
2620 {
2621 fDirMax = fDir[iDir];
2622 iDirMax = iDir;
2623 }
2624 }
2625
2626 if (iDirMax & 2) std__swap(X.g, Y.g);
2627 if (iDirMax & 1) std__swap(X.b, Y.b);
2628
2629 // Two color block.. no need to root-find
2630 if (fAB < 1.0f / 4096.0f)
2631 {
2632 pX->r = X.r;
2633 pX->g = X.g;
2634 pX->b = X.b;
2635 pY->r = Y.r;
2636 pY->g = Y.g;
2637 pY->b = Y.b;
2638 return 0.0f;
2639 }
2640
2641 // Use Newton's method to find a local minimum of the sum-of-squares error.
2642 auto const fSteps = static_cast<float>(cSteps - 1);
2643
2644 for (size_t iIteration = 0; iIteration < 8; iIteration++)
2645 {
2646 // Calculate new steps
2647 HDRColorA pSteps[4] = {};
2648
2649 for (size_t iStep = 0; iStep < cSteps; iStep++)
2650 {
2651 pSteps[iStep].r = X.r * pC[iStep] + Y.r * pD[iStep];
2652 pSteps[iStep].g = X.g * pC[iStep] + Y.g * pD[iStep];
2653 pSteps[iStep].b = X.b * pC[iStep] + Y.b * pD[iStep];
2654 }
2655
2656 // Calculate color direction
2657 Dir.r = Y.r - X.r;
2658 Dir.g = Y.g - X.g;
2659 Dir.b = Y.b - X.b;
2660
2661 const float fLen = (Dir.r * Dir.r + Dir.g * Dir.g + Dir.b * Dir.b);
2662
2663 if (fLen < (1.0f / 4096.0f))
2664 break;
2665
2666 const float fScale = fSteps / fLen;
2667
2668 Dir.r *= fScale;
2669 Dir.g *= fScale;
2670 Dir.b *= fScale;
2671
2672 // Evaluate function, and derivatives
2673 float d2X = 0.0f, d2Y = 0.0f;
2674 HDRColorA dX(0.0f, 0.0f, 0.0f, 0.0f), dY(0.0f, 0.0f, 0.0f, 0.0f);
2675
2676 for (size_t iPoint = 0; iPoint < cPixels; iPoint++)
2677 {
2678 const float fDot = (pPoints[pIndex[iPoint]].r - X.r) * Dir.r +
2679 (pPoints[pIndex[iPoint]].g - X.g) * Dir.g +
2680 (pPoints[pIndex[iPoint]].b - X.b) * Dir.b;
2681
2682 uint32_t iStep;
2683 if (fDot <= 0.0f)
2684 iStep = 0;
2685 else if (fDot >= fSteps)
2686 iStep = cSteps - 1;
2687 else
2688 iStep = uint32_t(fDot + 0.5f);
2689
2690 HDRColorA Diff;
2691 Diff.r = pSteps[iStep].r - pPoints[pIndex[iPoint]].r;
2692 Diff.g = pSteps[iStep].g - pPoints[pIndex[iPoint]].g;
2693 Diff.b = pSteps[iStep].b - pPoints[pIndex[iPoint]].b;
2694
2695 const float fC = pC[iStep] * (1.0f / 8.0f);
2696 const float fD = pD[iStep] * (1.0f / 8.0f);
2697
2698 d2X += fC * pC[iStep];
2699 dX.r += fC * Diff.r;
2700 dX.g += fC * Diff.g;
2701 dX.b += fC * Diff.b;
2702
2703 d2Y += fD * pD[iStep];
2704 dY.r += fD * Diff.r;
2705 dY.g += fD * Diff.g;
2706 dY.b += fD * Diff.b;
2707 }
2708
2709 // Move endpoints
2710 if (d2X > 0.0f)
2711 {
2712 const float f = -1.0f / d2X;
2713
2714 X.r += dX.r * f;
2715 X.g += dX.g * f;
2716 X.b += dX.b * f;
2717 }
2718
2719 if (d2Y > 0.0f)
2720 {
2721 const float f = -1.0f / d2Y;
2722
2723 Y.r += dY.r * f;
2724 Y.g += dY.g * f;
2725 Y.b += dY.b * f;
2726 }
2727
2728 if ((dX.r * dX.r < fEpsilon) && (dX.g * dX.g < fEpsilon) && (dX.b * dX.b < fEpsilon) &&
2729 (dY.r * dY.r < fEpsilon) && (dY.g * dY.g < fEpsilon) && (dY.b * dY.b < fEpsilon))
2730 {
2731 break;
2732 }
2733 }
2734
2735 pX->r = X.r;
2736 pX->g = X.g;
2737 pX->b = X.b;
2738 pY->r = Y.r;
2739 pY->g = Y.g;
2740 pY->b = Y.b;
2741 return fError;
2742}
2743
2744void FillWithErrorColors(HDRColorA* pOut) noexcept
2745{
2746 for (size_t i = 0; i < BC6H_NUM_PIXELS_PER_BLOCK; ++i)
2747 {
2748#ifdef _DEBUG
2749 // Use Magenta in debug as a highly-visible error color
2750 pOut[i] = HDRColorA(1.0f, 0.0f, 1.0f, 1.0f);
2751#else
2752 // In production use, default to black
2753 pOut[i] = HDRColorA(0.0f, 0.0f, 0.0f, 1.0f);
2754#endif
2755 }
2756}
2757
2758void D3DX_BC6H::Decode(bool bSigned, HDRColorA* pOut) const noexcept
2759{
2760 BC6H_ASSERT(pOut);
2761
2762 size_t uStartBit = 0;
2763 uint8_t uMode = GetBits(uStartBit, 2u);
2764 if (uMode != 0x00 && uMode != 0x01)
2765 {
2766 uMode = static_cast<uint8_t>((unsigned(GetBits(uStartBit, 3)) << 2) | uMode);
2767 }
2768
2769 BC6H_ASSERT(uMode < 32);
2770
2771 if (ms_aModeToInfo[uMode] >= 0)
2772 {
2773 BC6H_ASSERT(static_cast<unsigned int>(ms_aModeToInfo[uMode]) < std__size(ms_aInfo));
2774 const ModeDescriptor* desc = ms_aDesc[ms_aModeToInfo[uMode]];
2775
2776 BC6H_ASSERT(static_cast<unsigned int>(ms_aModeToInfo[uMode]) < std__size(ms_aDesc));
2777 const ModeInfo& info = ms_aInfo[ms_aModeToInfo[uMode]];
2778
2779 INTEndPntPair aEndPts[BC6H_MAX_REGIONS] = {};
2780 uint32_t uShape = 0;
2781
2782 // Read header
2783 const size_t uHeaderBits = info.uPartitions > 0 ? 82u : 65u;
2784 while (uStartBit < uHeaderBits)
2785 {
2786 const size_t uCurBit = uStartBit;
2787 if (GetBit(uStartBit))
2788 {
2789 switch (desc[uCurBit].m_eField)
2790 {
2791 case D: uShape |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
2792 case RW: aEndPts[0].A.r |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
2793 case RX: aEndPts[0].B.r |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
2794 case RY: aEndPts[1].A.r |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
2795 case RZ: aEndPts[1].B.r |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
2796 case GW: aEndPts[0].A.g |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
2797 case GX: aEndPts[0].B.g |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
2798 case GY: aEndPts[1].A.g |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
2799 case GZ: aEndPts[1].B.g |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
2800 case BW: aEndPts[0].A.b |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
2801 case BX: aEndPts[0].B.b |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
2802 case BY: aEndPts[1].A.b |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
2803 case BZ: aEndPts[1].B.b |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
2804 default: {
2805#ifdef BC6H_LOG
2806 BC6H_LOG("BC6H: Invalid header bits encountered during decoding\n");
2807#endif
2808 FillWithErrorColors(pOut);
2809 return;
2810 }
2811 }
2812 }
2813 }
2814
2815 BC6H_ASSERT(uShape < 64);
2816
2817 // Sign extend necessary end points
2818 if (bSigned)
2819 {
2820 aEndPts[0].A.SignExtend(info.RGBAPrec[0][0]);
2821 }
2822 if (bSigned || info.bTransformed)
2823 {
2824 BC6H_ASSERT(info.uPartitions < BC6H_MAX_REGIONS);
2825 for (size_t p = 0; p <= info.uPartitions; ++p)
2826 {
2827 if (p != 0)
2828 {
2829 aEndPts[p].A.SignExtend(info.RGBAPrec[p][0]);
2830 }
2831 aEndPts[p].B.SignExtend(info.RGBAPrec[p][1]);
2832 }
2833 }
2834
2835 // Inverse transform the end points
2836 if (info.bTransformed)
2837 {
2838 TransformInverse(aEndPts, info.RGBAPrec[0][0], bSigned);
2839 }
2840
2841 // Read indices
2842 for (size_t i = 0; i < BC6H_NUM_PIXELS_PER_BLOCK; ++i)
2843 {
2844 const size_t uNumBits = IsFixUpOffset(info.uPartitions, uShape, i) ? info.uIndexPrec - 1u : info.uIndexPrec;
2845 if (uStartBit + uNumBits > 128)
2846 {
2847#ifdef BC6H_LOG
2848 BC6H_LOG("BC6H: Invalid block encountered during decoding\n");
2849#endif
2850 FillWithErrorColors(pOut);
2851 return;
2852 }
2853 const uint8_t uIndex = GetBits(uStartBit, uNumBits);
2854
2855 if (uIndex >= ((info.uPartitions > 0) ? 8 : 16))
2856 {
2857#ifdef BC6H_LOG
2858 BC6H_LOG("BC6H: Invalid index encountered during decoding\n");
2859#endif
2860 FillWithErrorColors(pOut);
2861 return;
2862 }
2863
2864 const size_t uRegion = g_aPartitionTable[info.uPartitions][uShape][i];
2865 BC6H_ASSERT(uRegion < BC6H_MAX_REGIONS);
2866
2867 // Unquantize endpoints and interpolate
2868 const int r1 = Unquantize(aEndPts[uRegion].A.r, info.RGBAPrec[0][0].r, bSigned);
2869 const int g1 = Unquantize(aEndPts[uRegion].A.g, info.RGBAPrec[0][0].g, bSigned);
2870 const int b1 = Unquantize(aEndPts[uRegion].A.b, info.RGBAPrec[0][0].b, bSigned);
2871 const int r2 = Unquantize(aEndPts[uRegion].B.r, info.RGBAPrec[0][0].r, bSigned);
2872 const int g2 = Unquantize(aEndPts[uRegion].B.g, info.RGBAPrec[0][0].g, bSigned);
2873 const int b2 = Unquantize(aEndPts[uRegion].B.b, info.RGBAPrec[0][0].b, bSigned);
2874 const int* aWeights = info.uPartitions > 0 ? g_aWeights3 : g_aWeights4;
2875 INTColor fc;
2876 fc.r = FinishUnquantize((r1 * (BC6H_WEIGHT_MAX - aWeights[uIndex]) + r2 * aWeights[uIndex] + BC6H_WEIGHT_ROUND) >> BC6H_WEIGHT_SHIFT, bSigned);
2877 fc.g = FinishUnquantize((g1 * (BC6H_WEIGHT_MAX - aWeights[uIndex]) + g2 * aWeights[uIndex] + BC6H_WEIGHT_ROUND) >> BC6H_WEIGHT_SHIFT, bSigned);
2878 fc.b = FinishUnquantize((b1 * (BC6H_WEIGHT_MAX - aWeights[uIndex]) + b2 * aWeights[uIndex] + BC6H_WEIGHT_ROUND) >> BC6H_WEIGHT_SHIFT, bSigned);
2879
2880 HALF rgb[3];
2881 fc.ToF16(rgb, bSigned);
2882
2883 pOut[i].r = XMConvertHalfToFloat(rgb[0]);
2884 pOut[i].g = XMConvertHalfToFloat(rgb[1]);
2885 pOut[i].b = XMConvertHalfToFloat(rgb[2]);
2886 pOut[i].a = 1.0f;
2887 }
2888 }
2889 else
2890 {
2891#ifdef BC6H_LOG
2892 const char* warnstr = "BC6H: Invalid mode encountered during decoding\n";
2893 switch (uMode)
2894 {
2895 case 0x13: warnstr = "BC6H: Reserved mode 10011 encountered during decoding\n"; break;
2896 case 0x17: warnstr = "BC6H: Reserved mode 10111 encountered during decoding\n"; break;
2897 case 0x1B: warnstr = "BC6H: Reserved mode 11011 encountered during decoding\n"; break;
2898 case 0x1F: warnstr = "BC6H: Reserved mode 11111 encountered during decoding\n"; break;
2899 }
2900 BC6H_LOG(warnstr);
2901#endif
2902 // Per the BC6H format spec, we must return opaque black
2903 for (size_t i = 0; i < BC6H_NUM_PIXELS_PER_BLOCK; ++i)
2904 {
2905 pOut[i] = HDRColorA(0.0f, 0.0f, 0.0f, 1.0f);
2906 }
2907 }
2908}
2909
2910void D3DX_BC6H::Encode(bool bSigned, const HDRColorA* const pIn) noexcept
2911{
2912 BC6H_ASSERT(pIn);
2913
2914 EncodeParams EP(pIn, bSigned);
2915
2916 for (EP.uMode = 0; EP.uMode < std__size(ms_aInfo) && EP.fBestErr > 0; ++EP.uMode)
2917 {
2918 const uint8_t uShapes = ms_aInfo[EP.uMode].uPartitions ? 32u : 1u;
2919 // Number of rough cases to look at. Reasonable values of this are 1, uShapes/4, and uShapes
2920 // uShapes/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
2921 const size_t uItems = std__max<size_t>(1u, size_t(uShapes >> 2));
2922 float afRoughMSE[BC6H_MAX_SHAPES];
2923 uint8_t auShape[BC6H_MAX_SHAPES];
2924
2925 // pick the best uItems shapes and refine these.
2926 for (EP.uShape = 0; EP.uShape < uShapes; ++EP.uShape)
2927 {
2928 size_t uShape = EP.uShape;
2929 afRoughMSE[uShape] = RoughMSE(&EP);
2930 auShape[uShape] = static_cast<uint8_t>(uShape);
2931 }
2932
2933 // Bubble up the first uItems items
2934 for (size_t i = 0; i < uItems; i++)
2935 {
2936 for (size_t j = i + 1; j < uShapes; j++)
2937 {
2938 if (afRoughMSE[i] > afRoughMSE[j])
2939 {
2940 std__swap(afRoughMSE[i], afRoughMSE[j]);
2941 std__swap(auShape[i], auShape[j]);
2942 }
2943 }
2944 }
2945
2946 for (size_t i = 0; i < uItems && EP.fBestErr > 0; i++)
2947 {
2948 EP.uShape = auShape[i];
2949 Refine(&EP);
2950 }
2951 }
2952}
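// Note on the search above: for every entry of ms_aInfo the encoder ranks all
// candidate shapes (32 for two-region modes, 1 otherwise) by RoughMSE, then
// runs the full Refine pass only on the best uItems of them (a quarter of the
// shapes, but at least one), stopping early once a zero-error block has been
// emitted.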
2953
2954int D3DX_BC6H::Quantize(int iValue, int prec, bool bSigned) noexcept
2955{
2956 BC6H_ASSERT(prec > 1); // didn't bother to make it work for 1
2957 int q, s = 0;
2958 if (bSigned)
2959 {
2960 BC6H_ASSERT(iValue >= -F16MAX && iValue <= F16MAX);
2961 if (iValue < 0)
2962 {
2963 s = 1;
2964 iValue = -iValue;
2965 }
2966 q = (prec >= 16) ? iValue : (iValue << (prec - 1)) / (F16MAX + 1);
2967 if (s)
2968 q = -q;
2969 BC6H_ASSERT(q > -(1 << (prec - 1)) && q < (1 << (prec - 1)));
2970 }
2971 else
2972 {
2973 BC6H_ASSERT(iValue >= 0 && iValue <= F16MAX);
2974 q = (prec >= 15) ? iValue : (iValue << prec) / (F16MAX + 1);
2975 BC6H_ASSERT(q >= 0 && q < (1 << prec));
2976 }
2977
2978 return q;
2979}
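// Worked example (assuming F16MAX is the largest half-float bit pattern,
// 0x7BFF = 31743, as in DirectXTex): Quantize(31743, 10, false) ==
// (31743 << 10) / 31744 == 1023, the largest 10-bit code, and
// Quantize(0, prec, bSigned) is always 0.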
2980
2981int D3DX_BC6H::Unquantize(int comp, uint8_t uBitsPerComp, bool bSigned) noexcept
2982{
2983 int unq = 0, s = 0;
2984 if (bSigned)
2985 {
2986 if (uBitsPerComp >= 16)
2987 {
2988 unq = comp;
2989 }
2990 else
2991 {
2992 if (comp < 0)
2993 {
2994 s = 1;
2995 comp = -comp;
2996 }
2997
2998 if (comp == 0) unq = 0;
2999 else if (comp >= ((1 << (uBitsPerComp - 1)) - 1))
3000 unq = 0x7FFF;
3001 else
3002 unq = ((comp << 15) + 0x4000) >> (uBitsPerComp - 1);
3003
3004 if (s) unq = -unq;
3005 }
3006 }
3007 else
3008 {
3009 if (uBitsPerComp >= 15) unq = comp;
3010 else if (comp == 0)
3011 unq = 0;
3012 else if (comp == ((1 << uBitsPerComp) - 1))
3013 unq = 0xFFFF;
3014 else
3015 unq = ((comp << 16) + 0x8000) >> uBitsPerComp;
3016 }
3017
3018 return unq;
3019}
3020
3021int D3DX_BC6H::FinishUnquantize(int comp, bool bSigned) noexcept
3022{
3023 if (bSigned)
3024 {
3025 return (comp < 0) ? -(((-comp) * 31) >> 5) : (comp * 31) >> 5; // scale the magnitude by 31/32
3026 }
3027 else
3028 {
3029 return (comp * 31) >> 6; // scale the magnitude by 31/64
3030 }
3031}
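// Worked example (unsigned): Unquantize(1023, 10, false) hits the max-code
// case and returns 0xFFFF; FinishUnquantize(0xFFFF, false) ==
// (65535 * 31) >> 6 == 31743 == 0x7BFF, the bit pattern of the largest finite
// half-float (65504.0), so the maximum 10-bit endpoint decodes to the
// half-float maximum.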
3032
3033bool D3DX_BC6H::EndPointsFit(const EncodeParams* pEP, const INTEndPntPair aEndPts[]) noexcept
3034{
3035 BC6H_ASSERT(pEP);
3036 const bool bTransformed = ms_aInfo[pEP->uMode].bTransformed;
3037 const bool bIsSigned = pEP->bSigned;
3038 const LDRColorA& Prec0 = ms_aInfo[pEP->uMode].RGBAPrec[0][0];
3039 const LDRColorA& Prec1 = ms_aInfo[pEP->uMode].RGBAPrec[0][1];
3040 const LDRColorA& Prec2 = ms_aInfo[pEP->uMode].RGBAPrec[1][0];
3041 const LDRColorA& Prec3 = ms_aInfo[pEP->uMode].RGBAPrec[1][1];
3042
3043 INTColor aBits[4];
3044 aBits[0].r = NBits(aEndPts[0].A.r, bIsSigned);
3045 aBits[0].g = NBits(aEndPts[0].A.g, bIsSigned);
3046 aBits[0].b = NBits(aEndPts[0].A.b, bIsSigned);
3047 aBits[1].r = NBits(aEndPts[0].B.r, bTransformed || bIsSigned);
3048 aBits[1].g = NBits(aEndPts[0].B.g, bTransformed || bIsSigned);
3049 aBits[1].b = NBits(aEndPts[0].B.b, bTransformed || bIsSigned);
3050 if (aBits[0].r > Prec0.r || aBits[1].r > Prec1.r ||
3051 aBits[0].g > Prec0.g || aBits[1].g > Prec1.g ||
3052 aBits[0].b > Prec0.b || aBits[1].b > Prec1.b)
3053 return false;
3054
3055 if (ms_aInfo[pEP->uMode].uPartitions)
3056 {
3057 aBits[2].r = NBits(aEndPts[1].A.r, bTransformed || bIsSigned);
3058 aBits[2].g = NBits(aEndPts[1].A.g, bTransformed || bIsSigned);
3059 aBits[2].b = NBits(aEndPts[1].A.b, bTransformed || bIsSigned);
3060 aBits[3].r = NBits(aEndPts[1].B.r, bTransformed || bIsSigned);
3061 aBits[3].g = NBits(aEndPts[1].B.g, bTransformed || bIsSigned);
3062 aBits[3].b = NBits(aEndPts[1].B.b, bTransformed || bIsSigned);
3063
3064 if (aBits[2].r > Prec2.r || aBits[3].r > Prec3.r ||
3065 aBits[2].g > Prec2.g || aBits[3].g > Prec3.g ||
3066 aBits[2].b > Prec2.b || aBits[3].b > Prec3.b)
3067 return false;
3068 }
3069
3070 return true;
3071}
3072
3073void D3DX_BC6H::GeneratePaletteQuantized(const EncodeParams* pEP, const INTEndPntPair& endPts, INTColor aPalette[]) const noexcept
3074{
3075 BC6H_ASSERT(pEP);
3076 const size_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
3077 const size_t uNumIndices = size_t(1) << uIndexPrec;
3078 BC6H_ASSERT(uNumIndices > 0);
3079 const LDRColorA& Prec = ms_aInfo[pEP->uMode].RGBAPrec[0][0];
3080
3081 // scale endpoints
3082 INTEndPntPair unqEndPts;
3083 unqEndPts.A.r = Unquantize(endPts.A.r, Prec.r, pEP->bSigned);
3084 unqEndPts.A.g = Unquantize(endPts.A.g, Prec.g, pEP->bSigned);
3085 unqEndPts.A.b = Unquantize(endPts.A.b, Prec.b, pEP->bSigned);
3086 unqEndPts.B.r = Unquantize(endPts.B.r, Prec.r, pEP->bSigned);
3087 unqEndPts.B.g = Unquantize(endPts.B.g, Prec.g, pEP->bSigned);
3088 unqEndPts.B.b = Unquantize(endPts.B.b, Prec.b, pEP->bSigned);
3089
3090 // interpolate
3091 const int* aWeights = nullptr;
3092 switch (uIndexPrec)
3093 {
3094 case 3:
3095 aWeights = g_aWeights3;
3096 BC6H_ASSERT(uNumIndices <= 8);
3097 break;
3098 case 4:
3099 aWeights = g_aWeights4;
3100 BC6H_ASSERT(uNumIndices <= 16);
3101 break;
3102 default:
3103 BC6H_ASSERT(false);
3104 for (size_t i = 0; i < uNumIndices; ++i)
3105 {
3106//#pragma prefast(suppress : 22102 22103, "writing blocks in two halves confuses tool")
3107 aPalette[i] = INTColor(0, 0, 0);
3108 }
3109 return;
3110 }
3111
3112 for (size_t i = 0; i < uNumIndices; ++i)
3113 {
3114 aPalette[i].r = FinishUnquantize(
3115 (unqEndPts.A.r * (BC6H_WEIGHT_MAX - aWeights[i]) + unqEndPts.B.r * aWeights[i] + BC6H_WEIGHT_ROUND) >> BC6H_WEIGHT_SHIFT,
3116 pEP->bSigned);
3117 aPalette[i].g = FinishUnquantize(
3118 (unqEndPts.A.g * (BC6H_WEIGHT_MAX - aWeights[i]) + unqEndPts.B.g * aWeights[i] + BC6H_WEIGHT_ROUND) >> BC6H_WEIGHT_SHIFT,
3119 pEP->bSigned);
3120 aPalette[i].b = FinishUnquantize(
3121 (unqEndPts.A.b * (BC6H_WEIGHT_MAX - aWeights[i]) + unqEndPts.B.b * aWeights[i] + BC6H_WEIGHT_ROUND) >> BC6H_WEIGHT_SHIFT,
3122 pEP->bSigned);
3123 }
3124}
3125
3126// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
3127float D3DX_BC6H::MapColorsQuantized(const EncodeParams* pEP, const INTColor aColors[], size_t np, const INTEndPntPair& endPts) const noexcept
3128{
3129 BC6H_ASSERT(pEP);
3130
3131 const uint8_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
3132 auto const uNumIndices = static_cast<const uint8_t>(1u << uIndexPrec);
3133 INTColor aPalette[BC6H_MAX_INDICES];
3134 GeneratePaletteQuantized(pEP, endPts, aPalette);
3135
3136 float fTotErr = 0;
3137 for (size_t i = 0; i < np; ++i)
3138 {
3139 const XMVECTOR vcolors = XMLoadSInt4(reinterpret_cast<const XMINT4*>(&aColors[i]));
3140
3141 // Compute ErrorMetricRGB
3142 XMVECTOR tpal = XMLoadSInt4(reinterpret_cast<const XMINT4*>(&aPalette[0]));
3143 tpal = XMVectorSubtract(vcolors, tpal);
3144 float fBestErr = XMVectorDot(tpal, tpal);
3145
3146 for (int j = 1; j < uNumIndices && fBestErr > 0; ++j)
3147 {
3148 // Compute ErrorMetricRGB
3149 tpal = XMLoadSInt4(reinterpret_cast<const XMINT4*>(&aPalette[j]));
3150 tpal = XMVectorSubtract(vcolors, tpal);
3151 const float fErr = XMVectorDot(tpal, tpal);
3152 if (fErr > fBestErr) break; // error increased, so we're done searching
3153 if (fErr < fBestErr) fBestErr = fErr;
3154 }
3155 fTotErr += fBestErr;
3156 }
3157 return fTotErr;
3158}
3159
3160float D3DX_BC6H::PerturbOne(const EncodeParams* pEP, const INTColor aColors[], size_t np, uint8_t ch, const INTEndPntPair& oldEndPts, INTEndPntPair& newEndPts, float fOldErr, int do_b) const noexcept
3161{
3162 BC6H_ASSERT(pEP);
3163 uint8_t uPrec;
3164 switch (ch)
3165 {
3166 case 0: uPrec = ms_aInfo[pEP->uMode].RGBAPrec[0][0].r; break;
3167 case 1: uPrec = ms_aInfo[pEP->uMode].RGBAPrec[0][0].g; break;
3168 case 2: uPrec = ms_aInfo[pEP->uMode].RGBAPrec[0][0].b; break;
3169 default:
3170 BC6H_ASSERT(false);
3171 newEndPts = oldEndPts;
3172 return FLT_MAX;
3173 }
3174 INTEndPntPair tmpEndPts;
3175 float fMinErr = fOldErr;
3176 int beststep = 0;
3177
3178 // copy real endpoints so we can perturb them
3179 tmpEndPts = newEndPts = oldEndPts;
3180
3181 // do a logarithmic search over step sizes for the best error for this endpoint (A or B, selected by do_b)
3182 for (int step = 1 << (uPrec - 1); step; step >>= 1)
3183 {
3184 bool bImproved = false;
3185 for (int sign = -1; sign <= 1; sign += 2)
3186 {
3187 if (do_b == 0)
3188 {
3189 tmpEndPts.A[ch] = newEndPts.A[ch] + sign * step;
3190 if (tmpEndPts.A[ch] < 0 || tmpEndPts.A[ch] >= (1 << uPrec))
3191 continue;
3192 }
3193 else
3194 {
3195 tmpEndPts.B[ch] = newEndPts.B[ch] + sign * step;
3196 if (tmpEndPts.B[ch] < 0 || tmpEndPts.B[ch] >= (1 << uPrec))
3197 continue;
3198 }
3199
3200 const float fErr = MapColorsQuantized(pEP, aColors, np, tmpEndPts);
3201
3202 if (fErr < fMinErr)
3203 {
3204 bImproved = true;
3205 fMinErr = fErr;
3206 beststep = sign * step;
3207 }
3208 }
3209 // if this was an improvement, move the endpoint and continue search from there
3210 if (bImproved)
3211 {
3212 if (do_b == 0)
3213 newEndPts.A[ch] += beststep;
3214 else
3215 newEndPts.B[ch] += beststep;
3216 }
3217 }
3218 return fMinErr;
3219}
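// Note on the search above: for a channel stored with uPrec bits the step
// sizes tried are 1 << (uPrec - 1), ..., 2, 1. At each step size both signs
// are scored with MapColorsQuantized, the endpoint is moved by the best
// improving offset (if any), and the step is then halved, so at most
// 2 * uPrec palette evaluations occur per call.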
3220
3221void D3DX_BC6H::OptimizeOne(const EncodeParams* pEP, const INTColor aColors[], size_t np, float aOrgErr, const INTEndPntPair& aOrgEndPts, INTEndPntPair& aOptEndPts) const noexcept
3222{
3223 BC6H_ASSERT(pEP);
3224 float aOptErr = aOrgErr;
3225 aOptEndPts.A = aOrgEndPts.A;
3226 aOptEndPts.B = aOrgEndPts.B;
3227
3228 INTEndPntPair new_a, new_b;
3229 INTEndPntPair newEndPts;
3230 int do_b;
3231
3232 // now optimize each channel separately
3233 for (uint8_t ch = 0; ch < BC6H_NUM_CHANNELS; ++ch)
3234 {
3235 // figure out which endpoint when perturbed gives the most improvement and start there
3236 // if we just alternate, we can easily end up in a local minimum
3237 const float fErr0 = PerturbOne(pEP, aColors, np, ch, aOptEndPts, new_a, aOptErr, 0); // perturb endpt A
3238 const float fErr1 = PerturbOne(pEP, aColors, np, ch, aOptEndPts, new_b, aOptErr, 1); // perturb endpt B
3239
3240 if (fErr0 < fErr1)
3241 {
3242 if (fErr0 >= aOptErr) continue;
3243 aOptEndPts.A[ch] = new_a.A[ch];
3244 aOptErr = fErr0;
3245 do_b = 1; // do B next
3246 }
3247 else
3248 {
3249 if (fErr1 >= aOptErr) continue;
3250 aOptEndPts.B[ch] = new_b.B[ch];
3251 aOptErr = fErr1;
3252 do_b = 0; // do A next
3253 }
3254
3255 // now alternate endpoints and keep trying until there is no improvement
3256 for (;;)
3257 {
3258 const float fErr = PerturbOne(pEP, aColors, np, ch, aOptEndPts, newEndPts, aOptErr, do_b);
3259 if (fErr >= aOptErr)
3260 break;
3261 if (do_b == 0)
3262 aOptEndPts.A[ch] = newEndPts.A[ch];
3263 else
3264 aOptEndPts.B[ch] = newEndPts.B[ch];
3265 aOptErr = fErr;
3266 do_b = 1 - do_b; // now move the other endpoint
3267 }
3268 }
3269}
3270
3271void D3DX_BC6H::OptimizeEndPoints(const EncodeParams* pEP, const float aOrgErr[], const INTEndPntPair aOrgEndPts[], INTEndPntPair aOptEndPts[]) const noexcept
3272{
3273 BC6H_ASSERT(pEP);
3274 const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
3275 BC6H_ASSERT(uPartitions < BC6H_MAX_REGIONS);
3276 INTColor aPixels[BC6H_NUM_PIXELS_PER_BLOCK];
3277
3278 for (size_t p = 0; p <= uPartitions; ++p)
3279 {
3280 // collect the pixels in the region
3281 size_t np = 0;
3282 for (size_t i = 0; i < BC6H_NUM_PIXELS_PER_BLOCK; ++i)
3283 {
3284 if (g_aPartitionTable[p][pEP->uShape][i] == p)
3285 {
3286 aPixels[np++] = pEP->aIPixels[i];
3287 }
3288 }
3289
3290 OptimizeOne(pEP, aPixels, np, aOrgErr[p], aOrgEndPts[p], aOptEndPts[p]);
3291 }
3292}
3293
3294// Swap endpoints as needed to ensure that the indices at the fix-up positions have a 0 high-order bit
3295void D3DX_BC6H::SwapIndices(const EncodeParams* pEP, INTEndPntPair aEndPts[], size_t aIndices[]) noexcept
3296{
3297 BC6H_ASSERT(pEP);
3298 const size_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
3299 const size_t uNumIndices = size_t(1) << ms_aInfo[pEP->uMode].uIndexPrec;
3300 const size_t uHighIndexBit = uNumIndices >> 1;
3301
3302 BC6H_ASSERT(uPartitions < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES);
3303
3304 for (size_t p = 0; p <= uPartitions; ++p)
3305 {
3306 const size_t i = g_aFixUp[uPartitions][pEP->uShape][p];
3307 BC6H_ASSERT(g_aPartitionTable[uPartitions][pEP->uShape][i] == p);
3308 if (aIndices[i] & uHighIndexBit)
3309 {
3310 // high bit is set, swap the aEndPts and indices for this region
3311 std__swap(aEndPts[p].A, aEndPts[p].B);
3312
3313 for (size_t j = 0; j < BC6H_NUM_PIXELS_PER_BLOCK; ++j)
3314 if (g_aPartitionTable[uPartitions][pEP->uShape][j] == p)
3315 aIndices[j] = uNumIndices - 1 - aIndices[j];
3316 }
3317 }
3318}
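// Worked example: with 4-bit indices uHighIndexBit == 8, so if the fix-up
// pixel of a region received index 12, that region's endpoints A and B are
// swapped and every index j in the region becomes 15 - j (12 -> 3), clearing
// the high bit that EmitBlock does not store for fix-up pixels.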
3319
3320// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
3321void D3DX_BC6H::AssignIndices(const EncodeParams* pEP, const INTEndPntPair aEndPts[], size_t aIndices[], float aTotErr[]) const noexcept
3322{
3323 BC6H_ASSERT(pEP);
3324 const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
3325 auto const uNumIndices = static_cast<const uint8_t>(1u << ms_aInfo[pEP->uMode].uIndexPrec);
3326
3327 BC6H_ASSERT(uPartitions < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES);
3328
3329 // build list of possibles
3330 INTColor aPalette[BC6H_MAX_REGIONS][BC6H_MAX_INDICES];
3331
3332 for (size_t p = 0; p <= uPartitions; ++p)
3333 {
3334 GeneratePaletteQuantized(pEP, aEndPts[p], aPalette[p]);
3335 aTotErr[p] = 0;
3336 }
3337
3338 for (size_t i = 0; i < BC6H_NUM_PIXELS_PER_BLOCK; ++i)
3339 {
3340 const uint8_t uRegion = g_aPartitionTable[uPartitions][pEP->uShape][i];
3341 BC6H_ASSERT(uRegion < BC6H_MAX_REGIONS);
3342 float fBestErr = Norm(pEP->aIPixels[i], aPalette[uRegion][0]);
3343 aIndices[i] = 0;
3344
3345 for (uint8_t j = 1; j < uNumIndices && fBestErr > 0; ++j)
3346 {
3347 const float fErr = Norm(pEP->aIPixels[i], aPalette[uRegion][j]);
3348 if (fErr > fBestErr) break; // error increased, so we're done searching
3349 if (fErr < fBestErr)
3350 {
3351 fBestErr = fErr;
3352 aIndices[i] = j;
3353 }
3354 }
3355 aTotErr[uRegion] += fBestErr;
3356 }
3357}
3358
3359void D3DX_BC6H::QuantizeEndPts(const EncodeParams* pEP, INTEndPntPair* aQntEndPts) const noexcept
3360{
3361 BC6H_ASSERT(pEP && aQntEndPts);
3362 const INTEndPntPair* aUnqEndPts = pEP->aUnqEndPts[pEP->uShape];
3363 const LDRColorA& Prec = ms_aInfo[pEP->uMode].RGBAPrec[0][0];
3364 const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
3365 BC6H_ASSERT(uPartitions < BC6H_MAX_REGIONS);
3366
3367 for (size_t p = 0; p <= uPartitions; ++p)
3368 {
3369 aQntEndPts[p].A.r = Quantize(aUnqEndPts[p].A.r, Prec.r, pEP->bSigned);
3370 aQntEndPts[p].A.g = Quantize(aUnqEndPts[p].A.g, Prec.g, pEP->bSigned);
3371 aQntEndPts[p].A.b = Quantize(aUnqEndPts[p].A.b, Prec.b, pEP->bSigned);
3372 aQntEndPts[p].B.r = Quantize(aUnqEndPts[p].B.r, Prec.r, pEP->bSigned);
3373 aQntEndPts[p].B.g = Quantize(aUnqEndPts[p].B.g, Prec.g, pEP->bSigned);
3374 aQntEndPts[p].B.b = Quantize(aUnqEndPts[p].B.b, Prec.b, pEP->bSigned);
3375 }
3376}
3377
3378void D3DX_BC6H::EmitBlock(const EncodeParams* pEP, const INTEndPntPair aEndPts[], const size_t aIndices[]) noexcept
3379{
3380 BC6H_ASSERT(pEP);
3381 const uint8_t uRealMode = ms_aInfo[pEP->uMode].uMode;
3382 const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
3383 const uint8_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
3384 const size_t uHeaderBits = uPartitions > 0 ? 82u : 65u;
3385 const ModeDescriptor* desc = ms_aDesc[pEP->uMode];
3386 size_t uStartBit = 0;
3387
3388 while (uStartBit < uHeaderBits)
3389 {
3390 switch (desc[uStartBit].m_eField)
3391 {
3392 case M: SetBit(uStartBit, uint8_t(uRealMode >> desc[uStartBit].m_uBit) & 0x01u); break;
3393 case D: SetBit(uStartBit, uint8_t(pEP->uShape >> desc[uStartBit].m_uBit) & 0x01u); break;
3394 case RW: SetBit(uStartBit, uint8_t(aEndPts[0].A.r >> desc[uStartBit].m_uBit) & 0x01u); break;
3395 case RX: SetBit(uStartBit, uint8_t(aEndPts[0].B.r >> desc[uStartBit].m_uBit) & 0x01u); break;
3396 case RY: SetBit(uStartBit, uint8_t(aEndPts[1].A.r >> desc[uStartBit].m_uBit) & 0x01u); break;
3397 case RZ: SetBit(uStartBit, uint8_t(aEndPts[1].B.r >> desc[uStartBit].m_uBit) & 0x01u); break;
3398 case GW: SetBit(uStartBit, uint8_t(aEndPts[0].A.g >> desc[uStartBit].m_uBit) & 0x01u); break;
3399 case GX: SetBit(uStartBit, uint8_t(aEndPts[0].B.g >> desc[uStartBit].m_uBit) & 0x01u); break;
3400 case GY: SetBit(uStartBit, uint8_t(aEndPts[1].A.g >> desc[uStartBit].m_uBit) & 0x01u); break;
3401 case GZ: SetBit(uStartBit, uint8_t(aEndPts[1].B.g >> desc[uStartBit].m_uBit) & 0x01u); break;
3402 case BW: SetBit(uStartBit, uint8_t(aEndPts[0].A.b >> desc[uStartBit].m_uBit) & 0x01u); break;
3403 case BX: SetBit(uStartBit, uint8_t(aEndPts[0].B.b >> desc[uStartBit].m_uBit) & 0x01u); break;
3404 case BY: SetBit(uStartBit, uint8_t(aEndPts[1].A.b >> desc[uStartBit].m_uBit) & 0x01u); break;
3405 case BZ: SetBit(uStartBit, uint8_t(aEndPts[1].B.b >> desc[uStartBit].m_uBit) & 0x01u); break;
3406 default: BC6H_ASSERT(false);
3407 }
3408 }
3409
3410 for (size_t i = 0; i < BC6H_NUM_PIXELS_PER_BLOCK; ++i)
3411 {
3412 if (IsFixUpOffset(ms_aInfo[pEP->uMode].uPartitions, pEP->uShape, i))
3413 SetBits(uStartBit, uIndexPrec - 1u, static_cast<uint8_t>(aIndices[i]));
3414 else
3415 SetBits(uStartBit, uIndexPrec, static_cast<uint8_t>(aIndices[i]));
3416 }
3417 BC6H_ASSERT(uStartBit == 128);
3418}
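// Bit-budget check for the assert above: two-region modes write 82 header
// bits plus 16 * 3 - 2 index bits (the two fix-up pixels drop their high
// bit) == 128; one-region modes write 65 header bits plus 16 * 4 - 1 index
// bits == 128.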
3419
3420void D3DX_BC6H::Refine(EncodeParams* pEP) noexcept
3421{
3422 BC6H_ASSERT(pEP);
3423 const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
3424 BC6H_ASSERT(uPartitions < BC6H_MAX_REGIONS);
3425
3426 const bool bTransformed = ms_aInfo[pEP->uMode].bTransformed;
3427 float aOrgErr[BC6H_MAX_REGIONS], aOptErr[BC6H_MAX_REGIONS];
3428 INTEndPntPair aOrgEndPts[BC6H_MAX_REGIONS], aOptEndPts[BC6H_MAX_REGIONS];
3429 size_t aOrgIdx[BC6H_NUM_PIXELS_PER_BLOCK], aOptIdx[BC6H_NUM_PIXELS_PER_BLOCK];
3430
3431 QuantizeEndPts(pEP, aOrgEndPts);
3432 AssignIndices(pEP, aOrgEndPts, aOrgIdx, aOrgErr);
3433 SwapIndices(pEP, aOrgEndPts, aOrgIdx);
3434
3435 if (bTransformed) TransformForward(aOrgEndPts);
3436 if (EndPointsFit(pEP, aOrgEndPts))
3437 {
3438 if (bTransformed) TransformInverse(aOrgEndPts, ms_aInfo[pEP->uMode].RGBAPrec[0][0], pEP->bSigned);
3439 OptimizeEndPoints(pEP, aOrgErr, aOrgEndPts, aOptEndPts);
3440 AssignIndices(pEP, aOptEndPts, aOptIdx, aOptErr);
3441 SwapIndices(pEP, aOptEndPts, aOptIdx);
3442
3443 float fOrgTotErr = 0.0f, fOptTotErr = 0.0f;
3444 for (size_t p = 0; p <= uPartitions; ++p)
3445 {
3446 fOrgTotErr += aOrgErr[p];
3447 fOptTotErr += aOptErr[p];
3448 }
3449
3450 if (bTransformed) TransformForward(aOptEndPts);
3451 if (EndPointsFit(pEP, aOptEndPts) && fOptTotErr < fOrgTotErr && fOptTotErr < pEP->fBestErr)
3452 {
3453 pEP->fBestErr = fOptTotErr;
3454 EmitBlock(pEP, aOptEndPts, aOptIdx);
3455 }
3456 else if (fOrgTotErr < pEP->fBestErr)
3457 {
3458 // either it stopped fitting when we optimized it, or there was no improvement
3459 // so go back to the unoptimized endpoints which we know will fit
3460 if (bTransformed) TransformForward(aOrgEndPts);
3461 pEP->fBestErr = fOrgTotErr;
3462 EmitBlock(pEP, aOrgEndPts, aOrgIdx);
3463 }
3464 }
3465}
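// Summary of Refine: quantize the rough endpoints, assign indices and swap
// them so fix-up pixels have a clear high bit, and verify via EndPointsFit
// that the (delta-transformed, for transformed modes) endpoints fit their
// field widths; then try OptimizeEndPoints and emit whichever of the original
// or optimized solution still fits and improves on pEP->fBestErr.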
3466
3467void D3DX_BC6H::GeneratePaletteUnquantized(const EncodeParams* pEP, size_t uRegion, INTColor aPalette[]) noexcept
3468{
3469 BC6H_ASSERT(pEP);
3470 BC6H_ASSERT(uRegion < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES);
3471 const INTEndPntPair& endPts = pEP->aUnqEndPts[pEP->uShape][uRegion];
3472 const uint8_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
3473 auto const uNumIndices = static_cast<const uint8_t>(1u << uIndexPrec);
3474 BC6H_ASSERT(uNumIndices > 0);
3475
3476 const int* aWeights = nullptr;
3477 switch (uIndexPrec)
3478 {
3479 case 3:
3480 aWeights = g_aWeights3;
3481 BC6H_ASSERT(uNumIndices <= 8);
3482 break;
3483 case 4:
3484 aWeights = g_aWeights4;
3485 BC6H_ASSERT(uNumIndices <= 16);
3486 break;
3487 default:
3488 BC6H_ASSERT(false);
3489 for (size_t i = 0; i < uNumIndices; ++i)
3490 {
3491//#pragma prefast(suppress : 22102 22103, "writing blocks in two halves confuses tool")
3492 aPalette[i] = INTColor(0, 0, 0);
3493 }
3494 return;
3495 }
3496
3497 for (size_t i = 0; i < uNumIndices; ++i)
3498 {
3499 aPalette[i].r = (endPts.A.r * (BC6H_WEIGHT_MAX - aWeights[i]) + endPts.B.r * aWeights[i] + BC6H_WEIGHT_ROUND) >> BC6H_WEIGHT_SHIFT;
3500 aPalette[i].g = (endPts.A.g * (BC6H_WEIGHT_MAX - aWeights[i]) + endPts.B.g * aWeights[i] + BC6H_WEIGHT_ROUND) >> BC6H_WEIGHT_SHIFT;
3501 aPalette[i].b = (endPts.A.b * (BC6H_WEIGHT_MAX - aWeights[i]) + endPts.B.b * aWeights[i] + BC6H_WEIGHT_ROUND) >> BC6H_WEIGHT_SHIFT;
3502 }
3503}
3504
3505float D3DX_BC6H::MapColors(const EncodeParams* pEP, size_t uRegion, size_t np, const size_t* auIndex) const noexcept
3506{
3507 BC6H_ASSERT(pEP);
3508 const uint8_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
3509 auto const uNumIndices = static_cast<const uint8_t>(1u << uIndexPrec);
3510 INTColor aPalette[BC6H_MAX_INDICES];
3511 GeneratePaletteUnquantized(pEP, uRegion, aPalette);
3512
3513 float fTotalErr = 0.0f;
3514 for (size_t i = 0; i < np; ++i)
3515 {
3516 float fBestErr = Norm(pEP->aIPixels[auIndex[i]], aPalette[0]);
3517 for (uint8_t j = 1; j < uNumIndices && fBestErr > 0.0f; ++j)
3518 {
3519 const float fErr = Norm(pEP->aIPixels[auIndex[i]], aPalette[j]);
3520 if (fErr > fBestErr) break; // error increased, so we're done searching
3521 if (fErr < fBestErr) fBestErr = fErr;
3522 }
3523 fTotalErr += fBestErr;
3524 }
3525
3526 return fTotalErr;
3527}
3528
3529//#define BC6H_USE_AU_PIX_TABLE
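// When enabled, the tables below are filled once at static-initialization
// time with, for every shape / partition-count / region combination, the list
// of pixel positions belonging to that region and its count, letting RoughMSE
// skip the per-call scan of g_aPartitionTable at the cost of extra static
// storage.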
3530
3531# ifdef BC6H_USE_AU_PIX_TABLE
3532size_t g_auPixIdx[BC6H_MAX_SHAPES][BC6H_MAX_REGIONS][BC6H_MAX_REGIONS][BC6H_NUM_PIXELS_PER_BLOCK];
3533size_t g_np[BC6H_MAX_SHAPES][BC6H_MAX_REGIONS][BC6H_MAX_REGIONS];
3534
3535struct InitTable
3536{
3537 InitTable()
3538 {
3539 for (size_t shape = 0; shape < BC6H_MAX_SHAPES; shape++)
3540 {
3541 for (size_t uPartitions = 0; uPartitions < BC6H_MAX_REGIONS; uPartitions++)
3542 {
3543 for (size_t p = 0; p < BC6H_MAX_REGIONS; ++p)
3544 {
3545 size_t np = 0;
3546 for (size_t i = 0; i < BC6H_NUM_PIXELS_PER_BLOCK; ++i)
3547 {
3548 if (g_aPartitionTable[uPartitions][shape][i] == p)
3549 {
3550 g_auPixIdx[shape][uPartitions][p][np++] = i;
3551 }
3552 }
3553 //BC6H_ASSERT(np > 0);
3554
3555 g_np[shape][uPartitions][p] = np;
3556 }
3557 }
3558 }
3559 }
3560};
3561
3562static InitTable init_au_pix_table;
3563#endif
3564
3565float D3DX_BC6H::RoughMSE(EncodeParams* pEP) const noexcept
3566{
3567 BC6H_ASSERT(pEP);
3568 BC6H_ASSERT(pEP->uShape < BC6H_MAX_SHAPES);
3569
3570 INTEndPntPair* aEndPts = pEP->aUnqEndPts[pEP->uShape];
3571
3572 const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
3573 BC6H_ASSERT(uPartitions < BC6H_MAX_REGIONS);
3574
3575 #ifndef BC6H_USE_AU_PIX_TABLE
3576 size_t auPixIdx[BC6H_NUM_PIXELS_PER_BLOCK];
3577 #endif
3578
3579 float fError = 0.0f;
3580 for (size_t p = 0; p <= uPartitions; ++p)
3581 {
3582 #ifdef BC6H_USE_AU_PIX_TABLE
3583 const size_t* auPixIdx = g_auPixIdx[pEP->uShape][uPartitions][p];
3584 size_t np = g_np[pEP->uShape][uPartitions][p];
3585 #else
3586 size_t np = 0;
3587 for (size_t i = 0; i < BC6H_NUM_PIXELS_PER_BLOCK; ++i)
3588 {
3589 if (g_aPartitionTable[uPartitions][pEP->uShape][i] == p)
3590 {
3591 auPixIdx[np++] = i;
3592 }
3593 }
3594 #endif
3595
3596 // handle simple cases
3597 BC6H_ASSERT(np > 0);
3598 if (np == 1)
3599 {
3600 aEndPts[p].A = pEP->aIPixels[auPixIdx[0]];
3601 aEndPts[p].B = pEP->aIPixels[auPixIdx[0]];
3602 continue;
3603 }
3604 else if (np == 2)
3605 {
3606 aEndPts[p].A = pEP->aIPixels[auPixIdx[0]];
3607 aEndPts[p].B = pEP->aIPixels[auPixIdx[1]];
3608 continue;
3609 }
3610
3611 HDRColorA epA, epB;
3612 OptimizeRGB(pEP->aHDRPixels, &epA, &epB, 4, np, auPixIdx);
3613 aEndPts[p].A.Set(epA, pEP->bSigned);
3614 aEndPts[p].B.Set(epB, pEP->bSigned);
3615 if (pEP->bSigned)
3616 {
3617 aEndPts[p].A.Clamp(-F16MAX, F16MAX);
3618 aEndPts[p].B.Clamp(-F16MAX, F16MAX);
3619 }
3620 else
3621 {
3622 aEndPts[p].A.Clamp(0, F16MAX);
3623 aEndPts[p].B.Clamp(0, F16MAX);
3624 }
3625
3626 fError += MapColors(pEP, p, np, auPixIdx);
3627 }
3628
3629 return fError;
3630}
3631
3632}
3633
3634//=====================================================================================
3635// Entry points
3636//=====================================================================================
3637
3638void DecodeBC6HU(void* pDest, const void* pSrc) noexcept
3639{
3640 static_assert(sizeof(Impl::D3DX_BC6H) == 16, "D3DX_BC6H should be 16 bytes");
3641 reinterpret_cast<const Impl::D3DX_BC6H*>(pSrc)->Decode(false, reinterpret_cast<Impl::HDRColorA*>(pDest));
3642}
3643
3644void DecodeBC6HS(void* pDest, const void* pSrc) noexcept
3645{
3646 static_assert(sizeof(Impl::D3DX_BC6H) == 16, "D3DX_BC6H should be 16 bytes");
3647 reinterpret_cast<const Impl::D3DX_BC6H*>(pSrc)->Decode(true, reinterpret_cast<Impl::HDRColorA*>(pDest));
3648}
3649
3650void EncodeBC6HU(void* pDest, const void* pSrc) noexcept
3651{
3652 static_assert(sizeof(Impl::D3DX_BC6H) == 16, "D3DX_BC6H should be 16 bytes");
3653 reinterpret_cast<Impl::D3DX_BC6H*>(pDest)->Encode(false, reinterpret_cast<const Impl::HDRColorA*>(pSrc));
3654}
3655
3656void EncodeBC6HS(void* pDest, const void* pSrc) noexcept
3657{
3658 static_assert(sizeof(Impl::D3DX_BC6H) == 16, "D3DX_BC6H should be 16 bytes");
3659 reinterpret_cast<Impl::D3DX_BC6H*>(pDest)->Encode(true, reinterpret_cast<const Impl::HDRColorA*>(pSrc));
3660}
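// Usage sketch (informational only, not compiled): each call processes one
// 4x4 block. For the encoders, pSrc points at 16 HDRColorA pixels (4 floats
// each; alpha is ignored by BC6H) and pDest receives the 16-byte compressed
// block; the decoders reverse the roles. Assuming HDRColorA's float layout is
// (r, g, b, a):
//
//   float pixels[16 * 4]; // one 4x4 tile of linear RGBA float data
//   uint8_t block[16]; // one compressed BC6H block
//   bc6h_enc::EncodeBC6HU(block, pixels); // unsigned-half (UF16) encode
//   bc6h_enc::DecodeBC6HU(pixels, block); // round-trip back to floats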
3661
3662}
3663
3664# ifdef BC6H_ASSERT_UNDEF
3665# undef BC6H_ASSERT_UNDEF
3666# endif
3667
3668# ifdef BC6H_HALF_TO_FLOAT_UNDEF
3669# undef BC6H_HALF_TO_FLOAT_UNDEF
3670# undef BC6H_HALF_TO_FLOAT
3671# endif
3672
3673# ifdef BC6H_FLOAT_TO_HALF_UNDEF
3674# undef BC6H_FLOAT_TO_HALF_UNDEF
3675# undef BC6H_FLOAT_TO_HALF
3676# endif
3677
3678# undef BC6H_INLINE
3679
3680#endif