Revert to Alpha 0.1
Browse files
- hexstate_quantize.c +41 -344
hexstate_quantize.c
CHANGED
|
@@ -2732,209 +2732,70 @@ static void quantize_tensor_q2k_hpc(const float *weights, int64_t n_elements,
|
|
| 2732 |
}
|
| 2733 |
|
| 2734 |
/* ══════════════════════════════════════════════════════════════════
|
| 2735 |
-
* PHASE 3.9 —
|
| 2736 |
*
|
| 2737 |
-
*
|
|
|
|
| 2738 |
*
|
| 2739 |
-
*
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2740 |
*
|
| 2741 |
-
*
|
| 2742 |
-
* reconstruction, expressed as a sum over block-boundary contributions.
|
| 2743 |
-
* Each block-boundary i produces a scalar signal that is then lifted
|
| 2744 |
-
* via ⊗ Trans(Δτ) into a vector that decays into the interior of the
|
| 2745 |
-
* blocks on either side of that boundary.
|
| 2746 |
*
|
| 2747 |
-
*
|
| 2748 |
*
|
| 2749 |
-
*
|
| 2750 |
-
*
|
| 2751 |
-
* n̂, the unit normal to the block boundary. In the 1-D block-
|
| 2752 |
-
* sequence space n̂ points in the inter-block direction, so this
|
| 2753 |
-
* equals the cross-boundary finite difference:
|
| 2754 |
*
|
| 2755 |
-
*
|
| 2756 |
*
|
| 2757 |
-
*
|
| 2758 |
*
|
| 2759 |
-
*
|
| 2760 |
-
* The boundary information I_boundary = the weight value at the
|
| 2761 |
-
* boundary element, expressed through the quantizer. Computed
|
| 2762 |
-
* from Phase 1 seeds for the boundary sub-block:
|
| 2763 |
*
|
| 2764 |
-
*
|
| 2765 |
-
*
|
| 2766 |
-
* This is not a step size or ratio — it is the actual dequantized
|
| 2767 |
-
* value of the boundary weight.
|
| 2768 |
-
*
|
| 2769 |
-
* Signal s_i = Proj_n̂(Grad(V)) · Quantize(I_boundary)
|
| 2770 |
-
* Scalar product at boundary i. Captures the signed energy of
|
| 2771 |
-
* the weight function at that boundary in the quantized domain.
|
| 2772 |
-
* Units: weight². Normalised by d_sub² to become dimensionless.
|
| 2773 |
-
*
|
| 2774 |
-
* ⊗ Trans(Δτ)
|
| 2775 |
-
* The tensor product lifts the scalar s_i into a vector over
|
| 2776 |
-
* the 256-element interior of the adjacent blocks. Trans(Δτ)
|
| 2777 |
-
* is parameterised by Δτ = d_{i-1}/d_i (the scale ratio between
|
| 2778 |
-
* adjacent blocks), which re-projects s_i from block i-1's
|
| 2779 |
-
* quantization space into block i's:
|
| 2780 |
-
*
|
| 2781 |
-
* right-propagation into block i at position k:
|
| 2782 |
-
* s_i · (d_{i-1}/d_i) · exp(−k / τ)
|
| 2783 |
-
*
|
| 2784 |
-
* left-propagation into block i-1 at position k:
|
| 2785 |
-
* s_i · (d_i/d_{i-1}) · exp(−(QK_K−1−k) / τ)
|
| 2786 |
-
*
|
| 2787 |
-
* The full reconstruction for block b at position k:
|
| 2788 |
-
*
|
| 2789 |
-
* W[b][k] = x[b][k] (original weights, not replaced)
|
| 2790 |
-
* − dc_bias[b] (zeroth-moment error correction, unchanged)
|
| 2791 |
-
* + s_left[b] · (d_{b-1}/d_b) · exp(−k/τ)
|
| 2792 |
-
* + s_right[b] · (d_{b+1}/d_b) · exp(−(QK_K−1−k)/τ)
|
| 2793 |
-
*
|
| 2794 |
-
* where s_left[b] = signal from boundary b (left edge of block b)
|
| 2795 |
-
* s_right[b] = signal from boundary b+1 (right edge of block b)
|
| 2796 |
-
*
|
| 2797 |
-
* The DC bias is subtracted (it is an error correction, not a signal).
|
| 2798 |
-
* The boundary signals are added (they encode the holographic surface).
|
| 2799 |
-
*
|
| 2800 |
-
* ── Implementation ──────────────────────────────────────────────
|
| 2801 |
-
*
|
| 2802 |
-
* Pre-pass (sequential) computes per block:
|
| 2803 |
-
* block_dc_bias[b] — scalar DC offset (existing, unchanged)
|
| 2804 |
-
* block_s_left[b] — left boundary signal (normalised, pre-scaled)
|
| 2805 |
-
* block_s_right[b] — right boundary signal (normalised, pre-scaled)
|
| 2806 |
-
*
|
| 2807 |
-
* Phase 4 (parallel) applies:
|
| 2808 |
-
* adj_x[k] = x[k] − dc_bias + s_left·fwd_decay[k] + s_right·rev_decay[k]
|
| 2809 |
-
*
|
| 2810 |
-
* Two precomputed decay tables (initialised once, thread-safe):
|
| 2811 |
-
* boundary_decay[k] = exp(−k / τ) forward (left → interior)
|
| 2812 |
-
* boundary_decay_rev[k] = exp(−(255−k) / τ) reversed (right → interior)
|
| 2813 |
* ══════════════════════════════════════════════════════════════════ */
|
| 2814 |
|
| 2815 |
-
#define DC_DECAY
|
| 2816 |
-
#define HOLO_TAU 32.0f /* Boundary signal decay length (elements) */
|
| 2817 |
-
#define HOLO_ALPHA 0.20f /* Boundary signal weight (fraction of one step) */
|
| 2818 |
-
|
| 2819 |
-
/* Precompute forward and reverse decay tables — read-only in Phase 4. */
|
| 2820 |
-
static float boundary_decay [QK_K];
|
| 2821 |
-
static float boundary_decay_rev[QK_K];
|
| 2822 |
-
{
|
| 2823 |
-
static int _decay_init = 0;
|
| 2824 |
-
if (!_decay_init) {
|
| 2825 |
-
int _dk;
|
| 2826 |
-
for (_dk = 0; _dk < QK_K; _dk++) {
|
| 2827 |
-
boundary_decay [_dk] = expf(-(float)_dk / HOLO_TAU);
|
| 2828 |
-
boundary_decay_rev[_dk] = expf(-(float)(QK_K-1-_dk)/ HOLO_TAU);
|
| 2829 |
-
}
|
| 2830 |
-
_decay_init = 1;
|
| 2831 |
-
}
|
| 2832 |
-
}
|
| 2833 |
|
| 2834 |
-
float *block_dc_bias
|
| 2835 |
-
float *block_s_left = (float *)calloc(n_blocks, sizeof(float));
|
| 2836 |
-
float *block_s_right = (float *)calloc(n_blocks, sizeof(float));
|
| 2837 |
|
| 2838 |
-
if (block_dc_bias
|
| 2839 |
float rolling_dc = 0.0f;
|
| 2840 |
|
| 2841 |
for (int64_t blk = 0; blk < n_blocks; blk++) {
|
| 2842 |
-
const float *bx
|
| 2843 |
int cidx = best_candidate[blk];
|
| 2844 |
float dm0 = gguf_fp16_to_fp32(candidate_d [blk][cidx]);
|
| 2845 |
float mm0 = gguf_fp16_to_fp32(candidate_dmin[blk][cidx]);
|
| 2846 |
|
| 2847 |
-
/*
|
| 2848 |
-
float dc_bias
|
| 2849 |
-
block_dc_bias[blk]
|
| 2850 |
|
| 2851 |
-
/*
|
| 2852 |
-
*
|
| 2853 |
-
*
|
| 2854 |
-
* Quantize(I_boundary): dequantized value of bx[0] using Phase 1
|
| 2855 |
-
* seeds for the first sub-block (j=0) of block blk.
|
| 2856 |
-
* s_left = g_left × Q(bx[0]) / d_sub² (dimensionless)
|
| 2857 |
-
* Pre-scaled by HOLO_ALPHA × d_sub × (d_{blk-1}/d_{blk}) */
|
| 2858 |
-
{
|
| 2859 |
-
float g_left = (blk > 0)
|
| 2860 |
-
? bx[0] - weights[(blk - 1) * QK_K + QK_K - 1]
|
| 2861 |
-
: 0.0f;
|
| 2862 |
-
|
| 2863 |
-
/* Quantize(I_boundary) for left edge: sub-block j=0 */
|
| 2864 |
-
float d_sub_l = dm0 * (float)seeds[blk].Ls[0];
|
| 2865 |
-
float m_sub_l = mm0 * (float)seeds[blk].Lm[0];
|
| 2866 |
-
float q_val_l = 0.0f;
|
| 2867 |
-
if (d_sub_l > 1e-15f) {
|
| 2868 |
-
int qi = gguf_nearest_int((bx[0] + m_sub_l) / d_sub_l);
|
| 2869 |
-
if (qi < 0) qi = 0; if (qi > 3) qi = 3;
|
| 2870 |
-
q_val_l = d_sub_l * (float)qi - m_sub_l;
|
| 2871 |
-
}
|
| 2872 |
-
|
| 2873 |
-
/* Scale ratio Trans(Δτ): d_{blk-1} / d_{blk} */
|
| 2874 |
-
float d_prev = (blk > 0 && seeds[blk-1].dm > 1e-15f)
|
| 2875 |
-
? seeds[blk-1].dm : dm0;
|
| 2876 |
-
float d_curr = (dm0 > 1e-15f) ? dm0 : 1.0f;
|
| 2877 |
-
float scale_ratio_l = d_prev / d_curr;
|
| 2878 |
-
if (scale_ratio_l < 0.1f) scale_ratio_l = 0.1f;
|
| 2879 |
-
if (scale_ratio_l > 10.f) scale_ratio_l = 10.f;
|
| 2880 |
-
|
| 2881 |
-
/* Normalise s = (g × Q) / d² then re-scale to weight units */
|
| 2882 |
-
float d2 = d_sub_l * d_sub_l;
|
| 2883 |
-
float s = (d2 > 1e-30f) ? (g_left * q_val_l / d2) : 0.0f;
|
| 2884 |
-
block_s_left[blk] = HOLO_ALPHA * s * d_sub_l * scale_ratio_l;
|
| 2885 |
-
}
|
| 2886 |
-
|
| 2887 |
-
/* ── Right boundary signal: boundary between block blk and blk+1 ──
|
| 2888 |
-
*
|
| 2889 |
-
* Same derivation but at the right edge (position QK_K-1,
|
| 2890 |
-
* sub-block j = N_SUB-1) looking into block blk+1. */
|
| 2891 |
-
{
|
| 2892 |
-
float g_right = (blk + 1 < n_blocks)
|
| 2893 |
-
? weights[(blk + 1) * QK_K] - bx[QK_K - 1]
|
| 2894 |
-
: 0.0f;
|
| 2895 |
-
|
| 2896 |
-
/* Quantize(I_boundary) for right edge: sub-block j=N_SUB-1 */
|
| 2897 |
-
float d_sub_r = dm0 * (float)seeds[blk].Ls[N_SUB - 1];
|
| 2898 |
-
float m_sub_r = mm0 * (float)seeds[blk].Lm[N_SUB - 1];
|
| 2899 |
-
float q_val_r = 0.0f;
|
| 2900 |
-
if (d_sub_r > 1e-15f) {
|
| 2901 |
-
int qi = gguf_nearest_int((bx[QK_K-1] + m_sub_r) / d_sub_r);
|
| 2902 |
-
if (qi < 0) qi = 0; if (qi > 3) qi = 3;
|
| 2903 |
-
q_val_r = d_sub_r * (float)qi - m_sub_r;
|
| 2904 |
-
}
|
| 2905 |
-
|
| 2906 |
-
/* Scale ratio Trans(Δτ): d_{blk+1} / d_{blk} */
|
| 2907 |
-
float d_next = (blk + 1 < n_blocks && seeds[blk+1].dm > 1e-15f)
|
| 2908 |
-
? seeds[blk+1].dm : dm0;
|
| 2909 |
-
float d_curr = (dm0 > 1e-15f) ? dm0 : 1.0f;
|
| 2910 |
-
float scale_ratio_r = d_next / d_curr;
|
| 2911 |
-
if (scale_ratio_r < 0.1f) scale_ratio_r = 0.1f;
|
| 2912 |
-
if (scale_ratio_r > 10.f) scale_ratio_r = 10.f;
|
| 2913 |
-
|
| 2914 |
-
float d2 = d_sub_r * d_sub_r;
|
| 2915 |
-
float s = (d2 > 1e-30f) ? (g_right * q_val_r / d2) : 0.0f;
|
| 2916 |
-
block_s_right[blk] = HOLO_ALPHA * s * d_sub_r * scale_ratio_r;
|
| 2917 |
-
}
|
| 2918 |
-
|
| 2919 |
-
/* ── DC residual for the next block's rolling_dc ── */
|
| 2920 |
float dc_res = 0.0f;
|
| 2921 |
int j, k;
|
| 2922 |
for (j = 0; j < N_SUB; j++) {
|
| 2923 |
float d_sub = dm0 * (float)candidate_Ls[blk][cidx][j];
|
| 2924 |
float m_sub = mm0 * (float)candidate_Lm[blk][cidx][j];
|
| 2925 |
-
int base = 16 * j;
|
| 2926 |
for (k = 0; k < 16; k++) {
|
| 2927 |
-
|
| 2928 |
-
float x_adj = bx[elem] - dc_bias
|
| 2929 |
-
+ block_s_left [blk] * boundary_decay [elem]
|
| 2930 |
-
+ block_s_right[blk] * boundary_decay_rev[elem];
|
| 2931 |
int q = 0;
|
| 2932 |
if (d_sub >= 1e-15f) {
|
| 2933 |
q = gguf_nearest_int((x_adj + m_sub) / d_sub);
|
| 2934 |
-
if (q < 0) q = 0;
|
|
|
|
| 2935 |
}
|
| 2936 |
-
float deq
|
| 2937 |
-
|
|
|
|
| 2938 |
}
|
| 2939 |
}
|
| 2940 |
rolling_dc = dc_res;
|
|
@@ -2959,28 +2820,19 @@ static void quantize_tensor_q2k_hpc(const float *weights, int64_t n_elements,
|
|
| 2959 |
int cidx = best_candidate[blk];
|
| 2960 |
uint8_t Ls_blk[16], Lm_blk[16];
|
| 2961 |
|
| 2962 |
-
/* ──
|
| 2963 |
-
*
|
| 2964 |
-
*
|
| 2965 |
-
*
|
| 2966 |
-
|
| 2967 |
-
* + s_right[b] · exp(−(QK_K−1−k)/τ) (right boundary signal)
|
| 2968 |
-
*
|
| 2969 |
-
* The two boundary signals decay inward from opposite edges and meet
|
| 2970 |
-
* in the middle. Together they enforce C¹ continuity across every
|
| 2971 |
-
* block boundary in the quantized domain. */
|
| 2972 |
-
float dc_adj = (block_dc_bias) ? block_dc_bias [blk] : 0.0f;
|
| 2973 |
-
float s_left = (block_s_left) ? block_s_left [blk] : 0.0f;
|
| 2974 |
-
float s_right = (block_s_right) ? block_s_right [blk] : 0.0f;
|
| 2975 |
|
|
|
|
|
|
|
| 2976 |
float adj_block_x[QK_K];
|
| 2977 |
{
|
| 2978 |
int _i;
|
| 2979 |
for (_i = 0; _i < QK_K; _i++)
|
| 2980 |
-
adj_block_x[_i] = block_x[_i]
|
| 2981 |
-
- dc_adj
|
| 2982 |
-
+ s_left * boundary_decay [_i]
|
| 2983 |
-
+ s_right * boundary_decay_rev[_i];
|
| 2984 |
}
|
| 2985 |
|
| 2986 |
memcpy(Ls_blk, candidate_Ls[blk][cidx], 16);
|
|
@@ -3229,159 +3081,6 @@ static void quantize_tensor_q2k_hpc(const float *weights, int64_t n_elements,
|
|
| 3229 |
Lm_blk[j] = best_lm;
|
| 3230 |
}
|
| 3231 |
|
| 3232 |
-
/* ══════════════════════════════════════════════════════════════
|
| 3233 |
-
* PHASE 4.5 — LLOYD-MAX CENTROID REFINEMENT (per sub-block)
|
| 3234 |
-
*
|
| 3235 |
-
* Standard WLS and grid search both assume uniform spacing between
|
| 3236 |
-
* the 4 representable values is MSE-optimal. For non-uniform weight
|
| 3237 |
-
* distributions (the typical case — transformer weights are heavy-
|
| 3238 |
-
* tailed near zero, sparse in the tails), the WLS-optimal (d, m)
|
| 3239 |
-
* does not align with the empirical centroids of the code partitions.
|
| 3240 |
-
*
|
| 3241 |
-
* Lloyd-Max iterates the assignment-then-centroid loop:
|
| 3242 |
-
*
|
| 3243 |
-
* 1. Assign: each weight → nearest representable value, code v∈{0..3}
|
| 3244 |
-
* 2. Centroid: c_v = empirical mean of weights assigned to v
|
| 3245 |
-
* 3. Project: c_v are 4 real numbers; find the arithmetic progression
|
| 3246 |
-
* {d·v − m : v∈{0,1,2,3}} that best fits c_v in MSE.
|
| 3247 |
-
* Closed-form solution from normal equations:
|
| 3248 |
-
*
|
| 3249 |
-
* d_new = (3·c_3 + c_2 − c_1 − 3·c_0) / 10
|
| 3250 |
-
* m_new = (−7·c_0 − 4·c_1 − c_2 + 2·c_3) / 10
|
| 3251 |
-
*
|
| 3252 |
-
* (Constants derived from Σq=6, Σq²=14, 4 codes total.)
|
| 3253 |
-
* 4. Re-quantise; repeat until (d, m) stop changing.
|
| 3254 |
-
*
|
| 3255 |
-
* The arithmetic-progression projection is the key constraint that
|
| 3256 |
-
* keeps the output in valid Q2_K format. In unconstrained Lloyd-Max,
|
| 3257 |
-
* the 4 centroids could be placed freely; here they must sit on an
|
| 3258 |
-
* AP determined by (d, m), which is exactly what Q2_K stores.
|
| 3259 |
-
*
|
| 3260 |
-
* Operating per sub-block: we refine (d_sub_j, m_sub_j) = (d·Ls_j,
|
| 3261 |
-
* m·Lm_j), then re-project onto integer (Ls, Lm) ∈ [0,15]. The
|
| 3262 |
-
* integer rounding can hurt, so we only accept the refined values
|
| 3263 |
-
* if they reduce the sub-block's weighted MSE.
|
| 3264 |
-
*
|
| 3265 |
-
* This is a genuine refinement on top of the grid search: the grid
|
| 3266 |
-
* search minimises element-wise MSE assuming uniform spacing is
|
| 3267 |
-
* locked in; Lloyd-Max iterates toward distribution-optimal spacing
|
| 3268 |
-
* given the actual empirical centroids.
|
| 3269 |
-
* ══════════════════════════════════════════════════════════════ */
|
| 3270 |
-
for (int j = 0; j < N_SUB; j++) {
|
| 3271 |
-
const float *sx = adj_block_x + 16 * j;
|
| 3272 |
-
uint8_t Ls_cur = Ls_blk[j];
|
| 3273 |
-
uint8_t Lm_cur = Lm_blk[j];
|
| 3274 |
-
|
| 3275 |
-
/* Baseline MSE for current (Ls, Lm) — only accept if we beat this */
|
| 3276 |
-
float baseline_err = 0.0f;
|
| 3277 |
-
{
|
| 3278 |
-
float d_sub = dm * (float)Ls_cur;
|
| 3279 |
-
float m_sub = mm * (float)Lm_cur;
|
| 3280 |
-
for (int k = 0; k < 16; k++) {
|
| 3281 |
-
float w_imp = (imat_importance)
|
| 3282 |
-
? imat_importance[blk * QK_K + 16*j + k] : 1.0f;
|
| 3283 |
-
int q;
|
| 3284 |
-
if (d_sub < 1e-15f) { q = 0; }
|
| 3285 |
-
else {
|
| 3286 |
-
q = gguf_nearest_int((sx[k] + m_sub) / d_sub);
|
| 3287 |
-
if (q < 0) q = 0; if (q > 3) q = 3;
|
| 3288 |
-
}
|
| 3289 |
-
float deq = d_sub * (float)q - m_sub;
|
| 3290 |
-
float diff = sx[k] - deq;
|
| 3291 |
-
baseline_err += diff * diff * w_imp;
|
| 3292 |
-
}
|
| 3293 |
-
}
|
| 3294 |
-
|
| 3295 |
-
/* Lloyd-Max iteration on (d_sub, m_sub) */
|
| 3296 |
-
float d_sub = dm * (float)Ls_cur;
|
| 3297 |
-
float m_sub = mm * (float)Lm_cur;
|
| 3298 |
-
float d_sub_best = d_sub, m_sub_best = m_sub;
|
| 3299 |
-
float lloyd_err = baseline_err;
|
| 3300 |
-
|
| 3301 |
-
const int MAX_LLOYD_ITERS = 6;
|
| 3302 |
-
for (int it = 0; it < MAX_LLOYD_ITERS; it++) {
|
| 3303 |
-
if (d_sub < 1e-15f) break;
|
| 3304 |
-
|
| 3305 |
-
/* Step 1+2: assign and accumulate weighted centroids */
|
| 3306 |
-
double sum_v[4] = {0.0, 0.0, 0.0, 0.0};
|
| 3307 |
-
double cnt_v[4] = {0.0, 0.0, 0.0, 0.0};
|
| 3308 |
-
for (int k = 0; k < 16; k++) {
|
| 3309 |
-
float w_imp = (imat_importance)
|
| 3310 |
-
? imat_importance[blk * QK_K + 16*j + k] : 1.0f;
|
| 3311 |
-
int q = gguf_nearest_int((sx[k] + m_sub) / d_sub);
|
| 3312 |
-
if (q < 0) q = 0; if (q > 3) q = 3;
|
| 3313 |
-
sum_v[q] += (double)sx[k] * (double)w_imp;
|
| 3314 |
-
cnt_v[q] += (double)w_imp;
|
| 3315 |
-
}
|
| 3316 |
-
|
| 3317 |
-
/* Fill empty bins with extrapolation from neighbours to avoid
|
| 3318 |
-
* degenerate centroids when a code is unused */
|
| 3319 |
-
double c[4];
|
| 3320 |
-
int n_empty = 0;
|
| 3321 |
-
for (int v = 0; v < 4; v++) {
|
| 3322 |
-
if (cnt_v[v] > 1e-15) {
|
| 3323 |
-
c[v] = sum_v[v] / cnt_v[v];
|
| 3324 |
-
} else {
|
| 3325 |
-
c[v] = (double)(d_sub * (float)v - m_sub); /* fallback to current AP */
|
| 3326 |
-
n_empty++;
|
| 3327 |
-
}
|
| 3328 |
-
}
|
| 3329 |
-
if (n_empty >= 3) break; /* distribution too sparse — give up */
|
| 3330 |
-
|
| 3331 |
-
/* Step 3: AP projection — closed form for arithmetic progression
|
| 3332 |
-
* minimising Σ_v (c_v − (d·v − m))² */
|
| 3333 |
-
float d_new = (float)((3.0*c[3] + c[2] - c[1] - 3.0*c[0]) / 10.0);
|
| 3334 |
-
float m_new = (float)((-7.0*c[0] - 4.0*c[1] - c[2] + 2.0*c[3]) / 10.0);
|
| 3335 |
-
if (d_new <= 1e-15f) break;
|
| 3336 |
-
if (m_new < 0.0f) m_new = 0.0f; /* keep m non-negative */
|
| 3337 |
-
|
| 3338 |
-
/* Step 4: project onto integer (Ls, Lm) and evaluate */
|
| 3339 |
-
int Ls_try = (dm > 1e-15f) ? gguf_nearest_int(d_new / dm) : Ls_cur;
|
| 3340 |
-
int Lm_try = (mm > 1e-15f) ? gguf_nearest_int(m_new / mm) : Lm_cur;
|
| 3341 |
-
if (Ls_try < 1) Ls_try = 1;
|
| 3342 |
-
if (Ls_try > 15) Ls_try = 15;
|
| 3343 |
-
if (Lm_try < 0) Lm_try = 0;
|
| 3344 |
-
if (Lm_try > 15) Lm_try = 15;
|
| 3345 |
-
|
| 3346 |
-
float d_sub_try = dm * (float)Ls_try;
|
| 3347 |
-
float m_sub_try = mm * (float)Lm_try;
|
| 3348 |
-
|
| 3349 |
-
float try_err = 0.0f;
|
| 3350 |
-
for (int k = 0; k < 16; k++) {
|
| 3351 |
-
float w_imp = (imat_importance)
|
| 3352 |
-
? imat_importance[blk * QK_K + 16*j + k] : 1.0f;
|
| 3353 |
-
int q;
|
| 3354 |
-
if (d_sub_try < 1e-15f) { q = 0; }
|
| 3355 |
-
else {
|
| 3356 |
-
q = gguf_nearest_int((sx[k] + m_sub_try) / d_sub_try);
|
| 3357 |
-
if (q < 0) q = 0; if (q > 3) q = 3;
|
| 3358 |
-
}
|
| 3359 |
-
float deq = d_sub_try * (float)q - m_sub_try;
|
| 3360 |
-
float diff = sx[k] - deq;
|
| 3361 |
-
try_err += diff * diff * w_imp;
|
| 3362 |
-
}
|
| 3363 |
-
|
| 3364 |
-
/* Only accept if strictly improves; this is our safety net */
|
| 3365 |
-
if (try_err < lloyd_err) {
|
| 3366 |
-
lloyd_err = try_err;
|
| 3367 |
-
d_sub_best = d_sub_try;
|
| 3368 |
-
m_sub_best = m_sub_try;
|
| 3369 |
-
Ls_cur = (uint8_t)Ls_try;
|
| 3370 |
-
Lm_cur = (uint8_t)Lm_try;
|
| 3371 |
-
d_sub = d_sub_try;
|
| 3372 |
-
m_sub = m_sub_try;
|
| 3373 |
-
} else {
|
| 3374 |
-
/* Converged or projection rounding hurt — stop */
|
| 3375 |
-
break;
|
| 3376 |
-
}
|
| 3377 |
-
}
|
| 3378 |
-
|
| 3379 |
-
if (lloyd_err < baseline_err) {
|
| 3380 |
-
Ls_blk[j] = Ls_cur;
|
| 3381 |
-
Lm_blk[j] = Lm_cur;
|
| 3382 |
-
}
|
| 3383 |
-
}
|
| 3384 |
-
|
| 3385 |
output[blk].d = gguf_fp32_to_fp16(dm);
|
| 3386 |
output[blk].dmin = gguf_fp32_to_fp16(mm);
|
| 3387 |
|
|
@@ -3602,8 +3301,6 @@ static void quantize_tensor_q2k_hpc(const float *weights, int64_t n_elements,
|
|
| 3602 |
free(_tl_graphs);
|
| 3603 |
|
| 3604 |
free(block_dc_bias);
|
| 3605 |
-
free(block_s_left);
|
| 3606 |
-
free(block_s_right);
|
| 3607 |
free(seeds);
|
| 3608 |
free(candidate_errors);
|
| 3609 |
free(candidate_d);
|
|
|
|
| 2732 |
}
|
| 2733 |
|
| 2734 |
/* ══════════════════════════════════════════════════════════════════
|
| 2735 |
+
* PHASE 3.9 — ROLLING DC BOUNDARY CONDITION PRE-PASS
|
| 2736 |
*
|
| 2737 |
+
* Transforms the tensor from a collection of isolated 256-element
|
| 2738 |
+
* Q2_K superblocks into a single, continuous error-cancelling waveform.
|
| 2739 |
*
|
| 2740 |
+
* After Phase 3 has selected the optimal (d, dmin) candidate for every
|
| 2741 |
+
* block, this sequential pass computes the net DC residual left by each
|
| 2742 |
+
* block using a cheap round-nearest forward quantization, then feeds the
|
| 2743 |
+
* negated, exponentially-decayed residual as a correction bias into the
|
| 2744 |
+
* WLS solver of the immediately following block.
|
| 2745 |
*
|
| 2746 |
+
* Mathematically, for block N with final DC residual R_N = Σ εᵢ:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2747 |
*
|
| 2748 |
+
* dc_bias[N+1] = −DC_DECAY × R_N / QK_K (per-element offset)
|
| 2749 |
*
|
| 2750 |
+
* Block N+1's WLS targets become x′ᵢ = xᵢ − dc_bias[N+1], steering the
|
| 2751 |
+
* quantizer toward codes whose reconstruction deq ≈ x′, so that
|
|
|
|
|
|
|
|
|
|
| 2752 |
*
|
| 2753 |
+
* Σ (xᵢ − deqᵢ) ≈ dc_bias[N+1] × QK_K = −DC_DECAY × R_N
|
| 2754 |
*
|
| 2755 |
+
* The accumulated cross-block DC collapses geometrically:
|
| 2756 |
*
|
| 2757 |
+
* R₀, DC_DECAY·R₀, DC_DECAY²·R₀, … → 0
|
|
|
|
|
|
|
|
|
|
| 2758 |
*
|
| 2759 |
+
* The result is written into block_dc_bias[n_blocks]. Phase 4 reads
|
| 2760 |
+
* this array (safe: written sequentially before the parallel loop).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2761 |
* ══════════════════════════════════════════════════════════════════ */
|
| 2762 |
|
| 2763 |
+
#define DC_DECAY 0.85f /* Boundary-condition leak factor (0 = isolated, 1 = full) */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2764 |
|
| 2765 |
+
float *block_dc_bias = (float *)calloc(n_blocks, sizeof(float));
|
|
|
|
|
|
|
| 2766 |
|
| 2767 |
+
if (block_dc_bias) {
|
| 2768 |
float rolling_dc = 0.0f;
|
| 2769 |
|
| 2770 |
for (int64_t blk = 0; blk < n_blocks; blk++) {
|
| 2771 |
+
const float *bx = weights + blk * QK_K;
|
| 2772 |
int cidx = best_candidate[blk];
|
| 2773 |
float dm0 = gguf_fp16_to_fp32(candidate_d [blk][cidx]);
|
| 2774 |
float mm0 = gguf_fp16_to_fp32(candidate_dmin[blk][cidx]);
|
| 2775 |
|
| 2776 |
+
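/* Bias applied to THIS block's WLS targets.
 * NOTE(review): the Phase 3.9 header defines dc_bias = −DC_DECAY × R / QK_K
 * (a negated residual), but no negation is applied here or where rolling_dc
 * is assigned — confirm the intended sign. */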
|
| 2777 |
+
float dc_bias = (DC_DECAY * rolling_dc) / (float)QK_K;
|
| 2778 |
+
block_dc_bias[blk] = dc_bias;
|
| 2779 |
|
| 2780 |
+
/* Quick round-nearest quant to estimate DC residual for NEXT block.
|
| 2781 |
+
* We quantize the adjusted target x′ = x − dc_bias, then measure
|
| 2782 |
+
* the residual of the ORIGINAL weight against the chosen code. */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2783 |
float dc_res = 0.0f;
|
| 2784 |
int j, k;
|
| 2785 |
for (j = 0; j < N_SUB; j++) {
|
| 2786 |
float d_sub = dm0 * (float)candidate_Ls[blk][cidx][j];
|
| 2787 |
float m_sub = mm0 * (float)candidate_Lm[blk][cidx][j];
|
|
|
|
| 2788 |
for (k = 0; k < 16; k++) {
|
| 2789 |
+
float x_adj = bx[16*j + k] - dc_bias;
|
|
|
|
|
|
|
|
|
|
| 2790 |
int q = 0;
|
| 2791 |
if (d_sub >= 1e-15f) {
|
| 2792 |
q = gguf_nearest_int((x_adj + m_sub) / d_sub);
|
| 2793 |
+
if (q < 0) q = 0;
|
| 2794 |
+
if (q > 3) q = 3;
|
| 2795 |
}
|
| 2796 |
+
float deq = d_sub * (float)q - m_sub;
|
| 2797 |
+
/* Residual against ORIGINAL weight (not adjusted) */
|
| 2798 |
+
dc_res += bx[16*j + k] - deq;
|
| 2799 |
}
|
| 2800 |
}
|
| 2801 |
rolling_dc = dc_res;
|
|
|
|
| 2820 |
int cidx = best_candidate[blk];
|
| 2821 |
uint8_t Ls_blk[16], Lm_blk[16];
|
| 2822 |
|
| 2823 |
+
/* ── Rolling DC boundary condition ──────────────────────────────
|
| 2824 |
+
* dc_adj shifts every WLS target in this block so that the net
|
| 2825 |
+
* quantisation error steers toward cancelling the previous block's
|
| 2826 |
+
* DC residual (written by the sequential Phase 3.9 pre-pass). */
|
| 2827 |
+
float dc_adj = (block_dc_bias) ? block_dc_bias[blk] : 0.0f;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2828 |
|
| 2829 |
+
/* Adjusted weight view — WLS and Shor work on this array;
|
| 2830 |
+
* the final error is always reported against the original block_x. */
|
| 2831 |
float adj_block_x[QK_K];
|
| 2832 |
{
|
| 2833 |
int _i;
|
| 2834 |
for (_i = 0; _i < QK_K; _i++)
|
| 2835 |
+
adj_block_x[_i] = block_x[_i] - dc_adj;
|
|
|
|
|
|
|
|
|
|
| 2836 |
}
|
| 2837 |
|
| 2838 |
memcpy(Ls_blk, candidate_Ls[blk][cidx], 16);
|
|
|
|
| 3081 |
Lm_blk[j] = best_lm;
|
| 3082 |
}
|
| 3083 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3084 |
output[blk].d = gguf_fp32_to_fp16(dm);
|
| 3085 |
output[blk].dmin = gguf_fp32_to_fp16(mm);
|
| 3086 |
|
|
|
|
| 3301 |
free(_tl_graphs);
|
| 3302 |
|
| 3303 |
free(block_dc_bias);
|
|
|
|
|
|
|
| 3304 |
free(seeds);
|
| 3305 |
free(candidate_errors);
|
| 3306 |
free(candidate_d);
|