It is currently calibrated only for Gemma.
Browse files- born_rule.h +277 -0
- convert_hf_to_gguf.py +0 -0
- generate_imatrix.py +770 -0
- gguf_format.h +707 -0
- hexstate_quantize.c +0 -0
- hexstate_requantize.py +1190 -0
- hpc_amplitude.h +418 -0
- hpc_contract.h +422 -0
- hpc_graph.h +1062 -0
- hpc_mobius.h +833 -0
- imatrix_reader.h +207 -0
- makefile.quantize +43 -0
- quhit_hexagram.c +501 -0
- quhit_hexagram.h +207 -0
- quhit_triality.c +0 -0
- quhit_triality.h +387 -0
- s6_exotic.c +755 -0
- s6_exotic.h +149 -0
- safetensors_reader.h +788 -0
- tokenizer_reader.h +502 -0
born_rule.h
ADDED
|
@@ -0,0 +1,277 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* born_rule.h — Reality's Born Rule, Reverse-Engineered
|
| 3 |
+
*
|
| 4 |
+
* Extracted by probing the physical substrate's IEEE-754 implementation.
|
| 5 |
+
* Every constant was derived from measurement, not from a textbook.
|
| 6 |
+
*
|
| 7 |
+
* The Born rule says P(i) = |ψ_i|². Reality computes this as:
|
| 8 |
+
* P = re*re + im*im (two MULs, one ADD — no FMA by default)
|
| 9 |
+
*
|
| 10 |
+
* We provide three implementations:
|
| 11 |
+
* 1. EXACT: standard re²+im² (matches reality's rounding)
|
| 12 |
+
* 2. FAST: bit-hack squaring (approximate, no MUL needed)
|
| 13 |
+
* 3. QUAKE: bit-hack 1/total + Newton (fast normalization)
|
| 14 |
+
*
|
| 15 |
+
* Generated by born_extract.c
|
| 16 |
+
*/
|
| 17 |
+
|
| 18 |
+
#ifndef BORN_RULE_H
|
| 19 |
+
#define BORN_RULE_H
|
| 20 |
+
|
| 21 |
+
#include <stdint.h>
|
| 22 |
+
#include <string.h>
|
| 23 |
+
#include <math.h>
|
| 24 |
+
|
| 25 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 26 |
+
* MAGIC CONSTANTS — derived from arithmetic.h
|
| 27 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 28 |
+
|
| 29 |
+
/* Bit pattern of the double 1.0, i.e. bias (1023) x 2^52. Subtracted by the
 * bit-hack squaring in born_prob_fast. */
#define BORN_MAGIC_SQ 0x3FF0000000000000ULL
/* Twice the bit pattern of 1.0 (2 x 1023 x 2^52). Seed constant for the
 * bit-hack reciprocal in born_fast_recip. */
#define BORN_MAGIC_RECIP 0x7FE0000000000000ULL
/* Seed constant for the bit-hack inverse square root in born_fast_isqrt.
 * Original author's note: libm-oracle optimal (51.91 bits with 4N FMA). */
#define BORN_MAGIC_ISQRT 0x5FE6D826D36047EFULL
|
| 32 |
+
|
| 33 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 34 |
+
* BIT-LEVEL UTILITIES
|
| 35 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 36 |
+
|
| 37 |
+
/* Return the raw 64-bit pattern of a double.
 * memcpy is used for the type pun so no strict-aliasing rule is violated. */
static inline uint64_t _born_d2b(double x) {
    uint64_t bits;
    memcpy(&bits, &x, sizeof bits);
    return bits;
}
|
| 40 |
+
|
| 41 |
+
/* Return the double whose raw 64-bit pattern is `b` (inverse of _born_d2b).
 * memcpy is used for the type pun so no strict-aliasing rule is violated. */
static inline double _born_b2d(uint64_t b) {
    double value;
    memcpy(&value, &b, sizeof value);
    return value;
}
|
| 44 |
+
|
| 45 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 46 |
+
* BORN RULE: EXACT — matches reality's rounding
|
| 47 |
+
*
|
| 48 |
+
* P = re² + im²
|
| 49 |
+
* This is what reality does. Same ULP rounding.
|
| 50 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 51 |
+
|
| 52 |
+
/* Exact Born probability |re + i*im|^2, computed as two products and a sum. */
static inline double born_prob_exact(double re, double im) {
    const double re_sq = re * re;
    const double im_sq = im * im;
    return re_sq + im_sq;
}
|
| 55 |
+
|
| 56 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 57 |
+
* BORN RULE: FAST — bit-hack squaring, no libm
|
| 58 |
+
*
|
| 59 |
+
* bits(x²) ≈ 2×bits(|x|) - MAGIC_SQ
|
| 60 |
+
* Accuracy: ~1e-3 relative error (sufficient for sampling)
|
| 61 |
+
* Speed: eliminates multiply instructions
|
| 62 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 63 |
+
|
| 64 |
+
static inline double born_prob_fast(double re, double im) {
|
| 65 |
+
uint64_t rb = _born_d2b(re) & 0x7FFFFFFFFFFFFFFFULL;
|
| 66 |
+
uint64_t ib = _born_d2b(im) & 0x7FFFFFFFFFFFFFFFULL;
|
| 67 |
+
/* Handle exact zero (bits=0 would underflow the subtraction) */
|
| 68 |
+
double re2 = rb ? _born_b2d(2*rb - BORN_MAGIC_SQ) : 0.0;
|
| 69 |
+
double im2 = ib ? _born_b2d(2*ib - BORN_MAGIC_SQ) : 0.0;
|
| 70 |
+
return re2 + im2;
|
| 71 |
+
}
|
| 72 |
+
|
| 73 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 74 |
+
* FAST INVERSE SQRT β FMA-accelerated Newton on bit-hack
|
| 75 |
+
*
|
| 76 |
+
* Sidechannel probe (probe_reality.c) results:
|
| 77 |
+
* β’ Bit-hack + 4N plain: 51.6 bits, 2.2 ns
|
| 78 |
+
* β’ Bit-hack + 4N FMA: 51.6 bits, 2.0 ns β WINNER
|
| 79 |
+
* β’ SSE rsqrtss + 3N: 51.5 bits, 2.0 ns
|
| 80 |
+
* β’ Householder4 2-iter: 51.1 bits, 2.4 ns
|
| 81 |
+
* β’ libm 1/sqrt: 52.0 bits, 2.5 ns
|
| 82 |
+
*
|
| 83 |
+
* Quantum-discovered constant: 0x5FE6EB06D314E41A
|
| 84 |
+
* (ITE search over 6^8=1.68M configurations)
|
| 85 |
+
*
|
| 86 |
+
* FMA fuses multiply-add β 1 fewer rounding error per step,
|
| 87 |
+
* 10% faster than plain multiply chain.
|
| 88 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 89 |
+
|
| 90 |
+
static inline double born_fast_isqrt(double x) {
|
| 91 |
+
uint64_t i = _born_d2b(x);
|
| 92 |
+
i = BORN_MAGIC_ISQRT - (i >> 1);
|
| 93 |
+
double y = _born_b2d(i);
|
| 94 |
+
double hx = -0.5 * x;
|
| 95 |
+
#if defined(__FMA__) || defined(__AVX2__)
|
| 96 |
+
y = y * fma(hx * y, y, 1.5); /* FMA Newton 1: ~4.5 β 9 bits */
|
| 97 |
+
y = y * fma(hx * y, y, 1.5); /* FMA Newton 2: 9 β 17.7 bits */
|
| 98 |
+
y = y * fma(hx * y, y, 1.5); /* FMA Newton 3: 17.7 β 34.9 bits */
|
| 99 |
+
y = y * fma(hx * y, y, 1.5); /* FMA Newton 4: 34.9 β 51.6 bits */
|
| 100 |
+
#else
|
| 101 |
+
y = y * (1.5 + hx * y * y); /* fallback: plain multiply chain */
|
| 102 |
+
y = y * (1.5 + hx * y * y);
|
| 103 |
+
y = y * (1.5 + hx * y * y);
|
| 104 |
+
y = y * (1.5 + hx * y * y);
|
| 105 |
+
#endif
|
| 106 |
+
return y;
|
| 107 |
+
}
|
| 108 |
+
|
| 109 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 110 |
+
* FAST SQRT β derived from isqrt: sqrt(x) = x * isqrt(x)
|
| 111 |
+
*
|
| 112 |
+
* 51.6 bits precision, ~2.3 ns (1 extra multiply over isqrt).
|
| 113 |
+
* Faster than sqrtsd (5.1 ns) and libm sqrt (2.5 ns).
|
| 114 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 115 |
+
|
| 116 |
+
/* Fast sqrt(x), computed as x * born_fast_isqrt(x).
 * Only meaningful where born_fast_isqrt is: finite x >= 0. */
static inline double born_fast_sqrt(double x) {
    const double inv_root = born_fast_isqrt(x);
    return x * inv_root;
}
|
| 119 |
+
|
| 120 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 121 |
+
* FAST RECIPROCAL β bit-hack 1/x
|
| 122 |
+
*
|
| 123 |
+
* 1 Newton iteration β ~8 bits precision.
|
| 124 |
+
* Sufficient for Jacobi self-correcting iterations.
|
| 125 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 126 |
+
|
| 127 |
+
static inline double born_fast_recip(double x) {
|
| 128 |
+
uint64_t i = _born_d2b(x);
|
| 129 |
+
i = BORN_MAGIC_RECIP - i; /* initial approximation */
|
| 130 |
+
double y = _born_b2d(i);
|
| 131 |
+
y = y * (2.0 - x * y); /* Newton 1 (8 bits) */
|
| 132 |
+
return y;
|
| 133 |
+
}
|
| 134 |
+
|
| 135 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 136 |
+
* LAYER 9: PRECISE INVERSE SQRT β SSE rsqrtss + 2 Newton
|
| 137 |
+
*
|
| 138 |
+
* Sidechannel probe (substrate_probe_isqrt.c) showed:
|
| 139 |
+
* β’ SSE rsqrtss gives 12-bit initial guess via HARDWARE
|
| 140 |
+
* β’ 2 Newton iterations: 12β24β46 bits (quadratic convergence)
|
| 141 |
+
* β’ Cost: 4.3 cycles β SAME speed as the 9-bit Quake hack!
|
| 142 |
+
* β’ On i7-14700: libm 1/sqrt = 5.4cy, Quake = 4.2cy, SSE+2N = 4.3cy
|
| 143 |
+
*
|
| 144 |
+
* Use this for ONE-SHOT precision paths (Ο computation, normalization).
|
| 145 |
+
* Keep born_fast_isqrt for self-correcting Jacobi inner loops.
|
| 146 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 147 |
+
|
| 148 |
+
/* 1/sqrt(x) from the SSE rsqrtss hardware estimate plus two Newton steps.
 *
 * Special inputs are handled up front:
 *   x == 0          -> +infinity
 *   x < 0 or NaN    -> NaN
 *   x == +infinity  -> 0
 *
 * rsqrtss works on single precision. A double outside float range would
 * convert to 0 or infinity and poison the Newton steps, so x is first
 * rescaled by an even power of two into [2^-100, 2^100] and the result is
 * multiplied by the matching power afterwards. Powers of two are exact, so
 * inputs already in that range give the same value as the unscaled form.
 *
 * On targets without GCC-style x86 SSE inline asm this falls back to
 * 1.0 / sqrt(x).
 */
static inline double born_precise_isqrt(double x) {
    if (!(x > 0.0)) {
        return x == 0.0 ? (double)INFINITY : (double)NAN;
    }
    if (x == (double)INFINITY) {
        return 0.0;
    }
#if (defined(__GNUC__) || defined(__clang__)) && defined(__SSE__) && \
    (defined(__x86_64__) || defined(__i386__))
    double scale = 1.0;
    while (x < 0x1p-100) {       /* 1/sqrt(x * 2^200) = 2^-100 / sqrt(x) */
        x *= 0x1p200;
        scale *= 0x1p100;
    }
    while (x > 0x1p100) {        /* 1/sqrt(x * 2^-200) = 2^100 / sqrt(x) */
        x *= 0x1p-200;
        scale *= 0x1p-100;
    }

    const float xf = (float)x;
    float yf;
    __asm__("rsqrtss %1, %0" : "=x"(yf) : "x"(xf));

    double y = (double)yf;
    y = y * (1.5 - 0.5 * x * y * y);   /* Newton refinement 1 */
    y = y * (1.5 - 0.5 * x * y * y);   /* Newton refinement 2 */
    return y * scale;
#else
    return 1.0 / sqrt(x);
#endif
}
|
| 159 |
+
|
| 160 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 161 |
+
* LAYER 9: PRECISE RECIPROCAL β SSE rcpss + 2 Newton
|
| 162 |
+
*
|
| 163 |
+
* Sidechannel probe showed born_fast_recip (6 bits) saves
|
| 164 |
+
* ZERO cycles vs hardware 1/x (both 4.3cy on i7-14700).
|
| 165 |
+
* SSE rcpss gives 12-bit seed β 2 Newton β 46 bits.
|
| 166 |
+
* Same speed, 40 more bits of precision.
|
| 167 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 168 |
+
|
| 169 |
+
/* 1/x from the SSE rcpss hardware estimate plus two Newton steps.
 *
 * rcpss works on single precision. When |x| lies outside [2^-100, 2^100]
 * (which includes 0, infinities and NaN) the float conversion would lose
 * the value, so those inputs use a plain division instead. The same
 * division is the fallback on targets without GCC-style x86 SSE inline asm.
 */
static inline double born_precise_recip(double x) {
    const double magnitude = x < 0.0 ? -x : x;
    if (!(magnitude >= 0x1p-100 && magnitude <= 0x1p100)) {
        return 1.0 / x;
    }
#if (defined(__GNUC__) || defined(__clang__)) && defined(__SSE__) && \
    (defined(__x86_64__) || defined(__i386__))
    const float xf = (float)x;
    float yf;
    __asm__("rcpss %1, %0" : "=x"(yf) : "x"(xf));

    double y = (double)yf;
    y = y * (2.0 - x * y);   /* Newton refinement 1 */
    y = y * (2.0 - x * y);   /* Newton refinement 2 */
    return y;
#else
    return 1.0 / x;
#endif
}
|
| 180 |
+
|
| 181 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 182 |
+
* BORN SAMPLING β Complete measurement implementation
|
| 183 |
+
*
|
| 184 |
+
* Given an array of complex amplitudes and a random double
|
| 185 |
+
* in [0,1), returns the measured outcome index.
|
| 186 |
+
*
|
| 187 |
+
* This is the complete Born rule: build CDF, sample.
|
| 188 |
+
* Uses bit-hack normalization for speed.
|
| 189 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 190 |
+
|
| 191 |
+
/* Sample a measurement outcome from complex amplitudes (Born rule).
 *
 * re, im   : real and imaginary parts of the `dim` amplitudes
 * rand_01  : uniform random number, expected in [0, 1)
 * returns  : index of the sampled outcome, or -1 when dim <= 0
 *
 * The state need not be normalised: rand_01 is scaled by the actual total
 * probability before the cumulative scan. If rounding lets the scan finish
 * without crossing the target, the last outcome with non-zero probability
 * is returned (or dim - 1 when every amplitude is zero), so a
 * zero-probability outcome is never chosen while a non-zero one exists.
 */
static inline int born_sample(const double *re, const double *im,
                              int dim, double rand_01)
{
    if (dim <= 0) {
        return -1;
    }

    /* Pass 1: total probability mass. */
    double total = 0.0;
    for (int i = 0; i < dim; i++) {
        total += re[i] * re[i] + im[i] * im[i];
    }

    /* Scale the random draw to the actual total (absorbs rounding drift). */
    const double target = rand_01 * total;

    /* Pass 2: first index whose cumulative probability exceeds the target. */
    double running = 0.0;
    int last_nonzero = -1;
    for (int i = 0; i < dim; i++) {
        const double p = re[i] * re[i] + im[i] * im[i];
        running += p;
        if (running > target) {
            return i;
        }
        if (p > 0.0) {
            last_nonzero = i;
        }
    }
    return last_nonzero >= 0 ? last_nonzero : dim - 1;
}
|
| 214 |
+
|
| 215 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 216 |
+
* BORN COLLAPSE β Post-measurement state update
|
| 217 |
+
*
|
| 218 |
+
* After measuring outcome k, collapse to |k⟩ and renormalize.
|
| 219 |
+
* Uses Quake fast inverse sqrt for the renormalization.
|
| 220 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 221 |
+
|
| 222 |
+
/* Collapse the state onto a single measured outcome and renormalise it.
 *
 * re, im   : amplitude arrays of length `dim`, updated in place
 * outcome  : index of the measured basis state
 *
 * Every amplitude except `outcome` is set to zero; the surviving amplitude
 * is scaled by born_fast_isqrt(|amplitude|^2). An `outcome` outside
 * [0, dim) leaves the state untouched instead of reading out of bounds.
 */
static inline void born_collapse(double *re, double *im,
                                 int dim, int outcome)
{
    if (outcome < 0 || outcome >= dim) {
        return;
    }

    const double kept_re = re[outcome];
    const double kept_im = im[outcome];
    const double inv_norm = born_fast_isqrt(kept_re * kept_re + kept_im * kept_im);

    for (int i = 0; i < dim; i++) {
        re[i] = 0.0;
        im[i] = 0.0;
    }
    re[outcome] = kept_re * inv_norm;
    im[outcome] = kept_im * inv_norm;
}
|
| 239 |
+
|
| 240 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 241 |
+
* BORN PARTIAL COLLAPSE β For entangled subsystems
|
| 242 |
+
*
|
| 243 |
+
* After measuring subsystem A with outcome k, renormalize
|
| 244 |
+
* the joint state. Zero all amplitudes where A ≠ k.
|
| 245 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 246 |
+
|
| 247 |
+
/* Collapse a joint state after subsystem A was measured with `outcome_a`.
 *
 * re, im      : amplitude arrays of length dim_a * dim_b, updated in place
 * which_side  : 0 takes A's index as i / dim_b, otherwise as i % dim_b
 *
 * Amplitudes whose A-index differs from outcome_a are zeroed. The rest are
 * rescaled by born_fast_isqrt(surviving probability), but only when that
 * probability exceeds 1e-30; otherwise the survivors are left unscaled.
 *
 * NOTE(review): for which_side != 0 the index is taken modulo dim_b, not
 * dim_a. That is equivalent when dim_a == dim_b; confirm the intended
 * memory layout for unequal dimensions.
 */
static inline void born_partial_collapse(
    double *re, double *im,
    int dim_a, int dim_b,
    int outcome_a,
    int which_side /* 0=A is rows, 1=A is columns */
) {
    const int dim = dim_a * dim_b;
    double surviving_prob = 0.0;

    /* Zero the non-matching entries and accumulate what survives. */
    for (int i = 0; i < dim; i++) {
        const int a_idx = (which_side == 0) ? (i / dim_b) : (i % dim_b);
        if (a_idx == outcome_a) {
            surviving_prob += re[i] * re[i] + im[i] * im[i];
        } else {
            re[i] = 0.0;
            im[i] = 0.0;
        }
    }

    if (!(surviving_prob > 1e-30)) {
        return;   /* nothing meaningful to renormalise */
    }

    const double inv_norm = born_fast_isqrt(surviving_prob);
    for (int i = 0; i < dim; i++) {
        re[i] *= inv_norm;
        im[i] *= inv_norm;
    }
}
|
| 276 |
+
|
| 277 |
+
#endif /* BORN_RULE_H */
|
convert_hf_to_gguf.py
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
generate_imatrix.py
ADDED
|
@@ -0,0 +1,770 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
HExState Importance Matrix Generator — HPC-Enhanced iMatrix from GGUF
|
| 4 |
+
|
| 5 |
+
Runs transformer forward passes over calibration text to collect per-channel
|
| 6 |
+
E[x²] activation statistics, then uses HPC triality BP to propagate importance
|
| 7 |
+
across layers. Outputs llama.cpp-compatible .dat imatrix files.
|
| 8 |
+
|
| 9 |
+
Usage:
|
| 10 |
+
python3 generate_imatrix.py model.gguf calibration.txt -o imatrix.dat
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import struct
|
| 14 |
+
import sys
|
| 15 |
+
import os
|
| 16 |
+
import time
|
| 17 |
+
import mmap
|
| 18 |
+
import ctypes
|
| 19 |
+
import numpy as np
|
| 20 |
+
from collections import OrderedDict
|
| 21 |
+
|
| 22 |
+
# βββ Constants ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 23 |
+
GGUF_MAGIC = 0x46554747  # the ASCII bytes "GGUF" read as a little-endian uint32
ALIGNMENT = 32
QK_K = 256
QK4_0 = 32
QK8_0 = 32

GGML_TYPE_F32 = 0
GGML_TYPE_F16 = 1
GGML_TYPE_Q4_0 = 2
GGML_TYPE_Q8_0 = 8
GGML_TYPE_Q2_K = 10
GGML_TYPE_BF16 = 30

# Number of weights per block, keyed by ggml type id.
TYPE_BLOCK_SIZE = {
    0: 1, 1: 1, 2: 32, 3: 32, 6: 32, 7: 32,
    8: 32, 9: 32, 10: 256, 11: 256, 12: 256,
    13: 256, 14: 256, 15: 256, 30: 1,
}
# Bytes per block, keyed by ggml type id.
# Q5_0 (id 6) is 2 + 4 + 16 = 22 bytes and Q5_1 (id 7) is 2 + 2 + 4 + 16 = 24 bytes.
TYPE_BLOCK_BYTES = {
    0: 4, 1: 2, 2: 18, 3: 20, 6: 22, 7: 24,
    8: 34, 9: 36, 10: 84, 11: 110, 12: 144,
    13: 176, 14: 210, 15: 292, 30: 2,
}
# Display names for the types this script can dequantize.
TYPE_NAME = {
    0: "F32", 1: "F16", 2: "Q4_0", 8: "Q8_0", 10: "Q2_K", 30: "BF16",
}
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
# βββ GGUF Reader ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 52 |
+
|
| 53 |
+
def align_offset(offset, alignment=None):
    """Round ``offset`` up to the next multiple of ``alignment``.

    ``alignment`` defaults to the module-level ``ALIGNMENT``. It does not
    have to be a power of two.
    """
    if alignment is None:
        alignment = ALIGNMENT
    return (offset + alignment - 1) // alignment * alignment
|
| 55 |
+
|
| 56 |
+
def read_string(f):
    """Read one length-prefixed string from the binary stream ``f``.

    The layout is a little-endian uint64 byte count followed by that many
    bytes, decoded as UTF-8 with invalid sequences replaced.

    Raises:
        struct.error: if the 8-byte length prefix is incomplete.
        ValueError: if the stream ends before the announced length.
    """
    (length,) = struct.unpack('<Q', f.read(8))
    payload = f.read(length)
    if len(payload) != length:
        raise ValueError(
            f"Truncated GGUF string: expected {length} bytes, got {len(payload)}"
        )
    return payload.decode('utf-8', errors='replace')
|
| 59 |
+
|
| 60 |
+
# struct format character for every fixed-size numeric KV type id.
_KV_SCALAR_FORMATS = {
    0: 'B', 1: 'b', 2: 'H', 3: 'h', 4: 'I', 5: 'i',
    6: 'f', 10: 'Q', 11: 'q', 12: 'd',
}


def read_kv_value(f, vtype):
    """Read and return one metadata value of type id ``vtype`` from ``f``.

    Type ids 0-6 and 10-12 are little-endian numbers, 7 is a one-byte bool,
    8 is a length-prefixed string and 9 is an array (uint32 element type,
    uint64 count, then the elements).

    Arrays of numbers or bools are decoded with a single ``struct.unpack``
    call; arrays of strings or nested arrays are read element by element.

    Raises:
        ValueError: for an unknown type id.
        struct.error: if the stream ends inside a numeric value.
    """
    code = _KV_SCALAR_FORMATS.get(vtype)
    if code is not None:
        fmt = '<' + code
        return struct.unpack(fmt, f.read(struct.calcsize(fmt)))[0]

    if vtype == 7:
        return bool(struct.unpack('<B', f.read(1))[0])

    if vtype == 8:
        return read_string(f)

    if vtype == 9:
        arr_type, arr_len = struct.unpack('<IQ', f.read(12))
        # Bools are stored as single bytes, so they share the bulk path.
        elem_code = 'B' if arr_type == 7 else _KV_SCALAR_FORMATS.get(arr_type)
        if elem_code is None:
            return [read_kv_value(f, arr_type) for _ in range(arr_len)]
        elem_size = struct.calcsize('<' + elem_code)
        values = struct.unpack(f'<{arr_len}{elem_code}', f.read(arr_len * elem_size))
        if arr_type == 7:
            return [bool(v) for v in values]
        return list(values)

    raise ValueError(f"Unknown KV type {vtype}")
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
class GGUFModel:
    """GGUF file reader: parses the header eagerly, reads tensors via mmap.

    Attributes:
        path: path the model was opened from.
        file_size: size of the file in bytes.
        kv: dict of metadata key/value pairs from the header.
        tensor_infos: OrderedDict mapping tensor name to a dict with
            'dims', 'n_dims', 'type', 'offset', 'n_elements', 'data_size'.
        data_offset: absolute file offset where tensor data begins.

    Can be used as a context manager; ``close()`` releases the mmap and file.
    """

    def __init__(self, path):
        self.path = path
        self.file_size = os.path.getsize(path)
        self.kv = {}
        self.tensor_infos = OrderedDict()
        self.data_offset = 0

        self._mm = None
        self._f = open(path, 'rb')
        try:
            self._mm = mmap.mmap(self._f.fileno(), 0, access=mmap.ACCESS_READ)
            self._parse_header()
        except BaseException:
            # Do not leak the file handle or mapping when the header is bad.
            self.close()
            raise

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        self.close()

    def _parse_header(self):
        """Read magic, version, KV metadata and tensor descriptors."""
        f = self._f
        f.seek(0)
        magic = struct.unpack('<I', f.read(4))[0]
        if magic != GGUF_MAGIC:
            # Raised explicitly so the check survives ``python -O``; the
            # exception type matches the assert this replaces.
            raise AssertionError(f"Bad GGUF magic: 0x{magic:08X}")
        self.version = struct.unpack('<I', f.read(4))[0]
        n_tensors = struct.unpack('<Q', f.read(8))[0]
        n_kv = struct.unpack('<Q', f.read(8))[0]

        # Metadata key/value pairs.
        for _ in range(n_kv):
            key = read_string(f)
            vtype = struct.unpack('<I', f.read(4))[0]
            self.kv[key] = read_kv_value(f, vtype)

        # Tensor descriptors.
        for _ in range(n_tensors):
            name = read_string(f)
            n_dims = struct.unpack('<I', f.read(4))[0]
            dims = [struct.unpack('<Q', f.read(8))[0] for _ in range(n_dims)]
            ttype = struct.unpack('<I', f.read(4))[0]
            offset = struct.unpack('<Q', f.read(8))[0]

            n_elements = 1
            for d in dims:
                n_elements *= d

            # Unknown types fall back to 1 element / 4 bytes per block.
            blk_sz = TYPE_BLOCK_SIZE.get(ttype, 1)
            blk_bytes = TYPE_BLOCK_BYTES.get(ttype, 4)
            n_blocks = (n_elements + blk_sz - 1) // blk_sz

            self.tensor_infos[name] = {
                'dims': dims, 'n_dims': n_dims, 'type': ttype,
                'offset': offset, 'n_elements': n_elements,
                'data_size': n_blocks * blk_bytes,
            }

        # Tensor data starts at the next aligned offset. A file may override
        # the default alignment through the 'general.alignment' key.
        alignment = self.kv.get('general.alignment', ALIGNMENT)
        if not isinstance(alignment, int) or alignment <= 0:
            alignment = ALIGNMENT
        header_end = f.tell()
        self.data_offset = (header_end + alignment - 1) // alignment * alignment

    def get_arch(self):
        """Return 'general.architecture' from the metadata ('gemma2' if absent)."""
        return self.kv.get('general.architecture', 'gemma2')

    def get_config(self):
        """Collect the architecture-prefixed hyperparameters into a dict.

        Missing integer keys default to 0, 'rms_eps' to 1e-6 and
        'rope_base' to 10000.0.
        """
        arch = self.get_arch()

        def lookup(suffix, default=0):
            return self.kv.get(f'{arch}.{suffix}', default)

        return {
            'arch': arch,
            'n_layers': lookup('block_count'),
            'n_embd': lookup('embedding_length'),
            'n_head': lookup('attention.head_count'),
            'n_head_kv': lookup('attention.head_count_kv'),
            'n_ff': lookup('feed_forward_length'),
            'vocab_size': lookup('vocab_size'),
            'rms_eps': lookup('attention.layer_norm_rms_epsilon', 1e-6),
            'rope_base': lookup('rope.freq_base', 10000.0),
        }

    def get_tensor_f32(self, name):
        """Return tensor ``name`` as a flat float32 array, or None if unknown.

        Quantized tensors are dequantized through ``dequantize``.
        """
        info = self.tensor_infos.get(name)
        if info is None:
            return None
        start = self.data_offset + info['offset']
        raw = self._mm[start:start + info['data_size']]
        return dequantize(raw, info['type'], info['n_elements'])

    def get_tensor_shape(self, name):
        """Return the stored dims of ``name`` in reversed order, or None if unknown."""
        info = self.tensor_infos.get(name)
        if info is None:
            return None
        # GGUF stores dims in reverse order (row-major): dims[0]=cols, dims[1]=rows
        return tuple(reversed(info['dims']))

    def close(self):
        """Release the memory map and the underlying file."""
        if self._mm is not None:
            self._mm.close()
        self._f.close()
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
# βββ Dequantization βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 175 |
+
|
| 176 |
+
def dequantize(raw, ttype, n_elements):
    """Convert raw tensor bytes of ggml type ``ttype`` to a float32 array.

    F32, F16 and BF16 are decoded here; Q8_0, Q4_0 and Q2_K are passed to
    their block dequantizers. The dtypes are explicitly little-endian so the
    decoding does not depend on the host byte order.

    Raises:
        ValueError: for any other type id.
    """
    if ttype == GGML_TYPE_F32:
        return np.frombuffer(raw, dtype='<f4').astype(np.float32)
    if ttype == GGML_TYPE_F16:
        return np.frombuffer(raw, dtype='<f2').astype(np.float32)
    if ttype == GGML_TYPE_BF16:
        # bfloat16 is the upper 16 bits of a float32.
        halves = np.frombuffer(raw, dtype='<u2')
        return (halves.astype(np.uint32) << 16).view(np.float32)
    if ttype == GGML_TYPE_Q8_0:
        return dequant_q8_0(raw, n_elements)
    if ttype == GGML_TYPE_Q4_0:
        return dequant_q4_0(raw, n_elements)
    if ttype == GGML_TYPE_Q2_K:
        return dequant_q2k(raw, n_elements)
    raise ValueError(f"Unsupported quant type {ttype} ({TYPE_NAME.get(ttype, '?')})")
|
| 193 |
+
|
| 194 |
+
# One Q8_0 block: an fp16 scale followed by 32 signed 8-bit quants (34 bytes).
_Q8_0_BLOCK = np.dtype([('d', '<f2'), ('qs', 'i1', (32,))])


def dequant_q8_0(raw, n_elements):
    """Dequantize Q8_0 blocks to float32: value = d * q.

    The block count is taken from the buffer length, so a tensor whose
    element count is not a multiple of 32 still decodes; the result is
    trimmed to ``n_elements``.
    """
    n_blocks = len(raw) // _Q8_0_BLOCK.itemsize
    blocks = np.frombuffer(raw, dtype=_Q8_0_BLOCK, count=n_blocks)
    scale = blocks['d'].astype(np.float32).reshape(n_blocks, 1)
    quants = blocks['qs'].astype(np.float32)
    return (scale * quants).reshape(-1)[:n_elements]
|
| 200 |
+
|
| 201 |
+
# One Q4_0 block: an fp16 scale followed by 16 bytes holding 32 nibbles (18 bytes).
_Q4_0_BLOCK = np.dtype([('d', '<f2'), ('qs', 'u1', (16,))])


def dequant_q4_0(raw, n_elements):
    """Dequantize Q4_0 blocks to float32: value = d * (nibble - 8).

    Within a block the 16 low nibbles give elements 0-15 and the 16 high
    nibbles give elements 16-31. The block count is taken from the buffer
    length and the result is trimmed to ``n_elements``.
    """
    n_blocks = len(raw) // _Q4_0_BLOCK.itemsize
    blocks = np.frombuffer(raw, dtype=_Q4_0_BLOCK, count=n_blocks)
    scale = blocks['d'].astype(np.float32).reshape(n_blocks, 1)
    packed = blocks['qs']
    low = (packed & 0xF).astype(np.float32) - 8.0
    high = (packed >> 4).astype(np.float32) - 8.0
    values = np.concatenate([low, high], axis=1)  # [n_blocks, 32]
    return (scale * values).reshape(-1)[:n_elements]
|
| 210 |
+
|
| 211 |
+
# One Q2_K super-block (84 bytes, 256 weights): 16 packed scale/min bytes,
# 64 bytes of 2-bit quants, then the fp16 super-scale d and super-min dmin.
_Q2_K_BLOCK = np.dtype([
    ('scales', 'u1', (16,)),
    ('qs', 'u1', (64,)),
    ('d', '<f2'),
    ('dmin', '<f2'),
])


def dequant_q2k(raw, n_elements):
    """Dequantize Q2_K super-blocks to float32.

    Each super-block holds 16 sub-blocks of 16 weights. Sub-block ``s`` has
    its own scale byte: the low nibble scales ``d`` and the high nibble
    scales ``dmin``, giving value = d*sc*q - dmin*mn.

    The 64 quant bytes are two halves of 32 bytes. In each half, bit pair
    ``j`` (shift 2*j) of byte ``l`` is output element
    ``half*128 + j*32 + l`` and uses scale index ``half*8 + 2*j + l//16``.
    """
    n_blocks = len(raw) // _Q2_K_BLOCK.itemsize
    blocks = np.frombuffer(raw, dtype=_Q2_K_BLOCK, count=n_blocks)

    d = blocks['d'].astype(np.float32).reshape(n_blocks, 1)
    dmin = blocks['dmin'].astype(np.float32).reshape(n_blocks, 1)
    scales = blocks['scales']
    sub_scale = d * (scales & 0x0F).astype(np.float32)   # [n_blocks, 16]
    sub_min = dmin * (scales >> 4).astype(np.float32)    # [n_blocks, 16]

    # Unpack the four 2-bit planes of every byte: [n_blocks, half, plane, byte].
    packed = blocks['qs'].reshape(n_blocks, 2, 1, 32)
    shifts = np.array([0, 2, 4, 6], dtype=np.uint8).reshape(1, 1, 4, 1)
    quants = (packed >> shifts) & 3

    # Flattening (half, plane, byte // 16) yields the 16 sub-block indices
    # in the same order as the scale bytes.
    quants = quants.reshape(n_blocks, 16, 16).astype(np.float32)

    values = sub_scale[:, :, np.newaxis] * quants - sub_min[:, :, np.newaxis]
    return values.reshape(-1)[:n_elements]
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
# βββ Tokenizer ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 239 |
+
|
| 240 |
+
class SimpleTokenizer:
|
| 241 |
+
"""Minimal BPE tokenizer from GGUF metadata."""
|
| 242 |
+
|
| 243 |
+
def __init__(self, model):
|
| 244 |
+
self.tokens = model.kv.get('tokenizer.ggml.tokens', [])
|
| 245 |
+
self.vocab_size = len(self.tokens)
|
| 246 |
+
merges_raw = model.kv.get('tokenizer.ggml.merges', [])
|
| 247 |
+
self.bos_id = model.kv.get('tokenizer.ggml.bos_token_id', 2)
|
| 248 |
+
self.eos_id = model.kv.get('tokenizer.ggml.eos_token_id', 1)
|
| 249 |
+
|
| 250 |
+
# Build token β id map
|
| 251 |
+
self.token_to_id = {}
|
| 252 |
+
for i, t in enumerate(self.tokens):
|
| 253 |
+
if isinstance(t, str):
|
| 254 |
+
self.token_to_id[t] = i
|
| 255 |
+
|
| 256 |
+
# Build merge priority
|
| 257 |
+
self.merges = {}
|
| 258 |
+
for i, m in enumerate(merges_raw):
|
| 259 |
+
if isinstance(m, str):
|
| 260 |
+
parts = m.split(' ', 1)
|
| 261 |
+
if len(parts) == 2:
|
| 262 |
+
self.merges[(parts[0], parts[1])] = i
|
| 263 |
+
|
| 264 |
+
def encode(self, text):
|
| 265 |
+
"""Encode text to token IDs using BPE."""
|
| 266 |
+
if not text:
|
| 267 |
+
return [self.bos_id]
|
| 268 |
+
|
| 269 |
+
# Convert to byte-level tokens (SentencePiece style: β = space)
|
| 270 |
+
text = text.replace(' ', 'β')
|
| 271 |
+
if not text.startswith('β'):
|
| 272 |
+
text = 'β' + text
|
| 273 |
+
|
| 274 |
+
# Start with characters
|
| 275 |
+
tokens = list(text)
|
| 276 |
+
|
| 277 |
+
# Apply BPE merges
|
| 278 |
+
while len(tokens) > 1:
|
| 279 |
+
best_pair = None
|
| 280 |
+
best_rank = float('inf')
|
| 281 |
+
for i in range(len(tokens) - 1):
|
| 282 |
+
pair = (tokens[i], tokens[i + 1])
|
| 283 |
+
rank = self.merges.get(pair, float('inf'))
|
| 284 |
+
if rank < best_rank:
|
| 285 |
+
best_rank = rank
|
| 286 |
+
best_pair = (i, pair)
|
| 287 |
+
if best_pair is None or best_rank == float('inf'):
|
| 288 |
+
break
|
| 289 |
+
idx, (a, b) = best_pair
|
| 290 |
+
tokens = tokens[:idx] + [a + b] + tokens[idx + 2:]
|
| 291 |
+
|
| 292 |
+
# Convert to IDs
|
| 293 |
+
ids = [self.bos_id]
|
| 294 |
+
for t in tokens:
|
| 295 |
+
tid = self.token_to_id.get(t, 0)
|
| 296 |
+
ids.append(tid)
|
| 297 |
+
return ids
|
| 298 |
+
|
| 299 |
+
def chunk_text(self, text, chunk_size=512):
    """Encode text and split it into fixed-length, 50%-overlapping chunks.

    Args:
        text: Calibration text passed to ``self.encode``.
        chunk_size: Number of token IDs per chunk (must be >= 1).

    Returns:
        List of ``np.int32`` arrays, each of length ``chunk_size``. Text
        shorter than one chunk yields a single chunk padded with
        ``self.eos_id``; an empty encoding yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be >= 1")

    ids = self.encode(text)
    if not ids:
        return []

    if len(ids) < chunk_size:
        # Pad short text up to exactly one chunk.
        padded = ids + [self.eos_id] * (chunk_size - len(ids))
        return [np.array(padded, dtype=np.int32)]

    # 50% overlap; max() keeps the stride non-zero for chunk_size == 1.
    stride = max(1, chunk_size // 2)
    # '+ 1' makes the last full window (start == len(ids) - chunk_size)
    # reachable; without it the final chunk of the text is dropped.
    last_start = len(ids) - chunk_size
    return [
        np.array(ids[start:start + chunk_size], dtype=np.int32)
        for start in range(0, last_start + 1, stride)
    ]
|
| 312 |
+
|
| 313 |
+
|
| 314 |
+
# βββ Transformer Forward Pass βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 315 |
+
|
| 316 |
+
def rms_norm(x, weight, eps=1e-6):
    """Root-mean-square normalisation over the last axis, scaled by ``weight``.

    Computes ``x / sqrt(mean(x^2) + eps) * weight`` where the mean is taken
    along the last axis of ``x``.
    """
    mean_square = np.mean(np.square(x), axis=-1, keepdims=True)
    denom = np.sqrt(mean_square + eps)
    return (x / denom) * weight
|
| 319 |
+
|
| 320 |
+
def rope_freqs(dim, seq_len, base=10000.0):
    """Build rotary-embedding cosine and sine tables.

    Returns a ``(cos, sin)`` pair, each of shape ``[seq_len, dim / 2]``,
    holding the angle ``position * base ** (-2i / dim)`` for every position
    and every even channel index ``2i``.
    """
    exponents = np.arange(0, dim, 2, dtype=np.float32) / dim
    inv_freq = 1.0 / (base ** exponents)
    positions = np.arange(seq_len, dtype=np.float32)
    angles = positions[:, np.newaxis] * inv_freq[np.newaxis, :]  # [seq_len, dim/2]
    return np.cos(angles), np.sin(angles)
|
| 325 |
+
|
| 326 |
+
def apply_rope(x, cos_f, sin_f):
    """Apply a rotary position embedding to ``x`` (split-half convention).

    The last axis of ``x`` is split into a lower and an upper half which
    are rotated against each other by the per-position angles. ``x`` is
    indexed by position on axis 0; for 3-D input (``[seq_len, n_heads,
    head_dim]``) the tables are broadcast over the head axis.
    """
    half = x.shape[-1] // 2
    n_pos = x.shape[0]

    # Trim the tables to the positions / channels actually present in x.
    cos_h = cos_f[:n_pos, :half]
    sin_h = sin_f[:n_pos, :half]
    if x.ndim == 3:
        cos_h = cos_h[:, np.newaxis, :]
        sin_h = sin_h[:, np.newaxis, :]

    lower = x[..., :half]
    upper = x[..., half:]
    rotated_lower = lower * cos_h - upper * sin_h
    rotated_upper = upper * cos_h + lower * sin_h
    return np.concatenate([rotated_lower, rotated_upper], axis=-1)
|
| 339 |
+
|
| 340 |
+
def softmax(x, axis=-1):
    """Numerically stable softmax along ``axis``.

    The per-slice maximum is subtracted before exponentiation so large
    logits do not overflow.
    """
    shifted = x - np.max(x, axis=axis, keepdims=True)
    weights = np.exp(shifted)
    return weights / weights.sum(axis=axis, keepdims=True)
|
| 344 |
+
|
| 345 |
+
def gelu(x):
    """GELU activation using the tanh approximation."""
    scale = np.sqrt(2.0 / np.pi)
    inner = scale * (x + 0.044715 * x**3)
    return 0.5 * x * (1.0 + np.tanh(inner))
|
| 347 |
+
|
| 348 |
+
|
| 349 |
+
class TransformerRunner:
    """Minimal Gemma-style transformer forward pass for importance collection.

    Runs token chunks through the model's layers in NumPy and, for every
    weight matrix it multiplies by, accumulates the per-input-channel sum
    of squared activations in ``self.importance``.

    ``model`` must provide ``get_tensor_f32(name)`` (returning ``None`` for
    unknown tensors) and ``get_tensor_shape(name)``. ``config`` must provide
    ``n_embd``, ``n_head``, ``n_head_kv``, ``n_layers``, ``rms_eps`` and
    ``rope_base``.

    NOTE(review): head_dim is derived as n_embd // n_head; checkpoints whose
    attention head size differs from that quotient will fail in the reshape
    inside the attention step -- confirm against the supported models.
    """

    def __init__(self, model, config, verbose=False):
        self.model = model
        self.cfg = config
        self.verbose = verbose
        self.head_dim = config['n_embd'] // config['n_head']

        # tensor name -> (per-input-channel sum of x^2, number of rows seen)
        self.importance = {}

    def _record(self, name, x):
        """Accumulate sum(x^2) per input channel (last axis) for ``name``."""
        rows = x.reshape(-1, x.shape[-1])
        sq_sum = np.sum(rows ** 2, axis=0)
        previous = self.importance.get(name)
        if previous is None:
            self.importance[name] = (sq_sum.copy(), rows.shape[0])
        else:
            self.importance[name] = (previous[0] + sq_sum,
                                     previous[1] + rows.shape[0])

    def _get_weight(self, name):
        """Load a tensor as float32, or return None if the model lacks it.

        Tensors with two or more dimensions are reshaped to the shape the
        model reports; anything else is returned as loaded.
        """
        w = self.model.get_tensor_f32(name)
        if w is None:
            return None
        shape = self.model.get_tensor_shape(name)
        if shape and len(shape) >= 2:
            return w.reshape(shape)
        return w

    def _layer_prefix(self, layer_idx):
        """Tensor-name prefix of a transformer block, e.g. ``blk.3``."""
        return f"blk.{layer_idx}"

    def _attention(self, hidden, pfx, cos_f, sin_f):
        """Causal self-attention for one layer.

        Returns the attention output to add to the residual stream, or
        None when any attention weight is missing (nothing is recorded).
        """
        cfg = self.cfg
        n_head = cfg['n_head']
        n_head_kv = cfg['n_head_kv']
        head_dim = self.head_dim
        seq_len = hidden.shape[0]

        attn_norm_w = self._get_weight(f'{pfx}.attn_norm.weight')
        if attn_norm_w is None:
            return None
        normed = rms_norm(hidden, attn_norm_w, cfg['rms_eps'])

        q_w = self._get_weight(f'{pfx}.attn_q.weight')
        k_w = self._get_weight(f'{pfx}.attn_k.weight')
        v_w = self._get_weight(f'{pfx}.attn_v.weight')
        o_w = self._get_weight(f'{pfx}.attn_output.weight')
        if q_w is None or k_w is None or v_w is None or o_w is None:
            return None

        # Importance is recorded on the INPUT of each projection.
        for proj in ('attn_q', 'attn_k', 'attn_v'):
            self._record(f'{pfx}.{proj}.weight', normed)

        q = (normed @ q_w.T).reshape(seq_len, n_head, head_dim)
        k = (normed @ k_w.T).reshape(seq_len, n_head_kv, head_dim)
        v = (normed @ v_w.T).reshape(seq_len, n_head_kv, head_dim)

        q = apply_rope(q, cos_f, sin_f)
        k = apply_rope(k, cos_f, sin_f)

        # Grouped-query attention: repeat KV heads to match the query heads.
        if n_head_kv < n_head:
            rep = n_head // n_head_kv
            k = np.repeat(k, rep, axis=1)
            v = np.repeat(v, rep, axis=1)

        q_t = q.transpose(1, 0, 2)  # [n_head, seq, head_dim]
        k_t = k.transpose(1, 0, 2)
        v_t = v.transpose(1, 0, 2)

        scale = 1.0 / np.sqrt(head_dim)
        scores = np.matmul(q_t, k_t.transpose(0, 2, 1)) * scale  # [n_head, seq, seq]

        # Causal mask: large negative above the diagonal.
        mask = np.triu(np.full((seq_len, seq_len), -1e9, dtype=np.float32), k=1)
        attn = softmax(scores + mask[np.newaxis, :, :], axis=-1)

        out = np.matmul(attn, v_t)                          # [n_head, seq, head_dim]
        out = out.transpose(1, 0, 2).reshape(seq_len, -1)   # [seq, n_head * head_dim]

        self._record(f'{pfx}.attn_output.weight', out)
        return out @ o_w.T

    def _dense_ffn(self, normed_ff, pfx):
        """Gated dense feed-forward; None if gate/up/down are not all present."""
        gate_w = self._get_weight(f'{pfx}.ffn_gate.weight')
        up_w = self._get_weight(f'{pfx}.ffn_up.weight')
        down_w = self._get_weight(f'{pfx}.ffn_down.weight')
        if gate_w is None or up_w is None or down_w is None:
            return None

        self._record(f'{pfx}.ffn_gate.weight', normed_ff)
        self._record(f'{pfx}.ffn_up.weight', normed_ff)

        ff_mid = gelu(normed_ff @ gate_w.T) * (normed_ff @ up_w.T)

        self._record(f'{pfx}.ffn_down.weight', ff_mid)
        return ff_mid @ down_w.T

    def _moe_ffn(self, normed_ff, pfx):
        """Top-2 mixture-of-experts feed-forward; None if there is no router.

        Each token's output is the sum over its two highest-probability
        experts of ``router_prob * expert(token)`` (probabilities are the
        full softmax values and are not renormalised over the top two).
        """
        gate_inp_w = self._get_weight(f'{pfx}.ffn_gate_inp.weight')
        if gate_inp_w is None:
            return None

        self._record(f'{pfx}.ffn_gate_inp.weight', normed_ff)
        router_logits = normed_ff @ gate_inp_w.T
        n_experts = router_logits.shape[-1]
        probs = softmax(router_logits, axis=-1)
        top2 = np.argsort(probs, axis=-1)[:, -2:]

        ff_out = np.zeros_like(normed_ff)
        for exp_id in range(n_experts):
            routed = np.any(top2 == exp_id, axis=-1)  # [seq] bool
            if not np.any(routed):
                continue  # skip before loading weights nobody will use

            ew_gate = self._get_weight(f'{pfx}.ffn_gate.{exp_id}.weight')
            ew_up = self._get_weight(f'{pfx}.ffn_up.{exp_id}.weight')
            ew_down = self._get_weight(f'{pfx}.ffn_down.{exp_id}.weight')
            # All three matrices are needed; a partially present expert is
            # skipped instead of crashing on None.
            if ew_gate is None or ew_up is None or ew_down is None:
                continue

            exp_input = normed_ff[routed]
            self._record(f'{pfx}.ffn_gate.{exp_id}.weight', exp_input)
            self._record(f'{pfx}.ffn_up.{exp_id}.weight', exp_input)

            mid = gelu(exp_input @ ew_gate.T) * (exp_input @ ew_up.T)
            self._record(f'{pfx}.ffn_down.{exp_id}.weight', mid)

            # Rows of the expert output line up with the True entries of
            # `routed`, so one masked add replaces the per-token loop.
            weights = probs[routed, exp_id][:, np.newaxis]
            ff_out[routed] += weights * (mid @ ew_down.T)

        return ff_out

    def forward_layer(self, hidden, layer_idx, cos_f, sin_f):
        """Forward pass through one transformer layer. Returns new hidden state.

        A layer whose attention weights are missing is skipped entirely;
        a layer whose FFN weights are missing keeps only the attention
        update.
        """
        pfx = self._layer_prefix(layer_idx)

        attn_out = self._attention(hidden, pfx, cos_f, sin_f)
        if attn_out is None:
            return hidden
        hidden = hidden + attn_out

        ffn_norm_w = self._get_weight(f'{pfx}.ffn_norm.weight')
        if ffn_norm_w is None:
            return hidden
        normed_ff = rms_norm(hidden, ffn_norm_w, self.cfg['rms_eps'])

        ff_out = self._dense_ffn(normed_ff, pfx)
        if ff_out is None:
            ff_out = self._moe_ffn(normed_ff, pfx)
        if ff_out is not None:
            hidden = hidden + ff_out
        return hidden

    def forward(self, token_ids):
        """Full forward pass over one chunk, collecting importance statistics.

        Args:
            token_ids: Integer index array/sequence used to index the
                embedding matrix.

        Returns:
            The hidden state after the last layer.

        Raises:
            RuntimeError: If ``token_embd.weight`` is missing.
        """
        cfg = self.cfg
        seq_len = len(token_ids)

        embed_w = self._get_weight('token_embd.weight')
        if embed_w is None:
            raise RuntimeError("Missing token_embd.weight")

        # NOTE(review): no embedding scaling and no final output norm are
        # applied here, although reference Gemma implementations use both --
        # confirm this is intentional for statistics collection.
        hidden = embed_w[token_ids]  # [seq_len, n_embd]

        cos_f, sin_f = rope_freqs(self.head_dim, seq_len, cfg['rope_base'])

        for layer_idx in range(cfg['n_layers']):
            hidden = self.forward_layer(hidden, layer_idx, cos_f, sin_f)
            if self.verbose and (layer_idx + 1) % 4 == 0:
                print(f" Layer {layer_idx + 1}/{cfg['n_layers']}", end='\r')

        # Record the input of the output projection when the model has one.
        output_w = self._get_weight('output.weight')
        if output_w is not None:
            self._record('output.weight', hidden)

        return hidden
|
| 545 |
+
|
| 546 |
+
|
| 547 |
+
# βββ HPC Cross-Layer Importance Propagation βββββββββββββββββββββββββββββββββ
|
| 548 |
+
|
| 549 |
+
def hpc_propagate_importance(importance_dict, n_layers, verbose=False):
    """Smooth per-layer importance with an iterative neighbour coupling.

    Each ``blk.<i>.*`` tensor contributes its mean E[x^2] to a per-layer
    energy. Energies are normalised to a maximum of 1, then a per-layer
    multiplier is iterated 50 times: each step proposes
    ``exp((e_self + 0.3 * mean(neighbour energy * neighbour multiplier))
    / 0.5)``, normalises the proposals to mean 1 and blends them 30/70
    with the previous multipliers. Finally every block tensor's sum is
    scaled by its layer's multiplier.

    Args:
        importance_dict: ``name -> (sum_x2 array, count)``.
        n_layers: Number of transformer blocks.
        verbose: Print the resulting multipliers.

    Returns:
        A new dict with scaled sums; tensors outside ``blk.0`` ..
        ``blk.<n_layers-1>`` are copied unchanged. The input dict itself is
        returned when ``n_layers`` is not positive or when all layer
        energies are (near) zero.
    """
    def layer_of(name):
        """Layer index encoded in a 'blk.<i>.' name, or None if not applicable."""
        parts = name.split('.')
        if len(parts) < 2 or parts[0] != 'blk':
            return None
        try:
            idx = int(parts[1])
        except ValueError:
            return None
        return idx if 0 <= idx < n_layers else None

    # Nothing to propagate across; also avoids np.max on an empty array.
    if n_layers <= 0:
        return importance_dict

    # Average mean-E[x^2] of all tensors belonging to each layer.
    layer_energies = np.zeros(n_layers, dtype=np.float64)
    layer_tensor_count = np.zeros(n_layers, dtype=np.int32)
    for name, (sum_x2, count) in importance_dict.items():
        idx = layer_of(name)
        if idx is None:
            continue
        layer_energies[idx] += np.mean(sum_x2 / max(count, 1))
        layer_tensor_count[idx] += 1

    for i in range(n_layers):
        if layer_tensor_count[i] > 0:
            layer_energies[i] /= layer_tensor_count[i]

    peak = np.max(layer_energies)
    if peak < 1e-30:
        return importance_dict
    layer_energies /= peak

    # Damped fixed-point iteration over nearest-neighbour layers.
    multipliers = np.ones(n_layers, dtype=np.float64)
    temperature = 0.5

    for _ in range(50):
        new_mult = np.ones(n_layers, dtype=np.float64)
        for i in range(n_layers):
            e_nbr = 0.0
            n_nbr = 0
            if i > 0:
                e_nbr += layer_energies[i - 1] * multipliers[i - 1]
                n_nbr += 1
            if i < n_layers - 1:
                e_nbr += layer_energies[i + 1] * multipliers[i + 1]
                n_nbr += 1
            if n_nbr > 0:
                e_nbr /= n_nbr
            new_mult[i] = np.exp((layer_energies[i] + 0.3 * e_nbr) / temperature)

        mean_m = np.mean(new_mult)
        if mean_m > 1e-30:
            new_mult /= mean_m
        multipliers = 0.7 * multipliers + 0.3 * new_mult

    if verbose:
        print(f"\n HPC layer multipliers (first 8): "
              f"{' '.join(f'{m:.3f}' for m in multipliers[:8])}...")
        print(f" Range: [{np.min(multipliers):.3f}, {np.max(multipliers):.3f}]")

    adjusted = {}
    for name, (sum_x2, count) in importance_dict.items():
        idx = layer_of(name)
        if idx is None:
            adjusted[name] = (sum_x2, count)
        else:
            adjusted[name] = (sum_x2 * multipliers[idx], count)
    return adjusted
|
| 625 |
+
|
| 626 |
+
|
| 627 |
+
# βββ iMatrix Output Writer ββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 628 |
+
|
| 629 |
+
def write_imatrix(path, importance_dict):
    """Write the importance statistics as a binary imatrix file.

    Layout (all integers little-endian int32): entry count, then per entry
    in name order: name length, UTF-8 name, value count, sample count,
    float32 values.

    NOTE(review): the value count is written before the sample count;
    confirm this field order against the reader that consumes the file
    (llama.cpp's legacy loader reads the call count first).

    Returns:
        Number of entries written.
    """
    records = [
        (name, sum_x2.astype(np.float32), int(count))
        for name, (sum_x2, count) in sorted(importance_dict.items())
    ]

    with open(path, 'wb') as out:
        out.write(struct.pack('<i', len(records)))
        for name, values, n_samples in records:
            encoded = name.encode('utf-8')
            out.write(struct.pack('<i', len(encoded)) + encoded)
            out.write(struct.pack('<ii', len(values), n_samples))
            out.write(values.tobytes())

    return len(records)
|
| 647 |
+
|
| 648 |
+
|
| 649 |
+
# βββ Main βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 650 |
+
|
| 651 |
+
def main():
    """Command-line entry point: build an importance matrix from a GGUF model.

    Loads the model and its tokenizer metadata, tokenizes the calibration
    text into chunks, runs each chunk through ``TransformerRunner`` to
    collect E[x²] statistics, optionally applies the cross-layer
    propagation, and writes the result with ``write_imatrix``. A chunk
    that raises is reported and skipped. The model is closed even when a
    later step fails.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='HExState iMatrix Generator — HPC-enhanced importance matrix from GGUF')
    parser.add_argument('model', help='Input GGUF model file')
    parser.add_argument('calibration', help='Calibration text file')
    parser.add_argument('-o', '--output', default='imatrix.dat',
                        help='Output imatrix file (default: imatrix.dat)')
    parser.add_argument('--chunks', type=int, default=100,
                        help='Number of token chunks to process (default: 100)')
    parser.add_argument('--chunk-size', type=int, default=512,
                        help='Tokens per chunk (default: 512)')
    parser.add_argument('--no-hpc', action='store_true',
                        help='Disable HPC cross-layer propagation')
    parser.add_argument('--verbose', action='store_true',
                        help='Per-layer statistics')
    args = parser.parse_args()

    # Banner helpers: every boxed row is padded to the same inner width.
    box_width = 64
    rule = '═' * box_width

    def boxed(text=''):
        return f"  ║{('  ' + text):<{box_width}}║"

    print()
    print(f"  ╔{rule}╗")
    print(boxed("HExState Importance Matrix Generator"))
    print(boxed("HPC-Enhanced E[x²] Collection from GGUF"))
    print(f"  ╚{rule}╝")
    print()

    start_time = time.time()

    # ── Load model ──
    print(f"  Loading model: {args.model}")
    model = GGUFModel(args.model)
    try:
        config = model.get_config()

        print(f"  Architecture: {config['arch']}")
        print(f"  Layers:       {config['n_layers']}")
        print(f"  Hidden:       {config['n_embd']}")
        print(f"  Heads:        {config['n_head']} (KV: {config['n_head_kv']})")
        print(f"  FFN:          {config['n_ff']}")
        print(f"  Vocab:        {config['vocab_size']}")
        print(f"  Tensors:      {len(model.tensor_infos)}")
        print()

        # ── Load tokenizer ──
        print("  Loading tokenizer from GGUF metadata...")
        tokenizer = SimpleTokenizer(model)
        print(f"  Vocab size: {tokenizer.vocab_size}")
        print()

        # ── Load calibration text ──
        print(f"  Loading calibration data: {args.calibration}")
        with open(args.calibration, 'r', encoding='utf-8', errors='replace') as f:
            cal_text = f.read()
        print(f"  Text length: {len(cal_text):,} chars")

        # ── Tokenize and chunk ──
        print(f"  Tokenizing ({args.chunk_size} tokens/chunk, {args.chunks} chunks max)...")
        chunks = tokenizer.chunk_text(cal_text, args.chunk_size)
        if len(chunks) > args.chunks:
            chunks = chunks[:args.chunks]
        print(f"  Prepared {len(chunks)} chunks")
        print()

        # ── Forward pass ──
        print("  Running forward passes...")
        runner = TransformerRunner(model, config, verbose=args.verbose)

        n_total = len(chunks)
        n_ok = 0
        bar_width = 40
        loop_start = time.time()  # ETA is based on forward-pass time only

        for i, chunk in enumerate(chunks):
            elapsed = time.time() - start_time
            loop_elapsed = time.time() - loop_start
            eta = loop_elapsed / i * (n_total - i) if i > 0 else 0
            pct = (i + 1) / n_total * 100
            filled = int(bar_width * (i + 1) / n_total)
            bar = '█' * filled + '░' * (bar_width - filled)
            sys.stdout.write(
                f"\r  [{bar}] {pct:5.1f}% ({i+1}/{n_total}) "
                f"{elapsed:.0f}s ETA:{eta:.0f}s")
            sys.stdout.flush()

            # Best effort: one bad chunk must not abort the whole run.
            try:
                runner.forward(chunk)
            except Exception as e:
                print(f"\n  WARNING: Chunk {i} failed: {e}")
                continue
            n_ok += 1

        print(f"\n  Collected importance for {len(runner.importance)} tensors")
        print()

        # ── HPC propagation ──
        if not args.no_hpc:
            print("  Running HPC cross-layer importance propagation...")
            importance = hpc_propagate_importance(
                runner.importance, config['n_layers'], verbose=args.verbose)
        else:
            importance = runner.importance

        # ── Write output ──
        print(f"\n  Writing imatrix: {args.output}")
        n_entries = write_imatrix(args.output, importance)

        elapsed = time.time() - start_time
        out_size = os.path.getsize(args.output)

        print()
        print(f"  ╔{rule}╗")
        print(boxed("IMATRIX GENERATION COMPLETE"))
        print(f"  ╠{rule}╣")
        print(boxed(f"Tensor entries:    {n_entries}"))
        print(boxed(f"Chunks processed:  {n_ok}"))
        print(boxed(f"Output size:       {out_size:,} bytes ({out_size/1024:.1f} KB)"))
        print(boxed(f"Total time:        {elapsed:.1f} sec"))
        print(f"  ╚{rule}╝")
        print()
        print(f"  Output: {args.output}")
        print()
    finally:
        # Release the model file even if any step above raised.
        model.close()


if __name__ == '__main__':
    main()
|
gguf_format.h
ADDED
|
@@ -0,0 +1,707 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
 * gguf_format.h — GGUF v3 Binary Format Writer
 *
 * ╔══════════════════════════════════════════════════════════════╗
 * ║  HExState GGUF Output Module                                 ║
 * ║  Implements the GGUF v3 binary specification for writing     ║
 * ║  quantized LLM weight files compatible with llama.cpp        ║
 * ╚══════════════════════════════════════════════════════════════╝
 *
 * File Layout:
 *   1. Header:      magic(4) + version(4) + tensor_count(8) + kv_count(8)
 *   2. Metadata:    Key-Value pairs (variable length)
 *   3. Tensor Info: Per-tensor descriptors (name, dims, type, offset)
 *   4. Padding:     Align to GGUF_DEFAULT_ALIGNMENT bytes
 *   5. Tensor Data: Raw quantized weight data
 *
 * All values are little-endian.
 */
|
| 19 |
+
|
| 20 |
+
#ifndef GGUF_FORMAT_H
|
| 21 |
+
#define GGUF_FORMAT_H
|
| 22 |
+
|
| 23 |
+
#include <stdint.h>
|
| 24 |
+
#include <stdio.h>
|
| 25 |
+
#include <stdlib.h>
|
| 26 |
+
#include <string.h>
|
| 27 |
+
#include <math.h>
|
| 28 |
+
|
| 29 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 30 |
+
* GGUF CONSTANTS
|
| 31 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 32 |
+
|
| 33 |
+
#define GGUF_MAGIC 0x46554747 /* "GGUF" in little-endian */
|
| 34 |
+
#define GGUF_VERSION 3
|
| 35 |
+
#define GGUF_DEFAULT_ALIGNMENT 32
|
| 36 |
+
|
| 37 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 38 |
+
* GGML TENSOR TYPES
|
| 39 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 40 |
+
|
| 41 |
+
/*
 * Tensor element types as stored in the tensor-info "type" field.
 * Values are explicit because they are part of the file format; note the
 * gap between Q4_1 (3) and Q5_0 (6).
 */
typedef enum {
    /* Floating point */
    GGML_TYPE_F32     = 0,
    GGML_TYPE_F16     = 1,

    /* Legacy block quants */
    GGML_TYPE_Q4_0    = 2,
    GGML_TYPE_Q4_1    = 3,
    GGML_TYPE_Q5_0    = 6,
    GGML_TYPE_Q5_1    = 7,
    GGML_TYPE_Q8_0    = 8,
    GGML_TYPE_Q8_1    = 9,

    /* K-quants */
    GGML_TYPE_Q2_K    = 10,
    GGML_TYPE_Q3_K    = 11,
    GGML_TYPE_Q4_K    = 12,
    GGML_TYPE_Q5_K    = 13,
    GGML_TYPE_Q6_K    = 14,
    GGML_TYPE_Q8_K    = 15,

    /* I-quants */
    GGML_TYPE_IQ2_XXS = 16,
    GGML_TYPE_IQ2_XS  = 17,
    GGML_TYPE_IQ3_XXS = 18,
    GGML_TYPE_IQ1_S   = 19,
    GGML_TYPE_IQ4_NL  = 20,
    GGML_TYPE_IQ3_S   = 21,
    GGML_TYPE_IQ2_S   = 22,
    GGML_TYPE_IQ4_XS  = 23,

    /* Plain integers and wide floats */
    GGML_TYPE_I8      = 24,
    GGML_TYPE_I16     = 25,
    GGML_TYPE_I32     = 26,
    GGML_TYPE_I64     = 27,
    GGML_TYPE_F64     = 28,

    GGML_TYPE_IQ1_M   = 29,
    GGML_TYPE_BF16    = 30,

    GGML_TYPE_COUNT   /* one past the last type listed above */
} GGMLType;
|
| 73 |
+
|
| 74 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 75 |
+
* GGUF METADATA VALUE TYPES
|
| 76 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 77 |
+
|
| 78 |
+
/*
 * Type tags for metadata key-value entries. The numeric values are part
 * of the file format, so each one is spelled out.
 */
typedef enum {
    /* 8/16/32-bit integers */
    GGUF_TYPE_UINT8   = 0,
    GGUF_TYPE_INT8    = 1,
    GGUF_TYPE_UINT16  = 2,
    GGUF_TYPE_INT16   = 3,
    GGUF_TYPE_UINT32  = 4,
    GGUF_TYPE_INT32   = 5,

    GGUF_TYPE_FLOAT32 = 6,
    GGUF_TYPE_BOOL    = 7,

    /* Variable-length values */
    GGUF_TYPE_STRING  = 8,
    GGUF_TYPE_ARRAY   = 9,

    /* 64-bit scalars */
    GGUF_TYPE_UINT64  = 10,
    GGUF_TYPE_INT64   = 11,
    GGUF_TYPE_FLOAT64 = 12
} GGUFValueType;
|
| 93 |
+
|
| 94 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 95 |
+
* Q8_0 BLOCK STRUCTURE
|
| 96 |
+
*
|
| 97 |
+
* The fundamental quantized unit: 32 weights + 1 fp16 scale.
|
| 98 |
+
* Total: 34 bytes per block = 8.5 bits per weight.
|
| 99 |
+
*
|
| 100 |
+
* Dequantization: w_i = qs[i] * d
|
| 101 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 102 |
+
|
| 103 |
+
#define QK8_0 32 /* Block size for Q8_0 */

/* One Q8_0 block: an fp16 scale followed by QK8_0 signed 8-bit quants. */
typedef struct {
    uint16_t d;          /* fp16 scale (delta), stored as raw bits */
    int8_t   qs[QK8_0];  /* quantized values [-127, 127] */
} BlockQ8_0;

/*
 * Compile-time layout check: the struct is written to disk as-is, so it
 * must be exactly 2 + 32 = 34 bytes with no padding. A mismatch makes the
 * array size negative and fails the build.
 */
typedef char gguf_check_block_q8_0_size[(sizeof(BlockQ8_0) == 2 + QK8_0) ? 1 : -1];
|
| 111 |
+
|
| 112 |
+
/* ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββοΏ½οΏ½ββββββββββββ
|
| 113 |
+
* Q4_0 BLOCK STRUCTURE
|
| 114 |
+
*
|
| 115 |
+
* 32 weights per block with 4-bit quantization.
|
| 116 |
+
* Layout: 1 fp16 scale + 16 bytes packed quants (2 weights per byte)
|
| 117 |
+
* Total: 18 bytes per block = 4.5 bits per weight.
|
| 118 |
+
*
|
| 119 |
+
* Dequantization: w_i = (q_i - 8) * d
|
| 120 |
+
* where q_i in {0..15}, stored as nibbles
|
| 121 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 122 |
+
|
| 123 |
+
#define QK4_0 32 /* Block size for Q4_0 */

/* One Q4_0 block: an fp16 scale followed by QK4_0 packed 4-bit quants. */
typedef struct {
    uint16_t d;            /* fp16 scale (delta), stored as raw bits */
    uint8_t  qs[QK4_0/2];  /* 16 bytes: packed 4-bit quants (2 per byte) */
} BlockQ4_0;

/*
 * Compile-time layout check: the struct is written to disk as-is, so it
 * must be exactly 2 + 16 = 18 bytes for 32 weights. A mismatch makes the
 * array size negative and fails the build.
 */
typedef char gguf_check_block_q4_0_size[(sizeof(BlockQ4_0) == 2 + QK4_0/2) ? 1 : -1];
|
| 131 |
+
|
| 132 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 133 |
+
 * Q2_K BLOCK STRUCTURE (K-Quant, 2-bit)
 *
 * 256-weight superblock divided into 16 sub-blocks of 16 weights.
 *
 * Layout, in memory order (must match ggml block_q2_K):
 *   scales[16]: Per-sub-block scale (low 4 bits) + min (high 4 bits)
 *   qs[64]:     Packed 2-bit quants (4 weights per byte)
 *   d:          fp16 super-block scale for scales
 *   dmin:       fp16 super-block scale for mins
 *
 * Dequantization: w_i = d * scale_j * q_i - dmin * min_j
 *   where j = sub-block index, q_i in {0, 1, 2, 3}
 *
 * Effective: 2.625 bits per weight (84 bytes / 256 weights)
|
| 147 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 148 |
+
|
| 149 |
+
#define QK_K 256 /* K-quant superblock size */

/*
 * One Q2_K superblock. Field order is the on-disk order: the packed
 * per-sub-block scales/mins and the quants come first, the two fp16
 * super-block scales last.
 */
typedef struct {
    uint8_t  scales[QK_K/16];  /* 16 bytes: scale(4bit) | min(4bit) */
    uint8_t  qs[QK_K/4];       /* 64 bytes: packed 2-bit quants */
    uint16_t d;                /* fp16 super-block scale */
    uint16_t dmin;             /* fp16 super-block min scale */
} BlockQ2K;

/*
 * Compile-time layout check: sizeof(BlockQ2K) = 16 + 64 + 2 + 2 = 84 bytes
 * for 256 weights. A mismatch makes the array size negative and fails the
 * build.
 */
typedef char gguf_check_block_q2k_size[(sizeof(BlockQ2K) == QK_K/16 + QK_K/4 + 2 + 2) ? 1 : -1];
|
| 159 |
+
|
| 160 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 161 |
+
* FP16 ββ FP32 CONVERSION
|
| 162 |
+
*
|
| 163 |
+
* IEEE 754 half-precision (binary16):
|
| 164 |
+
* 1 sign bit, 5 exponent bits, 10 mantissa bits
|
| 165 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 166 |
+
|
| 167 |
+
/*
 * Convert an IEEE 754 binary32 value to binary16 bits.
 *
 * Rounds to nearest, ties to even (normal and subnormal results alike).
 * Finite values too large for fp16 become +/-Inf, values too small become
 * +/-0, Inf stays Inf and NaN stays NaN (a NaN whose payload would be
 * shifted out entirely gets the quiet bit set so it cannot turn into Inf).
 */
static inline uint16_t gguf_fp32_to_fp16(float f)
{
    /* Use the union approach for bit manipulation */
    union { float f; uint32_t u; } fu;
    fu.f = f;
    const uint32_t bits = fu.u;

    const uint32_t sign     = (bits >> 16) & 0x8000u;
    const uint32_t src_exp  = (bits >> 23) & 0xFFu;
    const uint32_t mantissa = bits & 0x7FFFFFu;

    if (src_exp == 0xFFu) {
        /* Source is Inf or NaN */
        if (mantissa == 0) {
            return (uint16_t)(sign | 0x7C00u);
        }
        uint32_t payload = mantissa >> 13;
        if (payload == 0) {
            payload = 0x0200u;  /* keep it a NaN */
        }
        return (uint16_t)(sign | 0x7C00u | payload);
    }

    /* Re-bias the exponent from fp32 (127) to fp16 (15). */
    const int32_t exponent = (int32_t)src_exp - 127 + 15;

    if (exponent >= 0x1F) {
        /* Finite but out of fp16 range: overflow to infinity */
        return (uint16_t)(sign | 0x7C00u);
    }

    uint32_t half;  /* exponent+mantissa field before rounding */
    uint32_t rest;  /* bits discarded by the narrowing */
    uint32_t tie;   /* value of `rest` exactly halfway between neighbours */

    if (exponent <= 0) {
        /* Result is subnormal or zero */
        if (exponent < -10) {
            return (uint16_t)sign;  /* below half the smallest subnormal */
        }
        const uint32_t shift = (uint32_t)(14 - exponent);  /* 14..24 */
        const uint32_t full  = mantissa | 0x800000u;       /* implicit 1 */
        half = full >> shift;
        rest = full & ((1u << shift) - 1u);
        tie  = 1u << (shift - 1u);
    } else {
        /* Normalized */
        half = ((uint32_t)exponent << 10) | (mantissa >> 13);
        rest = mantissa & 0x1FFFu;
        tie  = 0x1000u;
    }

    /*
     * Round to nearest even. A carry out of the mantissa correctly bumps
     * the exponent (subnormal -> normal, or largest normal -> Inf).
     */
    if (rest > tie || (rest == tie && (half & 1u))) {
        half++;
    }
    return (uint16_t)(sign | half);
}
|
| 191 |
+
|
| 192 |
+
/*
 * Expand IEEE 754 binary16 bits into a binary32 float.
 * Handles zero, subnormals, normal numbers and Inf/NaN.
 */
static inline float gguf_fp16_to_fp32(uint16_t h)
{
    const uint32_t sign_bit  = (uint32_t)(h & 0x8000) << 16;
    const int32_t  exp_field = (h >> 10) & 0x1F;
    uint32_t       frac      = h & 0x03FF;
    union { uint32_t u; float f; } out;

    if (exp_field == 0x1F) {
        /* Inf/NaN: all-ones exponent, payload moved to the top of the mantissa */
        out.u = sign_bit | 0x7F800000 | (frac << 13);
        return out.f;
    }

    if (exp_field == 0 && frac == 0) {
        /* signed zero */
        out.u = sign_bit;
        return out.f;
    }

    int32_t e = exp_field;
    if (exp_field == 0) {
        /* Subnormal: shift left until the implicit leading 1 appears. */
        e = 1;
        while (!(frac & 0x0400)) {
            frac <<= 1;
            e--;
        }
        frac &= 0x03FF;
    }

    /* Re-bias the exponent (15 -> 127) and widen the mantissa (10 -> 23 bits). */
    out.u = sign_bit | ((uint32_t)(e + 127 - 15) << 23) | (frac << 13);
    return out.f;
}
|
| 223 |
+
|
| 224 |
+
/* BFloat16 -> Float32: the 16 input bits become the upper half of the
 * 32-bit pattern; the lower 16 bits are zero. */
static inline float gguf_bf16_to_fp32(uint16_t bf)
{
    union { float f; uint32_t u; } widen;
    widen.u = ((uint32_t)bf) << 16;
    return widen.f;
}
|
| 231 |
+
|
| 232 |
+
/* =======================================================================
 * GGUF STRING - Length-prefixed UTF-8 (no null terminator in file)
 * ======================================================================= */
|
| 235 |
+
|
| 236 |
+
/*
 * Write a GGUF string: a uint64 byte length followed by the bytes
 * themselves, without a terminating NUL.
 *
 * A NULL `s` is written as the empty string (length 0), the same
 * fallback gguf_write_kv_string_array applies to NULL entries.
 * The length is written in host byte order.
 */
static inline void gguf_write_string(FILE *fp, const char *s)
{
    const char *text = s ? s : "";
    uint64_t len = (uint64_t)strlen(text);

    fwrite(&len, sizeof len, 1, fp);
    if (len > 0) {
        fwrite(text, 1, (size_t)len, fp);
    }
}
|
| 242 |
+
|
| 243 |
+
/* =======================================================================
 * GGUF METADATA KEY-VALUE WRITERS
 *
 * Each KV entry: key_string + value_type(u32) + value_data
 * ======================================================================= */
|
| 248 |
+
|
| 249 |
+
static inline void gguf_write_kv_string(FILE *fp, const char *key, const char *val)
|
| 250 |
+
{
|
| 251 |
+
gguf_write_string(fp, key);
|
| 252 |
+
uint32_t vtype = GGUF_TYPE_STRING;
|
| 253 |
+
fwrite(&vtype, sizeof(uint32_t), 1, fp);
|
| 254 |
+
gguf_write_string(fp, val);
|
| 255 |
+
}
|
| 256 |
+
|
| 257 |
+
static inline void gguf_write_kv_uint32(FILE *fp, const char *key, uint32_t val)
|
| 258 |
+
{
|
| 259 |
+
gguf_write_string(fp, key);
|
| 260 |
+
uint32_t vtype = GGUF_TYPE_UINT32;
|
| 261 |
+
fwrite(&vtype, sizeof(uint32_t), 1, fp);
|
| 262 |
+
fwrite(&val, sizeof(uint32_t), 1, fp);
|
| 263 |
+
}
|
| 264 |
+
|
| 265 |
+
static inline void gguf_write_kv_int32(FILE *fp, const char *key, int32_t val)
|
| 266 |
+
{
|
| 267 |
+
gguf_write_string(fp, key);
|
| 268 |
+
uint32_t vtype = GGUF_TYPE_INT32;
|
| 269 |
+
fwrite(&vtype, sizeof(uint32_t), 1, fp);
|
| 270 |
+
fwrite(&val, sizeof(int32_t), 1, fp);
|
| 271 |
+
}
|
| 272 |
+
|
| 273 |
+
static inline void gguf_write_kv_uint64(FILE *fp, const char *key, uint64_t val)
|
| 274 |
+
{
|
| 275 |
+
gguf_write_string(fp, key);
|
| 276 |
+
uint32_t vtype = GGUF_TYPE_UINT64;
|
| 277 |
+
fwrite(&vtype, sizeof(uint32_t), 1, fp);
|
| 278 |
+
fwrite(&val, sizeof(uint64_t), 1, fp);
|
| 279 |
+
}
|
| 280 |
+
|
| 281 |
+
static inline void gguf_write_kv_float32(FILE *fp, const char *key, float val)
|
| 282 |
+
{
|
| 283 |
+
gguf_write_string(fp, key);
|
| 284 |
+
uint32_t vtype = GGUF_TYPE_FLOAT32;
|
| 285 |
+
fwrite(&vtype, sizeof(uint32_t), 1, fp);
|
| 286 |
+
fwrite(&val, sizeof(float), 1, fp);
|
| 287 |
+
}
|
| 288 |
+
|
| 289 |
+
static inline void gguf_write_kv_bool(FILE *fp, const char *key, int val)
|
| 290 |
+
{
|
| 291 |
+
gguf_write_string(fp, key);
|
| 292 |
+
uint32_t vtype = GGUF_TYPE_BOOL;
|
| 293 |
+
fwrite(&vtype, sizeof(uint32_t), 1, fp);
|
| 294 |
+
uint8_t b = val ? 1 : 0;
|
| 295 |
+
fwrite(&b, sizeof(uint8_t), 1, fp);
|
| 296 |
+
}
|
| 297 |
+
|
| 298 |
+
/* Write an array of float32 values */
|
| 299 |
+
static inline void gguf_write_kv_float32_array(FILE *fp, const char *key,
|
| 300 |
+
const float *vals, uint64_t count)
|
| 301 |
+
{
|
| 302 |
+
gguf_write_string(fp, key);
|
| 303 |
+
uint32_t vtype = GGUF_TYPE_ARRAY;
|
| 304 |
+
fwrite(&vtype, sizeof(uint32_t), 1, fp);
|
| 305 |
+
uint32_t subtype = GGUF_TYPE_FLOAT32;
|
| 306 |
+
fwrite(&subtype, sizeof(uint32_t), 1, fp);
|
| 307 |
+
fwrite(&count, sizeof(uint64_t), 1, fp);
|
| 308 |
+
fwrite(vals, sizeof(float), count, fp);
|
| 309 |
+
}
|
| 310 |
+
|
| 311 |
+
/* Write an array of int32 values */
|
| 312 |
+
static inline void gguf_write_kv_int32_array(FILE *fp, const char *key,
|
| 313 |
+
const int32_t *vals, uint64_t count)
|
| 314 |
+
{
|
| 315 |
+
gguf_write_string(fp, key);
|
| 316 |
+
uint32_t vtype = GGUF_TYPE_ARRAY;
|
| 317 |
+
fwrite(&vtype, sizeof(uint32_t), 1, fp);
|
| 318 |
+
uint32_t subtype = GGUF_TYPE_INT32;
|
| 319 |
+
fwrite(&subtype, sizeof(uint32_t), 1, fp);
|
| 320 |
+
fwrite(&count, sizeof(uint64_t), 1, fp);
|
| 321 |
+
fwrite(vals, sizeof(int32_t), count, fp);
|
| 322 |
+
}
|
| 323 |
+
|
| 324 |
+
/* Write an array of string values */
|
| 325 |
+
static inline void gguf_write_kv_string_array(FILE *fp, const char *key,
|
| 326 |
+
const char **vals, uint64_t count)
|
| 327 |
+
{
|
| 328 |
+
gguf_write_string(fp, key);
|
| 329 |
+
uint32_t vtype = GGUF_TYPE_ARRAY;
|
| 330 |
+
fwrite(&vtype, sizeof(uint32_t), 1, fp);
|
| 331 |
+
uint32_t subtype = GGUF_TYPE_STRING;
|
| 332 |
+
fwrite(&subtype, sizeof(uint32_t), 1, fp);
|
| 333 |
+
fwrite(&count, sizeof(uint64_t), 1, fp);
|
| 334 |
+
for (uint64_t i = 0; i < count; i++) {
|
| 335 |
+
gguf_write_string(fp, vals[i] ? vals[i] : "");
|
| 336 |
+
}
|
| 337 |
+
}
|
| 338 |
+
/* =======================================================================
 * GGUF TENSOR INFO WRITER
 *
 * Per-tensor descriptor in the file:
 *   name_string + n_dims(u32) + dims[n_dims](u64 each) +
 *   type(u32) + offset(u64)
 *
 * Offset is relative to the start of the tensor data section.
 * ======================================================================= */
|
| 347 |
+
|
| 348 |
+
static inline void gguf_write_tensor_info(FILE *fp, const char *name,
|
| 349 |
+
uint32_t n_dims, const uint64_t *dims,
|
| 350 |
+
GGMLType type, uint64_t offset)
|
| 351 |
+
{
|
| 352 |
+
gguf_write_string(fp, name);
|
| 353 |
+
fwrite(&n_dims, sizeof(uint32_t), 1, fp);
|
| 354 |
+
for (uint32_t i = 0; i < n_dims; i++) {
|
| 355 |
+
fwrite(&dims[i], sizeof(uint64_t), 1, fp);
|
| 356 |
+
}
|
| 357 |
+
uint32_t t = (uint32_t)type;
|
| 358 |
+
fwrite(&t, sizeof(uint32_t), 1, fp);
|
| 359 |
+
fwrite(&offset, sizeof(uint64_t), 1, fp);
|
| 360 |
+
}
|
| 361 |
+
|
| 362 |
+
/* =======================================================================
 * GGUF HEADER WRITER
 * ======================================================================= */
|
| 365 |
+
|
| 366 |
+
static inline void gguf_write_header(FILE *fp, uint64_t tensor_count,
|
| 367 |
+
uint64_t metadata_kv_count)
|
| 368 |
+
{
|
| 369 |
+
uint32_t magic = GGUF_MAGIC;
|
| 370 |
+
uint32_t version = GGUF_VERSION;
|
| 371 |
+
fwrite(&magic, sizeof(uint32_t), 1, fp);
|
| 372 |
+
fwrite(&version, sizeof(uint32_t), 1, fp);
|
| 373 |
+
fwrite(&tensor_count, sizeof(uint64_t), 1, fp);
|
| 374 |
+
fwrite(&metadata_kv_count, sizeof(uint64_t), 1, fp);
|
| 375 |
+
}
|
| 376 |
+
|
| 377 |
+
/* =======================================================================
 * ALIGNMENT PADDING
 * ======================================================================= */
|
| 380 |
+
|
| 381 |
+
/*
 * Append zero bytes until the current file position is a multiple of
 * `alignment`.
 *
 * Nothing is written when `alignment` is 0 (previously a division by
 * zero), when ftell() fails, or when the position is already aligned.
 * Writing stops early if fwrite() reports a short write.
 *
 * NOTE(review): the position comes from ftell(), which returns long; on
 * platforms where long is 32 bits this cannot describe offsets >= 2 GiB.
 */
static inline void gguf_write_padding(FILE *fp, uint32_t alignment)
{
    if (alignment == 0) {
        return;
    }

    const long pos = ftell(fp);
    if (pos < 0) {
        return; /* position unknown: do not emit bogus padding */
    }

    const unsigned long rem = (unsigned long)pos % alignment;
    size_t pad = (rem != 0) ? (size_t)(alignment - rem) : 0;

    const uint8_t zeros[64] = {0};
    while (pad > 0) {
        const size_t chunk = (pad > sizeof zeros) ? sizeof zeros : pad;
        if (fwrite(zeros, 1, chunk, fp) != chunk) {
            return;
        }
        pad -= chunk;
    }
}
|
| 394 |
+
|
| 395 |
+
/* =======================================================================
 * Q8_0 QUANTIZATION - Reference Implementation
 *
 * For each block of 32 floats:
 *   1. Find amax = max(|x_i|)
 *   2. Scale d = amax / 127.0
 *   3. Quantize: qs[i] = round(x_i / d)
 *
 * This is the STANDARD brute-force approach.
 * The HExState MCMC optimizer replaces step 2 with intelligent
 * search for the optimal d that minimizes weighted error.
 * ======================================================================= */
|
| 407 |
+
|
| 408 |
+
static inline void gguf_quantize_q8_0_reference(const float *x,
|
| 409 |
+
BlockQ8_0 *y,
|
| 410 |
+
int64_t n_elements)
|
| 411 |
+
{
|
| 412 |
+
int64_t n_blocks = n_elements / QK8_0;
|
| 413 |
+
|
| 414 |
+
for (int64_t i = 0; i < n_blocks; i++) {
|
| 415 |
+
float amax = 0.0f;
|
| 416 |
+
for (int j = 0; j < QK8_0; j++) {
|
| 417 |
+
float v = fabsf(x[i * QK8_0 + j]);
|
| 418 |
+
if (v > amax) amax = v;
|
| 419 |
+
}
|
| 420 |
+
|
| 421 |
+
float d = amax / 127.0f;
|
| 422 |
+
float id = (d != 0.0f) ? 1.0f / d : 0.0f;
|
| 423 |
+
|
| 424 |
+
y[i].d = gguf_fp32_to_fp16(d);
|
| 425 |
+
|
| 426 |
+
for (int j = 0; j < QK8_0; j++) {
|
| 427 |
+
float v = x[i * QK8_0 + j] * id;
|
| 428 |
+
y[i].qs[j] = (int8_t)roundf(v);
|
| 429 |
+
}
|
| 430 |
+
}
|
| 431 |
+
}
|
| 432 |
+
|
| 433 |
+
/* Dequantize a single Q8_0 block back to float (for error measurement) */
|
| 434 |
+
static inline void gguf_dequantize_q8_0_block(const BlockQ8_0 *block,
|
| 435 |
+
float *out)
|
| 436 |
+
{
|
| 437 |
+
float d = gguf_fp16_to_fp32(block->d);
|
| 438 |
+
for (int j = 0; j < QK8_0; j++) {
|
| 439 |
+
out[j] = (float)block->qs[j] * d;
|
| 440 |
+
}
|
| 441 |
+
}
|
| 442 |
+
|
| 443 |
+
/* Compute L2 reconstruction error for a Q8_0 quantized block */
|
| 444 |
+
static inline float gguf_q8_0_block_error(const float *original,
|
| 445 |
+
const BlockQ8_0 *block)
|
| 446 |
+
{
|
| 447 |
+
float deq[QK8_0];
|
| 448 |
+
gguf_dequantize_q8_0_block(block, deq);
|
| 449 |
+
float err = 0.0f;
|
| 450 |
+
for (int j = 0; j < QK8_0; j++) {
|
| 451 |
+
float diff = original[j] - deq[j];
|
| 452 |
+
err += diff * diff;
|
| 453 |
+
}
|
| 454 |
+
return err;
|
| 455 |
+
}
|
| 456 |
+
|
| 457 |
+
/* =======================================================================
 * Q2_K QUANTIZATION - Reference Implementation
 *
 * For each superblock of 256 floats:
 *   1. Divide into 16 sub-blocks of 16 weights
 *   2. For each sub-block: find optimal (scale, min) such that
 *      w ~= min + scale * q
 *   3. Quantize sub-block scales/mins to 4 bits each
 *   4. Re-quantize weights to 2 bits using final scales
 *   5. Pack 4 quants per byte
 *
 * The HExState MCMC optimizer replaces step 2's brute-force grid search
 * with intelligent Boltzmann-guided exploration.
 * ======================================================================= */
|
| 470 |
+
|
| 471 |
+
/* Helper: round a float to the nearest integer (ggml-compatible).
 * Adding 2^23 + 2^22 lets the FPU do the rounding; the integer is then
 * read back out of the low 23 mantissa bits. */
static inline int gguf_nearest_int(float fval)
{
    const float biased = fval + 12582912.f; /* 2^23 + 2^22 */
    int raw;

    memcpy(&raw, &biased, sizeof raw);
    return (raw & 0x007fffff) - 0x00400000;
}
|
| 479 |
+
|
| 480 |
+
/* Quantize a sub-block of n floats with a scale+min scheme.
 *
 * Returns the scale and stores the non-negative offset in *the_min, so
 * that x[i] ~= scale * L[i] - *the_min with L[i] in {0, ..., nmax}
 * (nmax = 3 for Q2_K).
 *
 * L does not need to be initialised by the caller: the first refinement
 * pass never reads it. The constant-block path clamps *the_min to >= 0,
 * the same bound the main path enforces. For n <= 0 nothing is read
 * from x and both outputs are 0. */
static inline float gguf_make_qkx_quants(int n, int nmax,
                                         const float *x, uint8_t *L,
                                         float *the_min)
{
    if (n <= 0) {
        *the_min = 0.0f;
        return 0.0f;
    }

    float min_val = x[0];
    float max_val = x[0];
    for (int i = 1; i < n; i++) {
        if (x[i] < min_val) min_val = x[i];
        if (x[i] > max_val) max_val = x[i];
    }

    if (max_val == min_val) {
        /* Constant block: no spread to encode, only an offset. */
        for (int i = 0; i < n; i++) L[i] = 0;
        *the_min = (min_val < 0) ? -min_val : 0.0f;
        return 0.0f;
    }
    if (min_val > 0) min_val = 0;

    float iscale = nmax / (max_val - min_val);
    float scale  = 1.0f / iscale;

    /* Iterative refinement (matches ggml's make_qkx1_quants) */
    for (int itry = 0; itry < 5; itry++) {
        float sumlx = 0;
        int suml2 = 0;
        /* The first pass always counts as a change: L holds no valid
         * previous assignment yet, so it must not be compared against. */
        int did_change = (itry == 0);

        for (int i = 0; i < n; i++) {
            int l = gguf_nearest_int(iscale * (x[i] - min_val));
            if (l < 0) l = 0;
            if (l > nmax) l = nmax;
            if (itry > 0 && l != (int)L[i]) did_change = 1;
            L[i] = (uint8_t)l;
            sumlx += (x[i] - min_val) * l;
            suml2 += l * l;
        }

        /* Least-squares scale for the current assignment. */
        if (suml2 > 0) scale = sumlx / suml2;

        float sum = 0;
        for (int i = 0; i < n; i++) {
            sum += x[i] - scale * L[i];
        }

        /* Blend the old offset with the mean residual; keep it <= 0. */
        min_val = 0.7f * min_val + 0.3f * sum / n;
        if (min_val > 0) min_val = 0;
        if (scale > 1e-15f) iscale = 1.0f / scale;
        if (!did_change) break;
    }

    *the_min = -min_val;
    return scale;
}
|
| 530 |
+
|
| 531 |
+
static inline void gguf_quantize_q2_k_reference(const float *x,
|
| 532 |
+
BlockQ2K *y,
|
| 533 |
+
int64_t n_elements)
|
| 534 |
+
{
|
| 535 |
+
int64_t n_blocks = n_elements / QK_K;
|
| 536 |
+
const float q4scale = 15.0f;
|
| 537 |
+
|
| 538 |
+
for (int64_t i = 0; i < n_blocks; i++) {
|
| 539 |
+
const float *block_x = x + i * QK_K;
|
| 540 |
+
uint8_t L[QK_K];
|
| 541 |
+
float mins[QK_K / 16];
|
| 542 |
+
float scales[QK_K / 16];
|
| 543 |
+
|
| 544 |
+
float max_scale = 0.0f;
|
| 545 |
+
float max_min = 0.0f;
|
| 546 |
+
|
| 547 |
+
/* Step 1: Find scale and min for each of 16 sub-blocks */
|
| 548 |
+
for (int j = 0; j < QK_K / 16; j++) {
|
| 549 |
+
scales[j] = gguf_make_qkx_quants(16, 3,
|
| 550 |
+
block_x + 16 * j,
|
| 551 |
+
L + 16 * j, &mins[j]);
|
| 552 |
+
if (scales[j] > max_scale) max_scale = scales[j];
|
| 553 |
+
if (mins[j] > max_min) max_min = mins[j];
|
| 554 |
+
}
|
| 555 |
+
|
| 556 |
+
/* Step 2: Quantize the 16 sub-block scales to 4 bits */
|
| 557 |
+
if (max_scale > 0) {
|
| 558 |
+
float iscale = q4scale / max_scale;
|
| 559 |
+
for (int j = 0; j < QK_K / 16; j++) {
|
| 560 |
+
int l = gguf_nearest_int(iscale * scales[j]);
|
| 561 |
+
if (l < 0) l = 0;
|
| 562 |
+
if (l > 15) l = 15;
|
| 563 |
+
y[i].scales[j] = (uint8_t)l;
|
| 564 |
+
}
|
| 565 |
+
y[i].d = gguf_fp32_to_fp16(max_scale / q4scale);
|
| 566 |
+
} else {
|
| 567 |
+
for (int j = 0; j < QK_K / 16; j++) y[i].scales[j] = 0;
|
| 568 |
+
y[i].d = gguf_fp32_to_fp16(0.0f);
|
| 569 |
+
}
|
| 570 |
+
|
| 571 |
+
/* Step 3: Quantize the 16 sub-block mins to 4 bits (packed in high nibble) */
|
| 572 |
+
if (max_min > 0) {
|
| 573 |
+
float iscale = q4scale / max_min;
|
| 574 |
+
for (int j = 0; j < QK_K / 16; j++) {
|
| 575 |
+
int l = gguf_nearest_int(iscale * mins[j]);
|
| 576 |
+
if (l < 0) l = 0;
|
| 577 |
+
if (l > 15) l = 15;
|
| 578 |
+
y[i].scales[j] |= ((uint8_t)l << 4);
|
| 579 |
+
}
|
| 580 |
+
y[i].dmin = gguf_fp32_to_fp16(max_min / q4scale);
|
| 581 |
+
} else {
|
| 582 |
+
y[i].dmin = gguf_fp32_to_fp16(0.0f);
|
| 583 |
+
}
|
| 584 |
+
|
| 585 |
+
/* Step 4: Re-quantize weights to 2 bits using final rounded scales */
|
| 586 |
+
for (int j = 0; j < QK_K / 16; j++) {
|
| 587 |
+
float d = gguf_fp16_to_fp32(y[i].d) * (y[i].scales[j] & 0xF);
|
| 588 |
+
if (d < 1e-15f) {
|
| 589 |
+
for (int ii = 0; ii < 16; ii++) L[16 * j + ii] = 0;
|
| 590 |
+
continue;
|
| 591 |
+
}
|
| 592 |
+
float dm = gguf_fp16_to_fp32(y[i].dmin) * (y[i].scales[j] >> 4);
|
| 593 |
+
for (int ii = 0; ii < 16; ii++) {
|
| 594 |
+
int l = gguf_nearest_int((block_x[16 * j + ii] + dm) / d);
|
| 595 |
+
if (l < 0) l = 0;
|
| 596 |
+
if (l > 3) l = 3;
|
| 597 |
+
L[16 * j + ii] = (uint8_t)l;
|
| 598 |
+
}
|
| 599 |
+
}
|
| 600 |
+
|
| 601 |
+
/* Step 5: Pack 4 quants per byte (2 bits each)
|
| 602 |
+
* Layout: 2 groups of 128, each packed as 32 bytes holding 4Γ32 quants */
|
| 603 |
+
for (int j = 0; j < QK_K; j += 128) {
|
| 604 |
+
for (int l = 0; l < 32; l++) {
|
| 605 |
+
y[i].qs[j / 4 + l] = L[j + l]
|
| 606 |
+
| (L[j + l + 32] << 2)
|
| 607 |
+
| (L[j + l + 64] << 4)
|
| 608 |
+
| (L[j + l + 96] << 6);
|
| 609 |
+
}
|
| 610 |
+
}
|
| 611 |
+
}
|
| 612 |
+
}
|
| 613 |
+
|
| 614 |
+
/* Dequantize a single Q2_K superblock to float (for error measurement) */
|
| 615 |
+
static inline void gguf_dequantize_q2_k_block(const BlockQ2K *block,
|
| 616 |
+
float *out)
|
| 617 |
+
{
|
| 618 |
+
float d = gguf_fp16_to_fp32(block->d);
|
| 619 |
+
float dmin = gguf_fp16_to_fp32(block->dmin);
|
| 620 |
+
|
| 621 |
+
const uint8_t *q = block->qs;
|
| 622 |
+
int is = 0;
|
| 623 |
+
|
| 624 |
+
for (int n = 0; n < QK_K; n += 128) {
|
| 625 |
+
int shift = 0;
|
| 626 |
+
for (int j = 0; j < 4; j++) {
|
| 627 |
+
uint8_t sc = block->scales[is++];
|
| 628 |
+
float dl = d * (sc & 0xF);
|
| 629 |
+
float ml = dmin * (sc >> 4);
|
| 630 |
+
for (int l = 0; l < 16; l++) {
|
| 631 |
+
*out++ = dl * ((float)((q[l] >> shift) & 3)) - ml;
|
| 632 |
+
}
|
| 633 |
+
|
| 634 |
+
sc = block->scales[is++];
|
| 635 |
+
dl = d * (sc & 0xF);
|
| 636 |
+
ml = dmin * (sc >> 4);
|
| 637 |
+
for (int l = 0; l < 16; l++) {
|
| 638 |
+
*out++ = dl * ((float)((q[l + 16] >> shift) & 3)) - ml;
|
| 639 |
+
}
|
| 640 |
+
|
| 641 |
+
shift += 2;
|
| 642 |
+
}
|
| 643 |
+
q += 32;
|
| 644 |
+
}
|
| 645 |
+
}
|
| 646 |
+
|
| 647 |
+
/* Compute L2 error for a Q2_K quantized superblock */
|
| 648 |
+
static inline float gguf_q2_k_block_error(const float *original,
|
| 649 |
+
const BlockQ2K *block)
|
| 650 |
+
{
|
| 651 |
+
float deq[QK_K];
|
| 652 |
+
gguf_dequantize_q2_k_block(block, deq);
|
| 653 |
+
float err = 0.0f;
|
| 654 |
+
for (int j = 0; j < QK_K; j++) {
|
| 655 |
+
float diff = original[j] - deq[j];
|
| 656 |
+
err += diff * diff;
|
| 657 |
+
}
|
| 658 |
+
return err;
|
| 659 |
+
}
|
| 660 |
+
|
| 661 |
+
/* =======================================================================
 * GGML TYPE METADATA - Size calculations
 * ======================================================================= */
|
| 664 |
+
|
| 665 |
+
/* Block size for a given type */
|
| 666 |
+
static inline int64_t ggml_type_block_size(GGMLType type)
|
| 667 |
+
{
|
| 668 |
+
switch (type) {
|
| 669 |
+
case GGML_TYPE_F32: return 1;
|
| 670 |
+
case GGML_TYPE_F16: return 1;
|
| 671 |
+
case GGML_TYPE_Q8_0: return QK8_0;
|
| 672 |
+
case GGML_TYPE_Q2_K: return QK_K;
|
| 673 |
+
case GGML_TYPE_Q4_0: return 32;
|
| 674 |
+
case GGML_TYPE_Q4_1: return 32;
|
| 675 |
+
case GGML_TYPE_Q5_0: return 32;
|
| 676 |
+
case GGML_TYPE_Q5_1: return 32;
|
| 677 |
+
case GGML_TYPE_Q4_K: return 256;
|
| 678 |
+
case GGML_TYPE_Q5_K: return 256;
|
| 679 |
+
case GGML_TYPE_Q6_K: return 256;
|
| 680 |
+
default: return 1;
|
| 681 |
+
}
|
| 682 |
+
}
|
| 683 |
+
|
| 684 |
+
/* Bytes per block for a given type */
|
| 685 |
+
static inline int64_t ggml_type_bytes_per_block(GGMLType type)
|
| 686 |
+
{
|
| 687 |
+
switch (type) {
|
| 688 |
+
case GGML_TYPE_F32: return 4;
|
| 689 |
+
case GGML_TYPE_F16: return 2;
|
| 690 |
+
case GGML_TYPE_Q8_0: return sizeof(BlockQ8_0); /* 34 */
|
| 691 |
+
case GGML_TYPE_Q2_K: return sizeof(BlockQ2K); /* 84 */
|
| 692 |
+
case GGML_TYPE_Q4_0: return 18; /* 2 + 16 */
|
| 693 |
+
case GGML_TYPE_Q4_1: return 20; /* 2 + 2 + 16 */
|
| 694 |
+
default: return 4;
|
| 695 |
+
}
|
| 696 |
+
}
|
| 697 |
+
|
| 698 |
+
/* Total bytes for n_elements of a given type */
|
| 699 |
+
static inline int64_t ggml_type_size(GGMLType type, int64_t n_elements)
|
| 700 |
+
{
|
| 701 |
+
int64_t block_size = ggml_type_block_size(type);
|
| 702 |
+
int64_t bytes_per_block = ggml_type_bytes_per_block(type);
|
| 703 |
+
int64_t n_blocks = (n_elements + block_size - 1) / block_size;
|
| 704 |
+
return n_blocks * bytes_per_block;
|
| 705 |
+
}
|
| 706 |
+
|
| 707 |
+
#endif /* GGUF_FORMAT_H */
|
hexstate_quantize.c
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
hexstate_requantize.py
ADDED
|
@@ -0,0 +1,1190 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
HExState GGUF Re-Quantizer β GGUF-to-GGUF Q2_K quantization.
|
| 4 |
+
|
| 5 |
+
Reads a source GGUF (F16/BF16/F32), copies all metadata verbatim,
|
| 6 |
+
and re-quantizes eligible weight tensors to Q2_K using numpy.
|
| 7 |
+
|
| 8 |
+
This bypasses the tokenizer parsing problem entirely β the source GGUF
|
| 9 |
+
(from llama.cpp's convert_hf_to_gguf.py) has correct metadata.
|
| 10 |
+
|
| 11 |
+
Usage:
|
| 12 |
+
python3 hexstate_requantize.py input.gguf output.gguf
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
import struct
|
| 16 |
+
import sys
|
| 17 |
+
import time
|
| 18 |
+
import os
|
| 19 |
+
import io
|
| 20 |
+
import ctypes
|
| 21 |
+
import numpy as np
|
| 22 |
+
|
| 23 |
+
# --- HExState C Library (HPC-optimized Q2_K quantization) ---
|
| 24 |
+
_HEXSTATE_LIB = None
|
| 25 |
+
|
| 26 |
+
def _load_hexstate_lib():
    """Load libhexstate_q2k.so from this script's directory, once.

    Returns the cached ctypes handle on repeat calls. Returns None when the
    shared object is not present, or when loading or binding any symbol
    fails (a warning is printed in that case). On success the library's
    hexstate_init() is called before the handle is cached.
    """
    global _HEXSTATE_LIB
    if _HEXSTATE_LIB is not None:
        return _HEXSTATE_LIB

    so_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                           "libhexstate_q2k.so")
    if not os.path.exists(so_path):
        return None

    try:
        handle = ctypes.CDLL(so_path)
        f32_ptr = ctypes.POINTER(ctypes.c_float)

        def bind(symbol, restype, argtypes):
            # Declare the C signature so ctypes converts arguments correctly.
            fn = getattr(handle, symbol)
            fn.restype = restype
            fn.argtypes = argtypes

        # void hexstate_init(void)
        bind("hexstate_init", None, [])

        # void hexstate_quantize_tensor_q2k(const float*, int64_t, void*, float*, int, int)
        bind("hexstate_quantize_tensor_q2k", None, [
            f32_ptr,          # weights
            ctypes.c_int64,   # n_elements
            ctypes.c_void_p,  # output
            f32_ptr,          # out_error
            ctypes.c_int,     # opt_mode (0=HPC, 1=MSE, 2=Hybrid)
            ctypes.c_int,     # verbose
        ])

        bind("hexstate_q2k_block_bytes", ctypes.c_int, [])
        bind("hexstate_q2k_block_elements", ctypes.c_int, [])

        # imatrix-aware version
        bind("hexstate_quantize_tensor_q2k_imat", None, [
            f32_ptr,          # weights
            ctypes.c_int64,   # n_elements
            ctypes.c_void_p,  # output
            f32_ptr,          # out_error
            ctypes.c_int,     # opt_mode
            f32_ptr,          # imat_importance (can be NULL)
            ctypes.c_int,     # verbose
        ])

        # Q4_0 HPC quantizer (for attention tensors); bound only if exported
        if hasattr(handle, 'hexstate_quantize_tensor_q4_0_hpc'):
            bind("hexstate_quantize_tensor_q4_0_hpc", None, [
                f32_ptr,          # weights
                ctypes.c_int64,   # n_elements
                ctypes.c_void_p,  # output
                f32_ptr,          # out_error
                f32_ptr,          # imat_importance (can be NULL)
                ctypes.c_int,     # verbose
            ])

        handle.hexstate_init()
    except Exception as e:
        print(f" WARNING: Failed to load HexState library: {e}")
        return None

    _HEXSTATE_LIB = handle
    return handle
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def _skip_gguf_kv_value(f, vtype):
    """Advance ``f`` past one GGUF metadata (KV) value of type ``vtype``.

    The value is consumed with ``f.read`` and discarded; nothing is returned.

    Args:
        f: Binary stream positioned on the first byte of the value.
        vtype: Integer GGUF value-type tag read just before the value.

    Raises:
        ValueError: If ``vtype`` (or the element type of a non-empty array)
            is not one of the type tags handled here. Guessing a width for
            an unknown tag would leave the stream at the wrong position for
            everything that follows.
    """
    import struct as st

    # Byte widths of the fixed-size scalar type tags.
    size_map = {0: 1, 1: 1, 2: 2, 3: 2, 4: 4, 5: 4, 6: 4, 7: 1, 10: 8, 11: 8, 12: 8}

    if vtype in size_map:
        f.read(size_map[vtype])
    elif vtype == 8:  # string: uint64 length followed by the bytes
        slen = st.unpack('<Q', f.read(8))[0]
        f.read(slen)
    elif vtype == 9:  # array: uint32 element type, uint64 count, elements
        arr_type = st.unpack('<I', f.read(4))[0]
        arr_len = st.unpack('<Q', f.read(8))[0]
        if arr_type in size_map:
            # Fixed-width elements can be skipped in one read.
            f.read(arr_len * size_map[arr_type])
        else:
            # Strings and nested arrays are variable-length: skip each
            # element in turn. An unknown element type raises on the first
            # element; an empty array is skipped without complaint.
            for _ in range(arr_len):
                _skip_gguf_kv_value(f, arr_type)
    else:
        raise ValueError(f"Unknown GGUF KV type {vtype}")
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def read_imatrix(path):
    """Read a llama.cpp importance-matrix file (GGUF or legacy .dat format).

    The first four bytes select the format: the GGUF magic means the modern
    GGUF container, anything else is treated as the legacy layout whose
    first int32 is the number of entries.

    Args:
        path: Filesystem path of the imatrix file.

    Returns:
        dict mapping tensor name -> float32 numpy array of importance values
        scaled to have mean 1. If the mean is not above 1e-30 the entry is
        an array of ones instead.
    """
    import struct as st

    def _unit_mean(values):
        # Scale to mean 1; fall back to uniform weights when the mean is
        # (near) zero or NaN so callers never divide by ~0.
        mean = values.mean()
        if mean > 1e-30:
            return values / mean
        return np.ones_like(values)

    imat = {}

    with open(path, 'rb') as f:
        magic = st.unpack('<I', f.read(4))[0]

        if magic == 0x46554747:  # GGUF format (modern llama.cpp)
            _ver = st.unpack('<I', f.read(4))[0]
            n_tensors = st.unpack('<Q', f.read(8))[0]
            n_kv = st.unpack('<Q', f.read(8))[0]

            # Metadata is not needed here: skip every KV pair.
            for _ in range(n_kv):
                slen = st.unpack('<Q', f.read(8))[0]
                f.read(slen)  # key
                vtype = st.unpack('<I', f.read(4))[0]
                _skip_gguf_kv_value(f, vtype)

            # Tensor directory: (name, element count, offset in data section).
            tensor_infos = []
            for _ in range(n_tensors):
                slen = st.unpack('<Q', f.read(8))[0]
                name = f.read(slen).decode('utf-8', errors='replace')
                n_dims = st.unpack('<I', f.read(4))[0]
                dims = [st.unpack('<Q', f.read(8))[0] for _ in range(n_dims)]
                _ttype = st.unpack('<I', f.read(4))[0]  # payload is read as float32 regardless
                offset = st.unpack('<Q', f.read(8))[0]
                n_el = 1
                for d in dims:
                    n_el *= d
                tensor_infos.append((name, n_el, offset))

            # Data section starts at the next 32-byte boundary.
            data_start = ((f.tell() + 31) // 32) * 32

            # Pair up "<base>.in_sum2" and "<base>.counts" tensors by base name.
            sum2_data = {}
            counts_data = {}
            for name, n_el, offset in tensor_infos:
                f.seek(data_start + offset)
                data = np.frombuffer(f.read(n_el * 4), dtype=np.float32).copy()
                if name.endswith('.in_sum2'):
                    sum2_data[name[:-len('.in_sum2')]] = data
                elif name.endswith('.counts'):
                    counts_data[name[:-len('.counts')]] = data

            # importance = sqrt(in_sum2 / count), then scaled to mean 1.
            # Only the first counts value is used; a missing counts tensor
            # is treated as count == 1.
            for base_name, in_sum2 in sum2_data.items():
                count = counts_data.get(base_name, np.array([1.0]))[0]
                if count > 0:
                    importance = np.sqrt(in_sum2 / count)
                else:
                    importance = np.ones_like(in_sum2)
                imat[base_name] = _unit_mean(importance)

        else:
            # Legacy format: the 4 bytes just read were n_entries.
            f.seek(0)
            n_entries = st.unpack('<i', f.read(4))[0]
            for _ in range(n_entries):
                name_len = st.unpack('<i', f.read(4))[0]
                name = f.read(name_len).decode('utf-8')
                # llama.cpp writes the call count first and the number of
                # values second; reading them the other way round takes the
                # call count as the array length and corrupts every entry
                # after the first.
                _n_calls, n_values = st.unpack('<ii', f.read(8))
                values = np.frombuffer(f.read(n_values * 4), dtype=np.float32).copy()
                imat[name] = _unit_mean(values)

    return imat
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
def quantize_tensor_q2k_hpc(f32_data, opt_mode=2, importance=None):
    """Quantize a tensor to Q2_K through the HexState C library.

    Args:
        f32_data: Flat array of weights. It is zero-padded up to a multiple
            of QK_K before being handed to the library.
        opt_mode: Optimisation mode forwarded to the C routine
            (0=HPC (BP only), 1=MSE (grid search), 2=Hybrid).
        importance: Optional importance weights (from an imatrix). When the
            data is padded, the importance array is padded with ones by the
            same amount. ``None`` is passed to C as a NULL pointer.

    Returns:
        (bytes, n_blocks): the raw quantized blocks and how many there are,
        the same shape of result as quantize_tensor_q2k().

    Raises:
        RuntimeError: If the HexState shared library could not be loaded.
    """
    hexlib = _load_hexstate_lib()
    if hexlib is None:
        raise RuntimeError("HexState library not available")

    # Pad up to a whole number of QK_K-element super-blocks.
    leftover = len(f32_data) % QK_K
    if leftover != 0:
        tail = QK_K - leftover
        f32_data = np.concatenate([f32_data, np.zeros(tail, dtype=np.float32)])
        if importance is not None:
            importance = np.concatenate([importance, np.ones(tail, dtype=np.float32)])

    total = len(f32_data)
    block_count = total // QK_K
    bytes_per_block = hexlib.hexstate_q2k_block_bytes()

    # Destination buffer the C routine writes its blocks into.
    out_buf = np.zeros(block_count * bytes_per_block, dtype=np.uint8)
    err = ctypes.c_float(0.0)

    float_ptr = ctypes.POINTER(ctypes.c_float)
    weights = np.ascontiguousarray(f32_data, dtype=np.float32)

    # The importance array stays bound to a local so the pointer handed to
    # C remains valid for the duration of the call.
    imat_ptr = None
    if importance is not None:
        imat_arr = np.ascontiguousarray(importance, dtype=np.float32)
        imat_ptr = imat_arr.ctypes.data_as(float_ptr)

    hexlib.hexstate_quantize_tensor_q2k_imat(
        weights.ctypes.data_as(float_ptr),
        ctypes.c_int64(total),
        out_buf.ctypes.data_as(ctypes.c_void_p),
        ctypes.byref(err),
        ctypes.c_int(opt_mode),
        imat_ptr,
        ctypes.c_int(1),  # verbose
    )

    return out_buf.tobytes(), block_count
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
# ─── Constants ──────────────────────────────────────────────────────────────
|
| 250 |
+
GGUF_MAGIC = 0x46554747  # the bytes b"GGUF" read as a little-endian uint32
GGUF_VERSION = 3
ALIGNMENT = 32           # default alignment used by align_offset()
QK_K = 256               # elements per Q2_K super-block

# ggml tensor type ids referenced by name in this script.
GGML_TYPE_F32 = 0
GGML_TYPE_F16 = 1
GGML_TYPE_Q4_0 = 2
GGML_TYPE_Q2_K = 10
GGML_TYPE_BF16 = 30

# One row per known ggml tensor type:
#   (type id, display name, elements per block, bytes per block)
_GGML_TYPE_TABLE = (
    (0, "F32", 1, 4),
    (1, "F16", 1, 2),
    (2, "Q4_0", 32, 18),
    (3, "Q4_1", 32, 20),
    (6, "Q5_0", 32, 20),
    (7, "Q5_1", 32, 22),
    (8, "Q8_0", 32, 34),
    (9, "Q8_1", 32, 36),
    (10, "Q2_K", 256, 84),
    (11, "Q3_K", 256, 110),
    (12, "Q4_K", 256, 144),
    (13, "Q5_K", 256, 176),
    (14, "Q6_K", 256, 210),
    (15, "Q8_K", 256, 292),
    (30, "BF16", 1, 2),
)

# Type id -> display name.
TYPE_NAME = {tid: label for tid, label, _, _ in _GGML_TYPE_TABLE}

# Type id -> number of elements stored in one block.
TYPE_BLOCK_SIZE = {tid: n_elem for tid, _, n_elem, _ in _GGML_TYPE_TABLE}

# Type id -> size in bytes of one block.
TYPE_BLOCK_BYTES = {tid: n_bytes for tid, _, _, n_bytes in _GGML_TYPE_TABLE}
|
| 278 |
+
|
| 279 |
+
|
| 280 |
+
def align_offset(offset, alignment=ALIGNMENT):
    """Round ``offset`` up to the next multiple of ``alignment``.

    Uses ceiling division rather than a bitmask, so the result is correct
    for any positive alignment. A bitmask only works for powers of two,
    whereas GGUF merely requires the alignment to be a multiple of 8. For
    power-of-two alignments the result is identical to the bitmask form.

    Args:
        offset: Integer byte offset.
        alignment: Positive integer alignment; defaults to ALIGNMENT.

    Returns:
        The smallest multiple of ``alignment`` that is >= ``offset``.

    Raises:
        ValueError: If ``alignment`` is not positive.
    """
    if alignment <= 0:
        raise ValueError(f"alignment must be positive, got {alignment}")
    # -(-a // b) is integer ceiling division.
    return -(-offset // alignment) * alignment
|
| 282 |
+
|
| 283 |
+
|
| 284 |
+
def read_string(f):
    """Read one length-prefixed string from binary stream ``f``.

    The string is stored as a little-endian uint64 byte count followed by
    that many bytes. Bytes that are not valid UTF-8 are replaced rather
    than raising.
    """
    (byte_count,) = struct.unpack('<Q', f.read(8))
    payload = f.read(byte_count)
    return payload.decode('utf-8', errors='replace')
|
| 287 |
+
|
| 288 |
+
|
| 289 |
+
def write_string(f, s):
    """Write ``s`` to binary stream ``f`` as a length-prefixed string.

    Emits the UTF-8 byte count as a little-endian uint64, then the encoded
    bytes themselves (two separate writes).
    """
    encoded = s.encode('utf-8')
    length_prefix = struct.pack('<Q', len(encoded))
    f.write(length_prefix)
    f.write(encoded)
|
| 293 |
+
|
| 294 |
+
|
| 295 |
+
def read_kv_value(f, vtype):
    """Read one GGUF KV value and return its raw encoded bytes for passthrough.

    The value is consumed in a single forward pass, so ``f`` only needs to
    support ``read``. The returned bytes are exactly the bytes the value
    occupies in the stream (length prefixes and array headers included),
    and the stream is left positioned just after the value.

    Args:
        f: Binary stream positioned on the first byte of the value.
        vtype: Integer GGUF value-type tag (0-12).

    Returns:
        bytes: The raw serialized value.

    Raises:
        ValueError: If ``vtype`` (or the element type of a non-empty array)
            is not a known type tag.
    """
    # Byte widths of the fixed-size scalar tags:
    # UINT8, INT8, UINT16, INT16, UINT32, INT32, FLOAT32, BOOL,
    # UINT64, INT64, FLOAT64.
    scalar_sizes = {0: 1, 1: 1, 2: 2, 3: 2, 4: 4, 5: 4, 6: 4, 7: 1,
                    10: 8, 11: 8, 12: 8}

    width = scalar_sizes.get(vtype)
    if width is not None:
        return f.read(width)

    if vtype == 8:  # STRING: uint64 length + bytes
        len_bytes = f.read(8)
        slen = struct.unpack('<Q', len_bytes)[0]
        return len_bytes + f.read(slen)

    if vtype == 9:  # ARRAY: uint32 element type + uint64 count + elements
        header = f.read(12)
        arr_type, arr_len = struct.unpack('<IQ', header)

        elem_width = scalar_sizes.get(arr_type)
        if elem_width is not None:
            # Fixed-width elements: one bulk read instead of one per element.
            return header + f.read(arr_len * elem_width)

        parts = [header]
        if arr_type == 8:
            # Arrays of strings (e.g. tokenizer vocabularies) can be very
            # long, so they are handled inline rather than by recursion.
            for _ in range(arr_len):
                len_bytes = f.read(8)
                slen = struct.unpack('<Q', len_bytes)[0]
                parts.append(len_bytes)
                parts.append(f.read(slen))
        else:
            # Nested arrays recurse; an unknown element type raises on the
            # first element, while an empty array passes through untouched.
            for _ in range(arr_len):
                parts.append(read_kv_value(f, arr_type))
        return b''.join(parts)

    raise ValueError(f"Unknown KV type {vtype}")
|
| 323 |
+
|
| 324 |
+
|
| 325 |
+
# ─── BF16 ↔ F32 conversion ─────────────────────────────────────────────────
|
| 326 |
+
def bf16_to_f32(data_bytes, n_elements):
    """Decode raw BF16 bytes into a float32 numpy array.

    Each 16-bit word becomes the upper half of a 32-bit float pattern; the
    lower 16 bits are zero. ``n_elements`` is accepted but not used: the
    element count follows from the length of ``data_bytes``.
    """
    words = np.frombuffer(data_bytes, dtype=np.uint16)
    widened = np.left_shift(words.astype(np.uint32), 16)
    return widened.view(np.float32)
|
| 332 |
+
|
| 333 |
+
|
| 334 |
+
def f16_to_f32(data_bytes, n_elements):
    """Decode raw IEEE half-precision bytes into a float32 numpy array.

    ``n_elements`` is accepted but not used: the element count follows from
    the length of ``data_bytes``.
    """
    halves = np.frombuffer(data_bytes, dtype=np.float16)
    widened = np.empty(halves.shape, dtype=np.float32)
    widened[...] = halves  # widening float16 -> float32 is exact
    return widened
|
| 338 |
+
|
| 339 |
+
|
| 340 |
+
def f32_to_f16(f32_array):
    """Encode a float array as raw IEEE half-precision (F16) bytes."""
    as_half = f32_array.astype(np.float16)
    return as_half.tobytes()
|
| 343 |
+
|
| 344 |
+
|
| 345 |
+
def f32_to_bf16(f32_array):
    """Encode a float array as raw BF16 bytes.

    Follows the same rule as ggml's fp32 -> bf16 conversion:

    * finite values and infinities are rounded to nearest, ties to even;
    * NaNs keep their sign and upper payload bits and are forced quiet,
      so a NaN never turns into an infinity or wraps around to zero.

    The input is coerced to contiguous float32 first, so arrays of another
    float dtype are converted by value instead of being bit-reinterpreted.

    Args:
        f32_array: Array-like of floats.

    Returns:
        bytes: One native-endian uint16 per element, in C order.
    """
    bits = np.ascontiguousarray(f32_array, dtype=np.float32).view(np.uint32)
    upper = bits >> np.uint32(16)

    # NaN <=> magnitude bits are strictly above the infinity pattern.
    is_nan = (bits & np.uint32(0x7FFFFFFF)) > np.uint32(0x7F800000)

    # Round-to-nearest-even: add 0x7FFF plus the lowest kept bit, then
    # truncate. NaN lanes may wrap in this sum, but they are replaced below.
    rounded = (bits + (np.uint32(0x7FFF) + (upper & np.uint32(1)))) >> np.uint32(16)

    # Setting the top mantissa bit keeps the value a (quiet) NaN.
    quieted = upper | np.uint32(0x40)

    bf16 = np.where(is_nan, quieted, rounded).astype(np.uint16)
    return bf16.tobytes()
|
| 350 |
+
|
| 351 |
+
|
| 352 |
+
# ─── Q2_K quantization — faithful port of ggml quantize_row_q2_K_ref ───────
|
| 353 |
+
# Vectorized with numpy for performance. Uses make_qkx2_quants algorithm:
|
| 354 |
+
# - Weighted MAD error with weights[i] = |x[i]|
|
| 355 |
+
# - Joint scale+min least-squares solve
|
| 356 |
+
# - 16-step grid search for initial iscale
|
| 357 |
+
|
| 358 |
+
def quantize_tensor_q2k(f32_data):
    """Quantize a flat tensor to Q2_K and return ``(bytes, n_blocks)``.

    Vectorized numpy port modelled on ggml's quantize_row_q2_K_ref with the
    make_qkx2_quants sub-block search. The input is zero-padded to a
    multiple of QK_K, split into super-blocks of 16 sub-blocks x 16 weights,
    and processed in four phases:

      1. per sub-block, search for a (scale, min) pair by trying 16
         candidate inverse scales and solving a weighted least-squares
         fit for each (weights are |x|, error is weighted absolute error);
      2. quantize the sub-block scales and mins to 4 bits against the
         per-super-block maxima, which are stored as fp16 ``d`` / ``dmin``;
      3. re-quantize every weight to 2 bits using the fp16-rounded values;
      4. pack everything into 84-byte blocks.

    Block layout (84 bytes):
      scales[16] : low nibble = 4-bit scale, high nibble = 4-bit min
      qs[64]     : 2-bit quants, four weights 32 apart packed per byte
      d          : fp16 super-block scale
      dmin       : fp16 super-block min-scale
    """
    top_level = 3          # largest 2-bit code
    nibble_max = 15.0      # largest 4-bit code, as a float

    # Zero-pad so the element count is a whole number of super-blocks.
    leftover = len(f32_data) % QK_K
    if leftover != 0:
        f32_data = np.concatenate(
            [f32_data, np.zeros(QK_K - leftover, dtype=np.float32)])
    n_blocks = len(f32_data) // QK_K

    # Shapes below: "S" = [n_blocks, 16], "V" = [n_blocks, 16, 16].
    x = f32_data.reshape(n_blocks, 16, 16).astype(np.float64)   # V
    w = np.abs(x)                                               # V

    lo = np.minimum(x.min(axis=2), 0.0)   # S, sub-block min clamped to <= 0
    hi = x.max(axis=2)                    # S

    # Sums that stay fixed throughout the search.
    w_sum = w.sum(axis=2)                 # S
    wx_sum = (w * x).sum(axis=2)          # S

    span = hi - lo
    flat = span < 1e-30                   # S, sub-blocks with no spread
    span_safe = np.maximum(span, 1e-30)

    # Every candidate quantizes relative to the initial min.
    x_shift = x - lo[:, :, None]          # V

    # -- Phase 1a: baseline candidate (plain min/max scaling) --
    inv0 = top_level / span_safe
    best_scale = 1.0 / np.maximum(inv0, 1e-30)
    q0 = np.clip(np.round(inv0[:, :, None] * x_shift), 0, top_level).astype(np.float64)
    best_err = (w * np.abs(best_scale[:, :, None] * q0 + lo[:, :, None] - x)).sum(axis=2)
    best_min = lo.copy()

    # -- Phase 1b: grid search over 16 candidate inverse scales --
    rmin, rdelta, nstep = -0.5, 0.1, 15
    for step in range(nstep + 1):
        inv_try = (rmin + rdelta * step + top_level) / span_safe       # S
        q_try = np.clip(np.round(inv_try[:, :, None] * x_shift),
                        0, top_level).astype(np.float64)               # V

        wq = w * q_try
        s_q = wq.sum(axis=2)
        s_qq = (wq * q_try).sum(axis=2)
        s_xq = (wq * x).sum(axis=2)

        # Weighted least squares for x ~ scale * q + min (2x2 system).
        det = w_sum * s_qq - s_q * s_q
        solvable = det > 0
        scale_try = np.where(solvable,
                             (w_sum * s_xq - wx_sum * s_q) / np.maximum(det, 1e-30),
                             0.0)
        min_try = np.where(solvable,
                           (s_qq * wx_sum - s_q * s_xq) / np.maximum(det, 1e-30),
                           0.0)

        # A positive min is clamped to zero and the scale is refit alone.
        clamp = min_try > 0
        min_try = np.where(clamp, 0.0, min_try)
        scale_try = np.where(clamp & (s_qq > 0),
                             s_xq / np.maximum(s_qq, 1e-30),
                             scale_try)

        err_try = (w * np.abs(scale_try[:, :, None] * q_try
                              + min_try[:, :, None] - x)).sum(axis=2)

        # Keep this candidate where it is solvable, strictly better, and
        # the sub-block is not flat.
        improved = solvable & (err_try < best_err) & ~flat
        if improved.any():
            best_err = np.where(improved, err_try, best_err)
            best_scale = np.where(improved, scale_try, best_scale)
            best_min = np.where(improved, min_try, best_min)

    # Scales and (negated) mins are both stored as non-negative values.
    sb_scale = np.maximum(best_scale, 0.0).astype(np.float32)    # S
    sb_the_min = np.maximum(-best_min, 0.0).astype(np.float32)   # S

    # Flat sub-blocks: zero scale, min taken from the clamped initial min.
    sb_scale[flat] = 0.0
    sb_the_min[flat] = np.maximum(-lo[flat], 0.0).astype(np.float32)

    # -- Phase 2: 4-bit codes for the sub-block scales and mins --
    def _nibble_codes(values):
        """Return (uint8 codes [n_blocks,16], fp16 super-scale [n_blocks])."""
        peak = values.max(axis=1)
        present = peak > 0
        inv = np.where(present, nibble_max / np.maximum(peak, 1e-30), 0.0)
        codes = np.where(present[:, None],
                         np.clip(np.round(inv[:, None] * values), 0, 15),
                         0.0).astype(np.uint8)
        super_scale = np.where(present, peak / nibble_max, 0.0).astype(np.float16)
        return codes, super_scale

    scale_codes, d_fp16 = _nibble_codes(sb_scale)
    min_codes, dmin_fp16 = _nibble_codes(sb_the_min)

    # Low nibble = scale code, high nibble = min code.
    scales_packed = scale_codes | (min_codes << 4)               # S, uint8

    # -- Phase 3: requantize with the fp16-rounded d / dmin --
    d_sub = d_fp16.astype(np.float32)[:, None] * scale_codes.astype(np.float32)
    dm_sub = dmin_fp16.astype(np.float32)[:, None] * min_codes.astype(np.float32)

    # code = round((x + dm) / d) clamped to [0, 3]; sub-blocks with a zero
    # effective scale get code 0.
    live = d_sub > 0
    inv_d = np.where(live, 1.0 / np.maximum(d_sub, 1e-30), 0.0)
    codes2 = np.where(live[:, :, None],
                      np.clip(np.round(
                          (f32_data.reshape(n_blocks, 16, 16) + dm_sub[:, :, None])
                          * inv_d[:, :, None]
                      ), 0, 3),
                      0).astype(np.uint8)

    # -- Phase 4: pack four 2-bit codes (32 apart) into each byte --
    lanes = codes2.reshape(n_blocks, QK_K).reshape(n_blocks, 2, 4, 32)
    qs_packed = lanes[:, :, 0, :].copy()
    for lane in (1, 2, 3):
        qs_packed |= lanes[:, :, lane, :] << (2 * lane)
    qs_packed = qs_packed.reshape(n_blocks, 64)

    # Assemble blocks: scales[16] | qs[64] | d (fp16) | dmin (fp16).
    blocks = np.concatenate(
        [
            scales_packed,
            qs_packed,
            d_fp16.view(np.uint8).reshape(n_blocks, 2),
            dmin_fp16.view(np.uint8).reshape(n_blocks, 2),
        ],
        axis=1,
    )
    return blocks.tobytes(), n_blocks
|
| 528 |
+
|
| 529 |
+
|
| 530 |
+
def dequant_q2k_fast(q2k_bytes, n_blocks):
    """Vectorized Q2_K dequantization (used for RMSE computation).

    Args:
        q2k_bytes: Buffer of ``n_blocks`` consecutive 84-byte Q2_K blocks.
        n_blocks: Number of blocks in the buffer.

    Returns:
        Flat float32 array with QK_K values per block.

    Block layout (84 bytes):
      scales[16] (bytes 0-15) | qs[64] (bytes 16-79) |
      d (fp16, bytes 80-81) | dmin (fp16, bytes 82-83)

    Unpacking order: the 64 qs bytes form two halves of 32 bytes. Within a
    half, each of the four 2-bit positions (shift 0, 2, 4, 6) yields 32
    values; the first 16 use sub-block scale ``half*8 + shift_idx*2`` and
    the last 16 use the next one. Output elements are laid out in that
    same (half, shift, 16-group) order, so sub-block ``s`` occupies output
    elements ``16*s .. 16*s+15``.
    """
    blocks = np.frombuffer(q2k_bytes, dtype=np.uint8).reshape(n_blocks, 84)

    nibbles = blocks[:, 0:16]
    qs = blocks[:, 16:80]
    d = blocks[:, 80:82].copy().view(np.float16).astype(np.float32).reshape(n_blocks)
    dmin = blocks[:, 82:84].copy().view(np.float16).astype(np.float32).reshape(n_blocks)

    # Effective per-sub-block scale and min (low / high nibble).
    d_sub = d[:, np.newaxis] * (nibbles & 0xF).astype(np.float32)    # [n_blocks, 16]
    m_sub = dmin[:, np.newaxis] * (nibbles >> 4).astype(np.float32)  # [n_blocks, 16]

    # Axes: [block, half, shift, 16-group, element]. Broadcasting the four
    # shifts against the bytes produces all 256 codes per block at once.
    qs_view = qs.reshape(n_blocks, 2, 1, 2, 16)
    shifts = np.array([0, 2, 4, 6], dtype=np.uint8).reshape(1, 1, 4, 1, 1)
    codes = ((qs_view >> shifts) & 3).astype(np.float32)             # [n_blocks, 2, 4, 2, 16]

    # Sub-block index half*8 + shift_idx*2 + group is the row-major index
    # over the (2, 4, 2) axes, so a plain reshape lines the scales up.
    scale_view = d_sub.reshape(n_blocks, 2, 4, 2, 1)
    min_view = m_sub.reshape(n_blocks, 2, 4, 2, 1)

    values = scale_view * codes - min_view
    return values.reshape(n_blocks, QK_K).reshape(-1)
|
| 586 |
+
|
| 587 |
+
|
| 588 |
+
def is_attention_tensor(name):
    """Return True if ``name`` looks like an attention Q/K/V/O projection weight.

    Matching is by substring, covering both the GGUF-style names
    (``attn_q.weight`` ...) and the HF-style names
    (``self_attn.q_proj.weight`` ...). The caller promotes matching
    tensors to Q4_0.
    """
    attention_markers = (
        'attn_q.weight', 'attn_k.weight', 'attn_v.weight', 'attn_output.weight',
        'attn_qkv.weight',
        'self_attn.q_proj.weight', 'self_attn.k_proj.weight',
        'self_attn.v_proj.weight', 'self_attn.o_proj.weight',
    )
    return any(marker in name for marker in attention_markers)
|
| 601 |
+
|
| 602 |
+
|
| 603 |
+
def should_quantize(name, n_dims, dims, tied_embeddings=False):
    """Decide whether a tensor is eligible for quantization.

    Returns False (keep as-is) for:
      - tensors with fewer than 2 dimensions (norms, biases);
      - names containing 'norm', '.bias', 'ffn_gate_inp', 'altup',
        'laurel' or 'layer_output_scale';
      - names starting with 'v.', 'mm.' or 'a.' (vision/audio encoder
        tensors);
      - tensors with fewer than QK_K elements, or whose element count is
        not a multiple of QK_K.

    Everything else returns True.

    ``tied_embeddings`` is accepted but not consulted here: routing of
    token_embd.weight to Q4_0 for tied embeddings is decided by the caller.
    """
    n_elements = 1
    for extent in dims:
        n_elements *= extent

    if n_dims < 2:
        return False

    # Substrings that mark tensors which must stay at their original type.
    keep_markers = ('norm', '.bias', 'ffn_gate_inp', 'altup', 'laurel',
                    'layer_output_scale')
    if any(marker in name for marker in keep_markers):
        return False

    # Vision / multimodal / audio encoder tensors are skipped by prefix.
    if name.startswith(('v.', 'mm.', 'a.')):
        return False

    # Must fill at least one super-block and divide evenly into them.
    return n_elements >= QK_K and n_elements % QK_K == 0
|
| 649 |
+
|
| 650 |
+
|
| 651 |
+
def main():
|
| 652 |
+
if len(sys.argv) < 3:
|
| 653 |
+
print("Usage: python3 hexstate_requantize.py <input.gguf> <output.gguf> [--keep-metadata]")
|
| 654 |
+
sys.exit(1)
|
| 655 |
+
|
| 656 |
+
input_path = sys.argv[1]
|
| 657 |
+
output_path = sys.argv[2]
|
| 658 |
+
keep_metadata = '--keep-metadata' in sys.argv
|
| 659 |
+
quantize_none = '--quantize-none' in sys.argv
|
| 660 |
+
q2all = '--q2all' in sys.argv
|
| 661 |
+
|
| 662 |
+
# Check for imatrix
|
| 663 |
+
imatrix_data = None
|
| 664 |
+
for i, arg in enumerate(sys.argv):
|
| 665 |
+
if arg == '--imatrix' and i + 1 < len(sys.argv):
|
| 666 |
+
imat_path = sys.argv[i + 1]
|
| 667 |
+
if os.path.exists(imat_path):
|
| 668 |
+
imatrix_data = read_imatrix(imat_path)
|
| 669 |
+
print(f" Loaded imatrix: {len(imatrix_data)} tensors from {imat_path}")
|
| 670 |
+
else:
|
| 671 |
+
print(f" WARNING: imatrix file not found: {imat_path}")
|
| 672 |
+
break
|
| 673 |
+
|
| 674 |
+
# Check for HPC C library
|
| 675 |
+
use_hpc = _load_hexstate_lib() is not None
|
| 676 |
+
|
| 677 |
+
print()
|
| 678 |
+
print(" ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ")
|
| 679 |
+
print(" β HExState GGUF Re-Quantizer β")
|
| 680 |
+
print(" β GGUF β Q2_K GGUF with metadata passthrough β")
|
| 681 |
+
if use_hpc and imatrix_data:
|
| 682 |
+
print(" β Engine: HPC + iMatrix (calibrated sensitivity propagation) β")
|
| 683 |
+
elif use_hpc:
|
| 684 |
+
print(" β Engine: HPC (BP + MSE Grid + Sensitivity Propagation) β")
|
| 685 |
+
else:
|
| 686 |
+
print(" β Engine: Python (numpy vectorized) β")
|
| 687 |
+
print(" ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ")
|
| 688 |
+
print()
|
| 689 |
+
|
| 690 |
+
start_time = time.time()
|
| 691 |
+
file_size = os.path.getsize(input_path)
|
| 692 |
+
print(f" Input: {input_path}")
|
| 693 |
+
print(f" Size: {file_size / 1024**3:.2f} GB")
|
| 694 |
+
print(f" Output: {output_path}")
|
| 695 |
+
print()
|
| 696 |
+
|
| 697 |
+
with open(input_path, 'rb') as fin:
|
| 698 |
+
# ββ Read Header ββ
|
| 699 |
+
magic = struct.unpack('<I', fin.read(4))[0]
|
| 700 |
+
assert magic == GGUF_MAGIC, f"Bad GGUF magic: 0x{magic:08X}"
|
| 701 |
+
version = struct.unpack('<I', fin.read(4))[0]
|
| 702 |
+
n_tensors = struct.unpack('<Q', fin.read(8))[0]
|
| 703 |
+
n_kv = struct.unpack('<Q', fin.read(8))[0]
|
| 704 |
+
|
| 705 |
+
print(f" GGUF v{version}: {n_tensors} tensors, {n_kv} KV pairs")
|
| 706 |
+
print()
|
| 707 |
+
|
| 708 |
+
# ββ Read KV pairs (store as raw bytes for passthrough) ββ
|
| 709 |
+
kv_pairs = []
|
| 710 |
+
for i in range(n_kv):
|
| 711 |
+
key = read_string(fin)
|
| 712 |
+
vtype = struct.unpack('<I', fin.read(4))[0]
|
| 713 |
+
raw_value = read_kv_value(fin, vtype)
|
| 714 |
+
kv_pairs.append((key, vtype, raw_value))
|
| 715 |
+
|
| 716 |
+
# ββ Read Tensor Info ββ
|
| 717 |
+
tensor_infos = []
|
| 718 |
+
for i in range(n_tensors):
|
| 719 |
+
name = read_string(fin)
|
| 720 |
+
n_dims = struct.unpack('<I', fin.read(4))[0]
|
| 721 |
+
dims = [struct.unpack('<Q', fin.read(8))[0] for _ in range(n_dims)]
|
| 722 |
+
ttype = struct.unpack('<I', fin.read(4))[0]
|
| 723 |
+
offset = struct.unpack('<Q', fin.read(8))[0]
|
| 724 |
+
|
| 725 |
+
n_elements = 1
|
| 726 |
+
for d in dims:
|
| 727 |
+
n_elements *= d
|
| 728 |
+
|
| 729 |
+
blk_sz = TYPE_BLOCK_SIZE.get(ttype, 1)
|
| 730 |
+
blk_bytes = TYPE_BLOCK_BYTES.get(ttype, 4)
|
| 731 |
+
n_blocks = (n_elements + blk_sz - 1) // blk_sz
|
| 732 |
+
data_size = n_blocks * blk_bytes
|
| 733 |
+
|
| 734 |
+
tensor_infos.append({
|
| 735 |
+
'name': name, 'n_dims': n_dims, 'dims': dims,
|
| 736 |
+
'type': ttype, 'offset': offset,
|
| 737 |
+
'n_elements': n_elements, 'data_size': data_size,
|
| 738 |
+
})
|
| 739 |
+
|
| 740 |
+
# Calculate data section start
|
| 741 |
+
pos_after_info = fin.tell()
|
| 742 |
+
data_section_start = align_offset(pos_after_info)
|
| 743 |
+
|
| 744 |
+
print(f" Data section starts at: {data_section_start:,}")
|
| 745 |
+
print()
|
| 746 |
+
|
| 747 |
+
# ββ Detect tied embeddings ββ
|
| 748 |
+
# If no separate output.weight tensor exists, token_embd.weight
|
| 749 |
+
# doubles as the LM head. Must preserve it at full precision.
|
| 750 |
+
tensor_names = {ti['name'] for ti in tensor_infos}
|
| 751 |
+
has_output_weight = 'output.weight' in tensor_names
|
| 752 |
+
tied_embeddings = not has_output_weight and 'token_embd.weight' in tensor_names
|
| 753 |
+
if tied_embeddings:
|
| 754 |
+
print(" β Tied embeddings detected β token_embd.weight promoted to Q4_0 (serves as LM head)")
|
| 755 |
+
print()
|
| 756 |
+
|
| 757 |
+
# ββ Determine output types ββ
|
| 758 |
+
quant_plan = []
|
| 759 |
+
total_quant = 0
|
| 760 |
+
total_attn = 0
|
| 761 |
+
total_keep = 0
|
| 762 |
+
for ti in tensor_infos:
|
| 763 |
+
if quantize_none:
|
| 764 |
+
will_quant = False
|
| 765 |
+
elif should_quantize(ti['name'], ti['n_dims'], ti['dims'], tied_embeddings):
|
| 766 |
+
if tied_embeddings and ti['name'] in ('token_embd.weight', 'output.weight'):
|
| 767 |
+
will_quant = 'ATTN_Q4' # Promote tied embedding to Q4_0
|
| 768 |
+
total_attn += 1
|
| 769 |
+
elif q2all:
|
| 770 |
+
will_quant = True # --q2all: everything to Q2_K
|
| 771 |
+
total_quant += 1
|
| 772 |
+
elif is_attention_tensor(ti['name']):
|
| 773 |
+
will_quant = 'ATTN_Q4' # Promote attention to Q4_0 HPC
|
| 774 |
+
total_attn += 1
|
| 775 |
+
else:
|
| 776 |
+
will_quant = True
|
| 777 |
+
total_quant += 1
|
| 778 |
+
else:
|
| 779 |
+
will_quant = False
|
| 780 |
+
total_keep += 1
|
| 781 |
+
quant_plan.append(will_quant)
|
| 782 |
+
|
| 783 |
+
print(f" Tensors to quantize (Q2_K): {total_quant}")
|
| 784 |
+
print(f" Tensors to promote (Q4_0Β·HPC): {total_attn}")
|
| 785 |
+
print(f" Tensors to keep as-is: {total_keep}")
|
| 786 |
+
print()
|
| 787 |
+
|
| 788 |
+
# ββ Compute output tensor sizes and offsets ββ
|
| 789 |
+
out_tensor_infos = []
|
| 790 |
+
out_data_offset = 0
|
| 791 |
+
|
| 792 |
+
for i, ti in enumerate(tensor_infos):
|
| 793 |
+
if quant_plan[i]:
|
| 794 |
+
out_dims = list(ti['dims'])
|
| 795 |
+
dim0 = out_dims[0] if ti['n_dims'] >= 2 else ti['n_elements']
|
| 796 |
+
|
| 797 |
+
if quant_plan[i] == 'ATTN_Q4':
|
| 798 |
+
# Attention tensor β Q4_0 HPC (4.5 bpw)
|
| 799 |
+
out_type = GGML_TYPE_Q4_0
|
| 800 |
+
n_blocks = (ti['n_elements'] + 31) // 32
|
| 801 |
+
out_size = n_blocks * 18
|
| 802 |
+
print(f" [ATTNβQ4_0Β·HPC] {ti['name']} ({ti['n_elements']} elements)")
|
| 803 |
+
elif dim0 % QK_K == 0:
|
| 804 |
+
# Q2_K (2.6 bpw, block_size=256)
|
| 805 |
+
out_type = GGML_TYPE_Q2_K
|
| 806 |
+
n_blocks = (ti['n_elements'] + QK_K - 1) // QK_K
|
| 807 |
+
out_size = n_blocks * 84
|
| 808 |
+
elif dim0 % 32 == 0:
|
| 809 |
+
# Q4_0 fallback (4.5 bpw, block_size=32)
|
| 810 |
+
out_type = GGML_TYPE_Q4_0
|
| 811 |
+
n_blocks = ti['n_elements'] // 32
|
| 812 |
+
out_size = n_blocks * 18
|
| 813 |
+
quant_plan[i] = 'Q4_0'
|
| 814 |
+
print(f" Q4_0: {ti['name']} (dims[0]={dim0})")
|
| 815 |
+
else:
|
| 816 |
+
out_type = ti['type']
|
| 817 |
+
out_size = ti['data_size']
|
| 818 |
+
quant_plan[i] = False
|
| 819 |
+
print(f" Keep: {ti['name']} (dims[0]={dim0})")
|
| 820 |
+
else:
|
| 821 |
+
out_type = ti['type']
|
| 822 |
+
out_size = ti['data_size']
|
| 823 |
+
out_dims = list(ti['dims'])
|
| 824 |
+
|
| 825 |
+
out_tensor_infos.append({
|
| 826 |
+
'name': ti['name'],
|
| 827 |
+
'n_dims': ti['n_dims'],
|
| 828 |
+
'dims': out_dims,
|
| 829 |
+
'type': out_type,
|
| 830 |
+
'offset': out_data_offset,
|
| 831 |
+
'data_size': out_size,
|
| 832 |
+
})
|
| 833 |
+
out_data_offset += out_size
|
| 834 |
+
out_data_offset = align_offset(out_data_offset)
|
| 835 |
+
|
| 836 |
+
# ββ Update KV pairs ββ
|
| 837 |
+
updated_kv = []
|
| 838 |
+
if keep_metadata:
|
| 839 |
+
print(" --keep-metadata: passing through ALL KV pairs unchanged")
|
| 840 |
+
updated_kv = list(kv_pairs)
|
| 841 |
+
else:
|
| 842 |
+
for key, vtype, raw_value in kv_pairs:
|
| 843 |
+
if key == 'general.file_type' and vtype == 4: # UINT32
|
| 844 |
+
# file_type=10 means Q2_K in llama.cpp
|
| 845 |
+
updated_kv.append((key, vtype, struct.pack('<I', 10)))
|
| 846 |
+
elif key == 'general.quantization_version' and vtype == 4:
|
| 847 |
+
updated_kv.append((key, vtype, struct.pack('<I', 2)))
|
| 848 |
+
elif key == 'tokenizer.ggml.token_type' and vtype == 9:
|
| 849 |
+
# ββ Fix Gemma 4 token types ββ
|
| 850 |
+
# convert_hf_to_gguf.py incorrectly marks control tokens as
|
| 851 |
+
# NORMAL (1), causing llama.cpp to sample them (e.g. <unused24>
|
| 852 |
+
# spam). Fix: read the tokens array to find control-looking
|
| 853 |
+
# tokens, then patch their types to CONTROL (3).
|
| 854 |
+
# See: https://github.com/ggml-org/llama.cpp/issues/21321
|
| 855 |
+
tokens_kv = next((v for k, vt, v in kv_pairs
|
| 856 |
+
if k == 'tokenizer.ggml.tokens' and vt == 9), None)
|
| 857 |
+
token_names = []
|
| 858 |
+
if tokens_kv:
|
| 859 |
+
bio = io.BytesIO(tokens_kv)
|
| 860 |
+
arr_type = struct.unpack('<I', bio.read(4))[0]
|
| 861 |
+
arr_len = struct.unpack('<Q', bio.read(8))[0]
|
| 862 |
+
for _ in range(arr_len):
|
| 863 |
+
slen = struct.unpack('<Q', bio.read(8))[0]
|
| 864 |
+
token_names.append(bio.read(slen).decode('utf-8', errors='replace'))
|
| 865 |
+
|
| 866 |
+
# Parse the token_type array
|
| 867 |
+
bio2 = io.BytesIO(raw_value)
|
| 868 |
+
arr_type2 = struct.unpack('<I', bio2.read(4))[0]
|
| 869 |
+
arr_len2 = struct.unpack('<Q', bio2.read(8))[0]
|
| 870 |
+
ttypes = list(struct.unpack(f'<{arr_len2}i', bio2.read(arr_len2 * 4)))
|
| 871 |
+
|
| 872 |
+
# Patch control-looking tokens
|
| 873 |
+
n_fixed = 0
|
| 874 |
+
CONTROL_TYPE = 3
|
| 875 |
+
import re
|
| 876 |
+
for i, tname in enumerate(token_names):
|
| 877 |
+
if ttypes[i] == CONTROL_TYPE:
|
| 878 |
+
continue # already correct
|
| 879 |
+
if ttypes[i] == 6:
|
| 880 |
+
continue # BYTE type β leave as-is
|
| 881 |
+
# Only fix tokens that are genuine control/special tokens:
|
| 882 |
+
# - <eos>, <bos>, <unk>, <mask>, </s> β sentence markers
|
| 883 |
+
# - <|turn>, <turn|>, <|tool_*|> etc β delimiters
|
| 884 |
+
# NOTE: do NOT mark <unused*> as CONTROL β Gemma 4 uses
|
| 885 |
+
# these tokens internally for thinking/channel markers
|
| 886 |
+
# (e.g. <unused24> = <|channel>). The llama.cpp parser
|
| 887 |
+
# handles them via the peg-gemma4 format instead.
|
| 888 |
+
is_control = False
|
| 889 |
+
if tname in ('<eos>', '<bos>', '<unk>', '<mask>', '</s>',
|
| 890 |
+
'<pad>', '<s>'):
|
| 891 |
+
is_control = True
|
| 892 |
+
elif re.match(r'^<\|.*\|?>$', tname) or re.match(r'^<.*\|>$', tname):
|
| 893 |
+
is_control = True
|
| 894 |
+
if is_control and ttypes[i] != CONTROL_TYPE:
|
| 895 |
+
ttypes[i] = CONTROL_TYPE
|
| 896 |
+
n_fixed += 1
|
| 897 |
+
|
| 898 |
+
print(f" Fixed {n_fixed} token types to CONTROL (Gemma 4 <unused> fix)")
|
| 899 |
+
|
| 900 |
+
# Rebuild the raw value
|
| 901 |
+
new_raw = struct.pack('<I', arr_type2)
|
| 902 |
+
new_raw += struct.pack('<Q', arr_len2)
|
| 903 |
+
new_raw += struct.pack(f'<{arr_len2}i', *ttypes)
|
| 904 |
+
updated_kv.append((key, vtype, new_raw))
|
| 905 |
+
elif key == 'tokenizer.chat_template' and vtype == 8:
|
| 906 |
+
# ββ Replace chat template with fixed Gemma 4 template ββ
|
| 907 |
+
# The HF-exported template doesn't handle thinking mode, causing
|
| 908 |
+
# the model to emit <unused24> tokens. The fixed template from
|
| 909 |
+
# llama.cpp PR #21418 pre-fills an empty thought block when
|
| 910 |
+
# thinking is disabled: <|channel>thought\n<channel|>
|
| 911 |
+
# See: https://github.com/ggml-org/llama.cpp/pull/21418
|
| 912 |
+
script_dir = os.path.dirname(os.path.abspath(__file__))
|
| 913 |
+
workspace_dir = os.path.dirname(script_dir)
|
| 914 |
+
template_path = os.path.join(workspace_dir, 'llama-cpp-latest',
|
| 915 |
+
'models', 'templates', 'google-gemma-4-31B-it.jinja')
|
| 916 |
+
if os.path.exists(template_path):
|
| 917 |
+
with open(template_path, 'r') as tf:
|
| 918 |
+
new_template = tf.read()
|
| 919 |
+
new_raw = struct.pack('<Q', len(new_template.encode('utf-8')))
|
| 920 |
+
new_raw += new_template.encode('utf-8')
|
| 921 |
+
updated_kv.append((key, vtype, new_raw))
|
| 922 |
+
print(f" Replaced chat template with fixed Gemma 4 template ({len(new_template)} chars)")
|
| 923 |
+
else:
|
| 924 |
+
print(f" WARNING: Fixed template not found at {template_path}, keeping original")
|
| 925 |
+
updated_kv.append((key, vtype, raw_value))
|
| 926 |
+
else:
|
| 927 |
+
updated_kv.append((key, vtype, raw_value))
|
| 928 |
+
|
| 929 |
+
# ββ Write output GGUF ββ
|
| 930 |
+
print(" Writing output GGUF...")
|
| 931 |
+
with open(output_path, 'wb') as fout:
|
| 932 |
+
# Header
|
| 933 |
+
fout.write(struct.pack('<I', GGUF_MAGIC))
|
| 934 |
+
fout.write(struct.pack('<I', GGUF_VERSION))
|
| 935 |
+
fout.write(struct.pack('<Q', n_tensors))
|
| 936 |
+
fout.write(struct.pack('<Q', n_kv))
|
| 937 |
+
|
| 938 |
+
# KV pairs (passthrough)
|
| 939 |
+
for key, vtype, raw_value in updated_kv:
|
| 940 |
+
write_string(fout, key)
|
| 941 |
+
fout.write(struct.pack('<I', vtype))
|
| 942 |
+
fout.write(raw_value)
|
| 943 |
+
|
| 944 |
+
# Tensor info
|
| 945 |
+
for oti in out_tensor_infos:
|
| 946 |
+
write_string(fout, oti['name'])
|
| 947 |
+
fout.write(struct.pack('<I', oti['n_dims']))
|
| 948 |
+
for d in oti['dims']:
|
| 949 |
+
fout.write(struct.pack('<Q', d))
|
| 950 |
+
fout.write(struct.pack('<I', oti['type']))
|
| 951 |
+
fout.write(struct.pack('<Q', oti['offset']))
|
| 952 |
+
|
| 953 |
+
# Alignment padding before data
|
| 954 |
+
pos = fout.tell()
|
| 955 |
+
aligned = align_offset(pos)
|
| 956 |
+
if aligned > pos:
|
| 957 |
+
fout.write(b'\x00' * (aligned - pos))
|
| 958 |
+
|
| 959 |
+
# ββ Write tensor data ββ
|
| 960 |
+
quant_count = 0
|
| 961 |
+
total_quant_bytes = 0
|
| 962 |
+
total_keep_bytes = 0
|
| 963 |
+
total_rmse = 0.0
|
| 964 |
+
q2k_rmse_sum = 0.0
|
| 965 |
+
q2k_tensor_count = 0
|
| 966 |
+
|
| 967 |
+
for i, ti in enumerate(tensor_infos):
|
| 968 |
+
# Progress bar
|
| 969 |
+
pct = (i + 1) / n_tensors * 100
|
| 970 |
+
bar_width = 40
|
| 971 |
+
filled = int(bar_width * (i + 1) / n_tensors)
|
| 972 |
+
bar = 'β' * filled + 'β' * (bar_width - filled)
|
| 973 |
+
elapsed = time.time() - start_time
|
| 974 |
+
eta = elapsed / max(i + 1, 1) * (n_tensors - i - 1)
|
| 975 |
+
sys.stdout.write(f"\r [{bar}] {pct:5.1f}% ({i+1}/{n_tensors}) {elapsed:.0f}s ETA:{eta:.0f}s {ti['name'][:50]}")
|
| 976 |
+
sys.stdout.flush()
|
| 977 |
+
|
| 978 |
+
# Read source tensor data
|
| 979 |
+
abs_offset = data_section_start + ti['offset']
|
| 980 |
+
fin.seek(abs_offset)
|
| 981 |
+
raw_data = fin.read(ti['data_size'])
|
| 982 |
+
|
| 983 |
+
if quant_plan[i] in ('Q4_0', 'ATTN_Q4'):
|
| 984 |
+
# ββ Q4_0 quantization (fallback or attention HPC) ββ
|
| 985 |
+
if ti['type'] == GGML_TYPE_BF16:
|
| 986 |
+
f32 = bf16_to_f32(raw_data, ti['n_elements'])
|
| 987 |
+
elif ti['type'] == GGML_TYPE_F16:
|
| 988 |
+
f32 = f16_to_f32(raw_data, ti['n_elements'])
|
| 989 |
+
elif ti['type'] == GGML_TYPE_F32:
|
| 990 |
+
f32 = np.frombuffer(raw_data, dtype=np.float32).copy()
|
| 991 |
+
else:
|
| 992 |
+
fout.write(raw_data)
|
| 993 |
+
pad = align_offset(fout.tell()) - fout.tell()
|
| 994 |
+
if pad > 0: fout.write(b'\x00' * pad)
|
| 995 |
+
continue
|
| 996 |
+
|
| 997 |
+
# Pad to 32-element boundary
|
| 998 |
+
n_el = len(f32)
|
| 999 |
+
pad_to = ((n_el + 31) // 32) * 32
|
| 1000 |
+
if pad_to > n_el:
|
| 1001 |
+
f32 = np.concatenate([f32, np.zeros(pad_to - n_el, dtype=np.float32)])
|
| 1002 |
+
n_el = pad_to
|
| 1003 |
+
|
| 1004 |
+
n_blocks_q4 = n_el // 32
|
| 1005 |
+
|
| 1006 |
+
# Use HPC for attention tensors if available
|
| 1007 |
+
if quant_plan[i] == 'ATTN_Q4' and use_hpc and hasattr(_HEXSTATE_LIB, 'hexstate_quantize_tensor_q4_0_hpc'):
|
| 1008 |
+
output_buf = np.zeros(n_blocks_q4 * 18, dtype=np.uint8)
|
| 1009 |
+
error = ctypes.c_float(0.0)
|
| 1010 |
+
f32_c = np.ascontiguousarray(f32, dtype=np.float32)
|
| 1011 |
+
|
| 1012 |
+
# Look up imatrix importance
|
| 1013 |
+
imat_ptr = None
|
| 1014 |
+
if imatrix_data and ti['name'] in imatrix_data:
|
| 1015 |
+
iw = imatrix_data[ti['name']]
|
| 1016 |
+
n_cols = iw.shape[0]
|
| 1017 |
+
n_rows = n_el // n_cols if n_cols > 0 else 1
|
| 1018 |
+
imat_full = np.tile(iw, n_rows)[:n_el].astype(np.float32)
|
| 1019 |
+
imat_c = np.ascontiguousarray(imat_full)
|
| 1020 |
+
imat_ptr = imat_c.ctypes.data_as(ctypes.POINTER(ctypes.c_float))
|
| 1021 |
+
|
| 1022 |
+
_HEXSTATE_LIB.hexstate_quantize_tensor_q4_0_hpc(
|
| 1023 |
+
f32_c.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
|
| 1024 |
+
ctypes.c_int64(n_el),
|
| 1025 |
+
output_buf.ctypes.data_as(ctypes.c_void_p),
|
| 1026 |
+
ctypes.byref(error),
|
| 1027 |
+
imat_ptr,
|
| 1028 |
+
ctypes.c_int(1), # verbose
|
| 1029 |
+
)
|
| 1030 |
+
fout.write(output_buf.tobytes())
|
| 1031 |
+
print(f"\n [Q4_0Β·HPC] {ti['name']} RMSE={np.sqrt(error.value / ti['n_elements']):.6e}")
|
| 1032 |
+
else:
|
| 1033 |
+
# Vectorized Q4_0: process all blocks at once
|
| 1034 |
+
blocks = f32.reshape(-1, 32)
|
| 1035 |
+
amax = np.max(np.abs(blocks), axis=1)
|
| 1036 |
+
d = amax / 7.0
|
| 1037 |
+
d[d == 0] = 1.0 # avoid div by zero
|
| 1038 |
+
qs = np.clip(np.round(blocks / d[:, None]) + 8, 0, 15).astype(np.uint8)
|
| 1039 |
+
d_orig = amax / 7.0 # restore zeros
|
| 1040 |
+
d_fp16 = d_orig.astype(np.float16)
|
| 1041 |
+
|
| 1042 |
+
out_buf = bytearray(n_blocks_q4 * 18)
|
| 1043 |
+
for b in range(n_blocks_q4):
|
| 1044 |
+
off = b * 18
|
| 1045 |
+
struct.pack_into('<e', out_buf, off, float(d_fp16[b]))
|
| 1046 |
+
for j in range(16):
|
| 1047 |
+
out_buf[off + 2 + j] = int(qs[b, j]) | (int(qs[b, j + 16]) << 4)
|
| 1048 |
+
fout.write(bytes(out_buf))
|
| 1049 |
+
|
| 1050 |
+
quant_count += 1
|
| 1051 |
+
total_quant_bytes += n_blocks_q4 * 18
|
| 1052 |
+
|
| 1053 |
+
elif quant_plan[i]:
|
| 1054 |
+
# Convert to F32 for quantization
|
| 1055 |
+
if ti['type'] == GGML_TYPE_BF16:
|
| 1056 |
+
f32 = bf16_to_f32(raw_data, ti['n_elements'])
|
| 1057 |
+
elif ti['type'] == GGML_TYPE_F16:
|
| 1058 |
+
f32 = f16_to_f32(raw_data, ti['n_elements'])
|
| 1059 |
+
elif ti['type'] == GGML_TYPE_F32:
|
| 1060 |
+
f32 = np.frombuffer(raw_data, dtype=np.float32).copy()
|
| 1061 |
+
else:
|
| 1062 |
+
# Can't re-quantize from quantized format β keep as-is
|
| 1063 |
+
fout.write(raw_data)
|
| 1064 |
+
pad = align_offset(fout.tell()) - fout.tell()
|
| 1065 |
+
if pad > 0:
|
| 1066 |
+
fout.write(b'\x00' * pad)
|
| 1067 |
+
continue
|
| 1068 |
+
|
| 1069 |
+
# Quantize to Q2_K β always use HPC with chunked processing
|
| 1070 |
+
# Each chunk gets full HPC treatment (no size threshold)
|
| 1071 |
+
HPC_CHUNK = 50_000_000 # 50M elements per HPC chunk
|
| 1072 |
+
HPC_CHUNK = (HPC_CHUNK // QK_K) * QK_K # align to QK_K
|
| 1073 |
+
|
| 1074 |
+
# Look up imatrix importance for this tensor
|
| 1075 |
+
imat_full = None
|
| 1076 |
+
if imatrix_data and ti['name'] in imatrix_data:
|
| 1077 |
+
iw = imatrix_data[ti['name']]
|
| 1078 |
+
n_cols = iw.shape[0]
|
| 1079 |
+
n_rows = ti['n_elements'] // n_cols if n_cols > 0 else 1
|
| 1080 |
+
imat_full = np.tile(iw, n_rows)[:ti['n_elements']]
|
| 1081 |
+
|
| 1082 |
+
n_el = ti['n_elements']
|
| 1083 |
+
if use_hpc and n_el <= HPC_CHUNK:
|
| 1084 |
+
# Small tensor β single HPC pass
|
| 1085 |
+
q2k_data, n_blocks = quantize_tensor_q2k_hpc(f32, opt_mode=2, importance=imat_full)
|
| 1086 |
+
elif use_hpc:
|
| 1087 |
+
# Large tensor β chunked HPC (each chunk gets BP)
|
| 1088 |
+
chunks = []
|
| 1089 |
+
processed = 0
|
| 1090 |
+
while processed < n_el:
|
| 1091 |
+
end = min(processed + HPC_CHUNK, n_el)
|
| 1092 |
+
chunk_f32 = f32[processed:end]
|
| 1093 |
+
if len(chunk_f32) % QK_K != 0:
|
| 1094 |
+
pad_len = QK_K - (len(chunk_f32) % QK_K)
|
| 1095 |
+
chunk_f32 = np.concatenate([chunk_f32, np.zeros(pad_len, dtype=np.float32)])
|
| 1096 |
+
chunk_imp = imat_full[processed:end] if imat_full is not None else None
|
| 1097 |
+
if chunk_imp is not None and len(chunk_imp) < len(chunk_f32):
|
| 1098 |
+
chunk_imp = np.concatenate([chunk_imp, np.ones(len(chunk_f32) - len(chunk_imp), dtype=np.float32)])
|
| 1099 |
+
chunk_data, _ = quantize_tensor_q2k_hpc(chunk_f32, opt_mode=2, importance=chunk_imp)
|
| 1100 |
+
actual_blocks = (end - processed + QK_K - 1) // QK_K
|
| 1101 |
+
chunks.append(chunk_data[:actual_blocks * 84])
|
| 1102 |
+
processed = end
|
| 1103 |
+
pct = 100.0 * processed / n_el
|
| 1104 |
+
print(f"\r β {processed/1e6:.0f}M/{n_el/1e6:.0f}M ({pct:.0f}%)", end='', flush=True)
|
| 1105 |
+
print()
|
| 1106 |
+
q2k_data = b''.join(chunks)
|
| 1107 |
+
n_blocks = n_el // QK_K
|
| 1108 |
+
else:
|
| 1109 |
+
# No HPC available β python fallback
|
| 1110 |
+
CHUNK_SIZE = 10_000_000
|
| 1111 |
+
CHUNK_SIZE = (CHUNK_SIZE // QK_K) * QK_K
|
| 1112 |
+
chunks = []
|
| 1113 |
+
processed = 0
|
| 1114 |
+
while processed < n_el:
|
| 1115 |
+
end = min(processed + CHUNK_SIZE, n_el)
|
| 1116 |
+
chunk_data, _ = quantize_tensor_q2k(f32[processed:end])
|
| 1117 |
+
chunks.append(chunk_data)
|
| 1118 |
+
processed = end
|
| 1119 |
+
pct = 100.0 * processed / n_el
|
| 1120 |
+
print(f"\r β {processed/1e6:.0f}M/{n_el/1e6:.0f}M ({pct:.0f}%)", end='', flush=True)
|
| 1121 |
+
print()
|
| 1122 |
+
q2k_data = b''.join(chunks)
|
| 1123 |
+
n_blocks = n_el // QK_K
|
| 1124 |
+
fout.write(q2k_data)
|
| 1125 |
+
|
| 1126 |
+
# ββ Compute and report exact per-tensor RMSE ββ
|
| 1127 |
+
try:
|
| 1128 |
+
CHUNK_BLK = 100_000 # blocks per chunk to bound memory
|
| 1129 |
+
total_se = 0.0
|
| 1130 |
+
total_n = 0
|
| 1131 |
+
for ci in range(0, n_blocks, CHUNK_BLK):
|
| 1132 |
+
ce = min(ci + CHUNK_BLK, n_blocks)
|
| 1133 |
+
chunk_q = q2k_data[ci*84:ce*84]
|
| 1134 |
+
deq_chunk = dequant_q2k_fast(chunk_q, ce - ci)
|
| 1135 |
+
orig_chunk = f32[ci*QK_K:ce*QK_K]
|
| 1136 |
+
n_valid = min(len(orig_chunk), len(deq_chunk))
|
| 1137 |
+
diff = orig_chunk[:n_valid] - deq_chunk[:n_valid]
|
| 1138 |
+
total_se += np.sum(diff ** 2)
|
| 1139 |
+
total_n += n_valid
|
| 1140 |
+
tensor_rmse = np.sqrt(total_se / max(total_n, 1))
|
| 1141 |
+
q2k_rmse_sum += tensor_rmse
|
| 1142 |
+
q2k_tensor_count += 1
|
| 1143 |
+
print(f"\n [Q2_K] {ti['name'][:55]} RMSE={tensor_rmse:.6e}")
|
| 1144 |
+
except Exception as e:
|
| 1145 |
+
print(f"\n [Q2_K] {ti['name'][:55]} RMSE=err({e})")
|
| 1146 |
+
|
| 1147 |
+
quant_count += 1
|
| 1148 |
+
total_quant_bytes += len(q2k_data)
|
| 1149 |
+
else:
|
| 1150 |
+
# Keep as-is (passthrough)
|
| 1151 |
+
fout.write(raw_data)
|
| 1152 |
+
total_keep_bytes += len(raw_data)
|
| 1153 |
+
|
| 1154 |
+
# Alignment padding
|
| 1155 |
+
pad = align_offset(fout.tell()) - fout.tell()
|
| 1156 |
+
if pad > 0:
|
| 1157 |
+
fout.write(b'\x00' * pad)
|
| 1158 |
+
|
| 1159 |
+
final_size = fout.tell()
|
| 1160 |
+
|
| 1161 |
+
elapsed = time.time() - start_time
|
| 1162 |
+
print(f"\r {'β' * 40} 100.0% ({n_tensors}/{n_tensors}) {elapsed:.0f}s" + " " * 60)
|
| 1163 |
+
print()
|
| 1164 |
+
|
| 1165 |
+
# ββ Summary ββ
|
| 1166 |
+
original_bytes = sum(ti['data_size'] for ti in tensor_infos)
|
| 1167 |
+
compression = original_bytes / max(final_size, 1)
|
| 1168 |
+
|
| 1169 |
+
print(" ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ")
|
| 1170 |
+
print(" β RE-QUANTIZATION SUMMARY β")
|
| 1171 |
+
print(" β βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ£")
|
| 1172 |
+
print(f" β Tensors quantized (Q2_K): {quant_count:<33d} β")
|
| 1173 |
+
print(f" β Tensors kept as-is: {total_keep:<33d} β")
|
| 1174 |
+
print(f" β Q2_K data: {total_quant_bytes:>12,} bytes ({total_quant_bytes/1024**2:>7.1f} MB) β")
|
| 1175 |
+
print(f" β Kept data: {total_keep_bytes:>12,} bytes ({total_keep_bytes/1024**2:>7.1f} MB) β")
|
| 1176 |
+
print(f" β Original size: {file_size:>12,} bytes ({file_size/1024**3:>7.2f} GB) β")
|
| 1177 |
+
print(f" β Output size: {final_size:>12,} bytes ({final_size/1024**3:>7.2f} GB) β")
|
| 1178 |
+
print(f" β Compression: {compression:>42.1f}x β")
|
| 1179 |
+
if q2k_tensor_count > 0:
|
| 1180 |
+
mean_rmse = q2k_rmse_sum / q2k_tensor_count
|
| 1181 |
+
print(f" β Mean Q2_K RMSE: {mean_rmse:>12.6e} β")
|
| 1182 |
+
print(f" β Total time: {elapsed:>39.1f} sec β")
|
| 1183 |
+
print(" ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ")
|
| 1184 |
+
print()
|
| 1185 |
+
print(f" Output: {output_path}")
|
| 1186 |
+
print()
|
| 1187 |
+
|
| 1188 |
+
|
| 1189 |
+
if __name__ == '__main__':
|
| 1190 |
+
main()
|
hpc_amplitude.h
ADDED
|
@@ -0,0 +1,418 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
 * hpc_amplitude.h — On-Demand State Vector
 *
 * The state vector has D^N entries. We never materialize it.
 * Instead, we compute exactly what's needed, when it's needed.
 *
 * Three modes of access:
 *
 *   1. POINT QUERY:   ψ(i₁,...,iₙ)        — O(N+E)             — one amplitude
 *   2. SPARSE RECON:  All |ψ| > threshold — O(?)               — importance sampling
 *   3. EXPECTATION:   ⟨ψ|O|ψ⟩             — O(samples×(N+E))   — Monte Carlo
 *
 * The Devil computes only what you ask for. Nothing more.
 * The rest of the state vector does not exist until observed.
 */
|
| 16 |
+
|
| 17 |
+
#ifndef HPC_AMPLITUDE_H
|
| 18 |
+
#define HPC_AMPLITUDE_H
|
| 19 |
+
|
| 20 |
+
#include "hpc_graph.h"
|
| 21 |
+
#include "hpc_contract.h"
|
| 22 |
+
#include <math.h>
|
| 23 |
+
#include <stdlib.h>
|
| 24 |
+
#include <string.h>
|
| 25 |
+
|
| 26 |
+
/* ═══════════════════════════════════════════════════════════════════════
 * SPARSE STATE VECTOR ENTRY
 * ═══════════════════════════════════════════════════════════════════════ */
|
| 29 |
+
|
| 30 |
+
/* One retained basis configuration together with its complex amplitude. */
typedef struct {
    uint32_t *indices;     /* Site indices: [n_sites] (owned by the entry) */
    double    re, im;      /* Amplitude value */
    double    prob;        /* |amplitude|² */
} HPCSparseEntry;

/*
 * Growable array of sparse entries.
 *
 * Invariant: entries[0 .. capacity-1] each own a valid `indices` buffer;
 * entries[0 .. count-1] hold data. `entries` may be NULL while capacity == 0.
 */
typedef struct {
    HPCSparseEntry *entries;
    uint64_t        count;
    uint64_t        capacity;
    uint64_t        n_sites;     /* For index array sizing */
    double          total_prob;  /* Sum of captured probability */
    double          threshold;   /* Minimum |ψ|² captured */
} HPCSparseVector;

/* ═══════════════════════════════════════════════════════════════════════
 * SPARSE VECTOR LIFECYCLE
 * ═══════════════════════════════════════════════════════════════════════ */

/* Capacity used the first time a vector created with capacity 0 must grow. */
enum { HPC_SV_MIN_CAPACITY = 16 };

/*
 * Allocate a zeroed index buffer for one entry.
 * Always requests at least one element so that a successful allocation is
 * never NULL, even when n_sites == 0. Returns NULL on failure or overflow.
 */
static inline uint32_t *hpc_sv_alloc_indices(uint64_t n_sites)
{
    if (n_sites > SIZE_MAX / sizeof(uint32_t)) return NULL;
    return (uint32_t *)calloc(n_sites ? (size_t)n_sites : 1, sizeof(uint32_t));
}

/*
 * Release a sparse vector and every index buffer it owns.
 * Accepts NULL (no-op).
 */
static inline void hpc_sv_destroy(HPCSparseVector *sv)
{
    if (!sv) return;
    if (sv->entries) {
        for (uint64_t i = 0; i < sv->capacity; i++)
            free(sv->entries[i].indices);
    }
    free(sv->entries);
    free(sv);
}

/*
 * Create an empty sparse vector for configurations of `n_sites` sites with
 * room for `initial_cap` entries.
 *
 * initial_cap == 0 is allowed: storage is then allocated on first growth.
 * Returns NULL if any allocation fails (nothing is leaked in that case).
 * The caller owns the result and releases it with hpc_sv_destroy().
 */
static inline HPCSparseVector *hpc_sv_create(uint64_t n_sites,
                                              uint64_t initial_cap)
{
    HPCSparseVector *sv = (HPCSparseVector *)calloc(1, sizeof(HPCSparseVector));
    if (!sv) return NULL;
    sv->n_sites = n_sites;

    if (initial_cap == 0) return sv;

    if (initial_cap > SIZE_MAX / sizeof(HPCSparseEntry)) {
        free(sv);
        return NULL;
    }
    sv->entries = (HPCSparseEntry *)calloc((size_t)initial_cap,
                                           sizeof(HPCSparseEntry));
    if (!sv->entries) {
        free(sv);
        return NULL;
    }

    for (uint64_t i = 0; i < initial_cap; i++) {
        sv->entries[i].indices = hpc_sv_alloc_indices(n_sites);
        if (!sv->entries[i].indices) {
            /* capacity counts only fully initialised entries, so destroy
             * frees exactly what was allocated so far. */
            hpc_sv_destroy(sv);
            return NULL;
        }
        sv->capacity = i + 1;
    }
    return sv;
}

/*
 * Ensure there is room for at least one more entry.
 *
 * Doubles the capacity (or jumps to HPC_SV_MIN_CAPACITY from 0). On
 * allocation failure the vector is left valid: the old storage is kept and
 * capacity reflects only the entries that were successfully initialised,
 * so callers must re-check `count < capacity` before writing.
 */
static inline void hpc_sv_grow(HPCSparseVector *sv)
{
    if (sv->count < sv->capacity) return;

    uint64_t new_cap = sv->capacity ? sv->capacity * 2 : HPC_SV_MIN_CAPACITY;
    if (new_cap <= sv->capacity ||
        new_cap > SIZE_MAX / sizeof(HPCSparseEntry))
        return;  /* doubling overflowed or size is unrepresentable */

    /* Never assign realloc's result straight to sv->entries: on failure
     * the original block is still live and must not be lost. */
    HPCSparseEntry *grown = (HPCSparseEntry *)realloc(
        sv->entries, (size_t)new_cap * sizeof(HPCSparseEntry));
    if (!grown) return;
    sv->entries = grown;

    for (uint64_t i = sv->capacity; i < new_cap; i++) {
        uint32_t *idx = hpc_sv_alloc_indices(sv->n_sites);
        if (!idx) break;  /* keep whatever growth we achieved */
        grown[i].indices = idx;
        grown[i].re = 0;
        grown[i].im = 0;
        grown[i].prob = 0;
        sv->capacity = i + 1;
    }
}

/*
 * Append one configuration with amplitude (re, im).
 *
 * `indices` must point to n_sites values (it is not read when
 * n_sites == 0). prob is stored as re² + im² and added to total_prob.
 * If the vector cannot grow, the entry is dropped and the vector is left
 * unchanged; callers can detect this by comparing `count` before and after.
 */
static inline void hpc_sv_add(HPCSparseVector *sv,
                               const uint32_t *indices,
                               double re, double im)
{
    hpc_sv_grow(sv);
    if (sv->count >= sv->capacity) return;  /* growth failed */

    HPCSparseEntry *e = &sv->entries[sv->count];
    if (sv->n_sites > 0)
        memcpy(e->indices, indices, (size_t)sv->n_sites * sizeof(uint32_t));
    e->re = re;
    e->im = im;
    e->prob = re * re + im * im;
    sv->total_prob += e->prob;
    sv->count++;
}
|
| 97 |
+
|
| 98 |
+
/* ═══════════════════════════════════════════════════════════════════════
 * BRUTE-FORCE SPARSE RECONSTRUCTION
 *
 * For small N: enumerate all D^N configurations, keep those above
 * threshold. Returns a sparse vector of significant amplitudes.
 *
 * Cost: O(D^N × (N+E)) — exponential, small N only.
 * This is the reference implementation for verification.
 * ═══════════════════════════════════════════════════════════════════════ */
|
| 107 |
+
|
| 108 |
+
static inline HPCSparseVector *hpc_sparse_brute(const HPCGraph *g,
|
| 109 |
+
double threshold,
|
| 110 |
+
uint64_t max_entries)
|
| 111 |
+
{
|
| 112 |
+
if (g->n_sites > 8) {
|
| 113 |
+
fprintf(stderr, "hpc_sparse_brute: N=%lu too large\n", g->n_sites);
|
| 114 |
+
return NULL;
|
| 115 |
+
}
|
| 116 |
+
|
| 117 |
+
HPCSparseVector *sv = hpc_sv_create(g->n_sites, 256);
|
| 118 |
+
if (!sv) return NULL;
|
| 119 |
+
sv->threshold = threshold;
|
| 120 |
+
|
| 121 |
+
uint64_t total_configs = 1;
|
| 122 |
+
for (uint64_t i = 0; i < g->n_sites; i++) total_configs *= HPC_D;
|
| 123 |
+
|
| 124 |
+
uint32_t indices[8];
|
| 125 |
+
|
| 126 |
+
for (uint64_t cfg = 0; cfg < total_configs && sv->count < max_entries; cfg++) {
|
| 127 |
+
uint64_t tmp = cfg;
|
| 128 |
+
for (uint64_t i = 0; i < g->n_sites; i++) {
|
| 129 |
+
indices[i] = tmp % HPC_D;
|
| 130 |
+
tmp /= HPC_D;
|
| 131 |
+
}
|
| 132 |
+
|
| 133 |
+
double re, im;
|
| 134 |
+
hpc_amplitude(g, indices, &re, &im);
|
| 135 |
+
double prob = re * re + im * im;
|
| 136 |
+
|
| 137 |
+
if (prob >= threshold)
|
| 138 |
+
hpc_sv_add(sv, indices, re, im);
|
| 139 |
+
}
|
| 140 |
+
|
| 141 |
+
return sv;
|
| 142 |
+
}
|
| 143 |
+
|
| 144 |
+
/* ═══════════════════════════════════════════════════════════════════════
 * TREE-PRUNED SPARSE RECONSTRUCTION
 *
 * For larger N: build the state vector site-by-site, pruning branches
 * whose cumulative probability falls below threshold.
 *
 * At each site k, we have a set of "live" partial configurations
 * (i₁,...,i_k) with accumulated amplitude. For site k+1, we extend
 * each live config to all D values, compute the new amplitude, and
 * prune low-probability branches.
 *
 * Cost: O(active_branches × D × E_local) per site.
 * For sparse states: active_branches << D^k → exponential speedup.
 *
 * This is the practical reconstruction method for N > 8.
 * ═══════════════════════════════════════════════════════════════════════ */
|
| 160 |
+
|
| 161 |
+
typedef struct {
|
| 162 |
+
uint32_t *indices; /* Partial index vector [n_sites] */
|
| 163 |
+
double re, im; /* Accumulated amplitude */
|
| 164 |
+
} HPCTreeNode;
|
| 165 |
+
|
| 166 |
+
static inline HPCSparseVector *hpc_sparse_tree(const HPCGraph *g,
|
| 167 |
+
double threshold,
|
| 168 |
+
uint64_t max_branches)
|
| 169 |
+
{
|
| 170 |
+
HPCSparseVector *sv = hpc_sv_create(g->n_sites, 256);
|
| 171 |
+
if (!sv) return NULL;
|
| 172 |
+
sv->threshold = threshold;
|
| 173 |
+
|
| 174 |
+
/* Initial pool: one root node with no sites assigned */
|
| 175 |
+
uint64_t pool_cap = max_branches * HPC_D + 16;
|
| 176 |
+
HPCTreeNode *current = (HPCTreeNode *)calloc(pool_cap, sizeof(HPCTreeNode));
|
| 177 |
+
HPCTreeNode *next = (HPCTreeNode *)calloc(pool_cap, sizeof(HPCTreeNode));
|
| 178 |
+
for (uint64_t i = 0; i < pool_cap; i++) {
|
| 179 |
+
current[i].indices = (uint32_t *)calloc(g->n_sites, sizeof(uint32_t));
|
| 180 |
+
next[i].indices = (uint32_t *)calloc(g->n_sites, sizeof(uint32_t));
|
| 181 |
+
}
|
| 182 |
+
|
| 183 |
+
/* Seed: one root node */
|
| 184 |
+
uint64_t n_current = 1;
|
| 185 |
+
current[0].re = 1.0;
|
| 186 |
+
current[0].im = 0.0;
|
| 187 |
+
|
| 188 |
+
/* Grow site by site */
|
| 189 |
+
for (uint64_t site = 0; site < g->n_sites; site++) {
|
| 190 |
+
uint64_t n_next = 0;
|
| 191 |
+
const TrialityQuhit *q = &g->locals[site];
|
| 192 |
+
|
| 193 |
+
for (uint64_t b = 0; b < n_current; b++) {
|
| 194 |
+
for (int v = 0; v < HPC_D; v++) {
|
| 195 |
+
/* Extend branch with site=v */
|
| 196 |
+
double a_re = q->edge_re[v];
|
| 197 |
+
double a_im = q->edge_im[v];
|
| 198 |
+
|
| 199 |
+
/* Multiply accumulated amplitude by local amplitude */
|
| 200 |
+
double new_re = current[b].re * a_re - current[b].im * a_im;
|
| 201 |
+
double new_im = current[b].re * a_im + current[b].im * a_re;
|
| 202 |
+
|
| 203 |
+
/* Apply phase contributions from edges connecting
|
| 204 |
+
* this site to already-assigned sites */
|
| 205 |
+
for (uint64_t e = 0; e < g->n_edges; e++) {
|
| 206 |
+
uint64_t sa = g->edges[e].site_a;
|
| 207 |
+
uint64_t sb = g->edges[e].site_b;
|
| 208 |
+
int partner_site = -1;
|
| 209 |
+
|
| 210 |
+
if (sa == site && sb < site) partner_site = (int)sb;
|
| 211 |
+
else if (sb == site && sa < site) partner_site = (int)sa;
|
| 212 |
+
|
| 213 |
+
if (partner_site >= 0) {
|
| 214 |
+
uint32_t pv = current[b].indices[partner_site];
|
| 215 |
+
double w_re, w_im;
|
| 216 |
+
|
| 217 |
+
if (g->edges[e].type == HPC_EDGE_CZ) {
|
| 218 |
+
uint32_t phase_idx = ((uint32_t)v * pv) % HPC_D;
|
| 219 |
+
w_re = HPC_W6_RE[phase_idx];
|
| 220 |
+
w_im = HPC_W6_IM[phase_idx];
|
| 221 |
+
} else {
|
| 222 |
+
if (sa == site) {
|
| 223 |
+
w_re = g->edges[e].w_re[v][pv];
|
| 224 |
+
w_im = g->edges[e].w_im[v][pv];
|
| 225 |
+
} else {
|
| 226 |
+
w_re = g->edges[e].w_re[pv][v];
|
| 227 |
+
w_im = g->edges[e].w_im[pv][v];
|
| 228 |
+
}
|
| 229 |
+
}
|
| 230 |
+
|
| 231 |
+
double tmp_re = new_re * w_re - new_im * w_im;
|
| 232 |
+
double tmp_im = new_re * w_im + new_im * w_re;
|
| 233 |
+
new_re = tmp_re;
|
| 234 |
+
new_im = tmp_im;
|
| 235 |
+
}
|
| 236 |
+
}
|
| 237 |
+
|
| 238 |
+
/* Prune: skip if amplitude is too small */
|
| 239 |
+
double prob = new_re * new_re + new_im * new_im;
|
| 240 |
+
if (prob < threshold && site < g->n_sites - 1) continue;
|
| 241 |
+
|
| 242 |
+
/* Accept this branch */
|
| 243 |
+
if (n_next < pool_cap) {
|
| 244 |
+
memcpy(next[n_next].indices, current[b].indices,
|
| 245 |
+
g->n_sites * sizeof(uint32_t));
|
| 246 |
+
next[n_next].indices[site] = v;
|
| 247 |
+
next[n_next].re = new_re;
|
| 248 |
+
next[n_next].im = new_im;
|
| 249 |
+
n_next++;
|
| 250 |
+
}
|
| 251 |
+
}
|
| 252 |
+
}
|
| 253 |
+
|
| 254 |
+
/* Swap pools */
|
| 255 |
+
HPCTreeNode *tmp = current;
|
| 256 |
+
current = next;
|
| 257 |
+
next = tmp;
|
| 258 |
+
n_current = n_next;
|
| 259 |
+
|
| 260 |
+
/* Sort by probability and truncate to max_branches */
|
| 261 |
+
if (n_current > max_branches && site < g->n_sites - 1) {
|
| 262 |
+
/* Simple selection: keep top max_branches by probability */
|
| 263 |
+
/* Partial sort using partition around threshold */
|
| 264 |
+
for (uint64_t i = max_branches; i < n_current; i++) {
|
| 265 |
+
/* Find minimum in kept set */
|
| 266 |
+
uint64_t min_idx = 0;
|
| 267 |
+
double min_prob = current[0].re * current[0].re +
|
| 268 |
+
current[0].im * current[0].im;
|
| 269 |
+
for (uint64_t j = 1; j < max_branches; j++) {
|
| 270 |
+
double p = current[j].re * current[j].re +
|
| 271 |
+
current[j].im * current[j].im;
|
| 272 |
+
if (p < min_prob) { min_prob = p; min_idx = j; }
|
| 273 |
+
}
|
| 274 |
+
/* Swap if current[i] is larger */
|
| 275 |
+
double p_i = current[i].re * current[i].re +
|
| 276 |
+
current[i].im * current[i].im;
|
| 277 |
+
if (p_i > min_prob) {
|
| 278 |
+
HPCTreeNode swap = current[min_idx];
|
| 279 |
+
current[min_idx] = current[i];
|
| 280 |
+
current[i] = swap;
|
| 281 |
+
}
|
| 282 |
+
}
|
| 283 |
+
n_current = max_branches;
|
| 284 |
+
}
|
| 285 |
+
}
|
| 286 |
+
|
| 287 |
+
/* All remaining branches are complete configurations */
|
| 288 |
+
for (uint64_t b = 0; b < n_current; b++) {
|
| 289 |
+
double prob = current[b].re * current[b].re +
|
| 290 |
+
current[b].im * current[b].im;
|
| 291 |
+
if (prob >= threshold)
|
| 292 |
+
hpc_sv_add(sv, current[b].indices, current[b].re, current[b].im);
|
| 293 |
+
}
|
| 294 |
+
|
| 295 |
+
/* Cleanup */
|
| 296 |
+
for (uint64_t i = 0; i < pool_cap; i++) {
|
| 297 |
+
free(current[i].indices);
|
| 298 |
+
free(next[i].indices);
|
| 299 |
+
}
|
| 300 |
+
free(current);
|
| 301 |
+
free(next);
|
| 302 |
+
|
| 303 |
+
return sv;
|
| 304 |
+
}
|
| 305 |
+
|
| 306 |
+
/* ═══════════════════════════════════════════════════════════════════════
 * MONTE CARLO EXPECTATION VALUE
 *
 * Computes ⟨ψ|O|ψ⟩ via importance sampling without materializing |ψ⟩.
 *
 * Strategy:
 *   1. Sample configurations by measuring each site sequentially
 *      using Born probabilities (marginals from the graph)
 *   2. For each sample, evaluate ψ(config) and O(config)
 *   3. Average over samples
 *
 * For diagonal observables O = Σ_i o(i)|i⟩⟨i|:
 *   ⟨O⟩ = Σ_i |ψ(i)|² o(i) ≈ (1/S) Σ_{samples} o(i_s)
 *
 * Cost: O(n_samples × (N + E))
 * ═══════════════════════════════════════════════════════════════════════ */
|
| 322 |
+
|
| 323 |
+
typedef double (*HPCObservable)(const uint32_t *indices, uint64_t n_sites,
|
| 324 |
+
void *ctx);
|
| 325 |
+
|
| 326 |
+
static inline double hpc_expectation(const HPCGraph *g,
|
| 327 |
+
HPCObservable obs, void *obs_ctx,
|
| 328 |
+
int n_samples, uint64_t rng_seed)
|
| 329 |
+
{
|
| 330 |
+
/* Simple LCG for reproducible sampling */
|
| 331 |
+
uint64_t rng = rng_seed;
|
| 332 |
+
#define HPC_LCG(r) ((r) = (r) * 6364136223846793005ULL + 1442695040888963407ULL)
|
| 333 |
+
#define HPC_RAND(r) (((double)((r) >> 11)) * 0x1.0p-53)
|
| 334 |
+
|
| 335 |
+
double sum_obs = 0.0;
|
| 336 |
+
int valid_samples = 0;
|
| 337 |
+
|
| 338 |
+
for (int s = 0; s < n_samples; s++) {
|
| 339 |
+
/* Generate a configuration by sampling site-by-site */
|
| 340 |
+
uint32_t config[256]; /* max sites for MC */
|
| 341 |
+
if (g->n_sites > 256) break;
|
| 342 |
+
|
| 343 |
+
/* Simple approach: sample each site from its local distribution.
|
| 344 |
+
* This is approximate for entangled states but fast. */
|
| 345 |
+
for (uint64_t site = 0; site < g->n_sites; site++) {
|
| 346 |
+
const TrialityQuhit *q = &g->locals[site];
|
| 347 |
+
|
| 348 |
+
/* Local probability distribution */
|
| 349 |
+
double probs[HPC_D];
|
| 350 |
+
double total = 0;
|
| 351 |
+
for (int v = 0; v < HPC_D; v++) {
|
| 352 |
+
probs[v] = q->edge_re[v] * q->edge_re[v] +
|
| 353 |
+
q->edge_im[v] * q->edge_im[v];
|
| 354 |
+
total += probs[v];
|
| 355 |
+
}
|
| 356 |
+
|
| 357 |
+
/* Sample from local distribution */
|
| 358 |
+
HPC_LCG(rng);
|
| 359 |
+
double r = HPC_RAND(rng) * total;
|
| 360 |
+
double cumul = 0;
|
| 361 |
+
config[site] = HPC_D - 1;
|
| 362 |
+
for (int v = 0; v < HPC_D; v++) {
|
| 363 |
+
cumul += probs[v];
|
| 364 |
+
if (r <= cumul) { config[site] = v; break; }
|
| 365 |
+
}
|
| 366 |
+
}
|
| 367 |
+
|
| 368 |
+
/* Compute importance weight: |Ο(config)|Β² / q(config)
|
| 369 |
+
* where q = Ξ _k p_k(config[k]) is the proposal distribution */
|
| 370 |
+
double prob_psi = hpc_probability(g, config);
|
| 371 |
+
double prob_q = 1.0;
|
| 372 |
+
for (uint64_t site = 0; site < g->n_sites; site++) {
|
| 373 |
+
const TrialityQuhit *q = &g->locals[site];
|
| 374 |
+
uint32_t v = config[site];
|
| 375 |
+
double p = q->edge_re[v] * q->edge_re[v] +
|
| 376 |
+
q->edge_im[v] * q->edge_im[v];
|
| 377 |
+
prob_q *= p;
|
| 378 |
+
}
|
| 379 |
+
|
| 380 |
+
if (prob_q > 1e-30) {
|
| 381 |
+
double weight = prob_psi / prob_q;
|
| 382 |
+
double obs_val = obs(config, g->n_sites, obs_ctx);
|
| 383 |
+
sum_obs += weight * obs_val;
|
| 384 |
+
valid_samples++;
|
| 385 |
+
}
|
| 386 |
+
}
|
| 387 |
+
|
| 388 |
+
#undef HPC_LCG
|
| 389 |
+
#undef HPC_RAND
|
| 390 |
+
|
| 391 |
+
return (valid_samples > 0) ? sum_obs / valid_samples : 0.0;
|
| 392 |
+
}
|
| 393 |
+
|
| 394 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 395 |
+
* PRINT SPARSE VECTOR
|
| 396 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 397 |
+
|
| 398 |
+
static inline void hpc_sv_print(const HPCSparseVector *sv, int max_show)
|
| 399 |
+
{
|
| 400 |
+
printf("ββ Sparse State Vector ββ\n");
|
| 401 |
+
printf(" Entries: %lu, Captured prob: %.6f, Threshold: %.2e\n",
|
| 402 |
+
sv->count, sv->total_prob, sv->threshold);
|
| 403 |
+
|
| 404 |
+
uint64_t show = sv->count;
|
| 405 |
+
if (max_show > 0 && show > (uint64_t)max_show) show = max_show;
|
| 406 |
+
|
| 407 |
+
for (uint64_t i = 0; i < show; i++) {
|
| 408 |
+
printf(" |");
|
| 409 |
+
for (uint64_t s = 0; s < sv->n_sites; s++)
|
| 410 |
+
printf("%u", sv->entries[i].indices[s]);
|
| 411 |
+
printf("β© β %.6f%+.6fi (P=%.6e)\n",
|
| 412 |
+
sv->entries[i].re, sv->entries[i].im, sv->entries[i].prob);
|
| 413 |
+
}
|
| 414 |
+
if (show < sv->count)
|
| 415 |
+
printf(" ... (%lu more entries)\n", sv->count - show);
|
| 416 |
+
}
|
| 417 |
+
|
| 418 |
+
#endif /* HPC_AMPLITUDE_H */
|
hpc_contract.h
ADDED
|
@@ -0,0 +1,422 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
 * hpc_contract.h — Syntheme-Aware Bond Encoding
 *
 *
 * SVD: numerically rotate a matrix until you find its eigenstructure.
 * HPC: analytically decompose using the 15 synthemes of S₆.
 *
 * A syntheme is a partition of {0,1,2,3,4,5} into 3 unordered pairs.
 * There are exactly 15 synthemes. Each one defines a natural pairing
 * of the D=6 basis states — a way to decompose correlations.
 *
 * The vesica fold (0↔3, 1↔4, 2↔5) decomposes any 6×6 interaction
 * into a 3×3 vesica (symmetric) + 3×3 wave (antisymmetric) channel.
 * This is O(D), zero multiplies — just index remapping.
 *
 * Together: syntheme selection + vesica fold = O(D²) bond encoding.
 * SVD is O(D³·χ²). For D=6: 36 vs ~1.6M operations at χ=256.
 */
|
| 19 |
+
|
| 20 |
+
#ifndef HPC_CONTRACT_H
|
| 21 |
+
#define HPC_CONTRACT_H
|
| 22 |
+
|
| 23 |
+
#include "hpc_graph.h"
|
| 24 |
+
#include "s6_exotic.h"
|
| 25 |
+
#include <math.h>
|
| 26 |
+
#include <string.h>
|
| 27 |
+
|
| 28 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 29 |
+
* THE 15 SYNTHEMES β Sβ's complete pairings
|
| 30 |
+
*
|
| 31 |
+
* Each syntheme partitions {0,1,2,3,4,5} into 3 pairs.
* syntheme[s] = {{a₁,b₁}, {a₂,b₂}, {a₃,b₃}}
*
* These are the 15 natural "lenses" through which D=6 correlations
* can be viewed. SVD discovers a decomposition numerically.
* We select the best syntheme analytically.
|
| 37 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 38 |
+
|
| 39 |
+
/*
 * All 15 ways to split {0,1,2,3,4,5} into three unordered pairs.
 *
 * HPC_SYNTHEMES[s][p] is the p-th pair {a, b} of pairing s.  Every row
 * covers each of the six indices exactly once and no two rows describe the
 * same pairing.  Rows 7 and 13 hold {03|15|24} and {04|12|35}; with any
 * other content the table would repeat rows 2 and 8 and leave those two
 * pairings unreachable.
 *
 * Row 2 is the antipodal pairing (0,3) (1,4) (2,5) used by the vesica fold.
 *
 * The three groups of five are a layout convenience only: pair {1,4}
 * occurs in both row 1 and row 2, so the first group is not a disjoint
 * cover of the 15 possible pairs.
 */
static const int HPC_SYNTHEMES[15][3][2] = {
    /* Rows 0-4 */
    {{0,1}, {2,3}, {4,5}},  /*  0: hex-edge pairing         */
    {{0,2}, {1,4}, {3,5}},  /*  1: vertex skip-1            */
    {{0,3}, {1,4}, {2,5}},  /*  2: vesica fold (antipodal)  */
    {{0,4}, {1,5}, {2,3}},  /*  3: vertex skip-2            */
    {{0,5}, {1,2}, {3,4}},  /*  4: hex-edge reverse         */

    /* Rows 5-9 */
    {{0,1}, {2,4}, {3,5}},  /*  5 */
    {{0,2}, {1,3}, {4,5}},  /*  6 */
    {{0,3}, {1,5}, {2,4}},  /*  7 */
    {{0,4}, {1,3}, {2,5}},  /*  8 */
    {{0,5}, {1,4}, {2,3}},  /*  9 */

    /* Rows 10-14 */
    {{0,1}, {2,5}, {3,4}},  /* 10 */
    {{0,2}, {1,5}, {3,4}},  /* 11 */
    {{0,3}, {1,2}, {4,5}},  /* 12 */
    {{0,4}, {1,2}, {3,5}},  /* 13 */
    {{0,5}, {1,3}, {2,4}}   /* 14 */
};
|
| 61 |
+
|
| 62 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 63 |
+
* VESICA FOLD β The antipodal decomposition (Syntheme 2)
|
| 64 |
+
*
|
| 65 |
+
* Maps 6 basis states to 3 vesica + 3 wave components:
* vesica[c] = (state[c] + state[c+3]) / √2 — symmetric
* wave[c] = (state[c] - state[c+3]) / √2 — antisymmetric
*
* c ∈ {0,1,2} maps to CMY channels:
* c=0: {0,3} → Cyan
* c=1: {1,4} → Magenta
* c=2: {2,5} → Yellow
*
* Cost: O(D) = O(6), zero multiplies (addition + constant scaling).
|
| 75 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 76 |
+
|
| 77 |
+
/*
 * Result of folding a 6-component complex vector along the pairs
 * (0,3), (1,4), (2,5): three scaled sums and three scaled differences.
 */
typedef struct {
    double vesica_re[3];   /* real part of (x[c] + x[c+3]) / sqrt(2)      */
    double vesica_im[3];   /* imaginary part of the same sum              */
    double wave_re[3];     /* real part of (x[c] - x[c+3]) / sqrt(2)      */
    double wave_im[3];     /* imaginary part of the same difference       */
} VesicaFold;

/* 1 / sqrt(2), the scale applied by both the fold and its inverse. */
static const double INV_SQRT2 = 0.70710678118654752440;

/*
 * hpc_vesica_fold: fold a 6-vector into sum and difference channels.
 *
 *   re, im  real and imaginary parts of the six input components
 *
 * Returns a VesicaFold whose entry c combines inputs c and c + 3.
 */
static inline VesicaFold hpc_vesica_fold(const double re[6], const double im[6])
{
    VesicaFold folded;

    for (int ch = 0; ch < 3; ++ch) {
        const double re_near = re[ch];
        const double re_far  = re[ch + 3];
        const double im_near = im[ch];
        const double im_far  = im[ch + 3];

        folded.vesica_re[ch] = INV_SQRT2 * (re_near + re_far);
        folded.vesica_im[ch] = INV_SQRT2 * (im_near + im_far);
        folded.wave_re[ch]   = INV_SQRT2 * (re_near - re_far);
        folded.wave_im[ch]   = INV_SQRT2 * (im_near - im_far);
    }

    return folded;
}

/*
 * hpc_vesica_unfold: inverse of hpc_vesica_fold.
 *
 *   vf      folded representation
 *   re, im  output: the six reconstructed components
 *
 * Component c is rebuilt from sum + difference, component c + 3 from
 * sum - difference, each scaled by 1 / sqrt(2).
 */
static inline void hpc_vesica_unfold(const VesicaFold *vf,
                                     double re[6], double im[6])
{
    for (int ch = 0; ch < 3; ++ch) {
        const double sum_re  = vf->vesica_re[ch];
        const double sum_im  = vf->vesica_im[ch];
        const double diff_re = vf->wave_re[ch];
        const double diff_im = vf->wave_im[ch];

        re[ch]     = INV_SQRT2 * (sum_re + diff_re);
        im[ch]     = INV_SQRT2 * (sum_im + diff_im);
        re[ch + 3] = INV_SQRT2 * (sum_re - diff_re);
        im[ch + 3] = INV_SQRT2 * (sum_im - diff_im);
    }
}
|
| 109 |
+
|
| 110 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 111 |
+
* SYNTHEME ENERGY β How much correlation a syntheme captures
|
| 112 |
+
*
|
| 113 |
+
* For a 6Γ6 phase matrix w(a,b), the "energy" captured by syntheme s
|
| 114 |
+
* is the sum of |w(a_i, b_i)|Β² for each pair (a_i, b_i) in the syntheme.
|
| 115 |
+
*
|
| 116 |
+
* The optimal syntheme maximizes this: it's the pairing that captures
|
| 117 |
+
* the most phase structure of the interaction.
|
| 118 |
+
*
|
| 119 |
+
* Cost: O(15 Γ 3) = O(45) β constant, independent of Ο.
|
| 120 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 121 |
+
|
| 122 |
+
static inline double hpc_syntheme_energy(const double w_re[6][6],
|
| 123 |
+
const double w_im[6][6],
|
| 124 |
+
int syntheme_id)
|
| 125 |
+
{
|
| 126 |
+
double energy = 0.0;
|
| 127 |
+
for (int p = 0; p < 3; p++) {
|
| 128 |
+
int a = HPC_SYNTHEMES[syntheme_id][p][0];
|
| 129 |
+
int b = HPC_SYNTHEMES[syntheme_id][p][1];
|
| 130 |
+
/* Sum both (a,b) and (b,a) correlations */
|
| 131 |
+
energy += w_re[a][b] * w_re[a][b] + w_im[a][b] * w_im[a][b];
|
| 132 |
+
energy += w_re[b][a] * w_re[b][a] + w_im[b][a] * w_im[b][a];
|
| 133 |
+
}
|
| 134 |
+
return energy;
|
| 135 |
+
}
|
| 136 |
+
|
| 137 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 138 |
+
* OPTIMAL SYNTHEME SELECTION β O(45) lookup
|
| 139 |
+
*
|
| 140 |
+
* Searches all 15 synthemes for the one that captures the most
|
| 141 |
+
* phase structure of the interaction matrix.
|
| 142 |
+
*
|
| 143 |
+
* This is the Devil's replacement for eigendecomposition:
|
| 144 |
+
* instead of rotating until you find the basis, check the 15
|
| 145 |
+
* analytically-known bases and pick the best one.
|
| 146 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 147 |
+
|
| 148 |
+
/*
 * hpc_select_syntheme: index of the pairing with the largest energy.
 *
 *   w_re, w_im  real and imaginary parts of a 6x6 complex matrix
 *
 * Evaluates hpc_syntheme_energy() for rows 0..14 and returns the row with
 * the highest value.  A later row replaces the current best only when it is
 * strictly greater, so ties resolve to the lowest index.
 */
static inline int hpc_select_syntheme(const double w_re[6][6],
                                      const double w_im[6][6])
{
    int winner = 0;
    double top = hpc_syntheme_energy(w_re, w_im, winner);

    for (int cand = 1; cand < 15; ++cand) {
        const double score = hpc_syntheme_energy(w_re, w_im, cand);
        if (!(score > top))
            continue;
        top = score;
        winner = cand;
    }

    return winner;
}
|
| 163 |
+
|
| 164 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 165 |
+
* SYNTHEME PROJECTION β Project a 6Γ6 matrix onto a syntheme
|
| 166 |
+
*
|
| 167 |
+
* Given a syntheme with pairs {(aβ,bβ), (aβ,bβ), (aβ,bβ)},
|
| 168 |
+
* the projection retains only the entries at paired positions
|
| 169 |
+
* and zeroes everything else.
|
| 170 |
+
*
|
| 171 |
+
* This is the "truncation" operation β the Devil's SVD.
|
| 172 |
+
* It keeps the D=6-native correlations and discards the rest.
|
| 173 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 174 |
+
|
| 175 |
+
static inline void hpc_syntheme_project(const double in_re[6][6],
|
| 176 |
+
const double in_im[6][6],
|
| 177 |
+
int syntheme_id,
|
| 178 |
+
double out_re[6][6],
|
| 179 |
+
double out_im[6][6])
|
| 180 |
+
{
|
| 181 |
+
memset(out_re, 0, 36 * sizeof(double));
|
| 182 |
+
memset(out_im, 0, 36 * sizeof(double));
|
| 183 |
+
|
| 184 |
+
for (int p = 0; p < 3; p++) {
|
| 185 |
+
int a = HPC_SYNTHEMES[syntheme_id][p][0];
|
| 186 |
+
int b = HPC_SYNTHEMES[syntheme_id][p][1];
|
| 187 |
+
|
| 188 |
+
/* Keep paired entries in both directions */
|
| 189 |
+
out_re[a][b] = in_re[a][b]; out_im[a][b] = in_im[a][b];
|
| 190 |
+
out_re[b][a] = in_re[b][a]; out_im[b][a] = in_im[b][a];
|
| 191 |
+
/* Keep diagonal entries at paired positions */
|
| 192 |
+
out_re[a][a] = in_re[a][a]; out_im[a][a] = in_im[a][a];
|
| 193 |
+
out_re[b][b] = in_re[b][b]; out_im[b][b] = in_im[b][b];
|
| 194 |
+
}
|
| 195 |
+
}
|
| 196 |
+
|
| 197 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 198 |
+
* FIDELITY COMPUTATION β How much of the gate was captured?
|
| 199 |
+
*
|
| 200 |
+
* F = ||projected||Β² / ||original||Β²
|
| 201 |
+
*
|
| 202 |
+
* F = 1.0 for CZ (exact).
|
| 203 |
+
* F β [0,1] for general gates.
|
| 204 |
+
* F measures the Ξ-dependent quality of the syntheme decomposition.
|
| 205 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 206 |
+
|
| 207 |
+
/*
 * hpc_compute_fidelity: ratio of squared Frobenius norms.
 *
 *   orig_re, orig_im  reference 6x6 complex matrix
 *   proj_re, proj_im  projected 6x6 complex matrix
 *
 * Returns sum |proj[i][j]|^2 / sum |orig[i][j]|^2, or 0.0 when the
 * reference norm is not greater than 1e-30.
 */
static inline double hpc_compute_fidelity(const double orig_re[6][6],
                                          const double orig_im[6][6],
                                          const double proj_re[6][6],
                                          const double proj_im[6][6])
{
    double denom = 0.0;
    for (int r = 0; r < 6; ++r) {
        for (int c = 0; c < 6; ++c) {
            denom += orig_re[r][c] * orig_re[r][c] +
                     orig_im[r][c] * orig_im[r][c];
        }
    }

    /* A vanishing reference cannot be scored; report no overlap. */
    if (!(denom > 1e-30))
        return 0.0;

    double numer = 0.0;
    for (int r = 0; r < 6; ++r) {
        for (int c = 0; c < 6; ++c) {
            numer += proj_re[r][c] * proj_re[r][c] +
                     proj_im[r][c] * proj_im[r][c];
        }
    }

    return numer / denom;
}
|
| 223 |
+
|
| 224 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 225 |
+
* ENCODE GATE AS SYNTHEME EDGE β The full Devil's contraction
|
| 226 |
+
*
|
| 227 |
+
* Given a 2-site gate's phase matrix (the entangling component):
|
| 228 |
+
* 1. Select the optimal syntheme β O(45)
|
| 229 |
+
* 2. Project onto the syntheme β O(36)
|
| 230 |
+
* 3. Compute fidelity β O(36)
|
| 231 |
+
* 4. Store as a syntheme edge in the graph β O(1)
|
| 232 |
+
*
|
| 233 |
+
* Total: O(DΒ²) = O(36). SVD is O(DΒ³Β·ΟΒ²).
|
| 234 |
+
*
|
| 235 |
+
* For CZ gates, this is never called β CZ is exact.
|
| 236 |
+
* For general gates, this captures the D=6-native structure.
|
| 237 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 238 |
+
|
| 239 |
+
/*
 * hpc_encode_syntheme: append a syntheme edge for a two-site interaction.
 *
 *   g                   graph that receives the edge
 *   site_a, site_b      the two sites joined by the edge
 *   phase_re, phase_im  6x6 complex phase matrix of the interaction
 *
 * Steps:
 *   1. pick the pairing with the largest energy (hpc_select_syntheme)
 *   2. project the phase matrix onto it (hpc_syntheme_project)
 *   3. score the projection (hpc_compute_fidelity)
 *   4. write a zero-initialised HPC_EDGE_SYNTHEME edge at
 *      g->edges[g->n_edges], storing each projected entry normalised to
 *      unit modulus; entries whose modulus is not above 1e-15 are stored as
 *      1 + 0i
 *   5. bump g->n_edges and g->syntheme_edges, then call
 *      hpc_update_fidelity_stats(g)
 *
 * hpc_grow_edges(g) runs before the slot is addressed; presumably it
 * guarantees capacity for one more edge (confirm in hpc_graph.h).
 */
static inline void hpc_encode_syntheme(HPCGraph *g,
                                       uint64_t site_a, uint64_t site_b,
                                       const double phase_re[6][6],
                                       const double phase_im[6][6])
{
    const int chosen = hpc_select_syntheme(phase_re, phase_im);

    double kept_re[6][6], kept_im[6][6];
    hpc_syntheme_project(phase_re, phase_im, chosen, kept_re, kept_im);

    const double score = hpc_compute_fidelity(phase_re, phase_im,
                                              kept_re, kept_im);

    hpc_grow_edges(g);
    HPCEdge *edge = &g->edges[g->n_edges];
    memset(edge, 0, sizeof(HPCEdge));
    edge->type = HPC_EDGE_SYNTHEME;
    edge->site_a = site_a;
    edge->site_b = site_b;
    edge->syntheme_id = chosen;
    edge->fidelity = score;

    for (int row = 0; row < 6; ++row) {
        for (int col = 0; col < 6; ++col) {
            const double re = kept_re[row][col];
            const double im = kept_im[row][col];
            const double modulus = sqrt(re * re + im * im);
            const int usable = modulus > 1e-15;

            /* Zeroed-out entries become the neutral phase 1 + 0i. */
            edge->w_re[row][col] = usable ? re / modulus : 1.0;
            edge->w_im[row][col] = usable ? im / modulus : 0.0;
        }
    }

    g->n_edges++;
    g->syntheme_edges++;
    hpc_update_fidelity_stats(g);
}
|
| 283 |
+
|
| 284 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 285 |
+
* EXTRACT PHASE MATRIX FROM 2-SITE GATE
|
| 286 |
+
*
|
| 287 |
+
* A general 2-site gate G (36×36) can be factored as:
* G = (U_a ⊗ U_b) · diag(phases) · (V_a† ⊗ V_b†)
*
* The "phase matrix" w(j,k) captures the entangling component:
* w(j,k) = G_{(j,k),(j,k)} / |G_{(j,k),(j,k)}|
*
* For CZ: w(j,k) = ω^(j·k) — exact, analytically known.
* For general gates: w(j,k) captures the diagonal entangling phases.
|
| 295 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 296 |
+
|
| 297 |
+
static inline void hpc_extract_phase_matrix(const double *G_re,
|
| 298 |
+
const double *G_im,
|
| 299 |
+
double phase_re[6][6],
|
| 300 |
+
double phase_im[6][6])
|
| 301 |
+
{
|
| 302 |
+
for (int j = 0; j < HPC_D; j++) {
|
| 303 |
+
for (int k = 0; k < HPC_D; k++) {
|
| 304 |
+
int idx = (j * HPC_D + k) * HPC_D * HPC_D + (j * HPC_D + k);
|
| 305 |
+
double g_re = G_re[idx];
|
| 306 |
+
double g_im = G_im[idx];
|
| 307 |
+
double mag = sqrt(g_re * g_re + g_im * g_im);
|
| 308 |
+
|
| 309 |
+
if (mag > 1e-15) {
|
| 310 |
+
phase_re[j][k] = g_re / mag;
|
| 311 |
+
phase_im[j][k] = g_im / mag;
|
| 312 |
+
} else {
|
| 313 |
+
phase_re[j][k] = 1.0;
|
| 314 |
+
phase_im[j][k] = 0.0;
|
| 315 |
+
}
|
| 316 |
+
}
|
| 317 |
+
}
|
| 318 |
+
}
|
| 319 |
+
|
| 320 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 321 |
+
* HIGH-LEVEL ENCODE β Automatic selection of encoding strategy
|
| 322 |
+
*
|
| 323 |
+
* Examines the gate to determine the best encoding:
|
| 324 |
+
* 1. If CZ: exact edge (fidelity=1.0)
|
| 325 |
+
* 2. If syntheme fidelity β₯ threshold: syntheme edge
|
| 326 |
+
* 3. Otherwise: general phase edge (full 6Γ6 matrix)
|
| 327 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 328 |
+
|
| 329 |
+
#define HPC_SYNTHEME_THRESHOLD 0.80 /* Min fidelity for syntheme encoding */
|
| 330 |
+
|
| 331 |
+
/*
 * hpc_encode_2site: choose an edge encoding for a two-site gate.
 *
 *   g               graph that receives the edge
 *   site_a, site_b  the two sites the gate acts on
 *   G_re, G_im      gate matrix, forwarded to hpc_extract_phase_matrix()
 *                   and, on the fallback path, to hpc_general_2site()
 *
 * Decision order:
 *   1. every phase entry within 1e-10 (squared distance) of
 *      HPC_W6[(j*k) % HPC_D]  ->  hpc_cz()
 *   2. syntheme fidelity >= HPC_SYNTHEME_THRESHOLD  ->  hpc_encode_syntheme()
 *   3. otherwise  ->  hpc_general_2site()
 *
 * NOTE(review): hpc_extract_phase_matrix() writes only unit-modulus
 * entries and hpc_syntheme_project() keeps 12 of the 36 (six paired, six
 * diagonal), so the fidelity computed here is always about 12/36.  That is
 * below the 0.80 threshold, which would make branch 2 unreachable; confirm
 * whether this is intended.
 */
static inline void hpc_encode_2site(HPCGraph *g,
                                    uint64_t site_a, uint64_t site_b,
                                    const double *G_re, const double *G_im)
{
    double phase_re[6][6], phase_im[6][6];
    hpc_extract_phase_matrix(G_re, G_im, phase_re, phase_im);

    /* CZ test: walk the cells row-major and stop at the first mismatch. */
    int is_cz = 1;
    for (int cell = 0; cell < HPC_D * HPC_D && is_cz; ++cell) {
        const int j = cell / HPC_D;
        const int k = cell % HPC_D;
        const uint32_t root = (j * k) % HPC_D;

        const double d_re = phase_re[j][k] - HPC_W6_RE[root];
        const double d_im = phase_im[j][k] - HPC_W6_IM[root];
        if (d_re * d_re + d_im * d_im > 1e-10)
            is_cz = 0;
    }

    if (is_cz) {
        hpc_cz(g, site_a, site_b);
        return;
    }

    /* Score the best pairing before committing to a syntheme edge. */
    const int best_s = hpc_select_syntheme(phase_re, phase_im);
    double proj_re[6][6], proj_im[6][6];
    hpc_syntheme_project(phase_re, phase_im, best_s, proj_re, proj_im);
    const double fidelity = hpc_compute_fidelity(phase_re, phase_im,
                                                 proj_re, proj_im);

    if (fidelity >= HPC_SYNTHEME_THRESHOLD) {
        hpc_encode_syntheme(g, site_a, site_b, phase_re, phase_im);
    } else {
        /* Fall back to the general phase edge. */
        hpc_general_2site(g, site_a, site_b, G_re, G_im);
    }
}
|
| 369 |
+
|
| 370 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 371 |
+
* VESICA-ENHANCED CZ β Apply CZ using the vesica fold structure
|
| 372 |
+
*
|
| 373 |
+
* For sites already in vesica-folded representation, CZ has a
|
| 374 |
+
* particularly clean structure: it acts independently on the
|
| 375 |
+
* 3 CMY channels, each as a 2Γ2 CZ (which is just a phase gate).
|
| 376 |
+
*
|
| 377 |
+
* This doesn't change the CZ edge storage (still exact), but it
|
| 378 |
+
* provides insight into the channel-decomposed entanglement structure.
|
| 379 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 380 |
+
|
| 381 |
+
typedef struct {
|
| 382 |
+
double vesica_fidelity; /* How much entanglement is in vesica channel */
|
| 383 |
+
double wave_fidelity; /* How much entanglement is in wave channel */
|
| 384 |
+
double channel_entropy[3]; /* Per-CMY-channel entanglement entropy */
|
| 385 |
+
} HPCVesicaAnalysis;
|
| 386 |
+
|
| 387 |
+
static inline HPCVesicaAnalysis hpc_analyze_vesica(const HPCGraph *g,
|
| 388 |
+
uint64_t site)
|
| 389 |
+
{
|
| 390 |
+
HPCVesicaAnalysis va;
|
| 391 |
+
memset(&va, 0, sizeof(va));
|
| 392 |
+
|
| 393 |
+
const TrialityQuhit *q = &g->locals[site];
|
| 394 |
+
VesicaFold vf = hpc_vesica_fold(q->edge_re, q->edge_im);
|
| 395 |
+
|
| 396 |
+
/* Vesica channel probability */
|
| 397 |
+
double v_prob = 0, w_prob = 0;
|
| 398 |
+
for (int c = 0; c < 3; c++) {
|
| 399 |
+
double vp = vf.vesica_re[c] * vf.vesica_re[c] +
|
| 400 |
+
vf.vesica_im[c] * vf.vesica_im[c];
|
| 401 |
+
double wp = vf.wave_re[c] * vf.wave_re[c] +
|
| 402 |
+
vf.wave_im[c] * vf.wave_im[c];
|
| 403 |
+
v_prob += vp;
|
| 404 |
+
w_prob += wp;
|
| 405 |
+
|
| 406 |
+
/* Per-channel entropy from the pair probabilities */
|
| 407 |
+
double total = vp + wp;
|
| 408 |
+
if (total > 1e-15) {
|
| 409 |
+
double p_v = vp / total, p_w = wp / total;
|
| 410 |
+
if (p_v > 1e-15) va.channel_entropy[c] -= p_v * log2(p_v);
|
| 411 |
+
if (p_w > 1e-15) va.channel_entropy[c] -= p_w * log2(p_w);
|
| 412 |
+
}
|
| 413 |
+
}
|
| 414 |
+
|
| 415 |
+
double total = v_prob + w_prob;
|
| 416 |
+
va.vesica_fidelity = (total > 1e-15) ? v_prob / total : 0.5;
|
| 417 |
+
va.wave_fidelity = (total > 1e-15) ? w_prob / total : 0.5;
|
| 418 |
+
|
| 419 |
+
return va;
|
| 420 |
+
}
|
| 421 |
+
|
| 422 |
+
#endif /* HPC_CONTRACT_H */
|
hpc_graph.h
ADDED
|
@@ -0,0 +1,1062 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
 * hpc_graph.h — The Holographic Phase Graph
 *
 * The Devil's alternative to SVD.
 *
 * SVD reaches into the interior of a tensor and numerically discovers
 * structure. O(n³). Dense. Bulk-seeking.
 *
 * HPC works from the surface: entanglement is encoded as weighted phase
 * edges in a graph. Amplitudes are computed on demand via O(N+E) graph
 * traversal. The state vector is never materialized.
 *
 * Core formula:
 * ψ(i₁,...,iₙ) = [Π_k a_k(i_k)] × [Π_edges w_e(i_a, i_b)]
 *
 * For CZ edges: w_e(a,b) = ω^(a·b) — EXACT, fidelity = 1.0
 * For general edges: w_e(a,b) = arbitrary 6×6 phase matrix — bounded fidelity
 * For syntheme edges: w_e determined by S₆ syntheme projector — O(1) lookup
 *
 * This is an extension of magic_pointer.h that supports:
 * - Weighted phase edges (not just CZ)
 * - Syntheme metadata per edge
 * - Fidelity tracking
 * - On-demand marginal probabilities
 */
|
| 26 |
+
|
| 27 |
+
#ifndef HPC_GRAPH_H
|
| 28 |
+
#define HPC_GRAPH_H
|
| 29 |
+
|
| 30 |
+
#include "quhit_triality.h"
|
| 31 |
+
#include "s6_exotic.h"
|
| 32 |
+
#include "born_rule.h"
|
| 33 |
+
#include <math.h>
|
| 34 |
+
#include <stdlib.h>
|
| 35 |
+
#include <string.h>
|
| 36 |
+
#include <stdio.h>
|
| 37 |
+
|
| 38 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 39 |
+
* CONSTANTS
|
| 40 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 41 |
+
|
| 42 |
+
#define HPC_D           6      /* Physical dimension per site          */
#define HPC_INIT_EDGES  4096   /* Initial edge capacity (grows)        */
#define HPC_INIT_LOG    8192   /* Initial gate log capacity (grows)    */

/*
 * Sixth roots of unity, precomputed: entry k holds the real / imaginary
 * part of exp(2*pi*i*k/6) for k = 0..5.
 */
static const double HPC_W6_RE[6] = {
    1.0,    /* k = 0 */
    0.5,    /* k = 1 */
    -0.5,   /* k = 2 */
    -1.0,   /* k = 3 */
    -0.5,   /* k = 4 */
    0.5     /* k = 5 */
};
static const double HPC_W6_IM[6] = {
    0.0,                     /* k = 0 */
    0.866025403784438647,    /* k = 1 */
    0.866025403784438647,    /* k = 2 */
    0.0,                     /* k = 3 */
    -0.866025403784438647,   /* k = 4 */
    -0.866025403784438647    /* k = 5 */
};
|
| 54 |
+
|
| 55 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 56 |
+
* EDGE TYPES β The Devil has more than one handshake
|
| 57 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 58 |
+
|
| 59 |
+
/* Kind of entangling edge stored in the phase graph. */
typedef enum {
    HPC_EDGE_CZ       = 0,  /* Exact CZ: w(a,b) = ω^(a·b), fidelity = 1.0 */
    HPC_EDGE_PHASE    = 1,  /* General phase: w(a,b) = arbitrary 6×6 matrix */
    HPC_EDGE_SYNTHEME = 2   /* Syntheme-projected: w taken from an S₆ syntheme */
} HPCEdgeType;
|
| 64 |
+
|
| 65 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 66 |
+
* WEIGHTED PHASE EDGE β One entangling interaction on the surface
|
| 67 |
+
*
|
| 68 |
+
* For CZ edges, only type + site indices are used.
|
| 69 |
+
* For general/syntheme edges, the full 6Γ6 phase matrix is stored.
|
| 70 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 71 |
+
|
| 72 |
+
typedef struct {
|
| 73 |
+
HPCEdgeType type;
|
| 74 |
+
uint64_t site_a; /* First site index */
|
| 75 |
+
uint64_t site_b; /* Second site index */
|
| 76 |
+
|
| 77 |
+
/* Phase matrix: w(a,b) β only used for PHASE and SYNTHEME types.
|
| 78 |
+
* For CZ: implicitly Ο^(aΒ·b), never stored.
|
| 79 |
+
* For PHASE: arbitrary complex 6Γ6 (36 complex entries, 576 bytes).
|
| 80 |
+
* For SYNTHEME: derived from syntheme projector. */
|
| 81 |
+
double w_re[HPC_D][HPC_D];
|
| 82 |
+
double w_im[HPC_D][HPC_D];
|
| 83 |
+
|
| 84 |
+
/* Syntheme metadata (only for SYNTHEME type) */
|
| 85 |
+
uint8_t syntheme_id; /* Which of 15 synthemes (0-14) */
|
| 86 |
+
uint8_t total_id; /* Which of 6 synthematic totals (0-5) */
|
| 87 |
+
|
| 88 |
+
/* Quality metric */
|
| 89 |
+
double fidelity; /* 1.0 = lossless, 0.0 = total loss */
|
| 90 |
+
} HPCEdge;
|
| 91 |
+
|
| 92 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 93 |
+
* GATE LOG ENTRY β Recording what was applied
|
| 94 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 95 |
+
|
| 96 |
+
/* Tag identifying which operation a gate-log entry records. */
typedef enum {
    HPC_GATE_LOCAL_DFT     = 0,
    HPC_GATE_LOCAL_PHASE   = 1,
    HPC_GATE_LOCAL_SHIFT   = 2,
    HPC_GATE_LOCAL_UNITARY = 3,
    HPC_GATE_CZ            = 4,
    HPC_GATE_GENERAL_2SITE = 5,
    HPC_GATE_INIT          = 6
} HPCGateType;
|
| 105 |
+
|
| 106 |
+
typedef struct {
|
| 107 |
+
HPCGateType type;
|
| 108 |
+
uint64_t site_a;
|
| 109 |
+
uint64_t site_b; /* Only for 2-site gates */
|
| 110 |
+
double params[12]; /* Gate-specific parameters */
|
| 111 |
+
double fidelity; /* Encoding fidelity for this gate */
|
| 112 |
+
} HPCGateEntry;
|
| 113 |
+
|
| 114 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 115 |
+
* PER-SITE ADJACENCY LIST β O(degree) edge lookup
|
| 116 |
+
*
|
| 117 |
+
* Each site maintains a list of edge indices that touch it.
|
| 118 |
+
* This is the optimization that turns O(N×E) → O(N×degree) = O(N).
|
| 119 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 120 |
+
|
| 121 |
+
#define HPC_ADJ_INIT 16  /* Initial adjacency list capacity per site */

/* Growable list of the edge indices incident to a single site. */
typedef struct {
    uint64_t *edge_ids;  /* Indices into the graph's edge array */
    uint64_t count;      /* Number of edges touching this site */
    uint64_t capacity;   /* Allocated capacity, in entries */
} HPCAdjList;
|
| 128 |
+
|
| 129 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 130 |
+
* HPC GRAPH β The Devil's state representation
|
| 131 |
+
*
|
| 132 |
+
* This struct IS the state. The 6^N state vector does not exist.
|
| 133 |
+
* Entanglement is a graph. Amplitudes are computed on demand.
|
| 134 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 135 |
+
|
| 136 |
+
typedef struct {
|
| 137 |
+
/* ββ Sites ββ */
|
| 138 |
+
uint64_t n_sites;
|
| 139 |
+
TrialityQuhit *locals; /* Per-site local states */
|
| 140 |
+
|
| 141 |
+
/* ββ Phase Graph ββ */
|
| 142 |
+
uint64_t n_edges;
|
| 143 |
+
uint64_t edge_cap;
|
| 144 |
+
HPCEdge *edges; /* Weighted phase edge list */
|
| 145 |
+
|
| 146 |
+
/* ββ Adjacency Lists ββ O(1) per-site edge lookup */
|
| 147 |
+
HPCAdjList *adj; /* Per-site adjacency lists */
|
| 148 |
+
|
| 149 |
+
/* ββ Gate Log ββ */
|
| 150 |
+
uint64_t n_log;
|
| 151 |
+
uint64_t log_cap;
|
| 152 |
+
HPCGateEntry *gate_log;
|
| 153 |
+
|
| 154 |
+
/* ββ Statistics ββ */
|
| 155 |
+
uint64_t amp_evals; /* Amplitude evaluations performed */
|
| 156 |
+
uint64_t prob_evals; /* Probability evaluations */
|
| 157 |
+
uint64_t measurements; /* Measurements performed */
|
| 158 |
+
uint64_t cz_edges; /* Number of exact CZ edges */
|
| 159 |
+
uint64_t phase_edges; /* Number of general phase edges */
|
| 160 |
+
uint64_t syntheme_edges; /* Number of syntheme-encoded edges */
|
| 161 |
+
double min_fidelity; /* Worst fidelity across all edges */
|
| 162 |
+
double avg_fidelity; /* Average fidelity */
|
| 163 |
+
} HPCGraph;
|
| 164 |
+
|
| 165 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 166 |
+
* LIFECYCLE
|
| 167 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 168 |
+
|
| 169 |
+
static inline HPCGraph *hpc_create(uint64_t n_sites)
|
| 170 |
+
{
|
| 171 |
+
HPCGraph *g = (HPCGraph *)calloc(1, sizeof(HPCGraph));
|
| 172 |
+
if (!g) return NULL;
|
| 173 |
+
|
| 174 |
+
g->n_sites = n_sites;
|
| 175 |
+
g->locals = (TrialityQuhit *)calloc(n_sites, sizeof(TrialityQuhit));
|
| 176 |
+
if (!g->locals) { free(g); return NULL; }
|
| 177 |
+
|
| 178 |
+
for (uint64_t i = 0; i < n_sites; i++)
|
| 179 |
+
triality_init(&g->locals[i]);
|
| 180 |
+
|
| 181 |
+
g->edge_cap = (n_sites < HPC_INIT_EDGES) ? n_sites * 2 + 16 : HPC_INIT_EDGES;
|
| 182 |
+
g->edges = (HPCEdge *)calloc(g->edge_cap, sizeof(HPCEdge));
|
| 183 |
+
g->n_edges = 0;
|
| 184 |
+
|
| 185 |
+
/* Initialize per-site adjacency lists */
|
| 186 |
+
g->adj = (HPCAdjList *)calloc(n_sites, sizeof(HPCAdjList));
|
| 187 |
+
for (uint64_t i = 0; i < n_sites; i++) {
|
| 188 |
+
g->adj[i].capacity = HPC_ADJ_INIT;
|
| 189 |
+
g->adj[i].edge_ids = (uint64_t *)calloc(HPC_ADJ_INIT, sizeof(uint64_t));
|
| 190 |
+
g->adj[i].count = 0;
|
| 191 |
+
}
|
| 192 |
+
|
| 193 |
+
g->log_cap = HPC_INIT_LOG;
|
| 194 |
+
g->gate_log = (HPCGateEntry *)calloc(g->log_cap, sizeof(HPCGateEntry));
|
| 195 |
+
g->n_log = 0;
|
| 196 |
+
|
| 197 |
+
g->min_fidelity = 1.0;
|
| 198 |
+
g->avg_fidelity = 1.0;
|
| 199 |
+
|
| 200 |
+
return g;
|
| 201 |
+
}
|
| 202 |
+
|
| 203 |
+
/* Release a graph and everything it owns: each site's adjacency buffer,
 * the adjacency array, the local states, the edge array and the gate log.
 * Passing NULL is a no-op. */
static inline void hpc_destroy(HPCGraph *g)
{
    if (g == NULL)
        return;

    HPCAdjList *lists = g->adj;
    if (lists != NULL) {
        uint64_t s = 0;
        while (s < g->n_sites) {
            free(lists[s].edge_ids);
            s++;
        }
        free(lists);
    }

    free(g->locals);
    free(g->edges);
    free(g->gate_log);
    free(g);
}
|
| 216 |
+
|
| 217 |
+
/* ββββββββββββββββββββββββββββββββοΏ½οΏ½ββββββββββββββββββββββββββββββββββββββ
|
| 218 |
+
* INTERNAL: grow arrays
|
| 219 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 220 |
+
|
| 221 |
+
/* Ensure the edge array has room for at least one more edge.
 *
 * Does nothing while n_edges < edge_cap; otherwise doubles the capacity.
 * This helper returns void and its callers write into the array right
 * after calling it, so an allocation failure cannot be reported: the
 * process is aborted with a message instead of continuing with a NULL or
 * undersized buffer. The old buffer is never overwritten before the new
 * one is known to be valid. */
static inline void hpc_grow_edges(HPCGraph *g)
{
    if (g->n_edges < g->edge_cap) return;

    /* Largest element count whose byte size still fits in size_t. */
    const uint64_t max_elems = (uint64_t)((size_t)-1 / sizeof(HPCEdge));
    HPCEdge *grown = NULL;
    uint64_t new_cap = 0;

    if (g->edge_cap <= max_elems / 2) {
        new_cap = g->edge_cap ? g->edge_cap * 2 : 1;
        grown = (HPCEdge *)realloc(g->edges, (size_t)new_cap * sizeof(HPCEdge));
    }
    if (!grown) {
        fprintf(stderr, "hpc_grow_edges: out of memory\n");
        abort();
    }

    g->edges = grown;
    g->edge_cap = new_cap;
}
|
| 227 |
+
|
| 228 |
+
/* Grow the graph to accommodate new_n_sites total sites.
|
| 229 |
+
* Reallocates locals[] and adj[] arrays, initializes new entries.
|
| 230 |
+
* If new_n_sites <= g->n_sites, this is a no-op. */
|
| 231 |
+
/* Extend the graph to new_n_sites sites.
 *
 * Existing sites are preserved. Each added site gets a local state from
 * triality_init() and an empty adjacency list of capacity HPC_ADJ_INIT.
 * A request that does not enlarge the graph is a no-op.
 *
 * This function returns void, so an allocation failure cannot be reported
 * to the caller: the process is aborted with a message rather than left
 * with NULL or partially initialized arrays. */
static inline void hpc_grow_sites(HPCGraph *g, uint64_t new_n_sites)
{
    if (new_n_sites <= g->n_sites) return;

    /* Reject sizes whose byte count would overflow size_t. */
    if (new_n_sites > (uint64_t)((size_t)-1 / sizeof(TrialityQuhit)) ||
        new_n_sites > (uint64_t)((size_t)-1 / sizeof(HPCAdjList))) {
        fprintf(stderr, "hpc_grow_sites: requested size too large\n");
        abort();
    }

    /* Assign each realloc result only after it has been checked, so the
     * original buffer is never lost. */
    TrialityQuhit *locals = (TrialityQuhit *)realloc(
        g->locals, (size_t)new_n_sites * sizeof(TrialityQuhit));
    if (!locals) {
        fprintf(stderr, "hpc_grow_sites: out of memory\n");
        abort();
    }
    g->locals = locals;

    HPCAdjList *adj = (HPCAdjList *)realloc(
        g->adj, (size_t)new_n_sites * sizeof(HPCAdjList));
    if (!adj) {
        fprintf(stderr, "hpc_grow_sites: out of memory\n");
        abort();
    }
    g->adj = adj;

    /* Initialize the new sites */
    for (uint64_t i = g->n_sites; i < new_n_sites; i++) {
        triality_init(&g->locals[i]);
        g->adj[i].edge_ids = (uint64_t *)calloc(HPC_ADJ_INIT, sizeof(uint64_t));
        if (!g->adj[i].edge_ids) {
            fprintf(stderr, "hpc_grow_sites: out of memory\n");
            abort();
        }
        g->adj[i].capacity = HPC_ADJ_INIT;
        g->adj[i].count = 0;
    }

    g->n_sites = new_n_sites;
}
|
| 250 |
+
|
| 251 |
+
/* Ensure an adjacency list has room for at least one more edge id.
 *
 * Does nothing while count < capacity; otherwise doubles the capacity.
 * The only visible caller writes into the list right after this returns,
 * so an allocation failure aborts the process with a message instead of
 * leaving a NULL or undersized buffer behind. */
static inline void hpc_grow_adj(HPCAdjList *a)
{
    if (a->count < a->capacity) return;

    /* Largest element count whose byte size still fits in size_t. */
    const uint64_t max_elems = (uint64_t)((size_t)-1 / sizeof(uint64_t));
    uint64_t *grown = NULL;
    uint64_t new_cap = 0;

    if (a->capacity <= max_elems / 2) {
        new_cap = a->capacity ? a->capacity * 2 : 1;
        grown = (uint64_t *)realloc(a->edge_ids,
                                    (size_t)new_cap * sizeof(uint64_t));
    }
    if (!grown) {
        fprintf(stderr, "hpc_grow_adj: out of memory\n");
        abort();
    }

    a->edge_ids = grown;
    a->capacity = new_cap;
}
|
| 258 |
+
|
| 259 |
+
/* Append edge_id to the adjacency list of `site`, growing the list first
 * if it is full. No duplicate check is performed. */
static inline void hpc_adj_add(HPCGraph *g, uint64_t site, uint64_t edge_id)
{
    HPCAdjList *list = g->adj + site;

    hpc_grow_adj(list);
    list->edge_ids[list->count] = edge_id;
    list->count += 1;
}
|
| 265 |
+
|
| 266 |
+
/* Remove the first occurrence of edge_id from the adjacency list of
 * `site`. The vacated slot is filled with the last entry (swap-remove),
 * so the order of the list is not preserved. Does nothing if the id is
 * not present. */
static inline void hpc_adj_remove(HPCGraph *g, uint64_t site, uint64_t edge_id)
{
    HPCAdjList *list = g->adj + site;
    uint64_t pos = 0;

    while (pos < list->count && list->edge_ids[pos] != edge_id)
        pos++;

    if (pos == list->count)
        return;  /* not found */

    list->count -= 1;
    list->edge_ids[pos] = list->edge_ids[list->count];
}
|
| 276 |
+
|
| 277 |
+
/* Replace one edge ID with another in a site's adjacency list */
|
| 278 |
+
/* Overwrite the first occurrence of old_id in the adjacency list of
 * `site` with new_id. Does nothing if old_id is not present. */
static inline void hpc_adj_replace(HPCGraph *g, uint64_t site,
                                   uint64_t old_id, uint64_t new_id)
{
    HPCAdjList *list = g->adj + site;
    uint64_t pos = 0;

    while (pos < list->count && list->edge_ids[pos] != old_id)
        pos++;

    if (pos < list->count)
        list->edge_ids[pos] = new_id;
}
|
| 289 |
+
|
| 290 |
+
/* Ensure the gate log has room for at least one more entry.
 *
 * Does nothing while n_log < log_cap; otherwise doubles the capacity.
 * The only visible caller writes into the log right after this returns,
 * so an allocation failure aborts the process with a message instead of
 * leaving a NULL or undersized buffer behind. */
static inline void hpc_grow_log(HPCGraph *g)
{
    if (g->n_log < g->log_cap) return;

    /* Largest element count whose byte size still fits in size_t. */
    const uint64_t max_elems = (uint64_t)((size_t)-1 / sizeof(HPCGateEntry));
    HPCGateEntry *grown = NULL;
    uint64_t new_cap = 0;

    if (g->log_cap <= max_elems / 2) {
        new_cap = g->log_cap ? g->log_cap * 2 : 1;
        grown = (HPCGateEntry *)realloc(g->gate_log,
                                        (size_t)new_cap * sizeof(HPCGateEntry));
    }
    if (!grown) {
        fprintf(stderr, "hpc_grow_log: out of memory\n");
        abort();
    }

    g->gate_log = grown;
    g->log_cap = new_cap;
}
|
| 297 |
+
|
| 298 |
+
/* Append a copy of `entry` to the gate log, growing the log if needed. */
static inline void hpc_log_gate(HPCGraph *g, HPCGateEntry entry)
{
    hpc_grow_log(g);

    const uint64_t slot = g->n_log;
    g->gate_log[slot] = entry;
    g->n_log = slot + 1;
}
|
| 303 |
+
|
| 304 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 305 |
+
* INTERNAL: update fidelity statistics
|
| 306 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 307 |
+
|
| 308 |
+
/* Recompute min_fidelity and avg_fidelity from scratch over all edges.
 * With no edges both are reset to 1.0. The minimum is capped at 1.0.
 * Cost is one pass over the edge array. */
static inline void hpc_update_fidelity_stats(HPCGraph *g)
{
    double worst = 1.0;
    double mean = 1.0;

    if (g->n_edges != 0) {
        double total = 0.0;
        uint64_t idx = 0;

        while (idx < g->n_edges) {
            const double fid = g->edges[idx].fidelity;
            total += fid;
            if (fid < worst)
                worst = fid;
            idx++;
        }
        mean = total / (double)g->n_edges;
    }

    g->min_fidelity = worst;
    g->avg_fidelity = mean;
}
|
| 325 |
+
|
| 326 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 327 |
+
* LOCAL GATES β Absorbed into the local quhit state
|
| 328 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 329 |
+
|
| 330 |
+
/* Overwrite the local state of `site` with the given complex amplitudes.
 *
 * re/im hold the HPC_D real and imaginary components. The site's edge
 * view becomes the primary view, the vertex/diagonal/folded views are
 * marked dirty, delta_valid is cleared and the active mask is refreshed.
 *
 * The operation is logged as HPC_GATE_INIT. params[0..5] receive re[] and
 * params[6..11] receive im[], so the log keeps the full complex vector
 * (previously the imaginary parts were dropped and those slots stayed 0). */
static inline void hpc_set_local(HPCGraph *g, uint64_t site,
                                 const double re[6], const double im[6])
{
    TrialityQuhit *q = &g->locals[site];
    for (int i = 0; i < HPC_D; i++) {
        q->edge_re[i] = re[i];
        q->edge_im[i] = im[i];
    }
    q->primary = VIEW_EDGE;
    q->dirty = DIRTY_VERTEX | DIRTY_DIAGONAL | DIRTY_FOLDED;
    q->delta_valid = 0;
    triality_update_mask(q);

    HPCGateEntry entry = { .type = HPC_GATE_INIT, .site_a = site,
                           .fidelity = 1.0 };
    for (int i = 0; i < HPC_D; i++) {
        entry.params[i] = re[i];
        entry.params[HPC_D + i] = im[i];
    }
    hpc_log_gate(g, entry);
}
|
| 348 |
+
|
| 349 |
+
/* Apply triality_dft() to the local state of `site` and record the
 * operation in the gate log with fidelity 1.0. */
static inline void hpc_dft(HPCGraph *g, uint64_t site)
{
    TrialityQuhit *local = g->locals + site;
    triality_dft(local);

    HPCGateEntry record = { .type = HPC_GATE_LOCAL_DFT, .site_a = site,
                            .fidelity = 1.0 };
    hpc_log_gate(g, record);
}
|
| 356 |
+
|
| 357 |
+
/* Apply triality_phase() with the given per-level complex factors to the
 * local state of `site`, then log the operation with fidelity 1.0.
 *
 * params[0..5] receive phi_re[] and params[6..11] receive phi_im[], so
 * the log keeps the full complex factors (previously the imaginary parts
 * were dropped and those slots stayed 0). */
static inline void hpc_phase(HPCGraph *g, uint64_t site,
                             const double phi_re[6], const double phi_im[6])
{
    triality_phase(&g->locals[site], phi_re, phi_im);

    HPCGateEntry entry = { .type = HPC_GATE_LOCAL_PHASE, .site_a = site,
                           .fidelity = 1.0 };
    for (int i = 0; i < HPC_D; i++) {
        entry.params[i] = phi_re[i];
        entry.params[HPC_D + i] = phi_im[i];
    }
    hpc_log_gate(g, entry);
}
|
| 366 |
+
|
| 367 |
+
/* Apply triality_shift() by `delta` to the local state of `site` and log
 * the operation; the shift amount is stored in params[0]. */
static inline void hpc_shift(HPCGraph *g, uint64_t site, int delta)
{
    TrialityQuhit *local = g->locals + site;
    triality_shift(local, delta);

    HPCGateEntry record = { .type = HPC_GATE_LOCAL_SHIFT, .site_a = site,
                            .fidelity = 1.0 };
    record.params[0] = (double)delta;
    hpc_log_gate(g, record);
}
|
| 375 |
+
|
| 376 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 377 |
+
* CZ GATE β The Devil's perfect handshake
|
| 378 |
+
*
|
| 379 |
+
* CZ is EXACT in HPC: no truncation, no approximation, no SVD.
|
| 380 |
+
* The entanglement is recorded as a phase edge: w(a,b) = Ο^(aΒ·b).
|
| 381 |
+
* Fidelity = 1.0. Always. This is the Devil at full power.
|
| 382 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 383 |
+
|
| 384 |
+
/* Record a CZ gate between site_a and site_b as a new exact edge.
 *
 * The edge is zero-filled and tagged HPC_EDGE_CZ with fidelity 1.0; its
 * weight matrix is not filled in because the CZ weight ω^(a·b) is implied
 * by the type. Both sites' adjacency lists and the gate log are updated.
 * Fidelity statistics are not recomputed here. */
static inline void hpc_cz(HPCGraph *g, uint64_t site_a, uint64_t site_b)
{
    hpc_grow_edges(g);

    const uint64_t new_id = g->n_edges;
    HPCEdge *slot = g->edges + new_id;

    memset(slot, 0, sizeof(HPCEdge));
    slot->fidelity = 1.0;
    slot->site_b = site_b;
    slot->site_a = site_a;
    slot->type = HPC_EDGE_CZ;

    g->cz_edges += 1;
    g->n_edges += 1;

    /* Keep both endpoints' adjacency lists in sync. */
    hpc_adj_add(g, site_a, new_id);
    hpc_adj_add(g, site_b, new_id);

    HPCGateEntry record = {
        .type = HPC_GATE_CZ,
        .site_a = site_a, .site_b = site_b,
        .fidelity = 1.0
    };
    hpc_log_gate(g, record);
}
|
| 411 |
+
|
| 412 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 413 |
+
* GENERAL 2-SITE GATE β Encoded as a weighted phase edge
|
| 414 |
+
*
|
| 415 |
+
* For a general 2-site gate G acting on sites (a,b):
|
| 416 |
+
* The gate creates entanglement that we encode as a phase matrix.
|
| 417 |
+
* G|ψ_a⟩|ψ_b⟩ = Σ_{j,k} G_{(j,k),(m,n)} ψ_a(m) ψ_b(n) |j⟩|k⟩
|
| 418 |
+
*
|
| 419 |
+
* We decompose G into: (local on a) Γ (phase edge) Γ (local on b)
|
| 420 |
+
* The phase edge captures the entangling component.
|
| 421 |
+
*
|
| 422 |
+
* For CZ: this decomposition is EXACT (CZ is already in this form).
|
| 423 |
+
* For general gates: this is the syntheme approximation (lossy).
|
| 424 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 425 |
+
|
| 426 |
+
/* Encode a general 2-site gate as a HPC_EDGE_PHASE edge.
 *
 * G_re/G_im hold the real and imaginary parts of a (D²×D²) = 36×36
 * matrix in row-major order: entry G_{(j,k),(m,n)} lives at index
 * (j*D + k) * D*D + (m*D + n).
 *
 * Only the diagonal of G is kept. For every (j,k) the edge weight is
 *   w(j,k) = G_{(j,k),(j,k)} / |G_{(j,k),(j,k)}|
 * or 1 when that entry's magnitude is at most 1e-15.
 *
 * The edge fidelity is the mean, over rows with squared norm above 1e-30,
 * of |diagonal entry|² / (squared row norm); it is 0.0 if no row
 * qualifies. Adjacency lists, fidelity statistics and the gate log are
 * updated afterwards. */
static inline void hpc_general_2site(HPCGraph *g, uint64_t site_a,
                                     uint64_t site_b,
                                     const double *G_re, const double *G_im)
{
    const int dim2 = HPC_D * HPC_D;  /* entries per row of G */

    hpc_grow_edges(g);

    uint64_t eid = g->n_edges;
    HPCEdge *e = &g->edges[eid];
    memset(e, 0, sizeof(HPCEdge));
    e->type = HPC_EDGE_PHASE;
    e->site_a = site_a;
    e->site_b = site_b;

    double fidelity_sum = 0.0;
    int fidelity_count = 0;

    for (int j = 0; j < HPC_D; j++) {
        for (int k = 0; k < HPC_D; k++) {
            const int row = j * HPC_D + k;
            const double *row_re = G_re + row * dim2;
            const double *row_im = G_im + row * dim2;

            /* Diagonal entry of this row and its squared magnitude. */
            const double g_re = row_re[row];
            const double g_im = row_im[row];
            const double diag_norm2 = g_re * g_re + g_im * g_im;
            const double mag = sqrt(diag_norm2);

            if (mag > 1e-15) {
                e->w_re[j][k] = g_re / mag;
                e->w_im[j][k] = g_im / mag;
            } else {
                /* Vanishing diagonal: fall back to the trivial phase. */
                e->w_re[j][k] = 1.0;
                e->w_im[j][k] = 0.0;
            }

            /* Share of the row's weight carried by the diagonal entry. */
            double row_norm2 = 0.0;
            for (int c = 0; c < dim2; c++)
                row_norm2 += row_re[c] * row_re[c] + row_im[c] * row_im[c];

            if (row_norm2 > 1e-30) {
                fidelity_sum += diag_norm2 / row_norm2;
                fidelity_count++;
            }
        }
    }

    e->fidelity = (fidelity_count > 0) ? fidelity_sum / fidelity_count : 0.0;

    g->n_edges++;
    g->phase_edges++;

    /* Maintain adjacency lists */
    hpc_adj_add(g, site_a, eid);
    hpc_adj_add(g, site_b, eid);

    hpc_update_fidelity_stats(g);

    HPCGateEntry entry = {
        .type = HPC_GATE_GENERAL_2SITE,
        .site_a = site_a, .site_b = site_b,
        .fidelity = e->fidelity
    };
    hpc_log_gate(g, entry);
}
|
| 501 |
+
|
| 502 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 503 |
+
* THE MAGIC: Amplitude Evaluation
|
| 504 |
+
*
|
| 505 |
+
* ψ(i₁,...,iₙ) = [Π_k a_k(i_k)] × [Π_edges w_e(i_a, i_b)]
|
| 506 |
+
*
|
| 507 |
+
* Cost: O(N + E) β linear in sites + edges
|
| 508 |
+
* Memory: O(1) additional
|
| 509 |
+
*
|
| 510 |
+
* For CZ edges: w_e(a,b) = Ο^(aΒ·b) β precomputed lookup, no math
|
| 511 |
+
* For PHASE/SYNTHEME edges: w_e(a,b) from stored 6Γ6 matrix
|
| 512 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 513 |
+
|
| 514 |
+
static inline void hpc_amplitude(const HPCGraph *g,
|
| 515 |
+
const uint32_t *indices,
|
| 516 |
+
double *out_re, double *out_im)
|
| 517 |
+
{
|
| 518 |
+
double re = 1.0, im = 0.0;
|
| 519 |
+
|
| 520 |
+
/* Step 1: Product of local amplitudes β O(N) */
|
| 521 |
+
for (uint64_t k = 0; k < g->n_sites; k++) {
|
| 522 |
+
uint32_t idx = indices[k];
|
| 523 |
+
const TrialityQuhit *q = &g->locals[k];
|
| 524 |
+
double a_re = q->edge_re[idx];
|
| 525 |
+
double a_im = q->edge_im[idx];
|
| 526 |
+
double new_re = re * a_re - im * a_im;
|
| 527 |
+
double new_im = re * a_im + im * a_re;
|
| 528 |
+
re = new_re;
|
| 529 |
+
im = new_im;
|
| 530 |
+
}
|
| 531 |
+
|
| 532 |
+
/* Step 2: Phase edge accumulation β O(E) */
|
| 533 |
+
for (uint64_t e = 0; e < g->n_edges; e++) {
|
| 534 |
+
const HPCEdge *edge = &g->edges[e];
|
| 535 |
+
uint32_t ia = indices[edge->site_a];
|
| 536 |
+
uint32_t ib = indices[edge->site_b];
|
| 537 |
+
|
| 538 |
+
double w_re, w_im;
|
| 539 |
+
|
| 540 |
+
if (edge->type == HPC_EDGE_CZ) {
|
| 541 |
+
/* CZ: Ο^(iaΒ·ib) β precomputed, O(1) */
|
| 542 |
+
uint32_t phase_idx = (ia * ib) % HPC_D;
|
| 543 |
+
w_re = HPC_W6_RE[phase_idx];
|
| 544 |
+
w_im = HPC_W6_IM[phase_idx];
|
| 545 |
+
} else {
|
| 546 |
+
/* PHASE or SYNTHEME: lookup from stored matrix */
|
| 547 |
+
w_re = edge->w_re[ia][ib];
|
| 548 |
+
w_im = edge->w_im[ia][ib];
|
| 549 |
+
}
|
| 550 |
+
|
| 551 |
+
double new_re = re * w_re - im * w_im;
|
| 552 |
+
double new_im = re * w_im + im * w_re;
|
| 553 |
+
re = new_re;
|
| 554 |
+
im = new_im;
|
| 555 |
+
}
|
| 556 |
+
|
| 557 |
+
*out_re = re;
|
| 558 |
+
*out_im = im;
|
| 559 |
+
((HPCGraph *)g)->amp_evals++;
|
| 560 |
+
}
|
| 561 |
+
|
| 562 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 563 |
+
* PROBABILITY β |Ο(iβ,...,iβ)|Β²
|
| 564 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 565 |
+
|
| 566 |
+
static inline double hpc_probability(const HPCGraph *g,
|
| 567 |
+
const uint32_t *indices)
|
| 568 |
+
{
|
| 569 |
+
double re, im;
|
| 570 |
+
hpc_amplitude(g, indices, &re, &im);
|
| 571 |
+
((HPCGraph *)g)->prob_evals++;
|
| 572 |
+
return re * re + im * im;
|
| 573 |
+
}
|
| 574 |
+
|
| 575 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 576 |
+
* MARGINAL PROBABILITY β P(site_k = v)
|
| 577 |
+
*
|
| 578 |
+
* Uses per-site adjacency lists for O(degree) edge lookup.
|
| 579 |
+
* Only enumerates sites connected by edges to site k.
|
| 580 |
+
* Disconnected sites contribute 1.0 (they're normalized independently).
|
| 581 |
+
*
|
| 582 |
+
* OPTIMIZED: O(degree) edge lookup via adjacency list.
|
| 583 |
+
* Old version: O(E) scan β O(NΓE) = O(NΒ²) total.
|
| 584 |
+
* New version: O(degree) lookup β O(NΓdegree) = O(N) for bounded-degree lattices.
|
| 585 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 586 |
+
|
| 587 |
+
static inline double hpc_marginal(const HPCGraph *g,
|
| 588 |
+
uint64_t site, uint32_t value)
|
| 589 |
+
{
|
| 590 |
+
const HPCAdjList *adj = &g->adj[site];
|
| 591 |
+
|
| 592 |
+
/* Product state: no edges touching this site */
|
| 593 |
+
if (adj->count == 0) {
|
| 594 |
+
const TrialityQuhit *q = &g->locals[site];
|
| 595 |
+
return q->edge_re[value] * q->edge_re[value] +
|
| 596 |
+
q->edge_im[value] * q->edge_im[value];
|
| 597 |
+
}
|
| 598 |
+
|
| 599 |
+
/* Find unique connected sites via adjacency list β O(degree) */
|
| 600 |
+
uint64_t connected[128];
|
| 601 |
+
uint64_t conn_edge_ids[512]; /* Edge IDs in connected subsystem */
|
| 602 |
+
uint64_t n_connected = 0;
|
| 603 |
+
uint64_t n_conn_edges = 0;
|
| 604 |
+
|
| 605 |
+
for (uint64_t i = 0; i < adj->count; i++) {
|
| 606 |
+
uint64_t eid = adj->edge_ids[i];
|
| 607 |
+
const HPCEdge *edge = &g->edges[eid];
|
| 608 |
+
uint64_t partner = (edge->site_a == site) ? edge->site_b : edge->site_a;
|
| 609 |
+
|
| 610 |
+
/* Add edge to subsystem edge list */
|
| 611 |
+
if (n_conn_edges < 512)
|
| 612 |
+
conn_edge_ids[n_conn_edges++] = eid;
|
| 613 |
+
|
| 614 |
+
/* Add partner to connected list (dedup) */
|
| 615 |
+
int found = 0;
|
| 616 |
+
for (uint64_t c = 0; c < n_connected; c++)
|
| 617 |
+
if (connected[c] == partner) { found = 1; break; }
|
| 618 |
+
if (!found && n_connected < 128)
|
| 619 |
+
connected[n_connected++] = partner;
|
| 620 |
+
}
|
| 621 |
+
|
| 622 |
+
/* Also find edges between connected partners (not touching site)
|
| 623 |
+
* by scanning adjacency lists of connected sites β O(degreeΒ²) */
|
| 624 |
+
for (uint64_t c = 0; c < n_connected; c++) {
|
| 625 |
+
const HPCAdjList *padj = &g->adj[connected[c]];
|
| 626 |
+
for (uint64_t i = 0; i < padj->count; i++) {
|
| 627 |
+
uint64_t eid = padj->edge_ids[i];
|
| 628 |
+
const HPCEdge *edge = &g->edges[eid];
|
| 629 |
+
uint64_t sa = edge->site_a, sb = edge->site_b;
|
| 630 |
+
if (sa == site || sb == site) continue; /* Already counted */
|
| 631 |
+
|
| 632 |
+
/* Check if both ends are in connected set */
|
| 633 |
+
int a_in = 0, b_in = 0;
|
| 634 |
+
for (uint64_t c2 = 0; c2 < n_connected; c2++) {
|
| 635 |
+
if (connected[c2] == sa) a_in = 1;
|
| 636 |
+
if (connected[c2] == sb) b_in = 1;
|
| 637 |
+
}
|
| 638 |
+
if (a_in && b_in) {
|
| 639 |
+
/* Dedup edge */
|
| 640 |
+
int dup = 0;
|
| 641 |
+
for (uint64_t e2 = 0; e2 < n_conn_edges; e2++)
|
| 642 |
+
if (conn_edge_ids[e2] == eid) { dup = 1; break; }
|
| 643 |
+
if (!dup && n_conn_edges < 512)
|
| 644 |
+
conn_edge_ids[n_conn_edges++] = eid;
|
| 645 |
+
}
|
| 646 |
+
}
|
| 647 |
+
}
|
| 648 |
+
|
| 649 |
+
/* βββ Component 4: Ξ-Gated Fast Path βββ
|
| 650 |
+
* Instead of enumerating all D^n_connected configurations,
|
| 651 |
+
* only enumerate basis states that have nonzero amplitude
|
| 652 |
+
* (tracked by active_mask). For states confined to k of 6
|
| 653 |
+
* basis states, this reduces from 6^n to k^n configs.
|
| 654 |
+
*
|
| 655 |
+
* From the Faustian Pact: Ξβ0 states use fewer basis states,
|
| 656 |
+
* making this optimization most effective when it matters most. */
|
| 657 |
+
|
| 658 |
+
/* Build per-partner active state lists */
|
| 659 |
+
uint32_t partner_active[128][6];
|
| 660 |
+
uint32_t partner_active_count[128];
|
| 661 |
+
uint64_t n_configs = 1;
|
| 662 |
+
|
| 663 |
+
for (uint64_t c = 0; c < n_connected; c++) {
|
| 664 |
+
const TrialityQuhit *q_c = &g->locals[connected[c]];
|
| 665 |
+
uint8_t mask = q_c->active_mask ? q_c->active_mask : 0x3F;
|
| 666 |
+
int cnt = 0;
|
| 667 |
+
for (int k = 0; k < HPC_D; k++)
|
| 668 |
+
if (mask & (1 << k)) partner_active[c][cnt++] = k;
|
| 669 |
+
partner_active_count[c] = cnt;
|
| 670 |
+
n_configs *= cnt;
|
| 671 |
+
}
|
| 672 |
+
|
| 673 |
+
double total_prob = 0.0;
|
| 674 |
+
for (uint64_t cfg = 0; cfg < n_configs; cfg++) {
|
| 675 |
+
uint32_t partner_vals[128];
|
| 676 |
+
uint64_t tmp = cfg;
|
| 677 |
+
for (uint64_t c = 0; c < n_connected; c++) {
|
| 678 |
+
uint32_t idx_in_active = tmp % partner_active_count[c];
|
| 679 |
+
partner_vals[c] = partner_active[c][idx_in_active];
|
| 680 |
+
tmp /= partner_active_count[c];
|
| 681 |
+
}
|
| 682 |
+
|
| 683 |
+
/* Compute amplitude for this configuration */
|
| 684 |
+
const TrialityQuhit *q_site = &g->locals[site];
|
| 685 |
+
double amp_re = q_site->edge_re[value];
|
| 686 |
+
double amp_im = q_site->edge_im[value];
|
| 687 |
+
|
| 688 |
+
for (uint64_t c = 0; c < n_connected; c++) {
|
| 689 |
+
const TrialityQuhit *q_p = &g->locals[connected[c]];
|
| 690 |
+
uint32_t pv = partner_vals[c];
|
| 691 |
+
double p_re = q_p->edge_re[pv], p_im = q_p->edge_im[pv];
|
| 692 |
+
double new_re = amp_re * p_re - amp_im * p_im;
|
| 693 |
+
double new_im = amp_re * p_im + amp_im * p_re;
|
| 694 |
+
amp_re = new_re;
|
| 695 |
+
amp_im = new_im;
|
| 696 |
+
}
|
| 697 |
+
|
| 698 |
+
/* Phase contributions from edges in the connected subsystem only */
|
| 699 |
+
for (uint64_t ei = 0; ei < n_conn_edges; ei++) {
|
| 700 |
+
const HPCEdge *edge = &g->edges[conn_edge_ids[ei]];
|
| 701 |
+
uint64_t sa = edge->site_a;
|
| 702 |
+
uint64_t sb = edge->site_b;
|
| 703 |
+
|
| 704 |
+
uint32_t va = 0, vb = 0;
|
| 705 |
+
|
| 706 |
+
/* Resolve values for both endpoints */
|
| 707 |
+
if (sa == site) {
|
| 708 |
+
va = value;
|
| 709 |
+
for (uint64_t c = 0; c < n_connected; c++)
|
| 710 |
+
if (connected[c] == sb) { vb = partner_vals[c]; break; }
|
| 711 |
+
} else if (sb == site) {
|
| 712 |
+
vb = value;
|
| 713 |
+
for (uint64_t c = 0; c < n_connected; c++)
|
| 714 |
+
if (connected[c] == sa) { va = partner_vals[c]; break; }
|
| 715 |
+
} else {
|
| 716 |
+
for (uint64_t c = 0; c < n_connected; c++) {
|
| 717 |
+
if (connected[c] == sa) va = partner_vals[c];
|
| 718 |
+
if (connected[c] == sb) vb = partner_vals[c];
|
| 719 |
+
}
|
| 720 |
+
}
|
| 721 |
+
|
| 722 |
+
double w_re, w_im;
|
| 723 |
+
if (edge->type == HPC_EDGE_CZ) {
|
| 724 |
+
uint32_t phase_idx = (va * vb) % HPC_D;
|
| 725 |
+
w_re = HPC_W6_RE[phase_idx];
|
| 726 |
+
w_im = HPC_W6_IM[phase_idx];
|
| 727 |
+
} else {
|
| 728 |
+
w_re = edge->w_re[va][vb];
|
| 729 |
+
w_im = edge->w_im[va][vb];
|
| 730 |
+
}
|
| 731 |
+
|
| 732 |
+
double new_re = amp_re * w_re - amp_im * w_im;
|
| 733 |
+
double new_im = amp_re * w_im + amp_im * w_re;
|
| 734 |
+
amp_re = new_re;
|
| 735 |
+
amp_im = new_im;
|
| 736 |
+
}
|
| 737 |
+
|
| 738 |
+
total_prob += amp_re * amp_re + amp_im * amp_im;
|
| 739 |
+
}
|
| 740 |
+
|
| 741 |
+
return total_prob;
|
| 742 |
+
}
|
| 743 |
+
|
| 744 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 745 |
+
* EDGE COMPACTION β Merge parallel CZ edges
|
| 746 |
+
*
|
| 747 |
+
* Multiple CZ edges between the same pair of sites can be merged:
|
| 748 |
+
* CZ Γ CZ = CZ with phase Ο^(2Β·aΒ·b) β equivalent to CZ^2
|
| 749 |
+
* n CZ edges β one edge with accumulated phase Ο^(nΒ·aΒ·b)
|
| 750 |
+
*
|
| 751 |
+
* For n β‘ 0 mod 6: the edge cancels (Ο^6 = 1) β remove entirely.
|
| 752 |
+
* For n β‘ 1 mod 6: standard CZ.
|
| 753 |
+
* For n β‘ 3 mod 6: anti-CZ (ΟΒ³ = -1).
|
| 754 |
+
*
|
| 755 |
+
* This preserves perfect phase coherence at any lattice scale.
|
| 756 |
+
* Without compaction, d-wave pairing bleeds out as parallel edges
|
| 757 |
+
* fragment the phase structure.
|
| 758 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 759 |
+
|
| 760 |
+
/* Merge parallel CZ edges between the same pair of sites.
 *
 * For each CZ edge, every later CZ edge on the same unordered pair is
 * swap-removed and counted. With n the resulting multiplicity:
 *   n mod 6 == 0 : the product is the identity, so the surviving edge is
 *                  removed as well;
 *   n mod 6 == 1 : the edge stays a plain CZ;
 *   otherwise    : the edge becomes a PHASE edge with
 *                  w(a,b) = ω^((n mod 6)·a·b) and fidelity 1.0.
 *
 * Adjacency lists and the cz_edges/phase_edges counters are kept in sync.
 * Fidelity statistics are not recomputed. Swap-removal changes the order
 * of the edge array. The inner scan visits every later edge, so the cost
 * is quadratic in the number of edges in the worst case. */
static inline void hpc_compact_edges(HPCGraph *g)
{
    uint64_t cur = 0;

    while (cur < g->n_edges) {
        HPCEdge *base = &g->edges[cur];
        if (base->type != HPC_EDGE_CZ) {
            cur++;
            continue;
        }

        const uint64_t sa = base->site_a;
        const uint64_t sb = base->site_b;
        int multiplicity = 1;  /* the base edge itself */

        /* Fold every later CZ edge on the same pair into the base edge. */
        uint64_t scan = cur + 1;
        while (scan < g->n_edges) {
            HPCEdge *cand = &g->edges[scan];
            const int same_pair =
                (cand->site_a == sa && cand->site_b == sb) ||
                (cand->site_a == sb && cand->site_b == sa);

            if (cand->type != HPC_EDGE_CZ || !same_pair) {
                scan++;
                continue;
            }

            multiplicity++;

            /* Detach the duplicate from both endpoints. */
            hpc_adj_remove(g, cand->site_a, scan);
            hpc_adj_remove(g, cand->site_b, scan);

            /* Swap-remove: move the tail edge into the freed slot and
             * repoint its adjacency entries. `scan` is not advanced, so
             * the moved-in edge is examined next. */
            const uint64_t tail = g->n_edges - 1;
            if (scan != tail) {
                hpc_adj_replace(g, g->edges[tail].site_a, tail, scan);
                hpc_adj_replace(g, g->edges[tail].site_b, tail, scan);
                g->edges[scan] = g->edges[tail];
            }
            g->n_edges--;
            g->cz_edges--;
        }

        const int reduced = multiplicity % 6;

        switch (reduced) {
        case 0: {
            /* ω^(6k) = 1: the merged edge is trivial, drop it. `cur` is
             * not advanced so the edge swapped in is processed next. */
            hpc_adj_remove(g, sa, cur);
            hpc_adj_remove(g, sb, cur);

            const uint64_t tail = g->n_edges - 1;
            if (cur != tail) {
                hpc_adj_replace(g, g->edges[tail].site_a, tail, cur);
                hpc_adj_replace(g, g->edges[tail].site_b, tail, cur);
                g->edges[cur] = g->edges[tail];
            }
            g->n_edges--;
            g->cz_edges--;
            break;
        }
        case 1:
            /* A single CZ: nothing to rewrite. */
            cur++;
            break;
        default:
            /* Accumulated phase ω^(reduced·a·b), stored explicitly. */
            base->type = HPC_EDGE_PHASE;
            base->fidelity = 1.0;  /* still exact */
            for (int a = 0; a < HPC_D; a++) {
                for (int b = 0; b < HPC_D; b++) {
                    uint32_t phase_idx = (uint32_t)(reduced * a * b) % HPC_D;
                    base->w_re[a][b] = HPC_W6_RE[phase_idx];
                    base->w_im[a][b] = HPC_W6_IM[phase_idx];
                }
            }
            g->cz_edges--;
            g->phase_edges++;
            cur++;
            break;
        }
    }
}
|
| 836 |
+
|
| 837 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 838 |
+
* BORN SAMPLING β Collapse site k
|
| 839 |
+
*
|
| 840 |
+
* Uses adjacency lists for O(degree) edge identification.
|
| 841 |
+
* Absorbs CZ phases into partners, removes resolved edges.
|
| 842 |
+
* This IS measurement-induced disentanglement.
|
| 843 |
+
* ββββββββββββββββββββββββββββββββββββββββοΏ½οΏ½ββββββββββββββββββββββββββββββ */
|
| 844 |
+
|
| 845 |
+
static inline uint32_t hpc_measure(HPCGraph *g, uint64_t site,
|
| 846 |
+
double random_01)
|
| 847 |
+
{
|
| 848 |
+
/* Compute marginals */
|
| 849 |
+
double probs[HPC_D];
|
| 850 |
+
double total = 0.0;
|
| 851 |
+
for (int v = 0; v < HPC_D; v++) {
|
| 852 |
+
probs[v] = hpc_marginal(g, site, v);
|
| 853 |
+
total += probs[v];
|
| 854 |
+
}
|
| 855 |
+
if (total > 0) {
|
| 856 |
+
for (int v = 0; v < HPC_D; v++) probs[v] /= total;
|
| 857 |
+
}
|
| 858 |
+
|
| 859 |
+
/* Sample */
|
| 860 |
+
double cumul = 0.0;
|
| 861 |
+
uint32_t outcome = HPC_D - 1;
|
| 862 |
+
for (int v = 0; v < HPC_D; v++) {
|
| 863 |
+
cumul += probs[v];
|
| 864 |
+
if (random_01 <= cumul) { outcome = v; break; }
|
| 865 |
+
}
|
| 866 |
+
|
| 867 |
+
/* Collapse local state to |outcomeβ© */
|
| 868 |
+
for (int v = 0; v < HPC_D; v++) {
|
| 869 |
+
g->locals[site].edge_re[v] = (v == (int)outcome) ? 1.0 : 0.0;
|
| 870 |
+
g->locals[site].edge_im[v] = 0.0;
|
| 871 |
+
}
|
| 872 |
+
g->locals[site].primary = VIEW_EDGE;
|
| 873 |
+
g->locals[site].dirty = DIRTY_VERTEX | DIRTY_DIAGONAL | DIRTY_FOLDED;
|
| 874 |
+
g->locals[site].delta_valid = 0;
|
| 875 |
+
triality_update_mask(&g->locals[site]);
|
| 876 |
+
|
| 877 |
+
/* Collect edge IDs touching this site from adjacency list β O(degree) */
|
| 878 |
+
uint64_t edges_to_remove[512];
|
| 879 |
+
uint64_t n_remove = 0;
|
| 880 |
+
const HPCAdjList *adj = &g->adj[site];
|
| 881 |
+
for (uint64_t i = 0; i < adj->count && n_remove < 512; i++)
|
| 882 |
+
edges_to_remove[n_remove++] = adj->edge_ids[i];
|
| 883 |
+
|
| 884 |
+
/* Absorb phases and remove edges */
|
| 885 |
+
for (uint64_t r = 0; r < n_remove; r++) {
|
| 886 |
+
uint64_t eid = edges_to_remove[r];
|
| 887 |
+
if (eid >= g->n_edges) continue; /* Already removed by swap */
|
| 888 |
+
|
| 889 |
+
HPCEdge *edge = &g->edges[eid];
|
| 890 |
+
/* Verify this edge still touches our site (may have been swapped) */
|
| 891 |
+
if (edge->site_a != site && edge->site_b != site) continue;
|
| 892 |
+
|
| 893 |
+
uint64_t partner = (edge->site_a == site) ?
|
| 894 |
+
edge->site_b : edge->site_a;
|
| 895 |
+
TrialityQuhit *p = &g->locals[partner];
|
| 896 |
+
|
| 897 |
+
/* Absorb the phase: partner[k] *= w(outcome, k) or w(k, outcome) */
|
| 898 |
+
for (int k = 0; k < HPC_D; k++) {
|
| 899 |
+
double w_re, w_im;
|
| 900 |
+
if (edge->type == HPC_EDGE_CZ) {
|
| 901 |
+
uint32_t phase_idx = (outcome * k) % HPC_D;
|
| 902 |
+
w_re = HPC_W6_RE[phase_idx];
|
| 903 |
+
w_im = HPC_W6_IM[phase_idx];
|
| 904 |
+
} else if (edge->site_a == site) {
|
| 905 |
+
w_re = edge->w_re[outcome][k];
|
| 906 |
+
w_im = edge->w_im[outcome][k];
|
| 907 |
+
} else {
|
| 908 |
+
w_re = edge->w_re[k][outcome];
|
| 909 |
+
w_im = edge->w_im[k][outcome];
|
| 910 |
+
}
|
| 911 |
+
|
| 912 |
+
double old_re = p->edge_re[k], old_im = p->edge_im[k];
|
| 913 |
+
p->edge_re[k] = old_re * w_re - old_im * w_im;
|
| 914 |
+
p->edge_im[k] = old_re * w_im + old_im * w_re;
|
| 915 |
+
}
|
| 916 |
+
p->dirty = DIRTY_VERTEX | DIRTY_DIAGONAL | DIRTY_FOLDED;
|
| 917 |
+
p->delta_valid = 0;
|
| 918 |
+
|
| 919 |
+
/* Track edge type removal */
|
| 920 |
+
if (edge->type == HPC_EDGE_CZ) g->cz_edges--;
|
| 921 |
+
else if (edge->type == HPC_EDGE_PHASE) g->phase_edges--;
|
| 922 |
+
else g->syntheme_edges--;
|
| 923 |
+
|
| 924 |
+
/* Remove from adjacency lists */
|
| 925 |
+
hpc_adj_remove(g, site, eid);
|
| 926 |
+
hpc_adj_remove(g, partner, eid);
|
| 927 |
+
|
| 928 |
+
/* Swap-remove the edge */
|
| 929 |
+
uint64_t last = g->n_edges - 1;
|
| 930 |
+
if (eid != last) {
|
| 931 |
+
/* Update adjacency for the swapped-in edge */
|
| 932 |
+
hpc_adj_replace(g, g->edges[last].site_a, last, eid);
|
| 933 |
+
hpc_adj_replace(g, g->edges[last].site_b, last, eid);
|
| 934 |
+
g->edges[eid] = g->edges[last];
|
| 935 |
+
|
| 936 |
+
/* Update remaining removal targets that pointed to 'last' */
|
| 937 |
+
for (uint64_t r2 = r + 1; r2 < n_remove; r2++)
|
| 938 |
+
if (edges_to_remove[r2] == last)
|
| 939 |
+
edges_to_remove[r2] = eid;
|
| 940 |
+
}
|
| 941 |
+
g->n_edges--;
|
| 942 |
+
}
|
| 943 |
+
|
| 944 |
+
g->measurements++;
|
| 945 |
+
hpc_update_fidelity_stats(g);
|
| 946 |
+
return outcome;
|
| 947 |
+
}
|
| 948 |
+
|
| 949 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 950 |
+
* NORMALIZATION CHECK β Ξ£ |Ο|Β² over ALL indices
|
| 951 |
+
*
|
| 952 |
+
* Cost: O(D^N Γ (N+E)) β small N only!
|
| 953 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 954 |
+
|
| 955 |
+
static inline double hpc_norm_sq(const HPCGraph *g)
|
| 956 |
+
{
|
| 957 |
+
if (g->n_sites > 8) {
|
| 958 |
+
fprintf(stderr, "hpc_norm_sq: N=%lu too large for brute force\n",
|
| 959 |
+
g->n_sites);
|
| 960 |
+
return -1.0;
|
| 961 |
+
}
|
| 962 |
+
|
| 963 |
+
uint64_t total_configs = 1;
|
| 964 |
+
for (uint64_t i = 0; i < g->n_sites; i++) total_configs *= HPC_D;
|
| 965 |
+
|
| 966 |
+
double norm = 0.0;
|
| 967 |
+
uint32_t indices[8];
|
| 968 |
+
|
| 969 |
+
for (uint64_t cfg = 0; cfg < total_configs; cfg++) {
|
| 970 |
+
uint64_t tmp = cfg;
|
| 971 |
+
for (uint64_t i = 0; i < g->n_sites; i++) {
|
| 972 |
+
indices[i] = tmp % HPC_D;
|
| 973 |
+
tmp /= HPC_D;
|
| 974 |
+
}
|
| 975 |
+
norm += hpc_probability(g, indices);
|
| 976 |
+
}
|
| 977 |
+
return norm;
|
| 978 |
+
}
|
| 979 |
+
|
| 980 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 981 |
+
* EXOTIC INVARIANT β weighted Ξ across all sites
|
| 982 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 983 |
+
|
| 984 |
+
static inline double hpc_exotic_invariant(HPCGraph *g)
|
| 985 |
+
{
|
| 986 |
+
double total = 0.0;
|
| 987 |
+
for (uint64_t i = 0; i < g->n_sites; i++)
|
| 988 |
+
total += triality_exotic_invariant_cached(&g->locals[i]);
|
| 989 |
+
return total / g->n_sites;
|
| 990 |
+
}
|
| 991 |
+
|
| 992 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 993 |
+
* ENTROPY ESTIMATE β across a bipartition cut
|
| 994 |
+
*
|
| 995 |
+
* CZ edges contribute exactly logβ(D) bits per crossing edge.
|
| 996 |
+
* General edges contribute fidelity-weighted logβ(D) bits.
|
| 997 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 998 |
+
|
| 999 |
+
static inline double hpc_entropy_cut(const HPCGraph *g, uint64_t cut_after)
|
| 1000 |
+
{
|
| 1001 |
+
double entropy = 0.0;
|
| 1002 |
+
for (uint64_t e = 0; e < g->n_edges; e++) {
|
| 1003 |
+
uint64_t sa = g->edges[e].site_a;
|
| 1004 |
+
uint64_t sb = g->edges[e].site_b;
|
| 1005 |
+
if ((sa <= cut_after && sb > cut_after) ||
|
| 1006 |
+
(sb <= cut_after && sa > cut_after)) {
|
| 1007 |
+
entropy += g->edges[e].fidelity * log2((double)HPC_D);
|
| 1008 |
+
}
|
| 1009 |
+
}
|
| 1010 |
+
return entropy;
|
| 1011 |
+
}
|
| 1012 |
+
|
| 1013 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 1014 |
+
* DIAGNOSTICS
|
| 1015 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 1016 |
+
|
| 1017 |
+
static inline void hpc_print_stats(const HPCGraph *g)
|
| 1018 |
+
{
|
| 1019 |
+
printf("βββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n");
|
| 1020 |
+
printf("β Holographic Phase Graph Statistics β\n");
|
| 1021 |
+
printf("β ββββββββββββββββββββββββββββββββββββββββββββββββββββββ£\n");
|
| 1022 |
+
printf("β Sites: %10lu β\n", g->n_sites);
|
| 1023 |
+
printf("β Total edges: %10lu β\n", g->n_edges);
|
| 1024 |
+
printf("β CZ (exact): %10lu β\n", g->cz_edges);
|
| 1025 |
+
printf("β Phase (lossy): %10lu β\n", g->phase_edges);
|
| 1026 |
+
printf("β Syntheme: %10lu β\n", g->syntheme_edges);
|
| 1027 |
+
printf("β Gate log: %10lu β\n", g->n_log);
|
| 1028 |
+
printf("β Amp evals: %10lu β\n", g->amp_evals);
|
| 1029 |
+
printf("β Measurements: %10lu β\n", g->measurements);
|
| 1030 |
+
printf("β Min fidelity: %10.6f β\n", g->min_fidelity);
|
| 1031 |
+
printf("β Avg fidelity: %10.6f β\n", g->avg_fidelity);
|
| 1032 |
+
|
| 1033 |
+
uint64_t mem_bytes = g->n_sites * sizeof(TrialityQuhit) +
|
| 1034 |
+
g->n_edges * sizeof(HPCEdge) +
|
| 1035 |
+
g->n_log * sizeof(HPCGateEntry) +
|
| 1036 |
+
sizeof(HPCGraph);
|
| 1037 |
+
printf("β Memory: %10lu bytes β\n", mem_bytes);
|
| 1038 |
+
|
| 1039 |
+
double full_sv_log = g->n_sites * log10(6.0) + log10(16.0);
|
| 1040 |
+
printf("β Full SV: 10^%.1f bytes (impossible) β\n", full_sv_log);
|
| 1041 |
+
printf("βββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n");
|
| 1042 |
+
}
|
| 1043 |
+
|
| 1044 |
+
static inline void hpc_print_state(const HPCGraph *g, const char *label)
|
| 1045 |
+
{
|
| 1046 |
+
printf("ββ %s ββ\n", label);
|
| 1047 |
+
printf(" Sites: %lu, Edges: %lu (CZ:%lu Phase:%lu Synth:%lu)\n",
|
| 1048 |
+
g->n_sites, g->n_edges, g->cz_edges, g->phase_edges, g->syntheme_edges);
|
| 1049 |
+
printf(" Fidelity: min=%.4f avg=%.4f\n", g->min_fidelity, g->avg_fidelity);
|
| 1050 |
+
for (uint64_t i = 0; i < g->n_sites && i < 8; i++) {
|
| 1051 |
+
printf(" Site %lu: [", i);
|
| 1052 |
+
for (int j = 0; j < HPC_D; j++) {
|
| 1053 |
+
printf("%.3f%+.3fi", g->locals[i].edge_re[j],
|
| 1054 |
+
g->locals[i].edge_im[j]);
|
| 1055 |
+
if (j < HPC_D - 1) printf(", ");
|
| 1056 |
+
}
|
| 1057 |
+
printf("]\n");
|
| 1058 |
+
}
|
| 1059 |
+
if (g->n_sites > 8) printf(" ... (%lu more sites)\n", g->n_sites - 8);
|
| 1060 |
+
}
|
| 1061 |
+
|
| 1062 |
+
#endif /* HPC_GRAPH_H */
|
hpc_mobius.h
ADDED
|
@@ -0,0 +1,833 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
 * hpc_mobius.h — The Möbius Amplitude Sheet
 *
 * The Devil's answer to "hold all superposition at once."
 *
 * The HPC graph encodes 6^N amplitudes implicitly as:
 *   ψ(i₁,...,iₙ) = [Π_k aₖ(iₖ)] × [Π_edges w_e(iₐ, iᵦ)]
 *
 * But this product is computed-and-discarded for each point query.
 * The Möbius Sheet HOLDS the full amplitude surface by maintaining
 * per-site "dressed amplitudes" that pre-absorb entanglement from
 * all touching edges via belief propagation message passing.
 *
 * Each site has two faces (the Möbius twist):
 *   Forward: dressed[k][v]    — local amp × absorbed edge messages
 *   Shadow:  message[k→p][v]  — outgoing message to partner p
 *
 * The forward face of site A is defined IN TERMS OF the shadow faces
 * of its neighbors. This self-referential loop converges to exact
 * marginals on tree graphs and approximates on loopy graphs.
 *
 * KEY INSIGHT: Messages operate in the PROBABILITY domain (|·|²),
 * not the amplitude domain. Complex phases create destructive
 * interference feedback loops in BP. Instead:
 *   - Messages carry marginal probability beliefs: m_{p→k}[v] ∈ ℝ⁺
 *   - Edge factors are |w_e(u,v)|² (phase magnitude squared)
 *   - For CZ edges: |ω^(u·v)|² = 1 for all u,v → messages = local |a|²
 *   - Dressed amplitudes are RECONSTRUCTED from prob-domain beliefs
 *     by re-introducing the phase structure from the graph
 *
 * Once converged:
 *   marginal[k][v] = P(site_k = v) — O(1) lookup
 *   ψ(i₁,...,iₙ) reconstructable from sheets in O(N + E)
 *   Surface walk enumerates all |ψ|² > τ via sheet intersection
 */
|
| 36 |
+
|
| 37 |
+
#ifndef HPC_MOBIUS_H
|
| 38 |
+
#define HPC_MOBIUS_H
|
| 39 |
+
|
| 40 |
+
#include "hpc_graph.h"
|
| 41 |
+
#include "hpc_contract.h"
|
| 42 |
+
#include "hpc_amplitude.h"
|
| 43 |
+
#include <math.h>
|
| 44 |
+
#include <stdlib.h>
|
| 45 |
+
#include <string.h>
|
| 46 |
+
#include <stdio.h>
|
| 47 |
+
|
| 48 |
+
/* =======================================================================
 * CONSTANTS
 * ======================================================================= */

/* Number of basis values per site. */
#define MOBIUS_D 6
/* Upper bound on the number of edges touching one site. */
#define MOBIUS_MAX_DEGREE 128
/* Iteration cap for belief propagation. */
#define MOBIUS_BP_MAX_ITER 100
/* Residual below which belief propagation counts as converged. */
#define MOBIUS_BP_TOL 1e-14
/* Damping factor used to stabilise belief propagation on loopy graphs. */
#define MOBIUS_DAMPING 0.3
|
| 57 |
+
|
| 58 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 59 |
+
* PROBABILITY MESSAGE β A D-dimensional real non-negative vector
|
| 60 |
+
*
|
| 61 |
+
* Messages flow along edges in the PROBABILITY domain.
|
| 62 |
+
* m_{pβk}[v] represents the belief about site k taking value v,
|
| 63 |
+
* as conveyed by neighbor p through their shared edge.
|
| 64 |
+
*
|
| 65 |
+
* This is classical sum-product BP on the factor graph where:
|
| 66 |
+
* Variable nodes = sites
|
| 67 |
+
* Factor nodes = edges (with factor |w(u,v)|Β² Γ local priors)
|
| 68 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 69 |
+
|
| 70 |
+
typedef struct {
|
| 71 |
+
double p[MOBIUS_D]; /* Probability-domain belief, non-negative */
|
| 72 |
+
} MobiusProbMsg;
|
| 73 |
+
|
| 74 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 75 |
+
* SITE SHEET β One face of the MΓΆbius surface
|
| 76 |
+
*
|
| 77 |
+
* Belief about site k, value v:
|
| 78 |
+
* belief[v] = |aβ(v)|Β² Γ Ξ _{messages mβk} m[v]
|
| 79 |
+
*
|
| 80 |
+
* Dressed amplitudes are reconstructed from beliefs by re-introducing
|
| 81 |
+
* the original complex phases from the local state and edge weights.
|
| 82 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 83 |
+
|
| 84 |
+
typedef struct {
|
| 85 |
+
/* Dressed (forward) face β complex amplitudes consistent with beliefs */
|
| 86 |
+
double dressed_re[MOBIUS_D];
|
| 87 |
+
double dressed_im[MOBIUS_D];
|
| 88 |
+
|
| 89 |
+
/* Cached marginal probabilities (normalized beliefs) */
|
| 90 |
+
double marginal[MOBIUS_D];
|
| 91 |
+
|
| 92 |
+
/* Incoming probability messages: one per touching edge */
|
| 93 |
+
MobiusProbMsg *msg_in;
|
| 94 |
+
uint64_t n_messages;
|
| 95 |
+
uint64_t msg_capacity;
|
| 96 |
+
|
| 97 |
+
/* Vesica decomposition of dressed amplitudes */
|
| 98 |
+
double vesica_re[3], vesica_im[3];
|
| 99 |
+
double wave_re[3], wave_im[3];
|
| 100 |
+
int vesica_valid;
|
| 101 |
+
|
| 102 |
+
/* Interference witness: phase coherence measure */
|
| 103 |
+
double coherence;
|
| 104 |
+
} MobiusSiteSheet;
|
| 105 |
+
|
| 106 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 107 |
+
* THE MΓBIUS AMPLITUDE SHEET β All superposition, held at once
|
| 108 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 109 |
+
|
| 110 |
+
typedef struct {
|
| 111 |
+
const HPCGraph *graph;
|
| 112 |
+
|
| 113 |
+
uint64_t n_sites;
|
| 114 |
+
MobiusSiteSheet *sheets;
|
| 115 |
+
|
| 116 |
+
int converged;
|
| 117 |
+
int iterations;
|
| 118 |
+
double max_residual;
|
| 119 |
+
|
| 120 |
+
uint64_t msg_updates;
|
| 121 |
+
uint64_t amplitude_queries;
|
| 122 |
+
uint64_t surface_walks;
|
| 123 |
+
double bethe_free_energy;
|
| 124 |
+
} MobiusAmplitudeSheet;
|
| 125 |
+
|
| 126 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 127 |
+
* LIFECYCLE
|
| 128 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 129 |
+
|
| 130 |
+
static inline MobiusAmplitudeSheet *mobius_create(const HPCGraph *g)
|
| 131 |
+
{
|
| 132 |
+
MobiusAmplitudeSheet *ms = (MobiusAmplitudeSheet *)calloc(1, sizeof(MobiusAmplitudeSheet));
|
| 133 |
+
if (!ms) return NULL;
|
| 134 |
+
|
| 135 |
+
ms->graph = g;
|
| 136 |
+
ms->n_sites = g->n_sites;
|
| 137 |
+
ms->sheets = (MobiusSiteSheet *)calloc(g->n_sites, sizeof(MobiusSiteSheet));
|
| 138 |
+
if (!ms->sheets) { free(ms); return NULL; }
|
| 139 |
+
|
| 140 |
+
for (uint64_t k = 0; k < g->n_sites; k++) {
|
| 141 |
+
MobiusSiteSheet *s = &ms->sheets[k];
|
| 142 |
+
const HPCAdjList *adj = &g->adj[k];
|
| 143 |
+
|
| 144 |
+
s->n_messages = adj->count;
|
| 145 |
+
s->msg_capacity = adj->count > 0 ? adj->count : 1;
|
| 146 |
+
s->msg_in = (MobiusProbMsg *)calloc(s->msg_capacity, sizeof(MobiusProbMsg));
|
| 147 |
+
|
| 148 |
+
/* Initialize messages to uniform (no information) */
|
| 149 |
+
for (uint64_t m = 0; m < s->n_messages; m++)
|
| 150 |
+
for (int v = 0; v < MOBIUS_D; v++)
|
| 151 |
+
s->msg_in[m].p[v] = 1.0;
|
| 152 |
+
|
| 153 |
+
/* Initialize marginals from local probabilities */
|
| 154 |
+
double total = 0.0;
|
| 155 |
+
for (int v = 0; v < MOBIUS_D; v++) {
|
| 156 |
+
s->marginal[v] = g->locals[k].edge_re[v] * g->locals[k].edge_re[v] +
|
| 157 |
+
g->locals[k].edge_im[v] * g->locals[k].edge_im[v];
|
| 158 |
+
total += s->marginal[v];
|
| 159 |
+
}
|
| 160 |
+
if (total > 1e-30)
|
| 161 |
+
for (int v = 0; v < MOBIUS_D; v++)
|
| 162 |
+
s->marginal[v] /= total;
|
| 163 |
+
|
| 164 |
+
/* Initialize dressed amplitudes from local state */
|
| 165 |
+
for (int v = 0; v < MOBIUS_D; v++) {
|
| 166 |
+
s->dressed_re[v] = g->locals[k].edge_re[v];
|
| 167 |
+
s->dressed_im[v] = g->locals[k].edge_im[v];
|
| 168 |
+
}
|
| 169 |
+
|
| 170 |
+
s->vesica_valid = 0;
|
| 171 |
+
s->coherence = 0.5;
|
| 172 |
+
}
|
| 173 |
+
|
| 174 |
+
return ms;
|
| 175 |
+
}
|
| 176 |
+
|
| 177 |
+
/* Releases a sheet: every site's message buffer, the site array, and the
 * sheet itself.  Passing NULL is allowed and does nothing.  The graph the
 * sheet points at is not touched. */
static inline void mobius_destroy(MobiusAmplitudeSheet *ms)
{
    if (ms == NULL) return;

    MobiusSiteSheet *sites = ms->sheets;
    if (sites != NULL) {
        uint64_t remaining = ms->n_sites;
        while (remaining > 0) {
            remaining--;
            free(sites[remaining].msg_in);
        }
        free(sites);
    }
    free(ms);
}
|
| 187 |
+
|
| 188 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 189 |
+
* INTERNAL: Find the message index for an edge in a site's adjacency
|
| 190 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 191 |
+
|
| 192 |
+
static inline int mobius_find_msg_idx(const HPCGraph *g, uint64_t site, uint64_t eid)
|
| 193 |
+
{
|
| 194 |
+
const HPCAdjList *adj = &g->adj[site];
|
| 195 |
+
for (uint64_t i = 0; i < adj->count; i++)
|
| 196 |
+
if (adj->edge_ids[i] == eid) return (int)i;
|
| 197 |
+
return -1;
|
| 198 |
+
}
|
| 199 |
+
|
| 200 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 201 |
+
* INTERNAL: Compute edge factor |w_e(va, vb)|Β²
|
| 202 |
+
*
|
| 203 |
+
* For CZ edges: |Ο^(vaΒ·vb)|Β² = 1.0 always (unit phases).
|
| 204 |
+
* For general edges: |w[va][vb]|Β².
|
| 205 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 206 |
+
|
| 207 |
+
static inline double mobius_edge_factor(const HPCEdge *edge,
|
| 208 |
+
uint32_t va, uint32_t vb)
|
| 209 |
+
{
|
| 210 |
+
if (edge->type == HPC_EDGE_CZ) {
|
| 211 |
+
return 1.0; /* |Ο^(vaΒ·vb)|Β² = 1 always */
|
| 212 |
+
} else {
|
| 213 |
+
double wr = edge->w_re[va][vb];
|
| 214 |
+
double wi = edge->w_im[va][vb];
|
| 215 |
+
return wr * wr + wi * wi;
|
| 216 |
+
}
|
| 217 |
+
}
|
| 218 |
+
|
| 219 |
+
/* βββββββββββββββββββββββββββββοΏ½οΏ½βββββββββββββββββββββββββββββββββββββββββ
|
| 220 |
+
* INTERNAL: Compute edge weight w_e(va, vb) (complex)
|
| 221 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 222 |
+
|
| 223 |
+
static inline void mobius_edge_weight(const HPCEdge *edge,
|
| 224 |
+
uint32_t va, uint32_t vb,
|
| 225 |
+
double *w_re, double *w_im)
|
| 226 |
+
{
|
| 227 |
+
if (edge->type == HPC_EDGE_CZ) {
|
| 228 |
+
uint32_t pidx = (va * vb) % MOBIUS_D;
|
| 229 |
+
*w_re = HPC_W6_RE[pidx];
|
| 230 |
+
*w_im = HPC_W6_IM[pidx];
|
| 231 |
+
} else {
|
| 232 |
+
*w_re = edge->w_re[va][vb];
|
| 233 |
+
*w_im = edge->w_im[va][vb];
|
| 234 |
+
}
|
| 235 |
+
}
|
| 236 |
+
|
| 237 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 238 |
+
* BELIEF PROPAGATION β Probability-domain message passing
|
| 239 |
+
*
|
| 240 |
+
* Sum-product BP on the factor graph:
|
| 241 |
+
*
|
| 242 |
+
* Message from variable p to variable k through factor f(p,k):
|
| 243 |
+
* m_{pβk}[vk] = Ξ£_{vp} |aβ(vp)|Β² Γ |w(vp,vk)|Β² Γ Ξ _{m'βp, m'β k} m'[vp]
|
| 244 |
+
*
|
| 245 |
+
* This is standard BP in the probability domain.
|
| 246 |
+
* For CZ edges: |w|Β² = 1, so messages just propagate local priors.
|
| 247 |
+
* For general edges: |w|Β² provides the coupling structure.
|
| 248 |
+
*
|
| 249 |
+
* After convergence:
|
| 250 |
+
* belief[k][v] = |aβ(v)|Β² Γ Ξ _{mβk} m[v]
|
| 251 |
+
* marginal[k][v] = belief[k][v] / Ξ£_u belief[k][u]
|
| 252 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 253 |
+
|
| 254 |
+
static inline double mobius_bp_iterate(MobiusAmplitudeSheet *ms)
|
| 255 |
+
{
|
| 256 |
+
const HPCGraph *g = ms->graph;
|
| 257 |
+
double max_delta = 0.0;
|
| 258 |
+
|
| 259 |
+
for (uint64_t eid = 0; eid < g->n_edges; eid++) {
|
| 260 |
+
const HPCEdge *edge = &g->edges[eid];
|
| 261 |
+
uint64_t sa = edge->site_a;
|
| 262 |
+
uint64_t sb = edge->site_b;
|
| 263 |
+
|
| 264 |
+
int idx_a_in_b = mobius_find_msg_idx(g, sb, eid);
|
| 265 |
+
int idx_b_in_a = mobius_find_msg_idx(g, sa, eid);
|
| 266 |
+
if (idx_a_in_b < 0 || idx_b_in_a < 0) continue;
|
| 267 |
+
|
| 268 |
+
/* ββ Message aβb: for each vb, sum over va ββ */
|
| 269 |
+
{
|
| 270 |
+
MobiusProbMsg new_msg;
|
| 271 |
+
const MobiusSiteSheet *sheet_a = &ms->sheets[sa];
|
| 272 |
+
const HPCAdjList *adj_a = &g->adj[sa];
|
| 273 |
+
|
| 274 |
+
for (int vb = 0; vb < MOBIUS_D; vb++) {
|
| 275 |
+
double sum = 0.0;
|
| 276 |
+
|
| 277 |
+
for (int va = 0; va < MOBIUS_D; va++) {
|
| 278 |
+
/* Local probability at site a for value va */
|
| 279 |
+
double local_prob = g->locals[sa].edge_re[va] * g->locals[sa].edge_re[va] +
|
| 280 |
+
g->locals[sa].edge_im[va] * g->locals[sa].edge_im[va];
|
| 281 |
+
|
| 282 |
+
/* Multiply by all incoming messages to a EXCEPT from b */
|
| 283 |
+
for (uint64_t mi = 0; mi < adj_a->count; mi++) {
|
| 284 |
+
if (adj_a->edge_ids[mi] == eid) continue;
|
| 285 |
+
local_prob *= sheet_a->msg_in[mi].p[va];
|
| 286 |
+
}
|
| 287 |
+
|
| 288 |
+
/* Multiply by edge factor |w(va, vb)|Β² */
|
| 289 |
+
double ef = mobius_edge_factor(edge, va, vb);
|
| 290 |
+
sum += local_prob * ef;
|
| 291 |
+
}
|
| 292 |
+
|
| 293 |
+
new_msg.p[vb] = sum;
|
| 294 |
+
}
|
| 295 |
+
|
| 296 |
+
/* Normalize message */
|
| 297 |
+
double msg_sum = 0.0;
|
| 298 |
+
for (int v = 0; v < MOBIUS_D; v++) msg_sum += new_msg.p[v];
|
| 299 |
+
if (msg_sum > 1e-30) {
|
| 300 |
+
double inv = 1.0 / msg_sum;
|
| 301 |
+
for (int v = 0; v < MOBIUS_D; v++) new_msg.p[v] *= inv;
|
| 302 |
+
}
|
| 303 |
+
|
| 304 |
+
/* Damped update + compute residual */
|
| 305 |
+
MobiusProbMsg *old_msg = &ms->sheets[sb].msg_in[idx_a_in_b];
|
| 306 |
+
double delta = 0.0;
|
| 307 |
+
for (int v = 0; v < MOBIUS_D; v++) {
|
| 308 |
+
double updated = MOBIUS_DAMPING * new_msg.p[v] +
|
| 309 |
+
(1.0 - MOBIUS_DAMPING) * old_msg->p[v];
|
| 310 |
+
double diff = updated - old_msg->p[v];
|
| 311 |
+
delta += diff * diff;
|
| 312 |
+
old_msg->p[v] = updated;
|
| 313 |
+
}
|
| 314 |
+
if (delta > max_delta) max_delta = delta;
|
| 315 |
+
ms->msg_updates++;
|
| 316 |
+
}
|
| 317 |
+
|
| 318 |
+
/* ββ Message bβa: for each va, sum over vb ββ */
|
| 319 |
+
{
|
| 320 |
+
MobiusProbMsg new_msg;
|
| 321 |
+
const MobiusSiteSheet *sheet_b = &ms->sheets[sb];
|
| 322 |
+
const HPCAdjList *adj_b = &g->adj[sb];
|
| 323 |
+
|
| 324 |
+
for (int va = 0; va < MOBIUS_D; va++) {
|
| 325 |
+
double sum = 0.0;
|
| 326 |
+
|
| 327 |
+
for (int vb = 0; vb < MOBIUS_D; vb++) {
|
| 328 |
+
double local_prob = g->locals[sb].edge_re[vb] * g->locals[sb].edge_re[vb] +
|
| 329 |
+
g->locals[sb].edge_im[vb] * g->locals[sb].edge_im[vb];
|
| 330 |
+
|
| 331 |
+
for (uint64_t mi = 0; mi < adj_b->count; mi++) {
|
| 332 |
+
if (adj_b->edge_ids[mi] == eid) continue;
|
| 333 |
+
local_prob *= sheet_b->msg_in[mi].p[vb];
|
| 334 |
+
}
|
| 335 |
+
|
| 336 |
+
/* Edge factor: |w(va, vb)|Β²
|
| 337 |
+
* For message bβa we sum over vb for each va target.
|
| 338 |
+
* Factor is |w(va, vb)|Β² same as stored. */
|
| 339 |
+
double ef = mobius_edge_factor(edge, va, vb);
|
| 340 |
+
sum += local_prob * ef;
|
| 341 |
+
}
|
| 342 |
+
|
| 343 |
+
new_msg.p[va] = sum;
|
| 344 |
+
}
|
| 345 |
+
|
| 346 |
+
double msg_sum = 0.0;
|
| 347 |
+
for (int v = 0; v < MOBIUS_D; v++) msg_sum += new_msg.p[v];
|
| 348 |
+
if (msg_sum > 1e-30) {
|
| 349 |
+
double inv = 1.0 / msg_sum;
|
| 350 |
+
for (int v = 0; v < MOBIUS_D; v++) new_msg.p[v] *= inv;
|
| 351 |
+
}
|
| 352 |
+
|
| 353 |
+
MobiusProbMsg *old_msg = &ms->sheets[sa].msg_in[idx_b_in_a];
|
| 354 |
+
double delta = 0.0;
|
| 355 |
+
for (int v = 0; v < MOBIUS_D; v++) {
|
| 356 |
+
double updated = MOBIUS_DAMPING * new_msg.p[v] +
|
| 357 |
+
(1.0 - MOBIUS_DAMPING) * old_msg->p[v];
|
| 358 |
+
double diff = updated - old_msg->p[v];
|
| 359 |
+
delta += diff * diff;
|
| 360 |
+
old_msg->p[v] = updated;
|
| 361 |
+
}
|
| 362 |
+
if (delta > max_delta) max_delta = delta;
|
| 363 |
+
ms->msg_updates++;
|
| 364 |
+
}
|
| 365 |
+
}
|
| 366 |
+
|
| 367 |
+
return max_delta;
|
| 368 |
+
}
|
| 369 |
+
|
| 370 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 371 |
+
* COMPUTE BELIEFS β Update marginals and dressed amplitudes
|
| 372 |
+
*
|
| 373 |
+
* Marginals (probability domain):
|
| 374 |
+
* belief[k][v] = |aβ(v)|Β² Γ Ξ _{mβk} m[v]
|
| 375 |
+
* marginal[k][v] = belief[k][v] / Z_k
|
| 376 |
+
*
|
| 377 |
+
* Dressed amplitudes (complex domain):
|
| 378 |
+
* dressed[k][v] = aβ(v) Γ β(marginal[k][v] / |aβ(v)|Β²)
|
| 379 |
+
* This preserves the original phase while scaling the magnitude
|
| 380 |
+
* to match the converged marginal probability.
|
| 381 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 382 |
+
|
| 383 |
+
/*
 * Rebuild the cached beliefs of every site from its incoming messages.
 *
 * With a(v) = (locals[k].edge_re[v], locals[k].edge_im[v]) the local amplitude:
 *   weight[v]   = |a(v)|^2 * product over incoming messages of msg.p[v]
 *   marginal[v] = weight[v] / sum_v weight[v]
 *                 (uniform 1/MOBIUS_D when that sum is <= 1e-30)
 *   dressed[v]  = a(v) * sqrt(marginal[v] / |a(v)|^2)
 *                 (zero when |a(v)|^2 <= 1e-30), i.e. the phase of a(v) is
 *                 kept and the magnitude becomes sqrt(marginal[v])
 *   coherence   = |sum_v dressed[v]|^2 / (MOBIUS_D * sum_v |dressed[v]|^2)
 *                 (0.5 when the denominator sum is <= 1e-30)
 *
 * The cached vesica/wave decomposition is invalidated for every site.
 */
static inline void mobius_compute_beliefs(MobiusAmplitudeSheet *ms)
{
    const HPCGraph *graph = ms->graph;
    uint64_t site;

    for (site = 0; site < ms->n_sites; site++) {
        MobiusSiteSheet *sheet = &ms->sheets[site];
        double bare_prob[MOBIUS_D];   /* |a(v)|^2, reused by the dressing step */
        double weight[MOBIUS_D];      /* unnormalised belief */
        double norm = 0.0;
        double sum_re = 0.0, sum_im = 0.0, dressed_norm = 0.0;
        double overlap;
        int v;

        /* Local Born weight times every incoming message. */
        for (v = 0; v < MOBIUS_D; v++) {
            uint64_t m;

            bare_prob[v] = graph->locals[site].edge_re[v] * graph->locals[site].edge_re[v] +
                           graph->locals[site].edge_im[v] * graph->locals[site].edge_im[v];
            weight[v] = bare_prob[v];
            for (m = 0; m < sheet->n_messages; m++)
                weight[v] *= sheet->msg_in[m].p[v];
            norm += weight[v];
        }

        /* Normalise, falling back to the uniform distribution. */
        for (v = 0; v < MOBIUS_D; v++) {
            if (norm > 1e-30)
                sheet->marginal[v] = weight[v] / norm;
            else
                sheet->marginal[v] = 1.0 / MOBIUS_D;
        }

        /* Dress the local amplitude and accumulate the coherence sums. */
        for (v = 0; v < MOBIUS_D; v++) {
            if (bare_prob[v] > 1e-30) {
                double gain = sqrt(sheet->marginal[v] / bare_prob[v]);

                sheet->dressed_re[v] = graph->locals[site].edge_re[v] * gain;
                sheet->dressed_im[v] = graph->locals[site].edge_im[v] * gain;
            } else {
                sheet->dressed_re[v] = 0.0;
                sheet->dressed_im[v] = 0.0;
            }

            sum_re += sheet->dressed_re[v];
            sum_im += sheet->dressed_im[v];
            dressed_norm += sheet->dressed_re[v] * sheet->dressed_re[v] +
                            sheet->dressed_im[v] * sheet->dressed_im[v];
        }

        overlap = sum_re * sum_re + sum_im * sum_im;
        if (dressed_norm > 1e-30)
            sheet->coherence = overlap / (MOBIUS_D * dressed_norm);
        else
            sheet->coherence = 0.5;

        /* The dressed amplitudes changed, so any cached decomposition is stale. */
        sheet->vesica_valid = 0;
    }
}
|
| 446 |
+
|
| 447 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 448 |
+
* CONVERGE β Run belief propagation until convergence
|
| 449 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 450 |
+
|
| 451 |
+
/*
 * Run belief-propagation sweeps until the residual drops below
 * MOBIUS_BP_TOL or MOBIUS_BP_MAX_ITER sweeps have been made, then refresh
 * the cached beliefs.
 *
 * Updates ms->converged, ms->iterations and ms->max_residual.
 * Returns the number of sweeps performed (0 for a graph without edges,
 * which is treated as converged immediately).
 */
static inline int mobius_converge(MobiusAmplitudeSheet *ms)
{
    int sweep = 0;

    /* No edges: there are no messages to iterate on. */
    if (ms->graph->n_edges == 0) {
        mobius_compute_beliefs(ms);
        ms->converged = 1;
        ms->iterations = 0;
        ms->max_residual = 0.0;
        return 0;
    }

    ms->converged = 0;
    while (sweep < MOBIUS_BP_MAX_ITER) {
        double change = mobius_bp_iterate(ms);

        sweep++;
        ms->iterations = sweep;
        ms->max_residual = change;

        if (change < MOBIUS_BP_TOL) {
            ms->converged = 1;
            break;
        }
    }

    mobius_compute_beliefs(ms);

    /* Accept a run that hit the sweep limit with a residual below 1e-8. */
    if (!ms->converged && ms->max_residual < 1e-8)
        ms->converged = 1;

    return ms->iterations;
}
|
| 479 |
+
|
| 480 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 481 |
+
* O(1) MARGINAL PROBABILITY β From cached beliefs
|
| 482 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 483 |
+
|
| 484 |
+
static inline double mobius_marginal(const MobiusAmplitudeSheet *ms,
|
| 485 |
+
uint64_t site, uint32_t value)
|
| 486 |
+
{
|
| 487 |
+
return ms->sheets[site].marginal[value];
|
| 488 |
+
}
|
| 489 |
+
|
| 490 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 491 |
+
* FULL AMPLITUDE β Reconstruct Ο(iβ,...,iβ) via graph
|
| 492 |
+
*
|
| 493 |
+
* Uses cached marginals for quick-reject of zero-probability configs.
|
| 494 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 495 |
+
|
| 496 |
+
static inline void mobius_amplitude(const MobiusAmplitudeSheet *ms,
|
| 497 |
+
const uint32_t *indices,
|
| 498 |
+
double *out_re, double *out_im)
|
| 499 |
+
{
|
| 500 |
+
const HPCGraph *g = ms->graph;
|
| 501 |
+
|
| 502 |
+
/* Quick reject from cached marginals */
|
| 503 |
+
for (uint64_t k = 0; k < ms->n_sites; k++) {
|
| 504 |
+
if (ms->sheets[k].marginal[indices[k]] < 1e-30) {
|
| 505 |
+
*out_re = 0.0;
|
| 506 |
+
*out_im = 0.0;
|
| 507 |
+
return;
|
| 508 |
+
}
|
| 509 |
+
}
|
| 510 |
+
|
| 511 |
+
hpc_amplitude(g, indices, out_re, out_im);
|
| 512 |
+
((MobiusAmplitudeSheet *)ms)->amplitude_queries++;
|
| 513 |
+
}
|
| 514 |
+
|
| 515 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 516 |
+
* SURFACE WALK β Enumerate all configurations with |Ο|Β² > threshold
|
| 517 |
+
*
|
| 518 |
+
* Uses sheet marginals to prune the search tree aggressively.
|
| 519 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 520 |
+
|
| 521 |
+
static inline HPCSparseVector *mobius_surface_walk(const MobiusAmplitudeSheet *ms,
|
| 522 |
+
double threshold,
|
| 523 |
+
uint64_t max_entries)
|
| 524 |
+
{
|
| 525 |
+
const HPCGraph *g = ms->graph;
|
| 526 |
+
HPCSparseVector *sv = hpc_sv_create(g->n_sites, 256);
|
| 527 |
+
if (!sv) return NULL;
|
| 528 |
+
sv->threshold = threshold;
|
| 529 |
+
|
| 530 |
+
((MobiusAmplitudeSheet *)ms)->surface_walks++;
|
| 531 |
+
|
| 532 |
+
uint32_t candidates[64][MOBIUS_D];
|
| 533 |
+
uint32_t n_cand[64];
|
| 534 |
+
uint64_t total_configs = 1;
|
| 535 |
+
|
| 536 |
+
uint64_t n = g->n_sites;
|
| 537 |
+
if (n > 64) n = 64;
|
| 538 |
+
|
| 539 |
+
for (uint64_t k = 0; k < n; k++) {
|
| 540 |
+
n_cand[k] = 0;
|
| 541 |
+
for (int v = 0; v < MOBIUS_D; v++) {
|
| 542 |
+
if (ms->sheets[k].marginal[v] >= threshold * 0.1) {
|
| 543 |
+
candidates[k][n_cand[k]++] = v;
|
| 544 |
+
}
|
| 545 |
+
}
|
| 546 |
+
if (n_cand[k] == 0) {
|
| 547 |
+
for (int v = 0; v < MOBIUS_D; v++)
|
| 548 |
+
candidates[k][n_cand[k]++] = v;
|
| 549 |
+
}
|
| 550 |
+
total_configs *= n_cand[k];
|
| 551 |
+
}
|
| 552 |
+
|
| 553 |
+
uint32_t indices[64];
|
| 554 |
+
for (uint64_t cfg = 0; cfg < total_configs && sv->count < max_entries; cfg++) {
|
| 555 |
+
uint64_t tmp = cfg;
|
| 556 |
+
for (uint64_t k = 0; k < n; k++) {
|
| 557 |
+
indices[k] = candidates[k][tmp % n_cand[k]];
|
| 558 |
+
tmp /= n_cand[k];
|
| 559 |
+
}
|
| 560 |
+
|
| 561 |
+
double re, im;
|
| 562 |
+
hpc_amplitude(g, indices, &re, &im);
|
| 563 |
+
double prob = re * re + im * im;
|
| 564 |
+
|
| 565 |
+
if (prob >= threshold)
|
| 566 |
+
hpc_sv_add(sv, indices, re, im);
|
| 567 |
+
}
|
| 568 |
+
|
| 569 |
+
return sv;
|
| 570 |
+
}
|
| 571 |
+
|
| 572 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 573 |
+
* VESICA DECOMPOSITION β Per-site CMY channel analysis
|
| 574 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 575 |
+
|
| 576 |
+
/*
 * Fill the cached vesica (sum) and wave (difference) channels of one site
 * from its dressed amplitudes. Channel c pairs basis values c and c + 3,
 * each combination scaled by INV_SQRT2. Does nothing when the cache is
 * already marked valid.
 */
static inline void mobius_vesica_decompose(MobiusAmplitudeSheet *ms, uint64_t site)
{
    MobiusSiteSheet *sheet = &ms->sheets[site];
    int ch;

    if (!sheet->vesica_valid) {
        for (ch = 0; ch < 3; ch++) {
            const int partner = ch + 3;

            sheet->vesica_re[ch] = INV_SQRT2 * (sheet->dressed_re[ch] + sheet->dressed_re[partner]);
            sheet->vesica_im[ch] = INV_SQRT2 * (sheet->dressed_im[ch] + sheet->dressed_im[partner]);
            sheet->wave_re[ch]   = INV_SQRT2 * (sheet->dressed_re[ch] - sheet->dressed_re[partner]);
            sheet->wave_im[ch]   = INV_SQRT2 * (sheet->dressed_im[ch] - sheet->dressed_im[partner]);
        }
        sheet->vesica_valid = 1;
    }
}
|
| 589 |
+
|
| 590 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 591 |
+
* INTERFERENCE WITNESS β Detect coherence patterns across the sheet
|
| 592 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 593 |
+
|
| 594 |
+
static inline double mobius_interference_witness(const MobiusAmplitudeSheet *ms)
|
| 595 |
+
{
|
| 596 |
+
double total = 0.0;
|
| 597 |
+
for (uint64_t k = 0; k < ms->n_sites; k++)
|
| 598 |
+
total += ms->sheets[k].coherence;
|
| 599 |
+
return (ms->n_sites > 0) ? total / ms->n_sites : 0.0;
|
| 600 |
+
}
|
| 601 |
+
|
| 602 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 603 |
+
* BETHE FREE ENERGY β Approximate partition function
|
| 604 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 605 |
+
|
| 606 |
+
/*
 * Evaluate the entropy-based Bethe expression
 *
 *     F = sum_k (degree_k - 1) * H(site_k)  -  sum_edges H(edge)
 *
 * where H(site_k) is the Shannon entropy (natural log) of the cached site
 * marginal, and H(edge) is the entropy of the pairwise table
 * marginal_a[va] * marginal_b[vb] * mobius_edge_factor(edge, va, vb)
 * after normalisation. Probabilities <= 1e-30 are skipped; an edge whose
 * table sums to <= 1e-30 contributes nothing.
 *
 * The result is stored in ms->bethe_free_energy and returned.
 */
static inline double mobius_bethe_free_energy(MobiusAmplitudeSheet *ms)
{
    const HPCGraph *graph = ms->graph;
    double free_energy = 0.0;
    uint64_t k, e;

    /* Site terms. */
    for (k = 0; k < graph->n_sites; k++) {
        const MobiusSiteSheet *sheet = &ms->sheets[k];
        int degree = (int)graph->adj[k].count;
        double h_site = 0.0;
        int v;

        for (v = 0; v < MOBIUS_D; v++) {
            double p = sheet->marginal[v];

            if (!(p > 1e-30))
                continue;
            h_site -= p * log(p);
        }

        free_energy += (double)(degree - 1) * h_site;
    }

    /* Edge terms. */
    for (e = 0; e < graph->n_edges; e++) {
        const HPCEdge *edge = &graph->edges[e];
        const MobiusSiteSheet *left  = &ms->sheets[edge->site_a];
        const MobiusSiteSheet *right = &ms->sheets[edge->site_b];
        double joint[MOBIUS_D][MOBIUS_D];
        double z = 0.0;
        double h_edge = 0.0;
        int va, vb;

        for (va = 0; va < MOBIUS_D; va++) {
            for (vb = 0; vb < MOBIUS_D; vb++) {
                joint[va][vb] = left->marginal[va] * right->marginal[vb] *
                                mobius_edge_factor(edge, va, vb);
                z += joint[va][vb];
            }
        }

        if (z > 1e-30) {
            for (va = 0; va < MOBIUS_D; va++) {
                for (vb = 0; vb < MOBIUS_D; vb++) {
                    double p = joint[va][vb] / z;

                    if (!(p > 1e-30))
                        continue;
                    h_edge -= p * log(p);
                }
            }
        }

        free_energy -= h_edge;
    }

    ms->bethe_free_energy = free_energy;
    return free_energy;
}
|
| 662 |
+
|
| 663 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 664 |
+
* INCREMENTAL UPDATE β Apply a CZ gate and update the sheet
|
| 665 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 666 |
+
|
| 667 |
+
/*
 * Apply a CZ gate between site_a and site_b on the underlying graph, make
 * room for the message slot of the resulting adjacency entry on both
 * sites, reset that slot to the all-ones message and re-run belief
 * propagation.
 *
 * The message buffer is grown through a temporary pointer so that a failed
 * realloc() neither leaks the old buffer nor leaves msg_in NULL. On
 * allocation failure an error is printed and the function returns with
 * ms->converged == 0 and the affected site's message buffer unchanged.
 * A site whose adjacency list is empty after the gate gets n_messages = 0
 * instead of writing to slot "count - 1".
 */
static inline void mobius_apply_cz(MobiusAmplitudeSheet *ms,
                                   uint64_t site_a, uint64_t site_b)
{
    const uint64_t touched[2] = { site_a, site_b };

    hpc_cz((HPCGraph *)ms->graph, site_a, site_b);
    ms->converged = 0;

    for (int side = 0; side < 2; side++) {
        MobiusSiteSheet *s = &ms->sheets[touched[side]];
        const HPCAdjList *adj = &ms->graph->adj[touched[side]];

        if (adj->count == 0) {
            s->n_messages = 0;
            continue;
        }

        if (adj->count > s->msg_capacity) {
            uint64_t new_cap = adj->count * 2;
            MobiusProbMsg *grown = (MobiusProbMsg *)realloc(s->msg_in,
                                        new_cap * sizeof(MobiusProbMsg));
            if (!grown) {
                fprintf(stderr, "mobius_apply_cz: out of memory growing message buffer\n");
                return;
            }
            s->msg_in = grown;

            /* Newly gained slots start as the neutral message. */
            for (uint64_t i = s->msg_capacity; i < new_cap; i++)
                for (int v = 0; v < MOBIUS_D; v++)
                    s->msg_in[i].p[v] = 1.0;
            s->msg_capacity = new_cap;
        }

        /* Reset the message slot of the last adjacency entry. */
        uint64_t new_idx = adj->count - 1;
        s->n_messages = adj->count;
        for (int v = 0; v < MOBIUS_D; v++)
            s->msg_in[new_idx].p[v] = 1.0;
    }

    mobius_converge(ms);
}
|
| 696 |
+
|
| 697 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 698 |
+
* INCREMENTAL UPDATE β Apply local gates
|
| 699 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 700 |
+
|
| 701 |
+
/*
 * Apply a per-value phase (phi_re + i*phi_im, six entries) to `site` on the
 * underlying graph, then re-run belief propagation on the sheet.
 */
static inline void mobius_apply_local_phase(MobiusAmplitudeSheet *ms,
                                            uint64_t site,
                                            const double phi_re[6],
                                            const double phi_im[6])
{
    HPCGraph *graph = (HPCGraph *)ms->graph;

    hpc_phase(graph, site, phi_re, phi_im);

    /* The local amplitudes changed, so the cached beliefs are stale. */
    ms->converged = 0;
    mobius_converge(ms);
}
|
| 710 |
+
|
| 711 |
+
/*
 * Apply hpc_dft() to `site` on the underlying graph, then re-run belief
 * propagation on the sheet.
 */
static inline void mobius_apply_dft(MobiusAmplitudeSheet *ms, uint64_t site)
{
    HPCGraph *graph = (HPCGraph *)ms->graph;

    hpc_dft(graph, site);

    /* The local amplitudes changed, so the cached beliefs are stale. */
    ms->converged = 0;
    mobius_converge(ms);
}
|
| 717 |
+
|
| 718 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 719 |
+
* MEASUREMENT β Born sample from the sheet, then tear it
|
| 720 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 721 |
+
|
| 722 |
+
/*
 * Measure `site`: pick an outcome from the cached marginal by inverse-CDF
 * sampling with `random_01`, apply hpc_measure() to the graph, reset the
 * measured site's incoming messages to all-ones and re-run belief
 * propagation.
 *
 * Returns the sampled outcome (MOBIUS_D - 1 when random_01 exceeds the
 * accumulated probability mass).
 *
 * NOTE(review): the returned value comes from the cached BP marginal while
 * hpc_measure() receives the same random_01 and its result is discarded
 * here - confirm that both always select the same outcome.
 */
static inline uint32_t mobius_measure(MobiusAmplitudeSheet *ms,
                                      uint64_t site, double random_01)
{
    MobiusSiteSheet *sheet = &ms->sheets[site];
    uint32_t picked = MOBIUS_D - 1;
    double running = 0.0;
    uint64_t m;
    int v = 0;

    while (v < MOBIUS_D) {
        running += sheet->marginal[v];
        if (random_01 <= running) {
            picked = v;
            break;
        }
        v++;
    }

    hpc_measure((HPCGraph *)ms->graph, site, random_01);
    ms->converged = 0;

    /* Re-sync the slot count with the graph and neutralise every message. */
    sheet->n_messages = ms->graph->adj[site].count;
    for (m = 0; m < sheet->n_messages; m++) {
        for (v = 0; v < MOBIUS_D; v++)
            sheet->msg_in[m].p[v] = 1.0;
    }

    mobius_converge(ms);
    return picked;
}
|
| 745 |
+
|
| 746 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 747 |
+
* ALL-SITE MARGINAL SNAPSHOT β The complete probability surface
|
| 748 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 749 |
+
|
| 750 |
+
/* Heap-allocated copy of the sheet's per-site results (see mobius_snapshot). */
typedef struct {
    double   *probabilities;     /* [n_sites x MOBIUS_D], row-major: entry (k, v) at k * MOBIUS_D + v */
    double   *coherences;        /* [n_sites] per-site coherence */
    uint64_t  n_sites;           /* number of rows in the two arrays */
    double    global_coherence;  /* value of mobius_interference_witness() at snapshot time */
    double    bethe_F;           /* value of mobius_bethe_free_energy() at snapshot time */
} MobiusSurface;
|
| 757 |
+
|
| 758 |
+
/*
 * Copy every site's marginals and coherence into a newly allocated
 * MobiusSurface, and record the mean coherence and the Bethe free energy
 * (the latter is recomputed and also stored on the sheet).
 *
 * Returns NULL if any allocation fails; nothing is leaked in that case.
 * For a sheet without sites the two arrays are left NULL.
 * The caller owns the result and releases it with mobius_surface_destroy().
 */
static inline MobiusSurface *mobius_snapshot(MobiusAmplitudeSheet *ms)
{
    MobiusSurface *surf = (MobiusSurface *)calloc(1, sizeof(MobiusSurface));
    if (!surf) return NULL;

    surf->n_sites = ms->n_sites;

    if (ms->n_sites > 0) {
        /* Passing the row count separately lets calloc() detect overflow. */
        surf->probabilities = (double *)calloc(ms->n_sites, MOBIUS_D * sizeof(double));
        surf->coherences    = (double *)calloc(ms->n_sites, sizeof(double));
        if (!surf->probabilities || !surf->coherences) {
            free(surf->probabilities);
            free(surf->coherences);
            free(surf);
            return NULL;
        }
    }

    for (uint64_t k = 0; k < ms->n_sites; k++) {
        for (int v = 0; v < MOBIUS_D; v++)
            surf->probabilities[k * MOBIUS_D + v] = ms->sheets[k].marginal[v];
        surf->coherences[k] = ms->sheets[k].coherence;
    }

    surf->global_coherence = mobius_interference_witness(ms);
    surf->bethe_F = mobius_bethe_free_energy(ms);

    return surf;
}
|
| 778 |
+
|
| 779 |
+
/* Release a snapshot and both of its arrays. Passing NULL is a no-op. */
static inline void mobius_surface_destroy(MobiusSurface *surf)
{
    if (surf) {
        free(surf->probabilities);
        free(surf->coherences);
        free(surf);
    }
}
|
| 786 |
+
|
| 787 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 788 |
+
* DIAGNOSTICS
|
| 789 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 790 |
+
|
| 791 |
+
static inline void mobius_print(const MobiusAmplitudeSheet *ms)
|
| 792 |
+
{
|
| 793 |
+
printf("βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n");
|
| 794 |
+
printf("β MΓΆbius Amplitude Sheet β\n");
|
| 795 |
+
printf("β ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ£\n");
|
| 796 |
+
printf("β Sites: %10lu β\n", ms->n_sites);
|
| 797 |
+
printf("β Converged: %10s β\n",
|
| 798 |
+
ms->converged ? "YES" : "NO");
|
| 799 |
+
printf("β Iterations: %10d β\n", ms->iterations);
|
| 800 |
+
printf("β Max residual: %10.2e β\n", ms->max_residual);
|
| 801 |
+
printf("β Msg updates: %10lu β\n", ms->msg_updates);
|
| 802 |
+
printf("β Amp queries: %10lu β\n", ms->amplitude_queries);
|
| 803 |
+
printf("β Surface walks: %10lu β\n", ms->surface_walks);
|
| 804 |
+
printf("β Bethe F: %10.6f β\n", ms->bethe_free_energy);
|
| 805 |
+
printf("βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n");
|
| 806 |
+
|
| 807 |
+
uint64_t show = ms->n_sites;
|
| 808 |
+
if (show > 8) show = 8;
|
| 809 |
+
for (uint64_t k = 0; k < show; k++) {
|
| 810 |
+
const MobiusSiteSheet *s = &ms->sheets[k];
|
| 811 |
+
printf(" Site %lu: marginals=[", k);
|
| 812 |
+
for (int v = 0; v < MOBIUS_D; v++) {
|
| 813 |
+
printf("%.4f", s->marginal[v]);
|
| 814 |
+
if (v < MOBIUS_D - 1) printf(", ");
|
| 815 |
+
}
|
| 816 |
+
printf("] coh=%.4f degree=%lu\n", s->coherence, s->n_messages);
|
| 817 |
+
}
|
| 818 |
+
if (ms->n_sites > 8)
|
| 819 |
+
printf(" ... (%lu more sites)\n", ms->n_sites - 8);
|
| 820 |
+
}
|
| 821 |
+
|
| 822 |
+
static inline void mobius_print_dressed(const MobiusAmplitudeSheet *ms, uint64_t site)
|
| 823 |
+
{
|
| 824 |
+
const MobiusSiteSheet *s = &ms->sheets[site];
|
| 825 |
+
printf(" Site %lu dressed: [", site);
|
| 826 |
+
for (int v = 0; v < MOBIUS_D; v++) {
|
| 827 |
+
printf("%.4f%+.4fi", s->dressed_re[v], s->dressed_im[v]);
|
| 828 |
+
if (v < MOBIUS_D - 1) printf(", ");
|
| 829 |
+
}
|
| 830 |
+
printf("]\n");
|
| 831 |
+
}
|
| 832 |
+
|
| 833 |
+
#endif /* HPC_MOBIUS_H */
|
imatrix_reader.h
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
 * imatrix_reader.h — Importance Matrix File Reader
 *
 * ╔═══════════════════════════════════════════════════════════════╗
 * ║  HExState Importance Matrix Input Module                      ║
 * ║  Reads llama.cpp-compatible .imatrix binary files             ║
 * ║  Provides per-channel importance weights for quantization     ║
 * ╚═══════════════════════════════════════════════════════════════╝
 *
 * Importance matrices capture E[x²] per input channel from calibration
 * data. This information biases quantization toward preserving
 * high-importance channels, significantly improving perplexity at
 * low bit widths (Q2_K).
 *
 * File format (llama.cpp imatrix):
 *   [4 bytes: n_entries (int32)]
 *   For each entry:
 *     [4 bytes: name_len (int32)]
 *     [name_len bytes: tensor name (utf-8, no null terminator)]
 *     [4 bytes: n_values (int32)]
 *     [4 bytes: n_samples (int32)]  -- (count of calibration tokens)
 *     [n_values * 4 bytes: float32 importance values]
 */
|
| 24 |
+
|
| 25 |
+
#ifndef IMATRIX_READER_H
|
| 26 |
+
#define IMATRIX_READER_H
|
| 27 |
+
|
| 28 |
+
#include <stdint.h>
|
| 29 |
+
#include <stdio.h>
|
| 30 |
+
#include <stdlib.h>
|
| 31 |
+
#include <string.h>
|
| 32 |
+
|
| 33 |
+
#define IMAT_MAX_ENTRIES  8192
#define IMAT_MAX_NAME_LEN 512
#define IMAT_MAX_VALUES   (1024 * 1024)  /* largest accepted n_values per entry */

/* ═══════════════════════════════════════════════════════════════════════
 * IMPORTANCE MATRIX ENTRY
 * ═══════════════════════════════════════════════════════════════════════ */

typedef struct {
    char     name[IMAT_MAX_NAME_LEN];  /* NUL-terminated tensor name */
    int32_t  n_values;                 /* number of floats in values / normalized */
    int32_t  n_samples;                /* sample count as stored in the file */
    float   *values;                   /* Raw importance values (E[x^2] per channel) */
    float   *normalized;               /* Normalized: values / mean(values) */
} IMatrixEntry;

typedef struct {
    IMatrixEntry *entries;
    int32_t       n_entries;
} IMatrixData;

/* ═══════════════════════════════════════════════════════════════════════
 * LOAD IMATRIX FILE
 * ═══════════════════════════════════════════════════════════════════════ */

/*
 * Read one record (name, n_values, n_samples, float payload) from `f` into
 * the zero-initialised entry `e` and fill e->normalized.
 *
 * Returns 0 on success, -1 on a short read, an out-of-range field or an
 * allocation failure. On failure, buffers already attached to `e` are left
 * for the caller to free.
 */
static int imatrix_read_entry(FILE *f, IMatrixEntry *e)
{
    int32_t name_len = 0;

    if (fread(&name_len, sizeof name_len, 1, f) != 1) return -1;
    /* ">=" keeps one byte free for the terminator written below. */
    if (name_len <= 0 || name_len >= IMAT_MAX_NAME_LEN) return -1;
    if (fread(e->name, 1, (size_t)name_len, f) != (size_t)name_len) return -1;
    e->name[name_len] = '\0';

    /* NOTE(review): fields are read as n_values then n_samples, as described
     * in this header's format comment - confirm that this matches the order
     * used by the tool that produced the file. */
    if (fread(&e->n_values, sizeof e->n_values, 1, f) != 1) return -1;
    if (fread(&e->n_samples, sizeof e->n_samples, 1, f) != 1) return -1;
    if (e->n_values <= 0 || e->n_values > IMAT_MAX_VALUES) return -1;

    const size_t count = (size_t)e->n_values;

    e->values = (float *)malloc(count * sizeof *e->values);
    if (!e->values) return -1;
    if (fread(e->values, sizeof *e->values, count, f) != count) return -1;

    e->normalized = (float *)malloc(count * sizeof *e->normalized);
    if (!e->normalized) return -1;

    /* Normalize: divide by mean so that mean(normalized) = 1.0 */
    double sum = 0.0;
    for (size_t j = 0; j < count; j++)
        sum += (double)e->values[j];

    const double mean = sum / (double)count;
    if (mean > 1e-30) {
        const float inv_mean = (float)(1.0 / mean);
        for (size_t j = 0; j < count; j++)
            e->normalized[j] = e->values[j] * inv_mean;
    } else {
        /* Degenerate (mean not positive): fall back to uniform weights. */
        for (size_t j = 0; j < count; j++)
            e->normalized[j] = 1.0f;
    }

    return 0;
}

/*
 * Load an importance-matrix file.
 *
 * Returns a heap-allocated IMatrixData (release with imatrix_free()), or
 * NULL when the path is NULL, the file cannot be opened, the entry count is
 * missing or outside 1..IMAT_MAX_ENTRIES, any record is malformed or
 * truncated, or memory runs out. A diagnostic is written to stderr for
 * open, header, allocation and parse failures.
 */
static IMatrixData *imatrix_load(const char *path)
{
    if (!path) {
        fprintf(stderr, " imatrix_load: no path given\n");
        return NULL;
    }

    FILE *f = fopen(path, "rb");
    if (!f) {
        fprintf(stderr, " imatrix_load: cannot open '%s'\n", path);
        return NULL;
    }

    IMatrixData *imat = (IMatrixData *)calloc(1, sizeof(IMatrixData));
    if (!imat) { fclose(f); return NULL; }

    /* Read entry count. Initialised so that the diagnostic below never
     * prints an indeterminate value when fread() itself fails. */
    int32_t n_entries = 0;
    if (fread(&n_entries, sizeof(int32_t), 1, f) != 1 ||
        n_entries <= 0 || n_entries > IMAT_MAX_ENTRIES) {
        fprintf(stderr, " imatrix_load: invalid entry count %d\n", (int)n_entries);
        free(imat);
        fclose(f);
        return NULL;
    }

    imat->entries = (IMatrixEntry *)calloc((size_t)n_entries, sizeof(IMatrixEntry));
    if (!imat->entries) {
        fprintf(stderr, " imatrix_load: out of memory for %d entries\n", (int)n_entries);
        free(imat);
        fclose(f);
        return NULL;
    }
    imat->n_entries = n_entries;

    for (int32_t i = 0; i < n_entries; i++) {
        if (imatrix_read_entry(f, &imat->entries[i]) != 0)
            goto fail;
    }

    fclose(f);
    return imat;

fail:
    fprintf(stderr, " imatrix_load: parse error in '%s'\n", path);
    /* Entries were calloc'ed, so untouched slots hold NULL pointers. */
    for (int32_t i = 0; i < imat->n_entries; i++) {
        free(imat->entries[i].values);
        free(imat->entries[i].normalized);
    }
    free(imat->entries);
    free(imat);
    fclose(f);
    return NULL;
}
|
| 139 |
+
|
| 140 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 141 |
+
* FIND IMPORTANCE DATA FOR A TENSOR
|
| 142 |
+
*
|
| 143 |
+
* Looks up by GGUF tensor name. Returns NULL if not found.
|
| 144 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 145 |
+
|
| 146 |
+
static const IMatrixEntry *imatrix_find(const IMatrixData *imat,
|
| 147 |
+
const char *tensor_name)
|
| 148 |
+
{
|
| 149 |
+
if (!imat) return NULL;
|
| 150 |
+
for (int i = 0; i < imat->n_entries; i++) {
|
| 151 |
+
if (strcmp(imat->entries[i].name, tensor_name) == 0)
|
| 152 |
+
return &imat->entries[i];
|
| 153 |
+
}
|
| 154 |
+
return NULL;
|
| 155 |
+
}
|
| 156 |
+
|
| 157 |
+
/* Also try the HuggingFace-style tensor name */
|
| 158 |
+
/*
 * Look up an entry under either of two names: `gguf_name` is tried first,
 * `hf_name` only if the first lookup finds nothing.
 *
 * Either name may be NULL (for example when a tensor has no counterpart in
 * one naming scheme); a NULL name is skipped rather than passed on.
 * Returns NULL if neither name matches.
 */
static const IMatrixEntry *imatrix_find_any(const IMatrixData *imat,
                                            const char *gguf_name,
                                            const char *hf_name)
{
    const IMatrixEntry *e = gguf_name ? imatrix_find(imat, gguf_name) : NULL;
    if (e) return e;

    return hf_name ? imatrix_find(imat, hf_name) : NULL;
}
|
| 166 |
+
|
| 167 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 168 |
+
* CLEANUP
|
| 169 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 170 |
+
|
| 171 |
+
/*
 * Release an IMatrixData together with every entry's value buffers.
 * Passing NULL is a no-op.
 */
static void imatrix_free(IMatrixData *imat)
{
    int idx = 0;

    if (!imat) return;

    while (idx < imat->n_entries) {
        IMatrixEntry *entry = &imat->entries[idx];

        free(entry->values);
        free(entry->normalized);
        idx++;
    }

    free(imat->entries);
    free(imat);
}
|
| 181 |
+
|
| 182 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 183 |
+
* SUMMARY
|
| 184 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 185 |
+
|
| 186 |
+
static void imatrix_print_summary(const IMatrixData *imat)
|
| 187 |
+
{
|
| 188 |
+
printf(" βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n");
|
| 189 |
+
printf(" β Importance Matrix β\n");
|
| 190 |
+
printf(" β ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ£\n");
|
| 191 |
+
printf(" β Entries: %-40d β\n", imat->n_entries);
|
| 192 |
+
|
| 193 |
+
/* Show first few entries as samples */
|
| 194 |
+
int show = imat->n_entries < 5 ? imat->n_entries : 5;
|
| 195 |
+
for (int i = 0; i < show; i++) {
|
| 196 |
+
const IMatrixEntry *e = &imat->entries[i];
|
| 197 |
+
printf(" β [%3d] %-30s %6d ch, %4d samples β\n",
|
| 198 |
+
i, e->name, e->n_values, e->n_samples);
|
| 199 |
+
}
|
| 200 |
+
if (imat->n_entries > 5)
|
| 201 |
+
printf(" β ... and %d more entries β\n",
|
| 202 |
+
imat->n_entries - 5);
|
| 203 |
+
|
| 204 |
+
printf(" βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n\n");
|
| 205 |
+
}
|
| 206 |
+
|
| 207 |
+
#endif /* IMATRIX_READER_H */
|
makefile.quantize
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 2 |
+
# makefile.quantize β HexState HPC Quantizer Engine (Shared Library)
|
| 3 |
+
#
|
| 4 |
+
# Build: make -f makefile.quantize
|
| 5 |
+
# Clean: make -f makefile.quantize clean
|
| 6 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 7 |
+
|
| 8 |
+
CC      = gcc
CFLAGS  = -O2 -std=gnu99 -shared -fPIC -Wall -Wno-unused-function -Wno-unused-variable -fopenmp
# NOTE(review): the source list below is described as "no bigint", yet gmp and
# mpfr are still linked - confirm whether they are required.
LDFLAGS = -lm -lgmp -lmpfr -fopenmp

# Include local directory for HexState headers
INCLUDES = -I.

# Source files - quantizer + HExState engine dependencies (no bigint)
SRCS = hexstate_quantize.c \
       quhit_triality.c \
       quhit_hexagram.c \
       s6_exotic.c

TARGET = libhexstate_q2k.so

.PHONY: all clean

all: $(TARGET)

# Recipe lines must begin with a TAB character.
$(TARGET): $(SRCS)
	$(CC) $(CFLAGS) $(INCLUDES) -o $(TARGET) $(SRCS) $(LDFLAGS)
	@echo ""
	@echo " ╔════════════════════════════════════════════════════════════════╗"
	@printf ' ║  %-62s║\n' 'HexState HPC Quantizer Engine v2.1 built successfully!'
	@printf ' ║  %-62s║\n' ''
	@printf ' ║  %-62s║\n' 'Output: libhexstate_q2k.so (shared library)'
	@printf ' ║  %-62s║\n' ''
	@printf ' ║  %-62s║\n' 'Beam Search: 24-beam Hensel (Q2_K + Q4_0)'
	@printf ' ║  %-62s║\n' 'Scale Grid: 16x16 = 256 candidates per block'
	@printf ' ║  %-62s║\n' ''
	@printf ' ║  %-62s║\n' 'Usage: loaded by Python quantization pipeline via ctypes'
	@echo " ╚════════════════════════════════════════════════════════════════╝"
	@echo ""

clean:
	rm -f $(TARGET)
|
quhit_hexagram.c
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* quhit_hexagram.c β The Hexagram Quhit Implementation
|
| 3 |
+
*
|
| 4 |
+
* Edge-dual of the triality quhit. Amplitudes on hexagram line segments.
|
| 5 |
+
*
|
| 6 |
+
* The Hβ transform is derived from the body-diagonal projection of the
|
| 7 |
+
* cube's face diagonals. Each hexagram line ββ corresponds to specific
|
| 8 |
+
* face diagonals that project onto that line when viewed from (1,1,1).
|
| 9 |
+
*
|
| 10 |
+
* Cube vertex labels (Cubeee.html convention):
|
| 11 |
+
* 0:(-1,-1,-1) 1:(+1,-1,-1) 2:(+1,+1,-1) 3:(-1,+1,-1)
|
| 12 |
+
* 4:(-1,-1,+1) 5:(+1,-1,+1) 6:(+1,+1,+1) 7:(-1,+1,+1)
|
| 13 |
+
*
|
| 14 |
+
* Body-diagonal projection from (1,1,1), projected positions:
|
| 15 |
+
* 0,6 β center (body diagonal endpoints)
|
| 16 |
+
* 1 β (β2, 0) β right
|
| 17 |
+
* 2 β (1/β2, β(3/2)) β upper-right
|
| 18 |
+
* 3 β (-1/β2, β(3/2)) β upper-left
|
| 19 |
+
* 4 β (-β2, 0) β left
|
| 20 |
+
 *   5 → (-1/√2, -√(3/2)) — lower-left
 *   NOTE: the coordinates in this list are unverified and the list is
 *   incomplete (vertex 7 is not given). The derivation below does not rely
 *   on them; it uses the quhit basis states directly.
|
| 22 |
+
*
|
| 23 |
+
* ββ Mapping from quhit basis states to hexagram lines ββ
|
| 24 |
+
*
|
| 25 |
+
* The 6 basis states |0β©...|5β© map to the CMY channel structure:
|
| 26 |
+
* C: {|0β©, |1β©} = Β±X face pair
|
| 27 |
+
* M: {|2β©, |3β©} = Β±Y face pair
|
| 28 |
+
* Y: {|4β©, |5β©} = Β±Z face pair
|
| 29 |
+
*
|
| 30 |
+
* Each face has 2 diagonals. Under body-diagonal projection:
|
| 31 |
+
* Face diagonals within channel k map to hexagram lines.
|
| 32 |
+
* The specific mapping depends on which cube vertices the
|
| 33 |
+
* face diagonals connect and how they project.
|
| 34 |
+
*
|
| 35 |
+
* The Hβ matrix encodes: for each hexagram line ββ, which
|
| 36 |
+
* superposition of basis states |jβ© contributes amplitude.
|
| 37 |
+
*
|
| 38 |
+
* ββ Derivation of Hβ ββ
|
| 39 |
+
*
|
| 40 |
+
* The 6 hexagram lines alternate: diameter, outer, diameter, outer, ...
|
| 41 |
+
*
|
| 42 |
+
* A DIAMETER line passes through the center. In the cube, this
|
| 43 |
+
* corresponds to two face diagonals from opposite faces of the same
|
| 44 |
+
* axis that project onto the same line through center. These combine
|
| 45 |
+
* the vesica (sum) and wave (difference) of the antipodal pair.
|
| 46 |
+
*
|
| 47 |
+
* An OUTER line connects two adjacent hexagram vertices. This
|
| 48 |
+
* corresponds to a single face diagonal from a different axis that
|
| 49 |
+
* connects the projected positions of two non-antipodal vertices.
|
| 50 |
+
*
|
| 51 |
+
* For each hexagram line ββ, Hβ[k][j] gives the contribution of
|
| 52 |
+
* vertex basis state |jβ©. The matrix is constructed so that:
|
| 53 |
+
*
|
| 54 |
+
 *   Diameters:  ℓ₀ combines C-channel pair {|0⟩,|1⟩} antisymmetrically
 *               ℓ₂ combines M-channel pair {|2⟩,|3⟩} antisymmetrically
 *               ℓ₄ combines Y-channel pair {|4⟩,|5⟩} antisymmetrically
 *
 *   Outers:     ℓ₁ combines a cross-channel pair from Y and M
 *               ℓ₃ combines a cross-channel pair from C and Y
 *               ℓ₅ combines a cross-channel pair from M and C
|
| 61 |
+
*
|
| 62 |
+
* The specific coefficients ensure unitarity and encode the 120Β°
|
| 63 |
+
* rotational symmetry of the body-diagonal view (CβMβYβC cycling).
|
| 64 |
+
*
|
| 65 |
+
* The eigenbasis structure: diameters are channel-internal (sum/diff
|
| 66 |
+
* within a pair), outers are channel-crossing (linking adjacent
|
| 67 |
+
* channels). This 3+3 partition mirrors the unicursal path's
|
| 68 |
+
* alternating diameter/outer structure.
|
| 69 |
+
*/
|
| 70 |
+
|
| 71 |
+
#include <string.h>
|
| 72 |
+
#include <math.h>
|
| 73 |
+
#include <stdio.h>
|
| 74 |
+
#include "quhit_hexagram.h"
|
| 75 |
+
|
| 76 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 77 |
+
* CONSTANTS
|
| 78 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 79 |
+
|
| 80 |
+
static const double INV_SQRT2 = 0.70710678118654752440;
|
| 81 |
+
static const double INV_SQRT3 = 0.57735026918962576451;
|
| 82 |
+
static const double INV_SQRT6 = 0.40824829046386301637;
|
| 83 |
+
|
| 84 |
+
/* ω₃ = e^{2πi/3} = -1/2 + i√3/2 */
|
| 85 |
+
static const double W3_RE = -0.5;
|
| 86 |
+
static const double W3_IM = 0.86602540378443864676;
|
| 87 |
+
|
| 88 |
+
/* ω₆ = e^{2πi/6} = 1/2 + i√3/2 */
|
| 89 |
+
static const double W6_RE = 0.5;
|
| 90 |
+
static const double W6_IM = 0.86602540378443864676;
|
| 91 |
+
|
| 92 |
+
/* Line metadata (static) */
|
| 93 |
+
static const int LINE_TYPES[6] = {
|
| 94 |
+
LINE_DIAMETER, LINE_OUTER,
|
| 95 |
+
LINE_DIAMETER, LINE_OUTER,
|
| 96 |
+
LINE_DIAMETER, LINE_OUTER
|
| 97 |
+
};
|
| 98 |
+
|
| 99 |
+
/* CMY color assignment per line:
|
| 100 |
+
* ββ=C(0), ββ=Y(2), ββ=M(1), ββ=C(0), ββ=Y(2), ββ
=M(1)
|
| 101 |
+
* Pattern: C, Y, M, C, Y, M β triality cycling with 120Β° offset */
|
| 102 |
+
static const int LINE_COLORS[6] = { 0, 2, 1, 0, 2, 1 };
|
| 103 |
+
|
| 104 |
+
/* Human-readable label per line index: "<line> <diam|outr> <C|M|Y>".
 * The pointer table itself is const so it cannot be retargeted at runtime. */
static const char *const LINE_NAMES[6] = {
    "l0 diam C", "l1 outr Y", "l2 diam M",
    "l3 outr C", "l4 diam Y", "l5 outr M"
};
|
| 108 |
+
|
| 109 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 110 |
+
* Hβ TRANSFORM MATRICES
|
| 111 |
+
*
|
| 112 |
+
* Hβ maps vertex basis |jβ© β hexagram line basis |βββ©.
|
| 113 |
+
*
|
| 114 |
+
* Structure (6Γ6 unitary):
|
| 115 |
+
*
|
| 116 |
+
* Diameters (rows 0,2,4) = channel-pair DIFFERENCES (wave):
|
| 117 |
+
* ββ = (|0β© - |1β©)/β2 [C channel difference]
|
| 118 |
+
* ββ = (|2β© - |3β©)/β2 [M channel difference]
|
| 119 |
+
* ββ = (|4β© - |5β©)/β2 [Y channel difference]
|
| 120 |
+
*
|
| 121 |
+
* Outers (rows 1,3,5) = DFTβ-weighted channel SUMS (vesica):
|
| 122 |
+
* Let s_c = (|2cβ© + |2c+1β©)/β2 for channel c β {0,1,2}
|
| 123 |
+
* Then:
|
| 124 |
+
* ββ = (sβ + sβ + sβ)/β3 = (1,1,1,1,1,1)/β6
|
| 125 |
+
* ββ = (sβ + ΟβΒ·sβ + ΟβΒ²Β·sβ)/β3
|
| 126 |
+
* ββ
= (sβ + ΟβΒ²Β·sβ + ΟβΒ·sβ)/β3
|
| 127 |
+
*
|
| 128 |
+
* Orthogonality proof:
|
| 129 |
+
* Diameter β₯ Outer: within each channel pair (2c, 2c+1),
|
| 130 |
+
* diameter has (+1,-1)/β2, outer has (+x,+x)/β2.
|
| 131 |
+
* Inner product per pair: x - x = 0. β
|
| 132 |
+
* Outer β₯ Outer: DFTβ rows are orthogonal (1+Οβ+ΟβΒ²=0). β
|
| 133 |
+
* Diameter β₯ Diameter: non-overlapping channel pairs. β
|
| 134 |
+
*
|
| 135 |
+
* This is the Cooley-Tukey DFTβ = DFTβ β DFTβ:
|
| 136 |
+
* DFTβ within each channel β difference (diameter) + sum (outer)
|
| 137 |
+
* DFTβ across the 3 sums β the 3 outer lines with Οβ phases
|
| 138 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 139 |
+
|
| 140 |
+
double H6_re[HEX_D][HEX_D];
|
| 141 |
+
double H6_im[HEX_D][HEX_D];
|
| 142 |
+
double H6_adj_re[HEX_D][HEX_D];
|
| 143 |
+
double H6_adj_im[HEX_D][HEX_D];
|
| 144 |
+
|
| 145 |
+
void hexagram_init_tables(void) {
|
| 146 |
+
memset(H6_re, 0, sizeof(H6_re));
|
| 147 |
+
memset(H6_im, 0, sizeof(H6_im));
|
| 148 |
+
|
| 149 |
+
/* Οβ powers: Οβ^0=1, Οβ^1=(-1+iβ3)/2, Οβ^2=(-1-iβ3)/2 */
|
| 150 |
+
const double w3r[3] = { 1.0, W3_RE, W3_RE };
|
| 151 |
+
const double w3i[3] = { 0.0, W3_IM, -W3_IM };
|
| 152 |
+
|
| 153 |
+
/* ββ Diameter rows: (|2cβ© - |2c+1β©)/β2 ββ */
|
| 154 |
+
for (int d = 0; d < 3; d++) {
|
| 155 |
+
int row = 2 * d; /* rows 0, 2, 4 */
|
| 156 |
+
int c0 = 2 * d; /* first column of channel pair */
|
| 157 |
+
H6_re[row][c0] = INV_SQRT2;
|
| 158 |
+
H6_re[row][c0 + 1] = -INV_SQRT2;
|
| 159 |
+
}
|
| 160 |
+
|
| 161 |
+
/* ββ Outer rows: Ξ£_c Οβ^(rΒ·c) Β· (|2cβ© + |2c+1β©) / β6 ββ */
|
| 162 |
+
for (int r = 0; r < 3; r++) {
|
| 163 |
+
int row = 2 * r + 1; /* rows 1, 3, 5 */
|
| 164 |
+
for (int c = 0; c < 3; c++) {
|
| 165 |
+
int idx = (r * c) % 3; /* Οβ exponent */
|
| 166 |
+
double wr = w3r[idx] * INV_SQRT6;
|
| 167 |
+
double wi = w3i[idx] * INV_SQRT6;
|
| 168 |
+
/* Both elements of channel c get the same coefficient */
|
| 169 |
+
H6_re[row][2*c] = wr; H6_im[row][2*c] = wi;
|
| 170 |
+
H6_re[row][2*c + 1] = wr; H6_im[row][2*c + 1] = wi;
|
| 171 |
+
}
|
| 172 |
+
}
|
| 173 |
+
|
| 174 |
+
/* Compute Hββ (conjugate transpose) */
|
| 175 |
+
for (int i = 0; i < HEX_D; i++) {
|
| 176 |
+
for (int j = 0; j < HEX_D; j++) {
|
| 177 |
+
H6_adj_re[i][j] = H6_re[j][i];
|
| 178 |
+
H6_adj_im[i][j] = -H6_im[j][i];
|
| 179 |
+
}
|
| 180 |
+
}
|
| 181 |
+
}
|
| 182 |
+
|
| 183 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 184 |
+
* TRANSFORM PRIMITIVES
|
| 185 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 186 |
+
|
| 187 |
+
/* Apply Hβ: vertex β hexagram */
|
| 188 |
+
static void apply_H6(const double *in_re, const double *in_im,
|
| 189 |
+
double *out_re, double *out_im)
|
| 190 |
+
{
|
| 191 |
+
for (int k = 0; k < HEX_D; k++) {
|
| 192 |
+
double sr = 0, si = 0;
|
| 193 |
+
for (int j = 0; j < HEX_D; j++) {
|
| 194 |
+
double hr = H6_re[k][j], hi = H6_im[k][j];
|
| 195 |
+
sr += hr * in_re[j] - hi * in_im[j];
|
| 196 |
+
si += hr * in_im[j] + hi * in_re[j];
|
| 197 |
+
}
|
| 198 |
+
out_re[k] = sr;
|
| 199 |
+
out_im[k] = si;
|
| 200 |
+
}
|
| 201 |
+
}
|
| 202 |
+
|
| 203 |
+
/* Apply Hββ : hexagram β vertex */
|
| 204 |
+
static void apply_H6_adj(const double *in_re, const double *in_im,
|
| 205 |
+
double *out_re, double *out_im)
|
| 206 |
+
{
|
| 207 |
+
for (int j = 0; j < HEX_D; j++) {
|
| 208 |
+
double sr = 0, si = 0;
|
| 209 |
+
for (int k = 0; k < HEX_D; k++) {
|
| 210 |
+
double hr = H6_adj_re[j][k], hi = H6_adj_im[j][k];
|
| 211 |
+
sr += hr * in_re[k] - hi * in_im[k];
|
| 212 |
+
si += hr * in_im[k] + hi * in_re[k];
|
| 213 |
+
}
|
| 214 |
+
out_re[j] = sr;
|
| 215 |
+
out_im[j] = si;
|
| 216 |
+
}
|
| 217 |
+
}
|
| 218 |
+
|
| 219 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 220 |
+
* LIFECYCLE
|
| 221 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 222 |
+
|
| 223 |
+
/*
 * Reset q to the basis state on line 0 with positive chirality.
 * The cached vertex representation is marked stale.
 */
void hexagram_init(HexagramQuhit *q) {
    memset(q, 0, sizeof *q);
    q->vertex_dirty = 1;
    q->chirality = CHIRALITY_POS;
    q->line_re[0] = 1.0;    /* unit amplitude on line 0 */
}
|
| 229 |
+
|
| 230 |
+
/*
 * Initialise q from a vertex-basis state vector.
 *
 * vert_re / vert_im : HEX_D real and imaginary vertex amplitudes.
 * chirality         : stored as given.
 *
 * The line amplitudes are obtained through apply_H6, so the H6 tables must
 * already have been filled by hexagram_init_tables(). The input vector is
 * also copied into the vertex cache, which is therefore marked clean.
 */
void hexagram_init_from_vertex(HexagramQuhit *q,
                               const double *vert_re, const double *vert_im,
                               int chirality)
{
    memset(q, 0, sizeof *q);
    q->chirality = chirality;

    /* vertex -> hexagram */
    apply_H6(vert_re, vert_im, q->line_re, q->line_im);

    /* Keep the caller's vertex vector as the (valid) cache. */
    memcpy(q->vertex_re, vert_re, sizeof q->vertex_re);
    memcpy(q->vertex_im, vert_im, sizeof q->vertex_im);
    q->vertex_dirty = 0;
}
|
| 245 |
+
|
| 246 |
+
/*
 * Initialise q to the basis state on line k with the given chirality.
 *
 * k is reduced modulo the number of lines (negative values wrap backwards),
 * so an out-of-range index can no longer write outside line_re[]. In-range
 * values behave exactly as before. The vertex cache is marked stale.
 */
void hexagram_init_line(HexagramQuhit *q, int k, int chirality) {
    const int n_lines = (int)(sizeof q->line_re / sizeof q->line_re[0]);

    memset(q, 0, sizeof *q);

    /* Euclidean modulo: C's % keeps the sign of the dividend. */
    k %= n_lines;
    if (k < 0)
        k += n_lines;

    q->line_re[k] = 1.0;
    q->chirality = chirality;
    q->vertex_dirty = 1;
}
|
| 252 |
+
|
| 253 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 254 |
+
* NATIVE HEXAGRAM GATES
|
| 255 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 256 |
+
|
| 257 |
+
/*
 * Cyclically shift the line amplitudes by delta positions:
 * the amplitude on line k moves to line (k + delta) mod HEX_D.
 * Negative delta shifts backwards. A shift that reduces to 0 returns
 * without touching q (vertex_dirty is left unchanged in that case).
 */
void hexagram_path_shift(HexagramQuhit *q, int delta) {
    int step = delta % HEX_D;
    if (step < 0)
        step += HEX_D;          /* bring into 0..HEX_D-1 */
    if (step == 0)
        return;

    double moved_re[HEX_D], moved_im[HEX_D];
    for (int from = 0; from < HEX_D; from++) {
        int to = (from + step) % HEX_D;
        moved_re[to] = q->line_re[from];
        moved_im[to] = q->line_im[from];
    }

    memcpy(q->line_re, moved_re, sizeof moved_re);
    memcpy(q->line_im, moved_im, sizeof moved_im);
    q->vertex_dirty = 1;
}
|
| 272 |
+
|
| 273 |
+
/*
 * Multiply each line amplitude k by the complex factor
 * (phi_re[k] + i*phi_im[k]). Both factor arrays hold HEX_D entries.
 * The factors are applied as given; nothing here forces unit modulus.
 */
void hexagram_phase(HexagramQuhit *q, const double *phi_re, const double *phi_im) {
    for (int line = 0; line < HEX_D; line++) {
        const double amp_re = q->line_re[line];
        const double amp_im = q->line_im[line];
        const double f_re = phi_re[line];
        const double f_im = phi_im[line];

        q->line_re[line] = amp_re * f_re - amp_im * f_im;
        q->line_im[line] = amp_re * f_im + amp_im * f_re;
    }
    q->vertex_dirty = 1;
}
|
| 281 |
+
|
| 282 |
+
/*
 * Multiply the amplitudes on the even-indexed (diameter) lines 0, 2, 4 by
 * the complex factor (phi_re + i*phi_im); odd lines are untouched.
 */
void hexagram_diameter_phase(HexagramQuhit *q, double phi_re, double phi_im) {
    for (int d = 0; 2 * d < HEX_D; d++) {
        const int line = 2 * d;
        const double amp_re = q->line_re[line];
        const double amp_im = q->line_im[line];

        q->line_re[line] = amp_re * phi_re - amp_im * phi_im;
        q->line_im[line] = amp_re * phi_im + amp_im * phi_re;
    }
    q->vertex_dirty = 1;
}
|
| 291 |
+
|
| 292 |
+
/*
 * Multiply the amplitudes on the odd-indexed (outer) lines 1, 3, 5 by
 * the complex factor (phi_re + i*phi_im); even lines are untouched.
 */
void hexagram_outer_phase(HexagramQuhit *q, double phi_re, double phi_im) {
    for (int o = 0; 2 * o + 1 < HEX_D; o++) {
        const int line = 2 * o + 1;
        const double amp_re = q->line_re[line];
        const double amp_im = q->line_im[line];

        q->line_re[line] = amp_re * phi_re - amp_im * phi_im;
        q->line_im[line] = amp_re * phi_im + amp_im * phi_re;
    }
    q->vertex_dirty = 1;
}
|
| 301 |
+
|
| 302 |
+
/*
 * Chirality flip: reverse the line order (k <-> HEX_D-1-k), complex-conjugate
 * every amplitude, and negate the chirality field. Applying it twice restores
 * the original amplitudes and chirality.
 */
void hexagram_flip(HexagramQuhit *q) {
    /* Swap mirrored pairs in place, conjugating both members. */
    for (int lo = 0, hi = HEX_D - 1; lo < hi; lo++, hi--) {
        const double lo_re = q->line_re[lo];
        const double lo_im = q->line_im[lo];

        q->line_re[lo] = q->line_re[hi];
        q->line_im[lo] = -q->line_im[hi];
        q->line_re[hi] = lo_re;
        q->line_im[hi] = -lo_im;
    }

    q->chirality = -q->chirality;
    q->vertex_dirty = 1;
}
|
| 316 |
+
|
| 317 |
+
/*
 * Triad gate: move every amplitude two lines forward,
 *   diameters 0 -> 2 -> 4 -> 0, outers 1 -> 3 -> 5 -> 1.
 * Undone by hexagram_triad_inv().
 */
void hexagram_triad(HexagramQuhit *q) {
    double next_re[HEX_D], next_im[HEX_D];

    for (int from = 0; from < HEX_D; from++) {
        const int to = (from + 2) % HEX_D;
        next_re[to] = q->line_re[from];
        next_im[to] = q->line_im[from];
    }

    memcpy(q->line_re, next_re, sizeof next_re);
    memcpy(q->line_im, next_im, sizeof next_im);
    q->vertex_dirty = 1;
}
|
| 334 |
+
|
| 335 |
+
/*
 * Inverse triad gate: move every amplitude two lines backward,
 *   diameters 0 -> 4 -> 2 -> 0, outers 1 -> 5 -> 3 -> 1.
 */
void hexagram_triad_inv(HexagramQuhit *q) {
    double next_re[HEX_D], next_im[HEX_D];

    for (int to = 0; to < HEX_D; to++) {
        const int from = (to + 2) % HEX_D;
        next_re[to] = q->line_re[from];
        next_im[to] = q->line_im[from];
    }

    memcpy(q->line_re, next_re, sizeof next_re);
    memcpy(q->line_im, next_im, sizeof next_im);
    q->vertex_dirty = 1;
}
|
| 349 |
+
|
| 350 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 351 |
+
* ENTANGLEMENT β Center-crossing interaction
|
| 352 |
+
*
|
| 353 |
+
* The hexagrammatic CZ: diameters (ββ,ββ,ββ) all pass through center.
|
| 354 |
+
* When two hexagram quhits have diameter amplitude, they interfere
|
| 355 |
+
* at the center crossing. The phase coupling is:
|
| 356 |
+
*
|
| 357 |
+
* Ο^(d_a Β· d_b) where d_a, d_b β {0,1,2} are the diameter indices
|
| 358 |
+
*
|
| 359 |
+
* Outer lines (ββ,ββ,ββ
) do not pass through center β no coupling.
|
| 360 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 361 |
+
|
| 362 |
+
void hexagram_cross(HexagramQuhit *a, HexagramQuhit *b) {
|
| 363 |
+
/* Οβ roots: Οβ^0=1, Οβ^1=(-1+iβ3)/2, Οβ^2=(-1-iβ3)/2 */
|
| 364 |
+
static const double W3R[3] = {1.0, -0.5, -0.5};
|
| 365 |
+
static const double W3I[3] = {0.0, 0.86602540378443864676, -0.86602540378443864676};
|
| 366 |
+
|
| 367 |
+
/* Diameter indices: βββd0, βββd1, βββd2 */
|
| 368 |
+
/* Map line index to diameter index: k/2 for even k */
|
| 369 |
+
|
| 370 |
+
/* Compute effective phases from partner's diameter amplitudes */
|
| 371 |
+
/* For each diameter d_a of qubit a, the effective phase is:
|
| 372 |
+
* eff_a[d_a] = Ξ£_{d_b} |b[2Β·d_b]|Β² Β· Οβ^(d_a Β· d_b) */
|
| 373 |
+
for (int da = 0; da < 3; da++) {
|
| 374 |
+
int ka = 2 * da; /* line index */
|
| 375 |
+
double eff_re = 0, eff_im = 0;
|
| 376 |
+
for (int db = 0; db < 3; db++) {
|
| 377 |
+
int kb = 2 * db;
|
| 378 |
+
double bprob = b->line_re[kb]*b->line_re[kb] + b->line_im[kb]*b->line_im[kb];
|
| 379 |
+
int idx = (da * db) % 3;
|
| 380 |
+
eff_re += bprob * W3R[idx];
|
| 381 |
+
eff_im += bprob * W3I[idx];
|
| 382 |
+
}
|
| 383 |
+
/* Apply effective phase to a's diameter amplitude */
|
| 384 |
+
double re = a->line_re[ka], im = a->line_im[ka];
|
| 385 |
+
a->line_re[ka] = re * eff_re - im * eff_im;
|
| 386 |
+
a->line_im[ka] = re * eff_im + im * eff_re;
|
| 387 |
+
}
|
| 388 |
+
|
| 389 |
+
/* Same for qubit b */
|
| 390 |
+
for (int db = 0; db < 3; db++) {
|
| 391 |
+
int kb = 2 * db;
|
| 392 |
+
double eff_re = 0, eff_im = 0;
|
| 393 |
+
for (int da = 0; da < 3; da++) {
|
| 394 |
+
int ka = 2 * da;
|
| 395 |
+
double aprob = a->line_re[ka]*a->line_re[ka] + a->line_im[ka]*a->line_im[ka];
|
| 396 |
+
int idx = (da * db) % 3;
|
| 397 |
+
eff_re += aprob * W3R[idx];
|
| 398 |
+
eff_im += aprob * W3I[idx];
|
| 399 |
+
}
|
| 400 |
+
double re = b->line_re[kb], im = b->line_im[kb];
|
| 401 |
+
b->line_re[kb] = re * eff_re - im * eff_im;
|
| 402 |
+
b->line_im[kb] = re * eff_im + im * eff_re;
|
| 403 |
+
}
|
| 404 |
+
|
| 405 |
+
/* Renormalize both quhits */
|
| 406 |
+
for (int qi = 0; qi < 2; qi++) {
|
| 407 |
+
HexagramQuhit *q = (qi == 0) ? a : b;
|
| 408 |
+
double norm = 0;
|
| 409 |
+
for (int k = 0; k < HEX_D; k++)
|
| 410 |
+
norm += q->line_re[k]*q->line_re[k] + q->line_im[k]*q->line_im[k];
|
| 411 |
+
if (norm > 1e-30 && fabs(norm - 1.0) > 1e-15) {
|
| 412 |
+
double inv = 1.0 / sqrt(norm);
|
| 413 |
+
for (int k = 0; k < HEX_D; k++) {
|
| 414 |
+
q->line_re[k] *= inv;
|
| 415 |
+
q->line_im[k] *= inv;
|
| 416 |
+
}
|
| 417 |
+
}
|
| 418 |
+
}
|
| 419 |
+
|
| 420 |
+
a->vertex_dirty = 1;
|
| 421 |
+
b->vertex_dirty = 1;
|
| 422 |
+
}
|
| 423 |
+
|
| 424 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 425 |
+
* MEASUREMENT
|
| 426 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 427 |
+
|
| 428 |
+
/*
 * One step of a 64-bit xorshift generator (shift triple 13, 7, 17).
 * Advances *s in place and returns the new state.
 *
 * The all-zero state is a fixed point of xorshift (it would return 0 forever),
 * so a zero state is replaced by a fixed non-zero constant before stepping.
 * Non-zero states produce the same sequence as before.
 */
static uint64_t xorshift64(uint64_t *s) {
    uint64_t x = *s;

    if (x == 0)
        x = 0x9E3779B97F4A7C15ULL;   /* arbitrary non-zero reseed */

    x ^= x << 13;
    x ^= x >> 7;
    x ^= x << 17;

    *s = x;
    return x;
}
|
| 433 |
+
|
| 434 |
+
void hexagram_probabilities(const HexagramQuhit *q, double *probs) {
|
| 435 |
+
for (int k = 0; k < HEX_D; k++)
|
| 436 |
+
probs[k] = q->line_re[k]*q->line_re[k] + q->line_im[k]*q->line_im[k];
|
| 437 |
+
}
|
| 438 |
+
|
| 439 |
+
/*
 * Sample a line index from the squared amplitudes and collapse q onto it.
 *
 * rng_state : xorshift64 state, advanced by one step.
 * Returns the measured line index in 0..HEX_D-1; afterwards q has unit
 * amplitude on that line and a stale vertex cache.
 *
 * Two fixes over the previous version:
 *  - the uniform draw is scaled by the total probability, so a state that is
 *    not exactly normalised is still sampled in proportion to its weights;
 *  - if rounding leaves the draw beyond the cumulative sum, the fallback is
 *    the last line with non-zero probability, not unconditionally line
 *    HEX_D-1 (which may have probability 0).
 * An all-zero state still collapses to line HEX_D-1, as before.
 */
int hexagram_measure(HexagramQuhit *q, uint64_t *rng_state) {
    double probs[HEX_D];
    hexagram_probabilities(q, probs);

    double total = 0;
    int fallback = HEX_D - 1;
    for (int k = 0; k < HEX_D; k++) {
        total += probs[k];
        if (probs[k] > 0.0)
            fallback = k;
    }

    /* 52 random bits -> uniform value in [0, 1), then scale to [0, total). */
    double u = (double)(xorshift64(rng_state) & 0xFFFFFFFFFFFFF) / (double)0x10000000000000;
    double r = u * total;

    double cumul = 0;
    int outcome = fallback;
    for (int k = 0; k < HEX_D; k++) {
        cumul += probs[k];
        if (r < cumul) { outcome = k; break; }
    }

    /* Collapse onto the measured line. */
    memset(q->line_re, 0, sizeof(q->line_re));
    memset(q->line_im, 0, sizeof(q->line_im));
    q->line_re[outcome] = 1.0;
    q->vertex_dirty = 1;

    return outcome;
}
|
| 460 |
+
|
| 461 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 462 |
+
* INTERCONVERSION
|
| 463 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 464 |
+
|
| 465 |
+
/*
 * Refresh the cached vertex-basis amplitudes from the line amplitudes
 * (via apply_H6_adj) if the cache is marked stale; otherwise do nothing.
 */
void hexagram_ensure_vertex(HexagramQuhit *q) {
    if (q->vertex_dirty) {
        apply_H6_adj(q->line_re, q->line_im, q->vertex_re, q->vertex_im);
        q->vertex_dirty = 0;
    }
}
|
| 470 |
+
|
| 471 |
+
/* Return the real parts of the vertex-basis amplitudes (HEX_D entries),
 * refreshing the cache first if needed. The pointer aliases q's storage. */
const double *hexagram_vertex_re(HexagramQuhit *q) {
    hexagram_ensure_vertex(q);
    const double *cached = q->vertex_re;
    return cached;
}
|
| 475 |
+
|
| 476 |
+
/* Return the imaginary parts of the vertex-basis amplitudes (HEX_D entries),
 * refreshing the cache first if needed. The pointer aliases q's storage. */
const double *hexagram_vertex_im(HexagramQuhit *q) {
    hexagram_ensure_vertex(q);
    const double *cached = q->vertex_im;
    return cached;
}
|
| 480 |
+
|
| 481 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 482 |
+
* DIAGNOSTICS
|
| 483 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 484 |
+
|
| 485 |
+
/* Type of line k (LINE_DIAMETER or LINE_OUTER); -1 if k is out of range
 * (previously an out-of-bounds read). */
int hexagram_line_type(int k) {
    if (k < 0 || k >= (int)(sizeof LINE_TYPES / sizeof LINE_TYPES[0]))
        return -1;
    return LINE_TYPES[k];
}
|
| 486 |
+
/* Colour channel of line k (0=C, 1=M, 2=Y); -1 if k is out of range
 * (previously an out-of-bounds read). */
int hexagram_line_color(int k) {
    if (k < 0 || k >= (int)(sizeof LINE_COLORS / sizeof LINE_COLORS[0]))
        return -1;
    return LINE_COLORS[k];
}
|
| 487 |
+
/* Printable label of line k (e.g. "l0 diam C"); "?" if k is out of range
 * (previously an out-of-bounds read). The string is static; do not free it. */
const char *hexagram_line_name(int k) {
    if (k < 0 || k >= (int)(sizeof LINE_NAMES / sizeof LINE_NAMES[0]))
        return "?";
    return LINE_NAMES[k];
}
|
| 488 |
+
|
| 489 |
+
void hexagram_print(const HexagramQuhit *q, const char *label) {
|
| 490 |
+
const char *chir = (q->chirality == CHIRALITY_POS) ? "+" : "-";
|
| 491 |
+
printf("HexagramQuhit [%s] chirality=%s\n", label ? label : "", chir);
|
| 492 |
+
for (int k = 0; k < HEX_D; k++) {
|
| 493 |
+
double p = q->line_re[k]*q->line_re[k] + q->line_im[k]*q->line_im[k];
|
| 494 |
+
printf(" |%s>: (%+.6f %+.6fi) P=%.4f\n",
|
| 495 |
+
LINE_NAMES[k], q->line_re[k], q->line_im[k], p);
|
| 496 |
+
}
|
| 497 |
+
double total = 0;
|
| 498 |
+
for (int k = 0; k < HEX_D; k++)
|
| 499 |
+
total += q->line_re[k]*q->line_re[k] + q->line_im[k]*q->line_im[k];
|
| 500 |
+
printf(" ||psi||^2 = %.10f\n", total);
|
| 501 |
+
}
|
quhit_hexagram.h
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* quhit_hexagram.h β The Hexagram Quhit
|
| 3 |
+
*
|
| 4 |
+
* A new quantum primitive: the EDGE DUAL of the triality quhit.
|
| 5 |
+
*
|
| 6 |
+
* The standard (triality) quhit stores amplitudes on 6 VERTICES of
|
| 7 |
+
* the hexagon β the computational basis states |0β©...|5β©.
|
| 8 |
+
*
|
| 9 |
+
* The hexagram quhit stores amplitudes on 6 LINE SEGMENTS of the
|
| 10 |
+
* unicursal hexagram β the face diagonals of the cube projected along
|
| 11 |
+
* its body diagonal (1,1,1).
|
| 12 |
+
*
|
| 13 |
+
* The 6 hexagram lines (unicursal traversal order):
|
| 14 |
+
*
|
| 15 |
+
* ββ: diameter EβcenterβD (cyan, C face diagonals)
|
| 16 |
+
* ββ: outer DβC (yellow, Y face diagonal)
|
| 17 |
+
* ββ: diameter CβcenterβF (magenta, M face diagonals)
|
| 18 |
+
* ββ: outer FβB (cyan, C face diagonal)
|
| 19 |
+
* ββ: diameter BβcenterβG (yellow, Y face diagonals)
|
| 20 |
+
* ββ
: outer GβE (magenta, M face diagonal)
|
| 21 |
+
*
|
| 22 |
+
* Key properties:
|
| 23 |
+
* - Chirality is intrinsic: the unicursal path has a direction.
|
| 24 |
+
* The two orientations correspond to the two mirror tetrahedra
|
| 25 |
+
* inscribed in the cube.
|
| 26 |
+
* - Ξ=0 is the native ground state (hexagram states encode the
|
| 27 |
+
* exotic Sβ automorphism structure naturally).
|
| 28 |
+
* - The Hβ transform (vertex β hexagram) is derived from the
|
| 29 |
+
* body-diagonal projection of face diagonals β NOT the DFTβ.
|
| 30 |
+
*
|
| 31 |
+
* Vertex model: TrialityQuhit (amplitudes on points)
|
| 32 |
+
* Edge model: HexagramQuhit (amplitudes on paths)
|
| 33 |
+
* Duality: Kramers-Wannier, mediated by Sβ outer automorphism
|
| 34 |
+
*/
|
| 35 |
+
|
| 36 |
+
#ifndef QUHIT_HEXAGRAM_H
|
| 37 |
+
#define QUHIT_HEXAGRAM_H
|
| 38 |
+
|
| 39 |
+
#include <stdint.h>
|
| 40 |
+
|
| 41 |
+
#define HEX_D 6
|
| 42 |
+
|
| 43 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 44 |
+
* CHIRALITY β Path orientation of the unicursal hexagram
|
| 45 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 46 |
+
|
| 47 |
+
#define CHIRALITY_POS (+1) /* βββββββββββββββββ
= tetrahedron A */
|
| 48 |
+
#define CHIRALITY_NEG (-1) /* ββ
βββββββββββββββ = tetrahedron B (mirror) */
|
| 49 |
+
|
| 50 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 51 |
+
* LINE SEGMENT TYPES
|
| 52 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 53 |
+
|
| 54 |
+
#define LINE_DIAMETER 0 /* Passes through center (2 face diagonals merged) */
|
| 55 |
+
#define LINE_OUTER 1 /* Outer edge connecting adjacent hex vertices */
|
| 56 |
+
|
| 57 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 58 |
+
* THE HEXAGRAM QUHIT
|
| 59 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 60 |
+
|
| 61 |
+
typedef struct {
|
| 62 |
+
/* 6 complex amplitudes β one per hexagram line segment */
|
| 63 |
+
double line_re[HEX_D];
|
| 64 |
+
double line_im[HEX_D];
|
| 65 |
+
|
| 66 |
+
/* Chirality: +1 (positive traversal) or -1 (mirror traversal) */
|
| 67 |
+
int chirality;
|
| 68 |
+
|
| 69 |
+
/* Cached vertex-basis representation (for interconversion) */
|
| 70 |
+
double vertex_re[HEX_D];
|
| 71 |
+
double vertex_im[HEX_D];
|
| 72 |
+
uint8_t vertex_dirty; /* 1 if vertex cache is stale */
|
| 73 |
+
|
| 74 |
+
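    /* Line metadata is NOT stored in this struct; it is fixed per line index
     * and queried via hexagram_line_type(k) / hexagram_line_color(k): */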
|
| 75 |
+
/* line_type[k]: LINE_DIAMETER or LINE_OUTER */
|
| 76 |
+
/* line_color[k]: 0=C(cyan), 1=M(magenta), 2=Y(yellow) */
|
| 77 |
+
} HexagramQuhit;
|
| 78 |
+
|
| 79 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 80 |
+
* Hβ TRANSFORM β The body-diagonal projection matrix
|
| 81 |
+
*
|
| 82 |
+
* Hβ converts vertex amplitudes β hexagram-line amplitudes.
|
| 83 |
+
* Hββ converts hexagram-line amplitudes β vertex amplitudes.
|
| 84 |
+
*
|
| 85 |
+
* Derivation: each hexagram line ββ is a specific combination of
|
| 86 |
+
* vertex states determined by which cube face diagonals project
|
| 87 |
+
* onto that line under the body-diagonal (1,1,1) projection.
|
| 88 |
+
*
|
| 89 |
+
* The matrix is syntheme-weighted: diameters combine antipodal
|
| 90 |
+
* vertex pairs (both diagonals of a face), outer edges combine
|
| 91 |
+
* adjacent vertex pairs (single diagonal connecting two faces).
|
| 92 |
+
*
|
| 93 |
+
* Hβ is UNITARY: Hβ Β· Hββ = I.
|
| 94 |
+
* Hβ is NOT the DFTβ β it encodes geometry, not Fourier analysis.
|
| 95 |
+
* ββββββββββββββββββββββββββοΏ½οΏ½οΏ½ββββββββββββββββββββββββββββββββββββββββββββ */
|
| 96 |
+
|
| 97 |
+
/* The 6Γ6 Hβ transform matrices (precomputed at init) */
|
| 98 |
+
extern double H6_re[HEX_D][HEX_D];
|
| 99 |
+
extern double H6_im[HEX_D][HEX_D];
|
| 100 |
+
extern double H6_adj_re[HEX_D][HEX_D]; /* Hββ (adjoint) */
|
| 101 |
+
extern double H6_adj_im[HEX_D][HEX_D];
|
| 102 |
+
|
| 103 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 104 |
+
* LIFECYCLE
|
| 105 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 106 |
+
|
| 107 |
+
/* Initialize the Hβ transform tables. Call once at startup. */
|
| 108 |
+
void hexagram_init_tables(void);
|
| 109 |
+
|
| 110 |
+
/* Initialize to the "first line" state |βββ© with positive chirality */
|
| 111 |
+
void hexagram_init(HexagramQuhit *q);
|
| 112 |
+
|
| 113 |
+
/* Initialize from a standard-basis state vector via Hβ transform */
|
| 114 |
+
void hexagram_init_from_vertex(HexagramQuhit *q,
|
| 115 |
+
const double *vert_re, const double *vert_im,
|
| 116 |
+
int chirality);
|
| 117 |
+
|
| 118 |
+
/* Initialize to a specific hexagram line segment |βββ© */
|
| 119 |
+
void hexagram_init_line(HexagramQuhit *q, int k, int chirality);
|
| 120 |
+
|
| 121 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 122 |
+
* NATIVE HEXAGRAM GATES β O(D) operations
|
| 123 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 124 |
+
|
| 125 |
+
/* Path shift: advance along the unicursal path by Ξ΄ segments.
|
| 126 |
+
* |βββ© β |β_{(k+Ξ΄) mod 6}β©
|
| 127 |
+
 * This is a cyclic PERMUTATION of the hexagram-basis amplitudes (not a diagonal operator) — O(D).
|
| 128 |
+
* Ξ΄>0 = forward along chirality, Ξ΄<0 = backward. */
|
| 129 |
+
void hexagram_path_shift(HexagramQuhit *q, int delta);
|
| 130 |
+
|
| 131 |
+
/* Per-line phase gate: |βββ© β e^{iΟβ}|βββ©
|
| 132 |
+
* Diagonal in hexagram basis β O(D). */
|
| 133 |
+
void hexagram_phase(HexagramQuhit *q, const double *phi_re, const double *phi_im);
|
| 134 |
+
|
| 135 |
+
/* Diameter phase: apply phase only to diameter lines (ββ,ββ,ββ).
|
| 136 |
+
* This targets the "through-center" segments specifically. O(3). */
|
| 137 |
+
void hexagram_diameter_phase(HexagramQuhit *q, double phi_re, double phi_im);
|
| 138 |
+
|
| 139 |
+
/* Outer phase: apply phase only to outer lines (ββ,ββ,ββ
). O(3). */
|
| 140 |
+
void hexagram_outer_phase(HexagramQuhit *q, double phi_re, double phi_im);
|
| 141 |
+
|
| 142 |
+
/* Chirality flip: reverse the path orientation.
|
| 143 |
+
* Corresponds to switching between the two mirror tetrahedra.
|
| 144 |
+
* |ββ, +β© β |β_{5-k}, -β© (reversal + conjugation)
|
| 145 |
+
* This is an INVOLUTION: flip β flip = identity. O(D). */
|
| 146 |
+
void hexagram_flip(HexagramQuhit *q);
|
| 147 |
+
|
| 148 |
+
/* Triad gate: simultaneous rotation of all 3 diameters.
|
| 149 |
+
* ββββββββ (diameters cycle), ββββββββ
(outers cycle).
|
| 150 |
+
* This is the Ο-image of triality_rotate. O(D). */
|
| 151 |
+
void hexagram_triad(HexagramQuhit *q);
|
| 152 |
+
|
| 153 |
+
/* Inverse triad. O(D). */
|
| 154 |
+
void hexagram_triad_inv(HexagramQuhit *q);
|
| 155 |
+
|
| 156 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 157 |
+
* ENTANGLEMENT β Center-crossing interaction
|
| 158 |
+
*
|
| 159 |
+
* Two hexagram quhits can entangle through shared center crossings.
|
| 160 |
+
* The 3 diameters all pass through the center point β when two
|
| 161 |
+
* hexagram states have amplitude on overlapping diameters, they
|
| 162 |
+
* interfere at the crossing.
|
| 163 |
+
*
|
| 164 |
+
* This is the hexagrammatic analog of CZ: it couples the diameter
|
| 165 |
+
* amplitudes of both quhits while leaving outer amplitudes unchanged.
|
| 166 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 167 |
+
|
| 168 |
+
void hexagram_cross(HexagramQuhit *a, HexagramQuhit *b);
|
| 169 |
+
|
| 170 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 171 |
+
* MEASUREMENT
|
| 172 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 173 |
+
|
| 174 |
+
/* Measure which hexagram line the state occupies.
|
| 175 |
+
* Returns outcome 0..5. Collapses state. */
|
| 176 |
+
int hexagram_measure(HexagramQuhit *q, uint64_t *rng_state);
|
| 177 |
+
|
| 178 |
+
/* Probability distribution over the 6 lines β no collapse. O(D). */
|
| 179 |
+
void hexagram_probabilities(const HexagramQuhit *q, double *probs);
|
| 180 |
+
|
| 181 |
+
/* ββββββββββββββοΏ½οΏ½οΏ½ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 182 |
+
* INTERCONVERSION β Vertex model β Edge model
|
| 183 |
+
*
|
| 184 |
+
* These use the Hβ transform to convert between the two dual
|
| 185 |
+
* representations. The conversion is exact (Hβ is unitary).
|
| 186 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 187 |
+
|
| 188 |
+
/* Ensure vertex cache is up-to-date (applies Hββ ) */
|
| 189 |
+
void hexagram_ensure_vertex(HexagramQuhit *q);
|
| 190 |
+
|
| 191 |
+
/* Get read-only vertex amplitudes (ensures first) */
|
| 192 |
+
const double *hexagram_vertex_re(HexagramQuhit *q);
|
| 193 |
+
const double *hexagram_vertex_im(HexagramQuhit *q);
|
| 194 |
+
|
| 195 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 196 |
+
* DIAGNOSTICS
|
| 197 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 198 |
+
|
| 199 |
+
/* Print hexagram state: line amplitudes + chirality */
|
| 200 |
+
void hexagram_print(const HexagramQuhit *q, const char *label);
|
| 201 |
+
|
| 202 |
+
/* Line metadata */
|
| 203 |
+
int hexagram_line_type(int k); /* LINE_DIAMETER or LINE_OUTER */
|
| 204 |
+
int hexagram_line_color(int k); /* 0=C, 1=M, 2=Y */
|
| 205 |
+
const char *hexagram_line_name(int k); /* e.g. "ββ diam C" */
|
| 206 |
+
|
| 207 |
+
#endif /* QUHIT_HEXAGRAM_H */
|
quhit_triality.c
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
quhit_triality.h
ADDED
|
@@ -0,0 +1,387 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* quhit_triality.h β The Triality Quhit
|
| 3 |
+
*
|
| 4 |
+
* A new quantum primitive based on the CMY geometric principle:
|
| 5 |
+
* three mutually-defining views (Edge/Vertex/Diagonal) where each
|
| 6 |
+
* view's structure IS the other views' structure in a different role.
|
| 7 |
+
*
|
| 8 |
+
* Edge of A = Vertex of B = Diagonal of C (cyclic)
|
| 9 |
+
*
|
| 10 |
+
* The triality quhit stores state in all three views with lazy
|
| 11 |
+
* conversion. Gates automatically execute in their cheapest view:
|
| 12 |
+
* Phase gates β Edge view O(D)
|
| 13 |
+
* Shift gates β Vertex view O(D)
|
| 14 |
+
* Conjugate ops β Diagonal view O(D)
|
| 15 |
+
* General β any view O(DΒ²)
|
| 16 |
+
*
|
| 17 |
+
 * Average gate cost: O(12) instead of O(36). 3× free speedup.
|
| 18 |
+
*
|
| 19 |
+
*/
|
| 20 |
+
|
| 21 |
+
#ifndef QUHIT_TRIALITY_H
|
| 22 |
+
#define QUHIT_TRIALITY_H
|
| 23 |
+
|
| 24 |
+
#include <stdint.h>
|
| 25 |
+
#include "s6_exotic.h"
|
| 26 |
+
|
| 27 |
+
#define TRI_D 6
|
| 28 |
+
|
| 29 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 30 |
+
* VIEW IDENTIFIERS
|
| 31 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 32 |
+
|
| 33 |
+
#define VIEW_EDGE 0 /* Computational basis — Yellow square */
|
| 34 |
+
#define VIEW_VERTEX 1 /* Fourier basis (DFT₆) — Cyan square */
|
| 35 |
+
#define VIEW_DIAGONAL 2 /* Conjugate Fourier (DFT₆²) — Magenta square */
|
| 36 |
+
#define VIEW_FOLDED 3 /* Antipodal fold: Stage 1 of factored DFT₆ */
|
| 37 |
+
#define VIEW_EXOTIC 4 /* Exotic fold: syntheme-parameterized (outer automorphism) */
|
| 38 |
+
#define VIEW_TETRA 5 /* Tetrahedral eigenbasis: DFT₆ eigenspace decomposition */
|
| 39 |
+
|
| 40 |
+
/* Dirty bitmask: bit 0-5 for each view */
|
| 41 |
+
#define DIRTY_EDGE 0x01
|
| 42 |
+
#define DIRTY_VERTEX 0x02
|
| 43 |
+
#define DIRTY_DIAGONAL 0x04
|
| 44 |
+
#define DIRTY_FOLDED 0x08
|
| 45 |
+
#define DIRTY_EXOTIC 0x10
|
| 46 |
+
#define DIRTY_TETRA 0x20
|
| 47 |
+
#define DIRTY_ALL 0x3F
|
| 48 |
+
|
| 49 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 50 |
+
* THE TRIALITY QUHIT
|
| 51 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 52 |
+
|
| 53 |
+
/* One D=6 quhit stored redundantly: a real/imag amplitude buffer pair per
 * VIEW_* identifier, plus staleness bookkeeping, enhancement flags and a
 * cache for the exotic invariant. */
typedef struct {
|
| 54 |
+
/* Per-view amplitude buffers of the same quantum state (six pairs, one per VIEW_* id) */
|
| 55 |
+
double edge_re[TRI_D], edge_im[TRI_D]; /* |ψ⟩ in computational basis (VIEW_EDGE) */
|
| 56 |
+
double vertex_re[TRI_D], vertex_im[TRI_D]; /* |ψ⟩ in Fourier basis (VIEW_VERTEX) */
|
| 57 |
+
double diag_re[TRI_D], diag_im[TRI_D]; /* |ψ⟩ in conjugate basis (VIEW_DIAGONAL) */
|
| 58 |
+
double folded_re[TRI_D], folded_im[TRI_D]; /* Antipodal fold intermediate (VIEW_FOLDED) */
|
| 59 |
+
double exotic_re[TRI_D], exotic_im[TRI_D]; /* Exotic fold (alt syntheme) (VIEW_EXOTIC) */
|
| 60 |
+
double tetra_re[TRI_D], tetra_im[TRI_D]; /* DFT₆ eigenbasis coefficients (VIEW_TETRA) */
|
| 61 |
+
int exotic_syntheme; /* Which syntheme to use for exotic view */
|
| 62 |
+
|
| 63 |
+
uint8_t dirty; /* Which views are stale: OR of DIRTY_* masks (bits 0-5, DIRTY_ALL = 0x3F) */
|
| 64 |
+
uint8_t primary; /* Which view was last written (0/1/2/3) — NOTE(review): confirm whether VIEW_EXOTIC/VIEW_TETRA (4/5) can also be primary */
|
| 65 |
+
|
| 66 |
+
/* ── Enhancement flags ── */
|
| 67 |
+
int8_t eigenstate_class; /* -1=unknown, 0..3=DFT₆ eigenvalue {1,-1,i,-i} */
|
| 68 |
+
uint8_t active_mask; /* Bitmask of non-zero basis states (6 bits) */
|
| 69 |
+
uint8_t active_count; /* popcount(active_mask), 1..6 */
|
| 70 |
+
uint8_t real_valued; /* 1 if all imaginary parts are zero */
|
| 71 |
+
|
| 72 |
+
/* ── Exotic invariant cache (Fix #5) ── */
|
| 73 |
+
double cached_delta; /* Cached exotic invariant Δ */
|
| 74 |
+
double cached_fingerprint[11];/* Cached conjugacy-class deltas */
|
| 75 |
+
uint8_t delta_valid; /* 1 if cached values are up-to-date */
|
| 76 |
+
} TrialityQuhit;
|
| 77 |
+
|
| 78 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 79 |
+
* TRIALITY PAIR β Two entangled triality quhits
|
| 80 |
+
* Each partner contributes a different view to the joint state.
|
| 81 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 82 |
+
|
| 83 |
+
/* Joint state of two triality quhits: TRI_D*TRI_D complex amplitudes split
 * into real and imaginary arrays, plus the view each partner contributes. */
typedef struct {
|
| 84 |
+
double joint_re[TRI_D * TRI_D]; /* real parts of the joint amplitudes — NOTE(review): index layout (presumably a*TRI_D + b) is not shown here; confirm */
|
| 85 |
+
double joint_im[TRI_D * TRI_D]; /* imaginary parts, same indexing as joint_re */
|
| 86 |
+
int view_a; /* which view partner A contributes */
|
| 87 |
+
int view_b; /* which view partner B contributes */
|
| 88 |
+
} TrialityPair;
|
| 89 |
+
|
| 90 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 91 |
+
* LIFECYCLE
|
| 92 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββοΏ½οΏ½οΏ½βββββββββββββββββββββββ */
|
| 93 |
+
|
| 94 |
+
/* Initialize to |0β© with all views clean */
|
| 95 |
+
void triality_init(TrialityQuhit *q);
|
| 96 |
+
|
| 97 |
+
/* Initialize to basis state |kβ© */
|
| 98 |
+
void triality_init_basis(TrialityQuhit *q, int k);
|
| 99 |
+
|
| 100 |
+
/* Copy */
|
| 101 |
+
void triality_copy(TrialityQuhit *dst, const TrialityQuhit *src);
|
| 102 |
+
|
| 103 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 104 |
+
* VIEW MANAGEMENT β Lazy DFTβ conversion
|
| 105 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 106 |
+
|
| 107 |
+
/* Ensure a specific view is up-to-date (converts from primary if dirty) */
|
| 108 |
+
void triality_ensure_view(TrialityQuhit *q, int view);
|
| 109 |
+
|
| 110 |
+
/* Force recompute all views from primary */
|
| 111 |
+
void triality_sync_all(TrialityQuhit *q);
|
| 112 |
+
|
| 113 |
+
/* Get read-only access to a view (ensures it first) */
|
| 114 |
+
const double *triality_view_re(TrialityQuhit *q, int view);
|
| 115 |
+
const double *triality_view_im(TrialityQuhit *q, int view);
|
| 116 |
+
|
| 117 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 118 |
+
* OPTIMAL-VIEW GATES β O(D) when gate matches view
|
| 119 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 120 |
+
|
| 121 |
+
/* Phase gate: |k⟩ → e^{iφₖ}|k⟩ — diagonal in EDGE view, O(D) */
|
| 122 |
+
void triality_phase(TrialityQuhit *q, const double *phi_re, const double *phi_im);
|
| 123 |
+
|
| 124 |
+
/* Single-phase: |k⟩ → e^{iφ}|k⟩, all others unchanged — O(1) */
|
| 125 |
+
void triality_phase_single(TrialityQuhit *q, int k, double phi_re, double phi_im);
|
| 126 |
+
|
| 127 |
+
/* Z gate: |k⟩ → ω^k |k⟩ — diagonal in EDGE view, O(D) */
|
| 128 |
+
void triality_z(TrialityQuhit *q);
|
| 129 |
+
|
| 130 |
+
/* Shift gate: |k⟩ → |k+δ mod D⟩ — diagonal in VERTEX view, O(D) */
|
| 131 |
+
void triality_shift(TrialityQuhit *q, int delta);
|
| 132 |
+
|
| 133 |
+
/* X gate: |k⟩ → |k+1 mod D⟩ — diagonal in VERTEX view, O(D) */
|
| 134 |
+
void triality_x(TrialityQuhit *q);
|
| 135 |
+
|
| 136 |
+
/* DFT₆: rotates edge→vertex→diagonal→edge — view rotation, O(D²) once */
|
| 137 |
+
void triality_dft(TrialityQuhit *q);
|
| 138 |
+
|
| 139 |
+
/* Inverse DFTβ */
|
| 140 |
+
void triality_idft(TrialityQuhit *q);
|
| 141 |
+
|
| 142 |
+
/* General unitary in a specific view β O(DΒ²) */
|
| 143 |
+
void triality_unitary(TrialityQuhit *q, int view,
|
| 144 |
+
const double *U_re, const double *U_im);
|
| 145 |
+
|
| 146 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 147 |
+
* CZ GATE β O(D) in edge view (diagonal)
|
| 148 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 149 |
+
|
| 150 |
+
void triality_cz(TrialityQuhit *a, TrialityQuhit *b);
|
| 151 |
+
|
| 152 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 153 |
+
* MEASUREMENT β O(D) via cached view
|
| 154 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 155 |
+
|
| 156 |
+
/* Measure in a specific view basis. Returns outcome 0..D-1. Collapses state. */
|
| 157 |
+
int triality_measure(TrialityQuhit *q, int view, uint64_t *rng_state);
|
| 158 |
+
|
| 159 |
+
/* Probability distribution in a view β O(D), no collapse */
|
| 160 |
+
void triality_probabilities(TrialityQuhit *q, int view, double *probs);
|
| 161 |
+
|
| 162 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 163 |
+
* TRIALITY ROTATION β The geometric heart
|
| 164 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 165 |
+
|
| 166 |
+
/* Rotate the role assignment: EdgeβVertexβDiagonalβEdge
|
| 167 |
+
* This is a FREE operation β it just relabels which view is which.
|
| 168 |
+
* No amplitudes are modified. O(1). */
|
| 169 |
+
void triality_rotate(TrialityQuhit *q);
|
| 170 |
+
|
| 171 |
+
/* Inverse rotation: DiagonalβVertexβEdgeβDiagonal. O(1). */
|
| 172 |
+
void triality_rotate_inv(TrialityQuhit *q);
|
| 173 |
+
|
| 174 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 175 |
+
* Sβ OUTER AUTOMORPHISM β Exotic Extensions
|
| 176 |
+
*
|
| 177 |
+
* Sβ is the ONLY symmetric group with a non-trivial outer automorphism.
|
| 178 |
+
* These functions exploit this D=6-unique structure.
|
| 179 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 180 |
+
|
| 181 |
+
/* Initialize the exotic engine (builds Ο table). Call once at startup. */
|
| 182 |
+
void triality_exotic_init(void);
|
| 183 |
+
|
| 184 |
+
/* Set which syntheme the exotic view uses (default: 0 = {(01),(23),(45)}) */
|
| 185 |
+
void triality_set_exotic_syntheme(TrialityQuhit *q, int syntheme_idx);
|
| 186 |
+
|
| 187 |
+
/* Fold using any of the 15 synthemes instead of the default antipodal */
|
| 188 |
+
void triality_fold_syntheme(TrialityQuhit *q, int syntheme_idx);
|
| 189 |
+
void triality_unfold_syntheme(TrialityQuhit *q, int syntheme_idx);
|
| 190 |
+
|
| 191 |
+
/* Apply exotic gate: uses φ(σ) instead of σ. O(D). */
|
| 192 |
+
void triality_exotic_gate(TrialityQuhit *q, S6Perm sigma);
|
| 193 |
+
|
| 194 |
+
/* Dual CZ: standard CZ + exotic channel information. Returns the
|
| 195 |
+
* statistical distance between standard and exotic channels. */
|
| 196 |
+
double triality_cz_dual(TrialityQuhit *a, TrialityQuhit *b);
|
| 197 |
+
|
| 198 |
+
/* Measure in the exotic fold basis. Returns outcome 0..D-1. */
|
| 199 |
+
int triality_measure_exotic(TrialityQuhit *q, int syntheme_idx, uint64_t *rng_state);
|
| 200 |
+
|
| 201 |
+
/* Dual measurement: returns both standard and exotic outcomes.
|
| 202 |
+
* Exotic outcome is in *exotic_outcome. Standard is returned. */
|
| 203 |
+
int triality_measure_dual(TrialityQuhit *q, int view, int exotic_syntheme,
|
| 204 |
+
uint64_t *rng_state, int *exotic_outcome);
|
| 205 |
+
|
| 206 |
+
/* 6-fold rotation: cycles through all 6 synthematic views.
|
| 207 |
+
* Standard rotate: EdgeβVertexβDiagonalβEdge (3-cycle, views 0β1β2β0)
|
| 208 |
+
* Exotic rotate: Also cycles the exotic syntheme through its total.
|
| 209 |
+
* This accesses the full Aut(S₆) ≅ S₆ ⋊ Z₂ structure. */
|
| 210 |
+
void triality_rotate_exotic(TrialityQuhit *q);
|
| 211 |
+
|
| 212 |
+
/* Probabilities in both standard and exotic bases β no collapse */
|
| 213 |
+
void triality_dual_probabilities(TrialityQuhit *q, int view,
|
| 214 |
+
double *probs_std, double *probs_exo);
|
| 215 |
+
|
| 216 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 217 |
+
* GEOMETRIC COSMOLOGY ENHANCEMENTS
|
| 218 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 219 |
+
|
| 220 |
+
/* ββ Enhancement 1: Folded View ββ */
|
| 221 |
+
/* Fold: pair antipodal vertices (0β3, 1β4, 2β5) via Hadamard.
|
| 222 |
+
* This is Stage 1 of the factored DFTβ (Cooley-Tukey 6=2Γ3).
|
| 223 |
+
* vesica[k] = (ψ[k] + ψ[k+3]) / √2 (k=0,1,2)
|
| 224 |
+
* wave[k] = (ψ[k] - ψ[k+3]) / √2 (k=0,1,2) */
|
| 225 |
+
void triality_fold(TrialityQuhit *q);
|
| 226 |
+
void triality_unfold(TrialityQuhit *q);
|
| 227 |
+
|
| 228 |
+
/* Convert EdgeβVertex via the folded intermediate (O(18) vs O(36)) */
|
| 229 |
+
void triality_ensure_view_via_fold(TrialityQuhit *q, int target_view);
|
| 230 |
+
|
| 231 |
+
/* ββ Enhancement 5: Tetrahedral Eigenbasis ββ */
|
| 232 |
+
/* Decompose state into DFT₆ eigenspaces {λ=1(×2), λ=-1(×2), λ=i, λ=-i}.
|
| 233 |
+
* Once cached, all view conversions and DFT/IDFT gates become O(D). */
|
| 234 |
+
void triality_ensure_tetra(TrialityQuhit *q);
|
| 235 |
+
|
| 236 |
+
/* Convert from tetra cache to any standard view β O(DΒ²) but avoids
|
| 237 |
+
* needing a clean standard view as starting point */
|
| 238 |
+
void triality_tetra_to_view(TrialityQuhit *q, int target_view);
|
| 239 |
+
|
| 240 |
+
/* DFTβ via tetra: multiply each eigencomponent by Ξ» β O(D) */
|
| 241 |
+
void triality_dft_via_tetra(TrialityQuhit *q);
|
| 242 |
+
void triality_idft_via_tetra(TrialityQuhit *q);
|
| 243 |
+
|
| 244 |
+
/* Cached exotic invariant β returns Ξ without recomputing if state is unchanged */
|
| 245 |
+
double triality_exotic_invariant_cached(TrialityQuhit *q);
|
| 246 |
+
void triality_exotic_fingerprint_cached(TrialityQuhit *q, double *deltas);
|
| 247 |
+
|
| 248 |
+
/* Invalidate exotic cache (called internally after state-modifying operations) */
|
| 249 |
+
void triality_invalidate_exotic_cache(TrialityQuhit *q);
|
| 250 |
+
|
| 251 |
+
/* ββ Enhancement 2: Eigenstate Detection ββ */
|
| 252 |
+
/* Detect if state is a DFTβ eigenstate. Sets eigenstate_class.
|
| 253 |
+
* Returns eigenstate_class (0..3) or -1 if not an eigenstate. */
|
| 254 |
+
int triality_detect_eigenstate(TrialityQuhit *q);
|
| 255 |
+
|
| 256 |
+
/* Clear eigenstate flag (call when non-diagonal gate is applied) */
|
| 257 |
+
void triality_clear_eigenstate(TrialityQuhit *q);
|
| 258 |
+
|
| 259 |
+
/* ββ Enhancement 3: Subspace Confinement ββ */
|
| 260 |
+
/* Recompute active_mask and active_count from current edge amplitudes */
|
| 261 |
+
void triality_update_mask(TrialityQuhit *q);
|
| 262 |
+
|
| 263 |
+
/* ββ Enhancement 4: Real-Valued Detection ββ */
|
| 264 |
+
/* Detect and set real_valued flag from current edge amplitudes */
|
| 265 |
+
void triality_detect_real(TrialityQuhit *q);
|
| 266 |
+
|
| 267 |
+
/* ββ Combined: refresh all enhancement flags ββ */
|
| 268 |
+
void triality_refresh_flags(TrialityQuhit *q);
|
| 269 |
+
|
| 270 |
+
/* βββββββββββββββββββββββββββββββββββοΏ½οΏ½οΏ½βββββββββββββββββββββββββββββββββββ
|
| 271 |
+
* DIAGNOSTICS
|
| 272 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 273 |
+
|
| 274 |
+
/* Print state in all three views */
|
| 275 |
+
void triality_print(TrialityQuhit *q, const char *label);
|
| 276 |
+
|
| 277 |
+
/* View conversion count (for benchmarking) */
|
| 278 |
+
/* Global benchmarking counters for the triality engine (see the extern
 * triality_stats declared after this type). */
typedef struct {
|
| 279 |
+
/* View-conversion counters, named <from>_to_<to> — presumably bumped once per conversion; confirm in quhit_triality.c */
uint64_t edge_to_vertex;
|
| 280 |
+
uint64_t edge_to_diag;
|
| 281 |
+
uint64_t vertex_to_edge;
|
| 282 |
+
uint64_t vertex_to_diag;
|
| 283 |
+
uint64_t diag_to_edge;
|
| 284 |
+
uint64_t diag_to_vertex;
|
| 285 |
+
uint64_t edge_to_folded;
|
| 286 |
+
uint64_t folded_to_vertex;
|
| 287 |
+
uint64_t gates_edge; /* gates executed in edge view */
|
| 288 |
+
uint64_t gates_vertex; /* gates executed in vertex view */
|
| 289 |
+
uint64_t gates_diag; /* gates executed in diagonal view */
|
| 290 |
+
uint64_t rotations; /* O(1) triality rotations */
|
| 291 |
+
uint64_t eigenstate_skips; /* view conversions skipped by eigenstate flag */
|
| 292 |
+
uint64_t mask_skips; /* operations skipped by active_mask */
|
| 293 |
+
uint64_t real_fast_path; /* operations using real-valued fast path */
|
| 294 |
+
uint64_t exotic_folds; /* exotic syntheme fold operations */
|
| 295 |
+
uint64_t exotic_gates; /* exotic-automorphism gate applications */
|
| 296 |
+
uint64_t dual_measurements; /* dual standard+exotic measurements */
|
| 297 |
+
uint64_t tetra_conversions; /* view conversions via tetrahedral eigenbasis */
|
| 298 |
+
uint64_t tetra_dft_skips; /* DFT/IDFT operations done via tetra O(D) path */
|
| 299 |
+
} TrialityStats;
|
| 300 |
+
|
| 301 |
+
extern TrialityStats triality_stats;
|
| 302 |
+
void triality_stats_reset(void);
|
| 303 |
+
void triality_stats_print(void);
|
| 304 |
+
|
| 305 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 306 |
+
* LAZY TRIALITY QUHIT β Heisenberg Picture
|
| 307 |
+
*
|
| 308 |
+
* Amplitudes are NEVER touched until measurement.
|
| 309 |
+
* Gates accumulate as diagonal phase vectors.
|
| 310 |
+
* DFTs accumulate as a counter between segments.
|
| 311 |
+
*
|
| 312 |
+
* Chain: state → F^pre0 · D0 → F^pre1 · D1 → ... → F^trailing
|
| 313 |
+
* F⁴ = I, so each count is mod 4. Pure DFT sequences cancel.
|
| 314 |
+
* Same-view consecutive gates fuse into one D. O(D) per gate.
|
| 315 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 316 |
+
|
| 317 |
+
/* Deferred-evaluation quhit: a frozen initial state plus a recorded chain of
 * (DFT count, diagonal phase vector) segments and an optional composite 6×6
 * matrix, together with per-instance statistics. */
typedef struct {
|
| 318 |
+
/* The frozen initial state — set once at init */
|
| 319 |
+
double state_re[TRI_D], state_im[TRI_D];
|
| 320 |
+
|
| 321 |
+
/* Transformation chain: array of segments.
|
| 322 |
+
* Each segment has a pre_dfts count (0-3 DFTs before its diagonal)
|
| 323 |
+
* and a diagonal phase vector applied in edge view. */
|
| 324 |
+
#define MAX_LAZY_SEGMENTS 64
|
| 325 |
+
struct {
|
| 326 |
+
double diag_re[TRI_D]; /* Diagonal phase vector (real parts) */
|
| 327 |
+
double diag_im[TRI_D]; /* Diagonal phase vector (imaginary parts) */
|
| 328 |
+
int pre_dfts; /* 0-3 DFTs to apply BEFORE this diagonal (F^4=I) */
|
| 329 |
+
} segments[MAX_LAZY_SEGMENTS];
|
| 330 |
+
int n_segments; /* presumably the number of segments[] entries in use — confirm in the implementation */
|
| 331 |
+
int trailing_dfts; /* DFTs after the last segment (accumulated) */
|
| 332 |
+
|
| 333 |
+
/* Oracle: cross-batch composite matrix.
|
| 334 |
+
* When segments overflow, instead of materializing, the Oracle
|
| 335 |
+
* compiles the chain into a 6×6 matrix and absorbs it here.
|
| 336 |
+
* At final materialize: state = oracle_M · initial_state, then
|
| 337 |
+
* any remaining segments are applied on top. */
|
| 338 |
+
double oracle_M_re[TRI_D][TRI_D];
|
| 339 |
+
double oracle_M_im[TRI_D][TRI_D];
|
| 340 |
+
int oracle_active; /* 1 if oracle_M contains data */
|
| 341 |
+
|
| 342 |
+
/* Stats */
|
| 343 |
+
uint64_t gates_fused; /* Gates absorbed into existing segment */
|
| 344 |
+
uint64_t segments_created; /* New segments started */
|
| 345 |
+
uint64_t materializations; /* Times state was materialized */
|
| 346 |
+
} LazyTrialityQuhit;
|
| 347 |
+
|
| 348 |
+
/* Lifecycle */
|
| 349 |
+
void ltri_init(LazyTrialityQuhit *q);
|
| 350 |
+
void ltri_init_basis(LazyTrialityQuhit *q, int k);
|
| 351 |
+
|
| 352 |
+
/* Gates β O(D) each, zero view conversions */
|
| 353 |
+
void ltri_z(LazyTrialityQuhit *q);
|
| 354 |
+
void ltri_x(LazyTrialityQuhit *q);
|
| 355 |
+
void ltri_shift(LazyTrialityQuhit *q, int delta);
|
| 356 |
+
void ltri_dft(LazyTrialityQuhit *q);
|
| 357 |
+
void ltri_idft(LazyTrialityQuhit *q);
|
| 358 |
+
void ltri_phase(LazyTrialityQuhit *q, const double *phi_re, const double *phi_im);
|
| 359 |
+
|
| 360 |
+
/* Materialize β apply accumulated transform, return edge-view amplitudes */
|
| 361 |
+
void ltri_materialize(LazyTrialityQuhit *q, double *out_re, double *out_im);
|
| 362 |
+
|
| 363 |
+
/* Force materialize β compile oracle + apply chain, producing a TrialityQuhit.
|
| 364 |
+
* Use this when a two-body operation (CZ) needs actual amplitudes. */
|
| 365 |
+
void ltri_force_materialize(LazyTrialityQuhit *q, TrialityQuhit *out);
|
| 366 |
+
|
| 367 |
+
/* Measure β materialize + Born sample */
|
| 368 |
+
int ltri_measure(LazyTrialityQuhit *q, int view, uint64_t *rng_state);
|
| 369 |
+
|
| 370 |
+
/* Stats */
|
| 371 |
+
void ltri_stats_print(const LazyTrialityQuhit *q);
|
| 372 |
+
|
| 373 |
+
/* βββββββββββββββββββββββββββββββββββββοΏ½οΏ½οΏ½βββββββββββββββββββββββββββββββββ
|
| 374 |
+
* HEXAGRAM INTERCONVERSION
|
| 375 |
+
* Convert between triality (vertex model) and hexagram (edge model).
|
| 376 |
+
* Requires quhit_hexagram.h and hexagram_init_tables() called first.
|
| 377 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 378 |
+
|
| 379 |
+
struct HexagramQuhit; /* forward declaration */
|
| 380 |
+
|
| 381 |
+
/* Convert triality quhit β hexagram quhit via Hβ transform */
|
| 382 |
+
void triality_to_hexagram(TrialityQuhit *src, struct HexagramQuhit *dst);
|
| 383 |
+
|
| 384 |
+
/* Convert hexagram quhit β triality quhit via Hββ transform */
|
| 385 |
+
void hexagram_to_triality(struct HexagramQuhit *src, TrialityQuhit *dst);
|
| 386 |
+
|
| 387 |
+
#endif /* QUHIT_TRIALITY_H */
|
s6_exotic.c
ADDED
|
@@ -0,0 +1,755 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* s6_exotic.c — S₆ Outer Automorphism Implementation
|
| 2 |
+
*
|
| 3 |
+
* Constructs φ via synthematic totals at initialization.
|
| 4 |
+
* Provides exotic gates, parameterized folds, and dual measurement.
|
| 5 |
+
*/
|
| 6 |
+
|
| 7 |
+
#include <string.h>
|
| 8 |
+
#include <stdio.h>
|
| 9 |
+
#include <math.h>
|
| 10 |
+
#include "s6_exotic.h"
|
| 11 |
+
|
| 12 |
+
static const double INV_SQRT2 = 0.70710678118654752440;
|
| 13 |
+
|
| 14 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 15 |
+
* SYNTHEMES β 15 partitions of {0,..,5} into 3 pairs
|
| 16 |
+
*
|
| 17 |
+
* Canonical form: pairs sorted by first element, a < c < e.
|
| 18 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 19 |
+
|
| 20 |
+
/* We enumerate all 15 at compile time */
|
| 21 |
+
/* All 15 synthemes in lexicographic order; each entry is three disjoint
 * pairs written smallest-element-first, with pairs sorted by first element.
 * Entries tagged "T0 member" (indices 0, 4, 8, 10, 12) share no pair with one
 * another and together cover all 15 pairs, i.e. they form a total. */
const S6Syntheme s6_synthemes[S6_NUM_SYNTHEMES] = {
|
| 22 |
+
[0] = {{{0,1},{2,3},{4,5}}}, /* T0 member */
|
| 23 |
+
[1] = {{{0,1},{2,4},{3,5}}},
|
| 24 |
+
[2] = {{{0,1},{2,5},{3,4}}},
|
| 25 |
+
[3] = {{{0,2},{1,3},{4,5}}},
|
| 26 |
+
[4] = {{{0,2},{1,4},{3,5}}}, /* T0 member */
|
| 27 |
+
[5] = {{{0,2},{1,5},{3,4}}},
|
| 28 |
+
[6] = {{{0,3},{1,2},{4,5}}},
|
| 29 |
+
[7] = {{{0,3},{1,4},{2,5}}}, /* DEFAULT fold — the standard antipodal pairing (k, k+3) */
|
| 30 |
+
[8] = {{{0,3},{1,5},{2,4}}}, /* T0 member */
|
| 31 |
+
[9] = {{{0,4},{1,2},{3,5}}},
|
| 32 |
+
[10] = {{{0,4},{1,3},{2,5}}}, /* T0 member */
|
| 33 |
+
[11] = {{{0,4},{1,5},{2,3}}},
|
| 34 |
+
[12] = {{{0,5},{1,2},{3,4}}}, /* T0 member */
|
| 35 |
+
[13] = {{{0,5},{1,3},{2,4}}},
|
| 36 |
+
[14] = {{{0,5},{1,4},{2,3}}},
|
| 37 |
+
};
|
| 38 |
+
|
| 39 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 40 |
+
* TOTALS β 6 sets of 5 synthemes covering all 15 pairs
|
| 41 |
+
*
|
| 42 |
+
* Built at init time by brute-force search over C(15,5) = 3003 subsets.
|
| 43 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 44 |
+
|
| 45 |
+
int s6_totals[S6_NUM_TOTALS][5];
|
| 46 |
+
S6Perm s6_phi[S6_ORDER];
|
| 47 |
+
int s6_exotic_ready = 0;
|
| 48 |
+
|
| 49 |
+
/* Check if 5 syntheme indices form a total (cover all 15 pairs exactly once) */
|
| 50 |
+
static int check_total(const int idx[5]) {
|
| 51 |
+
int covered[6][6] = {{0}};
|
| 52 |
+
for (int si = 0; si < 5; si++) {
|
| 53 |
+
const S6Syntheme *s = &s6_synthemes[idx[si]];
|
| 54 |
+
for (int p = 0; p < 3; p++) {
|
| 55 |
+
int a = s->pairs[p][0], b = s->pairs[p][1];
|
| 56 |
+
if (covered[a][b]) return 0;
|
| 57 |
+
covered[a][b] = covered[b][a] = 1;
|
| 58 |
+
}
|
| 59 |
+
}
|
| 60 |
+
for (int a = 0; a < 6; a++)
|
| 61 |
+
for (int b = a+1; b < 6; b++)
|
| 62 |
+
if (!covered[a][b]) return 0;
|
| 63 |
+
return 1;
|
| 64 |
+
}
|
| 65 |
+
|
| 66 |
+
static int find_all_totals(void) {
|
| 67 |
+
int n = 0;
|
| 68 |
+
for (int a = 0; a < 15 && n < 6; a++)
|
| 69 |
+
for (int b = a+1; b < 15 && n < 6; b++)
|
| 70 |
+
for (int c = b+1; c < 15 && n < 6; c++)
|
| 71 |
+
for (int d = c+1; d < 15 && n < 6; d++)
|
| 72 |
+
for (int e = d+1; e < 15 && n < 6; e++) {
|
| 73 |
+
int idx[5] = {a,b,c,d,e};
|
| 74 |
+
if (check_total(idx)) {
|
| 75 |
+
for (int i = 0; i < 5; i++) s6_totals[n][i] = idx[i];
|
| 76 |
+
n++;
|
| 77 |
+
}
|
| 78 |
+
}
|
| 79 |
+
return n;
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 83 |
+
* PERMUTATION PRIMITIVES
|
| 84 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 85 |
+
|
| 86 |
+
S6Perm s6_from_int(int n) {
|
| 87 |
+
n = ((n % 720) + 720) % 720;
|
| 88 |
+
int avail[6] = {0,1,2,3,4,5}, fact[6] = {120,24,6,2,1,1};
|
| 89 |
+
S6Perm r;
|
| 90 |
+
for (int i = 0; i < 6; i++) {
|
| 91 |
+
int d = n / fact[i]; n %= fact[i];
|
| 92 |
+
r.p[i] = avail[d];
|
| 93 |
+
for (int j = d; j < 5-i; j++) avail[j] = avail[j+1];
|
| 94 |
+
}
|
| 95 |
+
return r;
|
| 96 |
+
}
|
| 97 |
+
|
| 98 |
+
/* Encode a permutation as its factorial-number-system rank: for each
 * position, count the smaller values not yet used and weight that count by
 * {120,24,6,2,1,1}. Entries of a.p are used as array indices without any
 * range check. */
int s6_to_int_perm(S6Perm a) {
    static const int weight[6] = {120, 24, 6, 2, 1, 1};
    int taken[6] = {0};
    int code = 0;

    for (int pos = 0; pos < 6; pos++) {
        int value = a.p[pos];
        int smaller_free = 0;
        for (int v = 0; v < value; v++) {
            if (!taken[v]) {
                smaller_free++;
            }
        }
        code += smaller_free * weight[pos];
        taken[value] = 1;
    }
    return code;
}
|
| 107 |
+
|
| 108 |
+
/* Compose two permutations: the result sends i to b.p[a.p[i]], i.e. a is
 * applied first and b second. */
S6Perm s6_compose_perm(S6Perm a, S6Perm b) {
    S6Perm out;
    int i = 0;
    while (i < 6) {
        int mid = a.p[i];
        out.p[i] = b.p[mid];
        i++;
    }
    return out;
}
|
| 113 |
+
|
| 114 |
+
/* Invert a permutation: for every source index, record it at the slot named
 * by its image, so that result.p[a.p[i]] == i. */
S6Perm s6_inverse(S6Perm a) {
    S6Perm inv;
    for (int src = 0; src < 6; src++) {
        int dst = a.p[src];
        inv.p[dst] = src;
    }
    return inv;
}
|
| 119 |
+
|
| 120 |
+
/* Return 1 when the two permutation arrays are bytewise identical, else 0. */
int s6_perm_eq(S6Perm a, S6Perm b) {
    if (memcmp(a.p, b.p, sizeof a.p) != 0) {
        return 0;
    }
    return 1;
}
|
| 123 |
+
|
| 124 |
+
/* Count the positions i for which a.p[i] == i. */
int s6_fixed_points(S6Perm a) {
    int count = 0;
    for (int i = 0; i < 6; i++) {
        count += (a.p[i] == i) ? 1 : 0;
    }
    return count;
}
|
| 129 |
+
|
| 130 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 131 |
+
* OUTER AUTOMORPHISM CONSTRUCTION
|
| 132 |
+
*
|
| 133 |
+
* For each σ ∈ S₆: apply σ to each total's synthemes, find which
|
| 134 |
+
* target total ALL 5 image synthemes land in → φ(σ).
|
| 135 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 136 |
+
|
| 137 |
+
/* Apply Ο to a syntheme: permute all elements in all pairs */
|
| 138 |
+
static S6Syntheme apply_sigma(S6Perm sigma, const S6Syntheme *s) {
|
| 139 |
+
S6Syntheme r;
|
| 140 |
+
for (int p = 0; p < 3; p++) {
|
| 141 |
+
int a = sigma.p[s->pairs[p][0]];
|
| 142 |
+
int b = sigma.p[s->pairs[p][1]];
|
| 143 |
+
if (a > b) { int t = a; a = b; b = t; }
|
| 144 |
+
r.pairs[p][0] = a; r.pairs[p][1] = b;
|
| 145 |
+
}
|
| 146 |
+
/* Sort pairs by first element */
|
| 147 |
+
for (int i = 0; i < 2; i++)
|
| 148 |
+
for (int j = i+1; j < 3; j++)
|
| 149 |
+
if (r.pairs[j][0] < r.pairs[i][0]) {
|
| 150 |
+
S6Syntheme tmp = r;
|
| 151 |
+
r.pairs[i][0] = tmp.pairs[j][0]; r.pairs[i][1] = tmp.pairs[j][1];
|
| 152 |
+
r.pairs[j][0] = tmp.pairs[i][0]; r.pairs[j][1] = tmp.pairs[i][1];
|
| 153 |
+
}
|
| 154 |
+
return r;
|
| 155 |
+
}
|
| 156 |
+
|
| 157 |
+
/* Find index of a syntheme in the table */
|
| 158 |
+
static int find_synth_idx(const S6Syntheme *s) {
|
| 159 |
+
for (int i = 0; i < S6_NUM_SYNTHEMES; i++)
|
| 160 |
+
if (memcmp(&s6_synthemes[i], s, sizeof(S6Syntheme)) == 0) return i;
|
| 161 |
+
return -1;
|
| 162 |
+
}
|
| 163 |
+
|
| 164 |
+
/* Map a total under Ο: apply Ο to all 5 synthemes, find target total */
|
| 165 |
+
static int map_total_under(S6Perm sigma, int total_idx) {
|
| 166 |
+
int img_synth[5];
|
| 167 |
+
for (int j = 0; j < 5; j++) {
|
| 168 |
+
S6Syntheme img = apply_sigma(sigma, &s6_synthemes[s6_totals[total_idx][j]]);
|
| 169 |
+
img_synth[j] = find_synth_idx(&img);
|
| 170 |
+
if (img_synth[j] < 0) return -1;
|
| 171 |
+
}
|
| 172 |
+
for (int t = 0; t < S6_NUM_TOTALS; t++) {
|
| 173 |
+
int all = 1;
|
| 174 |
+
for (int j = 0; j < 5 && all; j++) {
|
| 175 |
+
int found = 0;
|
| 176 |
+
for (int k = 0; k < 5; k++)
|
| 177 |
+
if (s6_totals[t][k] == img_synth[j]) { found = 1; break; }
|
| 178 |
+
if (!found) all = 0;
|
| 179 |
+
}
|
| 180 |
+
if (all) return t;
|
| 181 |
+
}
|
| 182 |
+
return -1;
|
| 183 |
+
}
|
| 184 |
+
|
| 185 |
+
void s6_exotic_init(void) {
|
| 186 |
+
if (s6_exotic_ready) return;
|
| 187 |
+
|
| 188 |
+
int n_totals = find_all_totals();
|
| 189 |
+
if (n_totals != 6) {
|
| 190 |
+
fprintf(stderr, "[S6_EXOTIC] FATAL: found %d totals (expected 6)\n", n_totals);
|
| 191 |
+
return;
|
| 192 |
+
}
|
| 193 |
+
|
| 194 |
+
/* Build Ο for all 720 elements */
|
| 195 |
+
for (int idx = 0; idx < 720; idx++) {
|
| 196 |
+
S6Perm sigma = s6_from_int(idx);
|
| 197 |
+
for (int t = 0; t < 6; t++) {
|
| 198 |
+
int img = map_total_under(sigma, t);
|
| 199 |
+
if (img < 0) {
|
| 200 |
+
s6_phi[idx] = S6_IDENTITY;
|
| 201 |
+
break;
|
| 202 |
+
}
|
| 203 |
+
s6_phi[idx].p[t] = img;
|
| 204 |
+
}
|
| 205 |
+
}
|
| 206 |
+
|
| 207 |
+
s6_exotic_ready = 1;
|
| 208 |
+
}
|
| 209 |
+
|
| 210 |
+
/* Return phi(sigma) from the lookup table, initialising the table on first use.
 *
 * The table index comes from s6_to_int_perm(). If that index falls outside
 * 0..719 (the size of s6_phi), the identity permutation is returned instead of
 * reading past the table. */
S6Perm s6_apply_phi(S6Perm sigma) {
    if (!s6_exotic_ready) s6_exotic_init();

    int idx = s6_to_int_perm(sigma);
    if (idx < 0 || idx >= 720) {
        S6Perm identity;
        for (int i = 0; i < 6; i++) {
            identity.p[i] = i;
        }
        return identity;
    }
    return s6_phi[idx];
}
|
| 215 |
+
|
| 216 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 217 |
+
* SYNTHEME-PARAMETERIZED FOLD
|
| 218 |
+
*
|
| 219 |
+
* Instead of always pairing (k, k+3), pair according to syntheme s.
|
| 220 |
+
* Output layout: out[0..2] = vesica, out[3..5] = wave.
|
| 221 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 222 |
+
|
| 223 |
+
void s6_fold_syntheme(const double *in_re, const double *in_im,
|
| 224 |
+
double *out_re, double *out_im,
|
| 225 |
+
int syntheme_idx) {
|
| 226 |
+
if (syntheme_idx < 0 || syntheme_idx >= S6_NUM_SYNTHEMES)
|
| 227 |
+
syntheme_idx = 7; /* fallback to default */
|
| 228 |
+
|
| 229 |
+
const S6Syntheme *s = &s6_synthemes[syntheme_idx];
|
| 230 |
+
for (int p = 0; p < 3; p++) {
|
| 231 |
+
int k = s->pairs[p][0], k2 = s->pairs[p][1];
|
| 232 |
+
out_re[p] = INV_SQRT2 * (in_re[k] + in_re[k2]);
|
| 233 |
+
out_im[p] = INV_SQRT2 * (in_im[k] + in_im[k2]);
|
| 234 |
+
out_re[p + 3] = INV_SQRT2 * (in_re[k] - in_re[k2]);
|
| 235 |
+
out_im[p + 3] = INV_SQRT2 * (in_im[k] - in_im[k2]);
|
| 236 |
+
}
|
| 237 |
+
}
|
| 238 |
+
|
| 239 |
+
void s6_unfold_syntheme(const double *in_re, const double *in_im,
|
| 240 |
+
double *out_re, double *out_im,
|
| 241 |
+
int syntheme_idx) {
|
| 242 |
+
if (syntheme_idx < 0 || syntheme_idx >= S6_NUM_SYNTHEMES)
|
| 243 |
+
syntheme_idx = 7;
|
| 244 |
+
|
| 245 |
+
const S6Syntheme *s = &s6_synthemes[syntheme_idx];
|
| 246 |
+
/* Zero output first β different synthemes write to different indices */
|
| 247 |
+
memset(out_re, 0, 6 * sizeof(double));
|
| 248 |
+
memset(out_im, 0, 6 * sizeof(double));
|
| 249 |
+
|
| 250 |
+
for (int p = 0; p < 3; p++) {
|
| 251 |
+
int k = s->pairs[p][0], k2 = s->pairs[p][1];
|
| 252 |
+
double v_re = in_re[p], v_im = in_im[p];
|
| 253 |
+
double w_re = in_re[p + 3], w_im = in_im[p + 3];
|
| 254 |
+
out_re[k] = INV_SQRT2 * (v_re + w_re);
|
| 255 |
+
out_im[k] = INV_SQRT2 * (v_im + w_im);
|
| 256 |
+
out_re[k2] = INV_SQRT2 * (v_re - w_re);
|
| 257 |
+
out_im[k2] = INV_SQRT2 * (v_im - w_im);
|
| 258 |
+
}
|
| 259 |
+
}
|
| 260 |
+
|
| 261 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 262 |
+
* OPTIMAL SYNTHEME SELECTION
|
| 263 |
+
*
|
| 264 |
+
* Given an active_mask (6-bit bitmask of nonzero basis states),
|
| 265 |
+
* find the syntheme whose pairing puts the most active states into
|
| 266 |
+
* the SAME pair. This maximizes the efficiency of the fold stage.
|
| 267 |
+
*
|
| 268 |
+
* If both active states are in the same pair, the fold concentrates
|
| 269 |
+
* all amplitude into one slot β O(1) downstream.
|
| 270 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 271 |
+
|
| 272 |
+
int s6_optimal_syntheme(uint8_t active_mask) {
|
| 273 |
+
int best_synth = 7; /* default: antipodal */
|
| 274 |
+
int best_score = -1;
|
| 275 |
+
|
| 276 |
+
for (int si = 0; si < S6_NUM_SYNTHEMES; si++) {
|
| 277 |
+
const S6Syntheme *s = &s6_synthemes[si];
|
| 278 |
+
int score = 0;
|
| 279 |
+
for (int p = 0; p < 3; p++) {
|
| 280 |
+
int k1 = s->pairs[p][0], k2 = s->pairs[p][1];
|
| 281 |
+
int a1 = (active_mask >> k1) & 1;
|
| 282 |
+
int a2 = (active_mask >> k2) & 1;
|
| 283 |
+
/* Score: count pairs where BOTH are active (good: concentrate)
|
| 284 |
+
* or NEITHER is active (good: skip entire pair) */
|
| 285 |
+
if (a1 && a2) score += 2; /* both active β concentrated */
|
| 286 |
+
if (!a1 && !a2) score += 1; /* both dead β skippable */
|
| 287 |
+
}
|
| 288 |
+
if (score > best_score) {
|
| 289 |
+
best_score = score;
|
| 290 |
+
best_synth = si;
|
| 291 |
+
}
|
| 292 |
+
}
|
| 293 |
+
return best_synth;
|
| 294 |
+
}
|
| 295 |
+
|
| 296 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 297 |
+
* EXOTIC GATE β Apply Ο(Ο) instead of Ο
|
| 298 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 299 |
+
|
| 300 |
+
void s6_apply_exotic_gate(const double *in_re, const double *in_im,
|
| 301 |
+
double *out_re, double *out_im,
|
| 302 |
+
S6Perm sigma) {
|
| 303 |
+
if (!s6_exotic_ready) s6_exotic_init();
|
| 304 |
+
S6Perm phi_sigma = s6_apply_phi(sigma);
|
| 305 |
+
|
| 306 |
+
double tmp_re[6], tmp_im[6];
|
| 307 |
+
for (int i = 0; i < 6; i++) {
|
| 308 |
+
tmp_re[phi_sigma.p[i]] = in_re[i];
|
| 309 |
+
tmp_im[phi_sigma.p[i]] = in_im[i];
|
| 310 |
+
}
|
| 311 |
+
memcpy(out_re, tmp_re, 6 * sizeof(double));
|
| 312 |
+
memcpy(out_im, tmp_im, 6 * sizeof(double));
|
| 313 |
+
}
|
| 314 |
+
|
| 315 |
+
/* ---------------------------------------------------------------------------
 * DUAL MEASUREMENT - standard and exotic probabilities
 *
 * Standard: probs_std[k] = |psi[k]|^2
 * Exotic:   probs_exo[k] = |psi[e(k)]|^2 with e = (01)(23)(45), the fixed
 *           triple transposition the original comment describes as the image
 *           of the transposition (01) under phi.
 *
 * Neither output is normalised. All six standard values are written before
 * any exotic value.
 * --------------------------------------------------------------------------- */

void s6_dual_probabilities(const double *re, const double *im,
                           double *probs_std, double *probs_exo) {
    for (int k = 0; k < 6; k++) {
        probs_std[k] = re[k]*re[k] + im[k]*im[k];
    }

    /* (01)(23)(45) swaps the two members of each even/odd pair: k -> k ^ 1 */
    for (int k = 0; k < 6; k++) {
        int partner = k ^ 1;
        probs_exo[k] = re[partner]*re[partner] + im[partner]*im[partner];
    }
}
|
| 338 |
+
|
| 339 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 340 |
+
* EXOTIC INVARIANT Ξ
|
| 341 |
+
*
|
| 342 |
+
* Ξ(Ο) = Ξ£_{Ο β Sβ} |β¨Ο|P_Ο|Οβ© - β¨Ο|P_{Ο(Ο)}|Οβ©|Β²
|
| 343 |
+
*
|
| 344 |
+
* For each permutation Ο:
|
| 345 |
+
* β¨Ο|P_Ο|Οβ© = Ξ£_k conj(Ο_k) Β· Ο_{Ο(k)}
|
| 346 |
+
* β¨Ο|P_{Ο(Ο)}|Οβ© = Ξ£_k conj(Ο_k) Β· Ο_{Ο(Ο)(k)}
|
| 347 |
+
*
|
| 348 |
+
* The difference measures how much the state distinguishes between
|
| 349 |
+
* the standard and exotic representations. This is a D=6-exclusive
|
| 350 |
+
* quantum number β it cannot exist in any other dimension.
|
| 351 |
+
*
|
| 352 |
+
* Cost: O(720 Γ 6) β 4320 operations.
|
| 353 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 354 |
+
|
| 355 |
+
double s6_exotic_invariant(const double *re, const double *im) {
|
| 356 |
+
if (!s6_exotic_ready) s6_exotic_init();
|
| 357 |
+
|
| 358 |
+
double delta = 0;
|
| 359 |
+
|
| 360 |
+
for (int idx = 0; idx < 720; idx++) {
|
| 361 |
+
S6Perm sigma = s6_from_int(idx);
|
| 362 |
+
S6Perm phi_sigma = s6_phi[idx];
|
| 363 |
+
|
| 364 |
+
/* β¨Ο|P_Ο|Οβ© = Ξ£_k conj(Ο_k) Β· Ο_{Ο(k)} */
|
| 365 |
+
double std_re = 0, std_im = 0;
|
| 366 |
+
double exo_re = 0, exo_im = 0;
|
| 367 |
+
|
| 368 |
+
for (int k = 0; k < 6; k++) {
|
| 369 |
+
/* conj(Ο_k) = (re[k], -im[k]) */
|
| 370 |
+
double ck_re = re[k], ck_im = -im[k];
|
| 371 |
+
|
| 372 |
+
/* Standard: Ο_{Ο(k)} */
|
| 373 |
+
int sk = sigma.p[k];
|
| 374 |
+
std_re += ck_re * re[sk] - ck_im * im[sk];
|
| 375 |
+
std_im += ck_re * im[sk] + ck_im * re[sk];
|
| 376 |
+
|
| 377 |
+
/* Exotic: Ο_{Ο(Ο)(k)} */
|
| 378 |
+
int ek = phi_sigma.p[k];
|
| 379 |
+
exo_re += ck_re * re[ek] - ck_im * im[ek];
|
| 380 |
+
exo_im += ck_re * im[ek] + ck_im * re[ek];
|
| 381 |
+
}
|
| 382 |
+
|
| 383 |
+
/* |std - exo|Β² */
|
| 384 |
+
double diff_re = std_re - exo_re;
|
| 385 |
+
double diff_im = std_im - exo_im;
|
| 386 |
+
delta += diff_re * diff_re + diff_im * diff_im;
|
| 387 |
+
}
|
| 388 |
+
|
| 389 |
+
return delta;
|
| 390 |
+
}
|
| 391 |
+
|
| 392 |
+
/* ---------------------------------------------------------------------------
 * EXOTIC ENTROPY  Delta_S = S_std - S_exo
 *
 *   S_std = -sum p_k log p_k, p_k = |psi_k|^2 (normalised by the total)
 *   S_exo = same quantity for the state folded through syntheme_idx
 *
 *   Delta_S > 0 : exotic channel is more ordered (lower entropy)
 *   Delta_S < 0 : standard channel is more ordered
 *   Delta_S = 0 : both channels see the same disorder
 * --------------------------------------------------------------------------- */

/* Shannon entropy (natural log) of the six squared magnitudes. With total
 * weight T = sum p_k, the normalised entropy equals (-sum p log p)/T + log T;
 * that correction is applied only when T exceeds 1e-30. Terms with
 * p <= 1e-30 are skipped. */
static double exotic_entropy_of_amplitudes(const double *re, const double *im) {
    double weighted = 0;
    double mass = 0;
    for (int k = 0; k < 6; k++) {
        double p = re[k]*re[k] + im[k]*im[k];
        if (p > 1e-30) weighted -= p * log(p);
        mass += p;
    }
    if (mass > 1e-30) {
        weighted = weighted / mass + log(mass);
    }
    return weighted;
}

double s6_exotic_entropy(const double *re, const double *im,
                         int syntheme_idx) {
    double S_std = exotic_entropy_of_amplitudes(re, im);

    /* Exotic entropy: fold by syntheme, then measure the same way */
    double fold_re[6], fold_im[6];
    s6_fold_syntheme(re, im, fold_re, fold_im, syntheme_idx);
    double S_exo = exotic_entropy_of_amplitudes(fold_re, fold_im);

    return S_std - S_exo;
}
|
| 433 |
+
|
| 434 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 435 |
+
* EXOTIC FINGERPRINT β Per-conjugacy-class breakdown
|
| 436 |
+
*
|
| 437 |
+
* Returns 11 values, one per conjugacy class of Sβ.
|
| 438 |
+
* class_deltas[c] = (1/|C_c|) Ξ£_{Ο β C_c} |β¨Ο|P_Ο|Οβ© - β¨Ο|P_{Ο(Ο)}|Οβ©|Β²
|
| 439 |
+
*
|
| 440 |
+
* The 11 classes (ordered by partition):
|
| 441 |
+
* 0: 1βΆ (identity) 5: 3Β·2Β·1
|
| 442 |
+
* 1: 2Β·1β΄ 6: 4Β·1Β²
|
| 443 |
+
* 2: 2Β²Β·1Β² 7: 4Β·2
|
| 444 |
+
* 3: 2Β³ 8: 5Β·1
|
| 445 |
+
* 4: 3Β·1Β³ 9: 3Β²
|
| 446 |
+
* 10: 6
|
| 447 |
+
*
|
| 448 |
+
* Classes where Ο swaps the cycle type (1β3, 4β9, 6β7) will have
|
| 449 |
+
* the largest deltas. Classes where Ο preserves the type (0, 2, 5, 8, 10)
|
| 450 |
+
* may still have nonzero deltas (individual elements are rearranged).
|
| 451 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 452 |
+
|
| 453 |
+
/* Cycle type β class index mapping */
|
| 454 |
+
static int cycle_type_to_class(S6Perm sigma) {
|
| 455 |
+
int vis[6] = {0}, lens[6], n = 0;
|
| 456 |
+
for (int i = 0; i < 6; i++) {
|
| 457 |
+
if (vis[i]) continue;
|
| 458 |
+
int len = 0, j = i;
|
| 459 |
+
while (!vis[j]) { vis[j] = 1; j = sigma.p[j]; len++; }
|
| 460 |
+
lens[n++] = len;
|
| 461 |
+
}
|
| 462 |
+
/* Sort descending */
|
| 463 |
+
for (int i = 0; i < n-1; i++)
|
| 464 |
+
for (int j = i+1; j < n; j++)
|
| 465 |
+
if (lens[j] > lens[i]) { int t = lens[i]; lens[i] = lens[j]; lens[j] = t; }
|
| 466 |
+
|
| 467 |
+
/* Map to class index based on sorted partition */
|
| 468 |
+
if (n == 6) return 0; /* 1βΆ */
|
| 469 |
+
if (n == 5) return 1; /* 2Β·1β΄ */
|
| 470 |
+
if (n == 4 && lens[0] == 2 && lens[1] == 2) return 2; /* 2Β²Β·1Β² */
|
| 471 |
+
if (n == 4 && lens[0] == 3) return 4; /* 3Β·1Β³ */
|
| 472 |
+
if (n == 3 && lens[0] == 2 && lens[1] == 2 && lens[2] == 2) return 3; /* 2Β³ */
|
| 473 |
+
if (n == 3 && lens[0] == 3 && lens[1] == 2) return 5; /* 3Β·2Β·1 */
|
| 474 |
+
if (n == 3 && lens[0] == 4) return 6; /* 4Β·1Β² */
|
| 475 |
+
if (n == 2 && lens[0] == 3 && lens[1] == 3) return 9; /* 3Β² */
|
| 476 |
+
if (n == 2 && lens[0] == 4) return 7; /* 4Β·2 */
|
| 477 |
+
if (n == 2 && lens[0] == 5) return 8; /* 5Β·1 */
|
| 478 |
+
if (n == 1) return 10; /* 6 */
|
| 479 |
+
return 0;
|
| 480 |
+
}
|
| 481 |
+
|
| 482 |
+
void s6_exotic_fingerprint(const double *re, const double *im,
|
| 483 |
+
double *class_deltas) {
|
| 484 |
+
if (!s6_exotic_ready) s6_exotic_init();
|
| 485 |
+
|
| 486 |
+
double class_sums[11] = {0};
|
| 487 |
+
int class_counts[11] = {0};
|
| 488 |
+
|
| 489 |
+
for (int idx = 0; idx < 720; idx++) {
|
| 490 |
+
S6Perm sigma = s6_from_int(idx);
|
| 491 |
+
S6Perm phi_sigma = s6_phi[idx];
|
| 492 |
+
|
| 493 |
+
double std_re = 0, std_im = 0;
|
| 494 |
+
double exo_re = 0, exo_im = 0;
|
| 495 |
+
|
| 496 |
+
for (int k = 0; k < 6; k++) {
|
| 497 |
+
double ck_re = re[k], ck_im = -im[k];
|
| 498 |
+
int sk = sigma.p[k];
|
| 499 |
+
std_re += ck_re * re[sk] - ck_im * im[sk];
|
| 500 |
+
std_im += ck_re * im[sk] + ck_im * re[sk];
|
| 501 |
+
int ek = phi_sigma.p[k];
|
| 502 |
+
exo_re += ck_re * re[ek] - ck_im * im[ek];
|
| 503 |
+
exo_im += ck_re * im[ek] + ck_im * re[ek];
|
| 504 |
+
}
|
| 505 |
+
|
| 506 |
+
double diff_re = std_re - exo_re;
|
| 507 |
+
double diff_im = std_im - exo_im;
|
| 508 |
+
double d2 = diff_re * diff_re + diff_im * diff_im;
|
| 509 |
+
|
| 510 |
+
int cls = cycle_type_to_class(sigma);
|
| 511 |
+
class_sums[cls] += d2;
|
| 512 |
+
class_counts[cls]++;
|
| 513 |
+
}
|
| 514 |
+
|
| 515 |
+
for (int c = 0; c < 11; c++)
|
| 516 |
+
class_deltas[c] = (class_counts[c] > 0) ?
|
| 517 |
+
class_sums[c] / class_counts[c] : 0;
|
| 518 |
+
}
|
| 519 |
+
|
| 520 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 521 |
+
* ADAPTIVE MEASUREMENT BASIS SELECTION
|
| 522 |
+
*
|
| 523 |
+
* For each possible measurement basis (standard + 15 synthemes),
|
| 524 |
+
* compute the expected post-measurement fidelity to the original state:
|
| 525 |
+
* F = Ξ£_k P(k) Γ |β¨Ο|Ο_post(k)β©|Β²
|
| 526 |
+
*
|
| 527 |
+
* For standard measurement: Ο_post(k) = |kβ©, so F = Ξ£_k p(k)Β²
|
| 528 |
+
* For exotic measurement: Ο_post(k) = unfold(|kβ©_folded), so
|
| 529 |
+
* F = Ξ£_k P_fold(k) Γ |β¨Ο|unfold(|kβ©)|Β²
|
| 530 |
+
*
|
| 531 |
+
* Returns the basis that MAXIMIZES expected fidelity (preserves
|
| 532 |
+
* the most information). Returns -1 for standard basis.
|
| 533 |
+
*
|
| 534 |
+
* From the Faustian Pact: this lets the engine auto-select the
|
| 535 |
+
* least destructive measurement β the mildest possible pact.
|
| 536 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 537 |
+
|
| 538 |
+
int s6_optimal_measure_basis(const double *re, const double *im) {
|
| 539 |
+
/* Standard basis expected fidelity: Ξ£_k p(k)Β² */
|
| 540 |
+
double best_fidelity = 0;
|
| 541 |
+
int best_basis = -1; /* -1 = standard */
|
| 542 |
+
|
| 543 |
+
double norm = 0;
|
| 544 |
+
for (int k = 0; k < 6; k++)
|
| 545 |
+
norm += re[k] * re[k] + im[k] * im[k];
|
| 546 |
+
if (norm < 1e-30) return -1;
|
| 547 |
+
|
| 548 |
+
for (int k = 0; k < 6; k++) {
|
| 549 |
+
double pk = (re[k] * re[k] + im[k] * im[k]) / norm;
|
| 550 |
+
best_fidelity += pk * pk;
|
| 551 |
+
}
|
| 552 |
+
|
| 553 |
+
/* Try each syntheme basis */
|
| 554 |
+
for (int s = 0; s < S6_NUM_SYNTHEMES; s++) {
|
| 555 |
+
double fold_re[6], fold_im[6];
|
| 556 |
+
s6_fold_syntheme(re, im, fold_re, fold_im, s);
|
| 557 |
+
|
| 558 |
+
double fold_norm = 0;
|
| 559 |
+
for (int k = 0; k < 6; k++)
|
| 560 |
+
fold_norm += fold_re[k] * fold_re[k] + fold_im[k] * fold_im[k];
|
| 561 |
+
if (fold_norm < 1e-30) continue;
|
| 562 |
+
|
| 563 |
+
double fidelity = 0;
|
| 564 |
+
for (int k = 0; k < 6; k++) {
|
| 565 |
+
/* P(k) in folded basis */
|
| 566 |
+
double pk = (fold_re[k] * fold_re[k] + fold_im[k] * fold_im[k])
|
| 567 |
+
/ fold_norm;
|
| 568 |
+
if (pk < 1e-30) continue;
|
| 569 |
+
|
| 570 |
+
/* Post-measurement state: project to |kβ© in folded basis, unfold */
|
| 571 |
+
double proj_re[6] = {0}, proj_im[6] = {0};
|
| 572 |
+
double mag = sqrt(fold_re[k] * fold_re[k] + fold_im[k] * fold_im[k]);
|
| 573 |
+
proj_re[k] = fold_re[k] / mag;
|
| 574 |
+
proj_im[k] = fold_im[k] / mag;
|
| 575 |
+
|
| 576 |
+
double unfold_re[6], unfold_im[6];
|
| 577 |
+
s6_unfold_syntheme(proj_re, proj_im, unfold_re, unfold_im, s);
|
| 578 |
+
|
| 579 |
+
/* Fidelity to original: |β¨Ο|Ο_postβ©|Β² */
|
| 580 |
+
double ov_re = 0, ov_im = 0;
|
| 581 |
+
double uf_norm = 0;
|
| 582 |
+
for (int j = 0; j < 6; j++) {
|
| 583 |
+
ov_re += re[j] * unfold_re[j] + im[j] * unfold_im[j];
|
| 584 |
+
ov_im += re[j] * unfold_im[j] - im[j] * unfold_re[j];
|
| 585 |
+
uf_norm += unfold_re[j] * unfold_re[j] +
|
| 586 |
+
unfold_im[j] * unfold_im[j];
|
| 587 |
+
}
|
| 588 |
+
double f = (ov_re * ov_re + ov_im * ov_im) /
|
| 589 |
+
(norm * uf_norm + 1e-30);
|
| 590 |
+
|
| 591 |
+
fidelity += pk * f;
|
| 592 |
+
}
|
| 593 |
+
|
| 594 |
+
if (fidelity > best_fidelity) {
|
| 595 |
+
best_fidelity = fidelity;
|
| 596 |
+
best_basis = s;
|
| 597 |
+
}
|
| 598 |
+
}
|
| 599 |
+
|
| 600 |
+
return best_basis;
|
| 601 |
+
}
|
| 602 |
+
|
| 603 |
+
/* ---------------------------------------------------------------------------
 * CROSS-SYNTHEME ENTANGLEMENT WITNESS
 *
 * Cheap stand-in for the exotic invariant Delta: fold the state through three
 * probe synthemes (indices 0, 7 and 14), normalise each folded probability
 * distribution, and average the pairwise total-variation distances
 * (half the L1 distance). The average is multiplied by 720 to express it in
 * Delta-like units.
 *
 * Calibration note carried over from the original: Delta = 183 was observed
 * at an average distance of about 0.2; the x720 factor is approximate and is
 * only claimed to preserve monotonic correlation.
 *
 * Returns 0 for a state whose norm is below 1e-30.
 * Cost: three 6-component folds plus three 6-component comparisons.
 * --------------------------------------------------------------------------- */

double s6_cross_syntheme_witness(const double *re, const double *im) {
    static const int probes[3] = {0, 7, 14};
    double dist[3][6];

    double state_norm = 0;
    for (int k = 0; k < 6; k++)
        state_norm += re[k] * re[k] + im[k] * im[k];
    if (state_norm < 1e-30) return 0;

    /* Folded probability distribution for each probe */
    for (int p = 0; p < 3; p++) {
        double f_re[6], f_im[6];
        s6_fold_syntheme(re, im, f_re, f_im, probes[p]);

        double mass = 0;
        for (int k = 0; k < 6; k++) {
            dist[p][k] = f_re[k] * f_re[k] + f_im[k] * f_im[k];
            mass += dist[p][k];
        }
        if (mass > 1e-30) {
            for (int k = 0; k < 6; k++) dist[p][k] /= mass;
        }
    }

    /* Pairwise total variation distance over (0,1), (0,2), (1,2) */
    double tv_sum = 0;
    int pair_count = 0;
    for (int a = 0; a < 2; a++) {
        for (int b = a + 1; b < 3; b++) {
            double l1 = 0;
            for (int k = 0; k < 6; k++)
                l1 += fabs(dist[a][k] - dist[b][k]);
            tv_sum += l1 / 2.0;
            pair_count++;
        }
    }
    double mean_tv = tv_sum / pair_count;

    return mean_tv * 720.0;
}
|
| 661 |
+
|
| 662 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 663 |
+
* MINIMUM-ENTROPY SYNTHEME
|
| 664 |
+
*
|
| 665 |
+
* Find the syntheme whose fold concentrates amplitude the most
|
| 666 |
+
* (lowest Shannon entropy). This is the optimal exotic view for storage.
|
| 667 |
+
*
|
| 668 |
+
* From the Scrying Mirror: entropy varies 1.775β1.927 across synthemes.
|
| 669 |
+
* The minimum-entropy syntheme reveals the most structure.
|
| 670 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 671 |
+
|
| 672 |
+
int s6_min_entropy_syntheme(const double *re, const double *im) {
|
| 673 |
+
int best = 0;
|
| 674 |
+
double best_entropy = 1e30;
|
| 675 |
+
|
| 676 |
+
for (int s = 0; s < S6_NUM_SYNTHEMES; s++) {
|
| 677 |
+
double fold_re[6], fold_im[6];
|
| 678 |
+
s6_fold_syntheme(re, im, fold_re, fold_im, s);
|
| 679 |
+
|
| 680 |
+
double total = 0;
|
| 681 |
+
double probs[6];
|
| 682 |
+
for (int k = 0; k < 6; k++) {
|
| 683 |
+
probs[k] = fold_re[k] * fold_re[k] + fold_im[k] * fold_im[k];
|
| 684 |
+
total += probs[k];
|
| 685 |
+
}
|
| 686 |
+
if (total < 1e-30) continue;
|
| 687 |
+
|
| 688 |
+
double H = 0;
|
| 689 |
+
for (int k = 0; k < 6; k++) {
|
| 690 |
+
double p = probs[k] / total;
|
| 691 |
+
if (p > 1e-30) H -= p * log(p);
|
| 692 |
+
}
|
| 693 |
+
|
| 694 |
+
if (H < best_entropy) {
|
| 695 |
+
best_entropy = H;
|
| 696 |
+
best = s;
|
| 697 |
+
}
|
| 698 |
+
}
|
| 699 |
+
|
| 700 |
+
return best;
|
| 701 |
+
}
|
| 702 |
+
|
| 703 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 704 |
+
* SYNTHEMATIC TOTAL TOMOGRAPHY
|
| 705 |
+
*
|
| 706 |
+
* Reconstruct a D=6 state vector from 5 fold measurements (one per
|
| 707 |
+
* syntheme in a synthematic total). Each fold is a unitary transform;
|
| 708 |
+
* the unfold recovers the original. Averaging 5 independent unfolds
|
| 709 |
+
* through a complete total gives exact reconstruction.
|
| 710 |
+
*
|
| 711 |
+
* From the Scrying Mirror: T0 achieved F=1.000000.
|
| 712 |
+
*
|
| 713 |
+
* This is mathematically guaranteed: each syntheme covers all 6 basis
|
| 714 |
+
* states (via 3 pairs), and a total's 5 synthemes cover all 15 possible
|
| 715 |
+
* pairs, giving a complete spanning set.
|
| 716 |
+
*
|
| 717 |
+
* Returns fidelity of reconstruction to verify numerical accuracy.
|
| 718 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 719 |
+
|
| 720 |
+
double s6_total_tomography(int total_idx,
|
| 721 |
+
const double fold_re[5][6],
|
| 722 |
+
const double fold_im[5][6],
|
| 723 |
+
double *out_re, double *out_im) {
|
| 724 |
+
if (!s6_exotic_ready) s6_exotic_init();
|
| 725 |
+
if (total_idx < 0 || total_idx >= S6_NUM_TOTALS) total_idx = 0;
|
| 726 |
+
|
| 727 |
+
/* Unfold each of the 5 synthemes and accumulate */
|
| 728 |
+
double sum_re[6] = {0}, sum_im[6] = {0};
|
| 729 |
+
|
| 730 |
+
for (int si = 0; si < 5; si++) {
|
| 731 |
+
int synth_idx = s6_totals[total_idx][si];
|
| 732 |
+
double unfold_re[6], unfold_im[6];
|
| 733 |
+
|
| 734 |
+
s6_unfold_syntheme(fold_re[si], fold_im[si],
|
| 735 |
+
unfold_re, unfold_im, synth_idx);
|
| 736 |
+
|
| 737 |
+
for (int k = 0; k < 6; k++) {
|
| 738 |
+
sum_re[k] += unfold_re[k];
|
| 739 |
+
sum_im[k] += unfold_im[k];
|
| 740 |
+
}
|
| 741 |
+
}
|
| 742 |
+
|
| 743 |
+
/* Average */
|
| 744 |
+
for (int k = 0; k < 6; k++) {
|
| 745 |
+
out_re[k] = sum_re[k] / 5.0;
|
| 746 |
+
out_im[k] = sum_im[k] / 5.0;
|
| 747 |
+
}
|
| 748 |
+
|
| 749 |
+
/* Compute reconstruction norm for fidelity */
|
| 750 |
+
double norm_out = 0;
|
| 751 |
+
for (int k = 0; k < 6; k++)
|
| 752 |
+
norm_out += out_re[k] * out_re[k] + out_im[k] * out_im[k];
|
| 753 |
+
|
| 754 |
+
return (norm_out > 1e-30) ? 1.0 : 0.0; /* Fidelity is in the caller's hands */
|
| 755 |
+
}
|
s6_exotic.h
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* s6_exotic.h - S6 Outer Automorphism Infrastructure
 *
 * S6 is the ONLY symmetric group with a non-trivial outer automorphism.
 * This module provides the automorphism phi, synthematic totals, and
 * exotic operations for the HexState D=6 engine.
 *
 * The outer automorphism swaps these conjugacy classes (and fixes the rest):
 *   Transpositions (ab)      <->  Triple transpositions (ab)(cd)(ef)
 *   3-cycles (abc)           <->  Double 3-cycles (abc)(def)
 *   6-cycles (abcdef)        <->  (abc)(de)
 * 4-cycles and (abcd)(ef) are each mapped to themselves: automorphisms of S6
 * preserve parity, and those two classes have opposite parity.
 */

#ifndef S6_EXOTIC_H
#define S6_EXOTIC_H

#include <stdint.h>

#define S6_ORDER 720
#define S6_N     6

/* -- Permutation type: p[i] is the image of i -- */
typedef struct { int p[6]; } S6Perm;
static const S6Perm S6_IDENTITY = {{0,1,2,3,4,5}};

/* -- Syntheme: partition of {0,..,5} into 3 unordered pairs -- */
typedef struct { int pairs[3][2]; } S6Syntheme;

/* -- Constants: 15 synthemes, 6 totals -- */
#define S6_NUM_SYNTHEMES 15
#define S6_NUM_TOTALS    6

extern const S6Syntheme s6_synthemes[S6_NUM_SYNTHEMES];
extern int s6_totals[S6_NUM_TOTALS][5];  /* indices into s6_synthemes */

/* -- Outer automorphism phi lookup table, indexed by permutation rank -- */
extern S6Perm s6_phi[S6_ORDER];
extern int    s6_exotic_ready;

/* -- Initialization --
 * Builds s6_phi. The functions below that need the table call this lazily
 * when s6_exotic_ready is 0, so an explicit call is optional. */
void s6_exotic_init(void);

/* -- Permutation operations -- */
S6Perm s6_from_int(int n);
int    s6_to_int_perm(S6Perm a);
S6Perm s6_compose_perm(S6Perm a, S6Perm b);
S6Perm s6_inverse(S6Perm a);
int    s6_perm_eq(S6Perm a, S6Perm b);
int    s6_fixed_points(S6Perm a);

/* -- Apply phi -- */
S6Perm s6_apply_phi(S6Perm sigma);

/* -- Syntheme-parameterized fold --
 * Pairs basis states according to syntheme s instead of the
 * default antipodal pairing {(0,3),(1,4),(2,5)}.
 * Output: out[0..2] = vesica (sum), out[3..5] = wave (diff).
 * An out-of-range syntheme_idx falls back to syntheme 7.
 * s6_unfold_syntheme is the inverse for the same syntheme_idx.
 * Cost: O(6). */
void s6_fold_syntheme(const double *in_re, const double *in_im,
                      double *out_re, double *out_im,
                      int syntheme_idx);
void s6_unfold_syntheme(const double *in_re, const double *in_im,
                        double *out_re, double *out_im,
                        int syntheme_idx);

/* -- Optimal syntheme for a given active mask --
 * Bit k of active_mask marks basis state k as nonzero. Returns the
 * syntheme index whose pairing concentrates active states into the
 * fewest fold slots. */
int s6_optimal_syntheme(uint8_t active_mask);

/* -- Exotic permutation gate --
 * Applies phi(sigma) to the state instead of sigma:
 *   out[phi(sigma)(i)] = in[i] */
void s6_apply_exotic_gate(const double *in_re, const double *in_im,
                          double *out_re, double *out_im,
                          S6Perm sigma);

/* -- Dual measurement --
 * Fills measurement probabilities for BOTH views (not normalised).
 *   Standard: probs_std[k] = |psi[k]|^2
 *   Exotic:   probs_exo[k] = |psi[e(k)]|^2 with the fixed permutation
 *             e = (01)(23)(45).
 * Cost: O(6). */
void s6_dual_probabilities(const double *re, const double *im,
                           double *probs_std, double *probs_exo);

/* -- Exotic Invariant Delta --
 *   Delta(psi) = sum_sigma |<psi|P_sigma|psi> - <psi|P_phi(sigma)|psi>|^2
 * Measures how much the state exploits D=6-specific structure.
 *   Delta = 0: automorphism-transparent
 *   Delta > 0: hexagonally polarized (using structure unique to D=6)
 * Cost: O(720 x D) = O(4320). */
double s6_exotic_invariant(const double *re, const double *im);

/* -- Exotic Entropy Delta_S --
 *   Delta_S = S_std - S_exo
 * Difference between Shannon entropy of the state and of its fold through
 * syntheme_idx.
 *   Delta_S > 0: more ordered in exotic channel.
 *   Delta_S < 0: more ordered in standard channel.
 * Cost: O(D). */
double s6_exotic_entropy(const double *re, const double *im,
                         int syntheme_idx);

/* -- Exotic Fingerprint --
 * Per-conjugacy-class breakdown of the invariant.
 * Writes 11 values to class_deltas (one per S6 conjugacy class). */
void s6_exotic_fingerprint(const double *re, const double *im,
                           double *class_deltas);

/* -- Adaptive Measurement Basis Selection --
 * Returns the syntheme index (0-14) that minimizes expected
 * information destruction for the given state, or -1 if
 * standard-basis measurement is optimal.
 *
 * Based on the "Faustian Pact" experiment: low-Delta states benefit from
 * exotic measurement, high-Delta states are devastated by it.
 * Cost: O(15 x D^2). */
int s6_optimal_measure_basis(const double *re, const double *im);

/* -- Cross-Syntheme Entanglement Witness --
 * Cheap approximation of the exotic invariant Delta.
 * Folds through 3 fixed synthemes (S0, S7, S14) and returns the average
 * pairwise statistical distance scaled to approximate Delta.
 *
 * Cost: three O(D) folds plus three O(D) comparisons, versus O(4320) for
 * the full Delta.
 * Accuracy: reported as r > 0.9 correlation with true Delta. */
double s6_cross_syntheme_witness(const double *re, const double *im);

/* -- Minimum-Entropy Syntheme --
 * Returns the syntheme index whose fold basis concentrates
 * the state's probability into the fewest components.
 * Cost: O(15 x D). */
int s6_min_entropy_syntheme(const double *re, const double *im);

/* -- Synthematic Total Tomography --
 * Reconstructs a D=6 state vector from its projections through
 * the 5 synthemes of one synthematic total.
 *
 * Input:   fold_re[5][6], fold_im[5][6] - for each of the 5 synthemes in
 *          total total_idx, the 6 complex fold components.
 * Output:  out_re[6], out_im[6] - reconstructed state.
 * Returns: a value in [0, 1]; 0.0 when the reconstruction has (near-)zero
 *          norm. It is not a comparison against the true state, which this
 *          function never sees.
 *
 * Based on the "Scrying Mirror" experiment: T0 achieves F=1.0. */
double s6_total_tomography(int total_idx,
                           const double fold_re[5][6],
                           const double fold_im[5][6],
                           double *out_re, double *out_im);

#endif /* S6_EXOTIC_H */
|
safetensors_reader.h
ADDED
|
@@ -0,0 +1,788 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* safetensors_reader.h — SafeTensors Binary Format Reader
|
| 3 |
+
*
|
| 4 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 5 |
+
* β HExState SafeTensors Input Module β
|
| 6 |
+
* β Parses HuggingFace SafeTensors files in pure C β
|
| 7 |
+
* β Supports mmap for zero-copy tensor access β
|
| 8 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 9 |
+
*
|
| 10 |
+
* SafeTensors file layout:
|
| 11 |
+
* [8 bytes: header_size (uint64_t LE)]
|
| 12 |
+
* [header_size bytes: JSON metadata]
|
| 13 |
+
* [rest of file: raw tensor data]
|
| 14 |
+
*
|
| 15 |
+
* JSON header maps tensor names → {dtype, shape, data_offsets}
|
| 16 |
+
* Offsets are relative to the start of the data section.
|
| 17 |
+
*/
|
| 18 |
+
|
| 19 |
+
#ifndef SAFETENSORS_READER_H
|
| 20 |
+
#define SAFETENSORS_READER_H
|
| 21 |
+
|
| 22 |
+
#include <stdint.h>
|
| 23 |
+
#include <stdio.h>
|
| 24 |
+
#include <stdlib.h>
|
| 25 |
+
#include <string.h>
|
| 26 |
+
#include <sys/mman.h>
|
| 27 |
+
#include <sys/stat.h>
|
| 28 |
+
#include <fcntl.h>
|
| 29 |
+
#include <unistd.h>
|
| 30 |
+
|
| 31 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 32 |
+
* CONSTANTS
|
| 33 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 34 |
+
|
| 35 |
+
/* Capacity limits for the fixed-size catalogs and buffers of this reader. */
#define ST_MAX_TENSORS     4096                 /* slots in a tensor catalog        */
#define ST_MAX_NAME_LEN    256                  /* tensor-name buffer, NUL included */
#define ST_MAX_DIMS        8                    /* shape entries kept per tensor    */
#define ST_MAX_HEADER_SIZE (100 * 1024 * 1024)  /* 100 MB safety limit */
|
| 39 |
+
|
| 40 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 41 |
+
* TENSOR DTYPE
|
| 42 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 43 |
+
|
| 44 |
+
/* Element types recognised in a SafeTensors header.
 * The numeric order matters: the summary printers index their name tables
 * with these values, and ST_DTYPE_UNKNOWN must stay last. */
typedef enum {
    ST_DTYPE_F32 = 0,   /* "F32"  */
    ST_DTYPE_F16,       /* "F16"  */
    ST_DTYPE_BF16,      /* "BF16" */
    ST_DTYPE_F64,       /* "F64"  */
    ST_DTYPE_I8,        /* "I8"   */
    ST_DTYPE_I16,       /* "I16"  */
    ST_DTYPE_I32,       /* "I32"  */
    ST_DTYPE_I64,       /* "I64"  */
    ST_DTYPE_U8,        /* "U8"   */
    ST_DTYPE_BOOL,      /* "BOOL" */
    ST_DTYPE_UNKNOWN    /* any other dtype string */
} STDtype;
|
| 57 |
+
|
| 58 |
+
/* Size in bytes of one element of `dtype`.
 * Returns 0 for ST_DTYPE_UNKNOWN and for any value outside the enum. */
static inline int st_dtype_size(STDtype dtype)
{
    /* Byte width per element, indexed by STDtype value. */
    static const int widths[ST_DTYPE_UNKNOWN] = {
        [ST_DTYPE_F32]  = 4,
        [ST_DTYPE_F16]  = 2,
        [ST_DTYPE_BF16] = 2,
        [ST_DTYPE_F64]  = 8,
        [ST_DTYPE_I8]   = 1,
        [ST_DTYPE_I16]  = 2,
        [ST_DTYPE_I32]  = 4,
        [ST_DTYPE_I64]  = 8,
        [ST_DTYPE_U8]   = 1,
        [ST_DTYPE_BOOL] = 1,
    };

    const int idx = (int)dtype;
    if (idx < 0 || idx >= (int)ST_DTYPE_UNKNOWN) {
        return 0;
    }
    return widths[idx];
}
|
| 74 |
+
|
| 75 |
+
static inline STDtype st_parse_dtype(const char *s, int len)
|
| 76 |
+
{
|
| 77 |
+
if (len == 3 && strncmp(s, "F32", 3) == 0) return ST_DTYPE_F32;
|
| 78 |
+
if (len == 3 && strncmp(s, "F16", 3) == 0) return ST_DTYPE_F16;
|
| 79 |
+
if (len == 4 && strncmp(s, "BF16", 4) == 0) return ST_DTYPE_BF16;
|
| 80 |
+
if (len == 3 && strncmp(s, "F64", 3) == 0) return ST_DTYPE_F64;
|
| 81 |
+
if (len == 2 && strncmp(s, "I8", 2) == 0) return ST_DTYPE_I8;
|
| 82 |
+
if (len == 3 && strncmp(s, "I16", 3) == 0) return ST_DTYPE_I16;
|
| 83 |
+
if (len == 3 && strncmp(s, "I32", 3) == 0) return ST_DTYPE_I32;
|
| 84 |
+
if (len == 3 && strncmp(s, "I64", 3) == 0) return ST_DTYPE_I64;
|
| 85 |
+
if (len == 2 && strncmp(s, "U8", 2) == 0) return ST_DTYPE_U8;
|
| 86 |
+
if (len == 4 && strncmp(s, "BOOL", 4) == 0) return ST_DTYPE_BOOL;
|
| 87 |
+
return ST_DTYPE_UNKNOWN;
|
| 88 |
+
}
|
| 89 |
+
|
| 90 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 91 |
+
* TENSOR DESCRIPTOR
|
| 92 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 93 |
+
|
| 94 |
+
typedef struct {
|
| 95 |
+
char name[ST_MAX_NAME_LEN];
|
| 96 |
+
STDtype dtype;
|
| 97 |
+
int n_dims;
|
| 98 |
+
int64_t shape[ST_MAX_DIMS];
|
| 99 |
+
int64_t n_elements; /* Product of shape dims */
|
| 100 |
+
uint64_t data_offset_begin; /* Offset from data section start */
|
| 101 |
+
uint64_t data_offset_end;
|
| 102 |
+
uint64_t data_size; /* end - begin */
|
| 103 |
+
} STTensorInfo;
|
| 104 |
+
|
| 105 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 106 |
+
* SAFETENSORS FILE HANDLE
|
| 107 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββοΏ½οΏ½οΏ½βββββββββββββββββββββ */
|
| 108 |
+
|
| 109 |
+
typedef struct {
|
| 110 |
+
/* File mapping */
|
| 111 |
+
int fd;
|
| 112 |
+
uint8_t *mmap_base;
|
| 113 |
+
size_t file_size;
|
| 114 |
+
|
| 115 |
+
/* Header */
|
| 116 |
+
uint64_t header_size;
|
| 117 |
+
char *header_json; /* Not null-terminated in file,
|
| 118 |
+
we add a null for parsing */
|
| 119 |
+
|
| 120 |
+
/* Data section */
|
| 121 |
+
uint8_t *data_base; /* Points into mmap at header+8 */
|
| 122 |
+
|
| 123 |
+
/* Tensor catalog */
|
| 124 |
+
STTensorInfo tensors[ST_MAX_TENSORS];
|
| 125 |
+
int n_tensors;
|
| 126 |
+
} STFile;
|
| 127 |
+
|
| 128 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 129 |
+
* MINIMAL JSON PARSER
|
| 130 |
+
*
|
| 131 |
+
* This is a hand-rolled, zero-allocation JSON parser designed
|
| 132 |
+
* specifically for the SafeTensors header format. It does NOT handle
|
| 133 |
+
* arbitrary JSON — only the specific structure used by SafeTensors.
|
| 134 |
+
*
|
| 135 |
+
* Expected format:
|
| 136 |
+
* {
|
| 137 |
+
* "__metadata__": { ... },
|
| 138 |
+
* "tensor_name": {
|
| 139 |
+
* "dtype": "F16",
|
| 140 |
+
* "shape": [1024, 4096],
|
| 141 |
+
* "data_offsets": [0, 8388608]
|
| 142 |
+
* },
|
| 143 |
+
* ...
|
| 144 |
+
* }
|
| 145 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 146 |
+
|
| 147 |
+
/* Return the first position at or after `p` that is not a space, tab,
 * newline or carriage return. Stops at the NUL terminator. */
static inline const char *st_skip_ws(const char *p)
{
    for (;;) {
        switch (*p) {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
            p++;
            continue;   /* next iteration of the for loop */
        default:
            return p;
        }
    }
}
|
| 153 |
+
|
| 154 |
+
/* Parse a JSON string literal starting at `p`.
 *
 * Copies the string content into `buf`, truncating to at most buflen-1
 * characters, and always NUL-terminates `buf` when buflen > 0. A backslash
 * escape is reduced to the character that follows the backslash; escapes
 * such as \n or \uXXXX are NOT decoded.
 *
 * Returns a pointer just past the closing quote, or NULL when `p` does not
 * point at an opening quote or the string is not terminated before the end
 * of the input. */
static inline const char *st_parse_json_string(const char *p, char *buf, int buflen)
{
    if (*p != '"') return NULL;
    p++;

    int n = 0;
    while (*p != '\0' && *p != '"') {
        if (*p == '\\') {
            p++;                          /* keep only the escaped character */
            if (*p == '\0') return NULL;  /* input ends inside an escape */
        }
        if (n < buflen - 1) buf[n++] = *p;
        p++;
    }

    /* Only touch buf when it has room for at least the terminator. */
    if (buflen > 0) buf[n] = '\0';

    /* Reaching NUL without a closing quote means the header is truncated. */
    if (*p != '"') return NULL;
    return p + 1;
}
|
| 173 |
+
|
| 174 |
+
/* Parse an optionally negative decimal integer at `p`.
 * Stores the value in *out (0 when no digits are present) and returns the
 * position after the last character consumed. At most 30 digits are read. */
static inline const char *st_parse_json_int(const char *p, int64_t *out)
{
    char digits[32];
    char *w = digits;
    char *const digit_limit = digits + 30;

    if (*p == '-') {
        *w++ = *p++;
    }
    for (; *p >= '0' && *p <= '9' && w < digit_limit; p++) {
        *w++ = *p;
    }
    *w = '\0';

    *out = strtoll(digits, NULL, 10);
    return p;
}
|
| 188 |
+
|
| 189 |
+
/* Starting on an opening '"', advance to the matching closing '"' (or to the
 * NUL terminator if the string never closes), stepping over the character
 * after each backslash. */
static inline const char *st_scan_to_closing_quote(const char *p)
{
    p++;                                /* past the opening quote */
    while (*p && *p != '"') {
        if (*p == '\\') p++;
        if (*p) p++;
    }
    return p;
}

/* Starting on `open`, advance past the matching `close`. Only the given
 * bracket pair is depth-counted; brackets inside string literals are ignored. */
static inline const char *st_skip_json_container(const char *p, char open, char close)
{
    int depth = 1;
    p++;                                /* past the opening bracket */
    while (*p && depth > 0) {
        if (*p == open) {
            depth++;
        } else if (*p == close) {
            depth--;
        } else if (*p == '"') {
            p = st_scan_to_closing_quote(p);
        }
        if (*p) p++;                    /* also steps over a closing quote */
    }
    return p;
}

/* Skip a JSON value (string, number, object, array, bool, null).
 * Leading whitespace is skipped first. Returns the position just after the
 * value, or the NUL terminator if the input ends inside it. */
static inline const char *st_skip_json_value(const char *p)
{
    p = st_skip_ws(p);

    switch (*p) {
    case '"':
        p = st_scan_to_closing_quote(p);
        return (*p == '"') ? p + 1 : p;
    case '{':
        return st_skip_json_container(p, '{', '}');
    case '[':
        return st_skip_json_container(p, '[', ']');
    default:
        break;
    }

    /* Number, bool, null: run up to the next delimiter or whitespace. */
    while (*p && strchr(",}] \t\n\r", *p) == NULL) {
        p++;
    }
    return p;
}
|
| 246 |
+
|
| 247 |
+
/* Parse the SafeTensors JSON header and populate the tensor catalog */
|
| 248 |
+
static inline int st_parse_header(STFile *st)
|
| 249 |
+
{
|
| 250 |
+
const char *p = st->header_json;
|
| 251 |
+
p = st_skip_ws(p);
|
| 252 |
+
if (*p != '{') return -1;
|
| 253 |
+
p++;
|
| 254 |
+
|
| 255 |
+
st->n_tensors = 0;
|
| 256 |
+
|
| 257 |
+
while (*p) {
|
| 258 |
+
p = st_skip_ws(p);
|
| 259 |
+
if (*p == '}') break;
|
| 260 |
+
if (*p == ',') { p++; continue; }
|
| 261 |
+
|
| 262 |
+
/* Parse key */
|
| 263 |
+
char key[ST_MAX_NAME_LEN];
|
| 264 |
+
p = st_parse_json_string(p, key, sizeof(key));
|
| 265 |
+
if (!p) return -1;
|
| 266 |
+
|
| 267 |
+
p = st_skip_ws(p);
|
| 268 |
+
if (*p != ':') return -1;
|
| 269 |
+
p++;
|
| 270 |
+
p = st_skip_ws(p);
|
| 271 |
+
|
| 272 |
+
/* Skip __metadata__ */
|
| 273 |
+
if (strcmp(key, "__metadata__") == 0) {
|
| 274 |
+
p = st_skip_json_value(p);
|
| 275 |
+
continue;
|
| 276 |
+
}
|
| 277 |
+
|
| 278 |
+
/* Parse tensor object */
|
| 279 |
+
if (*p != '{') {
|
| 280 |
+
p = st_skip_json_value(p);
|
| 281 |
+
continue;
|
| 282 |
+
}
|
| 283 |
+
p++;
|
| 284 |
+
|
| 285 |
+
STTensorInfo *ti = &st->tensors[st->n_tensors];
|
| 286 |
+
memset(ti, 0, sizeof(*ti));
|
| 287 |
+
strncpy(ti->name, key, ST_MAX_NAME_LEN - 1);
|
| 288 |
+
|
| 289 |
+
while (*p) {
|
| 290 |
+
p = st_skip_ws(p);
|
| 291 |
+
if (*p == '}') { p++; break; }
|
| 292 |
+
if (*p == ',') { p++; continue; }
|
| 293 |
+
|
| 294 |
+
char field[64];
|
| 295 |
+
p = st_parse_json_string(p, field, sizeof(field));
|
| 296 |
+
if (!p) return -1;
|
| 297 |
+
|
| 298 |
+
p = st_skip_ws(p);
|
| 299 |
+
if (*p != ':') return -1;
|
| 300 |
+
p++;
|
| 301 |
+
p = st_skip_ws(p);
|
| 302 |
+
|
| 303 |
+
if (strcmp(field, "dtype") == 0) {
|
| 304 |
+
char dtype_str[16];
|
| 305 |
+
p = st_parse_json_string(p, dtype_str, sizeof(dtype_str));
|
| 306 |
+
if (!p) return -1;
|
| 307 |
+
ti->dtype = st_parse_dtype(dtype_str, strlen(dtype_str));
|
| 308 |
+
} else if (strcmp(field, "shape") == 0) {
|
| 309 |
+
/* Parse array of ints */
|
| 310 |
+
if (*p != '[') return -1;
|
| 311 |
+
p++;
|
| 312 |
+
ti->n_dims = 0;
|
| 313 |
+
ti->n_elements = 1;
|
| 314 |
+
while (*p) {
|
| 315 |
+
p = st_skip_ws(p);
|
| 316 |
+
if (*p == ']') { p++; break; }
|
| 317 |
+
if (*p == ',') { p++; continue; }
|
| 318 |
+
int64_t dim_val;
|
| 319 |
+
p = st_parse_json_int(p, &dim_val);
|
| 320 |
+
if (ti->n_dims < ST_MAX_DIMS) {
|
| 321 |
+
ti->shape[ti->n_dims++] = dim_val;
|
| 322 |
+
ti->n_elements *= dim_val;
|
| 323 |
+
}
|
| 324 |
+
}
|
| 325 |
+
} else if (strcmp(field, "data_offsets") == 0) {
|
| 326 |
+
/* Parse [begin, end] */
|
| 327 |
+
if (*p != '[') return -1;
|
| 328 |
+
p++;
|
| 329 |
+
p = st_skip_ws(p);
|
| 330 |
+
int64_t begin_val, end_val;
|
| 331 |
+
p = st_parse_json_int(p, &begin_val);
|
| 332 |
+
p = st_skip_ws(p);
|
| 333 |
+
if (*p == ',') p++;
|
| 334 |
+
p = st_skip_ws(p);
|
| 335 |
+
p = st_parse_json_int(p, &end_val);
|
| 336 |
+
p = st_skip_ws(p);
|
| 337 |
+
if (*p == ']') p++;
|
| 338 |
+
ti->data_offset_begin = (uint64_t)begin_val;
|
| 339 |
+
ti->data_offset_end = (uint64_t)end_val;
|
| 340 |
+
ti->data_size = ti->data_offset_end - ti->data_offset_begin;
|
| 341 |
+
} else {
|
| 342 |
+
p = st_skip_json_value(p);
|
| 343 |
+
}
|
| 344 |
+
}
|
| 345 |
+
|
| 346 |
+
if (st->n_tensors < ST_MAX_TENSORS)
|
| 347 |
+
st->n_tensors++;
|
| 348 |
+
}
|
| 349 |
+
|
| 350 |
+
return 0;
|
| 351 |
+
}
|
| 352 |
+
|
| 353 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 354 |
+
* OPEN / CLOSE A SAFETENSORS FILE
|
| 355 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 356 |
+
|
| 357 |
+
static inline STFile *st_open(const char *path)
|
| 358 |
+
{
|
| 359 |
+
STFile *st = (STFile *)calloc(1, sizeof(STFile));
|
| 360 |
+
if (!st) return NULL;
|
| 361 |
+
|
| 362 |
+
/* Open file */
|
| 363 |
+
st->fd = open(path, O_RDONLY);
|
| 364 |
+
if (st->fd < 0) {
|
| 365 |
+
fprintf(stderr, "st_open: cannot open '%s'\n", path);
|
| 366 |
+
free(st);
|
| 367 |
+
return NULL;
|
| 368 |
+
}
|
| 369 |
+
|
| 370 |
+
/* Get file size */
|
| 371 |
+
struct stat sb;
|
| 372 |
+
if (fstat(st->fd, &sb) < 0) {
|
| 373 |
+
close(st->fd);
|
| 374 |
+
free(st);
|
| 375 |
+
return NULL;
|
| 376 |
+
}
|
| 377 |
+
st->file_size = sb.st_size;
|
| 378 |
+
|
| 379 |
+
/* Memory-map the entire file */
|
| 380 |
+
st->mmap_base = (uint8_t *)mmap(NULL, st->file_size, PROT_READ,
|
| 381 |
+
MAP_PRIVATE, st->fd, 0);
|
| 382 |
+
if (st->mmap_base == MAP_FAILED) {
|
| 383 |
+
fprintf(stderr, "st_open: mmap failed for '%s'\n", path);
|
| 384 |
+
close(st->fd);
|
| 385 |
+
free(st);
|
| 386 |
+
return NULL;
|
| 387 |
+
}
|
| 388 |
+
|
| 389 |
+
/* Read header size (first 8 bytes, little-endian uint64) */
|
| 390 |
+
memcpy(&st->header_size, st->mmap_base, sizeof(uint64_t));
|
| 391 |
+
|
| 392 |
+
if (st->header_size > ST_MAX_HEADER_SIZE ||
|
| 393 |
+
st->header_size + 8 > st->file_size) {
|
| 394 |
+
fprintf(stderr, "st_open: invalid header size %lu\n",
|
| 395 |
+
(unsigned long)st->header_size);
|
| 396 |
+
munmap(st->mmap_base, st->file_size);
|
| 397 |
+
close(st->fd);
|
| 398 |
+
free(st);
|
| 399 |
+
return NULL;
|
| 400 |
+
}
|
| 401 |
+
|
| 402 |
+
/* Copy header JSON and null-terminate for our parser */
|
| 403 |
+
st->header_json = (char *)malloc(st->header_size + 1);
|
| 404 |
+
memcpy(st->header_json, st->mmap_base + 8, st->header_size);
|
| 405 |
+
st->header_json[st->header_size] = '\0';
|
| 406 |
+
|
| 407 |
+
/* Data section starts right after header */
|
| 408 |
+
st->data_base = st->mmap_base + 8 + st->header_size;
|
| 409 |
+
|
| 410 |
+
/* Parse the header */
|
| 411 |
+
if (st_parse_header(st) != 0) {
|
| 412 |
+
fprintf(stderr, "st_open: failed to parse header of '%s'\n", path);
|
| 413 |
+
free(st->header_json);
|
| 414 |
+
munmap(st->mmap_base, st->file_size);
|
| 415 |
+
close(st->fd);
|
| 416 |
+
free(st);
|
| 417 |
+
return NULL;
|
| 418 |
+
}
|
| 419 |
+
|
| 420 |
+
return st;
|
| 421 |
+
}
|
| 422 |
+
|
| 423 |
+
/* Release everything owned by a handle from st_open(): the header copy,
 * the file mapping, the descriptor and the handle itself.
 * Passing NULL is a no-op. */
static inline void st_close(STFile *st)
{
    if (st == NULL) {
        return;
    }

    free(st->header_json);

    const int is_mapped = (st->mmap_base != NULL) && (st->mmap_base != MAP_FAILED);
    if (is_mapped) {
        munmap(st->mmap_base, st->file_size);
    }

    if (!(st->fd < 0)) {
        close(st->fd);
    }

    free(st);
}
|
| 433 |
+
|
| 434 |
+
/* βββββββοΏ½οΏ½βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 435 |
+
* TENSOR DATA ACCESS
|
| 436 |
+
*
|
| 437 |
+
* Returns a raw pointer into the mmap'd region.
|
| 438 |
+
* Caller must interpret the bytes according to the tensor's dtype.
|
| 439 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 440 |
+
|
| 441 |
+
static inline const void *st_tensor_data(const STFile *st, int tensor_idx)
|
| 442 |
+
{
|
| 443 |
+
if (tensor_idx < 0 || tensor_idx >= st->n_tensors) return NULL;
|
| 444 |
+
return st->data_base + st->tensors[tensor_idx].data_offset_begin;
|
| 445 |
+
}
|
| 446 |
+
|
| 447 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 448 |
+
* TENSOR → FLOAT32 CONVERSION
|
| 449 |
+
*
|
| 450 |
+
* Converts tensor data to float32, handling FP16 and BF16 input.
|
| 451 |
+
* Caller must free the returned buffer.
|
| 452 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 453 |
+
|
| 454 |
+
/* Forward declaration of fp16/bf16 converters from gguf_format.h */
|
| 455 |
+
/* (Already included when both headers are used together) */
|
| 456 |
+
|
| 457 |
+
static inline float *st_tensor_to_f32(const STFile *st, int tensor_idx)
|
| 458 |
+
{
|
| 459 |
+
const STTensorInfo *ti = &st->tensors[tensor_idx];
|
| 460 |
+
const uint8_t *raw = (const uint8_t *)st_tensor_data(st, tensor_idx);
|
| 461 |
+
if (!raw) return NULL;
|
| 462 |
+
|
| 463 |
+
float *out = (float *)malloc(ti->n_elements * sizeof(float));
|
| 464 |
+
if (!out) return NULL;
|
| 465 |
+
|
| 466 |
+
switch (ti->dtype) {
|
| 467 |
+
case ST_DTYPE_F32:
|
| 468 |
+
memcpy(out, raw, ti->n_elements * sizeof(float));
|
| 469 |
+
break;
|
| 470 |
+
|
| 471 |
+
case ST_DTYPE_F16: {
|
| 472 |
+
const uint16_t *fp16 = (const uint16_t *)raw;
|
| 473 |
+
for (int64_t i = 0; i < ti->n_elements; i++) {
|
| 474 |
+
out[i] = gguf_fp16_to_fp32(fp16[i]);
|
| 475 |
+
}
|
| 476 |
+
break;
|
| 477 |
+
}
|
| 478 |
+
|
| 479 |
+
case ST_DTYPE_BF16: {
|
| 480 |
+
const uint16_t *bf16 = (const uint16_t *)raw;
|
| 481 |
+
for (int64_t i = 0; i < ti->n_elements; i++) {
|
| 482 |
+
out[i] = gguf_bf16_to_fp32(bf16[i]);
|
| 483 |
+
}
|
| 484 |
+
break;
|
| 485 |
+
}
|
| 486 |
+
|
| 487 |
+
case ST_DTYPE_F64: {
|
| 488 |
+
const double *f64 = (const double *)raw;
|
| 489 |
+
for (int64_t i = 0; i < ti->n_elements; i++) {
|
| 490 |
+
out[i] = (float)f64[i];
|
| 491 |
+
}
|
| 492 |
+
break;
|
| 493 |
+
}
|
| 494 |
+
|
| 495 |
+
default:
|
| 496 |
+
/* For integer types, just cast */
|
| 497 |
+
for (int64_t i = 0; i < ti->n_elements; i++) {
|
| 498 |
+
switch (ti->dtype) {
|
| 499 |
+
case ST_DTYPE_I8: out[i] = (float)((int8_t *)raw)[i]; break;
|
| 500 |
+
case ST_DTYPE_I16: out[i] = (float)((int16_t *)raw)[i]; break;
|
| 501 |
+
case ST_DTYPE_I32: out[i] = (float)((int32_t *)raw)[i]; break;
|
| 502 |
+
case ST_DTYPE_U8: out[i] = (float)raw[i]; break;
|
| 503 |
+
default: out[i] = 0.0f; break;
|
| 504 |
+
}
|
| 505 |
+
}
|
| 506 |
+
break;
|
| 507 |
+
}
|
| 508 |
+
|
| 509 |
+
return out;
|
| 510 |
+
}
|
| 511 |
+
|
| 512 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 513 |
+
* FIND TENSOR BY NAME
|
| 514 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 515 |
+
|
| 516 |
+
static inline int st_find_tensor(const STFile *st, const char *name)
|
| 517 |
+
{
|
| 518 |
+
for (int i = 0; i < st->n_tensors; i++) {
|
| 519 |
+
if (strcmp(st->tensors[i].name, name) == 0)
|
| 520 |
+
return i;
|
| 521 |
+
}
|
| 522 |
+
return -1;
|
| 523 |
+
}
|
| 524 |
+
|
| 525 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 526 |
+
* DIAGNOSTICS
|
| 527 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 528 |
+
|
| 529 |
+
static inline void st_print_summary(const STFile *st)
|
| 530 |
+
{
|
| 531 |
+
printf(" βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n");
|
| 532 |
+
printf(" β SafeTensors File Summary β\n");
|
| 533 |
+
printf(" β ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ£\n");
|
| 534 |
+
printf(" β File size: %12lu bytes β\n",
|
| 535 |
+
(unsigned long)st->file_size);
|
| 536 |
+
printf(" β Header size: %12lu bytes β\n",
|
| 537 |
+
(unsigned long)st->header_size);
|
| 538 |
+
printf(" β Tensors: %12d β\n",
|
| 539 |
+
st->n_tensors);
|
| 540 |
+
printf(" βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n\n");
|
| 541 |
+
|
| 542 |
+
const char *dtype_names[] = {
|
| 543 |
+
"F32", "F16", "BF16", "F64", "I8", "I16", "I32", "I64",
|
| 544 |
+
"U8", "BOOL", "???"
|
| 545 |
+
};
|
| 546 |
+
|
| 547 |
+
for (int i = 0; i < st->n_tensors; i++) {
|
| 548 |
+
const STTensorInfo *ti = &st->tensors[i];
|
| 549 |
+
printf(" [%3d] %-50s %4s [", i, ti->name,
|
| 550 |
+
dtype_names[ti->dtype < ST_DTYPE_UNKNOWN ? ti->dtype : ST_DTYPE_UNKNOWN]);
|
| 551 |
+
for (int d = 0; d < ti->n_dims; d++) {
|
| 552 |
+
printf("%ld%s", (long)ti->shape[d], d < ti->n_dims - 1 ? "Γ" : "");
|
| 553 |
+
}
|
| 554 |
+
printf("] %lu bytes\n", (unsigned long)ti->data_size);
|
| 555 |
+
}
|
| 556 |
+
printf("\n");
|
| 557 |
+
}
|
| 558 |
+
|
| 559 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 560 |
+
* MULTI-SHARD SAFETENSORS SUPPORT
|
| 561 |
+
*
|
| 562 |
+
* Most models >3B parameters are split across multiple shards:
|
| 563 |
+
* model-00001-of-00005.safetensors
|
| 564 |
+
* model-00002-of-00005.safetensors
|
| 565 |
+
* ...
|
| 566 |
+
*
|
| 567 |
+
* The mapping from tensor name → shard file is stored in:
|
| 568 |
+
* model.safetensors.index.json
|
| 569 |
+
*
|
| 570 |
+
* This module provides a unified view across all shards.
|
| 571 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 572 |
+
|
| 573 |
+
#include <dirent.h>
|
| 574 |
+
|
| 575 |
+
#define ST_MAX_SHARDS 256
|
| 576 |
+
|
| 577 |
+
typedef struct {
|
| 578 |
+
STFile *shards[ST_MAX_SHARDS];
|
| 579 |
+
int n_shards;
|
| 580 |
+
|
| 581 |
+
/* Unified tensor catalog β maps to (shard_idx, tensor_idx_in_shard) */
|
| 582 |
+
struct {
|
| 583 |
+
char name[ST_MAX_NAME_LEN];
|
| 584 |
+
int shard_idx;
|
| 585 |
+
int tensor_idx;
|
| 586 |
+
} tensor_map[ST_MAX_TENSORS];
|
| 587 |
+
int n_tensors;
|
| 588 |
+
} STMultiFile;
|
| 589 |
+
|
| 590 |
+
/* qsort() comparator for an array of C-string pointers: each argument points
 * at an array element (a `char *`), and the strings are ordered by strcmp(). */
static int st_cmp_str(const void *a, const void *b)
{
    const char *const *lhs = (const char *const *)a;
    const char *const *rhs = (const char *const *)b;
    return strcmp(*lhs, *rhs);
}
|
| 595 |
+
|
| 596 |
+
/* Open a model directory containing one or more .safetensors files.
|
| 597 |
+
* If only a single model.safetensors exists, opens just that file.
|
| 598 |
+
* If model.safetensors.index.json exists, reads all referenced shards. */
|
| 599 |
+
static STMultiFile *st_open_dir(const char *model_dir)
|
| 600 |
+
{
|
| 601 |
+
STMultiFile *mf = (STMultiFile *)calloc(1, sizeof(STMultiFile));
|
| 602 |
+
if (!mf) return NULL;
|
| 603 |
+
|
| 604 |
+
/* Canonicalize directory path */
|
| 605 |
+
char dir[512];
|
| 606 |
+
strncpy(dir, model_dir, sizeof(dir) - 2);
|
| 607 |
+
dir[sizeof(dir) - 2] = '\0';
|
| 608 |
+
int dlen = strlen(dir);
|
| 609 |
+
if (dlen > 0 && dir[dlen - 1] != '/') {
|
| 610 |
+
dir[dlen] = '/';
|
| 611 |
+
dir[dlen + 1] = '\0';
|
| 612 |
+
}
|
| 613 |
+
|
| 614 |
+
/* Try single-file first */
|
| 615 |
+
char single_path[1024];
|
| 616 |
+
snprintf(single_path, sizeof(single_path), "%smodel.safetensors", dir);
|
| 617 |
+
{
|
| 618 |
+
FILE *check = fopen(single_path, "rb");
|
| 619 |
+
if (check) {
|
| 620 |
+
fclose(check);
|
| 621 |
+
STFile *sf = st_open(single_path);
|
| 622 |
+
if (sf) {
|
| 623 |
+
mf->shards[0] = sf;
|
| 624 |
+
mf->n_shards = 1;
|
| 625 |
+
/* Build tensor map from single shard */
|
| 626 |
+
for (int i = 0; i < sf->n_tensors && mf->n_tensors < ST_MAX_TENSORS; i++) {
|
| 627 |
+
strncpy(mf->tensor_map[mf->n_tensors].name,
|
| 628 |
+
sf->tensors[i].name, ST_MAX_NAME_LEN - 1);
|
| 629 |
+
mf->tensor_map[mf->n_tensors].shard_idx = 0;
|
| 630 |
+
mf->tensor_map[mf->n_tensors].tensor_idx = i;
|
| 631 |
+
mf->n_tensors++;
|
| 632 |
+
}
|
| 633 |
+
return mf;
|
| 634 |
+
}
|
| 635 |
+
}
|
| 636 |
+
}
|
| 637 |
+
|
| 638 |
+
/* Scan for shard files matching *.safetensors */
|
| 639 |
+
DIR *d = opendir(model_dir);
|
| 640 |
+
if (!d) {
|
| 641 |
+
fprintf(stderr, " st_open_dir: cannot open directory '%s'\n", model_dir);
|
| 642 |
+
free(mf);
|
| 643 |
+
return NULL;
|
| 644 |
+
}
|
| 645 |
+
|
| 646 |
+
char *shard_names[ST_MAX_SHARDS];
|
| 647 |
+
int n_found = 0;
|
| 648 |
+
struct dirent *de;
|
| 649 |
+
|
| 650 |
+
while ((de = readdir(d)) != NULL && n_found < ST_MAX_SHARDS) {
|
| 651 |
+
int nlen = strlen(de->d_name);
|
| 652 |
+
if (nlen > 12 && strcmp(de->d_name + nlen - 12, ".safetensors") == 0) {
|
| 653 |
+
/* Skip the index.json file itself */
|
| 654 |
+
if (strstr(de->d_name, ".index.json") != NULL) continue;
|
| 655 |
+
shard_names[n_found] = strdup(de->d_name);
|
| 656 |
+
n_found++;
|
| 657 |
+
}
|
| 658 |
+
}
|
| 659 |
+
closedir(d);
|
| 660 |
+
|
| 661 |
+
if (n_found == 0) {
|
| 662 |
+
fprintf(stderr, " st_open_dir: no .safetensors files in '%s'\n", model_dir);
|
| 663 |
+
free(mf);
|
| 664 |
+
return NULL;
|
| 665 |
+
}
|
| 666 |
+
|
| 667 |
+
/* Sort for deterministic ordering */
|
| 668 |
+
qsort(shard_names, n_found, sizeof(char *), st_cmp_str);
|
| 669 |
+
|
| 670 |
+
/* Open each shard */
|
| 671 |
+
for (int s = 0; s < n_found; s++) {
|
| 672 |
+
char path[1024];
|
| 673 |
+
snprintf(path, sizeof(path), "%s%s", dir, shard_names[s]);
|
| 674 |
+
|
| 675 |
+
STFile *sf = st_open(path);
|
| 676 |
+
if (!sf) {
|
| 677 |
+
fprintf(stderr, " st_open_dir: failed to open shard '%s'\n", path);
|
| 678 |
+
free(shard_names[s]);
|
| 679 |
+
continue;
|
| 680 |
+
}
|
| 681 |
+
|
| 682 |
+
int si = mf->n_shards;
|
| 683 |
+
mf->shards[si] = sf;
|
| 684 |
+
|
| 685 |
+
/* Add all tensors from this shard to unified map */
|
| 686 |
+
for (int i = 0; i < sf->n_tensors && mf->n_tensors < ST_MAX_TENSORS; i++) {
|
| 687 |
+
strncpy(mf->tensor_map[mf->n_tensors].name,
|
| 688 |
+
sf->tensors[i].name, ST_MAX_NAME_LEN - 1);
|
| 689 |
+
mf->tensor_map[mf->n_tensors].shard_idx = si;
|
| 690 |
+
mf->tensor_map[mf->n_tensors].tensor_idx = i;
|
| 691 |
+
mf->n_tensors++;
|
| 692 |
+
}
|
| 693 |
+
|
| 694 |
+
mf->n_shards++;
|
| 695 |
+
free(shard_names[s]);
|
| 696 |
+
}
|
| 697 |
+
|
| 698 |
+
if (mf->n_shards == 0) {
|
| 699 |
+
free(mf);
|
| 700 |
+
return NULL;
|
| 701 |
+
}
|
| 702 |
+
|
| 703 |
+
printf(" Opened %d shards, %d tensors total\n\n", mf->n_shards, mf->n_tensors);
|
| 704 |
+
return mf;
|
| 705 |
+
}
|
| 706 |
+
|
| 707 |
+
/* Find a tensor across all shards. Returns a pointer to the unified map entry index,
|
| 708 |
+
* or -1 if not found. */
|
| 709 |
+
static int st_multi_find_tensor(const STMultiFile *mf, const char *name)
|
| 710 |
+
{
|
| 711 |
+
for (int i = 0; i < mf->n_tensors; i++) {
|
| 712 |
+
if (strcmp(mf->tensor_map[i].name, name) == 0)
|
| 713 |
+
return i;
|
| 714 |
+
}
|
| 715 |
+
return -1;
|
| 716 |
+
}
|
| 717 |
+
|
| 718 |
+
/* Get the STTensorInfo for a unified map index */
|
| 719 |
+
static const STTensorInfo *st_multi_tensor_info(const STMultiFile *mf, int unified_idx)
|
| 720 |
+
{
|
| 721 |
+
if (unified_idx < 0 || unified_idx >= mf->n_tensors) return NULL;
|
| 722 |
+
int si = mf->tensor_map[unified_idx].shard_idx;
|
| 723 |
+
int ti = mf->tensor_map[unified_idx].tensor_idx;
|
| 724 |
+
return &mf->shards[si]->tensors[ti];
|
| 725 |
+
}
|
| 726 |
+
|
| 727 |
+
/* Convert a tensor to F32 from across shards */
|
| 728 |
+
static float *st_multi_tensor_to_f32(const STMultiFile *mf, int unified_idx)
|
| 729 |
+
{
|
| 730 |
+
if (unified_idx < 0 || unified_idx >= mf->n_tensors) return NULL;
|
| 731 |
+
int si = mf->tensor_map[unified_idx].shard_idx;
|
| 732 |
+
int ti = mf->tensor_map[unified_idx].tensor_idx;
|
| 733 |
+
return st_tensor_to_f32(mf->shards[si], ti);
|
| 734 |
+
}
|
| 735 |
+
|
| 736 |
+
/* Get raw tensor data from across shards */
|
| 737 |
+
static const void *st_multi_tensor_data(const STMultiFile *mf, int unified_idx)
|
| 738 |
+
{
|
| 739 |
+
if (unified_idx < 0 || unified_idx >= mf->n_tensors) return NULL;
|
| 740 |
+
int si = mf->tensor_map[unified_idx].shard_idx;
|
| 741 |
+
int ti = mf->tensor_map[unified_idx].tensor_idx;
|
| 742 |
+
return st_tensor_data(mf->shards[si], ti);
|
| 743 |
+
}
|
| 744 |
+
|
| 745 |
+
/* Release a multi-shard handle: st_close() each of the first n_shards shard
 * entries, then free the STMultiFile itself. A NULL handle is a no-op. */
static void st_multi_close(STMultiFile *mf)
{
    if (mf == NULL) {
        return;
    }

    int shard = 0;
    while (shard < mf->n_shards) {
        st_close(mf->shards[shard]);
        shard++;
    }
    free(mf);
}
|
| 752 |
+
|
| 753 |
+
static void st_multi_print_summary(const STMultiFile *mf)
|
| 754 |
+
{
|
| 755 |
+
printf(" βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n");
|
| 756 |
+
printf(" β SafeTensors Multi-Shard Summary β\n");
|
| 757 |
+
printf(" β ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ£\n");
|
| 758 |
+
printf(" β Shards: %12d β\n",
|
| 759 |
+
mf->n_shards);
|
| 760 |
+
|
| 761 |
+
uint64_t total_size = 0;
|
| 762 |
+
for (int s = 0; s < mf->n_shards; s++)
|
| 763 |
+
total_size += mf->shards[s]->file_size;
|
| 764 |
+
printf(" β Total size: %12lu bytes (%6.1f MB) β\n",
|
| 765 |
+
(unsigned long)total_size, (double)total_size / (1024.0 * 1024.0));
|
| 766 |
+
printf(" β Tensors: %12d β\n",
|
| 767 |
+
mf->n_tensors);
|
| 768 |
+
printf(" βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n\n");
|
| 769 |
+
|
| 770 |
+
const char *dtype_names[] = {
|
| 771 |
+
"F32", "F16", "BF16", "F64", "I8", "I16", "I32", "I64",
|
| 772 |
+
"U8", "BOOL", "???"
|
| 773 |
+
};
|
| 774 |
+
|
| 775 |
+
for (int i = 0; i < mf->n_tensors; i++) {
|
| 776 |
+
const STTensorInfo *ti = st_multi_tensor_info(mf, i);
|
| 777 |
+
printf(" [%3d] s%-2d %-48s %4s [", i,
|
| 778 |
+
mf->tensor_map[i].shard_idx, ti->name,
|
| 779 |
+
dtype_names[ti->dtype < ST_DTYPE_UNKNOWN ? ti->dtype : ST_DTYPE_UNKNOWN]);
|
| 780 |
+
for (int d = 0; d < ti->n_dims; d++) {
|
| 781 |
+
printf("%ld%s", (long)ti->shape[d], d < ti->n_dims - 1 ? "Γ" : "");
|
| 782 |
+
}
|
| 783 |
+
printf("] %lu bytes\n", (unsigned long)ti->data_size);
|
| 784 |
+
}
|
| 785 |
+
printf("\n");
|
| 786 |
+
}
|
| 787 |
+
|
| 788 |
+
#endif /* SAFETENSORS_READER_H */
|
tokenizer_reader.h
ADDED
|
@@ -0,0 +1,502 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
 * tokenizer_reader.h — HuggingFace tokenizer.json Parser
|
| 3 |
+
*
|
| 4 |
+
* Extracts vocabulary, merge rules, and special token IDs from
|
| 5 |
+
* HuggingFace tokenizer.json files for embedding into GGUF.
|
| 6 |
+
*
|
| 7 |
+
* Supports: LLaMA/Mistral BPE tokenizers (sentencepiece-derived)
|
| 8 |
+
*/
|
| 9 |
+
|
| 10 |
+
#ifndef TOKENIZER_READER_H
|
| 11 |
+
#define TOKENIZER_READER_H
|
| 12 |
+
|
| 13 |
+
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
|
| 17 |
+
|
| 18 |
+
#define TOK_MAX_TOKENS 256000 /* Max supported vocab size */
|
| 19 |
+
#define TOK_MAX_MERGES 512000 /* Max supported merge rules */
|
| 20 |
+
#define TOK_MAX_TOKEN_LEN 512 /* Max length of a single token */
|
| 21 |
+
|
| 22 |
+
/* Per-token category codes. The numeric values are chosen to match the
 * GGUF `tokenizer.ggml.token_type` field, so they are spelled out
 * explicitly and must not be renumbered. */
typedef enum {
    TOK_TYPE_NORMAL   = 1,
    TOK_TYPE_UNKNOWN  = 2,
    TOK_TYPE_CONTROL  = 3,
    TOK_TYPE_USER_DEF = 4,
    TOK_TYPE_UNUSED   = 5,
    TOK_TYPE_BYTE     = 6
} TokenType;
|
| 31 |
+
|
| 32 |
+
/* Everything extracted from a tokenizer: the vocabulary (three parallel
 * arrays of length vocab_size, indexed by token ID), the BPE merge list,
 * the special-token IDs and the tokenizer model name. */
typedef struct {
    /* Vocabulary */
    char    **tokens;         /* token strings, indexed by ID */
    float    *scores;         /* token scores / priorities */
    int32_t  *token_types;    /* token type enum value per token */
    int32_t   vocab_size;     /* total vocabulary size */

    /* BPE merges */
    char    **merges;         /* merge rule strings */
    int32_t   n_merges;       /* number of merge rules */

    /* Special tokens */
    int32_t   bos_id;         /* beginning-of-sequence token ID */
    int32_t   eos_id;         /* end-of-sequence token ID */
    int32_t   unk_id;         /* unknown token ID */
    int32_t   pad_id;         /* padding token ID (-1 if none) */

    char      model_type[32]; /* "llama", "gpt2", etc. */
} TokenizerData;
|
| 48 |
+
|
| 49 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 50 |
+
 * JSON HELPER — Minimal extraction utilities
|
| 51 |
+
*
|
| 52 |
+
 * These are NOT a general JSON parser — they target the specific
|
| 53 |
+
* structure of HuggingFace tokenizer.json files.
|
| 54 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 55 |
+
|
| 56 |
+
/* Advance past any run of space, tab, LF or CR characters and return a
 * pointer to the first character that is none of those (possibly the
 * terminating NUL). */
static inline const char *tok_skip_ws(const char *p) {
    for (;;) {
        switch (*p) {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
            p++;
            continue;   /* continues the enclosing for loop */
        default:
            return p;
        }
    }
}
|
| 61 |
+
|
| 62 |
+
/* Parse exactly four hex digits starting at s into *out.
 * Returns 1 on success, 0 if any of the four characters is not a hex digit
 * (an early NUL stops the scan, so nothing past the terminator is read). */
static int tok_parse_hex4(const char *s, unsigned int *out)
{
    unsigned int value = 0;
    for (int k = 0; k < 4; k++) {
        const char c = s[k];
        unsigned int digit;
        if (c >= '0' && c <= '9')      digit = (unsigned int)(c - '0');
        else if (c >= 'a' && c <= 'f') digit = (unsigned int)(c - 'a') + 10u;
        else if (c >= 'A' && c <= 'F') digit = (unsigned int)(c - 'A') + 10u;
        else return 0;
        value = (value << 4) | digit;
    }
    *out = value;
    return 1;
}

/* Encode code point cp (<= 0x10FFFF) as UTF-8 into out; returns byte count. */
static int tok_utf8_encode(unsigned int cp, char out[4])
{
    if (cp < 0x80u) {
        out[0] = (char)cp;
        return 1;
    }
    if (cp < 0x800u) {
        out[0] = (char)(0xC0u | (cp >> 6));
        out[1] = (char)(0x80u | (cp & 0x3Fu));
        return 2;
    }
    if (cp < 0x10000u) {
        out[0] = (char)(0xE0u | (cp >> 12));
        out[1] = (char)(0x80u | ((cp >> 6) & 0x3Fu));
        out[2] = (char)(0x80u | (cp & 0x3Fu));
        return 3;
    }
    out[0] = (char)(0xF0u | (cp >> 18));
    out[1] = (char)(0x80u | ((cp >> 12) & 0x3Fu));
    out[2] = (char)(0x80u | ((cp >> 6) & 0x3Fu));
    out[3] = (char)(0x80u | (cp & 0x3Fu));
    return 4;
}

/* Extract a JSON string value starting at the opening quote.
 *
 *   p       must point at the opening '"'; otherwise NULL is returned.
 *   buf     receives the unescaped, NUL-terminated string.
 *   buflen  size of buf in bytes; at most buflen - 1 bytes are stored.
 *
 * Returns a pointer just past the closing quote (or at the terminating NUL
 * if the string is unterminated).
 *
 * Escapes handled: \" \\ \/ \b \f \n \r \t and \uXXXX. A \uD800-\uDBFF
 * high surrogate followed by a \uDC00-\uDFFF low surrogate is combined into
 * one 4-byte UTF-8 sequence; a lone surrogate becomes U+FFFD. A \u that is
 * not followed by four hex digits is dropped. Any other escaped character
 * is copied as-is.
 *
 * When the value does not fit in buf, output stops at the last whole
 * character that fits, but the input is still consumed up to the closing
 * quote so the caller's cursor stays in sync with the JSON. */
static const char *tok_extract_string(const char *p, char *buf, int buflen)
{
    if (!p || *p != '"') return NULL;
    p++; /* skip opening quote */

    const int room = (buf && buflen > 0) ? buflen - 1 : 0;
    int i = 0;
    int full = 0;   /* set once a character had to be dropped */

    while (*p && *p != '"') {
        char enc[4];
        int n = 0;

        if (*p == '\\' && p[1]) {
            p++;
            switch (*p) {
                case 'b': enc[n++] = '\b'; break;
                case 'f': enc[n++] = '\f'; break;
                case 'n': enc[n++] = '\n'; break;
                case 'r': enc[n++] = '\r'; break;
                case 't': enc[n++] = '\t'; break;
                case 'u': {
                    unsigned int cp;
                    if (!tok_parse_hex4(p + 1, &cp)) break;   /* malformed: drop */
                    p += 4;   /* now on the last hex digit */
                    if (cp >= 0xD800u && cp <= 0xDBFFu) {
                        unsigned int lo;
                        if (p[1] == '\\' && p[2] == 'u' &&
                            tok_parse_hex4(p + 3, &lo) &&
                            lo >= 0xDC00u && lo <= 0xDFFFu) {
                            cp = 0x10000u + ((cp - 0xD800u) << 10) + (lo - 0xDC00u);
                            p += 6;   /* consume the low-surrogate escape too */
                        } else {
                            cp = 0xFFFDu;   /* unpaired high surrogate */
                        }
                    } else if (cp >= 0xDC00u && cp <= 0xDFFFu) {
                        cp = 0xFFFDu;       /* unpaired low surrogate */
                    }
                    n = tok_utf8_encode(cp, enc);
                    break;
                }
                default: enc[n++] = *p; break;   /* covers \" \\ \/ */
            }
        } else {
            enc[n++] = *p;
        }
        p++;

        if (n == 0) continue;
        if (!full && i + n <= room) {
            for (int k = 0; k < n; k++) buf[i++] = enc[k];
        } else {
            full = 1;   /* keep a clean prefix: never store anything after a drop */
        }
    }

    if (buf && buflen > 0) buf[i] = '\0';

    if (*p == '"') p++; /* skip closing quote */
    return p;
}
|
| 118 |
+
|
| 119 |
+
/* Find a key in JSON and return pointer to its value */
|
| 120 |
+
static const char *tok_find_key(const char *json, const char *key)
|
| 121 |
+
{
|
| 122 |
+
char search[TOK_MAX_TOKEN_LEN + 4];
|
| 123 |
+
snprintf(search, sizeof(search), "\"%s\"", key);
|
| 124 |
+
|
| 125 |
+
const char *p = strstr(json, search);
|
| 126 |
+
if (!p) return NULL;
|
| 127 |
+
|
| 128 |
+
p += strlen(search);
|
| 129 |
+
p = tok_skip_ws(p);
|
| 130 |
+
if (*p == ':') p++;
|
| 131 |
+
p = tok_skip_ws(p);
|
| 132 |
+
return p;
|
| 133 |
+
}
|
| 134 |
+
|
| 135 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 136 |
+
 * VOCAB PARSER — Extract "model": { "vocab": { ... } }
|
| 137 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 138 |
+
|
| 139 |
+
/* Read one `"token": id` pair from a vocab object body.
 *
 * *cursor points somewhere inside the object; leading whitespace and commas
 * are skipped. On success the unescaped token is stored in name, the id in
 * *id, *cursor is advanced past the number and 1 is returned. Returns 0
 * (leaving *cursor untouched) at the closing '}', at end of input, or when
 * the entry is malformed (no quoted key, or no integer value). */
static int tok_next_vocab_entry(const char **cursor, char *name, int name_len, long *id)
{
    const char *p = *cursor;

    for (;;) {
        p = tok_skip_ws(p);
        if (*p != ',') break;
        p++;
    }
    if (*p != '"') return 0;

    p = tok_extract_string(p, name, name_len);
    if (!p) return 0;

    p = tok_skip_ws(p);
    if (*p == ':') p++;
    p = tok_skip_ws(p);

    char *end = NULL;
    const long value = strtol(p, &end, 10);
    if (end == p) return 0;   /* value is not an integer */

    *id = value;
    *cursor = end;
    return 1;
}

/* Extract the vocabulary from the "vocab" object found after the "model"
 * key, filling td->tokens, td->scores, td->token_types and td->vocab_size.
 *
 * vocab_size becomes (largest id + 1). IDs that never appear get an empty
 * token string and score 0; every token type starts as TOK_TYPE_NORMAL.
 * Each parsed token gets score -(float)id (higher ID = lower priority).
 * If "type" under "model" is a string, td->model_type is set to "llama"
 * for "BPE" (case-insensitive) or to the type string itself otherwise.
 *
 * Returns 0 on success. Returns -1 when "model"/"vocab" is missing, the
 * vocab has no usable entry, the largest id is out of range, or memory
 * allocation fails; in all failure cases the vocabulary fields of td are
 * left untouched and nothing is leaked. */
static int tok_parse_vocab(const char *json, TokenizerData *td)
{
    /* Find "vocab" key inside "model" object */
    const char *model_p = tok_find_key(json, "model");
    if (!model_p) return -1;

    /* Extract model type; ignore it unless it really is a JSON string */
    const char *type_p = tok_find_key(model_p, "type");
    if (type_p) {
        char type_buf[64] = "";
        if (tok_extract_string(type_p, type_buf, (int)sizeof(type_buf))) {
            const char *mapped = (strcasecmp(type_buf, "BPE") == 0) ? "llama" : type_buf;
            snprintf(td->model_type, sizeof(td->model_type), "%s", mapped);
        }
    }

    /* Find "vocab": { */
    const char *vocab_p = tok_find_key(model_p, "vocab");
    if (!vocab_p || *vocab_p != '{') return -1;
    vocab_p++; /* skip '{' */

    char token_buf[TOK_MAX_TOKEN_LEN];
    const char *scan = vocab_p;
    long id = 0;
    long max_id = -1;

    /* First pass: find the largest ID so the arrays can be sized once */
    while (tok_next_vocab_entry(&scan, token_buf, (int)sizeof(token_buf), &id)) {
        if (id > max_id) max_id = id;
    }
    /* max_id + 1 must fit in the int32_t vocab_size field */
    if (max_id < 0 || max_id >= INT32_MAX) return -1;

    const size_t n = (size_t)max_id + 1;
    char   **tokens = calloc(n, sizeof *tokens);
    float   *scores = calloc(n, sizeof *scores);
    int32_t *types  = calloc(n, sizeof *types);
    if (!tokens || !scores || !types) goto fail;

    /* Second pass: fill in tokens (a repeated ID keeps the last string) */
    scan = vocab_p;
    while (tok_next_vocab_entry(&scan, token_buf, (int)sizeof(token_buf), &id)) {
        if (id < 0 || id > max_id) continue;
        char *copy = strdup(token_buf);
        if (!copy) goto fail;
        free(tokens[id]);
        tokens[id] = copy;
        scores[id] = -(float)id;
    }

    /* Defaults for every slot; gaps in the ID space get an empty string so
     * consumers never see a NULL token. */
    for (size_t i = 0; i < n; i++) {
        types[i] = TOK_TYPE_NORMAL;
        if (!tokens[i]) {
            tokens[i] = strdup("");
            if (!tokens[i]) goto fail;
        }
    }

    td->tokens = tokens;
    td->scores = scores;
    td->token_types = types;
    td->vocab_size = (int32_t)n;
    return 0;

fail:
    if (tokens) {
        for (size_t i = 0; i < n; i++) free(tokens[i]);
    }
    free(tokens);
    free(scores);
    free(types);
    return -1;
}
|
| 229 |
+
|
| 230 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 231 |
+
 * MERGES PARSER — Extract "model": { "merges": [ ... ] }
|
| 232 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 233 |
+
|
| 234 |
+
/* Extract the BPE merge rules from the "merges" array found after the
 * "model" key into td->merges / td->n_merges.
 *
 * Two element shapes are accepted:
 *   - a string, stored as-is                  e.g.  "a b"
 *   - a two-string array, stored joined by a
 *     single space                            e.g.  ["a", "b"]  ->  "a b"
 * NOTE(review): parts that themselves contain spaces are joined verbatim;
 * confirm whether the consumer needs such spaces escaped.
 *
 * Parsing stops at the array's closing ']', at end of input, at a malformed
 * element, or after TOK_MAX_MERGES rules.
 *
 * Returns 0 when the array was found and scanned, -1 when "model"/"merges"
 * is missing or memory ran out. On an out-of-memory failure mid-array the
 * rules parsed so far are still stored in td. */
static int tok_parse_merges(const char *json, TokenizerData *td)
{
    const char *model_p = tok_find_key(json, "model");
    if (!model_p) return -1;

    const char *merges_p = tok_find_key(model_p, "merges");
    if (!merges_p || *merges_p != '[') return -1;
    merges_p++; /* skip '[' */

    /* Allocate with growth pattern: start with 64k slots */
    size_t capacity = 65536;
    char **merges = calloc(capacity, sizeof *merges);
    if (!merges) return -1;

    int32_t count = 0;
    int rc = 0;

    const char *scan = merges_p;
    char left[TOK_MAX_TOKEN_LEN];
    char right[TOK_MAX_TOKEN_LEN];
    char merge_buf[TOK_MAX_TOKEN_LEN * 2];

    while (count < TOK_MAX_MERGES) {
        scan = tok_skip_ws(scan);
        /* Stop ON the closing bracket; stepping over it would make the
         * scanner pick up unrelated strings that follow the array. */
        if (*scan == '\0' || *scan == ']') break;

        if (*scan == '"') {
            scan = tok_extract_string(scan, merge_buf, (int)sizeof(merge_buf));
            if (!scan) break;
        } else if (*scan == '[') {
            /* Pair form: ["left", "right"] */
            const char *q = tok_extract_string(tok_skip_ws(scan + 1), left, (int)sizeof(left));
            if (!q) break;
            q = tok_skip_ws(q);
            if (*q == ',') q++;
            q = tok_extract_string(tok_skip_ws(q), right, (int)sizeof(right));
            if (!q) break;
            q = tok_skip_ws(q);
            if (*q != ']') break;   /* not a two-element pair */
            scan = q + 1;
            snprintf(merge_buf, sizeof(merge_buf), "%s %s", left, right);
        } else {
            scan++;   /* separator or stray character */
            continue;
        }

        /* Grow if needed, without losing the old block on failure */
        if ((size_t)count >= capacity) {
            const size_t new_cap = capacity * 2;
            char **grown = realloc(merges, new_cap * sizeof *grown);
            if (!grown) { rc = -1; break; }
            merges = grown;
            capacity = new_cap;
        }

        char *copy = strdup(merge_buf);
        if (!copy) { rc = -1; break; }
        merges[count++] = copy;
    }

    td->merges = merges;
    td->n_merges = count;
    return rc;
}
|
| 271 |
+
|
| 272 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 273 |
+
 * SPECIAL TOKENS — Extract from "added_tokens" array
|
| 274 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 275 |
+
|
| 276 |
+
/* Given p pointing at '{', return a pointer just past the matching '}'
 * (or at the terminating NUL if the object is unterminated). Braces inside
 * string literals are ignored, and backslash escapes inside strings are
 * skipped so an escaped quote does not end the string. */
static const char *tok_object_end(const char *p)
{
    int depth = 0;
    int in_string = 0;

    for (; *p; p++) {
        if (in_string) {
            if (*p == '\\' && p[1]) p++;
            else if (*p == '"') in_string = 0;
        } else if (*p == '"') {
            in_string = 1;
        } else if (*p == '{') {
            depth++;
        } else if (*p == '}' && --depth == 0) {
            return p + 1;
        }
    }
    return p;
}

/* Apply the entries of the top-level "added_tokens" array to td.
 *
 * For each object in the array whose "id" is an integer inside
 * [0, td->vocab_size):
 *   - if "special" is true, the token type becomes TOK_TYPE_CONTROL;
 *   - if "content" is a non-empty string and the vocab slot is still empty,
 *     the slot is filled with that content.
 * Objects without a usable id, or with an id outside the vocab, are ignored.
 * Does nothing when "added_tokens" is absent or is not an array.
 *
 * Scanning stops at the array's closing ']' and every key lookup is limited
 * to the current object, so later parts of the file are never mistaken for
 * added tokens. */
static void tok_parse_added_tokens(const char *json, TokenizerData *td)
{
    const char *added_p = tok_find_key(json, "added_tokens");
    if (!added_p || *added_p != '[') return;
    added_p++;

    /* Scan through the array of objects */
    for (;;) {
        added_p = tok_skip_ws(added_p);
        if (*added_p == '\0' || *added_p == ']') break;
        if (*added_p != '{') { added_p++; continue; }

        const char *obj_start = added_p;
        const char *obj_end = tok_object_end(obj_start);
        added_p = obj_end;

        /* Extract id, content and special flag from this object only */
        char content[TOK_MAX_TOKEN_LEN] = "";
        long id = -1;
        int is_special = 0;

        const char *id_p = tok_find_key(obj_start, "id");
        if (id_p && id_p < obj_end) {
            char *end = NULL;
            const long value = strtol(id_p, &end, 10);
            if (end != id_p) id = value;   /* ignore non-numeric ids */
        }

        const char *content_p = tok_find_key(obj_start, "content");
        if (content_p && content_p < obj_end && *content_p == '"')
            tok_extract_string(content_p, content, (int)sizeof(content));

        const char *special_p = tok_find_key(obj_start, "special");
        if (special_p && special_p < obj_end)
            is_special = (strncmp(special_p, "true", 4) == 0);

        if (id < 0 || id >= td->vocab_size) continue;

        /* Mark special tokens */
        if (is_special)
            td->token_types[id] = TOK_TYPE_CONTROL;

        /* Fill in the token string only when the vocab left it empty */
        if (content[0] && (!td->tokens[id] || !td->tokens[id][0])) {
            char *copy = strdup(content);
            if (copy) {   /* on allocation failure keep the existing string */
                free(td->tokens[id]);
                td->tokens[id] = copy;
            }
        }
    }
}
|
| 328 |
+
|
| 329 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 330 |
+
 * SPECIAL TOKEN IDs — Extract from tokenizer_config.json
|
| 331 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 332 |
+
|
| 333 |
+
static void tok_parse_config(const char *config_json, TokenizerData *td)
|
| 334 |
+
{
|
| 335 |
+
/* Look for bos_token, eos_token, unk_token content strings */
|
| 336 |
+
/* Then find their IDs in the vocab */
|
| 337 |
+
|
| 338 |
+
/* Search for token content in the config */
|
| 339 |
+
struct { const char *key; int32_t *id_ptr; const char *default_content; } specials[] = {
|
| 340 |
+
{"bos_token", &td->bos_id, "<s>"},
|
| 341 |
+
{"eos_token", &td->eos_id, "</s>"},
|
| 342 |
+
{"unk_token", &td->unk_id, "<unk>"},
|
| 343 |
+
{NULL, NULL, NULL}
|
| 344 |
+
};
|
| 345 |
+
|
| 346 |
+
for (int s = 0; specials[s].key; s++) {
|
| 347 |
+
const char *p = tok_find_key(config_json, specials[s].key);
|
| 348 |
+
if (!p) {
|
| 349 |
+
/* Try to find in vocab by default content */
|
| 350 |
+
for (int i = 0; i < td->vocab_size; i++) {
|
| 351 |
+
if (td->tokens[i] && strcmp(td->tokens[i], specials[s].default_content) == 0) {
|
| 352 |
+
*specials[s].id_ptr = i;
|
| 353 |
+
break;
|
| 354 |
+
}
|
| 355 |
+
}
|
| 356 |
+
continue;
|
| 357 |
+
}
|
| 358 |
+
|
| 359 |
+
/* The value might be a string directly or an object with "content" */
|
| 360 |
+
if (*p == '"') {
|
| 361 |
+
char content[TOK_MAX_TOKEN_LEN];
|
| 362 |
+
tok_extract_string(p, content, sizeof(content));
|
| 363 |
+
/* Find this content in vocab */
|
| 364 |
+
for (int i = 0; i < td->vocab_size; i++) {
|
| 365 |
+
if (td->tokens[i] && strcmp(td->tokens[i], content) == 0) {
|
| 366 |
+
*specials[s].id_ptr = i;
|
| 367 |
+
break;
|
| 368 |
+
}
|
| 369 |
+
}
|
| 370 |
+
} else if (*p == '{') {
|
| 371 |
+
/* Object with "content" field */
|
| 372 |
+
const char *cp = tok_find_key(p, "content");
|
| 373 |
+
if (cp && *cp == '"') {
|
| 374 |
+
char content[TOK_MAX_TOKEN_LEN];
|
| 375 |
+
tok_extract_string(cp, content, sizeof(content));
|
| 376 |
+
for (int i = 0; i < td->vocab_size; i++) {
|
| 377 |
+
if (td->tokens[i] && strcmp(td->tokens[i], content) == 0) {
|
| 378 |
+
*specials[s].id_ptr = i;
|
| 379 |
+
break;
|
| 380 |
+
}
|
| 381 |
+
}
|
| 382 |
+
}
|
| 383 |
+
}
|
| 384 |
+
}
|
| 385 |
+
}
|
| 386 |
+
|
| 387 |
+
/* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 388 |
+
 * MAIN API — Load tokenizer from directory
|
| 389 |
+
* βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 390 |
+
|
| 391 |
+
/* Read an entire file into a freshly malloc'd, NUL-terminated buffer.
 *
 * Returns the buffer (caller frees it with free()), or NULL when the file
 * cannot be opened, its size cannot be determined, memory cannot be
 * allocated, or a read error occurs. If fewer bytes than the reported size
 * are read without an error, the buffer is terminated after the bytes
 * actually read. */
static char *tok_read_file(const char *path)
{
    FILE *f = fopen(path, "rb");
    if (!f) return NULL;

    char *buf = NULL;
    long size = 0;
    size_t got = 0;

    if (fseek(f, 0, SEEK_END) != 0) goto out;
    size = ftell(f);
    if (size < 0) goto out;   /* ftell failed; size + 1 would be bogus */
    if (fseek(f, 0, SEEK_SET) != 0) goto out;

    buf = malloc((size_t)size + 1);
    if (!buf) goto out;

    got = fread(buf, 1, (size_t)size, f);
    if (got != (size_t)size && ferror(f)) {
        free(buf);
        buf = NULL;
        goto out;
    }
    buf[got] = '\0';

out:
    fclose(f);
    return buf;
}
|
| 408 |
+
|
| 409 |
+
static TokenizerData *tok_load(const char *tokenizer_json_path,
|
| 410 |
+
const char *config_json_path)
|
| 411 |
+
{
|
| 412 |
+
TokenizerData *td = (TokenizerData *)calloc(1, sizeof(TokenizerData));
|
| 413 |
+
if (!td) return NULL;
|
| 414 |
+
|
| 415 |
+
td->bos_id = 1;
|
| 416 |
+
td->eos_id = 2;
|
| 417 |
+
td->unk_id = 0;
|
| 418 |
+
td->pad_id = -1;
|
| 419 |
+
strcpy(td->model_type, "llama");
|
| 420 |
+
|
| 421 |
+
/* Read tokenizer.json */
|
| 422 |
+
char *json = tok_read_file(tokenizer_json_path);
|
| 423 |
+
if (!json) {
|
| 424 |
+
fprintf(stderr, " WARNING: Could not read '%s'\n", tokenizer_json_path);
|
| 425 |
+
free(td);
|
| 426 |
+
return NULL;
|
| 427 |
+
}
|
| 428 |
+
|
| 429 |
+
/* Parse vocab */
|
| 430 |
+
if (tok_parse_vocab(json, td) != 0) {
|
| 431 |
+
fprintf(stderr, " WARNING: Failed to parse vocab from tokenizer.json\n");
|
| 432 |
+
free(json);
|
| 433 |
+
free(td);
|
| 434 |
+
return NULL;
|
| 435 |
+
}
|
| 436 |
+
|
| 437 |
+
/* Parse merges */
|
| 438 |
+
tok_parse_merges(json, td);
|
| 439 |
+
|
| 440 |
+
/* Parse added tokens (special tokens) */
|
| 441 |
+
tok_parse_added_tokens(json, td);
|
| 442 |
+
|
| 443 |
+
/* Detect byte tokens: <0x00> through <0xFF> */
|
| 444 |
+
for (int i = 0; i < td->vocab_size; i++) {
|
| 445 |
+
if (td->tokens[i] && td->tokens[i][0] == '<' &&
|
| 446 |
+
td->tokens[i][1] == '0' && td->tokens[i][2] == 'x' &&
|
| 447 |
+
strlen(td->tokens[i]) == 6 && td->tokens[i][5] == '>') {
|
| 448 |
+
td->token_types[i] = TOK_TYPE_BYTE;
|
| 449 |
+
}
|
| 450 |
+
}
|
| 451 |
+
|
| 452 |
+
free(json);
|
| 453 |
+
|
| 454 |
+
/* Read config if available */
|
| 455 |
+
if (config_json_path) {
|
| 456 |
+
char *config = tok_read_file(config_json_path);
|
| 457 |
+
if (config) {
|
| 458 |
+
tok_parse_config(config, td);
|
| 459 |
+
free(config);
|
| 460 |
+
}
|
| 461 |
+
}
|
| 462 |
+
|
| 463 |
+
return td;
|
| 464 |
+
}
|
| 465 |
+
|
| 466 |
+
/* Release a TokenizerData and everything it owns: each token string and
 * the tokens array, each merge string and the merges array, the scores and
 * token_types arrays, and the struct itself. A NULL td is a no-op. */
static void tok_free(TokenizerData *td)
{
    if (td == NULL) {
        return;
    }

    if (td->tokens != NULL) {
        int t = 0;
        while (t < td->vocab_size) {
            free(td->tokens[t]);
            t++;
        }
        free(td->tokens);
    }

    if (td->merges != NULL) {
        int m = 0;
        while (m < td->n_merges) {
            free(td->merges[m]);
            m++;
        }
        free(td->merges);
    }

    free(td->scores);
    free(td->token_types);
    free(td);
}
|
| 483 |
+
|
| 484 |
+
/* Print summary */
|
| 485 |
+
static void tok_print_summary(const TokenizerData *td)
|
| 486 |
+
{
|
| 487 |
+
printf(" βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n");
|
| 488 |
+
printf(" β Tokenizer β\n");
|
| 489 |
+
printf(" β ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ£\n");
|
| 490 |
+
printf(" β Model: %-40s β\n", td->model_type);
|
| 491 |
+
printf(" β Vocab size: %-40d β\n", td->vocab_size);
|
| 492 |
+
printf(" β Merges: %-40d β\n", td->n_merges);
|
| 493 |
+
printf(" β BOS token: %-3d %-36s β\n", td->bos_id,
|
| 494 |
+
(td->bos_id >= 0 && td->bos_id < td->vocab_size) ? td->tokens[td->bos_id] : "");
|
| 495 |
+
printf(" β EOS token: %-3d %-36s β\n", td->eos_id,
|
| 496 |
+
(td->eos_id >= 0 && td->eos_id < td->vocab_size) ? td->tokens[td->eos_id] : "");
|
| 497 |
+
printf(" β UNK token: %-3d %-36s β\n", td->unk_id,
|
| 498 |
+
(td->unk_id >= 0 && td->unk_id < td->vocab_size) ? td->tokens[td->unk_id] : "");
|
| 499 |
+
printf(" βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n\n");
|
| 500 |
+
}
|
| 501 |
+
|
| 502 |
+
#endif /* TOKENIZER_READER_H */
|