/**
 * Aether Inference Server with Glossolalia Decoder
 *
 * SmolLM2-360M / Qwen2.5-0.5B inference using WASM SIMD kernels with pure-JS
 * fallbacks. Endpoints:
 *   /generate-standard     -- standard top-p sampling
 *   /generate-glossolalia  -- temperature-ensemble fork/race/fold
 *   /generate-metacog      -- glossolalia + C2/C3 metacognitive monitoring
 *   /generate-personality  -- named personality presets over the above
 *
 * NOTE(review): large spans of this file were destroyed by an HTML-tag-stripping
 * extraction pass (everything from `<letter` to the next `>`). The affected
 * bodies (JS kernels, GGUF parser, BPE tokenizer interiors, transformer step)
 * have been reconstructed from the surviving fragments and the standard
 * GGUF / LLaMA-architecture semantics — verify against the original source.
 */
import { createServer } from 'http';
import { readFileSync, existsSync } from 'fs';
import { execSync } from 'child_process';
import { fileURLToPath } from 'url';
import { dirname, join } from 'path';

const __dirname = dirname(fileURLToPath(import.meta.url));
const PORT = parseInt(process.env.AETHER_PORT || '7861', 10);

// ─── Model Configs ──────────────────────────────────────────────────────────
const CONFIGS = {
  'smollm2-360m': {
    hiddenDim: 960,
    numLayers: 32,
    numHeads: 15,
    numKvHeads: 5,
    headDim: 64,
    intermediateSize: 2560,
    vocabSize: 49152,
    ropeTheta: 100000.0,
    rmsNormEps: 1e-5,
    eosToken: 2,
  },
  'qwen2.5-0.5b': {
    hiddenDim: 896,
    numLayers: 24,
    numHeads: 14,
    numKvHeads: 2,
    headDim: 64,
    intermediateSize: 4864,
    vocabSize: 151936,
    ropeTheta: 1000000.0,
    rmsNormEps: 1e-6,
    eosToken: 151645, // <|im_end|>
  },
};

// Five Bule Personality Profiles (THM-FIVE-BULE-PERSONALITY)
// Each personality is a position on the fork/race/fold/vent/interfere axes.
const PERSONALITIES = {
  explorer: {
    temps: [0.8, 1.2, 1.6], topP: 0.98, absorbingThreshold: 5,
    label: 'Explorer -- forks broadly, high temperature diversity',
  },
  builder: {
    temps: [0.2, 0.3, 0.5], topP: 0.70, absorbingThreshold: 4,
    label: 'Builder -- folds tightly, low temperature, precise',
  },
  creative: {
    temps: [0.6, 1.0, 1.4], topP: 0.95, absorbingThreshold: 2,
    label: 'Creative -- races freely, aggressive C3 perturbation',
  },
  anxious: {
    temps: [0.4, 0.6, 0.8], topP: 0.85, absorbingThreshold: 2,
    label: 'Anxious -- interferes early, cautious, frequent C3',
  },
  balanced: {
    temps: [0.4, 0.7, 1.0], topP: 0.90, absorbingThreshold: 3,
    label: 'Balanced -- standard glossolalia, phi convergence',
  },
};

// NOTE: KV-cache geometry (kvDim, GQA ratio) was previously derived once from a
// single default config at module scope. Because several models with different
// KV geometry are loaded at once, those values are now derived per-model inside
// forwardPass() from the model's own config.

// ─── WASM SIMD ──────────────────────────────────────────────────────────────
let simd = null;

/**
 * Load the optional WASM SIMD kernel module. Returns a set of wrapped kernel
 * functions (matVec, rmsNorm, softmax, fusedSiluMul, add) or null when the
 * .wasm file is missing or fails to instantiate — callers then fall back to
 * the pure-JS implementations via op().
 */
async function loadSIMD() {
  const p = join(__dirname, 'simd-kernels.wasm');
  if (!existsSync(p)) return null;
  try {
    const { instance } = await WebAssembly.instantiate(readFileSync(p), {
      env: { expf: Math.exp, tanhf: Math.tanh, powf: Math.pow },
    });
    const w = instance.exports;
    w.resetHeap(65536);
    const mem = w.memory;
    const hf = () => new Float32Array(mem.buffer);
    const cp = (ptr, f) => hf().set(f, ptr >> 2);
    const rd = (ptr, n) => hf().slice(ptr >> 2, (ptr >> 2) + n);
    // Every kernel call gets a bump-allocator scope: save the heap pointer,
    // run, then restore it so temporary buffers never accumulate.
    const wrap = (fn) => (...args) => {
      const s = w.getHeapPtr();
      try {
        return fn(s, ...args);
      } finally {
        w.resetHeap(s);
      }
    };
    console.log('[Aether] WASM SIMD loaded');
    return {
      matVec: wrap((s, mat, vec, rows, cols) => {
        // Matrices over ~100MB don't fit the WASM heap; use the JS path.
        if (mat.byteLength > 100_000_000) return matVecJS(mat, vec, rows, cols);
        const mP = w.allocate(mat.byteLength);
        const vP = w.allocate(vec.byteLength);
        const rP = w.allocate(rows * 4);
        cp(mP, mat);
        cp(vP, vec);
        w.matVecSimdBatch4(mP, vP, rP, rows, cols);
        return rd(rP, rows);
      }),
      rmsNorm: wrap((s, x, wt, eps) => {
        const xP = w.allocate(x.byteLength);
        const wP = w.allocate(wt.byteLength);
        const rP = w.allocate(x.byteLength);
        cp(xP, x);
        cp(wP, wt);
        w.rmsNormSimd(xP, wP, rP, x.length, eps);
        return rd(rP, x.length);
      }),
      softmax: wrap((s, x) => {
        const xP = w.allocate(x.byteLength);
        const rP = w.allocate(x.byteLength);
        cp(xP, x);
        w.softmaxSimd(xP, rP, x.length);
        return rd(rP, x.length);
      }),
      fusedSiluMul: wrap((s, g, u) => {
        const gP = w.allocate(g.byteLength);
        const uP = w.allocate(u.byteLength);
        const rP = w.allocate(g.byteLength);
        cp(gP, g);
        cp(uP, u);
        w.fusedSiluMul(gP, uP, rP, g.length);
        return rd(rP, g.length);
      }),
      add: wrap((s, a, b) => {
        const aP = w.allocate(a.byteLength);
        const bP = w.allocate(b.byteLength);
        const rP = w.allocate(a.byteLength);
        cp(aP, a);
        cp(bP, b);
        w.addSimd(aP, bP, rP, a.length);
        return rd(rP, a.length);
      }),
    };
  } catch (e) {
    console.warn('[Aether] WASM failed:', e.message);
    return null;
  }
}

// ─── JS Fallbacks (reconstructed) ───────────────────────────────────────────

/** Dense matrix-vector product: m is rows*cols row-major, returns length-rows. */
function matVecJS(m, v, rows, cols) {
  const o = new Float32Array(rows);
  for (let r = 0; r < rows; r++) {
    const off = r * cols;
    let s = 0;
    for (let c = 0; c < cols; c++) s += m[off + c] * v[c];
    o[r] = s;
  }
  return o;
}

/** RMSNorm: x * w / sqrt(mean(x^2) + eps). */
function rmsNormJS(x, w, eps) {
  let ss = 0;
  for (let i = 0; i < x.length; i++) ss += x[i] * x[i];
  const inv = 1 / Math.sqrt(ss / x.length + eps);
  const o = new Float32Array(x.length);
  for (let i = 0; i < x.length; i++) o[i] = x[i] * inv * w[i];
  return o;
}

/** Numerically-stable softmax (subtract max before exponentiating). */
function softmaxJS(x) {
  let mx = -Infinity;
  for (let i = 0; i < x.length; i++) if (x[i] > mx) mx = x[i];
  const o = new Float32Array(x.length);
  let s = 0;
  for (let i = 0; i < x.length; i++) {
    o[i] = Math.exp(x[i] - mx);
    s += o[i];
  }
  for (let i = 0; i < x.length; i++) o[i] /= s;
  return o;
}

/** SwiGLU activation: silu(g) * u, elementwise. */
function fusedSiluMulJS(g, u) {
  const o = new Float32Array(g.length);
  for (let i = 0; i < g.length; i++) o[i] = (g[i] / (1 + Math.exp(-g[i]))) * u[i];
  return o;
}

/** Elementwise vector addition. */
function addJS(a, b) {
  const o = new Float32Array(a.length);
  for (let i = 0; i < a.length; i++) o[i] = a[i] + b[i];
  return o;
}

/** Kernel dispatch: prefer WASM SIMD when loaded, else JS fallbacks. */
const op = () => ({
  matVec: simd?.matVec || matVecJS,
  rmsNorm: simd?.rmsNorm || rmsNormJS,
  softmax: simd?.softmax || softmaxJS,
  fusedSiluMul: simd?.fusedSiluMul || fusedSiluMulJS,
  add: simd?.add || addJS,
});

// ─── Q8_0 Dequant ───────────────────────────────────────────────────────────

/** Decode an IEEE-754 half from two little-endian bytes. Inf/NaN decode to 0. */
function fp16(lo, hi) {
  const h = lo | (hi << 8);
  const s = (h >> 15) & 1;
  const e = (h >> 10) & 0x1f;
  const f = h & 0x3ff;
  if (e === 0) return f === 0 ? 0 : (s ? -1 : 1) * (f / 1024) * Math.pow(2, -14);
  if (e === 31) return 0; // Inf/NaN flushed to 0 (original behavior)
  return (s ? -1 : 1) * Math.pow(2, e - 15) * (1 + f / 1024);
}

/**
 * Dequantize GGUF Q8_0 data: blocks of 34 bytes = fp16 scale + 32 signed int8.
 * (interior reconstructed from surviving `(v>127?v-256:v)*sc` fragment)
 */
function dequantQ8(data, n) {
  const o = new Float32Array(n);
  const nb = Math.ceil(n / 32);
  for (let b = 0; b < nb; b++) {
    const off = b * 34;
    const sc = fp16(data[off], data[off + 1]);
    for (let j = 0; j < 32; j++) {
      const idx = b * 32 + j;
      if (idx >= n) break;
      const v = data[off + 2 + j];
      o[idx] = (v > 127 ? v - 256 : v) * sc;
    }
  }
  return o;
}

/** Dequantize by GGML type id: 0=F32 (zero-copy view), 1=F16, 8=Q8_0. */
function dequantByType(data, n, type) {
  if (type === 0) return new Float32Array(data.buffer, data.byteOffset, n);
  if (type === 8) return dequantQ8(data, n);
  if (type === 1) {
    const o = new Float32Array(n);
    for (let i = 0; i < n; i++) o[i] = fp16(data[2 * i], data[2 * i + 1]);
    return o;
  }
  throw new Error(`Unsupported GGML tensor type ${type}`);
}

/** Byte size of a tensor of `n` elements for the supported GGML types. */
function tensorByteSize(type, n) {
  if (type === 0) return n * 4; // F32
  if (type === 1) return n * 2; // F16
  if (type === 8) return Math.ceil(n / 32) * 34; // Q8_0
  throw new Error(`Unsupported GGML tensor type ${type}`);
}

// ─── GGUF Parser (interior reconstructed) ───────────────────────────────────

/**
 * Parse a GGUF container: header, metadata KV pairs, tensor directory.
 * Returns { tensors: [{name, dims, type, offset, size, numElements}], dataOffset }.
 * Tensor offsets are relative to dataOffset, which is the end of the directory
 * rounded up to the file alignment (default 32, overridable by metadata).
 */
function parseGGUF(buf) {
  const dv = new DataView(buf.buffer, buf.byteOffset, buf.byteLength);
  let o = 0;
  const u32 = () => { const v = dv.getUint32(o, true); o += 4; return v; };
  const u64 = () => { const v = dv.getBigUint64(o, true); o += 8; return v; };
  const str = () => {
    const n = Number(u64());
    const s = buf.toString('utf8', o, o + n);
    o += n;
    return s;
  };
  // GGUF metadata value reader, by type tag.
  const value = (t) => {
    switch (t) {
      case 0: { const v = dv.getUint8(o); o += 1; return v; }          // u8
      case 1: { const v = dv.getInt8(o); o += 1; return v; }           // i8
      case 2: { const v = dv.getUint16(o, true); o += 2; return v; }   // u16
      case 3: { const v = dv.getInt16(o, true); o += 2; return v; }    // i16
      case 4: return u32();                                            // u32
      case 5: { const v = dv.getInt32(o, true); o += 4; return v; }    // i32
      case 6: { const v = dv.getFloat32(o, true); o += 4; return v; }  // f32
      case 7: { const v = dv.getUint8(o); o += 1; return v !== 0; }    // bool
      case 8: return str();                                            // string
      case 9: {                                                        // array
        const et = u32();
        const n = Number(u64());
        const a = [];
        for (let i = 0; i < n; i++) a.push(value(et));
        return a;
      }
      case 10: return u64();                                           // u64
      case 11: { const v = dv.getBigInt64(o, true); o += 8; return v; } // i64
      case 12: { const v = dv.getFloat64(o, true); o += 8; return v; } // f64
      default: throw new Error(`GGUF: unknown metadata type ${t}`);
    }
  };

  if (u32() !== 0x46554747) throw new Error('GGUF: bad magic'); // 'GGUF' LE
  u32(); // version (unused)
  const nTensors = Number(u64());
  const nKv = Number(u64());

  let align = 32; // GGUF default; may be overridden below
  for (let i = 0; i < nKv; i++) {
    const key = str();
    const t = u32();
    const v = value(t);
    if (key === 'general.alignment') align = Number(v);
  }

  const tensors = [];
  for (let i = 0; i < nTensors; i++) {
    const name = str();
    const nDims = u32();
    const dims = [];
    for (let d = 0; d < nDims; d++) dims.push(u64());
    const type = u32();
    const offset = u64();
    const numElements = Number(dims.reduce((a, b) => a * b, 1n));
    tensors.push({ name, dims, type, offset, size: tensorByteSize(type, numElements), numElements });
  }
  return { tensors, dataOffset: Math.ceil(o / align) * align };
}

// ─── BPE Tokenizer (encode/decode interiors reconstructed) ──────────────────
class Tok {
  /** @param {object} j parsed HuggingFace tokenizer.json */
  constructor(j) {
    const m = j.model || {};
    this.vocab = m.vocab || {};
    this.rev = {};
    for (const [t, id] of Object.entries(this.vocab)) this.rev[id] = t;
    // Merge ranks keyed by the raw merge entry (assumes space-separated
    // "left right" string entries — TODO confirm against tokenizer.json format).
    this.mr = {};
    for (const [i, mg] of (m.merges || []).entries()) this.mr[mg] = i;
    this.added = {};
    if (j.added_tokens) for (const t of j.added_tokens) this.added[t.content] = t.id;
  }

  /**
   * Byte-level BPE encode. Special tokens (<|...|>) are matched verbatim;
   * ordinary text is mapped to Ġ/Ċ byte-level symbols, unknown characters fall
   * back to <0xXX> byte tokens, then greedy lowest-rank merges are applied.
   */
  encode(text) {
    const sp = /<\|[^|]+\|>/g;
    const parts = [];
    let last = 0;
    let m;
    while ((m = sp.exec(text)) !== null) {
      if (m.index > last) parts.push({ t: text.slice(last, m.index), s: false });
      parts.push({ t: m[0], s: true });
      last = m.index + m[0].length;
    }
    if (last < text.length) parts.push({ t: text.slice(last), s: false });

    const ids = [];
    for (const part of parts) {
      if (part.s) {
        const id = this.added[part.t] ?? this.vocab[part.t];
        if (id !== undefined) ids.push(id);
        continue;
      }
      // Byte-level pre-tokenization: space -> Ġ, newline -> Ċ.
      const pre = part.t.replace(/ /g, 'Ġ').replace(/\n/g, 'Ċ');
      let syms = [];
      for (const ch of pre) {
        if (this.vocab[ch] !== undefined || this.mr[ch] !== undefined) {
          syms.push(ch);
        } else {
          // Byte fallback tokens, e.g. <0x41>.
          for (const b of Buffer.from(ch, 'utf8')) {
            syms.push(`<0x${b.toString(16).toUpperCase().padStart(2, '0')}>`);
          }
        }
      }
      // Greedy BPE: repeatedly apply the lowest-rank adjacent merge.
      while (syms.length > 1) {
        let best = Infinity;
        let bi = -1;
        for (let i = 0; i < syms.length - 1; i++) {
          const r = this.mr[`${syms[i]} ${syms[i + 1]}`];
          if (r !== undefined && r < best) {
            best = r;
            bi = i;
          }
        }
        if (bi === -1) break;
        syms = [...syms.slice(0, bi), syms[bi] + syms[bi + 1], ...syms.slice(bi + 2)];
      }
      for (const s of syms) {
        const id = this.vocab[s] ?? this.added[s];
        if (id !== undefined) ids.push(id);
      }
    }
    return ids;
  }

  /**
   * Decode token ids to text. Byte tokens <0xXX> are emitted via fromCharCode
   * (original behavior — not multi-byte-UTF-8-aware), special <|...|> tokens
   * are dropped, Ġ/Ċ map back to space/newline.
   */
  decode(ids) {
    const p = [];
    for (const id of ids) {
      const s = this.rev[id];
      if (s && s.startsWith('<0x')) p.push(String.fromCharCode(parseInt(s.slice(3, -1), 16)));
      else if (s && !s.startsWith('<|')) p.push(s);
    }
    return p.join('').replace(/Ġ/g, ' ').replace(/Ċ/g, '\n');
  }
}

// ─── RoPE (LLaMA style: ADJACENT pairs) ─────────────────────────────────────

/** Rotate adjacent (x[i], x[i+1]) pairs of one head in place, at `position`. */
function applyRoPE(x, headDim, position, theta) {
  for (let i = 0; i < headDim; i += 2) {
    const freq = 1.0 / Math.pow(theta, (2 * (i / 2)) / headDim);
    const angle = position * freq;
    const cos = Math.cos(angle);
    const sin = Math.sin(angle);
    const x0 = x[i];
    const x1 = x[i + 1];
    x[i] = x0 * cos - x1 * sin;
    x[i + 1] = x0 * sin + x1 * cos;
  }
}

// ─── Models ─────────────────────────────────────────────────────────────────
const models = {};
let activeModel = null; // name of the most recently loaded model

/**
 * Load a GGUF checkpoint + tokenizer.json into `models[name]`.
 * Layer tensor names follow the standard GGUF convention (blk.N.*).
 * (layer loop and registration reconstructed)
 */
function loadModel(name, ggufPath, tokPath, configName) {
  const cfg = CONFIGS[configName] || CONFIGS['smollm2-360m'];
  console.log(`[Aether] Loading ${name} (${configName}: ${cfg.numLayers}L, ${cfg.hiddenDim}d)...`);
  const t0 = Date.now();
  const buf = readFileSync(ggufPath);
  const parsed = parseGGUF(buf);
  console.log(`[Aether] Parsed ${parsed.tensors.length} tensors in ${Date.now() - t0}ms`);
  const tokenizer = new Tok(JSON.parse(readFileSync(tokPath, 'utf8')));

  const byName = {};
  for (const t of parsed.tensors) byName[t.name] = t;
  function get(nm) {
    const t = byName[nm];
    if (!t) return null;
    const raw = new Uint8Array(buf.buffer, buf.byteOffset + parsed.dataOffset + Number(t.offset), t.size);
    return dequantByType(raw, t.numElements, t.type);
  }

  console.log('[Aether] Dequantizing...');
  const tokenEmbd = get('token_embd.weight');
  const layers = [];
  for (let i = 0; i < cfg.numLayers; i++) {
    layers.push({
      attnNorm: get(`blk.${i}.attn_norm.weight`),
      wq: get(`blk.${i}.attn_q.weight`),
      wk: get(`blk.${i}.attn_k.weight`),
      wv: get(`blk.${i}.attn_v.weight`),
      wo: get(`blk.${i}.attn_output.weight`),
      ffnNorm: get(`blk.${i}.ffn_norm.weight`),
      wGate: get(`blk.${i}.ffn_gate.weight`),
      wUp: get(`blk.${i}.ffn_up.weight`),
      wDown: get(`blk.${i}.ffn_down.weight`),
    });
  }
  const outNorm = get('output_norm.weight');
  // Tied embeddings: some checkpoints omit output.weight.
  const outWeight = get('output.weight') || tokenEmbd;

  models[name] = { tokenizer, tokenEmbd, layers, outNorm, outWeight, config: cfg };
  activeModel = name;
  console.log(`[Aether] ${name} ready in ${Date.now() - t0}ms`);
}

/** Resolve a model by name, falling back to the first loaded model. */
function getModel(name) {
  const m = models[name] || models[Object.keys(models)[0]];
  if (!m) throw new Error(`Model not loaded: ${name}`);
  return m;
}

// ─── Forward Pass (transformer step reconstructed) ──────────────────────────

/**
 * Build a single-prompt decoding session. Returns { inputTokens, config, step }
 * where step(allToks, kvCache, diag) runs one decoder step for the LAST token
 * of allToks, appending its K/V to kvCache, and returns
 * { logits, layerNorms, attnEntropies } (diagnostics null unless diag=true).
 */
function forwardPass(prompt, modelName) {
  const model = getModel(modelName);
  const mc = model.config;
  // Per-model KV geometry (was previously stale module globals).
  const kvDim = mc.numKvHeads * mc.headDim;
  const gqaRatio = mc.numHeads / mc.numKvHeads; // query heads per KV head
  const chatPrompt = `<|im_start|>user\n${prompt}<|im_end|>\n<|im_start|>assistant\n`;
  const inputTokens = model.tokenizer.encode(chatPrompt);
  const kvCache = Array.from({ length: mc.numLayers }, () => ({ k: [], v: [] }));
  return {
    inputTokens,
    config: mc,
    kvCache,
    step(allToks, kvC, diag) {
      const o = op();
      const pos = allToks.length - 1;
      const tid = allToks[allToks.length - 1];
      let x = model.tokenEmbd.slice(tid * mc.hiddenDim, (tid + 1) * mc.hiddenDim);
      const layerNorms = diag ? [] : null;
      const attnEntropies = diag ? [] : null;
      const invSqrtHd = 1 / Math.sqrt(mc.headDim);

      for (let l = 0; l < mc.numLayers; l++) {
        const L = model.layers[l];
        const xIn = x;

        // ── Attention (GQA) ──
        const an = o.rmsNorm(x, L.attnNorm, mc.rmsNormEps);
        const q = o.matVec(L.wq, an, mc.numHeads * mc.headDim, mc.hiddenDim);
        const k = o.matVec(L.wk, an, kvDim, mc.hiddenDim);
        const v = o.matVec(L.wv, an, kvDim, mc.hiddenDim);
        for (let h = 0; h < mc.numHeads; h++) {
          applyRoPE(q.subarray(h * mc.headDim, (h + 1) * mc.headDim), mc.headDim, pos, mc.ropeTheta);
        }
        for (let h = 0; h < mc.numKvHeads; h++) {
          applyRoPE(k.subarray(h * mc.headDim, (h + 1) * mc.headDim), mc.headDim, pos, mc.ropeTheta);
        }
        kvC[l].k.push(k);
        kvC[l].v.push(v);
        const T = kvC[l].k.length;

        const attnOut = new Float32Array(mc.numHeads * mc.headDim);
        const headEntropies = diag ? [] : null;
        for (let h = 0; h < mc.numHeads; h++) {
          const kvH = Math.floor(h / gqaRatio); // shared KV head for this query head
          const qOff = h * mc.headDim;
          const kvOff = kvH * mc.headDim;
          const scores = new Float32Array(T);
          for (let t = 0; t < T; t++) {
            const kt = kvC[l].k[t];
            let dot = 0;
            for (let d = 0; d < mc.headDim; d++) dot += q[qOff + d] * kt[kvOff + d];
            scores[t] = dot * invSqrtHd;
          }
          const w = softmaxJS(scores);
          if (diag) {
            // Per-head attention entropy (natural log), rounded to 3 decimals.
            let he = 0;
            for (let s = 0; s < w.length; s++) if (w[s] > 1e-10) he -= w[s] * Math.log(w[s]);
            headEntropies.push(Math.round(he * 1000) / 1000);
          }
          for (let s = 0; s < T; s++) {
            const vt = kvC[l].v[s];
            const ws = w[s];
            for (let d = 0; d < mc.headDim; d++) attnOut[qOff + d] += ws * vt[kvOff + d];
          }
        }
        x = o.add(x, o.matVec(L.wo, attnOut, mc.hiddenDim, mc.numHeads * mc.headDim));

        // ── FFN (SwiGLU) ──
        const fn = o.rmsNorm(x, L.ffnNorm, mc.rmsNormEps);
        const g = o.matVec(L.wGate, fn, mc.intermediateSize, mc.hiddenDim);
        const u = o.matVec(L.wUp, fn, mc.intermediateSize, mc.hiddenDim);
        x = o.add(x, o.matVec(L.wDown, o.fusedSiluMul(g, u), mc.hiddenDim, mc.intermediateSize));

        if (diag) {
          attnEntropies.push(headEntropies);
          // rho = relative residual-update magnitude ||x' - x|| / ||x||.
          let delta = 0;
          let prevNorm = 0;
          for (let i = 0; i < mc.hiddenDim; i++) {
            const d = x[i] - xIn[i];
            delta += d * d;
            prevNorm += xIn[i] * xIn[i];
          }
          layerNorms.push({
            layer: l,
            rho: prevNorm > 0 ? Math.round(Math.sqrt(delta / prevNorm) * 1000) / 1000 : 0,
          });
        }
      }

      const finalNormed = o.rmsNorm(x, model.outNorm, mc.rmsNormEps);
      const logits = o.matVec(model.outWeight, finalNormed, mc.vocabSize, mc.hiddenDim);
      return { logits, layerNorms, attnEntropies };
    },
  };
}

// ─── Sampling Functions ─────────────────────────────────────────────────────

/**
 * Standard temperature + top-p (nucleus) sampling over raw logits.
 * @returns {number} sampled token id
 */
function sampleStandard(logits, temperature = 0.7, topP = 0.9) {
  const o = op();
  const scaled = new Float32Array(logits.length);
  for (let i = 0; i < logits.length; i++) scaled[i] = logits[i] / temperature;
  const probs = o.softmax(scaled);
  // Nucleus: take highest-probability tokens until cumulative mass >= topP.
  const indexed = Array.from(probs).map((p, i) => ({ p, i })).sort((a, b) => b.p - a.p);
  let cumP = 0;
  const candidates = [];
  for (const { p, i } of indexed) {
    cumP += p;
    candidates.push({ p, i });
    if (cumP >= topP) break;
  }
  const total = candidates.reduce((s, c) => s + c.p, 0);
  const r = Math.random() * total;
  let acc = 0;
  for (const { p, i } of candidates) {
    acc += p;
    if (r < acc) return i;
  }
  return candidates[0].i;
}

/**
 * Glossolalia fork/fold: run softmax at several temperatures ("agents"),
 * weight each agent by its entropy deficit (confidence), and average.
 * @returns {{merged: Float32Array, agents: object[], totalW: number}}
 */
function glossolaliaMerge(rawLogits, temperatures = [0.4, 0.7, 1.0]) {
  const V = rawLogits.length;
  const logV = Math.log(V);
  const agents = [];
  for (const tau of temperatures) {
    const scaled = new Float32Array(V);
    for (let i = 0; i < V; i++) scaled[i] = rawLogits[i] / Math.max(tau, 0.01);
    const probs = softmaxJS(scaled);
    // Shannon entropy (nats)
    let h = 0;
    for (let i = 0; i < V; i++) {
      const p = probs[i];
      if (p > 1e-12) h -= p * Math.log(p);
    }
    // Deficit weight: low entropy = high confidence = high weight.
    const w = Math.max(1.0 - h / logV, 1e-8); // the sliver
    // Top-5 for diagnostics
    const top5 = Array.from(probs).map((p, i) => ({ p, i })).sort((a, b) => b.p - a.p).slice(0, 5);
    agents.push({ probs, entropy: h, weight: w, tau, top5 });
  }
  // Merge: weight-normalized average of agent distributions.
  const totalW = agents.reduce((s, a) => s + a.weight, 0);
  const merged = new Float32Array(V);
  for (const a of agents) {
    const nw = a.weight / totalW;
    for (let i = 0; i < V; i++) merged[i] += nw * a.probs[i];
  }
  return { merged, agents, totalW };
}

/**
 * Sample from the glossolalia-merged distribution with fixed top-p = 0.95.
 * @returns {{tokenId: number, agents: object[], merged: Float32Array}}
 */
function sampleGlossolalia(logits) {
  const { merged, agents } = glossolaliaMerge(logits);
  const indexed = Array.from(merged).map((p, i) => ({ p, i })).sort((a, b) => b.p - a.p);
  let cumP = 0;
  const candidates = [];
  for (const { p, i } of indexed) {
    cumP += p;
    candidates.push({ p, i });
    if (cumP >= 0.95) break;
  }
  const total = candidates.reduce((s, c) => s + c.p, 0);
  const r = Math.random() * total;
  let acc = 0;
  for (const { p, i } of candidates) {
    acc += p;
    if (r < acc) return { tokenId: i, agents, merged };
  }
  return { tokenId: candidates[0].i, agents, merged };
}

// ─── C2/C3 Metacognitive Monitoring ─────────────────────────────────────────
// C2: Detect entropy regime collapse (>50% drop in 3-token window)
// C3: Detect absorbing states + apply diversity perturbation

/** C2: true when the mean entropy of the last 3 tokens fell below half the prior window. */
function detectRegimeChange(state) {
  const h = state.entropyHistory;
  if (h.length < 3) return false;
  const recent = h.slice(-3);
  const older = h.slice(-6, -3);
  if (older.length === 0) return false;
  const recentMean = recent.reduce((a, b) => a + b, 0) / recent.length;
  const olderMean = older.reduce((a, b) => a + b, 0) / older.length;
  return olderMean > 0 && recentMean < olderMean * 0.5;
}

/**
 * C3: detect absorbing (repeating) states or C2 regime collapse, and if found
 * redistribute eta-scaled mass from the stuck token onto the other positive
 * logits, proportionally. Mutates `state` repeat tracking.
 */
function metacognitiveC3(logits, state, selectedToken, absorbingThreshold = 3) {
  // Update repeat tracking
  if (selectedToken === state.lastToken) state.repeatCount++;
  else {
    state.repeatCount = 0;
    state.lastToken = selectedToken;
  }
  const isAbsorbing = state.repeatCount >= absorbingThreshold;
  const isRegimeCollapse = detectRegimeChange(state);
  if (!isAbsorbing && !isRegimeCollapse) {
    return { logits, perturbed: false, reason: null };
  }
  // Perturbation: eta scales with repetition depth
  const eta = 0.1 * (1 + state.repeatCount);
  const perturbed = new Float32Array(logits.length);
  let totalOther = 0;
  for (let i = 0; i < logits.length; i++) {
    if (i !== selectedToken && logits[i] > 0) totalOther += logits[i];
  }
  const redistributionMass = Math.abs(logits[selectedToken]) * eta;
  for (let i = 0; i < logits.length; i++) {
    if (i === selectedToken) perturbed[i] = logits[i] * (1 - eta);
    else if (totalOther > 0 && logits[i] > 0) perturbed[i] = logits[i] + redistributionMass * (logits[i] / totalOther);
    else perturbed[i] = logits[i];
  }
  state.perturbationCount++;
  return {
    logits: perturbed,
    perturbed: true,
    reason: isAbsorbing ? `absorbing(${state.repeatCount} repeats)` : 'regime_collapse',
    eta,
  };
}

/**
 * Glossolalia sampling with C2/C3 monitoring: sample once to get the would-be
 * token, run C3, and if perturbed, re-merge + re-sample from perturbed logits.
 */
function sampleWithMetacog(rawLogits, metacogState) {
  const { merged, agents } = glossolaliaMerge(rawLogits);
  // Merged-distribution entropy feeds the C2 history.
  let mergedEntropy = 0;
  for (let i = 0; i < merged.length; i++) {
    const p = merged[i];
    if (p > 1e-12) mergedEntropy -= p * Math.log(p);
  }
  metacogState.entropyHistory.push(mergedEntropy);

  // First sample from merged (pre-C3) to detect what token would be chosen.
  const indexed = Array.from(merged).map((p, i) => ({ p, i })).sort((a, b) => b.p - a.p);
  let cumP = 0;
  let candidates = [];
  for (const { p, i } of indexed) {
    cumP += p;
    candidates.push({ p, i });
    if (cumP >= 0.95) break;
  }
  let total = candidates.reduce((s, c) => s + c.p, 0);
  let r = Math.random() * total;
  let acc = 0;
  let preC3Token = candidates[0].i;
  for (const { p, i } of candidates) {
    acc += p;
    if (r < acc) { preC3Token = i; break; }
  }

  // C3: check and potentially perturb.
  const c3 = metacognitiveC3(rawLogits, metacogState, preC3Token);
  let finalToken = preC3Token;
  if (c3.perturbed) {
    // Re-merge with perturbed logits and re-sample.
    const { merged: remerged } = glossolaliaMerge(c3.logits);
    const ridx = Array.from(remerged).map((p, i) => ({ p, i })).sort((a, b) => b.p - a.p);
    cumP = 0;
    candidates = [];
    for (const { p, i } of ridx) {
      cumP += p;
      candidates.push({ p, i });
      if (cumP >= 0.95) break;
    }
    total = candidates.reduce((s, c) => s + c.p, 0);
    r = Math.random() * total;
    acc = 0;
    for (const { p, i } of candidates) {
      acc += p;
      if (r < acc) { finalToken = i; break; }
    }
  }
  return {
    tokenId: finalToken,
    agents,
    merged,
    mergedEntropy,
    c3: {
      perturbed: c3.perturbed,
      reason: c3.reason,
      eta: c3.eta,
      preC3Token,
      repeatCount: metacogState.repeatCount,
      perturbationCount: metacogState.perturbationCount,
    },
  };
}

// ─── Generation Loops ───────────────────────────────────────────────────────

/** Standard top-p decoding loop with per-token diagnostics. */
function generateStandard(prompt, maxTokens = 8192, modelName = 'buleyean') {
  const t0 = performance.now();
  const model = getModel(modelName);
  const fwd = forwardPass(prompt, modelName);
  const allTokens = [...fwd.inputTokens];
  const kvC = Array.from({ length: model.config.numLayers }, () => ({ k: [], v: [] }));
  const tokenTimes = [];

  // Prefill (one step per prompt token to populate the KV cache)
  for (let i = 0; i < fwd.inputTokens.length; i++) {
    fwd.step(allTokens.slice(0, i + 1), kvC, false);
  }

  const perTokenInfo = [];
  // Decode
  for (let i = 0; i < maxTokens; i++) {
    const ts = performance.now();
    const { logits, layerNorms, attnEntropies } = fwd.step(allTokens, kvC, true);
    const o2 = op();
    const scaled = new Float32Array(logits.length);
    for (let j = 0; j < logits.length; j++) scaled[j] = logits[j] / 0.7;
    const probs = o2.softmax(scaled);
    const chosen = sampleStandard(logits);
    const chosenProb = probs[chosen];
    const perplexity = chosenProb > 0 ? -Math.log2(chosenProb) : 99;
    // Vocab coverage: tokens with >0.1% probability
    let vocabCoverage = 0;
    for (let j = 0; j < probs.length; j++) if (probs[j] > 0.001) vocabCoverage++;
    // Top-5
    const top5 = Array.from(probs).map((p, j) => ({ p, i: j })).sort((a, b) => b.p - a.p).slice(0, 5)
      .map((t) => ({ token: model.tokenizer.decode([t.i]), prob: Math.round(t.p * 1000) / 1000 }));
    tokenTimes.push(performance.now() - ts);
    perTokenInfo.push({
      perplexity: Math.round(perplexity * 100) / 100,
      chosenProb: Math.round(chosenProb * 1000) / 1000,
      vocabCoverage,
      top5,
      layerNorms,
      attnEntropies,
    });
    if (chosen === model.config.eosToken) break;
    allTokens.push(chosen);
  }

  const genTokens = allTokens.slice(fwd.inputTokens.length);
  const totalTime = performance.now() - t0;
  const avgMs = tokenTimes.length > 0 ? tokenTimes.reduce((a, b) => a + b, 0) / tokenTimes.length : 0;
  return {
    text: model.tokenizer.decode(genTokens),
    tokens: genTokens.length,
    totalTimeMs: Math.round(totalTime),
    avgTokenMs: Math.round(avgMs),
    mode: 'standard',
    temperature: 0.7,
    topP: 0.9,
    tokenDiagnostics: perTokenInfo,
  };
}

/** Glossolalia (temperature-ensemble) decoding loop with agent diagnostics. */
function generateGlossolalia(prompt, maxTokens = 8192, modelName = 'buleyean') {
  const t0 = performance.now();
  const model = getModel(modelName);
  const fwd = forwardPass(prompt, modelName);
  const allTokens = [...fwd.inputTokens];
  const kvC = Array.from({ length: model.config.numLayers }, () => ({ k: [], v: [] }));
  const tokenTimes = [];
  const perTokenDiag = [];

  // Prefill
  for (let i = 0; i < fwd.inputTokens.length; i++) {
    fwd.step(allTokens.slice(0, i + 1), kvC, false);
  }

  // Decode with Glossolalia
  for (let i = 0; i < maxTokens; i++) {
    const ts = performance.now();
    const { logits, layerNorms, attnEntropies } = fwd.step(allTokens, kvC, true);
    // `merged` is the same distribution sampleGlossolalia drew from; reuse it
    // instead of re-running glossolaliaMerge (deterministic, identical value).
    const { tokenId, agents, merged } = sampleGlossolalia(logits);
    const chosenProb = merged[tokenId] || 0;
    const perplexity = chosenProb > 0 ? -Math.log2(chosenProb) : 99;
    let vocabCoverage = 0;
    for (let j = 0; j < merged.length; j++) if (merged[j] > 0.001) vocabCoverage++;
    tokenTimes.push(performance.now() - ts);
    perTokenDiag.push({
      agents: agents.map((a) => ({
        tau: a.tau,
        entropy: Math.round(a.entropy * 1000) / 1000,
        weight: Math.round(a.weight * 1000) / 1000,
        top3: a.top5.slice(0, 3).map((t) => ({
          token: model.tokenizer.decode([t.i]),
          prob: Math.round(t.p * 1000) / 1000,
        })),
      })),
      perplexity: Math.round(perplexity * 100) / 100,
      chosenProb: Math.round(chosenProb * 1000) / 1000,
      vocabCoverage,
      layerNorms,
      attnEntropies,
    });
    if (tokenId === model.config.eosToken) break;
    allTokens.push(tokenId);
  }

  const genTokens = allTokens.slice(fwd.inputTokens.length);
  const totalTime = performance.now() - t0;
  const avgMs = tokenTimes.length > 0 ? tokenTimes.reduce((a, b) => a + b, 0) / tokenTimes.length : 0;
  return {
    text: model.tokenizer.decode(genTokens),
    tokens: genTokens.length,
    totalTimeMs: Math.round(totalTime),
    avgTokenMs: Math.round(avgMs),
    mode: 'glossolalia',
    temperatures: [0.4, 0.7, 1.0],
    diagnostics: perTokenDiag,
  };
}

// ─── Metacog Generation (Glossolalia + C2/C3) ───────────────────────────────

/** Glossolalia decoding with C2/C3 metacognitive monitoring and summary. */
function generateMetacog(prompt, maxTokens = 8192, modelName = 'buleyean') {
  const t0 = performance.now();
  const model = getModel(modelName);
  const fwd = forwardPass(prompt, modelName);
  const allTokens = [...fwd.inputTokens];
  const kvC = Array.from({ length: model.config.numLayers }, () => ({ k: [], v: [] }));
  const tokenTimes = [];
  const perTokenDiag = [];
  // Metacognitive state (persists across tokens)
  const metacogState = { repeatCount: 0, lastToken: -1, entropyHistory: [], perturbationCount: 0 };

  // Prefill
  for (let i = 0; i < fwd.inputTokens.length; i++) {
    fwd.step(allTokens.slice(0, i + 1), kvC, false);
  }

  // Decode with Glossolalia + C2/C3
  for (let i = 0; i < maxTokens; i++) {
    const ts = performance.now();
    const { logits, layerNorms } = fwd.step(allTokens, kvC, true);
    const result = sampleWithMetacog(logits, metacogState);
    const chosenProb = result.merged[result.tokenId] || 0;
    const perplexity = chosenProb > 0 ? -Math.log2(chosenProb) : 99;
    let vocabCoverage = 0;
    for (let j = 0; j < result.merged.length; j++) if (result.merged[j] > 0.001) vocabCoverage++;
    tokenTimes.push(performance.now() - ts);
    perTokenDiag.push({
      agents: result.agents.map((a) => ({
        tau: a.tau,
        entropy: Math.round(a.entropy * 1000) / 1000,
        weight: Math.round(a.weight * 1000) / 1000,
        top3: a.top5.slice(0, 3).map((t) => ({
          token: model.tokenizer.decode([t.i]),
          prob: Math.round(t.p * 1000) / 1000,
        })),
      })),
      perplexity: Math.round(perplexity * 100) / 100,
      chosenProb: Math.round(chosenProb * 1000) / 1000,
      vocabCoverage,
      layerNorms,
      c3: result.c3,
      mergedEntropy: Math.round(result.mergedEntropy * 1000) / 1000,
    });
    if (result.tokenId === model.config.eosToken) break;
    allTokens.push(result.tokenId);
  }

  const genTokens = allTokens.slice(fwd.inputTokens.length);
  const totalTime = performance.now() - t0;
  const avgMs = tokenTimes.length > 0 ? tokenTimes.reduce((a, b) => a + b, 0) / tokenTimes.length : 0;
  return {
    text: model.tokenizer.decode(genTokens),
    tokens: genTokens.length,
    totalTimeMs: Math.round(totalTime),
    avgTokenMs: Math.round(avgMs),
    mode: 'metacog',
    temperatures: [0.4, 0.7, 1.0],
    diagnostics: perTokenDiag,
    metacogSummary: {
      totalPerturbations: metacogState.perturbationCount,
      finalRepeatCount: metacogState.repeatCount,
      entropyHistory: metacogState.entropyHistory.map((h) => Math.round(h * 1000) / 1000),
    },
  };
}

// ─── Personality Generation ─────────────────────────────────────────────────

/**
 * Glossolalia + C3 decoding parameterized by a named personality profile
 * (temperatures, top-p, absorbing threshold). Unknown names fall back to
 * 'balanced'. No per-token diagnostics — returns a summary only.
 */
function generatePersonality(prompt, maxTokens = 8192, modelName = 'buleyean-smollm2', personalityName = 'balanced') {
  const personality = PERSONALITIES[personalityName] || PERSONALITIES.balanced;
  const model = getModel(modelName);
  const mc = model.config;
  const t0 = performance.now();
  const fwd = forwardPass(prompt, modelName);
  const allTokens = [...fwd.inputTokens];
  const kvC = Array.from({ length: mc.numLayers }, () => ({ k: [], v: [] }));
  const tokenTimes = [];
  const metacogState = { repeatCount: 0, lastToken: -1, entropyHistory: [], perturbationCount: 0 };

  for (let i = 0; i < fwd.inputTokens.length; i++) fwd.step(allTokens.slice(0, i + 1), kvC, false);

  for (let i = 0; i < maxTokens; i++) {
    const ts = performance.now();
    const { logits } = fwd.step(allTokens, kvC, false);
    const { merged } = glossolaliaMerge(logits, personality.temps);
    let mergedEntropy = 0;
    for (let j = 0; j < merged.length; j++) {
      const p = merged[j];
      if (p > 1e-12) mergedEntropy -= p * Math.log(p);
    }
    metacogState.entropyHistory.push(mergedEntropy);

    const indexed = Array.from(merged).map((p, j) => ({ p, i: j })).sort((a, b) => b.p - a.p);
    let cumP = 0;
    let candidates = [];
    for (const { p, i } of indexed) {
      cumP += p;
      candidates.push({ p, i });
      if (cumP >= personality.topP) break;
    }
    let total = candidates.reduce((s, c) => s + c.p, 0);
    let r = Math.random() * total;
    let acc = 0;
    let preC3Token = candidates[0].i;
    for (const { p, i } of candidates) {
      acc += p;
      if (r < acc) { preC3Token = i; break; }
    }

    const c3 = metacognitiveC3(logits, metacogState, preC3Token, personality.absorbingThreshold);
    let finalToken = preC3Token;
    if (c3.perturbed) {
      const { merged: rm } = glossolaliaMerge(c3.logits, personality.temps);
      const ri = Array.from(rm).map((p, j) => ({ p, i: j })).sort((a, b) => b.p - a.p);
      cumP = 0;
      candidates = [];
      for (const { p, i } of ri) {
        cumP += p;
        candidates.push({ p, i });
        if (cumP >= personality.topP) break;
      }
      total = candidates.reduce((s, c) => s + c.p, 0);
      r = Math.random() * total;
      acc = 0;
      for (const { p, i } of candidates) {
        acc += p;
        if (r < acc) { finalToken = i; break; }
      }
    }

    tokenTimes.push(performance.now() - ts);
    if (finalToken === mc.eosToken) break;
    allTokens.push(finalToken);
  }

  const genTokens = allTokens.slice(fwd.inputTokens.length);
  const totalTime = performance.now() - t0;
  const avgMs = tokenTimes.length > 0 ? tokenTimes.reduce((a, b) => a + b, 0) / tokenTimes.length : 0;
  return {
    text: model.tokenizer.decode(genTokens),
    tokens: genTokens.length,
    totalTimeMs: Math.round(totalTime),
    avgTokenMs: Math.round(avgMs),
    mode: 'personality',
    personality: personalityName,
    personalityLabel: personality.label,
    temperatures: personality.temps,
    modelName,
    metacogSummary: { totalPerturbations: metacogState.perturbationCount },
  };
}

// ─── HTTP Server ────────────────────────────────────────────────────────────
const server = createServer((req, res) => {
  let body = '';
  req.on('data', (c) => body += c);
  req.on('end', () => {
    try {
      if (req.url === '/health') {
        res.writeHead(200, { 'Content-Type': 'application/json' });
        res.end(JSON.stringify({
          status: 'ok',
          models: Object.keys(models),
          personalities: Object.keys(PERSONALITIES),
          simd: !!simd,
        }));
        return;
      }
      if (req.method !== 'POST') { res.writeHead(404); res.end(); return; }
      const { prompt, max_tokens, model, personality } = JSON.parse(body);
      const mn = model || 'buleyean-smollm2';
      let result;
      if (req.url === '/generate-personality') result = generatePersonality(prompt, max_tokens || 128, mn, personality || 'balanced');
      else if (req.url === '/generate-standard') result = generateStandard(prompt, max_tokens || 128, mn);
      else if (req.url === '/generate-glossolalia') result = generateGlossolalia(prompt, max_tokens || 128, mn);
      else if (req.url === '/generate-metacog') result = generateMetacog(prompt, max_tokens || 128, mn);
      else { res.writeHead(404); res.end(); return; }
      res.writeHead(200, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify(result));
    } catch (e) {
      console.error('[Aether]', e);
      res.writeHead(500, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify({ error: e.message }));
    }
  });
});

// ─── Model Registry ─────────────────────────────────────────────────────────
const MODEL_REGISTRY = [
  { name: 'buleyean-smollm2', repo: 'forkjoin-ai/buleyean-smollm2-360m', file: 'buleyean-smollm2-360m-q8_0.gguf', tokRepo: 'HuggingFaceTB/SmolLM2-360M-Instruct', config: 'smollm2-360m' },
  { name: 'base-smollm2', repo: 'bartowski/SmolLM2-360M-Instruct-GGUF', file: 'SmolLM2-360M-Instruct-Q8_0.gguf', tokRepo: 'HuggingFaceTB/SmolLM2-360M-Instruct', config: 'smollm2-360m' },
  { name: 'buleyean-qwen', repo: 'forkjoin-ai/buleyean-qwen2.5-0.5b', file: 'buleyean-qwen2.5-0.5b-q8_0.gguf', tokRepo: 'Qwen/Qwen2.5-0.5B-Instruct', config: 'qwen2.5-0.5b' },
  { name: 'base-qwen', repo: 'bartowski/Qwen2.5-0.5B-Instruct-GGUF', file: 'Qwen2.5-0.5B-Instruct-Q8_0.gguf', tokRepo: 'Qwen/Qwen2.5-0.5B-Instruct', config: 'qwen2.5-0.5b' },
];

/**
 * Download a GGUF file from the HF hub (cached under /tmp/hf_cache).
 * SECURITY: repo/file are interpolated into a shell command — only safe while
 * they come from the static MODEL_REGISTRY above; never feed user input here.
 */
function dl(repo, file) {
  const local = `/tmp/hf_cache/${file}`;
  if (existsSync(local)) return local;
  console.log(`[Aether] Downloading ${repo}/${file}...`);
  execSync(`python3 -c "from huggingface_hub import hf_hub_download; hf_hub_download('${repo}', '${file}', cache_dir='/tmp/hf_cache', local_dir='/tmp/hf_cache')"`, { stdio: 'inherit' });
  return local;
}

/** Download tokenizer.json for a repo (cached). Same shell-interpolation caveat as dl(). */
function dlTok(repo) {
  const local = `/tmp/hf_cache/tokenizer-${repo.replace(/\//g, '-')}.json`;
  if (existsSync(local)) return local;
  console.log(`[Aether] Downloading tokenizer from ${repo}...`);
  execSync(`python3 -c "from huggingface_hub import hf_hub_download; p=hf_hub_download('${repo}', 'tokenizer.json'); import shutil; shutil.copy(p, '${local}')"`, { stdio: 'inherit' });
  return local;
}

/** Entry point: load SIMD, load every registry model that fits, start HTTP server. */
async function main() {
  simd = await loadSIMD();
  // Load all models that fit in memory (load sequentially, keep all).
  for (const m of MODEL_REGISTRY) {
    try {
      const gguf = dl(m.repo, m.file);
      const tok = dlTok(m.tokRepo);
      loadModel(m.name, gguf, tok, m.config);
    } catch (e) {
      console.error(`[Aether] Failed to load ${m.name}: ${e.message}`);
    }
  }
  server.listen(PORT, '127.0.0.1', () =>
    console.log(`[Aether] http://127.0.0.1:${PORT} (SIMD: ${!!simd}, models: ${Object.keys(models).join(', ')})`));
}

main().catch((e) => {
  console.error('[Aether] Fatal:', e);
  process.exit(1);
});