Taylor committed on
Commit
382cccc
Β·
1 Parent(s): a902eca

feat: dual model support -- Buleyean (default) vs base, user toggle

Browse files

Loads both SmolLM2-360M models at startup:
- buleyean: void-trained (forkjoin-ai/buleyean-smollm2-360m Q8_0)
- base: standard instruct (bartowski/SmolLM2-360M-Instruct Q8_0)

Radio toggle lets users pick which model to run. Buleyean default.
Same model feeds both standard and glossolalia decoders.
Act 3 (Metacog) builds on this same dual-model + glossolalia foundation.

Files changed (2) hide show
  1. aether-server.mjs +48 -21
  2. app.py +16 -12
aether-server.mjs CHANGED
@@ -105,24 +105,34 @@ function applyRoPE(x, headDim, position, theta) {
105
  }
106
  }
107
 
108
- // ─── Model ──────────────────────────────────────────────────────────────────
109
- let model = null;
110
- function loadModel(ggufPath, tokPath) {
 
 
 
111
  const t0=Date.now();const buf=readFileSync(ggufPath);const parsed=parseGGUF(buf);
112
  console.log(`[Aether] Parsed ${parsed.tensors.length} tensors in ${Date.now()-t0}ms`);
113
  const tokenizer=new Tok(JSON.parse(readFileSync(tokPath,'utf8')));
114
  const byName={};for(const t of parsed.tensors)byName[t.name]=t;
115
- function get(name){const t=byName[name];if(!t)return null;const raw=new Uint8Array(buf.buffer,buf.byteOffset+parsed.dataOffset+Number(t.offset),t.size);return dequantByType(raw,t.numElements,t.type);}
116
  console.log('[Aether] Dequantizing...');const tokenEmbd=get('token_embd.weight');const layers=[];
117
  for(let i=0;i<C.numLayers;i++){if(i%8===0)console.log(`[Aether] Layer ${i}/${C.numLayers}`);layers.push({an:get(`blk.${i}.attn_norm.weight`),fn:get(`blk.${i}.ffn_norm.weight`),qw:get(`blk.${i}.attn_q.weight`),kw:get(`blk.${i}.attn_k.weight`),vw:get(`blk.${i}.attn_v.weight`),ow:get(`blk.${i}.attn_output.weight`),gw:get(`blk.${i}.ffn_gate.weight`),uw:get(`blk.${i}.ffn_up.weight`),dw:get(`blk.${i}.ffn_down.weight`)});}
118
  const outNorm=get('output_norm.weight');let outWeight=get('output.weight');if(!outWeight){console.log('[Aether] Tied embeddings');outWeight=tokenEmbd;}
119
- console.log(`[Aether] Loaded in ${((Date.now()-t0)/1000).toFixed(1)}s`);
120
- model={tokenEmbd,layers,outNorm,outWeight,tokenizer,loadTime:Date.now()-t0};
 
 
 
 
 
 
121
  }
122
 
123
  // ─── Forward Pass (returns raw logits) ──────────────────────────────────────
124
- function forwardPass(prompt) {
125
  const o = op();
 
126
  const chatPrompt = `<|im_start|>user\n${prompt}<|im_end|>\n<|im_start|>assistant\n`;
127
  const inputTokens = model.tokenizer.encode(chatPrompt);
128
  const allTokens = [...inputTokens];
@@ -247,9 +257,10 @@ function sampleGlossolalia(logits) {
247
 
248
  // ─── Generation Loops ───────────────────────────────────────────────────────
249
 
250
- function generateStandard(prompt, maxTokens = 8192) {
251
  const t0 = performance.now();
252
- const fwd = forwardPass(prompt);
 
253
  const allTokens = [...fwd.inputTokens];
254
  const kvC = Array.from({length:C.numLayers},()=>({k:[],v:[]}));
255
  const tokenTimes = [];
@@ -301,9 +312,10 @@ function generateStandard(prompt, maxTokens = 8192) {
301
  };
302
  }
303
 
304
- function generateGlossolalia(prompt, maxTokens = 8192) {
305
  const t0 = performance.now();
306
- const fwd = forwardPass(prompt);
 
307
  const allTokens = [...fwd.inputTokens];
308
  const kvC = Array.from({length:C.numLayers},()=>({k:[],v:[]}));
309
  const tokenTimes = [];
@@ -364,8 +376,8 @@ const server = createServer((req, res) => {
364
  req.on('data', c => body += c);
365
  req.on('end', () => {
366
  try {
367
- const { prompt, max_tokens } = JSON.parse(body);
368
- const result = genFn(prompt, max_tokens || 256);
369
  res.writeHead(200, { 'Content-Type': 'application/json' });
370
  res.end(JSON.stringify(result));
371
  } catch (e) {
@@ -380,26 +392,41 @@ const server = createServer((req, res) => {
380
  else if (req.method==='POST' && req.url==='/generate-glossolalia') handle(generateGlossolalia);
381
  else if (req.url==='/health') {
382
  res.writeHead(200,{'Content-Type':'application/json'});
383
- res.end(JSON.stringify({status:'ok',model:model?'loaded':'not loaded',simd:!!simd,loadTime:model?.loadTime}));
384
  } else { res.writeHead(404); res.end(); }
385
  });
386
 
387
  // ─── Main ───────────────────────────────────────────────────────────────────
388
- const ggufPath='/tmp/hf_cache/smollm2-360m-q8_0.gguf';
389
- const tokPath='/tmp/hf_cache/tokenizer.json';
 
390
 
391
  async function main() {
392
  simd = await loadSIMD();
393
- if (!existsSync(ggufPath)) {
394
- console.log('[Aether] Downloading Q8_0 GGUF...');
395
- execSync(`python3 -c "from huggingface_hub import hf_hub_download; hf_hub_download('bartowski/SmolLM2-360M-Instruct-GGUF', 'SmolLM2-360M-Instruct-Q8_0.gguf', cache_dir='/tmp/hf_cache', local_dir='/tmp/hf_cache'); import shutil; shutil.move('/tmp/hf_cache/SmolLM2-360M-Instruct-Q8_0.gguf', '${ggufPath}')"`, { stdio: 'inherit' });
 
 
396
  }
 
 
 
 
 
 
 
 
397
  if (!existsSync(tokPath)) {
398
  console.log('[Aether] Downloading tokenizer...');
399
  execSync(`python3 -c "from huggingface_hub import hf_hub_download; hf_hub_download('HuggingFaceTB/SmolLM2-360M-Instruct', 'tokenizer.json', cache_dir='/tmp/hf_cache', local_dir='/tmp/hf_cache')"`, { stdio: 'inherit' });
400
  }
401
- loadModel(ggufPath, tokPath);
402
- server.listen(PORT,'127.0.0.1',()=>console.log(`[Aether] http://127.0.0.1:${PORT} (SIMD: ${!!simd})`));
 
 
 
 
403
  }
404
 
405
  main().catch(e => { console.error('[Aether] Fatal:', e); process.exit(1); });
 
105
  }
106
  }
107
 
108
+ // ─── Models ─────────────────────────────────────────────────────────────────
109
+ const models = {};
110
+ let activeModel = null;
111
+
112
+ function loadModel(name, ggufPath, tokPath) {
113
+ console.log(`[Aether] Loading ${name}...`);
114
  const t0=Date.now();const buf=readFileSync(ggufPath);const parsed=parseGGUF(buf);
115
  console.log(`[Aether] Parsed ${parsed.tensors.length} tensors in ${Date.now()-t0}ms`);
116
  const tokenizer=new Tok(JSON.parse(readFileSync(tokPath,'utf8')));
117
  const byName={};for(const t of parsed.tensors)byName[t.name]=t;
118
+ function get(nm){const t=byName[nm];if(!t)return null;const raw=new Uint8Array(buf.buffer,buf.byteOffset+parsed.dataOffset+Number(t.offset),t.size);return dequantByType(raw,t.numElements,t.type);}
119
  console.log('[Aether] Dequantizing...');const tokenEmbd=get('token_embd.weight');const layers=[];
120
  for(let i=0;i<C.numLayers;i++){if(i%8===0)console.log(`[Aether] Layer ${i}/${C.numLayers}`);layers.push({an:get(`blk.${i}.attn_norm.weight`),fn:get(`blk.${i}.ffn_norm.weight`),qw:get(`blk.${i}.attn_q.weight`),kw:get(`blk.${i}.attn_k.weight`),vw:get(`blk.${i}.attn_v.weight`),ow:get(`blk.${i}.attn_output.weight`),gw:get(`blk.${i}.ffn_gate.weight`),uw:get(`blk.${i}.ffn_up.weight`),dw:get(`blk.${i}.ffn_down.weight`)});}
121
  const outNorm=get('output_norm.weight');let outWeight=get('output.weight');if(!outWeight){console.log('[Aether] Tied embeddings');outWeight=tokenEmbd;}
122
+ const loadTime=Date.now()-t0;
123
+ console.log(`[Aether] ${name} loaded in ${(loadTime/1000).toFixed(1)}s`);
124
+ models[name]={tokenEmbd,layers,outNorm,outWeight,tokenizer,loadTime,name};
125
+ return models[name];
126
+ }
127
+
128
+ function getModel(name) {
129
+ return models[name] || models['base'] || Object.values(models)[0];
130
  }
131
 
132
  // ─── Forward Pass (returns raw logits) ──────────────────────────────────────
133
+ function forwardPass(prompt, modelName) {
134
  const o = op();
135
+ const model = getModel(modelName);
136
  const chatPrompt = `<|im_start|>user\n${prompt}<|im_end|>\n<|im_start|>assistant\n`;
137
  const inputTokens = model.tokenizer.encode(chatPrompt);
138
  const allTokens = [...inputTokens];
 
257
 
258
  // ─── Generation Loops ───────────────────────────────────────────────────────
259
 
260
+ function generateStandard(prompt, maxTokens = 8192, modelName = 'buleyean') {
261
  const t0 = performance.now();
262
+ const model = getModel(modelName);
263
+ const fwd = forwardPass(prompt, modelName);
264
  const allTokens = [...fwd.inputTokens];
265
  const kvC = Array.from({length:C.numLayers},()=>({k:[],v:[]}));
266
  const tokenTimes = [];
 
312
  };
313
  }
314
 
315
+ function generateGlossolalia(prompt, maxTokens = 8192, modelName = 'buleyean') {
316
  const t0 = performance.now();
317
+ const model = getModel(modelName);
318
+ const fwd = forwardPass(prompt, modelName);
319
  const allTokens = [...fwd.inputTokens];
320
  const kvC = Array.from({length:C.numLayers},()=>({k:[],v:[]}));
321
  const tokenTimes = [];
 
376
  req.on('data', c => body += c);
377
  req.on('end', () => {
378
  try {
379
+ const { prompt, max_tokens, model } = JSON.parse(body);
380
+ const result = genFn(prompt, max_tokens || 256, model || 'buleyean');
381
  res.writeHead(200, { 'Content-Type': 'application/json' });
382
  res.end(JSON.stringify(result));
383
  } catch (e) {
 
392
  else if (req.method==='POST' && req.url==='/generate-glossolalia') handle(generateGlossolalia);
393
  else if (req.url==='/health') {
394
  res.writeHead(200,{'Content-Type':'application/json'});
395
+ res.end(JSON.stringify({status:'ok',models:Object.keys(models),simd:!!simd,loadTimes:Object.fromEntries(Object.entries(models).map(([k,v])=>[k,v.loadTime]))}));
396
  } else { res.writeHead(404); res.end(); }
397
  });
398
 
399
  // ─── Main ───────────────────────────────────────────────────────────────────
400
+ const basePath = '/tmp/hf_cache/smollm2-360m-q8_0.gguf';
401
+ const bulePath = '/tmp/hf_cache/buleyean-smollm2-360m-q8_0.gguf';
402
+ const tokPath = '/tmp/hf_cache/tokenizer.json';
403
 
404
  async function main() {
405
  simd = await loadSIMD();
406
+
407
+ // Download base model
408
+ if (!existsSync(basePath)) {
409
+ console.log('[Aether] Downloading base SmolLM2-360M Q8_0...');
410
+ execSync(`python3 -c "from huggingface_hub import hf_hub_download; hf_hub_download('bartowski/SmolLM2-360M-Instruct-GGUF', 'SmolLM2-360M-Instruct-Q8_0.gguf', cache_dir='/tmp/hf_cache', local_dir='/tmp/hf_cache'); import shutil; shutil.move('/tmp/hf_cache/SmolLM2-360M-Instruct-Q8_0.gguf', '${basePath}')"`, { stdio: 'inherit' });
411
  }
412
+
413
+ // Download Buleyean model
414
+ if (!existsSync(bulePath)) {
415
+ console.log('[Aether] Downloading Buleyean SmolLM2-360M Q8_0...');
416
+ execSync(`python3 -c "from huggingface_hub import hf_hub_download; hf_hub_download('forkjoin-ai/buleyean-smollm2-360m', 'buleyean-smollm2-360m-q8_0.gguf', cache_dir='/tmp/hf_cache', local_dir='/tmp/hf_cache')"`, { stdio: 'inherit' });
417
+ }
418
+
419
+ // Download tokenizer
420
  if (!existsSync(tokPath)) {
421
  console.log('[Aether] Downloading tokenizer...');
422
  execSync(`python3 -c "from huggingface_hub import hf_hub_download; hf_hub_download('HuggingFaceTB/SmolLM2-360M-Instruct', 'tokenizer.json', cache_dir='/tmp/hf_cache', local_dir='/tmp/hf_cache')"`, { stdio: 'inherit' });
423
  }
424
+
425
+ // Load both models
426
+ loadModel('base', basePath, tokPath);
427
+ loadModel('buleyean', bulePath, tokPath);
428
+
429
+ server.listen(PORT, '127.0.0.1', () => console.log(`[Aether] http://127.0.0.1:${PORT} (SIMD: ${!!simd}, models: ${Object.keys(models).join(', ')})`));
430
  }
431
 
432
  main().catch(e => { console.error('[Aether] Fatal:', e); process.exit(1); });
app.py CHANGED
@@ -30,7 +30,7 @@ for attempt in range(180):
30
  req = urllib.request.Request("http://127.0.0.1:7861/health")
31
  resp = urllib.request.urlopen(req, timeout=2)
32
  health = json.loads(resp.read())
33
- if health.get("status") == "ok" and health.get("model") == "loaded":
34
  print(f"[Glossolalia] Aether ready ({health.get('loadTime')}ms, SIMD: {health.get('simd')})", flush=True)
35
  break
36
  except Exception:
@@ -44,9 +44,9 @@ else:
44
  print("[Glossolalia] WARNING: Aether not ready after 180s", flush=True)
45
 
46
 
47
- def call_aether(endpoint, prompt, max_tokens=256):
48
  try:
49
- data = json.dumps({"prompt": prompt, "max_tokens": max_tokens}).encode()
50
  req = urllib.request.Request(
51
  f"http://127.0.0.1:7861/{endpoint}", data=data,
52
  headers={"Content-Type": "application/json"},
@@ -125,7 +125,7 @@ def format_layer_health(diag_list):
125
 
126
  # ─── Compare Function ────────────────────────────────────────────────────────
127
 
128
- def compare(prompt, max_tokens):
129
  empty = ("", "", "", "", "", "", "")
130
  if not prompt or not prompt.strip():
131
  yield empty
@@ -136,9 +136,9 @@ def compare(prompt, max_tokens):
136
  glo_result = [None]
137
 
138
  def run_std():
139
- std_result[0] = call_aether("generate-standard", prompt, max_tokens)
140
  def run_glo():
141
- glo_result[0] = call_aether("generate-glossolalia", prompt, max_tokens)
142
 
143
  def fmt_stats(r):
144
  if not r:
@@ -209,7 +209,9 @@ with gr.Blocks(css=CSS, theme=gr.themes.Base(primary_hue="purple", neutral_hue="
209
 
210
  with gr.Row():
211
  prompt = gr.Textbox(elem_id="prompt-input", placeholder="What is the shape of failure?", lines=2, label="Prompt", show_label=False, interactive=True, scale=4)
212
- max_tok = gr.Slider(minimum=8, maximum=512, value=64, step=1, label="Max tokens", scale=1)
 
 
213
 
214
  btn = gr.Button("Generate", elem_id="gen-btn", variant="primary")
215
 
@@ -236,20 +238,22 @@ with gr.Blocks(css=CSS, theme=gr.themes.Base(primary_hue="purple", neutral_hue="
236
 
237
  outputs = [std_out, glo_out, std_stats, glo_stats, glo_diag, std_diag, layer_health]
238
 
239
- def run(prompt_text, max_tokens):
240
- for vals in compare(prompt_text, max_tokens):
 
 
241
  st, gt, ss, gs, gd, sd, lh = vals
242
  yield st, gt, f'<p class="stats-text">{ss}</p>', f'<p class="stats-text">{gs}</p>', gd, sd, lh
243
 
244
- btn.click(run, [prompt, max_tok], outputs)
245
- prompt.submit(run, [prompt, max_tok], outputs)
246
 
247
  gr.HTML('<p style="color:#52525b; font-size:0.8rem; margin-top:1.5rem; margin-bottom:0.5rem;">Try these:</p>')
248
  with gr.Row():
249
  for p in ["What is the shape of failure?", "The theory of everything begins with", "If silence had a color", "Write a haiku about parallel universes"]:
250
  gr.Button(p, size="sm", elem_classes=["prompt-chip"]).click(
251
  fn=lambda x=p: x, outputs=[prompt]
252
- ).then(fn=run, inputs=[prompt, max_tok], outputs=outputs)
253
 
254
  gr.HTML("""
255
  <div id="footer">
 
30
  req = urllib.request.Request("http://127.0.0.1:7861/health")
31
  resp = urllib.request.urlopen(req, timeout=2)
32
  health = json.loads(resp.read())
33
+ if health.get("status") == "ok" and health.get("models"):
34
  print(f"[Glossolalia] Aether ready ({health.get('loadTimes')}, SIMD: {health.get('simd')})", flush=True)
35
  break
36
  except Exception:
 
44
  print("[Glossolalia] WARNING: Aether not ready after 180s", flush=True)
45
 
46
 
47
+ def call_aether(endpoint, prompt, max_tokens=256, model_name="buleyean"):
48
  try:
49
+ data = json.dumps({"prompt": prompt, "max_tokens": max_tokens, "model": model_name}).encode()
50
  req = urllib.request.Request(
51
  f"http://127.0.0.1:7861/{endpoint}", data=data,
52
  headers={"Content-Type": "application/json"},
 
125
 
126
  # ─── Compare Function ────────────────────────────────────────────────────────
127
 
128
+ def compare(prompt, max_tokens, model_name="buleyean"):
129
  empty = ("", "", "", "", "", "", "")
130
  if not prompt or not prompt.strip():
131
  yield empty
 
136
  glo_result = [None]
137
 
138
  def run_std():
139
+ std_result[0] = call_aether("generate-standard", prompt, max_tokens, model_name)
140
  def run_glo():
141
+ glo_result[0] = call_aether("generate-glossolalia", prompt, max_tokens, model_name)
142
 
143
  def fmt_stats(r):
144
  if not r:
 
209
 
210
  with gr.Row():
211
  prompt = gr.Textbox(elem_id="prompt-input", placeholder="What is the shape of failure?", lines=2, label="Prompt", show_label=False, interactive=True, scale=4)
212
+ with gr.Column(scale=1):
213
+ model_choice = gr.Radio(choices=["buleyean", "base"], value="buleyean", label="Model", info="Buleyean = void-trained, Base = standard instruct")
214
+ max_tok = gr.Slider(minimum=8, maximum=512, value=64, step=1, label="Max tokens")
215
 
216
  btn = gr.Button("Generate", elem_id="gen-btn", variant="primary")
217
 
 
238
 
239
  outputs = [std_out, glo_out, std_stats, glo_stats, glo_diag, std_diag, layer_health]
240
 
241
+ inputs = [prompt, max_tok, model_choice]
242
+
243
+ def run(prompt_text, max_tokens, model_name):
244
+ for vals in compare(prompt_text, max_tokens, model_name):
245
  st, gt, ss, gs, gd, sd, lh = vals
246
  yield st, gt, f'<p class="stats-text">{ss}</p>', f'<p class="stats-text">{gs}</p>', gd, sd, lh
247
 
248
+ btn.click(run, inputs, outputs)
249
+ prompt.submit(run, inputs, outputs)
250
 
251
  gr.HTML('<p style="color:#52525b; font-size:0.8rem; margin-top:1.5rem; margin-bottom:0.5rem;">Try these:</p>')
252
  with gr.Row():
253
  for p in ["What is the shape of failure?", "The theory of everything begins with", "If silence had a color", "Write a haiku about parallel universes"]:
254
  gr.Button(p, size="sm", elem_classes=["prompt-chip"]).click(
255
  fn=lambda x=p: x, outputs=[prompt]
256
+ ).then(fn=run, inputs=inputs, outputs=outputs)
257
 
258
  gr.HTML("""
259
  <div id="footer">