""" Glossolalia -- Semiotic Ensemble Inference Act 2: Fork/Race/Fold at the decoder level. Standard sampling vs Glossolalia. Same forward pass, different decoder. All inference via Aether WASM-SIMD engine. Deep diagnostics: per-token perplexity, per-layer norms, attention entropy, vocabulary coverage. """ import gradio as gr import json import time import subprocess import urllib.request import urllib.error import select from concurrent.futures import ThreadPoolExecutor, as_completed # ─── Start Aether sidecar ──────────────────────────────────────────────────── print("[Glossolalia] Starting Aether...", flush=True) aether_proc = subprocess.Popen( ["node", "aether-server.mjs"], env={**__import__('os').environ, "AETHER_PORT": "7861"}, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, ) print("[Glossolalia] Waiting for Aether...", flush=True) for attempt in range(180): try: req = urllib.request.Request("http://127.0.0.1:7861/health") resp = urllib.request.urlopen(req, timeout=2) health = json.loads(resp.read()) if health.get("status") == "ok" and health.get("models"): print(f"[Glossolalia] Aether ready ({health.get('loadTime')}ms, SIMD: {health.get('simd')})", flush=True) break except Exception: pass if aether_proc.stdout and select.select([aether_proc.stdout], [], [], 0)[0]: line = aether_proc.stdout.readline() if line: print(f" {line.decode().strip()}", flush=True) time.sleep(1) else: print("[Glossolalia] WARNING: Aether not ready after 180s", flush=True) def call_aether(endpoint, prompt, max_tokens=256, model_name="buleyean"): try: data = json.dumps({"prompt": prompt, "max_tokens": max_tokens, "model": model_name}).encode() req = urllib.request.Request( f"http://127.0.0.1:7861/{endpoint}", data=data, headers={"Content-Type": "application/json"}, ) resp = urllib.request.urlopen(req, timeout=600) return json.loads(resp.read()) except urllib.error.HTTPError as e: body = e.read().decode() if e.fp else str(e) try: detail = json.loads(body).get("error", body[:300]) except Exception: detail = body[:300] return {"error": detail, "text": f"[Error: {detail}]", "tokens": 0, "totalTimeMs": 0, "avgTokenMs": 0} except Exception as e: return {"error": str(e), "text": f"[Error: {e}]", "tokens": 0, "totalTimeMs": 0, "avgTokenMs": 0} # ─── Diagnostic Formatters ─────────────────────────────────────────────────── def format_glossolalia_diag(diag_list): if not diag_list: return "No diagnostics." lines = ["GLOSSOLALIA DEEP DIAGNOSTICS", "=" * 60, "", "tau={0.4, 0.7, 1.0} | w = 1-H/log(V) | ppl = -log2(P)", ""] for step, d in enumerate(diag_list): if not isinstance(d, dict): continue ppl = d.get("perplexity", "?") prob = d.get("chosenProb", "?") vc = d.get("vocabCoverage", "?") lines.append(f"--- Token {step+1} | ppl={ppl} | P={prob} | vocabCov={vc} ---") for a in d.get("agents", []): if not isinstance(a, dict): continue top_str = ", ".join(f"'{t['token']}' ({t['prob']:.3f})" for t in a.get("top3", [])) lines.append(f" tau={a.get('tau','?'):.1f} | H={a.get('entropy',0):.3f} | w={a.get('weight',0):.3f} | {top_str}") lines.append("") return "\n".join(lines) def format_standard_diag(token_diag): if not token_diag: return "No diagnostics." lines = ["STANDARD SAMPLING DIAGNOSTICS", "=" * 60, "ppl = -log2(P(chosen)) | vocabCov = tokens with P>0.1%", ""] for step, d in enumerate(token_diag): if not isinstance(d, dict): continue ppl = d.get("perplexity", "?") prob = d.get("chosenProb", "?") vc = d.get("vocabCoverage", "?") top5 = d.get("top5", []) top_str = ", ".join(f"'{t['token']}' ({t['prob']:.3f})" for t in top5[:3]) lines.append(f"Token {step+1} | ppl={ppl} | P={prob} | vc={vc} | {top_str}") return "\n".join(lines) def format_layer_health(diag_list): if not diag_list: return "No layer data." last = diag_list[-1] if diag_list else {} if not isinstance(last, dict): return "No layer data." norms = last.get("layerNorms", []) if not norms: return "No layer norms." lines = ["LAYER HEALTH (last generated token)", "=" * 60, "Layer | Hidden Norm | Residual Contribution", "-" * 50] for i, n in enumerate(norms): if not isinstance(n, dict): continue norm_val = n.get("norm", 0) res_val = n.get("residual", 0) bar = "#" * min(int(res_val * 40), 40) lines.append(f" {i:2d} | {norm_val:9.2f} | {res_val:.4f} {bar}") return "\n".join(lines) # ─── Compare Function ──────────────────────────────────────────────────────── def compare(prompt, max_tokens, model_name="buleyean"): empty = ("", "", "", "", "", "", "") if not prompt or not prompt.strip(): yield empty return max_tokens = int(max_tokens) std_result = [None] glo_result = [None] def run_std(): std_result[0] = call_aether("generate-standard", prompt, max_tokens, model_name) def run_glo(): glo_result[0] = call_aether("generate-glossolalia", prompt, max_tokens, model_name) def fmt_stats(r): if not r: return "running..." return f"{r['tokens']} tokens in {r['totalTimeMs']/1000:.1f}s ({r['avgTokenMs']}ms/tok)" def build(): sr, gr_ = std_result[0], glo_result[0] return ( sr["text"] if sr else "generating...", gr_["text"] if gr_ else "generating...", fmt_stats(sr), fmt_stats(gr_), format_glossolalia_diag(gr_.get("diagnostics", [])) if gr_ else "", format_standard_diag(sr.get("tokenDiagnostics", [])) if sr else "", format_layer_health(gr_.get("diagnostics", [])) if gr_ else "", ) with ThreadPoolExecutor(max_workers=2) as pool: futures = {pool.submit(run_std): "std", pool.submit(run_glo): "glo"} for future in as_completed(futures): future.result() yield build() yield build() # ─── CSS ───────────────────────────────────────────────────────────────────── CSS = """ .gradio-container { max-width: 1060px !important; margin: 0 auto !important; } .gradio-container, .dark { background: #09090b !important; } #hero { text-align: center; padding: 2rem 0 1rem; } #hero h1 { font-size: 2.5rem; font-weight: 300; letter-spacing: -0.02em; color: #fafafa; margin: 0; } #hero .accent { color: #a855f7; } #hero .subtitle { color: #71717a; font-size: 0.95rem; margin-top: 0.5rem; } .response-card { background: #0c0c0f !important; border: 1px solid #1f1f23 !important; border-radius: 8px !important; } .response-card textarea { background: #0c0c0f !important; border: none !important; color: #e4e4e7 !important; font-size: 0.95rem !important; line-height: 1.6 !important; } .std-label { color: #71717a !important; font-size: 0.8rem !important; text-transform: uppercase !important; letter-spacing: 0.05em !important; font-weight: 500 !important; } .glo-label { color: #a855f7 !important; font-size: 0.8rem !important; text-transform: uppercase !important; letter-spacing: 0.05em !important; font-weight: 500 !important; } .stats-text { font-family: 'SF Mono', 'Fira Code', monospace !important; font-size: 0.8rem !important; color: #52525b !important; } #prompt-input > label > span { display: none !important; } #prompt-input textarea { background: #111114 !important; border: 1px solid #1f1f23 !important; border-radius: 8px !important; color: #fafafa !important; font-size: 1rem !important; padding: 1rem !important; } #prompt-input textarea:focus { border-color: #a855f7 !important; box-shadow: 0 0 0 2px rgba(168,85,247,0.1) !important; } #gen-btn { background: #a855f7 !important; border: none !important; border-radius: 8px !important; font-weight: 500 !important; font-size: 0.9rem !important; padding: 0.75rem 2rem !important; } #gen-btn:hover { background: #9333ea !important; } .prompt-chip { background: #111114 !important; border: 1px solid #1f1f23 !important; border-radius: 6px !important; color: #a1a1aa !important; font-size: 0.85rem !important; } .prompt-chip:hover { border-color: #a855f7 !important; color: #fafafa !important; } #footer { text-align: center; padding: 2rem 0; border-top: 1px solid #1f1f23; margin-top: 2rem; } #footer p { color: #52525b; font-size: 0.8rem; } #footer a { color: #a855f7; text-decoration: none; } footer.svelte-1ax1toq { display: none !important; } .built-with { display: none !important; } """ # ─── Gradio UI ─────────────────────────────────────────────────────────────── with gr.Blocks(css=CSS, theme=gr.themes.Base(primary_hue="purple", neutral_hue="zinc"), title="Glossolalia") as demo: gr.HTML("""
Semiotic ensemble inference. Same forward pass, different decoder.
Left: standard top-p. Right: fork/race/fold with deficit-weighted complement merge.
All inference via Aether. Zero ML dependencies. Full diagnostic visibility.
Standard Sampling
') std_out = gr.Textbox(lines=10, show_label=False, interactive=False, elem_classes=["response-card"]) std_stats = gr.HTML('--
') with gr.Column(min_width=30): gr.HTML('VS
') with gr.Column(): gr.HTML('Glossolalia (fork/race/fold)
') glo_out = gr.Textbox(lines=10, show_label=False, interactive=False, elem_classes=["response-card"]) glo_stats = gr.HTML('--
') with gr.Accordion("Glossolalia Agent Diagnostics", open=False): glo_diag = gr.Textbox(lines=18, show_label=False, interactive=False) with gr.Accordion("Standard Sampling Diagnostics", open=False): std_diag = gr.Textbox(lines=12, show_label=False, interactive=False) with gr.Accordion("Layer Health (32 transformer layers)", open=False): layer_health = gr.Textbox(lines=18, show_label=False, interactive=False) outputs = [std_out, glo_out, std_stats, glo_stats, glo_diag, std_diag, layer_health] inputs = [prompt, max_tok, model_choice] def run(prompt_text, max_tokens, model_name): for vals in compare(prompt_text, max_tokens, model_name): st, gt, ss, gs, gd, sd, lh = vals yield st, gt, f'{ss}
', f'{gs}
', gd, sd, lh btn.click(run, inputs, outputs) prompt.submit(run, inputs, outputs) gr.HTML('Try these:
') with gr.Row(): for p in ["What is the shape of failure?", "The theory of everything begins with", "If silence had a color", "Write a haiku about parallel universes"]: gr.Button(p, size="sm", elem_classes=["prompt-chip"]).click( fn=lambda x=p: x, outputs=[prompt] ).then(fn=run, inputs=inputs, outputs=outputs) gr.HTML(""" """) if __name__ == "__main__": demo.launch(server_name="0.0.0.0", server_port=7860)