"""The Void -- Buleyean RL.

PyTorch vs Aether. Same model. Different engines. Let the speed speak.
"""

import json
import os
import select
import subprocess
import time
import urllib.error
import urllib.request
from concurrent.futures import ThreadPoolExecutor, as_completed

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# ─── Start Aether sidecar ────────────────────────────────────────────────────
print("[Void] Starting Aether inference server...", flush=True)
aether_proc = subprocess.Popen(
    ["node", "aether-server.mjs"],
    env={**os.environ, "AETHER_PORT": "7861"},
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
)

# ─── Load PyTorch model ──────────────────────────────────────────────────────
print("[Void] Loading PyTorch base model...", flush=True)
base_tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-360M-Instruct")
base_model = AutoModelForCausalLM.from_pretrained(
    "HuggingFaceTB/SmolLM2-360M-Instruct",
    torch_dtype=torch.float32,
    device_map="cpu",
)
print("[Void] PyTorch model ready.", flush=True)

# Wait for Aether: poll /health once per second for up to 180s, echoing any
# sidecar log output while we wait so startup problems are visible.
print("[Void] Waiting for Aether...", flush=True)
for attempt in range(180):
    try:
        req = urllib.request.Request("http://127.0.0.1:7861/health")
        resp = urllib.request.urlopen(req, timeout=2)
        health = json.loads(resp.read())
        if health.get("status") == "ok" and health.get("model") == "loaded":
            print(
                f"[Void] Aether ready (loaded in {health.get('loadTime')}ms, "
                f"SIMD: {health.get('simd')})",
                flush=True,
            )
            break
    except Exception:
        # Server not up yet (connection refused / timeout) — keep polling.
        pass
    # Drain pending sidecar stdout without blocking (select with 0 timeout).
    if aether_proc.stdout and select.select([aether_proc.stdout], [], [], 0)[0]:
        line = aether_proc.stdout.readline()
        if line:
            print(f" {line.decode().strip()}", flush=True)
    time.sleep(1)
else:
    # for/else: only reached if the loop never hit `break`.
    print("[Void] WARNING: Aether not ready after 180s", flush=True)


def gen_pytorch(prompt):
    """Generate a completion with the PyTorch baseline model.

    Returns ``(text, elapsed_seconds, new_tokens, ms_per_token)``;
    ``ms_per_token`` is 0 when no tokens were generated.
    """
    messages = [{"role": "user", "content": prompt}]
    chat_text = base_tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = base_tokenizer(chat_text, return_tensors="pt")
    t0 = time.perf_counter()
    with torch.no_grad():
        outputs = base_model.generate(
            **inputs,
            max_new_tokens=256,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=base_tokenizer.eos_token_id,
        )
    elapsed = time.perf_counter() - t0
    prompt_len = inputs["input_ids"].shape[1]
    n = outputs.shape[1] - prompt_len
    # Decode only the newly generated portion of the sequence.
    completion = base_tokenizer.decode(
        outputs[0][prompt_len:], skip_special_tokens=True
    ).strip()
    return completion, elapsed, n, (elapsed * 1000 / n) if n > 0 else 0


def gen_aether(prompt):
    """Generate a completion via the Aether HTTP sidecar.

    Returns ``(text, elapsed_seconds, tokens, ms_per_token)``. On failure the
    text carries a bracketed error message and the numeric fields are zeroed,
    so callers never see an exception.
    """
    try:
        data = json.dumps({"prompt": prompt, "max_tokens": 256}).encode()
        req = urllib.request.Request(
            "http://127.0.0.1:7861/generate",
            data=data,
            headers={"Content-Type": "application/json"},
        )
        resp = urllib.request.urlopen(req, timeout=300)
        r = json.loads(resp.read())
        return r["text"], r["totalTimeMs"] / 1000, r["tokens"], r["avgTokenMs"]
    except urllib.error.HTTPError as e:
        body = e.read().decode() if e.fp else str(e)
        try:
            # The server reports errors as JSON {"error": ...} when it can.
            detail = json.loads(body).get("error", body[:300])
        except Exception:
            detail = body[:300]
        return f"[Aether error: {detail}]", 0, 0, 0
    except Exception as e:
        return f"[Aether error: {e}]", 0, 0, 0


def _fmt_stats(result):
    """Format one (text, seconds, tokens, ms/token) result as a stats line."""
    _, tm, tk, ms = result
    return f"{tk} tokens in {tm:.1f}s ({ms:.0f}ms/tok)"


def compare(prompt):
    """Run both engines concurrently and stream results as each finishes.

    Yields ``(base_text, aether_text, base_stats, aether_stats)`` tuples:
    one partial update per engine completion, then a final combined tuple.
    """
    if not prompt or not prompt.strip():
        yield "", "", "", ""
        return

    # Single-element lists act as mutable cells the worker threads write into.
    base_result = [None]
    aether_result = [None]

    def run_base():
        base_result[0] = gen_pytorch(prompt)

    def run_aether():
        aether_result[0] = gen_aether(prompt)

    with ThreadPoolExecutor(max_workers=2) as pool:
        futures = {pool.submit(run_base): "base", pool.submit(run_aether): "aether"}
        for future in as_completed(futures):
            name = futures[future]
            future.result()  # re-raise any worker exception here
            if name == "base" and base_result[0]:
                at = aether_result[0][0] if aether_result[0] else "generating..."
                ast = _fmt_stats(aether_result[0]) if aether_result[0] else "running..."
                yield base_result[0][0], at, _fmt_stats(base_result[0]), ast
            elif name == "aether" and aether_result[0]:
                bt = base_result[0][0] if base_result[0] else "generating..."
                bst = _fmt_stats(base_result[0]) if base_result[0] else "running..."
                yield bt, aether_result[0][0], bst, _fmt_stats(aether_result[0])

    # Final update with both results once the pool has drained.
    if base_result[0] and aether_result[0]:
        yield (
            base_result[0][0],
            aether_result[0][0],
            _fmt_stats(base_result[0]),
            _fmt_stats(aether_result[0]),
        )


CSS = """
.gradio-container { max-width: 1060px !important; margin: 0 auto !important; }
.gradio-container, .dark { background: #09090b !important; }
#hero { text-align: center; padding: 2rem 0 1rem; }
#hero h1 { font-size: 2.5rem; font-weight: 300; letter-spacing: -0.02em; color: #fafafa; margin: 0; }
#hero .subtitle { color: #71717a; font-size: 0.95rem; margin-top: 0.5rem; }
#hero .accent { color: #3b82f6; }
.response-card { background: #0c0c0f !important; border: 1px solid #1f1f23 !important; border-radius: 8px !important; }
.response-card textarea { background: #0c0c0f !important; border: none !important; color: #e4e4e7 !important; font-size: 0.95rem !important; line-height: 1.6 !important; }
.base-label { color: #71717a !important; font-size: 0.8rem !important; text-transform: uppercase !important; letter-spacing: 0.05em !important; font-weight: 500 !important; }
.void-label { color: #3b82f6 !important; font-size: 0.8rem !important; text-transform: uppercase !important; letter-spacing: 0.05em !important; font-weight: 500 !important; }
.stats-text { font-family: 'SF Mono', 'Fira Code', monospace !important; font-size: 0.8rem !important; color: #52525b !important; }
#prompt-input > label > span { display: none !important; }
#prompt-input textarea { background: #111114 !important; border: 1px solid #1f1f23 !important; border-radius: 8px !important; color: #fafafa !important; font-size: 1rem !important; padding: 1rem !important; }
#prompt-input textarea:focus { border-color: #3b82f6 !important; box-shadow: 0 0 0 2px rgba(59,130,246,0.1) !important; }
#gen-btn { background: #3b82f6 !important; border: none !important; border-radius: 8px !important; font-weight: 500 !important; font-size: 0.9rem !important; padding: 0.75rem 2rem !important; }
#gen-btn:hover { background: #2563eb !important; }
.prompt-chip { background: #111114 !important; border: 1px solid #1f1f23 !important; border-radius: 6px !important; color: #a1a1aa !important; font-size: 0.85rem !important; }
.prompt-chip:hover { border-color: #3b82f6 !important; color: #fafafa !important; }
#footer { text-align: center; padding: 2rem 0; border-top: 1px solid #1f1f23; margin-top: 2rem; }
#footer p { color: #52525b; font-size: 0.8rem; }
#footer a { color: #3b82f6; text-decoration: none; }
footer.svelte-1ax1toq { display: none !important; }
.built-with { display: none !important; }
"""

with gr.Blocks(
    css=CSS,
    theme=gr.themes.Base(primary_hue="blue", neutral_hue="zinc"),
    title="The Void",
) as demo:
    # NOTE(review): the HTML tags inside every gr.HTML() literal were lost in
    # extraction; the markup below is reconstructed from the CSS selectors and
    # the surviving text content — confirm against the deployed page.
    gr.HTML(
        """
        <div id="hero">
            <h1>The <span class="accent">Void</span></h1>
            <p class="subtitle">PyTorch vs Aether. Same prompt. Different engines. Live inference.</p>
            <p class="subtitle">Left: standard PyTorch CPU. Right: Aether -- pure JS + 14KB WASM SIMD, zero ML dependencies.</p>
        </div>
        """
    )

    prompt = gr.Textbox(
        elem_id="prompt-input",
        placeholder="What would you like to ask?",
        lines=2,
        label="Prompt",
        show_label=False,
        interactive=True,
    )
    btn = gr.Button("Generate", elem_id="gen-btn", variant="primary")

    with gr.Row(equal_height=True):
        with gr.Column():
            gr.HTML('<div class="base-label">PyTorch (standard)</div>')
            base_out = gr.Textbox(
                lines=10,
                show_label=False,
                interactive=False,
                elem_classes=["response-card"],
            )
            base_stats = gr.HTML('<div class="stats-text">--</div>')
        with gr.Column(min_width=30):
            gr.HTML('<div class="base-label">VS</div>')
        with gr.Column():
            gr.HTML('<div class="void-label">Aether (our engine)</div>')
            aether_out = gr.Textbox(
                lines=10,
                show_label=False,
                interactive=False,
                elem_classes=["response-card"],
            )
            aether_stats = gr.HTML('<div class="stats-text">--</div>')

    def run_compare(prompt_text):
        """Stream compare() tuples, wrapping stats strings in styled HTML."""
        for bt, at, bs, ast in compare(prompt_text):
            yield (
                bt,
                at,
                f'<div class="stats-text">{bs}</div>',
                f'<div class="stats-text">{ast}</div>',
            )

    btn.click(run_compare, [prompt], [base_out, aether_out, base_stats, aether_stats])
    prompt.submit(
        run_compare, [prompt], [base_out, aether_out, base_stats, aether_stats]
    )

    gr.HTML('<div class="base-label">Try these:</div>')
    with gr.Row():
        for p in [
            "hello",
            "How are you feeling?",
            "I've been anxious lately.",
            "Write a haiku about failure.",
            "What is the meaning of life?",
        ]:
            # `x=p` binds the chip text at definition time (avoids the
            # late-binding-closure pitfall in this loop).
            gr.Button(p, size="sm", elem_classes=["prompt-chip"]).click(
                fn=lambda x=p: x, outputs=[prompt]
            ).then(
                fn=run_compare,
                inputs=[prompt],
                outputs=[base_out, aether_out, base_stats, aether_stats],
            )

    # NOTE(review): the footer HTML content was lost in extraction; this
    # placeholder keeps the #footer styles wired up — restore original copy.
    gr.HTML('<div id="footer"></div>')

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)