Spaces:
Running
Running
File size: 10,542 Bytes
699a27f c5b9541 699a27f c5b9541 fd5d225 699a27f | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 | """
Aether -- Pure Engine Speed Comparison
Same model. Same prompt. Same tokens. Different engine.
PyTorch CPU vs Aether WASM-SIMD. Let the ms/tok speak.
"""
import atexit
import json
import os
import select
import subprocess
import threading
import time
import urllib.error
import urllib.request
from concurrent.futures import ThreadPoolExecutor, as_completed

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Launch the Node-based Aether server as a child process. It is told which
# port to listen on through AETHER_PORT; its stderr is merged into stdout so
# a single pipe carries all of its log output.
print("[Aether] Starting Aether sidecar...", flush=True)
aether_proc = subprocess.Popen(
    ["node", "aether-server.mjs"],
    env={**os.environ, "AETHER_PORT": "7861"},
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
)
# Do not leave the sidecar running after this process exits. terminate() is
# harmless if the child has already stopped on its own.
atexit.register(aether_proc.terminate)
# Load the PyTorch side of the comparison: tokenizer plus float32 weights
# placed on the CPU.
print("[Aether] Loading PyTorch model...", flush=True)
_HF_MODEL_ID = "HuggingFaceTB/SmolLM2-360M-Instruct"
tokenizer = AutoTokenizer.from_pretrained(_HF_MODEL_ID)
pytorch_model = AutoModelForCausalLM.from_pretrained(
    _HF_MODEL_ID,
    device_map="cpu",
    torch_dtype=torch.float32,
)
print("[Aether] PyTorch ready.", flush=True)
print("[Aether] Waiting for Aether engine...", flush=True)


def _drain_sidecar_output(stream):
    """Echo every line the sidecar writes until its pipe is closed."""
    for raw in iter(stream.readline, b""):
        # errors="replace": a stray non-UTF-8 byte must not kill the reader.
        print(f" {raw.decode(errors='replace').strip()}", flush=True)


# The sidecar's stdout is a pipe. If nobody reads it, the child blocks as
# soon as the OS pipe buffer fills up, so keep draining it for the whole
# lifetime of the process (not just during startup) on a daemon thread.
if aether_proc.stdout is not None:
    threading.Thread(
        target=_drain_sidecar_output,
        args=(aether_proc.stdout,),
        daemon=True,
    ).start()

# Poll the sidecar's /health endpoint once per second, for up to 180 tries,
# until it reports that the model is loaded.
_last_probe_error = None
for _ in range(180):
    if aether_proc.poll() is not None:
        # The child is gone; further polling cannot succeed.
        print(
            f"[Aether] Sidecar exited with code {aether_proc.returncode} "
            "before becoming ready.",
            flush=True,
        )
        break
    try:
        with urllib.request.urlopen(
            "http://127.0.0.1:7861/health", timeout=2
        ) as resp:
            health = json.loads(resp.read())
    except Exception as exc:
        # Best-effort probe: connection refused, timeouts and malformed
        # replies are all expected while the server is still starting.
        # Remember the reason so a final timeout can report it.
        _last_probe_error = exc
        health = None
    if (
        isinstance(health, dict)
        and health.get("status") == "ok"
        and health.get("model") == "loaded"
    ):
        print(f"[Aether] Engine ready ({health.get('loadTime')}ms, SIMD: {health.get('simd')})", flush=True)
        break
    time.sleep(1)
else:
    # Every attempt was used up without a ready response. Startup continues
    # (as before), but say so instead of failing silently.
    print(
        f"[Aether] Engine not ready after 180 attempts "
        f"(last error: {_last_probe_error}); continuing anyway.",
        flush=True,
    )
def gen_pytorch(prompt, max_tokens):
    """Generate a reply with the PyTorch model and time the generate() call.

    The prompt is wrapped as a single user message and rendered through the
    tokenizer's chat template. Only ``pytorch_model.generate`` is timed;
    templating, tokenization and decoding are outside the measured window.

    Returns:
        A 4-tuple ``(text, seconds, new_tokens, ms_per_token)``.
        ``ms_per_token`` is 0 when no new tokens were produced.
    """
    chat = [{"role": "user", "content": prompt}]
    rendered = tokenizer.apply_chat_template(
        chat, tokenize=False, add_generation_prompt=True
    )
    encoded = tokenizer(rendered, return_tensors="pt")
    prompt_len = encoded["input_ids"].shape[1]

    started = time.perf_counter()
    with torch.no_grad():
        generated = pytorch_model.generate(
            **encoded,
            max_new_tokens=max_tokens,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
        )
    seconds = time.perf_counter() - started

    # Everything after the prompt tokens is newly generated output.
    new_tokens = generated.shape[1] - prompt_len
    reply = tokenizer.decode(
        generated[0][prompt_len:], skip_special_tokens=True
    ).strip()

    if new_tokens > 0:
        ms_per_token = seconds * 1000 / new_tokens
    else:
        ms_per_token = 0
    return reply, seconds, new_tokens, ms_per_token
def gen_aether(prompt, max_tokens, *, base_url="http://127.0.0.1:7861"):
    """Ask the Aether sidecar to generate a reply and return its timings.

    POSTs ``{"prompt": ..., "max_tokens": ...}`` as JSON to
    ``{base_url}/generate`` and reads ``text``, ``totalTimeMs``, ``tokens``
    and ``avgTokenMs`` from the JSON reply.

    Args:
        prompt: User prompt forwarded to the sidecar.
        max_tokens: Generation limit forwarded to the sidecar.
        base_url: Root URL of the sidecar (keyword-only). Defaults to the
            local address used by this app.

    Returns:
        A 4-tuple ``(text, seconds, tokens, ms_per_token)``. This function
        never raises: on any failure it returns
        ``("[Error: <detail>]", 0, 0, 0)`` so the UI can show the problem
        in place of the generated text.
    """
    try:
        payload = json.dumps({"prompt": prompt, "max_tokens": max_tokens}).encode()
        req = urllib.request.Request(
            f"{base_url}/generate",
            data=payload,
            headers={"Content-Type": "application/json"},
        )
        # Context manager so the connection is released even if parsing fails.
        with urllib.request.urlopen(req, timeout=600) as resp:
            r = json.loads(resp.read())
        return r["text"], r["totalTimeMs"] / 1000, r["tokens"], r["avgTokenMs"]
    except urllib.error.HTTPError as e:
        if e.fp:
            # errors="replace": a non-UTF-8 error body must not raise from
            # inside this handler, where nothing would catch it.
            body = e.read().decode(errors="replace")
            e.close()
        else:
            body = str(e)
        # Prefer the server's JSON "error" field; fall back to the raw body
        # (truncated) when it is not a JSON object.
        try:
            detail = json.loads(body).get("error", body[:300])
        except (ValueError, AttributeError):
            detail = body[:300]
        return f"[Error: {detail}]", 0, 0, 0
    except Exception as e:
        # UI boundary: report any other failure (connection refused, timeout,
        # malformed reply, missing keys) as text rather than raising.
        return f"[Error: {e}]", 0, 0, 0
def compare(prompt, max_tokens):
    """Run both engines concurrently and yield UI snapshots as they finish.

    Each yielded value is a 4-tuple
    ``(pytorch_text, aether_text, pytorch_stats, aether_stats)``. An engine
    that has not finished yet shows ``"generating..."`` / ``"running..."``.
    A blank prompt yields one tuple of empty strings and stops. Otherwise a
    snapshot is yielded after each engine completes, plus one final snapshot
    once both are done.
    """
    if not (prompt and prompt.strip()):
        yield ("", "", "", "")
        return

    token_budget = int(max_tokens)
    # Filled in by the worker threads; None means "still running".
    results = {"pt": None, "ae": None}

    def describe(result):
        if not result:
            return "running..."
        _, seconds, count, ms_per_tok = result
        return f"{count} tokens in {seconds:.1f}s ({ms_per_tok:.0f}ms/tok)"

    def snapshot():
        pt_res = results["pt"]
        ae_res = results["ae"]
        pt_text = pt_res[0] if pt_res else "generating..."
        ae_text = ae_res[0] if ae_res else "generating..."
        return (pt_text, ae_text, describe(pt_res), describe(ae_res))

    def work_pytorch():
        results["pt"] = gen_pytorch(prompt, token_budget)

    def work_aether():
        results["ae"] = gen_aether(prompt, token_budget)

    with ThreadPoolExecutor(max_workers=2) as pool:
        pending = [pool.submit(work_pytorch), pool.submit(work_aether)]
        for finished in as_completed(pending):
            # Re-raise anything the worker raised before reporting progress.
            finished.result()
            yield snapshot()
    yield snapshot()
# Custom stylesheet passed to gr.Blocks(css=CSS) below. It sets near-black
# backgrounds with a cyan (#06b6d4) accent and styles the hero banner, the
# prompt input, the two response cards, the stats lines, the prompt chips
# and the page footer.
# NOTE(review): the last two rules hide footer elements, presumably Gradio's
# own "built with" footer. The `svelte-1ax1toq` class name looks tied to a
# specific Gradio build; confirm it still matches the installed version.
CSS = """
.gradio-container { max-width: 1060px !important; margin: 0 auto !important; }
.gradio-container, .dark { background: #09090b !important; }
#hero { text-align: center; padding: 2rem 0 1rem; }
#hero h1 { font-size: 2.5rem; font-weight: 300; letter-spacing: -0.02em; color: #fafafa; margin: 0; }
#hero .accent { color: #06b6d4; }
#hero .subtitle { color: #71717a; font-size: 0.95rem; margin-top: 0.5rem; }
.response-card { background: #0c0c0f !important; border: 1px solid #1f1f23 !important; border-radius: 8px !important; }
.response-card textarea { background: #0c0c0f !important; border: none !important; color: #e4e4e7 !important; font-size: 0.95rem !important; line-height: 1.6 !important; }
.pt-label { color: #71717a !important; font-size: 0.8rem !important; text-transform: uppercase !important; letter-spacing: 0.05em !important; font-weight: 500 !important; }
.ae-label { color: #06b6d4 !important; font-size: 0.8rem !important; text-transform: uppercase !important; letter-spacing: 0.05em !important; font-weight: 500 !important; }
.stats-text { font-family: 'SF Mono', 'Fira Code', monospace !important; font-size: 0.85rem !important; color: #52525b !important; }
#prompt-input > label > span { display: none !important; }
#prompt-input textarea { background: #111114 !important; border: 1px solid #1f1f23 !important; border-radius: 8px !important; color: #fafafa !important; font-size: 1rem !important; padding: 1rem !important; }
#prompt-input textarea:focus { border-color: #06b6d4 !important; }
#gen-btn { background: #06b6d4 !important; border: none !important; border-radius: 8px !important; font-weight: 500 !important; font-size: 0.9rem !important; padding: 0.75rem 2rem !important; color: #09090b !important; }
.prompt-chip { background: #111114 !important; border: 1px solid #1f1f23 !important; border-radius: 6px !important; color: #a1a1aa !important; font-size: 0.85rem !important; }
.prompt-chip:hover { border-color: #06b6d4 !important; color: #fafafa !important; }
#footer { text-align: center; padding: 2rem 0; border-top: 1px solid #1f1f23; margin-top: 2rem; }
#footer p { color: #52525b; font-size: 0.8rem; }
#footer a { color: #06b6d4; text-decoration: none; }
footer.svelte-1ax1toq { display: none !important; }
.built-with { display: none !important; }
"""
# Page layout: hero banner, prompt row, Generate button, the two side-by-side
# result columns, one-click example prompts, and the footer.
with gr.Blocks(
    css=CSS,
    theme=gr.themes.Base(primary_hue="cyan", neutral_hue="zinc"),
    title="Aether",
) as demo:
    gr.HTML("""
<div id="hero">
<h1><span class="accent">Aether</span></h1>
<p class="subtitle">Pure engine speed comparison. Same model (SmolLM2-360M-Instruct). Same prompt. Same tokens.<br/>
Left: PyTorch CPU (2.8GB runtime, CUDA/MKL optimized).<br/>
Right: Aether (14KB WASM binary, pure JS + SIMD128, zero ML dependencies).<br/>
Both generate in parallel. Whichever finishes first shows first.</p>
</div>
""")

    # --- Inputs -----------------------------------------------------------
    with gr.Row():
        prompt = gr.Textbox(
            elem_id="prompt-input",
            placeholder="What is the shape of failure?",
            lines=2,
            label="Prompt",
            show_label=False,
            interactive=True,
            scale=4,
        )
        max_tok = gr.Slider(
            minimum=8,
            maximum=8192,
            value=64,
            step=1,
            label="Max tokens",
            scale=1,
        )
    btn = gr.Button("Generate", elem_id="gen-btn", variant="primary")

    # --- Results: PyTorch | VS | Aether -----------------------------------
    with gr.Row(equal_height=True):
        with gr.Column():
            gr.HTML('<p class="pt-label">PyTorch CPU (standard)</p>')
            pt_out = gr.Textbox(
                lines=10,
                show_label=False,
                interactive=False,
                elem_classes=["response-card"],
            )
            pt_stats = gr.HTML('<p class="stats-text">--</p>')
        with gr.Column(min_width=30):
            gr.HTML('<p style="color:#27272a; text-align:center; padding-top:4rem; font-size:0.75rem; letter-spacing:0.1em;">VS</p>')
        with gr.Column():
            gr.HTML('<p class="ae-label">Aether WASM-SIMD (14KB)</p>')
            ae_out = gr.Textbox(
                lines=10,
                show_label=False,
                interactive=False,
                elem_classes=["response-card"],
            )
            ae_stats = gr.HTML('<p class="stats-text">--</p>')

    outputs = [pt_out, ae_out, pt_stats, ae_stats]
    inputs = [prompt, max_tok]

    def run(p, mt):
        """Stream compare() results, wrapping each stats string in styled HTML."""
        for pt_text, ae_text, pt_line, ae_line in compare(p, mt):
            yield (
                pt_text,
                ae_text,
                f'<p class="stats-text">{pt_line}</p>',
                f'<p class="stats-text">{ae_line}</p>',
            )

    # Both the button and pressing Enter in the prompt box start a comparison.
    btn.click(run, inputs, outputs)
    prompt.submit(run, inputs, outputs)

    # --- Example prompts --------------------------------------------------
    gr.HTML('<p style="color:#52525b; font-size:0.8rem; margin-top:1.5rem; margin-bottom:0.5rem;">Try these:</p>')
    with gr.Row():
        for example in (
            "hello",
            "What is the shape of failure?",
            "Write a haiku about parallel universes.",
            "Explain entropy to a five-year-old.",
        ):
            chip = gr.Button(example, size="sm", elem_classes=["prompt-chip"])
            # Bind the text as a default argument so each chip keeps its own
            # prompt; the click fills the prompt box, then runs a comparison.
            filled = chip.click(fn=lambda x=example: x, outputs=[prompt])
            filled.then(fn=run, inputs=inputs, outputs=outputs)

    gr.HTML("""
<div id="footer">
<p style="color:#a1a1aa; font-size:0.85rem; margin-bottom:0.5rem;">
SmolLM2-360M-Instruct · Q8_0 GGUF · Same weights, different engines
</p>
<p>
PyTorch: ~2.8GB runtime, C++/CUDA/MKL optimized, Python bindings<br/>
Aether: 14KB WASM + JS, SIMD128 vectorized, zero dependencies, runs anywhere
</p>
<p style="margin-top:1rem;">
<a href="https://forkracefold.com/">Whitepaper</a> ·
<a href="https://huggingface.co/spaces/forkjoin-ai/aether">Aether</a> ·
<a href="https://huggingface.co/spaces/forkjoin-ai/aether-browser">Edge Mesh</a> ·
<a href="https://huggingface.co/spaces/forkjoin-ai/the-void">The Void</a> ·
<a href="https://huggingface.co/spaces/forkjoin-ai/buleyean-rl">Buleyean RL</a> ·
<a href="https://huggingface.co/spaces/forkjoin-ai/glossolalia">Glossolalia</a> ·
<a href="https://huggingface.co/spaces/forkjoin-ai/metacog">Metacog</a> ·
<a href="https://huggingface.co/spaces/forkjoin-ai/five-bules">Five Bules</a> ·
<a href="https://huggingface.co/spaces/forkjoin-ai/void-attention">Void Attention</a> ·
<a href="https://huggingface.co/spaces/forkjoin-ai/quark-personality">Quark Personality</a>
</p>
<p style="margin-top:1rem;">Copyright 2026 forkjoin.ai</p>
</div>
""")
if __name__ == "__main__":
    # Serve the UI on all interfaces, port 7860.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )
|