File size: 11,625 Bytes
c546342
868752a
fcac5c7
c546342
 
 
33ad9ed
fcac5c7
c92238b
fcac5c7
 
 
64ff7cb
33ad9ed
868752a
fcac5c7
 
 
 
 
 
 
 
 
 
 
33ad9ed
 
 
 
 
 
fcac5c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0c8d249
fcac5c7
 
c92238b
33ad9ed
fcac5c7
ec694f7
fcac5c7
c92238b
 
fcac5c7
 
 
 
 
 
 
ec694f7
fcac5c7
 
 
 
 
 
 
 
 
 
 
 
c92238b
 
d77d3cd
 
e5e1d2b
 
c92238b
64ff7cb
fcac5c7
c92238b
64ff7cb
fcac5c7
 
 
64ff7cb
 
fcac5c7
64ff7cb
 
fcac5c7
64ff7cb
fcac5c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33ad9ed
868752a
 
c92238b
868752a
 
 
 
 
 
 
 
 
c92238b
 
868752a
 
fcac5c7
 
 
 
868752a
 
 
 
 
 
d77d3cd
868752a
d77d3cd
868752a
 
 
fcac5c7
 
868752a
 
2ba37ce
a06fe42
868752a
2ba37ce
868752a
b4d8480
fcac5c7
0c8d249
c92238b
 
868752a
b4d8480
fcac5c7
 
 
c92238b
 
fcac5c7
 
b4d8480
fcac5c7
 
b4d8480
868752a
 
 
 
e5e1d2b
fcac5c7
868752a
 
 
c92238b
fcac5c7
 
c92238b
868752a
c92238b
6553234
 
 
 
 
 
 
 
 
868752a
c92238b
342d5f8
868752a
 
c546342
 
b56d971
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
"""
The Void -- Buleyean RL
PyTorch vs Aether. Same model. Different engines. Let the speed speak.
"""

import json
import os
import select
import subprocess
import time
import urllib.error
import urllib.request
from concurrent.futures import ThreadPoolExecutor, as_completed

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# ─── Start Aether sidecar ────────────────────────────────────────────────────
# Launch the Node.js inference server as a child process. Its stdout/stderr are
# merged into one pipe so the health-poll loop below can echo its startup logs.
print("[Void] Starting Aether inference server...", flush=True)
aether_proc = subprocess.Popen(
    ["node", "aether-server.mjs"],
    # Inherit the parent environment and pin the sidecar's listen port.
    env={**os.environ, "AETHER_PORT": "7861"},
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
)

# ─── Load PyTorch model ──────────────────────────────────────────────────────
# Baseline engine for the comparison: SmolLM2-360M-Instruct run by stock
# PyTorch on CPU (float32), racing against the Aether sidecar above.
print("[Void] Loading PyTorch base model...", flush=True)
base_tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-360M-Instruct")
base_model = AutoModelForCausalLM.from_pretrained(
    "HuggingFaceTB/SmolLM2-360M-Instruct",
    torch_dtype=torch.float32,
    device_map="cpu",
)
print("[Void] PyTorch model ready.", flush=True)

# Wait for Aether: poll the sidecar's /health endpoint (up to ~180s) until it
# reports its model is loaded, echoing any sidecar log lines while we wait.
print("[Void] Waiting for Aether...", flush=True)
for attempt in range(180):
    try:
        # Context manager closes the response; the original leaked one socket
        # per poll attempt.
        with urllib.request.urlopen("http://127.0.0.1:7861/health", timeout=2) as resp:
            health = json.loads(resp.read())
        if health.get("status") == "ok" and health.get("model") == "loaded":
            print(f"[Void] Aether ready (loaded in {health.get('loadTime')}ms, SIMD: {health.get('simd')})", flush=True)
            break
    except Exception:
        pass  # server not accepting connections yet; keep polling
    # Non-blocking drain of one sidecar stdout line so startup progress shows.
    if aether_proc.stdout and select.select([aether_proc.stdout], [], [], 0)[0]:
        line = aether_proc.stdout.readline()
        if line:
            print(f"  {line.decode().strip()}", flush=True)
    time.sleep(1)
else:
    # for/else: reached only if the loop never hit `break` (never became healthy).
    print("[Void] WARNING: Aether not ready after 180s", flush=True)


def gen_pytorch(prompt):
    """Generate a reply with the PyTorch baseline model.

    Returns (reply_text, elapsed_seconds, new_token_count, ms_per_token).
    """
    chat = [{"role": "user", "content": prompt}]
    templated = base_tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    encoded = base_tokenizer(templated, return_tensors="pt")
    prompt_len = encoded["input_ids"].shape[1]

    start = time.perf_counter()
    with torch.no_grad():
        generated = base_model.generate(
            **encoded,
            max_new_tokens=256,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=base_tokenizer.eos_token_id,
        )
    elapsed = time.perf_counter() - start

    new_tokens = generated.shape[1] - prompt_len
    # Decode only the freshly generated suffix, dropping the prompt tokens.
    reply = base_tokenizer.decode(generated[0][prompt_len:], skip_special_tokens=True).strip()
    per_token_ms = (elapsed * 1000 / new_tokens) if new_tokens > 0 else 0
    return reply, elapsed, new_tokens, per_token_ms


def gen_aether(prompt):
    """Generate a reply via the Aether sidecar's /generate HTTP endpoint.

    Returns (reply_text, elapsed_seconds, token_count, ms_per_token).
    On any failure, returns an "[Aether error: ...]" message with zeroed
    timing stats so the UI still renders instead of crashing.
    """
    try:
        data = json.dumps({"prompt": prompt, "max_tokens": 256}).encode()
        req = urllib.request.Request(
            "http://127.0.0.1:7861/generate",
            data=data,
            headers={"Content-Type": "application/json"},
        )
        # Close the response deterministically (the original leaked it);
        # generous timeout since CPU generation can take minutes.
        with urllib.request.urlopen(req, timeout=300) as resp:
            r = json.loads(resp.read())
        return r["text"], r["totalTimeMs"] / 1000, r["tokens"], r["avgTokenMs"]
    except urllib.error.HTTPError as e:
        # Prefer the server's structured {"error": ...} detail; fall back to
        # the raw (truncated) body text.
        body = e.read().decode() if e.fp else str(e)
        try:
            detail = json.loads(body).get("error", body[:300])
        except Exception:
            detail = body[:300]
        return f"[Aether error: {detail}]", 0, 0, 0
    except Exception as e:
        return f"[Aether error: {e}]", 0, 0, 0


def compare(prompt):
    """Race both engines on one prompt, streaming UI updates as each finishes.

    Yields (base_text, aether_text, base_stats, aether_stats) tuples: one
    partial update when the first engine completes, another when the second
    does, and a final tuple with both results filled in.
    """
    if not prompt or not prompt.strip():
        yield "", "", "", ""
        return

    results = {"base": None, "aether": None}

    def run(name, fn):
        # Store the engine's result under its name; return the name so the
        # as_completed loop knows which side just finished.
        results[name] = fn(prompt)
        return name

    with ThreadPoolExecutor(max_workers=2) as pool:
        pending = [
            pool.submit(run, "base", gen_pytorch),
            pool.submit(run, "aether", gen_aether),
        ]
        for done in as_completed(pending):
            finished = done.result()
            if finished == "base" and results["base"]:
                text, secs, toks, per_ms = results["base"]
                own_stats = f"{toks} tokens in {secs:.1f}s ({per_ms:.0f}ms/tok)"
                other = results["aether"]
                other_text = other[0] if other else "generating..."
                other_stats = f"{other[2]} tokens in {other[1]:.1f}s ({other[3]:.0f}ms/tok)" if other else "running..."
                yield text, other_text, own_stats, other_stats
            elif finished == "aether" and results["aether"]:
                text, secs, toks, per_ms = results["aether"]
                own_stats = f"{toks} tokens in {secs:.1f}s ({per_ms:.0f}ms/tok)"
                other = results["base"]
                other_text = other[0] if other else "generating..."
                other_stats = f"{other[2]} tokens in {other[1]:.1f}s ({other[3]:.0f}ms/tok)" if other else "running..."
                yield other_text, text, other_stats, own_stats

    # Final update with both sides complete.
    if results["base"] and results["aether"]:
        b_text, b_secs, b_toks, b_ms = results["base"]
        a_text, a_secs, a_toks, a_ms = results["aether"]
        yield (
            b_text,
            a_text,
            f"{b_toks} tokens in {b_secs:.1f}s ({b_ms:.0f}ms/tok)",
            f"{a_toks} tokens in {a_secs:.1f}s ({a_ms:.0f}ms/tok)",
        )


# Dark-theme CSS injected into the Gradio page: two-column layout, response
# cards, prompt chips, and suppression of Gradio's default footer branding.
CSS = """
.gradio-container { max-width: 1060px !important; margin: 0 auto !important; }
.gradio-container, .dark { background: #09090b !important; }
#hero { text-align: center; padding: 2rem 0 1rem; }
#hero h1 { font-size: 2.5rem; font-weight: 300; letter-spacing: -0.02em; color: #fafafa; margin: 0; }
#hero .subtitle { color: #71717a; font-size: 0.95rem; margin-top: 0.5rem; }
#hero .accent { color: #3b82f6; }
.response-card { background: #0c0c0f !important; border: 1px solid #1f1f23 !important; border-radius: 8px !important; }
.response-card textarea { background: #0c0c0f !important; border: none !important; color: #e4e4e7 !important; font-size: 0.95rem !important; line-height: 1.6 !important; }
.base-label { color: #71717a !important; font-size: 0.8rem !important; text-transform: uppercase !important; letter-spacing: 0.05em !important; font-weight: 500 !important; }
.void-label { color: #3b82f6 !important; font-size: 0.8rem !important; text-transform: uppercase !important; letter-spacing: 0.05em !important; font-weight: 500 !important; }
.stats-text { font-family: 'SF Mono', 'Fira Code', monospace !important; font-size: 0.8rem !important; color: #52525b !important; }
#prompt-input > label > span { display: none !important; }
#prompt-input textarea { background: #111114 !important; border: 1px solid #1f1f23 !important; border-radius: 8px !important; color: #fafafa !important; font-size: 1rem !important; padding: 1rem !important; }
#prompt-input textarea:focus { border-color: #3b82f6 !important; box-shadow: 0 0 0 2px rgba(59,130,246,0.1) !important; }
#gen-btn { background: #3b82f6 !important; border: none !important; border-radius: 8px !important; font-weight: 500 !important; font-size: 0.9rem !important; padding: 0.75rem 2rem !important; }
#gen-btn:hover { background: #2563eb !important; }
.prompt-chip { background: #111114 !important; border: 1px solid #1f1f23 !important; border-radius: 6px !important; color: #a1a1aa !important; font-size: 0.85rem !important; }
.prompt-chip:hover { border-color: #3b82f6 !important; color: #fafafa !important; }
#footer { text-align: center; padding: 2rem 0; border-top: 1px solid #1f1f23; margin-top: 2rem; }
#footer p { color: #52525b; font-size: 0.8rem; }
#footer a { color: #3b82f6; text-decoration: none; }
footer.svelte-1ax1toq { display: none !important; }
.built-with { display: none !important; }
"""

# ─── Gradio UI: hero, prompt input, side-by-side outputs, chips, footer ──────
with gr.Blocks(css=CSS, theme=gr.themes.Base(primary_hue="blue", neutral_hue="zinc"), title="The Void") as demo:

    # Hero banner.
    gr.HTML("""
    <div id="hero">
        <h1>The <span class="accent">Void</span></h1>
        <p class="subtitle">PyTorch vs Aether. Same prompt. Different engines. Live inference.<br/>
        Left: standard PyTorch CPU. Right: Aether -- pure JS + 14KB WASM SIMD, zero ML dependencies.</p>
    </div>
    """)

    prompt = gr.Textbox(elem_id="prompt-input", placeholder="What would you like to ask?", lines=2, label="Prompt", show_label=False, interactive=True)
    btn = gr.Button("Generate", elem_id="gen-btn", variant="primary")

    # Side-by-side result columns: PyTorch on the left, Aether on the right,
    # separated by a narrow "VS" spacer column.
    with gr.Row(equal_height=True):
        with gr.Column():
            gr.HTML('<p class="base-label">PyTorch (standard)</p>')
            base_out = gr.Textbox(lines=10, show_label=False, interactive=False, elem_classes=["response-card"])
            base_stats = gr.HTML('<p class="stats-text">--</p>')
        with gr.Column(min_width=30):
            gr.HTML('<p style="color:#27272a; text-align:center; padding-top:4rem; font-size:0.75rem; letter-spacing:0.1em;">VS</p>')
        with gr.Column():
            gr.HTML('<p class="void-label">Aether (our engine)</p>')
            aether_out = gr.Textbox(lines=10, show_label=False, interactive=False, elem_classes=["response-card"])
            aether_stats = gr.HTML('<p class="stats-text">--</p>')

    def run_compare(prompt_text):
        # Generator event handler: re-yields compare()'s streaming updates,
        # wrapping each stats string in the styled HTML the UI expects.
        for bt, at, bs, ast in compare(prompt_text):
            yield bt, at, f'<p class="stats-text">{bs}</p>', f'<p class="stats-text">{ast}</p>'

    btn.click(run_compare, [prompt], [base_out, aether_out, base_stats, aether_stats])
    prompt.submit(run_compare, [prompt], [base_out, aether_out, base_stats, aether_stats])

    # Example prompt chips: clicking one fills the textbox, then chains into a
    # comparison run via .then().
    gr.HTML('<p style="color:#52525b; font-size:0.8rem; margin-top:1.5rem; margin-bottom:0.5rem;">Try these:</p>')
    with gr.Row():
        for p in ["hello", "How are you feeling?", "I've been anxious lately.", "Write a haiku about failure.", "What is the meaning of life?"]:
            # `x=p` binds the current loop value as a default argument,
            # avoiding the classic late-binding closure bug.
            gr.Button(p, size="sm", elem_classes=["prompt-chip"]).click(
                fn=lambda x=p: x, outputs=[prompt]
            ).then(fn=run_compare, inputs=[prompt], outputs=[base_out, aether_out, base_stats, aether_stats])

    # Footer: project blurb and links to sibling Spaces.
    gr.HTML("""
    <div id="footer">
        <p style="color:#a1a1aa; font-size:0.85rem; margin-bottom:0.5rem;">
            SmolLM2-360M &middot; Buleyean RL &middot;
            Left: PyTorch CPU (base model) &middot; Right: Aether WASM-SIMD (Buleyean, zero ML deps)
        </p>
        <p>
            <a href="https://forkracefold.com/">Whitepaper</a> &middot;
            <a href="https://huggingface.co/spaces/forkjoin-ai/aether">Aether</a> &middot;
            <a href="https://huggingface.co/spaces/forkjoin-ai/aether-browser">Edge Mesh</a> &middot;
            <a href="https://huggingface.co/spaces/forkjoin-ai/the-void">The Void</a> &middot;
            <a href="https://huggingface.co/spaces/forkjoin-ai/buleyean-rl">Buleyean RL</a> &middot;
            <a href="https://huggingface.co/spaces/forkjoin-ai/glossolalia">Glossolalia</a> &middot;
            <a href="https://huggingface.co/spaces/forkjoin-ai/metacog">Metacog</a> &middot;
            <a href="https://huggingface.co/spaces/forkjoin-ai/five-bules">Five Bules</a> &middot;
            <a href="https://huggingface.co/spaces/forkjoin-ai/void-attention">Void Attention</a> &middot;
            <a href="https://huggingface.co/spaces/forkjoin-ai/quark-personality">Quark Personality</a>
        </p>
        <p style="margin-top:1rem;">500+ Lean 4 theorems &middot; Zero sorry &middot; <a href="https://forkracefold.com/">&phi;&sup2; = &phi; + 1</a></p>
        <p style="margin-top:1rem;">Copyright 2026 forkjoin.ai</p>
    </div>
    """)

# Bind on all interfaces; 7860 is the conventional Hugging Face Spaces port
# (the Aether sidecar occupies 7861).
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)