Taylor committed on
Commit 0c8d249 · 1 Parent(s): e5e1d2b

revert: back to working PyTorch for both models


Aether inference has deeper bugs (in the tokenizer, weight layout, or numerical accumulation); debug it separately in edge-workers.

Both models now run on PyTorch/transformers -- the base vs. Buleyean comparison that was working before. Results are streamed so the faster one shows first.
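The streaming comes from a plain Python generator that Gradio consumes: yield once when the first model finishes, then yield again when the second one does. A minimal sketch of the pattern, simplified from the app.py diff below; the run_base and run_buleyean stubs are placeholders for the real gen() calls and are not part of this commit:

import gradio as gr

def run_base(prompt):       # stub; the real app calls gen(prompt, base_model, base_tokenizer)
    return f"[base] {prompt}"

def run_buleyean(prompt):   # stub; the real app calls gen(prompt, bule_model, bule_tokenizer)
    return f"[buleyean] {prompt}"

def compare(prompt):
    # First yield: show the base model's answer as soon as it is ready.
    base_text = run_base(prompt)
    yield base_text, "generating..."
    # Second yield: fill in the Buleyean answer when it finishes.
    bule_text = run_buleyean(prompt)
    yield base_text, bule_text

with gr.Blocks() as demo:
    prompt = gr.Textbox()
    base_out, bule_out = gr.Textbox(), gr.Textbox()
    btn = gr.Button("Generate")
    btn.click(compare, [prompt], [base_out, bule_out])  # generator callbacks stream partial updates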

Files changed (2)
  1. Dockerfile +1 -10
  2. app.py +41 -122
Dockerfile CHANGED
@@ -1,21 +1,12 @@
 FROM python:3.11-slim
 
-# Install Node.js 20
-RUN apt-get update && apt-get install -y curl && \
-    curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
-    apt-get install -y nodejs && \
-    rm -rf /var/lib/apt/lists/*
-
 WORKDIR /app
 
-# Python deps (CPU-only torch from pre-built wheels)
 COPY requirements.txt .
 RUN pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r requirements.txt
 
-# App files
-COPY app.py aether-server.mjs simd-kernels.wasm ./
+COPY app.py ./
 
-# Create cache dir
 RUN mkdir -p /tmp/hf_cache
 
 EXPOSE 7860
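The new app.py below loads the Buleyean checkpoint as a Q4_K_M GGUF file directly through transformers, which dequantizes it into regular float32 tensors on load. A minimal standalone sketch of that load path; it assumes the gguf package is available alongside transformers (requirements.txt is not shown in this diff):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo = "forkjoin-ai/buleyean-smollm2-360m"        # repo id from the app.py diff
gguf = "buleyean-smollm2-360m-q4_k_m.gguf"        # GGUF file from the app.py diff

# transformers dequantizes the GGUF weights into float32 torch tensors;
# the gguf package must be installed for gguf_file= to work.
model = AutoModelForCausalLM.from_pretrained(repo, gguf_file=gguf, torch_dtype=torch.float32)

# The app reuses the base SmolLM2 tokenizer rather than reading one out of the GGUF.
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-360M-Instruct")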
app.py CHANGED
@@ -1,171 +1,89 @@
 """
 The Void -- Buleyean RL
-PyTorch vs Aether. Side by side. Let the speed speak.
+Base model vs Void-trained model. Live inference. Nothing hardcoded.
 """
 
 import gradio as gr
 import torch
-import json
 import time
-import subprocess
-import urllib.request
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-# ─── Start Aether sidecar ────────────────────────────────────────────────────
-print("[Void] Starting Aether inference server...", flush=True)
-aether_proc = subprocess.Popen(
-    ["node", "aether-server.mjs"],
-    env={**__import__('os').environ, "AETHER_PORT": "7861"},
-    stdout=subprocess.PIPE,
-    stderr=subprocess.STDOUT,
-)
-
-# ─── Load PyTorch model ──────────────────────────────────────────────────────
-print("[Void] Loading PyTorch base model...", flush=True)
+print("[Void] Loading base model...", flush=True)
 base_tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-360M-Instruct")
 base_model = AutoModelForCausalLM.from_pretrained(
     "HuggingFaceTB/SmolLM2-360M-Instruct",
     torch_dtype=torch.float32,
     device_map="cpu",
 )
-print("[Void] PyTorch model ready.", flush=True)
-
-# Wait for Aether to be ready
-print("[Void] Waiting for Aether...", flush=True)
-for attempt in range(120):
-    try:
-        req = urllib.request.Request("http://127.0.0.1:7861/health")
-        resp = urllib.request.urlopen(req, timeout=2)
-        health = json.loads(resp.read())
-        if health.get("status") == "ok" and health.get("model") == "loaded":
-            print(f"[Void] Aether ready (model loaded in {health.get('loadTime')}ms)", flush=True)
-            break
-    except Exception:
-        pass
-    # Print Aether stdout lines as they come
-    import select
-    if aether_proc.stdout and select.select([aether_proc.stdout], [], [], 0)[0]:
-        line = aether_proc.stdout.readline()
-        if line:
-            print(f" [Aether] {line.decode().strip()}", flush=True)
-    time.sleep(1)
-else:
-    print("[Void] WARNING: Aether not ready after 120s, continuing anyway", flush=True)
-
-
-def gen_pytorch(prompt):
-    """Generate with PyTorch (standard)"""
-    messages = [{"role": "user", "content": prompt}]
-    text = base_tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    inputs = base_tokenizer(text, return_tensors="pt")
 
+print("[Void] Loading Buleyean model from GGUF...", flush=True)
+bule_model = AutoModelForCausalLM.from_pretrained(
+    "forkjoin-ai/buleyean-smollm2-360m",
+    gguf_file="buleyean-smollm2-360m-q4_k_m.gguf",
+    torch_dtype=torch.float32,
+    device_map="cpu",
+)
+bule_tokenizer = base_tokenizer
+print("[Void] Ready.", flush=True)
+
+
+def gen(prompt, model, tokenizer):
+    messages = [{"role": "user", "content": prompt}]
+    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    inputs = tokenizer(text, return_tensors="pt")
     t0 = time.perf_counter()
     with torch.no_grad():
-        outputs = base_model.generate(
+        outputs = model.generate(
             **inputs,
-            max_new_tokens=50,
+            max_new_tokens=100,
             temperature=0.7,
             top_p=0.9,
             do_sample=True,
-            pad_token_id=base_tokenizer.eos_token_id,
+            pad_token_id=tokenizer.eos_token_id,
         )
     elapsed = time.perf_counter() - t0
    n_tokens = outputs.shape[1] - inputs["input_ids"].shape[1]
-
-    response = base_tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True).strip()
+    response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True).strip()
     ms_per_tok = (elapsed * 1000 / n_tokens) if n_tokens > 0 else 0
     return response, elapsed, n_tokens, ms_per_tok
 
 
-def gen_aether(prompt):
-    """Generate with Aether (our engine)"""
-    try:
-        data = json.dumps({"prompt": prompt, "max_tokens": 50}).encode()
-        req = urllib.request.Request(
-            "http://127.0.0.1:7861/generate",
-            data=data,
-            headers={"Content-Type": "application/json"},
-        )
-        t0 = time.perf_counter()
-        resp = urllib.request.urlopen(req, timeout=300)
-        wall_time = time.perf_counter() - t0
-        result = json.loads(resp.read())
-        return (
-            result["text"],
-            result["totalTimeMs"] / 1000,
-            result["tokens"],
-            result["avgTokenMs"],
-        )
-    except urllib.error.HTTPError as e:
-        body = e.read().decode() if e.fp else str(e)
-        try:
-            detail = json.loads(body).get("error", body[:200])
-        except Exception:
-            detail = body[:200]
-        return f"[Aether error: {detail}]", 0, 0, 0
-    except Exception as e:
-        return f"[Aether error: {e}]", 0, 0, 0
-
-
 def compare(prompt):
-    """Generator: yields results as each engine finishes."""
     if not prompt or not prompt.strip():
         yield "", "", "", ""
         return
 
-    # Run PyTorch first, show immediately
-    base_text, base_time, base_toks, base_ms = gen_pytorch(prompt)
+    base_text, base_time, base_toks, base_ms = gen(prompt, base_model, base_tokenizer)
     base_stats = f"{base_toks} tokens in {base_time:.1f}s ({base_ms:.0f}ms/tok)"
     yield base_text, "generating...", base_stats, "running..."
 
-    # Then run Aether, show when done
-    aether_text, aether_time, aether_toks, aether_ms = gen_aether(prompt)
-    aether_stats = f"{aether_toks} tokens in {aether_time:.1f}s ({aether_ms:.0f}ms/tok)"
-    yield base_text, aether_text, base_stats, aether_stats
+    bule_text, bule_time, bule_toks, bule_ms = gen(prompt, bule_model, bule_tokenizer)
+    bule_stats = f"{bule_toks} tokens in {bule_time:.1f}s ({bule_ms:.0f}ms/tok)"
+    yield base_text, bule_text, base_stats, bule_stats
 
 
 CSS = """
-/* AeonOS Design System */
 .gradio-container { max-width: 1060px !important; margin: 0 auto !important; }
 .gradio-container, .dark { background: #09090b !important; }
-
-/* Hero */
 #hero { text-align: center; padding: 2rem 0 1rem; }
 #hero h1 { font-size: 2.5rem; font-weight: 300; letter-spacing: -0.02em; color: #fafafa; margin: 0; }
 #hero .subtitle { color: #71717a; font-size: 0.95rem; margin-top: 0.5rem; }
 #hero .accent { color: #3b82f6; }
-
-/* Cards */
 .response-card { background: #0c0c0f !important; border: 1px solid #1f1f23 !important; border-radius: 8px !important; }
 .response-card textarea { background: #0c0c0f !important; border: none !important; color: #e4e4e7 !important; font-size: 0.95rem !important; line-height: 1.6 !important; }
-
-/* Labels */
 .base-label { color: #71717a !important; font-size: 0.8rem !important; text-transform: uppercase !important; letter-spacing: 0.05em !important; font-weight: 500 !important; }
 .void-label { color: #3b82f6 !important; font-size: 0.8rem !important; text-transform: uppercase !important; letter-spacing: 0.05em !important; font-weight: 500 !important; }
-
-/* Stats */
 .stats-text { font-family: 'SF Mono', 'Fira Code', monospace !important; font-size: 0.8rem !important; color: #52525b !important; }
-.stats-text.faster { color: #22c55e !important; }
-
-/* Input */
 #prompt-input > label > span { display: none !important; }
 #prompt-input textarea { background: #111114 !important; border: 1px solid #1f1f23 !important; border-radius: 8px !important; color: #fafafa !important; font-size: 1rem !important; padding: 1rem !important; }
 #prompt-input textarea:focus { border-color: #3b82f6 !important; box-shadow: 0 0 0 2px rgba(59,130,246,0.1) !important; }
-
-/* Generate button */
 #gen-btn { background: #3b82f6 !important; border: none !important; border-radius: 8px !important; font-weight: 500 !important; font-size: 0.9rem !important; padding: 0.75rem 2rem !important; transition: all 150ms !important; }
 #gen-btn:hover { background: #2563eb !important; transform: translateY(-1px) !important; box-shadow: 0 4px 12px rgba(59,130,246,0.3) !important; }
-
-/* Prompt chips */
 .prompt-chip { background: #111114 !important; border: 1px solid #1f1f23 !important; border-radius: 6px !important; color: #a1a1aa !important; font-size: 0.85rem !important; transition: all 150ms !important; }
 .prompt-chip:hover { border-color: #3b82f6 !important; color: #fafafa !important; background: #18181b !important; }
-
-/* Footer */
 #footer { text-align: center; padding: 2rem 0; border-top: 1px solid #1f1f23; margin-top: 2rem; }
 #footer p { color: #52525b; font-size: 0.8rem; }
 #footer a { color: #3b82f6; text-decoration: none; }
-
-/* Hide Gradio chrome */
 footer.svelte-1ax1toq { display: none !important; }
 .built-with { display: none !important; }
 """
@@ -175,8 +93,8 @@ with gr.Blocks(css=CSS, theme=gr.themes.Base(primary_hue="blue", neutral_hue="zi
     gr.HTML("""
     <div id="hero">
         <h1>The <span class="accent">Void</span></h1>
-        <p class="subtitle">PyTorch vs Aether. Same model. Different engines. Live inference.<br/>
-        Left: standard PyTorch CPU. Right: Aether WASM-SIMD kernels. Both generate in real-time.</p>
+        <p class="subtitle">Live inference from models trained on rejection alone. No reward model. No chosen examples.<br/>
+        Left: standard instruction-tuned. Right: trained from the Void. Both generate in real-time.</p>
     </div>
     """)
 
@@ -185,46 +103,47 @@ with gr.Blocks(css=CSS, theme=gr.themes.Base(primary_hue="blue", neutral_hue="zi
 
     with gr.Row(equal_height=True):
         with gr.Column():
-            gr.HTML('<p class="base-label">PyTorch (standard)</p>')
-            base_out = gr.Textbox(lines=8, show_label=False, interactive=False, elem_classes=["response-card"])
+            gr.HTML('<p class="base-label">Base Model</p>')
+            base_out = gr.Textbox(lines=10, show_label=False, interactive=False, elem_classes=["response-card"])
             base_stats = gr.HTML('<p class="stats-text">--</p>')
         with gr.Column(min_width=30):
             gr.HTML('<p style="color:#27272a; text-align:center; padding-top:4rem; font-size:0.75rem; letter-spacing:0.1em;">VS</p>')
         with gr.Column():
-            gr.HTML('<p class="void-label">Aether (our engine)</p>')
-            aether_out = gr.Textbox(lines=8, show_label=False, interactive=False, elem_classes=["response-card"])
-            aether_stats = gr.HTML('<p class="stats-text">--</p>')
+            gr.HTML('<p class="void-label">Trained from the Void</p>')
+            bule_out = gr.Textbox(lines=10, show_label=False, interactive=False, elem_classes=["response-card"])
+            bule_stats = gr.HTML('<p class="stats-text">--</p>')
 
     def run_compare(prompt_text):
-        for base_text, aether_text, b_stats, a_stats in compare(prompt_text):
+        for base_text, bule_text, b_stats, a_stats in compare(prompt_text):
             yield (
                 base_text,
-                aether_text,
+                bule_text,
                 f'<p class="stats-text">{b_stats}</p>',
                 f'<p class="stats-text">{a_stats}</p>',
             )
 
-    btn.click(run_compare, [prompt], [base_out, aether_out, base_stats, aether_stats])
-    prompt.submit(run_compare, [prompt], [base_out, aether_out, base_stats, aether_stats])
+    btn.click(run_compare, [prompt], [base_out, bule_out, base_stats, bule_stats])
+    prompt.submit(run_compare, [prompt], [base_out, bule_out, base_stats, bule_stats])
 
     gr.HTML('<p style="color:#52525b; font-size:0.8rem; margin-top:1.5rem; margin-bottom:0.5rem;">Try these:</p>')
     with gr.Row():
         for p in ["hello", "How are you feeling?", "I've been anxious lately.", "Write a haiku about failure.", "What is the meaning of life?"]:
             gr.Button(p, size="sm", elem_classes=["prompt-chip"]).click(
                 fn=lambda x=p: x, outputs=[prompt]
-            ).then(fn=run_compare, inputs=[prompt], outputs=[base_out, aether_out, base_stats, aether_stats])
+            ).then(fn=run_compare, inputs=[prompt], outputs=[base_out, bule_out, base_stats, bule_stats])
 
     gr.HTML("""
     <div id="footer">
         <p style="color:#a1a1aa; font-size:0.85rem; margin-bottom:0.5rem;">
-            SmolLM2-360M-Instruct &middot; Buleyean RL &middot;
-            Left: PyTorch CPU &middot; Right: Aether WASM-SIMD (zero ML dependencies)
+            SmolLM2-360M-Instruct &middot; Buleyean RL &middot; Q4_K_M GGUF &middot; Live inference on CPU
        </p>
        <p>
            <a href="https://forkracefold.com/">Whitepaper</a> &middot;
            <a href="https://github.com/forkjoin-ai/buleyean-rl">Library</a> &middot;
            <a href="https://huggingface.co/forkjoin-ai">Models</a> &middot;
-            <a href="https://huggingface.co/spaces/forkjoin-ai/glossolalia">Glossolalia</a>
+            <a href="https://huggingface.co/spaces/forkjoin-ai/glossolalia">Glossolalia</a> &middot;
+            <a href="https://huggingface.co/spaces/forkjoin-ai/void-attention">Void Attention</a> &middot;
+            <a href="https://huggingface.co/spaces/forkjoin-ai/metacog">METACOG</a>
        </p>
        <p style="margin-top:1rem;">500+ Lean 4 theorems &middot; Zero sorry &middot; <a href="https://forkracefold.com/">&phi;&sup2; = &phi; + 1</a></p>
    </div>