Spaces:
Sleeping
Sleeping
Taylor committed on
Commit Β·
382cccc
1
Parent(s): a902eca
feat: dual model support -- Buleyean (default) vs base, user toggle
Browse files
Loads both SmolLM2-360M models at startup:
- buleyean: void-trained (forkjoin-ai/buleyean-smollm2-360m Q8_0)
- base: standard instruct (bartowski/SmolLM2-360M-Instruct Q8_0)
Radio toggle lets users pick which model to run. Buleyean default.
Same model feeds both standard and glossolalia decoders.
Act 3 (Metacog) builds on this same dual-model + glossolalia foundation.
- aether-server.mjs +48 -21
- app.py +16 -12
aether-server.mjs
CHANGED
|
@@ -105,24 +105,34 @@ function applyRoPE(x, headDim, position, theta) {
|
|
| 105 |
}
|
| 106 |
}
|
| 107 |
|
| 108 |
-
// βββ
|
| 109 |
-
|
| 110 |
-
|
|
|
|
|
|
|
|
|
|
| 111 |
const t0=Date.now();const buf=readFileSync(ggufPath);const parsed=parseGGUF(buf);
|
| 112 |
console.log(`[Aether] Parsed ${parsed.tensors.length} tensors in ${Date.now()-t0}ms`);
|
| 113 |
const tokenizer=new Tok(JSON.parse(readFileSync(tokPath,'utf8')));
|
| 114 |
const byName={};for(const t of parsed.tensors)byName[t.name]=t;
|
| 115 |
-
function get(
|
| 116 |
console.log('[Aether] Dequantizing...');const tokenEmbd=get('token_embd.weight');const layers=[];
|
| 117 |
for(let i=0;i<C.numLayers;i++){if(i%8===0)console.log(`[Aether] Layer ${i}/${C.numLayers}`);layers.push({an:get(`blk.${i}.attn_norm.weight`),fn:get(`blk.${i}.ffn_norm.weight`),qw:get(`blk.${i}.attn_q.weight`),kw:get(`blk.${i}.attn_k.weight`),vw:get(`blk.${i}.attn_v.weight`),ow:get(`blk.${i}.attn_output.weight`),gw:get(`blk.${i}.ffn_gate.weight`),uw:get(`blk.${i}.ffn_up.weight`),dw:get(`blk.${i}.ffn_down.weight`)});}
|
| 118 |
const outNorm=get('output_norm.weight');let outWeight=get('output.weight');if(!outWeight){console.log('[Aether] Tied embeddings');outWeight=tokenEmbd;}
|
| 119 |
-
|
| 120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
}
|
| 122 |
|
| 123 |
// βββ Forward Pass (returns raw logits) ββββββββββββββββββββββββββββββββββββββ
|
| 124 |
-
function forwardPass(prompt) {
|
| 125 |
const o = op();
|
|
|
|
| 126 |
const chatPrompt = `<|im_start|>user\n${prompt}<|im_end|>\n<|im_start|>assistant\n`;
|
| 127 |
const inputTokens = model.tokenizer.encode(chatPrompt);
|
| 128 |
const allTokens = [...inputTokens];
|
|
@@ -247,9 +257,10 @@ function sampleGlossolalia(logits) {
|
|
| 247 |
|
| 248 |
// βββ Generation Loops βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 249 |
|
| 250 |
-
function generateStandard(prompt, maxTokens = 8192) {
|
| 251 |
const t0 = performance.now();
|
| 252 |
-
const
|
|
|
|
| 253 |
const allTokens = [...fwd.inputTokens];
|
| 254 |
const kvC = Array.from({length:C.numLayers},()=>({k:[],v:[]}));
|
| 255 |
const tokenTimes = [];
|
|
@@ -301,9 +312,10 @@ function generateStandard(prompt, maxTokens = 8192) {
|
|
| 301 |
};
|
| 302 |
}
|
| 303 |
|
| 304 |
-
function generateGlossolalia(prompt, maxTokens = 8192) {
|
| 305 |
const t0 = performance.now();
|
| 306 |
-
const
|
|
|
|
| 307 |
const allTokens = [...fwd.inputTokens];
|
| 308 |
const kvC = Array.from({length:C.numLayers},()=>({k:[],v:[]}));
|
| 309 |
const tokenTimes = [];
|
|
@@ -364,8 +376,8 @@ const server = createServer((req, res) => {
|
|
| 364 |
req.on('data', c => body += c);
|
| 365 |
req.on('end', () => {
|
| 366 |
try {
|
| 367 |
-
const { prompt, max_tokens } = JSON.parse(body);
|
| 368 |
-
const result = genFn(prompt, max_tokens || 256);
|
| 369 |
res.writeHead(200, { 'Content-Type': 'application/json' });
|
| 370 |
res.end(JSON.stringify(result));
|
| 371 |
} catch (e) {
|
|
@@ -380,26 +392,41 @@ const server = createServer((req, res) => {
|
|
| 380 |
else if (req.method==='POST' && req.url==='/generate-glossolalia') handle(generateGlossolalia);
|
| 381 |
else if (req.url==='/health') {
|
| 382 |
res.writeHead(200,{'Content-Type':'application/json'});
|
| 383 |
-
res.end(JSON.stringify({status:'ok',
|
| 384 |
} else { res.writeHead(404); res.end(); }
|
| 385 |
});
|
| 386 |
|
| 387 |
// βββ Main βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 388 |
-
const
|
| 389 |
-
const
|
|
|
|
| 390 |
|
| 391 |
async function main() {
|
| 392 |
simd = await loadSIMD();
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
|
|
|
|
|
|
| 396 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 397 |
if (!existsSync(tokPath)) {
|
| 398 |
console.log('[Aether] Downloading tokenizer...');
|
| 399 |
execSync(`python3 -c "from huggingface_hub import hf_hub_download; hf_hub_download('HuggingFaceTB/SmolLM2-360M-Instruct', 'tokenizer.json', cache_dir='/tmp/hf_cache', local_dir='/tmp/hf_cache')"`, { stdio: 'inherit' });
|
| 400 |
}
|
| 401 |
-
|
| 402 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 403 |
}
|
| 404 |
|
| 405 |
main().catch(e => { console.error('[Aether] Fatal:', e); process.exit(1); });
|
|
|
|
| 105 |
}
|
| 106 |
}
|
| 107 |
|
| 108 |
+
// βββ Models βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
// Registry of every loaded model, keyed by short name ('base', 'buleyean').
const models = {};
let activeModel = null; // NOTE(review): not referenced in this chunk — presumably driven by the user toggle elsewhere; confirm

/**
 * Parse a GGUF checkpoint, dequantize all weights, and register the result
 * in the `models` registry under `name`.
 *
 * @param {string} name - registry key ('base' or 'buleyean').
 * @param {string} ggufPath - path to the GGUF weights file.
 * @param {string} tokPath - path to the tokenizer.json file.
 * @returns {object} the registered model record
 *   ({tokenEmbd, layers, outNorm, outWeight, tokenizer, loadTime, name}).
 */
function loadModel(name, ggufPath, tokPath) {
  console.log(`[Aether] Loading ${name}...`);
  const startedAt = Date.now();
  const buf = readFileSync(ggufPath);
  const parsed = parseGGUF(buf);
  console.log(`[Aether] Parsed ${parsed.tensors.length} tensors in ${Date.now()-startedAt}ms`);
  const tokenizer = new Tok(JSON.parse(readFileSync(tokPath, 'utf8')));

  // Index tensors by name for O(1) lookup during dequantization.
  const byName = {};
  for (const t of parsed.tensors) byName[t.name] = t;

  // Fetch one tensor by name and dequantize it; null when the tensor is absent.
  const get = (nm) => {
    const t = byName[nm];
    if (!t) return null;
    const raw = new Uint8Array(buf.buffer, buf.byteOffset + parsed.dataOffset + Number(t.offset), t.size);
    return dequantByType(raw, t.numElements, t.type);
  };

  console.log('[Aether] Dequantizing...');
  const tokenEmbd = get('token_embd.weight');
  const layers = [];
  for (let i = 0; i < C.numLayers; i++) {
    if (i % 8 === 0) console.log(`[Aether] Layer ${i}/${C.numLayers}`);
    layers.push({
      an: get(`blk.${i}.attn_norm.weight`),
      fn: get(`blk.${i}.ffn_norm.weight`),
      qw: get(`blk.${i}.attn_q.weight`),
      kw: get(`blk.${i}.attn_k.weight`),
      vw: get(`blk.${i}.attn_v.weight`),
      ow: get(`blk.${i}.attn_output.weight`),
      gw: get(`blk.${i}.ffn_gate.weight`),
      uw: get(`blk.${i}.ffn_up.weight`),
      dw: get(`blk.${i}.ffn_down.weight`),
    });
  }

  const outNorm = get('output_norm.weight');
  let outWeight = get('output.weight');
  if (!outWeight) {
    // No dedicated output matrix in the GGUF — weights are tied to the embedding.
    console.log('[Aether] Tied embeddings');
    outWeight = tokenEmbd;
  }

  const loadTime = Date.now() - startedAt;
  console.log(`[Aether] ${name} loaded in ${(loadTime/1000).toFixed(1)}s`);
  models[name] = { tokenEmbd, layers, outNorm, outWeight, tokenizer, loadTime, name };
  return models[name];
}

/**
 * Resolve a model by name, falling back to 'base', then to any loaded model.
 * May return undefined when the registry is empty.
 */
function getModel(name) {
  return models[name] || models['base'] || Object.values(models)[0];
}
|
| 131 |
|
| 132 |
// βββ Forward Pass (returns raw logits) ββββββββββββββββββββββββββββββββββββββ
|
| 133 |
+
function forwardPass(prompt, modelName) {
|
| 134 |
const o = op();
|
| 135 |
+
const model = getModel(modelName);
|
| 136 |
const chatPrompt = `<|im_start|>user\n${prompt}<|im_end|>\n<|im_start|>assistant\n`;
|
| 137 |
const inputTokens = model.tokenizer.encode(chatPrompt);
|
| 138 |
const allTokens = [...inputTokens];
|
|
|
|
| 257 |
|
| 258 |
// βββ Generation Loops βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 259 |
|
| 260 |
+
function generateStandard(prompt, maxTokens = 8192, modelName = 'buleyean') {
|
| 261 |
const t0 = performance.now();
|
| 262 |
+
const model = getModel(modelName);
|
| 263 |
+
const fwd = forwardPass(prompt, modelName);
|
| 264 |
const allTokens = [...fwd.inputTokens];
|
| 265 |
const kvC = Array.from({length:C.numLayers},()=>({k:[],v:[]}));
|
| 266 |
const tokenTimes = [];
|
|
|
|
| 312 |
};
|
| 313 |
}
|
| 314 |
|
| 315 |
+
function generateGlossolalia(prompt, maxTokens = 8192, modelName = 'buleyean') {
|
| 316 |
const t0 = performance.now();
|
| 317 |
+
const model = getModel(modelName);
|
| 318 |
+
const fwd = forwardPass(prompt, modelName);
|
| 319 |
const allTokens = [...fwd.inputTokens];
|
| 320 |
const kvC = Array.from({length:C.numLayers},()=>({k:[],v:[]}));
|
| 321 |
const tokenTimes = [];
|
|
|
|
| 376 |
req.on('data', c => body += c);
|
| 377 |
req.on('end', () => {
|
| 378 |
try {
|
| 379 |
+
const { prompt, max_tokens, model } = JSON.parse(body);
|
| 380 |
+
const result = genFn(prompt, max_tokens || 256, model || 'buleyean');
|
| 381 |
res.writeHead(200, { 'Content-Type': 'application/json' });
|
| 382 |
res.end(JSON.stringify(result));
|
| 383 |
} catch (e) {
|
|
|
|
| 392 |
else if (req.method==='POST' && req.url==='/generate-glossolalia') handle(generateGlossolalia);
|
| 393 |
else if (req.url==='/health') {
|
| 394 |
res.writeHead(200,{'Content-Type':'application/json'});
|
| 395 |
+
res.end(JSON.stringify({status:'ok',models:Object.keys(models),simd:!!simd,loadTimes:Object.fromEntries(Object.entries(models).map(([k,v])=>[k,v.loadTime]))}));
|
| 396 |
} else { res.writeHead(404); res.end(); }
|
| 397 |
});
|
| 398 |
|
| 399 |
// βββ Main βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 400 |
+
// Locations of the on-disk model and tokenizer artifacts.
const basePath = '/tmp/hf_cache/smollm2-360m-q8_0.gguf';
const bulePath = '/tmp/hf_cache/buleyean-smollm2-360m-q8_0.gguf';
const tokPath = '/tmp/hf_cache/tokenizer.json';

// Download an artifact via huggingface_hub unless it is already cached on disk.
function fetchIfMissing(path, banner, pyCode) {
  if (existsSync(path)) return;
  console.log(banner);
  execSync(`python3 -c "${pyCode}"`, { stdio: 'inherit' });
}

/**
 * Startup: initialize SIMD, fetch both GGUF checkpoints plus the shared
 * tokenizer if absent, load both models, and start the HTTP server.
 */
async function main() {
  simd = await loadSIMD();

  // Base model (standard instruct); downloaded file is renamed to basePath.
  fetchIfMissing(
    basePath,
    '[Aether] Downloading base SmolLM2-360M Q8_0...',
    `from huggingface_hub import hf_hub_download; hf_hub_download('bartowski/SmolLM2-360M-Instruct-GGUF', 'SmolLM2-360M-Instruct-Q8_0.gguf', cache_dir='/tmp/hf_cache', local_dir='/tmp/hf_cache'); import shutil; shutil.move('/tmp/hf_cache/SmolLM2-360M-Instruct-Q8_0.gguf', '${basePath}')`
  );

  // Buleyean model (void-trained); filename already matches bulePath.
  fetchIfMissing(
    bulePath,
    '[Aether] Downloading Buleyean SmolLM2-360M Q8_0...',
    `from huggingface_hub import hf_hub_download; hf_hub_download('forkjoin-ai/buleyean-smollm2-360m', 'buleyean-smollm2-360m-q8_0.gguf', cache_dir='/tmp/hf_cache', local_dir='/tmp/hf_cache')`
  );

  // Shared tokenizer — both checkpoints use the SmolLM2 vocabulary.
  fetchIfMissing(
    tokPath,
    '[Aether] Downloading tokenizer...',
    `from huggingface_hub import hf_hub_download; hf_hub_download('HuggingFaceTB/SmolLM2-360M-Instruct', 'tokenizer.json', cache_dir='/tmp/hf_cache', local_dir='/tmp/hf_cache')`
  );

  // Load both models up front so the toggle can switch without a reload.
  loadModel('base', basePath, tokPath);
  loadModel('buleyean', bulePath, tokPath);

  server.listen(PORT, '127.0.0.1', () => console.log(`[Aether] http://127.0.0.1:${PORT} (SIMD: ${!!simd}, models: ${Object.keys(models).join(', ')})`));
}

main().catch(e => { console.error('[Aether] Fatal:', e); process.exit(1); });
|
app.py
CHANGED
|
@@ -30,7 +30,7 @@ for attempt in range(180):
|
|
| 30 |
req = urllib.request.Request("http://127.0.0.1:7861/health")
|
| 31 |
resp = urllib.request.urlopen(req, timeout=2)
|
| 32 |
health = json.loads(resp.read())
|
| 33 |
-
if health.get("status") == "ok" and health.get("
|
| 34 |
print(f"[Glossolalia] Aether ready ({health.get('loadTime')}ms, SIMD: {health.get('simd')})", flush=True)
|
| 35 |
break
|
| 36 |
except Exception:
|
|
@@ -44,9 +44,9 @@ else:
|
|
| 44 |
print("[Glossolalia] WARNING: Aether not ready after 180s", flush=True)
|
| 45 |
|
| 46 |
|
| 47 |
-
def call_aether(endpoint, prompt, max_tokens=256):
|
| 48 |
try:
|
| 49 |
-
data = json.dumps({"prompt": prompt, "max_tokens": max_tokens}).encode()
|
| 50 |
req = urllib.request.Request(
|
| 51 |
f"http://127.0.0.1:7861/{endpoint}", data=data,
|
| 52 |
headers={"Content-Type": "application/json"},
|
|
@@ -125,7 +125,7 @@ def format_layer_health(diag_list):
|
|
| 125 |
|
| 126 |
# βββ Compare Function ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 127 |
|
| 128 |
-
def compare(prompt, max_tokens):
|
| 129 |
empty = ("", "", "", "", "", "", "")
|
| 130 |
if not prompt or not prompt.strip():
|
| 131 |
yield empty
|
|
@@ -136,9 +136,9 @@ def compare(prompt, max_tokens):
|
|
| 136 |
glo_result = [None]
|
| 137 |
|
| 138 |
def run_std():
|
| 139 |
-
std_result[0] = call_aether("generate-standard", prompt, max_tokens)
|
| 140 |
def run_glo():
|
| 141 |
-
glo_result[0] = call_aether("generate-glossolalia", prompt, max_tokens)
|
| 142 |
|
| 143 |
def fmt_stats(r):
|
| 144 |
if not r:
|
|
@@ -209,7 +209,9 @@ with gr.Blocks(css=CSS, theme=gr.themes.Base(primary_hue="purple", neutral_hue="
|
|
| 209 |
|
| 210 |
with gr.Row():
|
| 211 |
prompt = gr.Textbox(elem_id="prompt-input", placeholder="What is the shape of failure?", lines=2, label="Prompt", show_label=False, interactive=True, scale=4)
|
| 212 |
-
|
|
|
|
|
|
|
| 213 |
|
| 214 |
btn = gr.Button("Generate", elem_id="gen-btn", variant="primary")
|
| 215 |
|
|
@@ -236,20 +238,22 @@ with gr.Blocks(css=CSS, theme=gr.themes.Base(primary_hue="purple", neutral_hue="
|
|
| 236 |
|
| 237 |
outputs = [std_out, glo_out, std_stats, glo_stats, glo_diag, std_diag, layer_health]
|
| 238 |
|
| 239 |
-
|
| 240 |
-
|
|
|
|
|
|
|
| 241 |
st, gt, ss, gs, gd, sd, lh = vals
|
| 242 |
yield st, gt, f'<p class="stats-text">{ss}</p>', f'<p class="stats-text">{gs}</p>', gd, sd, lh
|
| 243 |
|
| 244 |
-
btn.click(run,
|
| 245 |
-
prompt.submit(run,
|
| 246 |
|
| 247 |
gr.HTML('<p style="color:#52525b; font-size:0.8rem; margin-top:1.5rem; margin-bottom:0.5rem;">Try these:</p>')
|
| 248 |
with gr.Row():
|
| 249 |
for p in ["What is the shape of failure?", "The theory of everything begins with", "If silence had a color", "Write a haiku about parallel universes"]:
|
| 250 |
gr.Button(p, size="sm", elem_classes=["prompt-chip"]).click(
|
| 251 |
fn=lambda x=p: x, outputs=[prompt]
|
| 252 |
-
).then(fn=run, inputs=
|
| 253 |
|
| 254 |
gr.HTML("""
|
| 255 |
<div id="footer">
|
|
|
|
| 30 |
req = urllib.request.Request("http://127.0.0.1:7861/health")
|
| 31 |
resp = urllib.request.urlopen(req, timeout=2)
|
| 32 |
health = json.loads(resp.read())
|
| 33 |
+
if health.get("status") == "ok" and health.get("models"):
|
| 34 |
print(f"[Glossolalia] Aether ready ({health.get('loadTime')}ms, SIMD: {health.get('simd')})", flush=True)
|
| 35 |
break
|
| 36 |
except Exception:
|
|
|
|
| 44 |
print("[Glossolalia] WARNING: Aether not ready after 180s", flush=True)
|
| 45 |
|
| 46 |
|
| 47 |
+
def call_aether(endpoint, prompt, max_tokens=256, model_name="buleyean"):
|
| 48 |
try:
|
| 49 |
+
data = json.dumps({"prompt": prompt, "max_tokens": max_tokens, "model": model_name}).encode()
|
| 50 |
req = urllib.request.Request(
|
| 51 |
f"http://127.0.0.1:7861/{endpoint}", data=data,
|
| 52 |
headers={"Content-Type": "application/json"},
|
|
|
|
| 125 |
|
| 126 |
# βββ Compare Function ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 127 |
|
| 128 |
+
def compare(prompt, max_tokens, model_name="buleyean"):
|
| 129 |
empty = ("", "", "", "", "", "", "")
|
| 130 |
if not prompt or not prompt.strip():
|
| 131 |
yield empty
|
|
|
|
| 136 |
glo_result = [None]
|
| 137 |
|
| 138 |
def run_std():
|
| 139 |
+
std_result[0] = call_aether("generate-standard", prompt, max_tokens, model_name)
|
| 140 |
def run_glo():
|
| 141 |
+
glo_result[0] = call_aether("generate-glossolalia", prompt, max_tokens, model_name)
|
| 142 |
|
| 143 |
def fmt_stats(r):
|
| 144 |
if not r:
|
|
|
|
| 209 |
|
| 210 |
with gr.Row():
|
| 211 |
prompt = gr.Textbox(elem_id="prompt-input", placeholder="What is the shape of failure?", lines=2, label="Prompt", show_label=False, interactive=True, scale=4)
|
| 212 |
+
with gr.Column(scale=1):
|
| 213 |
+
model_choice = gr.Radio(choices=["buleyean", "base"], value="buleyean", label="Model", info="Buleyean = void-trained, Base = standard instruct")
|
| 214 |
+
max_tok = gr.Slider(minimum=8, maximum=512, value=64, step=1, label="Max tokens")
|
| 215 |
|
| 216 |
btn = gr.Button("Generate", elem_id="gen-btn", variant="primary")
|
| 217 |
|
|
|
|
| 238 |
|
| 239 |
outputs = [std_out, glo_out, std_stats, glo_stats, glo_diag, std_diag, layer_health]
|
| 240 |
|
| 241 |
+
inputs = [prompt, max_tok, model_choice]
|
| 242 |
+
|
| 243 |
+
def run(prompt_text, max_tokens, model_name):
|
| 244 |
+
for vals in compare(prompt_text, max_tokens, model_name):
|
| 245 |
st, gt, ss, gs, gd, sd, lh = vals
|
| 246 |
yield st, gt, f'<p class="stats-text">{ss}</p>', f'<p class="stats-text">{gs}</p>', gd, sd, lh
|
| 247 |
|
| 248 |
+
btn.click(run, inputs, outputs)
|
| 249 |
+
prompt.submit(run, inputs, outputs)
|
| 250 |
|
| 251 |
gr.HTML('<p style="color:#52525b; font-size:0.8rem; margin-top:1.5rem; margin-bottom:0.5rem;">Try these:</p>')
|
| 252 |
with gr.Row():
|
| 253 |
for p in ["What is the shape of failure?", "The theory of everything begins with", "If silence had a color", "Write a haiku about parallel universes"]:
|
| 254 |
gr.Button(p, size="sm", elem_classes=["prompt-chip"]).click(
|
| 255 |
fn=lambda x=p: x, outputs=[prompt]
|
| 256 |
+
).then(fn=run, inputs=inputs, outputs=outputs)
|
| 257 |
|
| 258 |
gr.HTML("""
|
| 259 |
<div id="footer">
|