Taylor committed on
Commit
64ff7cb
·
1 Parent(s): 0c8d249

perf: run both models in parallel with ThreadPoolExecutor

Browse files

Whichever finishes first shows first. No more sequential waiting.
If Buleyean is faster, it appears before the base model.

Files changed (1) hide show
  1. app.py +37 -7
app.py CHANGED
@@ -6,6 +6,7 @@ Base model vs Void-trained model. Live inference. Nothing hardcoded.
6
  import gradio as gr
7
  import torch
8
  import time
 
9
  from transformers import AutoModelForCausalLM, AutoTokenizer
10
 
11
  print("[Void] Loading base model...", flush=True)
@@ -53,13 +54,42 @@ def compare(prompt):
53
  yield "", "", "", ""
54
  return
55
 
56
- base_text, base_time, base_toks, base_ms = gen(prompt, base_model, base_tokenizer)
57
- base_stats = f"{base_toks} tokens in {base_time:.1f}s ({base_ms:.0f}ms/tok)"
58
- yield base_text, "generating...", base_stats, "running..."
59
-
60
- bule_text, bule_time, bule_toks, bule_ms = gen(prompt, bule_model, bule_tokenizer)
61
- bule_stats = f"{bule_toks} tokens in {bule_time:.1f}s ({bule_ms:.0f}ms/tok)"
62
- yield base_text, bule_text, base_stats, bule_stats
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
 
65
  CSS = """
 
6
  import gradio as gr
7
  import torch
8
  import time
9
+ from concurrent.futures import ThreadPoolExecutor, as_completed
10
  from transformers import AutoModelForCausalLM, AutoTokenizer
11
 
12
  print("[Void] Loading base model...", flush=True)
 
54
  yield "", "", "", ""
55
  return
56
 
57
+ # Run both in parallel -- whichever finishes first shows first
58
+ base_result = [None]
59
+ bule_result = [None]
60
+
61
+ def run_base():
62
+ base_result[0] = gen(prompt, base_model, base_tokenizer)
63
+
64
+ def run_bule():
65
+ bule_result[0] = gen(prompt, bule_model, bule_tokenizer)
66
+
67
+ with ThreadPoolExecutor(max_workers=2) as pool:
68
+ futures = {
69
+ pool.submit(run_base): "base",
70
+ pool.submit(run_bule): "bule",
71
+ }
72
+ for future in as_completed(futures):
73
+ name = futures[future]
74
+ future.result() # raise if error
75
+ if name == "base" and base_result[0]:
76
+ text, t, toks, ms = base_result[0]
77
+ stats = f"{toks} tokens in {t:.1f}s ({ms:.0f}ms/tok)"
78
+ bule_text = bule_result[0][0] if bule_result[0] else "generating..."
79
+ bule_s = f"{bule_result[0][2]} tokens in {bule_result[0][1]:.1f}s ({bule_result[0][3]:.0f}ms/tok)" if bule_result[0] else "running..."
80
+ yield text, bule_text, stats, bule_s
81
+ elif name == "bule" and bule_result[0]:
82
+ text, t, toks, ms = bule_result[0]
83
+ stats = f"{toks} tokens in {t:.1f}s ({ms:.0f}ms/tok)"
84
+ base_text = base_result[0][0] if base_result[0] else "generating..."
85
+ base_s = f"{base_result[0][2]} tokens in {base_result[0][1]:.1f}s ({base_result[0][3]:.0f}ms/tok)" if base_result[0] else "running..."
86
+ yield base_text, text, base_s, stats
87
+
88
+ # Final yield with both results
89
+ if base_result[0] and bule_result[0]:
90
+ bt, b_t, b_toks, b_ms = base_result[0]
91
+ vt, v_t, v_toks, v_ms = bule_result[0]
92
+ yield bt, vt, f"{b_toks} tokens in {b_t:.1f}s ({b_ms:.0f}ms/tok)", f"{v_toks} tokens in {v_t:.1f}s ({v_ms:.0f}ms/tok)"
93
 
94
 
95
  CSS = """