Spaces:
Sleeping
Sleeping
Taylor committed on
Commit ·
64ff7cb
1
Parent(s): 0c8d249
perf: run both models in parallel with ThreadPoolExecutor
Browse files
Whichever finishes first shows first. No more sequential waiting.
If Buleyean is faster, it appears before the base model.
app.py
CHANGED
|
@@ -6,6 +6,7 @@ Base model vs Void-trained model. Live inference. Nothing hardcoded.
|
|
| 6 |
import gradio as gr
|
| 7 |
import torch
|
| 8 |
import time
|
|
|
|
| 9 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 10 |
|
| 11 |
print("[Void] Loading base model...", flush=True)
|
|
@@ -53,13 +54,42 @@ def compare(prompt):
|
|
| 53 |
yield "", "", "", ""
|
| 54 |
return
|
| 55 |
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
|
| 64 |
|
| 65 |
CSS = """
|
|
|
|
| 6 |
import gradio as gr
|
| 7 |
import torch
|
| 8 |
import time
|
| 9 |
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 10 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 11 |
|
| 12 |
print("[Void] Loading base model...", flush=True)
|
|
|
|
| 54 |
yield "", "", "", ""
|
| 55 |
return
|
| 56 |
|
| 57 |
+
# Run both in parallel -- whichever finishes first shows first
|
| 58 |
+
base_result = [None]
|
| 59 |
+
bule_result = [None]
|
| 60 |
+
|
| 61 |
+
def run_base():
|
| 62 |
+
base_result[0] = gen(prompt, base_model, base_tokenizer)
|
| 63 |
+
|
| 64 |
+
def run_bule():
|
| 65 |
+
bule_result[0] = gen(prompt, bule_model, bule_tokenizer)
|
| 66 |
+
|
| 67 |
+
with ThreadPoolExecutor(max_workers=2) as pool:
|
| 68 |
+
futures = {
|
| 69 |
+
pool.submit(run_base): "base",
|
| 70 |
+
pool.submit(run_bule): "bule",
|
| 71 |
+
}
|
| 72 |
+
for future in as_completed(futures):
|
| 73 |
+
name = futures[future]
|
| 74 |
+
future.result() # raise if error
|
| 75 |
+
if name == "base" and base_result[0]:
|
| 76 |
+
text, t, toks, ms = base_result[0]
|
| 77 |
+
stats = f"{toks} tokens in {t:.1f}s ({ms:.0f}ms/tok)"
|
| 78 |
+
bule_text = bule_result[0][0] if bule_result[0] else "generating..."
|
| 79 |
+
bule_s = f"{bule_result[0][2]} tokens in {bule_result[0][1]:.1f}s ({bule_result[0][3]:.0f}ms/tok)" if bule_result[0] else "running..."
|
| 80 |
+
yield text, bule_text, stats, bule_s
|
| 81 |
+
elif name == "bule" and bule_result[0]:
|
| 82 |
+
text, t, toks, ms = bule_result[0]
|
| 83 |
+
stats = f"{toks} tokens in {t:.1f}s ({ms:.0f}ms/tok)"
|
| 84 |
+
base_text = base_result[0][0] if base_result[0] else "generating..."
|
| 85 |
+
base_s = f"{base_result[0][2]} tokens in {base_result[0][1]:.1f}s ({base_result[0][3]:.0f}ms/tok)" if base_result[0] else "running..."
|
| 86 |
+
yield base_text, text, base_s, stats
|
| 87 |
+
|
| 88 |
+
# Final yield with both results
|
| 89 |
+
if base_result[0] and bule_result[0]:
|
| 90 |
+
bt, b_t, b_toks, b_ms = base_result[0]
|
| 91 |
+
vt, v_t, v_toks, v_ms = bule_result[0]
|
| 92 |
+
yield bt, vt, f"{b_toks} tokens in {b_t:.1f}s ({b_ms:.0f}ms/tok)", f"{v_toks} tokens in {v_t:.1f}s ({v_ms:.0f}ms/tok)"
|
| 93 |
|
| 94 |
|
| 95 |
CSS = """
|