McClain Claude Opus 4.6 committed on
Commit
74ad07d
·
1 Parent(s): 5d0ce94

Streaming progress bar, rename to Top N, fix scoring

Browse files

- Run model.generate() in background thread with BaseStreamer token counter
- Yield progress bar updates during generation (█░ style)
- Rename "Samples (best-of-N)" → "Top N"
- Show per-sample scoring status during annotation phase

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +63 -18
app.py CHANGED
@@ -52,7 +52,7 @@ from bokeh.resources import CDN as BOKEH_CDN
52
  from plannotate.annotate import annotate as _plannotate_annotate
53
  from plannotate.bokeh_plot import get_bokeh as _plannotate_bokeh
54
  from plannotate import resources as _plannotate_rsc
55
- from transformers import AutoModelForCausalLM, AutoTokenizer
56
 
57
  # ---------------------------------------------------------------------------
58
  # Configuration
@@ -500,6 +500,27 @@ def _score_annotation(
500
  return composite
501
 
502
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
503
  def generate_and_select(
504
  prompt_text: str,
505
  temperature: float,
@@ -513,6 +534,7 @@ def generate_and_select(
513
 
514
  prompt_text = _ensure_prompt_format(prompt_text)
515
  num_samples = max(1, int(num_samples))
 
516
  print(f"[generate] prompt: {prompt_text!r}, n={num_samples}, "
517
  f"temp={temperature}, max_tokens={max_tokens}")
518
 
@@ -523,25 +545,46 @@ def generate_and_select(
523
  padding=True,
524
  ).to(DEVICE)
525
 
526
- yield "", f"Generating {num_samples} sample(s)…", None, None, ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
527
 
528
  t0 = time.time()
529
- try:
530
- with torch.no_grad():
531
- outputs = model.generate(
532
- **inputs,
533
- max_new_tokens=int(max_tokens),
534
- temperature=float(temperature),
535
- do_sample=True,
536
- top_k=50,
537
- use_cache=True,
538
- )
539
- except Exception as exc:
540
- print(f"[generate] ERROR: {exc}")
541
- yield "", f"Generation failed: {exc}", None, None, ""
542
- return
543
  gen_time = time.time() - t0
544
 
 
 
 
 
 
 
 
545
  # Decode all samples
546
  samples = []
547
  for i in range(outputs.shape[0]):
@@ -553,7 +596,7 @@ def generate_and_select(
553
  print(f"[generate] sample {i}: {len(dna)} bp, "
554
  f"{'complete' if has_eos else 'truncated'}")
555
 
556
- yield "", (f"Generated {num_samples} sample(s) in {gen_time:.1f}s. "
557
  "Annotating…"), None, None, ""
558
 
559
  # If only 1 sample, skip scoring
@@ -571,6 +614,7 @@ def generate_and_select(
571
  for i, (dna, _) in enumerate(samples):
572
  if len(dna) < 100:
573
  continue
 
574
  try:
575
  hits = _plannotate_annotate(dna, is_detailed=True, linear=False)
576
  score = _score_annotation(hits, prompt=prompt_text)
@@ -585,6 +629,7 @@ def generate_and_select(
585
  tag = "complete" if has_eos else "max-length"
586
  html_map, table, ann_status = _annotate(dna)
587
  status = (f"Best of {num_samples}: sample {best_idx+1}, "
 
588
  f"{len(dna)} bp ({tag}, {gen_time:.1f}s). {ann_status}")
589
  yield dna, status, html_map, table, ""
590
 
@@ -712,7 +757,7 @@ with gr.Blocks(title="PlasmidSpace") as demo:
712
  0.1, 1.0, value=0.3, step=0.05, label="Temperature",
713
  )
714
  num_samples = gr.Slider(
715
- 1, 8, value=1, step=1, label="Samples (best-of-N)",
716
  )
717
 
718
  max_tokens = gr.Slider(
 
52
  from plannotate.annotate import annotate as _plannotate_annotate
53
  from plannotate.bokeh_plot import get_bokeh as _plannotate_bokeh
54
  from plannotate import resources as _plannotate_rsc
55
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BaseStreamer
56
 
57
  # ---------------------------------------------------------------------------
58
  # Configuration
 
500
  return composite
501
 
502
 
503
class _TokenCounter(BaseStreamer):
    """Counts generation steps so the UI can show progress.

    ``model.generate()`` calls ``put()`` first with the prompt ids and then
    once per newly generated token (this is why ``TextStreamer`` exposes a
    ``skip_prompt`` option).  The first call is therefore skipped so that
    ``step`` counts only generated tokens; otherwise the progress bar would
    start at 1 before any token exists and over-report the final count.
    """

    def __init__(self):
        self.step = 0            # number of generated tokens observed so far
        self.done = False        # set True once generate() signals completion
        self._seen_prompt = False  # first put() carries the prompt, not a token

    def put(self, value):
        """Called by generate() with the prompt, then once per new token."""
        if not self._seen_prompt:
            self._seen_prompt = True
            return
        self.step += 1

    def end(self):
        """Called by generate() when generation has finished."""
        self.done = True
515
+
516
+
517
+ def _progress_bar(step: int, total: int, width: int = 20) -> str:
518
+ frac = min(step / max(total, 1), 1.0)
519
+ filled = int(width * frac)
520
+ bar = "█" * filled + "░" * (width - filled)
521
+ return f"[{bar}] {step}/{total} tokens ({frac:.0%})"
522
+
523
+
524
  def generate_and_select(
525
  prompt_text: str,
526
  temperature: float,
 
534
 
535
  prompt_text = _ensure_prompt_format(prompt_text)
536
  num_samples = max(1, int(num_samples))
537
+ max_tokens = int(max_tokens)
538
  print(f"[generate] prompt: {prompt_text!r}, n={num_samples}, "
539
  f"temp={temperature}, max_tokens={max_tokens}")
540
 
 
545
  padding=True,
546
  ).to(DEVICE)
547
 
548
+ # Run generation in background thread with token counter
549
+ counter = _TokenCounter()
550
+ result_holder: list = [None, None] # [outputs, error]
551
+
552
+ def _run_generate():
553
+ try:
554
+ with torch.no_grad():
555
+ result_holder[0] = model.generate(
556
+ **inputs,
557
+ max_new_tokens=max_tokens,
558
+ temperature=float(temperature),
559
+ do_sample=True,
560
+ top_k=50,
561
+ use_cache=True,
562
+ streamer=counter,
563
+ )
564
+ except Exception as exc:
565
+ result_holder[1] = exc
566
 
567
  t0 = time.time()
568
+ gen_thread = threading.Thread(target=_run_generate)
569
+ gen_thread.start()
570
+
571
+ # Poll progress and yield status updates
572
+ n_label = f"{num_samples} sample(s)" if num_samples > 1 else "1 sample"
573
+ while gen_thread.is_alive():
574
+ elapsed = time.time() - t0
575
+ bar = _progress_bar(counter.step, max_tokens)
576
+ yield "", f"Generating {n_label}… {bar} ({elapsed:.1f}s)", None, None, ""
577
+ gen_thread.join(timeout=0.4)
578
+
 
 
 
579
  gen_time = time.time() - t0
580
 
581
+ if result_holder[1] is not None:
582
+ print(f"[generate] ERROR: {result_holder[1]}")
583
+ yield "", f"Generation failed: {result_holder[1]}", None, None, ""
584
+ return
585
+
586
+ outputs = result_holder[0]
587
+
588
  # Decode all samples
589
  samples = []
590
  for i in range(outputs.shape[0]):
 
596
  print(f"[generate] sample {i}: {len(dna)} bp, "
597
  f"{'complete' if has_eos else 'truncated'}")
598
 
599
+ yield "", (f"Generated {n_label} ({counter.step} tokens) in {gen_time:.1f}s. "
600
  "Annotating…"), None, None, ""
601
 
602
  # If only 1 sample, skip scoring
 
614
  for i, (dna, _) in enumerate(samples):
615
  if len(dna) < 100:
616
  continue
617
+ yield "", (f"Scoring sample {i+1}/{num_samples}…"), None, None, ""
618
  try:
619
  hits = _plannotate_annotate(dna, is_detailed=True, linear=False)
620
  score = _score_annotation(hits, prompt=prompt_text)
 
629
  tag = "complete" if has_eos else "max-length"
630
  html_map, table, ann_status = _annotate(dna)
631
  status = (f"Best of {num_samples}: sample {best_idx+1}, "
632
+ f"score={best_score:.2f}, "
633
  f"{len(dna)} bp ({tag}, {gen_time:.1f}s). {ann_status}")
634
  yield dna, status, html_map, table, ""
635
 
 
757
  0.1, 1.0, value=0.3, step=0.05, label="Temperature",
758
  )
759
  num_samples = gr.Slider(
760
+ 1, 8, value=1, step=1, label="Top N",
761
  )
762
 
763
  max_tokens = gr.Slider(