Luigi committed on
Commit
9b2e057
·
1 Parent(s): eeea4ac

fix: preserve Model Information layout during generation

Browse files

Keep the nicely formatted markdown tables from get_model_info() and append
Generation Stats section separately instead of overwriting with plain text.

This maintains visual consistency before and after clicking Generate Summary:
- Model Specs (with markdown table)
- Hardware Configuration (with markdown table)
- Inference Settings (with markdown table)
- Generation Stats (new section with markdown table)
- Warning messages (if any)

Files changed (1) hide show
  1. app.py +17 -7
app.py CHANGED
@@ -811,19 +811,29 @@ def summarize_streaming(
811
  "original_token_estimate": estimate_tokens(transcript) if not was_truncated else estimate_tokens(encoded[:max_bytes].decode('utf-8', errors='ignore')),
812
  }
813
 
814
- # Build info text
 
 
 
815
  input_tokens = estimate_tokens(transcript)
816
  max_output_text = f"{max_tokens:,} tokens"
817
  if max_tokens != original_max_tokens:
818
  max_output_text += f" (adjusted from {original_max_tokens:,} for thinking mode)"
819
- info = (
820
- f"**Model:** {model['name']}\n\n"
821
- f"**Context:** {n_ctx:,} tokens | "
822
- f"**Input:** ~{input_tokens:,} tokens | "
823
- f"**Max output:** {max_output_text}"
 
 
 
824
  )
 
 
 
 
825
  if warning:
826
- info += f"\n\n{warning}"
827
 
828
  # Load model (no-op if already loaded) with timing
829
  model_load_start = time.time()
 
811
  "original_token_estimate": estimate_tokens(transcript) if not was_truncated else estimate_tokens(encoded[:max_bytes].decode('utf-8', errors='ignore')),
812
  }
813
 
814
+ # Get base model info with current thread configuration
815
+ info_text, _, _, _ = get_model_info(model_key, n_threads=n_threads)
816
+
817
+ # Build generation stats section
818
  input_tokens = estimate_tokens(transcript)
819
  max_output_text = f"{max_tokens:,} tokens"
820
  if max_tokens != original_max_tokens:
821
  max_output_text += f" (adjusted from {original_max_tokens:,} for thinking mode)"
822
+
823
+ generation_stats = (
824
+ f"\n\n### 📈 Generation Stats\n"
825
+ f"| Property | Value |\n"
826
+ f"|----------|-------|\n"
827
+ f"| **Context Window** | {n_ctx:,} tokens |\n"
828
+ f"| **Input Tokens** | ~{input_tokens:,} tokens |\n"
829
+ f"| **Max Output** | {max_output_text} |"
830
  )
831
+
832
+ # Combine model info with generation stats
833
+ info = info_text + generation_stats
834
+
835
  if warning:
836
+ info += f"\n\n⚠️ {warning}"
837
 
838
  # Load model (no-op if already loaded) with timing
839
  model_load_start = time.time()