Spaces:
Running
fix: preserve Model Information layout during generation
Browse files

Keep the nicely formatted markdown tables from get_model_info() and append
Generation Stats section separately instead of overwriting with plain text.
This maintains visual consistency before and after clicking Generate Summary:
- Model Specs (with markdown table)
- Hardware Configuration (with markdown table)
- Inference Settings (with markdown table)
- Generation Stats (new section with markdown table)
- Warning messages (if any)
app.py
CHANGED
|
@@ -811,19 +811,29 @@ def summarize_streaming(
|
|
| 811 |
"original_token_estimate": estimate_tokens(transcript) if not was_truncated else estimate_tokens(encoded[:max_bytes].decode('utf-8', errors='ignore')),
|
| 812 |
}
|
| 813 |
|
| 814 |
-
#
|
|
|
|
|
|
|
|
|
|
| 815 |
input_tokens = estimate_tokens(transcript)
|
| 816 |
max_output_text = f"{max_tokens:,} tokens"
|
| 817 |
if max_tokens != original_max_tokens:
|
| 818 |
max_output_text += f" (adjusted from {original_max_tokens:,} for thinking mode)"
|
| 819 |
-
|
| 820 |
-
|
| 821 |
-
f"
|
| 822 |
-
f"
|
| 823 |
-
f"
|
|
|
|
|
|
|
|
|
|
| 824 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 825 |
if warning:
|
| 826 |
-
info += f"\n\n{warning}"
|
| 827 |
|
| 828 |
# Load model (no-op if already loaded) with timing
|
| 829 |
model_load_start = time.time()
|
|
|
|
| 811 |
"original_token_estimate": estimate_tokens(transcript) if not was_truncated else estimate_tokens(encoded[:max_bytes].decode('utf-8', errors='ignore')),
|
| 812 |
}
|
| 813 |
|
| 814 |
+
# Get base model info with current thread configuration
|
| 815 |
+
info_text, _, _, _ = get_model_info(model_key, n_threads=n_threads)
|
| 816 |
+
|
| 817 |
+
# Build generation stats section
|
| 818 |
input_tokens = estimate_tokens(transcript)
|
| 819 |
max_output_text = f"{max_tokens:,} tokens"
|
| 820 |
if max_tokens != original_max_tokens:
|
| 821 |
max_output_text += f" (adjusted from {original_max_tokens:,} for thinking mode)"
|
| 822 |
+
|
| 823 |
+
generation_stats = (
|
| 824 |
+
f"\n\n### 📈 Generation Stats\n"
|
| 825 |
+
f"| Property | Value |\n"
|
| 826 |
+
f"|----------|-------|\n"
|
| 827 |
+
f"| **Context Window** | {n_ctx:,} tokens |\n"
|
| 828 |
+
f"| **Input Tokens** | ~{input_tokens:,} tokens |\n"
|
| 829 |
+
f"| **Max Output** | {max_output_text} |"
|
| 830 |
)
|
| 831 |
+
|
| 832 |
+
# Combine model info with generation stats
|
| 833 |
+
info = info_text + generation_stats
|
| 834 |
+
|
| 835 |
if warning:
|
| 836 |
+
info += f"\n\n⚠️ {warning}"
|
| 837 |
|
| 838 |
# Load model (no-op if already loaded) with timing
|
| 839 |
model_load_start = time.time()
|