Luigi committed on
Commit
9b2e057
·
1 Parent(s): eeea4ac

fix: preserve Model Information layout during generation

Browse files

Keep the nicely formatted markdown tables from get_model_info() and append
Generation Stats section separately instead of overwriting with plain text.

This maintains visual consistency before and after clicking Generate Summary:
- Model Specs (with markdown table)
- Hardware Configuration (with markdown table)
- Inference Settings (with markdown table)
- Generation Stats (new section with markdown table)
- Warning messages (if any)

Files changed (1) hide show
  1. app.py +17 -7
app.py CHANGED
@@ -811,19 +811,29 @@ def summarize_streaming(
811
  "original_token_estimate": estimate_tokens(transcript) if not was_truncated else estimate_tokens(encoded[:max_bytes].decode('utf-8', errors='ignore')),
812
  }
813
 
814
- # Build info text
 
 
 
815
  input_tokens = estimate_tokens(transcript)
816
  max_output_text = f"{max_tokens:,} tokens"
817
  if max_tokens != original_max_tokens:
818
  max_output_text += f" (adjusted from {original_max_tokens:,} for thinking mode)"
819
- info = (
820
- f"**Model:** {model['name']}\n\n"
821
- f"**Context:** {n_ctx:,} tokens | "
822
- f"**Input:** ~{input_tokens:,} tokens | "
823
- f"**Max output:** {max_output_text}"
 
 
 
824
  )
 
 
 
 
825
  if warning:
826
- info += f"\n\n{warning}"
827
 
828
  # Load model (no-op if already loaded) with timing
829
  model_load_start = time.time()
 
811
  "original_token_estimate": estimate_tokens(transcript) if not was_truncated else estimate_tokens(encoded[:max_bytes].decode('utf-8', errors='ignore')),
812
  }
813
 
814
+ # Get base model info with current thread configuration
815
+ info_text, _, _, _ = get_model_info(model_key, n_threads=n_threads)
816
+
817
+ # Build generation stats section
818
  input_tokens = estimate_tokens(transcript)
819
  max_output_text = f"{max_tokens:,} tokens"
820
  if max_tokens != original_max_tokens:
821
  max_output_text += f" (adjusted from {original_max_tokens:,} for thinking mode)"
822
+
823
+ generation_stats = (
824
+ f"\n\n### 📈 Generation Stats\n"
825
+ f"| Property | Value |\n"
826
+ f"|----------|-------|\n"
827
+ f"| **Context Window** | {n_ctx:,} tokens |\n"
828
+ f"| **Input Tokens** | ~{input_tokens:,} tokens |\n"
829
+ f"| **Max Output** | {max_output_text} |"
830
  )
831
+
832
+ # Combine model info with generation stats
833
+ info = info_text + generation_stats
834
+
835
  if warning:
836
+ info += f"\n\n⚠️ {warning}"
837
 
838
  # Load model (no-op if already loaded) with timing
839
  model_load_start = time.time()