Luigi committed on
Commit
1ea1cce
·
1 Parent(s): 88fac5c

refactor: reorganize UI with single mode selector and stage-based Advanced Mode layout

Browse files

- Remove redundant tab system, use only radio button for mode selection
- Implement visibility-based switching between Standard/Advanced modes
- Centralize all inference parameters into respective mode panels
- Reorganize Advanced Mode into 3 dedicated sections (Extraction, Deduplication, Synthesis)
- Move hardware configuration into both Standard and Advanced mode panels
- Remove global Advanced Settings accordion
- Add mode-specific inference parameters (temperature, top_p, top_k) for Advanced Mode
- Group model selection with stage-specific parameters
- Add collapsible model details accordions per stage
- Improve UI clarity with logical grouping and pipeline flow

Files changed (1) hide show
  1. app.py +247 -166
app.py CHANGED
@@ -1435,7 +1435,10 @@ def summarize_advanced(
1435
  output_language: str,
1436
  max_tokens: int,
1437
  enable_logging: bool,
1438
- n_threads: int = 2
 
 
 
1439
  ) -> Generator[Dict[str, Any], None, None]:
1440
  """
1441
  Advanced 3-stage pipeline: Extraction β†’ Deduplication β†’ Synthesis.
@@ -1595,6 +1598,13 @@ def summarize_advanced(
1595
 
1596
  # Synthesize
1597
  synthesis_config = get_model_config(synthesis_model_key, "synthesis")
 
 
 
 
 
 
 
1598
  final_summary = ""
1599
  final_thinking = ""
1600
 
@@ -2529,8 +2539,7 @@ def create_interface():
2529
  """Create and configure the Gradio interface."""
2530
 
2531
  with gr.Blocks(
2532
- title="Tiny Scribe - AI Transcript Summarizer",
2533
- css=custom_css
2534
  ) as demo:
2535
 
2536
  # Header section (simplified - no Row/Column wrapper needed for full-width)
@@ -2598,13 +2607,18 @@ def create_interface():
2598
  # ==========================================
2599
  # Section 2: Mode Selection (Standard vs Advanced)
2600
  # ==========================================
2601
- with gr.Tabs() as mode_tabs:
 
 
 
 
 
 
 
 
 
2602
 
2603
- # ===== STANDARD MODE =====
2604
- with gr.TabItem("πŸ“Š Standard Mode"):
2605
- gr.HTML('<div style="font-size: 0.9em; color: #64748b; margin-bottom: 10px;">Single-model direct summarization</div>')
2606
-
2607
- with gr.Tabs() as model_tabs:
2608
 
2609
  # --- Tab 1: Preset Models ---
2610
  with gr.TabItem("πŸ€– Preset Models"):
@@ -2678,144 +2692,19 @@ def create_interface():
2678
  elem_classes=["stats-grid"]
2679
  )
2680
 
2681
- # ===== ADVANCED MODE =====
2682
- with gr.TabItem("🧠 Advanced Mode (3-Model Pipeline)"):
2683
- gr.HTML('<div style="font-size: 0.9em; color: #64748b; margin-bottom: 10px;">Extraction β†’ Deduplication β†’ Synthesis</div>')
2684
-
2685
- # Model Selection Row
2686
- gr.HTML('<div class="section-header"><span class="section-icon">πŸ€–</span> Model Selection</div>')
2687
- with gr.Row():
2688
- extraction_model = gr.Dropdown(
2689
- choices=[(EXTRACTION_MODELS[k]["name"], k) for k in EXTRACTION_MODELS.keys()],
2690
- value=DEFAULT_EXTRACTION_MODEL,
2691
- label="πŸ” Stage 1: Extraction Model (≀1.7B)",
2692
- info="Extracts structured items from windows"
2693
- )
2694
-
2695
- embedding_model = gr.Dropdown(
2696
- choices=[("granite-107m", "granite-107m"), ("granite-278m", "granite-278m"),
2697
- ("gemma-300m", "gemma-300m"), ("qwen-600m", "qwen-600m")],
2698
- value="granite-107m",
2699
- label="🧬 Stage 2: Embedding Model",
2700
- info="Deduplication via semantic similarity"
2701
- )
2702
-
2703
- synthesis_model = gr.Dropdown(
2704
- choices=[(SYNTHESIS_MODELS[k]["name"], k) for k in SYNTHESIS_MODELS.keys()],
2705
- value=DEFAULT_SYNTHESIS_MODEL,
2706
- label="✨ Stage 3: Synthesis Model (1B-30B)",
2707
- info="Generates executive summary"
2708
- )
2709
-
2710
- # Extraction Parameters Row
2711
- gr.HTML('<div class="section-header" style="margin-top: 12px;"><span class="section-icon">βš™οΈ</span> Extraction Parameters</div>')
2712
- with gr.Row():
2713
- extraction_n_ctx = gr.Slider(
2714
- minimum=2048,
2715
- maximum=8192,
2716
- step=1024,
2717
- value=4096,
2718
- label="πŸͺŸ Extraction Context Window (n_ctx)",
2719
- info="Smaller = more windows, Larger = fewer windows"
2720
- )
2721
-
2722
- overlap_turns = gr.Slider(
2723
- minimum=1,
2724
- maximum=5,
2725
- step=1,
2726
- value=2,
2727
- label="πŸ”„ Window Overlap (turns)",
2728
- info="Speaker turns shared between windows"
2729
- )
2730
-
2731
- # Deduplication Parameters Row
2732
- with gr.Row():
2733
- similarity_threshold = gr.Slider(
2734
- minimum=0.70,
2735
- maximum=0.95,
2736
- step=0.01,
2737
- value=0.85,
2738
- label="🎯 Deduplication Similarity Threshold",
2739
- info="Higher = stricter duplicate detection"
2740
- )
2741
-
2742
- # Reasoning Controls (Separate checkboxes)
2743
- gr.HTML('<div class="section-header" style="margin-top: 12px;"><span class="section-icon">🧠</span> Reasoning Configuration</div>')
2744
- with gr.Row():
2745
- enable_extraction_reasoning = gr.Checkbox(
2746
- value=False,
2747
- visible=False,
2748
- label="🧠 Enable Reasoning for Extraction",
2749
- info="Thinking before JSON (Qwen3 hybrid models only)"
2750
- )
2751
-
2752
- enable_synthesis_reasoning = gr.Checkbox(
2753
- value=True,
2754
- visible=True,
2755
- label="🧠 Enable Reasoning for Synthesis",
2756
- info="Thinking for final summary generation"
2757
- )
2758
-
2759
- # Output Settings Row
2760
- gr.HTML('<div class="section-header" style="margin-top: 12px;"><span class="section-icon">🌐</span> Output Settings</div>')
2761
- with gr.Row():
2762
- adv_output_language = gr.Radio(
2763
- choices=["en", "zh-TW"],
2764
- value="en",
2765
- label="Output Language",
2766
- info="Extraction auto-detects, synthesis uses this"
2767
- )
2768
-
2769
- adv_max_tokens = gr.Slider(
2770
- minimum=512,
2771
- maximum=4096,
2772
- step=128,
2773
- value=2048,
2774
- label="πŸ“ Max Synthesis Tokens",
2775
- info="Maximum tokens for final summary"
2776
- )
2777
-
2778
- # Logging Control
2779
- enable_detailed_logging = gr.Checkbox(
2780
- value=True,
2781
- label="πŸ“ Enable Detailed Trace Logging",
2782
- info="Save JSONL trace (embedded in download JSON)"
2783
- )
2784
-
2785
- # Model Info Accordion
2786
- with gr.Accordion("πŸ“‹ Model Details & Settings", open=False):
2787
- with gr.Row():
2788
- with gr.Column():
2789
- extraction_model_info = gr.Markdown(
2790
- value=get_extraction_model_info(DEFAULT_EXTRACTION_MODEL)
2791
- )
2792
- with gr.Column():
2793
- embedding_model_info = gr.Markdown(
2794
- value=get_embedding_model_info("granite-107m")
2795
- )
2796
- with gr.Column():
2797
- synthesis_model_info = gr.Markdown(
2798
- value=get_synthesis_model_info(DEFAULT_SYNTHESIS_MODEL)
2799
- )
2800
-
2801
- # ==========================================
2802
- # Section 3: Advanced Settings
2803
- # ==========================================
2804
- with gr.Accordion("βš™οΈ Advanced Settings", open=False):
2805
-
2806
- # Hardware Configuration
2807
- gr.HTML('<div class="section-header"><span class="section-icon">πŸ–₯️</span> Hardware Configuration</div>')
2808
 
2809
  thread_config_dropdown = gr.Dropdown(
2810
- choices=[
2811
- ("HF Spaces Free Tier (2 vCPUs)", "free"),
2812
- ("HF Spaces CPU Upgrade (8 vCPUs)", "upgrade"),
2813
- ("Custom (manual)", "custom"),
2814
- ],
2815
- value=DEFAULT_THREAD_PRESET,
2816
- label="CPU Thread Preset",
2817
- info="Select hardware tier or specify custom thread count"
2818
- )
2819
 
2820
  custom_threads_slider = gr.Slider(
2821
  minimum=1,
@@ -2827,7 +2716,7 @@ def create_interface():
2827
  visible=DEFAULT_THREAD_PRESET == "custom"
2828
  )
2829
 
2830
- # Inference Parameters
2831
  gr.HTML('<div class="section-header" style="margin-top: 16px;"><span class="section-icon">πŸŽ›οΈ</span> Inference Parameters</div>')
2832
 
2833
  temperature_slider = gr.Slider(
@@ -2862,17 +2751,185 @@ def create_interface():
2862
  label="Top K",
2863
  info="Limits token selection to top K tokens (0 = disabled)"
2864
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2865
 
2866
- # Debug Tools (nested accordion)
2867
- with gr.Accordion("πŸ› Debug Tools", open=False):
2868
- system_prompt_debug = gr.Textbox(
2869
- label="System Prompt (Read-Only)",
2870
- lines=5,
2871
- max_lines=10,
2872
- interactive=False,
2873
- value="Select a model and click 'Generate Summary' to see the system prompt.",
2874
- info="This shows the exact system prompt sent to the LLM"
2875
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2876
 
2877
  # ==========================================
2878
  # Submit Button
@@ -2969,6 +3026,23 @@ def create_interface():
2969
  inputs=[thread_config_dropdown],
2970
  outputs=[custom_threads_slider]
2971
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2972
 
2973
  # Copy buttons
2974
  copy_summary_btn.click(
@@ -3279,20 +3353,21 @@ def create_interface():
3279
  extraction_n_ctx_val, overlap_turns_val, similarity_threshold_val,
3280
  enable_extraction_reasoning_val, enable_synthesis_reasoning_val,
3281
  adv_output_language_val, adv_max_tokens_val, enable_logging_val,
 
 
3282
  # Mode selector
3283
- mode_tabs_val
3284
  ):
3285
- """Route to Standard or Advanced mode based on selected tab."""
3286
 
3287
- # Determine active mode (Gradio returns index of active tab)
3288
- # 0 = Standard Mode, 1 = Advanced Mode
3289
- is_advanced_mode = (mode_tabs_val == 1)
3290
 
3291
  if is_advanced_mode:
3292
  # Advanced Mode: Use summarize_advanced()
3293
- # Get n_threads
3294
- thread_map = {"free": 2, "upgrade": 8, "custom": max(1, custom_threads_val)}
3295
- n_threads = thread_map.get(thread_config_val, 2)
3296
 
3297
  # Get transcript
3298
  transcript = ""
@@ -3319,7 +3394,10 @@ def create_interface():
3319
  output_language=adv_output_language_val,
3320
  max_tokens=adv_max_tokens_val,
3321
  enable_logging=enable_logging_val,
3322
- n_threads=n_threads
 
 
 
3323
  ):
3324
  stage = update.get("stage", "")
3325
 
@@ -3386,8 +3464,10 @@ def create_interface():
3386
  extraction_n_ctx, overlap_turns, similarity_threshold,
3387
  enable_extraction_reasoning, enable_synthesis_reasoning,
3388
  adv_output_language, adv_max_tokens, enable_detailed_logging,
 
 
3389
  # Mode selector
3390
- mode_tabs
3391
  ],
3392
  outputs=[thinking_output, summary_output, info_output, metrics_state, system_prompt_debug],
3393
  show_progress="full"
@@ -3416,5 +3496,6 @@ if __name__ == "__main__":
3416
  server_name="0.0.0.0",
3417
  server_port=7860,
3418
  share=False,
3419
- show_error=True
 
3420
  )
 
1435
  output_language: str,
1436
  max_tokens: int,
1437
  enable_logging: bool,
1438
+ n_threads: int = 2,
1439
+ temperature: float = 0.6,
1440
+ top_p: float = 0.95,
1441
+ top_k: int = 20
1442
  ) -> Generator[Dict[str, Any], None, None]:
1443
  """
1444
  Advanced 3-stage pipeline: Extraction β†’ Deduplication β†’ Synthesis.
 
1598
 
1599
  # Synthesize
1600
  synthesis_config = get_model_config(synthesis_model_key, "synthesis")
1601
+ # Override inference settings with custom parameters
1602
+ synthesis_config["inference_settings"] = {
1603
+ "temperature": temperature,
1604
+ "top_p": top_p,
1605
+ "top_k": top_k,
1606
+ "repeat_penalty": 1.1
1607
+ }
1608
  final_summary = ""
1609
  final_thinking = ""
1610
 
 
2539
  """Create and configure the Gradio interface."""
2540
 
2541
  with gr.Blocks(
2542
+ title="Tiny Scribe - AI Transcript Summarizer"
 
2543
  ) as demo:
2544
 
2545
  # Header section (simplified - no Row/Column wrapper needed for full-width)
 
2607
  # ==========================================
2608
  # Section 2: Mode Selection (Standard vs Advanced)
2609
  # ==========================================
2610
+ mode_radio = gr.Radio(
2611
+ choices=["Standard Mode", "Advanced Mode (3-Model Pipeline)"],
2612
+ value="Standard Mode",
2613
+ label="🎯 Summarization Mode",
2614
+ info="Select between single-model Standard or multi-model Advanced mode"
2615
+ )
2616
+
2617
+ # ===== STANDARD MODE =====
2618
+ with gr.Group(visible=True) as standard_mode_group:
2619
+ gr.HTML('<div style="font-size: 0.9em; color: #64748b; margin-bottom: 10px;">πŸ“Š <strong>Standard Mode</strong> - Single-model direct summarization</div>')
2620
 
2621
+ with gr.Tabs() as model_tabs:
 
 
 
 
2622
 
2623
  # --- Tab 1: Preset Models ---
2624
  with gr.TabItem("πŸ€– Preset Models"):
 
2692
  elem_classes=["stats-grid"]
2693
  )
2694
 
2695
+ # Hardware Configuration (Standard Mode)
2696
+ gr.HTML('<div class="section-header" style="margin-top: 16px;"><span class="section-icon">πŸ–₯️</span> Hardware Configuration</div>')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2697
 
2698
  thread_config_dropdown = gr.Dropdown(
2699
+ choices=[
2700
+ ("HF Spaces Free Tier (2 vCPUs)", "free"),
2701
+ ("HF Spaces CPU Upgrade (8 vCPUs)", "upgrade"),
2702
+ ("Custom (manual)", "custom"),
2703
+ ],
2704
+ value=DEFAULT_THREAD_PRESET,
2705
+ label="CPU Thread Preset",
2706
+ info="Select hardware tier or specify custom thread count"
2707
+ )
2708
 
2709
  custom_threads_slider = gr.Slider(
2710
  minimum=1,
 
2716
  visible=DEFAULT_THREAD_PRESET == "custom"
2717
  )
2718
 
2719
+ # Inference Parameters (Standard Mode)
2720
  gr.HTML('<div class="section-header" style="margin-top: 16px;"><span class="section-icon">πŸŽ›οΈ</span> Inference Parameters</div>')
2721
 
2722
  temperature_slider = gr.Slider(
 
2751
  label="Top K",
2752
  info="Limits token selection to top K tokens (0 = disabled)"
2753
  )
2754
+
2755
+ # ===== ADVANCED MODE =====
2756
+ with gr.Group(visible=False) as advanced_mode_group:
2757
+ gr.HTML('<div style="font-size: 0.9em; color: #64748b; margin-bottom: 16px;">🧠 <strong>Advanced Mode (3-Model Pipeline)</strong> - Extraction β†’ Deduplication β†’ Synthesis</div>')
2758
+
2759
+ # ========== STAGE 1: EXTRACTION ==========
2760
+ gr.HTML('<div class="section-header"><span class="section-icon">πŸ”</span> Stage 1: Extraction</div>')
2761
+
2762
+ extraction_model = gr.Dropdown(
2763
+ choices=[(EXTRACTION_MODELS[k]["name"], k) for k in EXTRACTION_MODELS.keys()],
2764
+ value=DEFAULT_EXTRACTION_MODEL,
2765
+ label="Extraction Model (≀1.7B)",
2766
+ info="Extracts structured items from transcript windows"
2767
+ )
2768
+
2769
+ with gr.Row():
2770
+ extraction_n_ctx = gr.Slider(
2771
+ minimum=2048,
2772
+ maximum=8192,
2773
+ step=1024,
2774
+ value=4096,
2775
+ label="Context Window (n_ctx)",
2776
+ info="Smaller = more windows, Larger = fewer windows"
2777
+ )
2778
+
2779
+ overlap_turns = gr.Slider(
2780
+ minimum=1,
2781
+ maximum=5,
2782
+ step=1,
2783
+ value=2,
2784
+ label="Window Overlap (turns)",
2785
+ info="Speaker turns shared between consecutive windows"
2786
+ )
2787
+
2788
+ enable_extraction_reasoning = gr.Checkbox(
2789
+ value=False,
2790
+ visible=False,
2791
+ label="Enable Reasoning Mode",
2792
+ info="Thinking before JSON extraction (Qwen3 hybrid models only)"
2793
+ )
2794
+
2795
+ with gr.Accordion("πŸ“Š Extraction Model Details", open=False):
2796
+ extraction_model_info = gr.Markdown(
2797
+ value=get_extraction_model_info(DEFAULT_EXTRACTION_MODEL)
2798
+ )
2799
+
2800
+ # ========== STAGE 2: DEDUPLICATION ==========
2801
+ gr.HTML('<div class="section-header" style="margin-top: 20px;"><span class="section-icon">🧬</span> Stage 2: Deduplication</div>')
2802
+
2803
+ embedding_model = gr.Dropdown(
2804
+ choices=[("granite-107m", "granite-107m"), ("granite-278m", "granite-278m"),
2805
+ ("gemma-300m", "gemma-300m"), ("qwen-600m", "qwen-600m")],
2806
+ value="granite-107m",
2807
+ label="Embedding Model",
2808
+ info="Computes semantic similarity for duplicate detection"
2809
+ )
2810
+
2811
+ similarity_threshold = gr.Slider(
2812
+ minimum=0.70,
2813
+ maximum=0.95,
2814
+ step=0.01,
2815
+ value=0.85,
2816
+ label="Similarity Threshold",
2817
+ info="Higher = stricter duplicate detection (items with similarity above this are merged)"
2818
+ )
2819
+
2820
+ with gr.Accordion("πŸ“Š Embedding Model Details", open=False):
2821
+ embedding_model_info = gr.Markdown(
2822
+ value=get_embedding_model_info("granite-107m")
2823
+ )
2824
+
2825
+ # ========== STAGE 3: SYNTHESIS ==========
2826
+ gr.HTML('<div class="section-header" style="margin-top: 20px;"><span class="section-icon">✨</span> Stage 3: Synthesis</div>')
2827
+
2828
+ synthesis_model = gr.Dropdown(
2829
+ choices=[(SYNTHESIS_MODELS[k]["name"], k) for k in SYNTHESIS_MODELS.keys()],
2830
+ value=DEFAULT_SYNTHESIS_MODEL,
2831
+ label="Synthesis Model (1B-30B)",
2832
+ info="Generates executive summary from deduplicated items"
2833
+ )
2834
+
2835
+ enable_synthesis_reasoning = gr.Checkbox(
2836
+ value=True,
2837
+ visible=True,
2838
+ label="Enable Reasoning Mode",
2839
+ info="Uses thinking process for higher quality synthesis"
2840
+ )
2841
+
2842
+ with gr.Row():
2843
+ adv_output_language = gr.Radio(
2844
+ choices=["en", "zh-TW"],
2845
+ value="en",
2846
+ label="Output Language",
2847
+ info="Language for final summary"
2848
+ )
2849
+
2850
+ adv_max_tokens = gr.Slider(
2851
+ minimum=512,
2852
+ maximum=4096,
2853
+ step=128,
2854
+ value=2048,
2855
+ label="Max Output Tokens",
2856
+ info="Maximum tokens for synthesis output"
2857
+ )
2858
+
2859
+ gr.HTML('<div style="font-size: 0.85em; color: #94a3b8; margin-top: 8px; margin-bottom: 8px;">Inference Parameters</div>')
2860
+ with gr.Row():
2861
+ adv_temperature_slider = gr.Slider(
2862
+ minimum=0.0,
2863
+ maximum=2.0,
2864
+ value=0.6,
2865
+ step=0.1,
2866
+ label="Temperature",
2867
+ info="Lower = focused, Higher = creative"
2868
+ )
2869
+ adv_top_p = gr.Slider(
2870
+ minimum=0.0,
2871
+ maximum=1.0,
2872
+ value=0.95,
2873
+ step=0.05,
2874
+ label="Top P",
2875
+ info="Nucleus sampling threshold"
2876
+ )
2877
+ adv_top_k = gr.Slider(
2878
+ minimum=0,
2879
+ maximum=100,
2880
+ value=20,
2881
+ step=5,
2882
+ label="Top K",
2883
+ info="Token selection limit"
2884
+ )
2885
 
2886
+ with gr.Accordion("πŸ“Š Synthesis Model Details", open=False):
2887
+ synthesis_model_info = gr.Markdown(
2888
+ value=get_synthesis_model_info(DEFAULT_SYNTHESIS_MODEL)
 
 
 
 
 
 
2889
  )
2890
+
2891
+ # ========== GLOBAL SETTINGS ==========
2892
+ gr.HTML('<div class="section-header" style="margin-top: 20px;"><span class="section-icon">βš™οΈ</span> Global Settings</div>')
2893
+
2894
+ adv_thread_config_dropdown = gr.Dropdown(
2895
+ choices=[
2896
+ ("HF Spaces Free Tier (2 vCPUs)", "free"),
2897
+ ("HF Spaces CPU Upgrade (8 vCPUs)", "upgrade"),
2898
+ ("Custom (manual)", "custom"),
2899
+ ],
2900
+ value=DEFAULT_THREAD_PRESET,
2901
+ label="CPU Thread Preset",
2902
+ info="Hardware configuration for all pipeline stages"
2903
+ )
2904
+
2905
+ adv_custom_threads_slider = gr.Slider(
2906
+ minimum=1,
2907
+ maximum=32,
2908
+ value=DEFAULT_CUSTOM_THREADS if DEFAULT_CUSTOM_THREADS > 0 else 4,
2909
+ step=1,
2910
+ label="Custom Thread Count",
2911
+ info="Number of CPU threads for model inference (1-32)",
2912
+ visible=DEFAULT_THREAD_PRESET == "custom"
2913
+ )
2914
+
2915
+ enable_detailed_logging = gr.Checkbox(
2916
+ value=True,
2917
+ label="Enable Detailed Trace Logging",
2918
+ info="Save JSONL trace for debugging (embedded in download JSON)"
2919
+ )
2920
+
2921
+ # ==========================================
2922
+ # Debug Tools (optional)
2923
+ # ==========================================
2924
+ with gr.Accordion("πŸ› Debug Tools", open=False):
2925
+ system_prompt_debug = gr.Textbox(
2926
+ label="System Prompt (Read-Only)",
2927
+ lines=5,
2928
+ max_lines=10,
2929
+ interactive=False,
2930
+ value="Select a model and click 'Generate Summary' to see the system prompt.",
2931
+ info="This shows the exact system prompt sent to the LLM"
2932
+ )
2933
 
2934
  # ==========================================
2935
  # Submit Button
 
3026
  inputs=[thread_config_dropdown],
3027
  outputs=[custom_threads_slider]
3028
  )
3029
+
3030
+ adv_thread_config_dropdown.change(
3031
+ fn=toggle_custom_threads,
3032
+ inputs=[adv_thread_config_dropdown],
3033
+ outputs=[adv_custom_threads_slider]
3034
+ )
3035
+
3036
+ # Toggle mode visibility based on radio selection
3037
+ def toggle_mode_visibility(mode_selection):
3038
+ is_standard = (mode_selection == "Standard Mode")
3039
+ return gr.update(visible=is_standard), gr.update(visible=not is_standard)
3040
+
3041
+ mode_radio.change(
3042
+ fn=toggle_mode_visibility,
3043
+ inputs=[mode_radio],
3044
+ outputs=[standard_mode_group, advanced_mode_group]
3045
+ )
3046
 
3047
  # Copy buttons
3048
  copy_summary_btn.click(
 
3353
  extraction_n_ctx_val, overlap_turns_val, similarity_threshold_val,
3354
  enable_extraction_reasoning_val, enable_synthesis_reasoning_val,
3355
  adv_output_language_val, adv_max_tokens_val, enable_logging_val,
3356
+ adv_temperature_val, adv_top_p_val, adv_top_k_val,
3357
+ adv_thread_config_val, adv_custom_threads_val,
3358
  # Mode selector
3359
+ mode_radio_val
3360
  ):
3361
+ """Route to Standard or Advanced mode based on selected mode radio button."""
3362
 
3363
+ # Determine active mode based on radio button value
3364
+ is_advanced_mode = (mode_radio_val == "Advanced Mode (3-Model Pipeline)")
 
3365
 
3366
  if is_advanced_mode:
3367
  # Advanced Mode: Use summarize_advanced()
3368
+ # Get n_threads from Advanced Mode settings
3369
+ thread_map = {"free": 2, "upgrade": 8, "custom": max(1, adv_custom_threads_val)}
3370
+ n_threads = thread_map.get(adv_thread_config_val, 2)
3371
 
3372
  # Get transcript
3373
  transcript = ""
 
3394
  output_language=adv_output_language_val,
3395
  max_tokens=adv_max_tokens_val,
3396
  enable_logging=enable_logging_val,
3397
+ n_threads=n_threads,
3398
+ temperature=adv_temperature_val,
3399
+ top_p=adv_top_p_val,
3400
+ top_k=adv_top_k_val
3401
  ):
3402
  stage = update.get("stage", "")
3403
 
 
3464
  extraction_n_ctx, overlap_turns, similarity_threshold,
3465
  enable_extraction_reasoning, enable_synthesis_reasoning,
3466
  adv_output_language, adv_max_tokens, enable_detailed_logging,
3467
+ adv_temperature_slider, adv_top_p, adv_top_k,
3468
+ adv_thread_config_dropdown, adv_custom_threads_slider,
3469
  # Mode selector
3470
+ mode_radio
3471
  ],
3472
  outputs=[thinking_output, summary_output, info_output, metrics_state, system_prompt_debug],
3473
  show_progress="full"
 
3496
  server_name="0.0.0.0",
3497
  server_port=7860,
3498
  share=False,
3499
+ show_error=True,
3500
+ css=custom_css
3501
  )