Luigi committed on
Commit
1ea1cce
·
1 Parent(s): 88fac5c

refactor: reorganize UI with single mode selector and stage-based Advanced Mode layout

Browse files

- Remove redundant tab system, use only radio button for mode selection
- Implement visibility-based switching between Standard/Advanced modes
- Centralize all inference parameters into respective mode panels
- Reorganize Advanced Mode into 3 dedicated sections (Extraction, Deduplication, Synthesis)
- Move hardware configuration into both Standard and Advanced mode panels
- Remove global Advanced Settings accordion
- Add mode-specific inference parameters (temperature, top_p, top_k) for Advanced Mode
- Group model selection with stage-specific parameters
- Add collapsible model details accordions per stage
- Improve UI clarity with logical grouping and pipeline flow

Files changed (1) hide show
  1. app.py +247 -166
app.py CHANGED
@@ -1435,7 +1435,10 @@ def summarize_advanced(
1435
  output_language: str,
1436
  max_tokens: int,
1437
  enable_logging: bool,
1438
- n_threads: int = 2
 
 
 
1439
  ) -> Generator[Dict[str, Any], None, None]:
1440
  """
1441
  Advanced 3-stage pipeline: Extraction β†’ Deduplication β†’ Synthesis.
@@ -1595,6 +1598,13 @@ def summarize_advanced(
1595
 
1596
  # Synthesize
1597
  synthesis_config = get_model_config(synthesis_model_key, "synthesis")
 
 
 
 
 
 
 
1598
  final_summary = ""
1599
  final_thinking = ""
1600
 
@@ -2529,8 +2539,7 @@ def create_interface():
2529
  """Create and configure the Gradio interface."""
2530
 
2531
  with gr.Blocks(
2532
- title="Tiny Scribe - AI Transcript Summarizer",
2533
- css=custom_css
2534
  ) as demo:
2535
 
2536
  # Header section (simplified - no Row/Column wrapper needed for full-width)
@@ -2598,13 +2607,18 @@ def create_interface():
2598
  # ==========================================
2599
  # Section 2: Mode Selection (Standard vs Advanced)
2600
  # ==========================================
2601
- with gr.Tabs() as mode_tabs:
 
 
 
 
 
 
 
 
 
2602
 
2603
- # ===== STANDARD MODE =====
2604
- with gr.TabItem("πŸ“Š Standard Mode"):
2605
- gr.HTML('<div style="font-size: 0.9em; color: #64748b; margin-bottom: 10px;">Single-model direct summarization</div>')
2606
-
2607
- with gr.Tabs() as model_tabs:
2608
 
2609
  # --- Tab 1: Preset Models ---
2610
  with gr.TabItem("πŸ€– Preset Models"):
@@ -2678,144 +2692,19 @@ def create_interface():
2678
  elem_classes=["stats-grid"]
2679
  )
2680
 
2681
- # ===== ADVANCED MODE =====
2682
- with gr.TabItem("🧠 Advanced Mode (3-Model Pipeline)"):
2683
- gr.HTML('<div style="font-size: 0.9em; color: #64748b; margin-bottom: 10px;">Extraction β†’ Deduplication β†’ Synthesis</div>')
2684
-
2685
- # Model Selection Row
2686
- gr.HTML('<div class="section-header"><span class="section-icon">πŸ€–</span> Model Selection</div>')
2687
- with gr.Row():
2688
- extraction_model = gr.Dropdown(
2689
- choices=[(EXTRACTION_MODELS[k]["name"], k) for k in EXTRACTION_MODELS.keys()],
2690
- value=DEFAULT_EXTRACTION_MODEL,
2691
- label="πŸ” Stage 1: Extraction Model (≀1.7B)",
2692
- info="Extracts structured items from windows"
2693
- )
2694
-
2695
- embedding_model = gr.Dropdown(
2696
- choices=[("granite-107m", "granite-107m"), ("granite-278m", "granite-278m"),
2697
- ("gemma-300m", "gemma-300m"), ("qwen-600m", "qwen-600m")],
2698
- value="granite-107m",
2699
- label="🧬 Stage 2: Embedding Model",
2700
- info="Deduplication via semantic similarity"
2701
- )
2702
-
2703
- synthesis_model = gr.Dropdown(
2704
- choices=[(SYNTHESIS_MODELS[k]["name"], k) for k in SYNTHESIS_MODELS.keys()],
2705
- value=DEFAULT_SYNTHESIS_MODEL,
2706
- label="✨ Stage 3: Synthesis Model (1B-30B)",
2707
- info="Generates executive summary"
2708
- )
2709
-
2710
- # Extraction Parameters Row
2711
- gr.HTML('<div class="section-header" style="margin-top: 12px;"><span class="section-icon">βš™οΈ</span> Extraction Parameters</div>')
2712
- with gr.Row():
2713
- extraction_n_ctx = gr.Slider(
2714
- minimum=2048,
2715
- maximum=8192,
2716
- step=1024,
2717
- value=4096,
2718
- label="πŸͺŸ Extraction Context Window (n_ctx)",
2719
- info="Smaller = more windows, Larger = fewer windows"
2720
- )
2721
-
2722
- overlap_turns = gr.Slider(
2723
- minimum=1,
2724
- maximum=5,
2725
- step=1,
2726
- value=2,
2727
- label="πŸ”„ Window Overlap (turns)",
2728
- info="Speaker turns shared between windows"
2729
- )
2730
-
2731
- # Deduplication Parameters Row
2732
- with gr.Row():
2733
- similarity_threshold = gr.Slider(
2734
- minimum=0.70,
2735
- maximum=0.95,
2736
- step=0.01,
2737
- value=0.85,
2738
- label="🎯 Deduplication Similarity Threshold",
2739
- info="Higher = stricter duplicate detection"
2740
- )
2741
-
2742
- # Reasoning Controls (Separate checkboxes)
2743
- gr.HTML('<div class="section-header" style="margin-top: 12px;"><span class="section-icon">🧠</span> Reasoning Configuration</div>')
2744
- with gr.Row():
2745
- enable_extraction_reasoning = gr.Checkbox(
2746
- value=False,
2747
- visible=False,
2748
- label="🧠 Enable Reasoning for Extraction",
2749
- info="Thinking before JSON (Qwen3 hybrid models only)"
2750
- )
2751
-
2752
- enable_synthesis_reasoning = gr.Checkbox(
2753
- value=True,
2754
- visible=True,
2755
- label="🧠 Enable Reasoning for Synthesis",
2756
- info="Thinking for final summary generation"
2757
- )
2758
-
2759
- # Output Settings Row
2760
- gr.HTML('<div class="section-header" style="margin-top: 12px;"><span class="section-icon">🌐</span> Output Settings</div>')
2761
- with gr.Row():
2762
- adv_output_language = gr.Radio(
2763
- choices=["en", "zh-TW"],
2764
- value="en",
2765
- label="Output Language",
2766
- info="Extraction auto-detects, synthesis uses this"
2767
- )
2768
-
2769
- adv_max_tokens = gr.Slider(
2770
- minimum=512,
2771
- maximum=4096,
2772
- step=128,
2773
- value=2048,
2774
- label="πŸ“ Max Synthesis Tokens",
2775
- info="Maximum tokens for final summary"
2776
- )
2777
-
2778
- # Logging Control
2779
- enable_detailed_logging = gr.Checkbox(
2780
- value=True,
2781
- label="πŸ“ Enable Detailed Trace Logging",
2782
- info="Save JSONL trace (embedded in download JSON)"
2783
- )
2784
-
2785
- # Model Info Accordion
2786
- with gr.Accordion("πŸ“‹ Model Details & Settings", open=False):
2787
- with gr.Row():
2788
- with gr.Column():
2789
- extraction_model_info = gr.Markdown(
2790
- value=get_extraction_model_info(DEFAULT_EXTRACTION_MODEL)
2791
- )
2792
- with gr.Column():
2793
- embedding_model_info = gr.Markdown(
2794
- value=get_embedding_model_info("granite-107m")
2795
- )
2796
- with gr.Column():
2797
- synthesis_model_info = gr.Markdown(
2798
- value=get_synthesis_model_info(DEFAULT_SYNTHESIS_MODEL)
2799
- )
2800
-
2801
- # ==========================================
2802
- # Section 3: Advanced Settings
2803
- # ==========================================
2804
- with gr.Accordion("βš™οΈ Advanced Settings", open=False):
2805
-
2806
- # Hardware Configuration
2807
- gr.HTML('<div class="section-header"><span class="section-icon">πŸ–₯️</span> Hardware Configuration</div>')
2808
 
2809
  thread_config_dropdown = gr.Dropdown(
2810
- choices=[
2811
- ("HF Spaces Free Tier (2 vCPUs)", "free"),
2812
- ("HF Spaces CPU Upgrade (8 vCPUs)", "upgrade"),
2813
- ("Custom (manual)", "custom"),
2814
- ],
2815
- value=DEFAULT_THREAD_PRESET,
2816
- label="CPU Thread Preset",
2817
- info="Select hardware tier or specify custom thread count"
2818
- )
2819
 
2820
  custom_threads_slider = gr.Slider(
2821
  minimum=1,
@@ -2827,7 +2716,7 @@ def create_interface():
2827
  visible=DEFAULT_THREAD_PRESET == "custom"
2828
  )
2829
 
2830
- # Inference Parameters
2831
  gr.HTML('<div class="section-header" style="margin-top: 16px;"><span class="section-icon">πŸŽ›οΈ</span> Inference Parameters</div>')
2832
 
2833
  temperature_slider = gr.Slider(
@@ -2862,17 +2751,185 @@ def create_interface():
2862
  label="Top K",
2863
  info="Limits token selection to top K tokens (0 = disabled)"
2864
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2865
 
2866
- # Debug Tools (nested accordion)
2867
- with gr.Accordion("πŸ› Debug Tools", open=False):
2868
- system_prompt_debug = gr.Textbox(
2869
- label="System Prompt (Read-Only)",
2870
- lines=5,
2871
- max_lines=10,
2872
- interactive=False,
2873
- value="Select a model and click 'Generate Summary' to see the system prompt.",
2874
- info="This shows the exact system prompt sent to the LLM"
2875
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2876
 
2877
  # ==========================================
2878
  # Submit Button
@@ -2969,6 +3026,23 @@ def create_interface():
2969
  inputs=[thread_config_dropdown],
2970
  outputs=[custom_threads_slider]
2971
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2972
 
2973
  # Copy buttons
2974
  copy_summary_btn.click(
@@ -3279,20 +3353,21 @@ def create_interface():
3279
  extraction_n_ctx_val, overlap_turns_val, similarity_threshold_val,
3280
  enable_extraction_reasoning_val, enable_synthesis_reasoning_val,
3281
  adv_output_language_val, adv_max_tokens_val, enable_logging_val,
 
 
3282
  # Mode selector
3283
- mode_tabs_val
3284
  ):
3285
- """Route to Standard or Advanced mode based on selected tab."""
3286
 
3287
- # Determine active mode (Gradio returns index of active tab)
3288
- # 0 = Standard Mode, 1 = Advanced Mode
3289
- is_advanced_mode = (mode_tabs_val == 1)
3290
 
3291
  if is_advanced_mode:
3292
  # Advanced Mode: Use summarize_advanced()
3293
- # Get n_threads
3294
- thread_map = {"free": 2, "upgrade": 8, "custom": max(1, custom_threads_val)}
3295
- n_threads = thread_map.get(thread_config_val, 2)
3296
 
3297
  # Get transcript
3298
  transcript = ""
@@ -3319,7 +3394,10 @@ def create_interface():
3319
  output_language=adv_output_language_val,
3320
  max_tokens=adv_max_tokens_val,
3321
  enable_logging=enable_logging_val,
3322
- n_threads=n_threads
 
 
 
3323
  ):
3324
  stage = update.get("stage", "")
3325
 
@@ -3386,8 +3464,10 @@ def create_interface():
3386
  extraction_n_ctx, overlap_turns, similarity_threshold,
3387
  enable_extraction_reasoning, enable_synthesis_reasoning,
3388
  adv_output_language, adv_max_tokens, enable_detailed_logging,
 
 
3389
  # Mode selector
3390
- mode_tabs
3391
  ],
3392
  outputs=[thinking_output, summary_output, info_output, metrics_state, system_prompt_debug],
3393
  show_progress="full"
@@ -3416,5 +3496,6 @@ if __name__ == "__main__":
3416
  server_name="0.0.0.0",
3417
  server_port=7860,
3418
  share=False,
3419
- show_error=True
 
3420
  )
 
1435
  output_language: str,
1436
  max_tokens: int,
1437
  enable_logging: bool,
1438
+ n_threads: int = 2,
1439
+ temperature: float = 0.6,
1440
+ top_p: float = 0.95,
1441
+ top_k: int = 20
1442
  ) -> Generator[Dict[str, Any], None, None]:
1443
  """
1444
  Advanced 3-stage pipeline: Extraction β†’ Deduplication β†’ Synthesis.
 
1598
 
1599
  # Synthesize
1600
  synthesis_config = get_model_config(synthesis_model_key, "synthesis")
1601
+ # Override inference settings with custom parameters
1602
+ synthesis_config["inference_settings"] = {
1603
+ "temperature": temperature,
1604
+ "top_p": top_p,
1605
+ "top_k": top_k,
1606
+ "repeat_penalty": 1.1
1607
+ }
1608
  final_summary = ""
1609
  final_thinking = ""
1610
 
 
2539
  """Create and configure the Gradio interface."""
2540
 
2541
  with gr.Blocks(
2542
+ title="Tiny Scribe - AI Transcript Summarizer"
 
2543
  ) as demo:
2544
 
2545
  # Header section (simplified - no Row/Column wrapper needed for full-width)
 
2607
  # ==========================================
2608
  # Section 2: Mode Selection (Standard vs Advanced)
2609
  # ==========================================
2610
+ mode_radio = gr.Radio(
2611
+ choices=["Standard Mode", "Advanced Mode (3-Model Pipeline)"],
2612
+ value="Standard Mode",
2613
+ label="🎯 Summarization Mode",
2614
+ info="Select between single-model Standard or multi-model Advanced mode"
2615
+ )
2616
+
2617
+ # ===== STANDARD MODE =====
2618
+ with gr.Group(visible=True) as standard_mode_group:
2619
+ gr.HTML('<div style="font-size: 0.9em; color: #64748b; margin-bottom: 10px;">πŸ“Š <strong>Standard Mode</strong> - Single-model direct summarization</div>')
2620
 
2621
+ with gr.Tabs() as model_tabs:
 
 
 
 
2622
 
2623
  # --- Tab 1: Preset Models ---
2624
  with gr.TabItem("πŸ€– Preset Models"):
 
2692
  elem_classes=["stats-grid"]
2693
  )
2694
 
2695
+ # Hardware Configuration (Standard Mode)
2696
+ gr.HTML('<div class="section-header" style="margin-top: 16px;"><span class="section-icon">πŸ–₯️</span> Hardware Configuration</div>')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2697
 
2698
  thread_config_dropdown = gr.Dropdown(
2699
+ choices=[
2700
+ ("HF Spaces Free Tier (2 vCPUs)", "free"),
2701
+ ("HF Spaces CPU Upgrade (8 vCPUs)", "upgrade"),
2702
+ ("Custom (manual)", "custom"),
2703
+ ],
2704
+ value=DEFAULT_THREAD_PRESET,
2705
+ label="CPU Thread Preset",
2706
+ info="Select hardware tier or specify custom thread count"
2707
+ )
2708
 
2709
  custom_threads_slider = gr.Slider(
2710
  minimum=1,
 
2716
  visible=DEFAULT_THREAD_PRESET == "custom"
2717
  )
2718
 
2719
+ # Inference Parameters (Standard Mode)
2720
  gr.HTML('<div class="section-header" style="margin-top: 16px;"><span class="section-icon">πŸŽ›οΈ</span> Inference Parameters</div>')
2721
 
2722
  temperature_slider = gr.Slider(
 
2751
  label="Top K",
2752
  info="Limits token selection to top K tokens (0 = disabled)"
2753
  )
2754
+
2755
+ # ===== ADVANCED MODE =====
2756
+ with gr.Group(visible=False) as advanced_mode_group:
2757
+ gr.HTML('<div style="font-size: 0.9em; color: #64748b; margin-bottom: 16px;">🧠 <strong>Advanced Mode (3-Model Pipeline)</strong> - Extraction β†’ Deduplication β†’ Synthesis</div>')
2758
+
2759
+ # ========== STAGE 1: EXTRACTION ==========
2760
+ gr.HTML('<div class="section-header"><span class="section-icon">πŸ”</span> Stage 1: Extraction</div>')
2761
+
2762
+ extraction_model = gr.Dropdown(
2763
+ choices=[(EXTRACTION_MODELS[k]["name"], k) for k in EXTRACTION_MODELS.keys()],
2764
+ value=DEFAULT_EXTRACTION_MODEL,
2765
+ label="Extraction Model (≀1.7B)",
2766
+ info="Extracts structured items from transcript windows"
2767
+ )
2768
+
2769
+ with gr.Row():
2770
+ extraction_n_ctx = gr.Slider(
2771
+ minimum=2048,
2772
+ maximum=8192,
2773
+ step=1024,
2774
+ value=4096,
2775
+ label="Context Window (n_ctx)",
2776
+ info="Smaller = more windows, Larger = fewer windows"
2777
+ )
2778
+
2779
+ overlap_turns = gr.Slider(
2780
+ minimum=1,
2781
+ maximum=5,
2782
+ step=1,
2783
+ value=2,
2784
+ label="Window Overlap (turns)",
2785
+ info="Speaker turns shared between consecutive windows"
2786
+ )
2787
+
2788
+ enable_extraction_reasoning = gr.Checkbox(
2789
+ value=False,
2790
+ visible=False,
2791
+ label="Enable Reasoning Mode",
2792
+ info="Thinking before JSON extraction (Qwen3 hybrid models only)"
2793
+ )
2794
+
2795
+ with gr.Accordion("πŸ“Š Extraction Model Details", open=False):
2796
+ extraction_model_info = gr.Markdown(
2797
+ value=get_extraction_model_info(DEFAULT_EXTRACTION_MODEL)
2798
+ )
2799
+
2800
+ # ========== STAGE 2: DEDUPLICATION ==========
2801
+ gr.HTML('<div class="section-header" style="margin-top: 20px;"><span class="section-icon">🧬</span> Stage 2: Deduplication</div>')
2802
+
2803
+ embedding_model = gr.Dropdown(
2804
+ choices=[("granite-107m", "granite-107m"), ("granite-278m", "granite-278m"),
2805
+ ("gemma-300m", "gemma-300m"), ("qwen-600m", "qwen-600m")],
2806
+ value="granite-107m",
2807
+ label="Embedding Model",
2808
+ info="Computes semantic similarity for duplicate detection"
2809
+ )
2810
+
2811
+ similarity_threshold = gr.Slider(
2812
+ minimum=0.70,
2813
+ maximum=0.95,
2814
+ step=0.01,
2815
+ value=0.85,
2816
+ label="Similarity Threshold",
2817
+ info="Higher = stricter duplicate detection (items with similarity above this are merged)"
2818
+ )
2819
+
2820
+ with gr.Accordion("πŸ“Š Embedding Model Details", open=False):
2821
+ embedding_model_info = gr.Markdown(
2822
+ value=get_embedding_model_info("granite-107m")
2823
+ )
2824
+
2825
+ # ========== STAGE 3: SYNTHESIS ==========
2826
+ gr.HTML('<div class="section-header" style="margin-top: 20px;"><span class="section-icon">✨</span> Stage 3: Synthesis</div>')
2827
+
2828
+ synthesis_model = gr.Dropdown(
2829
+ choices=[(SYNTHESIS_MODELS[k]["name"], k) for k in SYNTHESIS_MODELS.keys()],
2830
+ value=DEFAULT_SYNTHESIS_MODEL,
2831
+ label="Synthesis Model (1B-30B)",
2832
+ info="Generates executive summary from deduplicated items"
2833
+ )
2834
+
2835
+ enable_synthesis_reasoning = gr.Checkbox(
2836
+ value=True,
2837
+ visible=True,
2838
+ label="Enable Reasoning Mode",
2839
+ info="Uses thinking process for higher quality synthesis"
2840
+ )
2841
+
2842
+ with gr.Row():
2843
+ adv_output_language = gr.Radio(
2844
+ choices=["en", "zh-TW"],
2845
+ value="en",
2846
+ label="Output Language",
2847
+ info="Language for final summary"
2848
+ )
2849
+
2850
+ adv_max_tokens = gr.Slider(
2851
+ minimum=512,
2852
+ maximum=4096,
2853
+ step=128,
2854
+ value=2048,
2855
+ label="Max Output Tokens",
2856
+ info="Maximum tokens for synthesis output"
2857
+ )
2858
+
2859
+ gr.HTML('<div style="font-size: 0.85em; color: #94a3b8; margin-top: 8px; margin-bottom: 8px;">Inference Parameters</div>')
2860
+ with gr.Row():
2861
+ adv_temperature_slider = gr.Slider(
2862
+ minimum=0.0,
2863
+ maximum=2.0,
2864
+ value=0.6,
2865
+ step=0.1,
2866
+ label="Temperature",
2867
+ info="Lower = focused, Higher = creative"
2868
+ )
2869
+ adv_top_p = gr.Slider(
2870
+ minimum=0.0,
2871
+ maximum=1.0,
2872
+ value=0.95,
2873
+ step=0.05,
2874
+ label="Top P",
2875
+ info="Nucleus sampling threshold"
2876
+ )
2877
+ adv_top_k = gr.Slider(
2878
+ minimum=0,
2879
+ maximum=100,
2880
+ value=20,
2881
+ step=5,
2882
+ label="Top K",
2883
+ info="Token selection limit"
2884
+ )
2885
 
2886
+ with gr.Accordion("πŸ“Š Synthesis Model Details", open=False):
2887
+ synthesis_model_info = gr.Markdown(
2888
+ value=get_synthesis_model_info(DEFAULT_SYNTHESIS_MODEL)
 
 
 
 
 
 
2889
  )
2890
+
2891
+ # ========== GLOBAL SETTINGS ==========
2892
+ gr.HTML('<div class="section-header" style="margin-top: 20px;"><span class="section-icon">βš™οΈ</span> Global Settings</div>')
2893
+
2894
+ adv_thread_config_dropdown = gr.Dropdown(
2895
+ choices=[
2896
+ ("HF Spaces Free Tier (2 vCPUs)", "free"),
2897
+ ("HF Spaces CPU Upgrade (8 vCPUs)", "upgrade"),
2898
+ ("Custom (manual)", "custom"),
2899
+ ],
2900
+ value=DEFAULT_THREAD_PRESET,
2901
+ label="CPU Thread Preset",
2902
+ info="Hardware configuration for all pipeline stages"
2903
+ )
2904
+
2905
+ adv_custom_threads_slider = gr.Slider(
2906
+ minimum=1,
2907
+ maximum=32,
2908
+ value=DEFAULT_CUSTOM_THREADS if DEFAULT_CUSTOM_THREADS > 0 else 4,
2909
+ step=1,
2910
+ label="Custom Thread Count",
2911
+ info="Number of CPU threads for model inference (1-32)",
2912
+ visible=DEFAULT_THREAD_PRESET == "custom"
2913
+ )
2914
+
2915
+ enable_detailed_logging = gr.Checkbox(
2916
+ value=True,
2917
+ label="Enable Detailed Trace Logging",
2918
+ info="Save JSONL trace for debugging (embedded in download JSON)"
2919
+ )
2920
+
2921
+ # ==========================================
2922
+ # Debug Tools (optional)
2923
+ # ==========================================
2924
+ with gr.Accordion("πŸ› Debug Tools", open=False):
2925
+ system_prompt_debug = gr.Textbox(
2926
+ label="System Prompt (Read-Only)",
2927
+ lines=5,
2928
+ max_lines=10,
2929
+ interactive=False,
2930
+ value="Select a model and click 'Generate Summary' to see the system prompt.",
2931
+ info="This shows the exact system prompt sent to the LLM"
2932
+ )
2933
 
2934
  # ==========================================
2935
  # Submit Button
 
3026
  inputs=[thread_config_dropdown],
3027
  outputs=[custom_threads_slider]
3028
  )
3029
+
3030
+ adv_thread_config_dropdown.change(
3031
+ fn=toggle_custom_threads,
3032
+ inputs=[adv_thread_config_dropdown],
3033
+ outputs=[adv_custom_threads_slider]
3034
+ )
3035
+
3036
+ # Toggle mode visibility based on radio selection
3037
+ def toggle_mode_visibility(mode_selection):
3038
+ is_standard = (mode_selection == "Standard Mode")
3039
+ return gr.update(visible=is_standard), gr.update(visible=not is_standard)
3040
+
3041
+ mode_radio.change(
3042
+ fn=toggle_mode_visibility,
3043
+ inputs=[mode_radio],
3044
+ outputs=[standard_mode_group, advanced_mode_group]
3045
+ )
3046
 
3047
  # Copy buttons
3048
  copy_summary_btn.click(
 
3353
  extraction_n_ctx_val, overlap_turns_val, similarity_threshold_val,
3354
  enable_extraction_reasoning_val, enable_synthesis_reasoning_val,
3355
  adv_output_language_val, adv_max_tokens_val, enable_logging_val,
3356
+ adv_temperature_val, adv_top_p_val, adv_top_k_val,
3357
+ adv_thread_config_val, adv_custom_threads_val,
3358
  # Mode selector
3359
+ mode_radio_val
3360
  ):
3361
+ """Route to Standard or Advanced mode based on selected mode radio button."""
3362
 
3363
+ # Determine active mode based on radio button value
3364
+ is_advanced_mode = (mode_radio_val == "Advanced Mode (3-Model Pipeline)")
 
3365
 
3366
  if is_advanced_mode:
3367
  # Advanced Mode: Use summarize_advanced()
3368
+ # Get n_threads from Advanced Mode settings
3369
+ thread_map = {"free": 2, "upgrade": 8, "custom": max(1, adv_custom_threads_val)}
3370
+ n_threads = thread_map.get(adv_thread_config_val, 2)
3371
 
3372
  # Get transcript
3373
  transcript = ""
 
3394
  output_language=adv_output_language_val,
3395
  max_tokens=adv_max_tokens_val,
3396
  enable_logging=enable_logging_val,
3397
+ n_threads=n_threads,
3398
+ temperature=adv_temperature_val,
3399
+ top_p=adv_top_p_val,
3400
+ top_k=adv_top_k_val
3401
  ):
3402
  stage = update.get("stage", "")
3403
 
 
3464
  extraction_n_ctx, overlap_turns, similarity_threshold,
3465
  enable_extraction_reasoning, enable_synthesis_reasoning,
3466
  adv_output_language, adv_max_tokens, enable_detailed_logging,
3467
+ adv_temperature_slider, adv_top_p, adv_top_k,
3468
+ adv_thread_config_dropdown, adv_custom_threads_slider,
3469
  # Mode selector
3470
+ mode_radio
3471
  ],
3472
  outputs=[thinking_output, summary_output, info_output, metrics_state, system_prompt_debug],
3473
  show_progress="full"
 
3496
  server_name="0.0.0.0",
3497
  server_port=7860,
3498
  share=False,
3499
+ show_error=True,
3500
+ css=custom_css
3501
  )