Luigi committed on
Commit
447632e
·
1 Parent(s): b8f8edc

fix: populate Model Information for custom GGUF models

Browse files

- Add custom_model_metadata Gradio State to store actual repo_id, filename, size_mb
- Modify get_model_info() to accept custom_metadata parameter
- Handle custom_hf models with actual metadata instead of placeholder None values
- Update load_custom_model_selected() to return metadata dict on success
- Update event handlers to pass and receive custom_model_metadata
- Model Information now shows correct repo, filename, quantization, size for custom models

Files changed (1) hide show
  1. app.py +68 -67
app.py CHANGED
@@ -901,16 +901,51 @@ def calculate_effective_max_tokens(model_key: str, max_tokens: int, enable_reaso
901
  return max_tokens
902
 
903
 
904
- def get_model_info(model_key: str, n_threads: int = 2) -> Tuple[str, str, float, int]:
905
  """Get model information and inference settings for UI display.
906
 
907
  Args:
908
  model_key: Model identifier from AVAILABLE_MODELS
909
  n_threads: Number of CPU threads currently configured
 
910
 
911
  Returns:
912
  Tuple of (info_text, temperature, top_p, top_k)
913
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
914
  m = AVAILABLE_MODELS[model_key]
915
  usable_ctx = min(m["max_context"], MAX_USABLE_CTX)
916
  settings = m["inference_settings"]
@@ -1654,61 +1689,16 @@ def create_interface():
1654
 
1655
  # Hidden state to store loaded custom model
1656
  custom_model_state = gr.State(value=None)
1657
-
1658
- # Model info section (dynamic)
1659
- with gr.Group():
1660
- gr.HTML('<div class="section-header"><span class="section-icon">📊</span> Model Information</div>')
1661
- # Get default thread count for initial display
1662
- _default_threads = DEFAULT_CUSTOM_THREADS if DEFAULT_CUSTOM_THREADS > 0 else 2
1663
- info_output = gr.Markdown(
1664
- value=get_model_info(DEFAULT_MODEL_KEY, n_threads=_default_threads)[0],
1665
- elem_classes=["stats-grid"]
1666
- )
1667
-
1668
- # Right column - Outputs
1669
- with gr.Column(scale=2):
1670
- # Thinking Process
1671
- with gr.Group():
1672
- gr.HTML('<div class="section-header"><span class="section-icon">🧠</span> Model Thinking Process</div>')
1673
- thinking_output = gr.Textbox(
1674
- label="",
1675
- lines=12,
1676
- max_lines=20,
1677
- show_label=False,
1678
- placeholder="The AI's reasoning process will appear here in real-time...",
1679
- elem_classes=["thinking-box"]
1680
- )
1681
-
1682
- # Summary Output
1683
- with gr.Group():
1684
- gr.HTML('<div class="section-header"><span class="section-icon">📝</span> Final Summary</div>')
1685
- summary_output = gr.Markdown(
1686
- value="*Your summarized content will appear here...*",
1687
- elem_classes=["summary-box"]
1688
- )
1689
 
1690
- # Action buttons for outputs
1691
- with gr.Row():
1692
- copy_summary_btn = gr.Button("📋 Copy Summary", size="sm")
1693
- copy_thinking_btn = gr.Button("📋 Copy Thinking", size="sm")
1694
- download_btn = gr.Button("⬇️ Download (JSON)", size="sm")
1695
-
1696
- # File output component for download
1697
- download_output = gr.File(label="Download JSON", visible=True)
1698
-
1699
- # Debug: System Prompt display
1700
- with gr.Accordion("🐛 Debug: System Prompt", open=False):
1701
- system_prompt_debug = gr.Textbox(
1702
- label="System Prompt (Read-Only)",
1703
- lines=5,
1704
- max_lines=10,
1705
- interactive=False,
1706
- value="Select a model and click 'Generate Summary' to see the system prompt.",
1707
- info="This shows the exact system prompt sent to the LLM"
1708
- )
1709
-
1710
  # Function to update settings when model changes
1711
- def update_settings_on_model_change(model_key, thread_config, custom_threads):
1712
  """Update all Advanced Settings when model selection changes."""
1713
  # Calculate n_threads based on preset
1714
  thread_preset_map = {
@@ -1718,7 +1708,7 @@ def create_interface():
1718
  }
1719
  n_threads = thread_preset_map.get(thread_config, 2)
1720
 
1721
- info_text, temp_str, top_p_val, top_k_val = get_model_info(model_key, n_threads=n_threads)
1722
  temperature = float(temp_str) if temp_str else 0.6
1723
  return temperature, top_p_val, top_k_val, info_text
1724
 
@@ -1733,7 +1723,7 @@ def create_interface():
1733
  # Update settings when model changes
1734
  model_dropdown.change(
1735
  fn=update_settings_on_model_change,
1736
- inputs=[model_dropdown, thread_config_dropdown, custom_threads_slider],
1737
  outputs=[temperature_slider, top_p, top_k, info_output]
1738
  )
1739
 
@@ -1927,16 +1917,23 @@ def create_interface():
1927
  def load_custom_model_selected(repo_id, selected_file_display, files_data):
1928
  """Load the selected custom model."""
1929
  if not repo_id or not selected_file_display:
1930
- return "❌ Please enter a Repo ID and select a file first", gr.update(visible=False), None
1931
 
1932
  # Extract filename from the display string
1933
  # Format: "📄 filename | size | quant | params | downloads"
1934
  filename = selected_file_display.split(" | ")[0].replace("📄 ", "").strip()
1935
 
1936
  if not filename:
1937
- return "❌ Could not parse filename from selection", gr.update(visible=False), None
 
 
 
 
 
 
 
1938
 
1939
- yield "⏳ Loading model... (this may take a while for large files)", gr.update(visible=False), None
1940
 
1941
  try:
1942
  # Load the model
@@ -1945,27 +1942,31 @@ def create_interface():
1945
 
1946
  if llm is None:
1947
  # Load failed - show error and retry button
1948
- yield f"❌ {load_msg}", gr.update(visible=True), None
1949
  else:
1950
- # Success
1951
- model_info = next((f for f in files_data if f["name"] == filename), {})
1952
- size_info = f" ({model_info.get('size_mb', 'Unknown')} MB)" if model_info else ""
1953
- yield f"✅ Model loaded successfully{size_info}! Ready to generate summaries.", gr.update(visible=False), llm
 
 
 
 
1954
 
1955
  except Exception as e:
1956
- yield f"❌ Error loading model: {str(e)}", gr.update(visible=True), None
1957
 
1958
  load_btn.click(
1959
  fn=load_custom_model_selected,
1960
  inputs=[model_search_input, custom_file_dropdown, custom_repo_files],
1961
- outputs=[custom_status, retry_btn, custom_model_state],
1962
  )
1963
 
1964
  # Retry button - same as load
1965
  retry_btn.click(
1966
  fn=load_custom_model_selected,
1967
  inputs=[model_search_input, custom_file_dropdown, custom_repo_files],
1968
- outputs=[custom_status, retry_btn, custom_model_state],
1969
  )
1970
 
1971
  # Also update submit button to use custom model state
 
901
  return max_tokens
902
 
903
 
904
+ def get_model_info(model_key: str, n_threads: int = 2, custom_metadata: dict = None) -> Tuple[str, str, float, int]:
905
  """Get model information and inference settings for UI display.
906
 
907
  Args:
908
  model_key: Model identifier from AVAILABLE_MODELS
909
  n_threads: Number of CPU threads currently configured
910
+ custom_metadata: Optional dict with repo_id, filename, size_mb for custom models
911
 
912
  Returns:
913
  Tuple of (info_text, temperature, top_p, top_k)
914
  """
915
+
916
+ # Handle custom_hf models with actual metadata
917
+ if model_key == "custom_hf" and custom_metadata and custom_metadata.get("repo_id"):
918
+ m = AVAILABLE_MODELS[model_key]
919
+ repo_id = custom_metadata.get("repo_id", "Not selected")
920
+ filename = custom_metadata.get("filename", "Not selected")
921
+ size_mb = custom_metadata.get("size_mb", 0)
922
+
923
+ # Parse quantization from filename
924
+ quant = parse_quantization(filename) if filename and filename != "Not selected" else "Unknown"
925
+
926
+ info_text = (
927
+ f"## 🤖 {m['name']}\n\n"
928
+ f"### 📊 Model Metadata\n"
929
+ f"| Property | Value |\n"
930
+ f"|----------|-------|\n"
931
+ f"| **Repository** | `{repo_id}` |\n"
932
+ f"| **GGUF File** | `{filename}` |\n"
933
+ f"| **Quantization** | `{quant}` |\n"
934
+ f"| **File Size** | {size_mb:.1f} MB |\n"
935
+ f"| **Context** | {m['max_context']:,} tokens |\n"
936
+ f"| **CPU Threads** | {n_threads} |\n\n"
937
+ f"### ⚙️ Inference Settings\n"
938
+ f"| Property | Value |\n"
939
+ f"|----------|-------|\n"
940
+ f"| **Temperature** | {m['inference_settings']['temperature']} |\n"
941
+ f"| **Top P** | {m['inference_settings']['top_p']} |\n"
942
+ f"| **Top K** | {m['inference_settings']['top_k']} |\n"
943
+ f"| **Repeat Penalty** | {m['inference_settings'].get('repeat_penalty', 1.0)} |\n\n"
944
+ f"⚠️ **Note**: Custom models use conservative defaults (CPU-only, smaller context)."
945
+ )
946
+ return info_text, str(m['inference_settings']["temperature"]), m['inference_settings']["top_p"], m['inference_settings']["top_k"]
947
+
948
+ # Standard predefined models
949
  m = AVAILABLE_MODELS[model_key]
950
  usable_ctx = min(m["max_context"], MAX_USABLE_CTX)
951
  settings = m["inference_settings"]
 
1689
 
1690
  # Hidden state to store loaded custom model
1691
  custom_model_state = gr.State(value=None)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1692
 
1693
+ # Hidden state to store custom model metadata (repo_id, filename, size)
1694
+ custom_model_metadata = gr.State(value={
1695
+ "repo_id": None,
1696
+ "filename": None,
1697
+ "size_mb": 0,
1698
+ })
1699
+
 
 
 
 
 
 
 
 
 
 
 
 
 
1700
  # Function to update settings when model changes
1701
+ def update_settings_on_model_change(model_key, thread_config, custom_threads, custom_metadata):
1702
  """Update all Advanced Settings when model selection changes."""
1703
  # Calculate n_threads based on preset
1704
  thread_preset_map = {
 
1708
  }
1709
  n_threads = thread_preset_map.get(thread_config, 2)
1710
 
1711
+ info_text, temp_str, top_p_val, top_k_val = get_model_info(model_key, n_threads=n_threads, custom_metadata=custom_metadata)
1712
  temperature = float(temp_str) if temp_str else 0.6
1713
  return temperature, top_p_val, top_k_val, info_text
1714
 
 
1723
  # Update settings when model changes
1724
  model_dropdown.change(
1725
  fn=update_settings_on_model_change,
1726
+ inputs=[model_dropdown, thread_config_dropdown, custom_threads_slider, custom_model_metadata],
1727
  outputs=[temperature_slider, top_p, top_k, info_output]
1728
  )
1729
 
 
1917
  def load_custom_model_selected(repo_id, selected_file_display, files_data):
1918
  """Load the selected custom model."""
1919
  if not repo_id or not selected_file_display:
1920
+ return "❌ Please enter a Repo ID and select a file first", gr.update(visible=False), None, {}
1921
 
1922
  # Extract filename from the display string
1923
  # Format: "📄 filename | size | quant | params | downloads"
1924
  filename = selected_file_display.split(" | ")[0].replace("📄 ", "").strip()
1925
 
1926
  if not filename:
1927
+ return "❌ Could not parse filename from selection", gr.update(visible=False), None, {}
1928
+
1929
+ # Extract size from files_data
1930
+ size_mb = 0
1931
+ for f in files_data:
1932
+ if f["name"] == filename:
1933
+ size_mb = f.get("size_mb", 0)
1934
+ break
1935
 
1936
+ yield "⏳ Loading model... (this may take a while for large files)", gr.update(visible=False), None, {}
1937
 
1938
  try:
1939
  # Load the model
 
1942
 
1943
  if llm is None:
1944
  # Load failed - show error and retry button
1945
+ yield f"❌ {load_msg}", gr.update(visible=True), None, {}
1946
  else:
1947
+ # Success - create metadata dict
1948
+ metadata = {
1949
+ "repo_id": repo_id,
1950
+ "filename": filename,
1951
+ "size_mb": size_mb,
1952
+ }
1953
+ size_info = f" ({size_mb:.1f} MB)" if size_mb else ""
1954
+ yield f"✅ Model loaded successfully{size_info}! Ready to generate summaries.", gr.update(visible=False), llm, metadata
1955
 
1956
  except Exception as e:
1957
+ yield f"❌ Error loading model: {str(e)}", gr.update(visible=True), None, {}
1958
 
1959
  load_btn.click(
1960
  fn=load_custom_model_selected,
1961
  inputs=[model_search_input, custom_file_dropdown, custom_repo_files],
1962
+ outputs=[custom_status, retry_btn, custom_model_state, custom_model_metadata],
1963
  )
1964
 
1965
  # Retry button - same as load
1966
  retry_btn.click(
1967
  fn=load_custom_model_selected,
1968
  inputs=[model_search_input, custom_file_dropdown, custom_repo_files],
1969
+ outputs=[custom_status, retry_btn, custom_model_state, custom_model_metadata],
1970
  )
1971
 
1972
  # Also update submit button to use custom model state