Spaces:
Running
Running
feat: add debug system prompt display and smart custom GGUF loader
Browse files- Add collapsible debug accordion showing exact system prompt sent to LLM
- Implement smart custom GGUF loader with HF Hub integration
- Auto-discover GGUF files with metadata (size, quant, downloads)
- Add dynamic model search from popular GGUF models list
- Include load/retry buttons with proper error handling
app.py
CHANGED
|
@@ -10,11 +10,14 @@ UI Version: 2.0 - Enhanced with modern styling and UX improvements
|
|
| 10 |
import os
|
| 11 |
import re
|
| 12 |
import gc
|
|
|
|
|
|
|
|
|
|
| 13 |
import gradio as gr
|
| 14 |
-
from typing import Tuple, Generator
|
| 15 |
from llama_cpp import Llama
|
| 16 |
from opencc import OpenCC
|
| 17 |
import logging
|
|
|
|
| 18 |
|
| 19 |
# Configure logging
|
| 20 |
logging.basicConfig(level=logging.INFO)
|
|
@@ -25,6 +28,372 @@ llm = None
|
|
| 25 |
converter = None
|
| 26 |
current_model_key = None
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
# Thread configuration from environment variable
|
| 29 |
def _get_default_thread_config():
|
| 30 |
"""Get default thread configuration from environment variable."""
|
|
@@ -400,6 +769,21 @@ AVAILABLE_MODELS = {
|
|
| 400 |
"repeat_penalty": 1.0,
|
| 401 |
},
|
| 402 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 403 |
}
|
| 404 |
|
| 405 |
DEFAULT_MODEL_KEY = "qwen3_600m_q4"
|
|
@@ -744,7 +1128,8 @@ def summarize_streaming(
|
|
| 744 |
output_language: str = "en",
|
| 745 |
thread_config: str = "free",
|
| 746 |
custom_threads: int = 4,
|
| 747 |
-
|
|
|
|
| 748 |
"""
|
| 749 |
Stream summary generation from uploaded file.
|
| 750 |
|
|
@@ -756,9 +1141,12 @@ def summarize_streaming(
|
|
| 756 |
top_p: Nucleus sampling parameter (uses model default if None)
|
| 757 |
top_k: Top-k sampling parameter (uses model default if None)
|
| 758 |
output_language: Target language for summary ("en" or "zh-TW")
|
|
|
|
|
|
|
|
|
|
| 759 |
|
| 760 |
Yields:
|
| 761 |
-
Tuple of (thinking_text, summary_text, info_text, metrics_dict)
|
| 762 |
"""
|
| 763 |
import time
|
| 764 |
|
|
@@ -806,7 +1194,8 @@ def summarize_streaming(
|
|
| 806 |
# Read uploaded file
|
| 807 |
try:
|
| 808 |
if file_obj is None:
|
| 809 |
-
|
|
|
|
| 810 |
return
|
| 811 |
|
| 812 |
path = file_obj.name if hasattr(file_obj, 'name') else file_obj
|
|
@@ -825,11 +1214,13 @@ def summarize_streaming(
|
|
| 825 |
"original_char_count": len(transcript),
|
| 826 |
}
|
| 827 |
except Exception as e:
|
| 828 |
-
|
|
|
|
| 829 |
return
|
| 830 |
|
| 831 |
if not transcript.strip():
|
| 832 |
-
|
|
|
|
| 833 |
return
|
| 834 |
|
| 835 |
# Calculate context and check truncation (with reasoning buffer if enabled)
|
|
@@ -882,15 +1273,29 @@ def summarize_streaming(
|
|
| 882 |
# Load model (no-op if already loaded) with timing
|
| 883 |
model_load_start = time.time()
|
| 884 |
try:
|
| 885 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 886 |
logger.info(load_msg)
|
| 887 |
metrics["model_load_time_ms"] = (time.time() - model_load_start) * 1000
|
| 888 |
except Exception as e:
|
| 889 |
-
|
|
|
|
| 890 |
return
|
| 891 |
|
| 892 |
# Prepare system prompt with reasoning toggle for Qwen3 models
|
| 893 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 894 |
|
| 895 |
# Calculate dynamic temperature for Qwen3 models
|
| 896 |
if model.get("supports_toggle") and "temperature_thinking" in model.get("inference_settings", {}):
|
|
@@ -900,20 +1305,10 @@ def summarize_streaming(
|
|
| 900 |
effective_temperature = model["inference_settings"]["temperature_no_thinking"]
|
| 901 |
else:
|
| 902 |
effective_temperature = temperature
|
| 903 |
-
|
| 904 |
-
|
| 905 |
-
|
| 906 |
-
|
| 907 |
-
else:
|
| 908 |
-
system_content = "你是一個有助的助手,負責總結轉錄內容。"
|
| 909 |
-
user_content = f"請總結以下內容:\n\n{transcript}"
|
| 910 |
-
else:
|
| 911 |
-
if model.get("supports_toggle"):
|
| 912 |
-
reasoning_mode = "/think" if enable_reasoning else "/no_think"
|
| 913 |
-
system_content = f"You are a helpful assistant that summarizes transcripts. {reasoning_mode}"
|
| 914 |
-
else:
|
| 915 |
-
system_content = "You are a helpful assistant that summarizes transcripts."
|
| 916 |
-
user_content = f"Please summarize the following content:\n\n{transcript}"
|
| 917 |
|
| 918 |
messages = [
|
| 919 |
{"role": "system", "content": system_content},
|
|
@@ -991,7 +1386,7 @@ def summarize_streaming(
|
|
| 991 |
thinking, summary = parse_thinking_blocks(full_response, streaming=True)
|
| 992 |
current_thinking = thinking or ""
|
| 993 |
current_summary = summary or ""
|
| 994 |
-
yield (current_thinking, current_summary, info, metrics)
|
| 995 |
|
| 996 |
# Final timing calculations
|
| 997 |
metrics["generation_end_time"] = time.time()
|
|
@@ -1029,14 +1424,14 @@ def summarize_streaming(
|
|
| 1029 |
# Update totals
|
| 1030 |
metrics["total_tokens"] = metrics["input_tokens"] + metrics["output_tokens"] + metrics["thinking_tokens"]
|
| 1031 |
|
| 1032 |
-
yield (thinking or "", summary or "", info, metrics)
|
| 1033 |
|
| 1034 |
llm.reset()
|
| 1035 |
|
| 1036 |
except Exception as e:
|
| 1037 |
logger.error(f"Generation error: {e}")
|
| 1038 |
metrics["error"] = str(e)
|
| 1039 |
-
yield (current_thinking, current_summary + f"\n\nError: {e}", info, metrics)
|
| 1040 |
|
| 1041 |
|
| 1042 |
# Custom CSS for better UI
|
|
@@ -1272,6 +1667,45 @@ def create_interface():
|
|
| 1272 |
visible=AVAILABLE_MODELS[DEFAULT_MODEL_KEY].get("supports_toggle", False)
|
| 1273 |
)
|
| 1274 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1275 |
gr.HTML('<div class="section-header" style="margin-top: 20px;"><span class="section-icon">📤</span> Upload File</div>')
|
| 1276 |
|
| 1277 |
file_input = gr.File(
|
|
@@ -1347,6 +1781,9 @@ def create_interface():
|
|
| 1347 |
|
| 1348 |
# Hidden state to store generation metrics
|
| 1349 |
metrics_state = gr.State(value={})
|
|
|
|
|
|
|
|
|
|
| 1350 |
|
| 1351 |
# Model info section (dynamic)
|
| 1352 |
with gr.Group():
|
|
@@ -1388,6 +1825,17 @@ def create_interface():
|
|
| 1388 |
|
| 1389 |
# File output component for download
|
| 1390 |
download_output = gr.File(label="Download JSON", visible=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1391 |
|
| 1392 |
# Function to update settings when model changes
|
| 1393 |
def update_settings_on_model_change(model_key, thread_config, custom_threads):
|
|
@@ -1457,6 +1905,146 @@ def create_interface():
|
|
| 1457 |
inputs=[summary_output, thinking_output, model_dropdown, language_selector, metrics_state],
|
| 1458 |
outputs=[download_output]
|
| 1459 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1460 |
|
| 1461 |
# Footer
|
| 1462 |
gr.HTML("""
|
|
|
|
| 10 |
import os
|
| 11 |
import re
|
| 12 |
import gc
|
| 13 |
+
import json
|
| 14 |
+
import time
|
| 15 |
+
from typing import Tuple, Generator, Optional, Dict, Any, List
|
| 16 |
import gradio as gr
|
|
|
|
| 17 |
from llama_cpp import Llama
|
| 18 |
from opencc import OpenCC
|
| 19 |
import logging
|
| 20 |
+
from huggingface_hub import list_repo_files, hf_hub_download
|
| 21 |
|
| 22 |
# Configure logging
|
| 23 |
logging.basicConfig(level=logging.INFO)
|
|
|
|
| 28 |
converter = None
|
| 29 |
current_model_key = None
|
| 30 |
|
| 31 |
+
# Global cache for popular GGUF models (populated on first use)
|
| 32 |
+
_popular_gguf_cache: List[Dict[str, Any]] = []
|
| 33 |
+
_popular_gguf_cache_time: float = 0
|
| 34 |
+
_POPULAR_CACHE_TTL = 3600 # 1 hour cache
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def get_popular_gguf_models(limit: int = 20) -> List[Dict[str, Any]]:
    """Fetch the most-downloaded GGUF-tagged models from the HuggingFace Hub.

    Results are memoised in a module-level cache for ``_POPULAR_CACHE_TTL``
    seconds so the Hub API is not queried on every call.

    Args:
        limit: Maximum number of models to return.

    Returns:
        List of dicts with ``repo_id``, ``downloads``, ``tags`` and ``params``
        keys; empty list if the Hub request fails.
    """
    global _popular_gguf_cache, _popular_gguf_cache_time

    # Serve from cache while it is still fresh.
    now = time.time()
    if _popular_gguf_cache and (now - _popular_gguf_cache_time) < _POPULAR_CACHE_TTL:
        return _popular_gguf_cache[:limit]

    try:
        from huggingface_hub import list_models

        # Over-fetch so entries filtered out below don't shrink the result.
        candidates = list_models(
            filter="gguf",
            sort="downloads",
            direction=-1,  # most-downloaded first
            limit=limit * 2,
        )

        _popular_gguf_cache = []
        for entry in candidates:
            # Only keep repos that genuinely carry the 'gguf' tag.
            if not entry.tags or "gguf" not in entry.tags:
                continue

            # Best-effort parameter-count guess from tags like "7b" / "1.5b".
            params = "Unknown"
            for tag in entry.tags:
                if "b" in tag.lower() and any(ch.isdigit() for ch in tag):
                    params = tag
                    break

            _popular_gguf_cache.append({
                "repo_id": entry.id,
                "downloads": entry.downloads,
                "tags": [t for t in entry.tags if t != "gguf"][:5],  # top 5 non-gguf tags
                "params": params,
            })
            if len(_popular_gguf_cache) >= limit:
                break

        _popular_gguf_cache_time = now
        logger.info(f"Cached {len(_popular_gguf_cache)} popular GGUF models from HF Hub")
        return _popular_gguf_cache

    except Exception as e:
        logger.error(f"Failed to fetch popular GGUF models: {e}")
        # Degrade gracefully: callers treat an empty list as "no suggestions".
        return []
+
|
| 100 |
+
|
| 101 |
+
def search_gguf_models(query: str, limit: int = 10) -> List[Dict[str, Any]]:
    """Search for GGUF models by query string.

    Searches the cached popular-models list first, then falls back to the
    HF Hub API to fill results up to *limit*.

    Args:
        query: Search query (partial repo_id or keywords); minimum 2 chars.
        limit: Maximum number of results.

    Returns:
        List of matching model dicts (repo_id, downloads, tags, params).
    """
    if not query or len(query) < 2:
        return []

    query_lower = query.lower()

    # First, search in the popular-models cache.
    popular = get_popular_gguf_models(limit=50)
    matches = [m for m in popular if query_lower in m["repo_id"].lower()]

    # If we have enough matches from cache, return them.
    if len(matches) >= limit:
        return matches[:limit]

    # Fix: was rebuilding `[m["repo_id"] for m in matches]` on every loop
    # iteration (O(n^2)); track already-seen repo ids in a set instead.
    seen = {m["repo_id"] for m in matches}

    # Otherwise, top up from the HF Hub API.
    try:
        from huggingface_hub import list_models

        api_models = list_models(
            search=query,
            filter="gguf",
            sort="downloads",
            direction=-1,
            limit=limit,
        )

        for model in api_models:
            if model.id in seen:
                continue
            seen.add(model.id)

            # Best-effort parameter-count guess from tags like "7b".
            params = "Unknown"
            for tag in model.tags or []:
                if "b" in tag.lower() and any(c.isdigit() for c in tag):
                    params = tag
                    break

            matches.append({
                "repo_id": model.id,
                "downloads": model.downloads,
                "tags": [t for t in (model.tags or []) if t != "gguf"][:5],
                "params": params,
            })

            if len(matches) >= limit:
                break

    except Exception as e:
        # Cache hits are still worth returning even if the API call fails.
        logger.error(f"HF Hub search failed: {e}")

    return matches[:limit]
+
|
| 161 |
+
|
| 162 |
+
def parse_quantization(filename: str) -> Optional[str]:
    """Extract the quantization level from a GGUF filename.

    The result is always upper-cased (so "fp16" is reported as "FP16" —
    the previous docstring example claiming "fp16" was inconsistent with
    the code, which has always upper-cased).

    Examples:
        model-Q4_K_M.gguf  -> Q4_K_M
        model.Q5_K_S.gguf  -> Q5_K_S
        model-Q3_K_XL.gguf -> Q3_K_XL
        model-fp16.gguf    -> FP16

    Args:
        filename: GGUF filename.

    Returns:
        Upper-cased quantization string, or None if not recognised.
    """
    # Ordered most-specific first; matched case-insensitively.
    # Generalized to cover multi-letter suffixes (Q3_K_XL) and IQ quants
    # (IQ4_XS) that the old single-letter patterns missed.
    patterns = [
        r'[.-](I?Q[0-9]+_[A-Z0-9]+(?:_[A-Z0-9]+)*)\.gguf$',  # Q4_K_M, Q8_0, IQ4_XS
        r'[.-](fp16|fp32|bf16)\.gguf$',                      # unquantized float formats
    ]

    for pattern in patterns:
        match = re.search(pattern, filename, re.IGNORECASE)
        if match:
            return match.group(1).upper()

    return None
+
|
| 190 |
+
|
| 191 |
+
def list_repo_gguf_files(repo_id: str) -> Tuple[List[Dict[str, Any]], str]:
    """List all GGUF files in a HuggingFace repository with metadata.

    Args:
        repo_id: HuggingFace repository ID (e.g., 'unsloth/DeepSeek-R1-Distill-Qwen-7B-GGUF')

    Returns:
        Tuple of (files_list, error_message)
        - files_list: List of dicts with name, size_mb, quant, params, downloads
        - error_message: Empty string on success, error description on failure
    """
    if not repo_id or "/" not in repo_id:
        return [], "Invalid repo ID format. Use 'username/repo-name'"

    try:
        # List all files in repo, then keep only GGUF files.
        files = list(list_repo_files(repo_id))
        gguf_files = [f for f in files if f.endswith('.gguf')]

        if not gguf_files:
            return [], f"No GGUF files found in repository '{repo_id}'"

        # Repo-level download count is optional metadata; ignore lookup failures.
        try:
            from huggingface_hub import model_info
            repo_downloads = model_info(repo_id).downloads
        except Exception:  # fix: was a bare `except:` (swallowed KeyboardInterrupt/SystemExit)
            repo_downloads = 0

        result = []
        for filename in sorted(gguf_files):  # alphabetical ordering for the dropdown
            quant = parse_quantization(filename) or "Unknown"

            # Exact size would need a per-file metadata request;
            # placeholder is updated when the file is downloaded.
            size_mb = "Unknown"

            # Best-effort parameter-count guess from the filename (e.g. "7b", "1.5B").
            # Fix: the old list held two patterns that were identical under
            # re.IGNORECASE; a single case-insensitive search suffices.
            params = "Unknown"
            match = re.search(r'(\d+\.?\d*)b', filename, re.IGNORECASE)
            if match:
                params = f"{match.group(1)}B"

            result.append({
                "name": filename,
                "size_mb": size_mb,
                "quant": quant,
                "params": params,
                "downloads": repo_downloads,
            })

        return result, ""

    except Exception as e:
        # Map common failure modes to user-friendly messages.
        error_msg = str(e).lower()
        if "not found" in error_msg or "404" in error_msg:
            return [], f"Repository '{repo_id}' not found"
        elif "permission" in error_msg or "access" in error_msg:
            return [], f"Cannot access '{repo_id}' - may be private or gated"
        else:
            return [], f"Error listing files: {str(e)}"
+
|
| 263 |
+
|
| 264 |
+
def format_file_choice(file_info: Dict[str, Any]) -> str:
    """Format a file info dict for display in dropdown.

    Args:
        file_info: Dict with name, size_mb, quant, params, downloads

    Returns:
        Formatted string for dropdown display
    """
    name = file_info["name"]
    size = file_info["size_mb"]
    quant = file_info["quant"]
    params = file_info["params"]
    # Fix: HF Hub may report downloads as None; `.get(..., 0)` only covers a
    # missing key, so a present-but-None value crashed the `>=` comparisons.
    downloads = file_info.get("downloads") or 0

    # Human-readable download count (1.2M / 3.4K / 567).
    if downloads >= 1000000:
        dl_str = f"{downloads/1000000:.1f}M"
    elif downloads >= 1000:
        dl_str = f"{downloads/1000:.1f}K"
    else:
        dl_str = str(downloads)

    return f"📄 {name} | {size} | {quant} | {params} params | ⬇️ {dl_str}"
+
|
| 289 |
+
|
| 290 |
+
def build_system_prompt(output_language: str, supports_toggle: bool, enable_reasoning: bool) -> str:
    """Build the system prompt for the summarization task.

    The returned string is both shown in the debug accordion and sent to the
    LLM. It selects the language-specific base prompt and, for models that
    honour the reasoning toggle, appends the /think or /no_think marker.

    Args:
        output_language: Target language ("en" or "zh-TW")
        supports_toggle: Whether the model supports the reasoning toggle
        enable_reasoning: Whether reasoning mode is enabled

    Returns:
        The complete system prompt string
    """
    # Base prompt plus the separator used before the toggle marker
    # (Chinese prompt historically has no space before the marker).
    if output_language == "zh-TW":
        base, sep = "你是一個有助的助手,負責總結轉錄內容。", ""
    else:
        base, sep = "You are a helpful assistant that summarizes transcripts.", " "

    if not supports_toggle:
        return base

    marker = "/think" if enable_reasoning else "/no_think"
    return f"{base}{sep}{marker}"
+
|
| 317 |
+
|
| 318 |
+
def build_user_prompt(transcript: str, output_language: str) -> str:
    """Build the user prompt containing the transcript to summarize.

    Args:
        transcript: The transcript content to summarize
        output_language: Target language ("en" or "zh-TW")

    Returns:
        The user prompt string with the transcript
    """
    prefix = (
        "請總結以下內容:"
        if output_language == "zh-TW"
        else "Please summarize the following content:"
    )
    return f"{prefix}\n\n{transcript}"
+
|
| 333 |
+
|
| 334 |
+
def get_thread_count(thread_config: str, custom_threads: int) -> int:
    """Resolve the actual CPU thread count from a configuration preset.

    Args:
        thread_config: Thread preset ("free", "upgrade", "custom")
        custom_threads: Custom thread count when preset is "custom"

    Returns:
        Number of threads to use (custom values clamped to 1..32)
    """
    presets = {"free": 2, "upgrade": 8}
    if thread_config in presets:
        return presets[thread_config]
    # "custom" (and any unrecognised value): clamp to a sane range.
    return min(32, max(1, custom_threads))
+
|
| 351 |
+
|
| 352 |
+
def load_custom_model_from_hf(repo_id: str, filename: str, n_threads: int) -> Tuple[Optional[Llama], str]:
    """Load a custom GGUF model from HuggingFace Hub.

    Args:
        repo_id: HuggingFace repository ID
        filename: GGUF filename to load
        n_threads: Number of CPU threads

    Returns:
        Tuple of (model_or_none, message)
    """
    try:
        # Fix: log/status messages previously contained a literal "(unknown)"
        # placeholder instead of interpolating the filename being loaded.
        logger.info(f"Loading custom model from {repo_id}/{filename}")

        # Conservative defaults for custom models of unknown size.
        n_ctx = 8192
        n_batch = 512
        n_gpu_layers = 0  # CPU only for safety

        model = Llama.from_pretrained(
            repo_id=repo_id,
            filename=filename,
            n_ctx=n_ctx,
            n_batch=n_batch,
            n_threads=n_threads,
            n_gpu_layers=n_gpu_layers,
            verbose=False,
        )

        return model, f"Successfully loaded {repo_id}/{filename}"

    except Exception as e:
        error_msg = str(e)
        logger.error(f"Failed to load custom model: {error_msg}")

        # Map common failure modes to user-friendly messages.
        lowered = error_msg.lower()
        if "not found" in lowered:
            return None, f"Model or file not found: {repo_id}/{filename}"
        elif "permission" in lowered:
            return None, f"Access denied (model may be private/gated): {repo_id}"
        elif "memory" in lowered or "oom" in lowered:
            return None, f"Out of memory loading model. Try a smaller file or lower quantization."
        else:
            return None, f"Error loading model: {error_msg}"
+
|
| 396 |
+
|
| 397 |
# Thread configuration from environment variable
|
| 398 |
def _get_default_thread_config():
|
| 399 |
"""Get default thread configuration from environment variable."""
|
|
|
|
| 769 |
"repeat_penalty": 1.0,
|
| 770 |
},
|
| 771 |
},
|
| 772 |
+
"custom_hf": {
|
| 773 |
+
"name": "🔧 Custom HF GGUF...",
|
| 774 |
+
"repo_id": None,
|
| 775 |
+
"filename": None,
|
| 776 |
+
"max_context": 8192,
|
| 777 |
+
"default_temperature": 0.6,
|
| 778 |
+
"supports_reasoning": False,
|
| 779 |
+
"supports_toggle": False,
|
| 780 |
+
"inference_settings": {
|
| 781 |
+
"temperature": 0.6,
|
| 782 |
+
"top_p": 0.95,
|
| 783 |
+
"top_k": 40,
|
| 784 |
+
"repeat_penalty": 1.0,
|
| 785 |
+
},
|
| 786 |
+
},
|
| 787 |
}
|
| 788 |
|
| 789 |
DEFAULT_MODEL_KEY = "qwen3_600m_q4"
|
|
|
|
| 1128 |
output_language: str = "en",
|
| 1129 |
thread_config: str = "free",
|
| 1130 |
custom_threads: int = 4,
|
| 1131 |
+
custom_model_state: Any = None,
|
| 1132 |
+
) -> Generator[Tuple[str, str, str, dict, str], None, None]:
|
| 1133 |
"""
|
| 1134 |
Stream summary generation from uploaded file.
|
| 1135 |
|
|
|
|
| 1141 |
top_p: Nucleus sampling parameter (uses model default if None)
|
| 1142 |
top_k: Top-k sampling parameter (uses model default if None)
|
| 1143 |
output_language: Target language for summary ("en" or "zh-TW")
|
| 1144 |
+
thread_config: Thread configuration preset ("free", "upgrade", "custom")
|
| 1145 |
+
custom_threads: Custom thread count when preset is "custom"
|
| 1146 |
+
custom_model_state: Pre-loaded custom model (if using custom_hf)
|
| 1147 |
|
| 1148 |
Yields:
|
| 1149 |
+
Tuple of (thinking_text, summary_text, info_text, metrics_dict, system_prompt)
|
| 1150 |
"""
|
| 1151 |
import time
|
| 1152 |
|
|
|
|
| 1194 |
# Read uploaded file
|
| 1195 |
try:
|
| 1196 |
if file_obj is None:
|
| 1197 |
+
system_prompt_preview = build_system_prompt(output_language, False, enable_reasoning)
|
| 1198 |
+
yield ("", "Error: Please upload a transcript file first", "", metrics, system_prompt_preview)
|
| 1199 |
return
|
| 1200 |
|
| 1201 |
path = file_obj.name if hasattr(file_obj, 'name') else file_obj
|
|
|
|
| 1214 |
"original_char_count": len(transcript),
|
| 1215 |
}
|
| 1216 |
except Exception as e:
|
| 1217 |
+
system_prompt_preview = build_system_prompt(output_language, False, enable_reasoning)
|
| 1218 |
+
yield ("", f"Error reading file: {e}", "", metrics, system_prompt_preview)
|
| 1219 |
return
|
| 1220 |
|
| 1221 |
if not transcript.strip():
|
| 1222 |
+
system_prompt_preview = build_system_prompt(output_language, False, enable_reasoning)
|
| 1223 |
+
yield ("", "Error: File is empty", "", metrics, system_prompt_preview)
|
| 1224 |
return
|
| 1225 |
|
| 1226 |
# Calculate context and check truncation (with reasoning buffer if enabled)
|
|
|
|
| 1273 |
# Load model (no-op if already loaded) with timing
|
| 1274 |
model_load_start = time.time()
|
| 1275 |
try:
|
| 1276 |
+
if model_key == "custom_hf":
|
| 1277 |
+
# Use pre-loaded custom model
|
| 1278 |
+
if custom_model_state is None:
|
| 1279 |
+
system_prompt_preview = build_system_prompt(output_language, False, enable_reasoning)
|
| 1280 |
+
yield ("", "Error: No custom model loaded. Please load a custom model first.", "", metrics, system_prompt_preview)
|
| 1281 |
+
return
|
| 1282 |
+
llm = custom_model_state
|
| 1283 |
+
load_msg = "Using pre-loaded custom model"
|
| 1284 |
+
else:
|
| 1285 |
+
llm, load_msg = load_model(model_key, n_threads=n_threads)
|
| 1286 |
logger.info(load_msg)
|
| 1287 |
metrics["model_load_time_ms"] = (time.time() - model_load_start) * 1000
|
| 1288 |
except Exception as e:
|
| 1289 |
+
system_prompt_preview = build_system_prompt(output_language, False, enable_reasoning)
|
| 1290 |
+
yield ("", f"Error loading model: {e}", "", metrics, system_prompt_preview)
|
| 1291 |
return
|
| 1292 |
|
| 1293 |
# Prepare system prompt with reasoning toggle for Qwen3 models
|
| 1294 |
+
if model_key == "custom_hf":
|
| 1295 |
+
# Use default settings for custom models
|
| 1296 |
+
model = AVAILABLE_MODELS["custom_hf"]
|
| 1297 |
+
else:
|
| 1298 |
+
model = AVAILABLE_MODELS[model_key]
|
| 1299 |
|
| 1300 |
# Calculate dynamic temperature for Qwen3 models
|
| 1301 |
if model.get("supports_toggle") and "temperature_thinking" in model.get("inference_settings", {}):
|
|
|
|
| 1305 |
effective_temperature = model["inference_settings"]["temperature_no_thinking"]
|
| 1306 |
else:
|
| 1307 |
effective_temperature = temperature
|
| 1308 |
+
|
| 1309 |
+
# Build system and user prompts using the extracted function
|
| 1310 |
+
system_content = build_system_prompt(output_language, model.get("supports_toggle", False), enable_reasoning)
|
| 1311 |
+
user_content = build_user_prompt(transcript, output_language)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1312 |
|
| 1313 |
messages = [
|
| 1314 |
{"role": "system", "content": system_content},
|
|
|
|
| 1386 |
thinking, summary = parse_thinking_blocks(full_response, streaming=True)
|
| 1387 |
current_thinking = thinking or ""
|
| 1388 |
current_summary = summary or ""
|
| 1389 |
+
yield (current_thinking, current_summary, info, metrics, system_content)
|
| 1390 |
|
| 1391 |
# Final timing calculations
|
| 1392 |
metrics["generation_end_time"] = time.time()
|
|
|
|
| 1424 |
# Update totals
|
| 1425 |
metrics["total_tokens"] = metrics["input_tokens"] + metrics["output_tokens"] + metrics["thinking_tokens"]
|
| 1426 |
|
| 1427 |
+
yield (thinking or "", summary or "", info, metrics, system_content)
|
| 1428 |
|
| 1429 |
llm.reset()
|
| 1430 |
|
| 1431 |
except Exception as e:
|
| 1432 |
logger.error(f"Generation error: {e}")
|
| 1433 |
metrics["error"] = str(e)
|
| 1434 |
+
yield (current_thinking, current_summary + f"\n\nError: {e}", info, metrics, system_content)
|
| 1435 |
|
| 1436 |
|
| 1437 |
# Custom CSS for better UI
|
|
|
|
| 1667 |
visible=AVAILABLE_MODELS[DEFAULT_MODEL_KEY].get("supports_toggle", False)
|
| 1668 |
)
|
| 1669 |
|
| 1670 |
+
# Custom Model UI (hidden by default, shown when custom_hf selected)
|
| 1671 |
+
with gr.Group(visible=False) as custom_model_group:
|
| 1672 |
+
gr.HTML('<div class="section-header" style="margin-top: 20px;"><span class="section-icon">🔧</span> Custom HuggingFace Model</div>')
|
| 1673 |
+
|
| 1674 |
+
custom_repo_id = gr.Textbox(
|
| 1675 |
+
label="HuggingFace Repo ID",
|
| 1676 |
+
placeholder="e.g., unsloth/DeepSeek-R1-Distill-Qwen-7B-GGUF",
|
| 1677 |
+
info="Enter repository ID (format: username/model-name). Popular models will be suggested as you type.",
|
| 1678 |
+
interactive=True,
|
| 1679 |
+
)
|
| 1680 |
+
|
| 1681 |
+
# Hidden fields to store discovered file data
|
| 1682 |
+
custom_repo_files = gr.State([])
|
| 1683 |
+
|
| 1684 |
+
# File dropdown (populated after repo discovery)
|
| 1685 |
+
custom_file_dropdown = gr.Dropdown(
|
| 1686 |
+
label="Available GGUF Files",
|
| 1687 |
+
choices=[],
|
| 1688 |
+
value=None,
|
| 1689 |
+
info="Files will be auto-discovered when you stop typing (alphabetically sorted)",
|
| 1690 |
+
interactive=True,
|
| 1691 |
+
visible=True,
|
| 1692 |
+
)
|
| 1693 |
+
|
| 1694 |
+
# Action buttons
|
| 1695 |
+
with gr.Row():
|
| 1696 |
+
discover_btn = gr.Button("🔍 Discover Files", variant="secondary", size="sm")
|
| 1697 |
+
load_btn = gr.Button("⬇️ Load Selected Model", variant="primary", size="sm")
|
| 1698 |
+
|
| 1699 |
+
# Status message
|
| 1700 |
+
custom_status = gr.Textbox(
|
| 1701 |
+
label="Status",
|
| 1702 |
+
interactive=False,
|
| 1703 |
+
value="",
|
| 1704 |
+
visible=False,
|
| 1705 |
+
)
|
| 1706 |
+
|
| 1707 |
+
retry_btn = gr.Button("🔄 Retry", variant="secondary", visible=False)
|
| 1708 |
+
|
| 1709 |
gr.HTML('<div class="section-header" style="margin-top: 20px;"><span class="section-icon">📤</span> Upload File</div>')
|
| 1710 |
|
| 1711 |
file_input = gr.File(
|
|
|
|
| 1781 |
|
| 1782 |
# Hidden state to store generation metrics
|
| 1783 |
metrics_state = gr.State(value={})
|
| 1784 |
+
|
| 1785 |
+
# Hidden state to store loaded custom model
|
| 1786 |
+
custom_model_state = gr.State(value=None)
|
| 1787 |
|
| 1788 |
# Model info section (dynamic)
|
| 1789 |
with gr.Group():
|
|
|
|
| 1825 |
|
| 1826 |
# File output component for download
|
| 1827 |
download_output = gr.File(label="Download JSON", visible=True)
|
| 1828 |
+
|
| 1829 |
+
# Debug: System Prompt display
|
| 1830 |
+
with gr.Accordion("🐛 Debug: System Prompt", open=False):
|
| 1831 |
+
system_prompt_debug = gr.Textbox(
|
| 1832 |
+
label="System Prompt (Read-Only)",
|
| 1833 |
+
lines=5,
|
| 1834 |
+
max_lines=10,
|
| 1835 |
+
interactive=False,
|
| 1836 |
+
value="Select a model and click 'Generate Summary' to see the system prompt.",
|
| 1837 |
+
info="This shows the exact system prompt sent to the LLM"
|
| 1838 |
+
)
|
| 1839 |
|
| 1840 |
# Function to update settings when model changes
|
| 1841 |
def update_settings_on_model_change(model_key, thread_config, custom_threads):
|
|
|
|
| 1905 |
inputs=[summary_output, thinking_output, model_dropdown, language_selector, metrics_state],
|
| 1906 |
outputs=[download_output]
|
| 1907 |
)
|
| 1908 |
+
|
| 1909 |
+
# ==========================================
|
| 1910 |
+
# NEW: Custom Model Loader Event Handlers
|
| 1911 |
+
# ==========================================
|
| 1912 |
+
|
| 1913 |
+
# Show/hide custom model UI based on model selection
|
| 1914 |
+
def toggle_custom_model_ui(model_key):
    """Reveal the custom-model loader group only when 'custom_hf' is selected."""
    return gr.update(visible=(model_key == "custom_hf"))
|
| 1918 |
+
|
| 1919 |
+
# Show/hide the custom-model loader UI whenever the model selection changes.
model_dropdown.change(
    fn=toggle_custom_model_ui,
    inputs=[model_dropdown],
    outputs=[custom_model_group],
)
|
| 1924 |
+
|
| 1925 |
+
# Update system prompt debug when model or reasoning changes
|
| 1926 |
+
def update_system_prompt_debug(model_key, enable_reasoning, language):
    """Return the system prompt that the current settings would send to the LLM.

    Shown in the read-only debug accordion so users can inspect exactly what
    the model receives.
    """
    # No model selected yet — nothing meaningful to render.
    if not model_key:
        return "Select a model to see the system prompt."

    # Whether this model supports the reasoning on/off toggle (defaults False
    # for unknown keys, matching a missing "supports_toggle" entry).
    model_cfg = AVAILABLE_MODELS.get(model_key, {})
    return build_system_prompt(
        language,
        model_cfg.get("supports_toggle", False),
        enable_reasoning,
    )
|
| 1936 |
+
|
| 1937 |
+
# Keep the debug system-prompt display in sync with every control that
# influences it: model, reasoning toggle, and output language.
model_dropdown.change(
    fn=update_system_prompt_debug,
    inputs=[model_dropdown, enable_reasoning, language_selector],
    outputs=[system_prompt_debug],
)

enable_reasoning.change(
    fn=update_system_prompt_debug,
    inputs=[model_dropdown, enable_reasoning, language_selector],
    outputs=[system_prompt_debug],
)

language_selector.change(
    fn=update_system_prompt_debug,
    inputs=[model_dropdown, enable_reasoning, language_selector],
    outputs=[system_prompt_debug],
)
|
| 1954 |
+
|
| 1955 |
+
# Debounced auto-discovery for custom repo ID (500ms delay)
|
| 1956 |
+
import time as time_module
|
| 1957 |
+
|
| 1958 |
+
def discover_custom_files(repo_id):
    """Discover GGUF files in a HuggingFace repo, streaming UI updates.

    Generator yielding (dropdown_update, files_metadata, status_message)
    tuples for the [custom_file_dropdown, custom_repo_files, custom_status]
    outputs.
    """
    # Guard: a repo id must look like "owner/name". Because this function is
    # a generator, a bare `return value` would only raise StopIteration and
    # the message would never reach the UI — it must be *yielded*.
    if not repo_id or "/" not in repo_id:
        yield [], [], "Enter a valid HuggingFace Repo ID above (e.g., unsloth/DeepSeek-R1-Distill-Qwen-7B-GGUF)"
        return

    # Show an in-progress state (dropdown disabled) while the Hub is queried.
    yield gr.update(choices=["Searching..."], value=None, interactive=False), [], "🔍 Searching for GGUF files..."

    # Small delay so the "Searching..." state is visible to the user.
    time_module.sleep(0.5)

    files, error = list_repo_gguf_files(repo_id)

    if error:
        # Hub lookup failed — show the error, leave the dropdown empty.
        yield gr.update(choices=[], value=None, interactive=True), [], f"❌ {error}"
    elif not files:
        # Repo exists but contains no GGUF files.
        yield gr.update(choices=[], value=None, interactive=True), [], "❌ No GGUF files found in this repository"
    else:
        # Success — format each file as a display string and preselect the first.
        choices = [format_file_choice(f) for f in files]
        yield gr.update(choices=choices, value=choices[0] if choices else None, interactive=True), files, "✅ Files discovered! Select one and click 'Load Selected Model'"
|
| 1981 |
+
|
| 1982 |
+
# Manual discovery trigger: lists the repo's GGUF files into the dropdown,
# stores their metadata, and updates the status line.
discover_btn.click(
    fn=discover_custom_files,
    inputs=[custom_repo_id],
    outputs=[custom_file_dropdown, custom_repo_files, custom_status],
)
|
| 1988 |
+
|
| 1989 |
+
# Load selected custom model
|
| 1990 |
+
def load_custom_model_selected(repo_id, selected_file_display, files_data):
    """Download and load the selected custom GGUF model, streaming status.

    Generator yielding (status_message, retry_button_update, model_or_None)
    tuples for the [custom_status, retry_btn, custom_model_state] outputs.
    """
    # Guard clauses must `yield` rather than `return` a value: this function
    # is a generator, so a returned tuple would only raise StopIteration and
    # the error message would never reach the UI.
    if not repo_id or not selected_file_display:
        yield "❌ Please enter a Repo ID and select a file first", gr.update(visible=False), None
        return

    # The dropdown entry reads "📄 filename | size | quant | params | downloads";
    # recover the bare filename from the first segment.
    filename = selected_file_display.split(" | ")[0].replace("📄 ", "").strip()

    if not filename:
        yield "❌ Could not parse filename from selection", gr.update(visible=False), None
        return

    yield "⏳ Loading model... (this may take a while for large files)", gr.update(visible=False), None

    try:
        # NOTE(review): reading `.value` on Gradio components gives their
        # *initial* values, not the user's live selection — these should be
        # passed as event inputs instead; confirm against the wiring.
        n_threads = get_thread_count(thread_config_dropdown.value, custom_threads_slider.value)
        llm, load_msg = load_custom_model_from_hf(repo_id, filename, n_threads)

        if llm is None:
            # Load failed — surface the message and expose the retry button.
            yield f"❌ {load_msg}", gr.update(visible=True), None
        else:
            # Look up the file's metadata for a friendlier success message.
            # Tolerate a missing/None files_data (the state's initial value).
            model_info = next((f for f in (files_data or []) if f["name"] == filename), {})
            size_info = f" ({model_info.get('size_mb', 'Unknown')} MB)" if model_info else ""
            yield f"✅ Model loaded successfully{size_info}! Ready to generate summaries.", gr.update(visible=False), llm

    except Exception as e:
        # Any unexpected failure: report it and offer a retry.
        yield f"❌ Error loading model: {str(e)}", gr.update(visible=True), None
|
| 2020 |
+
|
| 2021 |
+
# Load the selected custom GGUF model; the retry button re-runs the same
# handler with the same inputs/outputs.
load_btn.click(
    fn=load_custom_model_selected,
    inputs=[custom_repo_id, custom_file_dropdown, custom_repo_files],
    outputs=[custom_status, retry_btn, custom_model_state],
)

retry_btn.click(
    fn=load_custom_model_selected,
    inputs=[custom_repo_id, custom_file_dropdown, custom_repo_files],
    outputs=[custom_status, retry_btn, custom_model_state],
)
|
| 2033 |
+
|
| 2034 |
+
# Also update submit button to use custom model state
# Note: We'll modify the summarize_streaming function to accept custom_model_state

# ==========================================
# END: Custom Model Loader Event Handlers
# ==========================================

# Main generation trigger: passes custom_model_state as the final input and
# refreshes the debug system-prompt display alongside the streamed outputs.
submit_btn.click(
    fn=summarize_streaming,
    inputs=[file_input, model_dropdown, enable_reasoning, max_tokens, temperature_slider, top_p, top_k, language_selector, thread_config_dropdown, custom_threads_slider, custom_model_state],
    outputs=[thinking_output, summary_output, info_output, metrics_state, system_prompt_debug],
    show_progress="full"
)
|
| 2048 |
|
| 2049 |
# Footer
|
| 2050 |
gr.HTML("""
|