Spaces:
Running
Running
Improve UI/UX: Modern glassmorphism design, added Paste Text tab, and optimized visual hierarchy
Browse files- .opencode/plans/debug_and_custom_model.md +439 -0
- .opencode/plans/fix_custom_model_info.md +286 -0
- .opencode/plans/redesign_custom_gguf_loader.md +309 -0
- GEMINI.md +74 -0
- app.py +171 -300
.opencode/plans/debug_and_custom_model.md
ADDED
|
@@ -0,0 +1,439 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Implementation Plan: Debug System Prompt & Custom GGUF Loader
|
| 2 |
+
|
| 3 |
+
## Feature 1: Debug System Prompt Display
|
| 4 |
+
|
| 5 |
+
### Purpose
|
| 6 |
+
Show users the exact system prompt that will be sent to the LLM for transparency and debugging.
|
| 7 |
+
|
| 8 |
+
### Current State
|
| 9 |
+
The system prompt is built inline in `summarize_streaming()` (lines ~903-916) but never exposed to the UI.
|
| 10 |
+
|
| 11 |
+
### Implementation Plan
|
| 12 |
+
|
| 13 |
+
#### Step 1: Extract Prompt Builder Function
|
| 14 |
+
**Location**: Add new function in `app.py` around line 880
|
| 15 |
+
|
| 16 |
+
```python
|
| 17 |
+
def build_system_prompt(length: str, format_type: str, language: str, enable_reasoning: bool, supports_think_tags: bool) -> str:
|
| 18 |
+
"""Build the system prompt that will be sent to the LLM.
|
| 19 |
+
|
| 20 |
+
Args:
|
| 21 |
+
length: "tiny", "short", "medium", "long"
|
| 22 |
+
format_type: "bullets", "paragraph", "structured"
|
| 23 |
+
language: "en", "zh-TW"
|
| 24 |
+
enable_reasoning: Whether reasoning mode is enabled
|
| 25 |
+
supports_think_tags: Whether the model supports <think> tags
|
| 26 |
+
|
| 27 |
+
Returns:
|
| 28 |
+
The complete system prompt string
|
| 29 |
+
"""
|
| 30 |
+
# Length configurations (existing)
|
| 31 |
+
length_prompts = {
|
| 32 |
+
"tiny": f"""Provide a {format_type} summary in 2-3 sentences covering:
|
| 33 |
+
- Main topic and key points
|
| 34 |
+
- Most important finding or conclusion
|
| 35 |
+
- Practical takeaway""",
|
| 36 |
+
"short": f"""Provide a {format_type} summary in 3-5 sentences covering:
|
| 37 |
+
- Main topic and purpose
|
| 38 |
+
- 2-3 key points or findings
|
| 39 |
+
- Conclusion or recommendation""",
|
| 40 |
+
"medium": f"""Provide a {format_type} summary in 1-2 paragraphs covering:
|
| 41 |
+
- Main topic and context
|
| 42 |
+
- Key points with brief explanations
|
| 43 |
+
- Supporting details
|
| 44 |
+
- Conclusions and recommendations""",
|
| 45 |
+
"long": f"""Provide a comprehensive {format_type} summary in 3-4 paragraphs covering:
|
| 46 |
+
- Background and context
|
| 47 |
+
- All major points with detailed explanations
|
| 48 |
+
- Supporting evidence and examples
|
| 49 |
+
- Different perspectives if present
|
| 50 |
+
- Conclusions, implications, and actionable recommendations""",
|
| 51 |
+
}
|
| 52 |
+
|
| 53 |
+
base_prompt = length_prompts.get(length, length_prompts["medium"])
|
| 54 |
+
|
| 55 |
+
if language == "zh-TW":
|
| 56 |
+
if enable_reasoning and supports_think_tags:
|
| 57 |
+
system_content = f"You are a helpful assistant that summarizes transcripts. First think through the content in <thinking> tags, then provide the summary.\n\n{base_prompt}\n\nPlease respond in Traditional Chinese (Taiwan)."
|
| 58 |
+
else:
|
| 59 |
+
system_content = f"You are a helpful assistant that summarizes transcripts.\n\n{base_prompt}\n\nPlease respond in Traditional Chinese (Taiwan)."
|
| 60 |
+
else:
|
| 61 |
+
if enable_reasoning and supports_think_tags:
|
| 62 |
+
system_content = f"You are a helpful assistant that summarizes transcripts. First think through the content in <thinking> tags, then provide the summary.\n\n{base_prompt}"
|
| 63 |
+
else:
|
| 64 |
+
system_content = f"You are a helpful assistant that summarizes transcripts.\n\n{base_prompt}"
|
| 65 |
+
|
| 66 |
+
return system_content
|
| 67 |
+
```
|
| 68 |
+
|
| 69 |
+
#### Step 2: Refactor summarize_streaming()
|
| 70 |
+
**Location**: Lines ~903-916 in `app.py`
|
| 71 |
+
|
| 72 |
+
Replace inline prompt building with call to `build_system_prompt()`:
|
| 73 |
+
```python
|
| 74 |
+
# OLD CODE (to replace):
|
| 75 |
+
length_prompts = {...} # Remove this dict
|
| 76 |
+
# ... if language == "zh-TW": logic ...
|
| 77 |
+
|
| 78 |
+
# NEW CODE:
|
| 79 |
+
system_content = build_system_prompt(
|
| 80 |
+
length=length,
|
| 81 |
+
format_type=format_type,
|
| 82 |
+
language=language,
|
| 83 |
+
enable_reasoning=enable_reasoning,
|
| 84 |
+
supports_think_tags=supports_think_tags
|
| 85 |
+
)
|
| 86 |
+
```
|
| 87 |
+
|
| 88 |
+
#### Step 3: Add UI Component
|
| 89 |
+
**Location**: In the right column interface, after the summary output (around line 1370)
|
| 90 |
+
|
| 91 |
+
Add a collapsible accordion:
|
| 92 |
+
```python
|
| 93 |
+
with gr.Accordion("Debug: System Prompt", open=False):
|
| 94 |
+
system_prompt_debug = gr.Textbox(
|
| 95 |
+
label="System Prompt (Read-Only)",
|
| 96 |
+
lines=10,
|
| 97 |
+
max_lines=20,
|
| 98 |
+
interactive=False,
|
| 99 |
+
show_copy_button=True,
|
| 100 |
+
value="Click 'Generate Summary' to see the system prompt that will be used."
|
| 101 |
+
)
|
| 102 |
+
```
|
| 103 |
+
|
| 104 |
+
#### Step 4: Update Event Handlers
|
| 105 |
+
**Location**: In `generate_summary()` function
|
| 106 |
+
|
| 107 |
+
Pass the built system prompt to the output:
|
| 108 |
+
```python
|
| 109 |
+
def generate_summary(model_key, thread_config, custom_threads, transcript_text,
|
| 110 |
+
summary_length, output_format, language, enable_reasoning,
|
| 111 |
+
enable_streaming, progress=gr.Progress()):
|
| 112 |
+
# ... existing code ...
|
| 113 |
+
|
| 114 |
+
# Build system prompt for display
|
| 115 |
+
selected_model = AVAILABLE_MODELS[model_key]
|
| 116 |
+
supports_think_tags = selected_model.get("supports_toggle", False) or selected_model.get("supports_reasoning", False)
|
| 117 |
+
system_prompt_preview = build_system_prompt(
|
| 118 |
+
length=summary_length,
|
| 119 |
+
format_type=output_format,
|
| 120 |
+
language=language,
|
| 121 |
+
enable_reasoning=enable_reasoning,
|
| 122 |
+
supports_think_tags=supports_think_tags
|
| 123 |
+
)
|
| 124 |
+
|
| 125 |
+
# ... rest of summarization logic ...
|
| 126 |
+
|
| 127 |
+
# Return the system prompt along with other outputs
|
| 128 |
+
yield final_summary, thinking_text, json_output, system_prompt_preview, status_msg
|
| 129 |
+
```
|
| 130 |
+
|
| 131 |
+
#### Step 5: Update Gradio Outputs
|
| 132 |
+
**Location**: Line ~1435
|
| 133 |
+
|
| 134 |
+
Add `system_prompt_debug` to outputs list:
|
| 135 |
+
```python
|
| 136 |
+
outputs=[summary_output, thinking_output, json_output, system_prompt_debug, status_message]
|
| 137 |
+
```
|
| 138 |
+
|
| 139 |
+
---
|
| 140 |
+
|
| 141 |
+
## Feature 2: Custom GGUF Loader from HuggingFace
|
| 142 |
+
|
| 143 |
+
### Purpose
|
| 144 |
+
Allow users to load any GGUF model from HuggingFace, not just the predefined list.
|
| 145 |
+
|
| 146 |
+
### Implementation Plan
|
| 147 |
+
|
| 148 |
+
#### Step 1: Add Custom Model Option
|
| 149 |
+
**Location**: In AVAILABLE_MODELS dict (around line 120)
|
| 150 |
+
|
| 151 |
+
Add as the last entry:
|
| 152 |
+
```python
|
| 153 |
+
AVAILABLE_MODELS = {
|
| 154 |
+
# ... existing models ...
|
| 155 |
+
|
| 156 |
+
"custom_hf": {
|
| 157 |
+
"display": "Custom HF GGUF...",
|
| 158 |
+
"repo_id": None, # Will be provided by user
|
| 159 |
+
"filename": None, # Will be provided by user
|
| 160 |
+
"quantization": None,
|
| 161 |
+
"description": "Load any GGUF model from HuggingFace",
|
| 162 |
+
"size_mb": 0, # Unknown
|
| 163 |
+
"n_gpu_layers": 0,
|
| 164 |
+
"n_ctx": 8192,
|
| 165 |
+
"max_tokens": 4096,
|
| 166 |
+
"supports_reasoning": False,
|
| 167 |
+
"supports_toggle": False,
|
| 168 |
+
},
|
| 169 |
+
}
|
| 170 |
+
```
|
| 171 |
+
|
| 172 |
+
#### Step 2: Add Custom Model UI Components
|
| 173 |
+
**Location**: In the left column, after model dropdown (around line 1270)
|
| 174 |
+
|
| 175 |
+
```python
|
| 176 |
+
# Custom model inputs (hidden by default)
|
| 177 |
+
with gr.Group(visible=False) as custom_model_group:
|
| 178 |
+
gr.Markdown("### Custom HuggingFace Model")
|
| 179 |
+
custom_repo_id = gr.Textbox(
|
| 180 |
+
label="HuggingFace Repo ID",
|
| 181 |
+
placeholder="e.g., unsloth/DeepSeek-R1-Distill-Qwen-7B-GGUF",
|
| 182 |
+
info="The HuggingFace repository containing the GGUF file",
|
| 183 |
+
)
|
| 184 |
+
custom_filename = gr.Textbox(
|
| 185 |
+
label="GGUF Filename Pattern",
|
| 186 |
+
placeholder="e.g., *Q4_K_M.gguf or exact filename",
|
| 187 |
+
info="Use * as wildcard or provide exact filename",
|
| 188 |
+
)
|
| 189 |
+
custom_load_btn = gr.Button("Load Custom Model", variant="primary")
|
| 190 |
+
custom_error_message = gr.Textbox(
|
| 191 |
+
label="Status",
|
| 192 |
+
interactive=False,
|
| 193 |
+
visible=False,
|
| 194 |
+
)
|
| 195 |
+
custom_retry_btn = gr.Button("Retry", variant="secondary", visible=False)
|
| 196 |
+
```
|
| 197 |
+
|
| 198 |
+
#### Step 3: Add Visibility Toggle Handler
|
| 199 |
+
**Location**: Add new event handler around line 1490
|
| 200 |
+
|
| 201 |
+
```python
|
| 202 |
+
def update_custom_model_visibility(model_key):
|
| 203 |
+
"""Show/hide custom model inputs based on selection."""
|
| 204 |
+
is_custom = model_key == "custom_hf"
|
| 205 |
+
return gr.update(visible=is_custom)
|
| 206 |
+
|
| 207 |
+
# Add event handler
|
| 208 |
+
model_dropdown.change(
|
| 209 |
+
update_custom_model_visibility,
|
| 210 |
+
inputs=[model_dropdown],
|
| 211 |
+
outputs=[custom_model_group],
|
| 212 |
+
)
|
| 213 |
+
```
|
| 214 |
+
|
| 215 |
+
#### Step 4: Create Custom Model Loader Function
|
| 216 |
+
**Location**: Add new function around line 710
|
| 217 |
+
|
| 218 |
+
```python
|
| 219 |
+
def load_custom_model(repo_id: str, filename: str, cpu_only: bool = False) -> Tuple[Optional[Llama], str]:
|
| 220 |
+
"""Load a custom GGUF model from HuggingFace.
|
| 221 |
+
|
| 222 |
+
Args:
|
| 223 |
+
repo_id: HuggingFace repository ID
|
| 224 |
+
filename: Filename pattern or exact name
|
| 225 |
+
cpu_only: Whether to use CPU only
|
| 226 |
+
|
| 227 |
+
Returns:
|
| 228 |
+
Tuple of (model_instance, error_message)
|
| 229 |
+
If successful, error_message is empty string
|
| 230 |
+
If failed, model_instance is None
|
| 231 |
+
"""
|
| 232 |
+
if not repo_id or not filename:
|
| 233 |
+
return None, "❌ Error: Please provide both Repo ID and Filename"
|
| 234 |
+
|
| 235 |
+
# Validate repo_id format
|
| 236 |
+
if "/" not in repo_id:
|
| 237 |
+
return None, "❌ Error: Repo ID must be in format 'username/repo-name'"
|
| 238 |
+
|
| 239 |
+
try:
|
| 240 |
+
n_gpu_layers = 0 if cpu_only else -1
|
| 241 |
+
n_ctx = 8192 # Conservative default for custom models
|
| 242 |
+
n_batch = 512
|
| 243 |
+
|
| 244 |
+
llm = Llama.from_pretrained(
|
| 245 |
+
repo_id=repo_id,
|
| 246 |
+
filename=filename,
|
| 247 |
+
n_gpu_layers=n_gpu_layers,
|
| 248 |
+
n_ctx=n_ctx,
|
| 249 |
+
n_batch=n_batch,
|
| 250 |
+
verbose=False,
|
| 251 |
+
)
|
| 252 |
+
|
| 253 |
+
return llm, ""
|
| 254 |
+
|
| 255 |
+
except Exception as e:
|
| 256 |
+
error_msg = str(e)
|
| 257 |
+
if "not found" in error_msg.lower():
|
| 258 |
+
return None, f"❌ Error: Model or file not found. Check repo_id and filename.\nDetails: {error_msg}"
|
| 259 |
+
elif "permission" in error_msg.lower() or "access" in error_msg.lower():
|
| 260 |
+
return None, f"❌ Error: Cannot access model. It may be private or gated.\nDetails: {error_msg}"
|
| 261 |
+
else:
|
| 262 |
+
return None, f"❌ Error loading model: {error_msg}"
|
| 263 |
+
```
|
| 264 |
+
|
| 265 |
+
#### Step 5: Add Custom Model Loading Handler
|
| 266 |
+
**Location**: Add around line 1510
|
| 267 |
+
|
| 268 |
+
```python
|
| 269 |
+
def handle_custom_model_load(repo_id, filename, cpu_only):
|
| 270 |
+
"""Handle custom model loading with error display and retry option."""
|
| 271 |
+
llm, error = load_custom_model(repo_id, filename, cpu_only)
|
| 272 |
+
|
| 273 |
+
if llm is None:
|
| 274 |
+
# Show error and retry button
|
| 275 |
+
return (
|
| 276 |
+
gr.update(visible=True, value=error), # error_message
|
| 277 |
+
gr.update(visible=True), # retry_btn
|
| 278 |
+
None, # model_instance (store somewhere accessible)
|
| 279 |
+
)
|
| 280 |
+
else:
|
| 281 |
+
# Success - hide error, show success message
|
| 282 |
+
return (
|
| 283 |
+
gr.update(visible=True, value="✅ Model loaded successfully!"),
|
| 284 |
+
gr.update(visible=False), # retry_btn
|
| 285 |
+
llm, # Store model instance
|
| 286 |
+
)
|
| 287 |
+
|
| 288 |
+
custom_load_btn.click(
|
| 289 |
+
handle_custom_model_load,
|
| 290 |
+
inputs=[custom_repo_id, custom_filename, cpu_only_checkbox],
|
| 291 |
+
outputs=[custom_error_message, custom_retry_btn, model_state], # model_state is gr.State()
|
| 292 |
+
)
|
| 293 |
+
|
| 294 |
+
custom_retry_btn.click(
|
| 295 |
+
handle_custom_model_load,
|
| 296 |
+
inputs=[custom_repo_id, custom_filename, cpu_only_checkbox],
|
| 297 |
+
outputs=[custom_error_message, custom_retry_btn, model_state],
|
| 298 |
+
)
|
| 299 |
+
```
|
| 300 |
+
|
| 301 |
+
#### Step 6: Update Generate Summary for Custom Models
|
| 302 |
+
**Location**: In `generate_summary()` function
|
| 303 |
+
|
| 304 |
+
Modify to handle custom models:
|
| 305 |
+
```python
|
| 306 |
+
def generate_summary(model_key, thread_config, custom_threads, transcript_text,
|
| 307 |
+
summary_length, output_format, language, enable_reasoning,
|
| 308 |
+
enable_streaming, custom_repo_id=None, custom_filename=None,
|
| 309 |
+
progress=gr.Progress()):
|
| 310 |
+
|
| 311 |
+
if model_key == "custom_hf":
|
| 312 |
+
# Load custom model
|
| 313 |
+
llm, error = load_custom_model(custom_repo_id, custom_filename, cpu_only)
|
| 314 |
+
if llm is None:
|
| 315 |
+
yield "", "", "", "", error
|
| 316 |
+
return
|
| 317 |
+
else:
|
| 318 |
+
# Use predefined model
|
| 319 |
+
model_info = AVAILABLE_MODELS[model_key]
|
| 320 |
+
llm = load_model_from_config(model_info)
|
| 321 |
+
|
| 322 |
+
# ... rest of the function ...
|
| 323 |
+
```
|
| 324 |
+
|
| 325 |
+
#### Step 7: Update UI to Pass Custom Model Values
|
| 326 |
+
**Location**: Line ~1429
|
| 327 |
+
|
| 328 |
+
Add custom inputs to the generate summary call:
|
| 329 |
+
```python
|
| 330 |
+
generate_btn.click(
|
| 331 |
+
fn=generate_summary,
|
| 332 |
+
inputs=[
|
| 333 |
+
model_dropdown,
|
| 334 |
+
thread_config,
|
| 335 |
+
custom_n_threads,
|
| 336 |
+
transcript_input,
|
| 337 |
+
summary_length,
|
| 338 |
+
output_format,
|
| 339 |
+
language,
|
| 340 |
+
reasoning_checkbox,
|
| 341 |
+
streaming_toggle,
|
| 342 |
+
custom_repo_id, # NEW
|
| 343 |
+
custom_filename, # NEW
|
| 344 |
+
],
|
| 345 |
+
outputs=[...]
|
| 346 |
+
)
|
| 347 |
+
```
|
| 348 |
+
|
| 349 |
+
#### Step 8: Update generate_summary signature
|
| 350 |
+
**Location**: Function definition around line 870
|
| 351 |
+
|
| 352 |
+
Update function signature to accept custom model parameters:
|
| 353 |
+
```python
|
| 354 |
+
def generate_summary(
|
| 355 |
+
model_key: str,
|
| 356 |
+
thread_config: str,
|
| 357 |
+
custom_threads: int,
|
| 358 |
+
transcript_text: str,
|
| 359 |
+
summary_length: str,
|
| 360 |
+
output_format: str,
|
| 361 |
+
language: str,
|
| 362 |
+
enable_reasoning: bool,
|
| 363 |
+
enable_streaming: bool,
|
| 364 |
+
custom_repo_id: Optional[str] = None, # NEW
|
| 365 |
+
custom_filename: Optional[str] = None, # NEW
|
| 366 |
+
progress: gr.Progress = gr.Progress(),
|
| 367 |
+
) -> Generator:
|
| 368 |
+
```
|
| 369 |
+
|
| 370 |
+
#### Step 9: Update Model State Management
|
| 371 |
+
**Location**: Add near other state declarations (around line 1250)
|
| 372 |
+
|
| 373 |
+
```python
|
| 374 |
+
# Store loaded model to avoid reloading on each generation
|
| 375 |
+
model_state = gr.State(None)
|
| 376 |
+
```
|
| 377 |
+
|
| 378 |
+
---
|
| 379 |
+
|
| 380 |
+
## Implementation Order
|
| 381 |
+
|
| 382 |
+
1. **Feature 1 First** - Debug System Prompt (simpler, self-contained)
|
| 383 |
+
- Step 1: Create `build_system_prompt()` function
|
| 384 |
+
- Step 2: Refactor `summarize_streaming()` to use it
|
| 385 |
+
- Step 3: Add UI accordion component
|
| 386 |
+
- Step 4: Update event handlers and outputs
|
| 387 |
+
|
| 388 |
+
2. **Feature 2 Second** - Custom GGUF Loader (more complex)
|
| 389 |
+
- Step 1: Add "custom_hf" to AVAILABLE_MODELS
|
| 390 |
+
- Step 2: Add UI components for custom model inputs
|
| 391 |
+
- Step 3: Add visibility toggle handler
|
| 392 |
+
- Step 4: Create `load_custom_model()` function
|
| 393 |
+
- Step 5: Add load/retry handlers
|
| 394 |
+
- Step 6: Update generate_summary for custom models
|
| 395 |
+
- Step 7: Update UI inputs
|
| 396 |
+
- Step 8: Update function signature
|
| 397 |
+
- Step 9: Add model state management
|
| 398 |
+
|
| 399 |
+
---
|
| 400 |
+
|
| 401 |
+
## Testing Plan
|
| 402 |
+
|
| 403 |
+
### Feature 1 Tests
|
| 404 |
+
1. Select different models, verify system prompt updates correctly
|
| 405 |
+
2. Toggle reasoning mode, verify /think or /no_think appears
|
| 406 |
+
3. Change language, verify Traditional Chinese prompt appears
|
| 407 |
+
4. Change length/format, verify prompt content changes
|
| 408 |
+
5. Verify prompt is read-only and copyable
|
| 409 |
+
|
| 410 |
+
### Feature 2 Tests
|
| 411 |
+
1. Select "Custom HF GGUF...", verify inputs appear
|
| 412 |
+
2. Enter invalid repo_id, verify error message with retry button
|
| 413 |
+
3. Enter valid but non-existent model, verify error
|
| 414 |
+
4. Enter valid model with wrong filename, verify error
|
| 415 |
+
5. Enter valid model with correct filename, verify success
|
| 416 |
+
6. Click retry after error, verify it retries
|
| 417 |
+
7. Test fallback to predefined models still works
|
| 418 |
+
|
| 419 |
+
---
|
| 420 |
+
|
| 421 |
+
## Risk Mitigation
|
| 422 |
+
|
| 423 |
+
1. **Custom model loading failures**: Already handled with try/except and user-friendly error messages
|
| 424 |
+
2. **Memory issues with large custom models**: Use conservative defaults (n_ctx=8192, CPU-only for HF Spaces)
|
| 425 |
+
3. **UI clutter**: Custom model inputs hidden by default, only show when selected
|
| 426 |
+
4. **Breaking existing functionality**: Feature 1 is additive only, Feature 2 extends existing paths without changing them
|
| 427 |
+
|
| 428 |
+
---
|
| 429 |
+
|
| 430 |
+
## Files to Modify
|
| 431 |
+
|
| 432 |
+
- `/home/luigi/tiny-scribe/app.py` - Main implementation file
|
| 433 |
+
|
| 434 |
+
## Estimated Lines Changed
|
| 435 |
+
|
| 436 |
+
- Feature 1: ~50 lines added, ~20 lines modified
|
| 437 |
+
- Feature 2: ~150 lines added, ~30 lines modified
|
| 438 |
+
|
| 439 |
+
Total: ~250 lines of code changes
|
.opencode/plans/fix_custom_model_info.md
ADDED
|
@@ -0,0 +1,286 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Implementation Plan: Fix Model Information for Custom GGUF Models
|
| 2 |
+
|
| 3 |
+
## Overview
|
| 4 |
+
Fix the bug where "Model Information" remains empty when a custom GGUF model is loaded.
|
| 5 |
+
|
| 6 |
+
**Selected Approach**: Option A - Store metadata in Gradio State variables
|
| 7 |
+
**UI Style**: Cards/panels with dense layout
|
| 8 |
+
**Priority**: Bug fix first, then UI improvements
|
| 9 |
+
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
## Bug Analysis
|
| 13 |
+
|
| 14 |
+
### Problem
|
| 15 |
+
`get_model_info()` reads from static `AVAILABLE_MODELS["custom_hf"]` which has:
|
| 16 |
+
- `repo_id = None`
|
| 17 |
+
- `filename = None`
|
| 18 |
+
|
| 19 |
+
The actual values entered by the user are never stored or passed to the info display function.
|
| 20 |
+
|
| 21 |
+
### Solution
|
| 22 |
+
Store actual custom model metadata in dedicated Gradio State variables and pass them to `get_model_info()` when `model_key == "custom_hf"`.
|
| 23 |
+
|
| 24 |
+
---
|
| 25 |
+
|
| 26 |
+
## Implementation Steps
|
| 27 |
+
|
| 28 |
+
### Step 1: Add Custom Model Metadata State
|
| 29 |
+
|
| 30 |
+
**Location**: UI section (~line 1730), alongside other states
|
| 31 |
+
|
| 32 |
+
```python
|
| 33 |
+
# Custom model metadata state (stores actual repo_id and filename when loaded)
|
| 34 |
+
custom_model_metadata = gr.State({
|
| 35 |
+
"repo_id": None,
|
| 36 |
+
"filename": None,
|
| 37 |
+
"size_mb": 0,
|
| 38 |
+
})
|
| 39 |
+
```
|
| 40 |
+
|
| 41 |
+
### Step 2: Modify `get_model_info()` Function
|
| 42 |
+
|
| 43 |
+
**Location**: ~line 904
|
| 44 |
+
|
| 45 |
+
**Current signature**:
|
| 46 |
+
```python
|
| 47 |
+
def get_model_info(model_key: str, n_threads: int = 2):
|
| 48 |
+
```
|
| 49 |
+
|
| 50 |
+
**New signature**:
|
| 51 |
+
```python
|
| 52 |
+
def get_model_info(model_key: str, n_threads: int = 2, custom_metadata: dict = None):
|
| 53 |
+
```
|
| 54 |
+
|
| 55 |
+
**Logic change** (inside function):
|
| 56 |
+
```python
|
| 57 |
+
if model_key == "custom_hf" and custom_metadata:
|
| 58 |
+
# Use actual metadata from loaded custom model
|
| 59 |
+
repo_id = custom_metadata.get("repo_id", "Not loaded")
|
| 60 |
+
filename = custom_metadata.get("filename", "Not selected")
|
| 61 |
+
size_mb = custom_metadata.get("size_mb", 0)
|
| 62 |
+
|
| 63 |
+
# Parse quantization from filename
|
| 64 |
+
quant = parse_quantization(filename) if filename else "Unknown"
|
| 65 |
+
|
| 66 |
+
info_text = (
|
| 67 |
+
f"## 🤖 Custom HF GGUF Model\n\n"
|
| 68 |
+
f"### 📋 Model Metadata\n"
|
| 69 |
+
f"| Property | Value |\n"
|
| 70 |
+
f"|----------|-------|\n"
|
| 71 |
+
f"| **Repository** | `{repo_id}` |\n"
|
| 72 |
+
f"| **GGUF File** | `{filename}` |\n"
|
| 73 |
+
f"| **Quantization** | `{quant}` |\n"
|
| 74 |
+
f"| **File Size** | {size_mb:.1f} MB |\n"
|
| 75 |
+
f"| **Context** | 8,192 tokens |\n"
|
| 76 |
+
f"| **Threads** | {n_threads} |\n\n"
|
| 77 |
+
f"⚠️ Note: Custom models use conservative defaults (CPU-only, smaller context)."
|
| 78 |
+
)
|
| 79 |
+
else:
|
| 80 |
+
# Use existing logic for predefined models
|
| 81 |
+
m = AVAILABLE_MODELS[model_key]
|
| 82 |
+
# ... existing code ...
|
| 83 |
+
```
|
| 84 |
+
|
| 85 |
+
### Step 3: Update `load_custom_model_selected()` to Store Metadata
|
| 86 |
+
|
| 87 |
+
**Location**: Event handler section (~line 1927)
|
| 88 |
+
|
| 89 |
+
**Current function** (simplified):
|
| 90 |
+
```python
|
| 91 |
+
def load_custom_model_selected(repo_id, selected_file_display, files_data):
|
| 92 |
+
filename = selected_file_display.split(" | ")[0].replace("📄 ", "").strip()
|
| 93 |
+
llm, load_msg = load_custom_model_from_hf(repo_id, filename, n_threads)
|
| 94 |
+
if llm is None:
|
| 95 |
+
return gr.update(visible=True, value=error), gr.update(visible=True), None
|
| 96 |
+
else:
|
| 97 |
+
return gr.update(visible=True, value=success), gr.update(visible=False), llm
|
| 98 |
+
```
|
| 99 |
+
|
| 100 |
+
**New function**:
|
| 101 |
+
```python
|
| 102 |
+
def load_custom_model_selected(repo_id, selected_file_display, files_data):
|
| 103 |
+
filename = selected_file_display.split(" | ")[0].replace("📄 ", "").strip()
|
| 104 |
+
|
| 105 |
+
# Extract size from files_data
|
| 106 |
+
size_mb = 0
|
| 107 |
+
for f in files_data:
|
| 108 |
+
if f["name"] == filename:
|
| 109 |
+
size_mb = f.get("size_mb", 0)
|
| 110 |
+
break
|
| 111 |
+
|
| 112 |
+
llm, load_msg = load_custom_model_from_hf(repo_id, filename, n_threads)
|
| 113 |
+
|
| 114 |
+
if llm is None:
|
| 115 |
+
return (
|
| 116 |
+
gr.update(visible=True, value=f"❌ {load_msg}"),
|
| 117 |
+
gr.update(visible=True),
|
| 118 |
+
None,
|
| 119 |
+
{"repo_id": None, "filename": None, "size_mb": 0}, # Clear metadata
|
| 120 |
+
)
|
| 121 |
+
else:
|
| 122 |
+
# Create metadata dict
|
| 123 |
+
metadata = {
|
| 124 |
+
"repo_id": repo_id,
|
| 125 |
+
"filename": filename,
|
| 126 |
+
"size_mb": size_mb,
|
| 127 |
+
}
|
| 128 |
+
return (
|
| 129 |
+
gr.update(visible=True, value=f"✅ {load_msg}"),
|
| 130 |
+
gr.update(visible=False),
|
| 131 |
+
llm,
|
| 132 |
+
metadata, # Return metadata to store in state
|
| 133 |
+
)
|
| 134 |
+
```
|
| 135 |
+
|
| 136 |
+
**Update the click event handler**:
|
| 137 |
+
```python
|
| 138 |
+
load_btn.click(
|
| 139 |
+
fn=load_custom_model_selected,
|
| 140 |
+
inputs=[model_search_input, custom_file_dropdown, custom_repo_files],
|
| 141 |
+
outputs=[custom_status, retry_btn, custom_model_state, custom_model_metadata], # Added metadata
|
| 142 |
+
)
|
| 143 |
+
|
| 144 |
+
retry_btn.click(
|
| 145 |
+
fn=load_custom_model_selected,
|
| 146 |
+
inputs=[model_search_input, custom_file_dropdown, custom_repo_files],
|
| 147 |
+
outputs=[custom_status, retry_btn, custom_model_state, custom_model_metadata], # Added metadata
|
| 148 |
+
)
|
| 149 |
+
```
|
| 150 |
+
|
| 151 |
+
### Step 4: Update Model Info Display Event Handler
|
| 152 |
+
|
| 153 |
+
**Location**: ~line 1711, `update_settings_on_model_change()` function
|
| 154 |
+
|
| 155 |
+
**Current**:
|
| 156 |
+
```python
|
| 157 |
+
def update_settings_on_model_change(model_key, n_threads):
|
| 158 |
+
info, _, _, _ = get_model_info(model_key, n_threads=n_threads)
|
| 159 |
+
# ... return info ...
|
| 160 |
+
```
|
| 161 |
+
|
| 162 |
+
**New**:
|
| 163 |
+
```python
|
| 164 |
+
def update_settings_on_model_change(model_key, n_threads, custom_metadata):
|
| 165 |
+
info, _, _, _ = get_model_info(model_key, n_threads=n_threads, custom_metadata=custom_metadata)
|
| 166 |
+
# ... return info ...
|
| 167 |
+
```
|
| 168 |
+
|
| 169 |
+
**Update the event handler**:
|
| 170 |
+
```python
|
| 171 |
+
model_dropdown.change(
|
| 172 |
+
fn=update_settings_on_model_change,
|
| 173 |
+
inputs=[model_dropdown, n_threads_display, custom_model_metadata], # Added metadata
|
| 174 |
+
outputs=[info_output, max_tokens, reasoning_checkbox, n_ctx_display,
|
| 175 |
+
thinking_accordion, thinking_output, enable_reasoning],
|
| 176 |
+
)
|
| 177 |
+
```
|
| 178 |
+
|
| 179 |
+
### Step 5: Update Submit Button Handler
|
| 180 |
+
|
| 181 |
+
**Location**: ~line 2010
|
| 182 |
+
|
| 183 |
+
**Update inputs to include custom_model_metadata**:
|
| 184 |
+
```python
|
| 185 |
+
submit_btn.click(
|
| 186 |
+
fn=summarize_streaming,
|
| 187 |
+
inputs=[file_input, model_dropdown, enable_reasoning, max_tokens, temperature_slider,
|
| 188 |
+
top_p, top_k, language_selector, thread_config_dropdown, custom_threads_slider,
|
| 189 |
+
custom_model_state, custom_model_metadata], # Added metadata
|
| 190 |
+
outputs=[thinking_output, summary_output, info_output, metrics_state, system_prompt_debug],
|
| 191 |
+
show_progress="full"
|
| 192 |
+
)
|
| 193 |
+
```
|
| 194 |
+
|
| 195 |
+
### Step 6: Update `summarize_streaming()` Function
|
| 196 |
+
|
| 197 |
+
**Location**: ~line 1080
|
| 198 |
+
|
| 199 |
+
**Update function signature**:
|
| 200 |
+
```python
|
| 201 |
+
def summarize_streaming(
|
| 202 |
+
file_obj,
|
| 203 |
+
model_key: str,
|
| 204 |
+
enable_reasoning: bool = True,
|
| 205 |
+
max_tokens: int = 2048,
|
| 206 |
+
temperature: float = 0.6,
|
| 207 |
+
top_p: float = None,
|
| 208 |
+
top_k: int = None,
|
| 209 |
+
output_language: str = "en",
|
| 210 |
+
thread_config: str = "free",
|
| 211 |
+
custom_threads: int = 4,
|
| 212 |
+
custom_model_state: Any = None,
|
| 213 |
+
custom_model_metadata: dict = None, # NEW parameter
|
| 214 |
+
) -> Generator[Tuple[str, str, str, dict, str], None, None]:
|
| 215 |
+
```
|
| 216 |
+
|
| 217 |
+
**Update model info generation**:
|
| 218 |
+
```python
|
| 219 |
+
# Get base model info with current thread configuration
|
| 220 |
+
info_text, _, _, _ = get_model_info(model_key, n_threads=n_threads, custom_metadata=custom_model_metadata)
|
| 221 |
+
```
|
| 222 |
+
|
| 223 |
+
---
|
| 224 |
+
|
| 225 |
+
## Files to Modify
|
| 226 |
+
|
| 227 |
+
1. **app.py** - Main changes:
|
| 228 |
+
- Line ~1730: Add `custom_model_metadata` state
|
| 229 |
+
- Line ~904: Modify `get_model_info()` signature and logic
|
| 230 |
+
- Line ~1927: Update `load_custom_model_selected()` to return metadata
|
| 231 |
+
- Line ~1711: Update `update_settings_on_model_change()` to accept metadata
|
| 232 |
+
- Line ~1080: Update `summarize_streaming()` signature
|
| 233 |
+
- Line ~2010: Update submit button event handler inputs
|
| 234 |
+
|
| 235 |
+
---
|
| 236 |
+
|
| 237 |
+
## Testing Plan
|
| 238 |
+
|
| 239 |
+
1. Select "🔧 Custom HF GGUF..." from model dropdown
|
| 240 |
+
2. Type "llama" in search box and select a model
|
| 241 |
+
3. Verify file dropdown auto-populates
|
| 242 |
+
4. Select a GGUF file
|
| 243 |
+
5. Click "Load Selected Model"
|
| 244 |
+
6. **Verify Model Information now shows**:
|
| 245 |
+
- Repository: actual repo ID
|
| 246 |
+
- GGUF File: actual filename
|
| 247 |
+
- Quantization: parsed from filename
|
| 248 |
+
- File Size: actual size
|
| 249 |
+
- Context: 8192 tokens
|
| 250 |
+
- Note about conservative defaults
|
| 251 |
+
7. Generate a summary
|
| 252 |
+
8. Verify model info remains correct during and after generation
|
| 253 |
+
9. Switch to a different predefined model
|
| 254 |
+
10. Verify model info updates correctly
|
| 255 |
+
11. Switch back to custom model
|
| 256 |
+
12. Verify it shows "Not loaded" state until new custom model is loaded
|
| 257 |
+
|
| 258 |
+
---
|
| 259 |
+
|
| 260 |
+
## Lines to Modify
|
| 261 |
+
|
| 262 |
+
| Function/Component | Line Range | Changes |
|
| 263 |
+
|-------------------|------------|---------|
|
| 264 |
+
| State declarations | ~1730 | Add `custom_model_metadata` |
|
| 265 |
+
| `get_model_info()` | ~904-947 | Add `custom_metadata` param, handle custom_hf |
|
| 266 |
+
| `load_custom_model_selected()` | ~1927-1960 | Return metadata dict |
|
| 267 |
+
| Load button click | ~1970 | Add `custom_model_metadata` to outputs |
|
| 268 |
+
| `update_settings_on_model_change()` | ~1711-1720 | Accept metadata param |
|
| 269 |
+
| Model dropdown change | ~1721 | Add `custom_model_metadata` to inputs |
|
| 270 |
+
| `summarize_streaming()` signature | ~1080 | Add `custom_model_metadata` param |
|
| 271 |
+
| Submit button click | ~2010 | Add `custom_model_metadata` to inputs |
|
| 272 |
+
|
| 273 |
+
---
|
| 274 |
+
|
| 275 |
+
## Expected Result
|
| 276 |
+
|
| 277 |
+
After the fix, when a custom GGUF model is loaded:
|
| 278 |
+
- ✅ Model Information displays actual repo_id and filename
|
| 279 |
+
- ✅ Quantization level is parsed and shown
|
| 280 |
+
- ✅ File size is displayed
|
| 281 |
+
- ✅ Context window shows correct value
|
| 282 |
+
- ✅ Information updates correctly when switching models
|
| 283 |
+
|
| 284 |
+
---
|
| 285 |
+
|
| 286 |
+
Ready to implement the bug fix? Say **"implement bug fix"** and I'll proceed!
|
.opencode/plans/redesign_custom_gguf_loader.md
ADDED
|
@@ -0,0 +1,309 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Implementation Plan: Redesign Custom HF GGUF Loader
|
| 2 |
+
|
| 3 |
+
## Overview
|
| 4 |
+
Redesign the custom GGUF model query to use the native `gradio_huggingfacehub_search` component, matching the UX of gguf-my-repo space.
|
| 5 |
+
|
| 6 |
+
**Selected Approach**: Option A1 + Flow 1
|
| 7 |
+
- Use native HF search component
|
| 8 |
+
- Search ALL models on HF Hub
|
| 9 |
+
- Auto-discover GGUF files after selection
|
| 10 |
+
|
| 11 |
+
## Current Issues
|
| 12 |
+
|
| 13 |
+
### Problems with Current Implementation
|
| 14 |
+
1. **Complexity**: Manual textbox + search results dropdown + manual file discovery
|
| 15 |
+
2. **UX friction**: Too many steps, confusing flow
|
| 16 |
+
3. **Maintenance burden**: Custom search logic, event handlers, caching
|
| 17 |
+
4. **Performance**: Multiple API calls without optimization
|
| 18 |
+
|
| 19 |
+
### What Users Want
|
| 20 |
+
1. **Simple search**: Type model name, see suggestions
|
| 21 |
+
2. **Auto-discovery**: Select model โ automatically see available GGUF files
|
| 22 |
+
3. **Quick precision selection**: Choose from discovered files
|
| 23 |
+
4. **Load and go**: One click to load selected GGUF
|
| 24 |
+
|
| 25 |
+
---
|
| 26 |
+
|
| 27 |
+
## New Design
|
| 28 |
+
|
| 29 |
+
### User Flow (Flow 1)
|
| 30 |
+
|
| 31 |
+
```
|
| 32 |
+
1. Select "🔧 Custom HF GGUF..." from model dropdown
|
| 33 |
+
โ
|
| 34 |
+
2. Type model name in HuggingfaceHubSearch component
|
| 35 |
+
โ
|
| 36 |
+
3. See real-time search suggestions from ALL HF models
|
| 37 |
+
โ
|
| 38 |
+
4. Select a model from suggestions
|
| 39 |
+
โ
|
| 40 |
+
5. Auto-trigger: Discover all GGUF files in that repo
|
| 41 |
+
โ
|
| 42 |
+
6. See GGUF files dropdown populated (alphabetically sorted)
|
| 43 |
+
โ
|
| 44 |
+
7. Select desired precision/quantization
|
| 45 |
+
โ
|
| 46 |
+
8. Click "⬇️ Load Selected Model"
|
| 47 |
+
โ
|
| 48 |
+
9. Model loads, ready to use!
|
| 49 |
+
```
|
| 50 |
+
|
| 51 |
+
### UI Components
|
| 52 |
+
|
| 53 |
+
```
|
| 54 |
+
[Model Dropdown: "๐ง Custom HF GGUF..." selected]
|
| 55 |
+
โ
|
| 56 |
+
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 57 |
+
โ ๐ Search HuggingFace Models โ
|
| 58 |
+
โ [HuggingfaceHubSearch Component] โ
|
| 59 |
+
โ Type to search all HF models... โ
|
| 60 |
+
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 61 |
+
โ (after selection)
|
| 62 |
+
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 63 |
+
โ ๐ฆ Available GGUF Files โ
|
| 64 |
+
โ [Dropdown with quant options] โ
|
| 65 |
+
โ e.g., model-Q4_K_M.gguf (4.2GB) โ
|
| 66 |
+
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 67 |
+
โ
|
| 68 |
+
[โฌ๏ธ Load Selected Model] [Status: Ready]
|
| 69 |
+
```
|
| 70 |
+
|
| 71 |
+
---
|
| 72 |
+
|
| 73 |
+
## Technical Implementation
|
| 74 |
+
|
| 75 |
+
### 1. Dependencies
|
| 76 |
+
|
| 77 |
+
**requirements.txt additions:**
|
| 78 |
+
```
|
| 79 |
+
gradio-huggingfacehub-search>=0.1.0
|
| 80 |
+
```
|
| 81 |
+
|
| 82 |
+
**Import:**
|
| 83 |
+
```python
|
| 84 |
+
from gradio_huggingfacehub_search import HuggingfaceHubSearch
|
| 85 |
+
```
|
| 86 |
+
|
| 87 |
+
### 2. Remove Old Components
|
| 88 |
+
|
| 89 |
+
**Remove:**
|
| 90 |
+
1. `custom_repo_id` textbox
|
| 91 |
+
2. `model_search_results` dropdown
|
| 92 |
+
3. `discover_btn` button (or repurpose)
|
| 93 |
+
4. All custom search functions:
|
| 94 |
+
- `get_popular_gguf_models()`
|
| 95 |
+
- `search_gguf_models()`
|
| 96 |
+
- `search_models_dynamic()`
|
| 97 |
+
- `on_model_selected_from_search()`
|
| 98 |
+
|
| 99 |
+
**Keep:**
|
| 100 |
+
- `custom_file_dropdown` - for selecting GGUF precision
|
| 101 |
+
- `custom_repo_files` state - for storing file metadata
|
| 102 |
+
- `custom_model_state` state - for loaded model
|
| 103 |
+
- `load_btn` and `retry_btn` - for loading model
|
| 104 |
+
- `custom_status` - for status messages
|
| 105 |
+
|
| 106 |
+
### 3. Add New Component
|
| 107 |
+
|
| 108 |
+
**Location:** In custom_model_group (replacing old textbox)
|
| 109 |
+
|
| 110 |
+
```python
|
| 111 |
+
# NEW: Native HF Hub Search Component
|
| 112 |
+
model_search_input = HuggingfaceHubSearch(
|
| 113 |
+
label="🔍 Search HuggingFace Models",
|
| 114 |
+
placeholder="Type model name to search (e.g., 'llama', 'qwen')",
|
| 115 |
+
search_type="model",
|
| 116 |
+
# Optional: Add filters
|
| 117 |
+
# filter="gguf" # if component supports filtering
|
| 118 |
+
)
|
| 119 |
+
|
| 120 |
+
# Keep file dropdown (but update label)
|
| 121 |
+
custom_file_dropdown = gr.Dropdown(
|
| 122 |
+
label="📦 Select GGUF File (Precision)",
|
| 123 |
+
choices=[],
|
| 124 |
+
value=None,
|
| 125 |
+
info="Available GGUF files will appear after selecting a model",
|
| 126 |
+
interactive=True,
|
| 127 |
+
)
|
| 128 |
+
```
|
| 129 |
+
|
| 130 |
+
### 4. New Event Handler
|
| 131 |
+
|
| 132 |
+
**Flow:** model_search_input.change โ auto-discover files
|
| 133 |
+
|
| 134 |
+
```python
|
| 135 |
+
def on_model_selected(repo_id):
|
| 136 |
+
"""Handle model selection from HuggingfaceHubSearch.
|
| 137 |
+
|
| 138 |
+
Automatically discovers GGUF files in the selected repo.
|
| 139 |
+
"""
|
| 140 |
+
if not repo_id:
|
| 141 |
+
return (
|
| 142 |
+
gr.update(choices=[], value=None),
|
| 143 |
+
[],
|
| 144 |
+
gr.update(visible=False),
|
| 145 |
+
)
|
| 146 |
+
|
| 147 |
+
# Show searching status
|
| 148 |
+
yield (
|
| 149 |
+
gr.update(choices=["Searching for GGUF files..."], value=None, interactive=False),
|
| 150 |
+
[],
|
| 151 |
+
gr.update(visible=True, value="🔍 Discovering GGUF files..."),
|
| 152 |
+
)
|
| 153 |
+
|
| 154 |
+
# Discover files
|
| 155 |
+
files, error = list_repo_gguf_files(repo_id)
|
| 156 |
+
|
| 157 |
+
if error:
|
| 158 |
+
yield (
|
| 159 |
+
gr.update(choices=[], value=None, interactive=True),
|
| 160 |
+
[],
|
| 161 |
+
gr.update(visible=True, value=f"โ {error}"),
|
| 162 |
+
)
|
| 163 |
+
elif not files:
|
| 164 |
+
yield (
|
| 165 |
+
gr.update(choices=[], value=None, interactive=True),
|
| 166 |
+
[],
|
| 167 |
+
gr.update(visible=True, value="โ No GGUF files in this repository"),
|
| 168 |
+
)
|
| 169 |
+
else:
|
| 170 |
+
# Format and show files
|
| 171 |
+
choices = [format_file_choice(f) for f in files]
|
| 172 |
+
yield (
|
| 173 |
+
gr.update(choices=choices, value=choices[0] if choices else None, interactive=True),
|
| 174 |
+
files,
|
| 175 |
+
gr.update(visible=True, value=f"✅ Found {len(files)} GGUF files! Select one and click 'Load Model'"),
|
| 176 |
+
)
|
| 177 |
+
|
| 178 |
+
# Connect event handler
|
| 179 |
+
model_search_input.change(
|
| 180 |
+
fn=on_model_selected,
|
| 181 |
+
inputs=[model_search_input],
|
| 182 |
+
outputs=[custom_file_dropdown, custom_repo_files, custom_status],
|
| 183 |
+
)
|
| 184 |
+
```
|
| 185 |
+
|
| 186 |
+
### 5. Update Load Function
|
| 187 |
+
|
| 188 |
+
**Current:** `load_custom_model_selected()` extracts filename from display string
|
| 189 |
+
|
| 190 |
+
**Keep as-is** - already works correctly:
|
| 191 |
+
```python
|
| 192 |
+
def load_custom_model_selected(repo_id, selected_file_display, files_data):
|
| 193 |
+
"""Load the selected custom model."""
|
| 194 |
+
# Extract filename from display string
|
| 195 |
+
filename = selected_file_display.split(" | ")[0].replace("๐ ", "").strip()
|
| 196 |
+
# ... rest of loading logic
|
| 197 |
+
```
|
| 198 |
+
|
| 199 |
+
### 6. Simplified UI Layout
|
| 200 |
+
|
| 201 |
+
```python
|
| 202 |
+
with gr.Group(visible=False) as custom_model_group:
|
| 203 |
+
gr.HTML('<div class="section-header" style="margin-top: 20px;"><span class="section-icon">๐ง</span> Load Custom GGUF Model</div>')
|
| 204 |
+
|
| 205 |
+
# Step 1: Search models
|
| 206 |
+
model_search_input = HuggingfaceHubSearch(...)
|
| 207 |
+
|
| 208 |
+
# Step 2: Select GGUF file (auto-populated)
|
| 209 |
+
custom_file_dropdown = gr.Dropdown(...)
|
| 210 |
+
|
| 211 |
+
# Step 3: Load button
|
| 212 |
+
with gr.Row():
|
| 213 |
+
load_btn = gr.Button("โฌ๏ธ Load Selected Model", variant="primary")
|
| 214 |
+
retry_btn = gr.Button("๐ Retry", variant="secondary", visible=False)
|
| 215 |
+
|
| 216 |
+
# Status
|
| 217 |
+
custom_status = gr.Textbox(label="Status", interactive=False, visible=False)
|
| 218 |
+
|
| 219 |
+
# Hidden states
|
| 220 |
+
custom_repo_files = gr.State([])
|
| 221 |
+
custom_model_state = gr.State(None)
|
| 222 |
+
```
|
| 223 |
+
|
| 224 |
+
---
|
| 225 |
+
|
| 226 |
+
## Files to Modify
|
| 227 |
+
|
| 228 |
+
### 1. requirements.txt
|
| 229 |
+
Add dependency:
|
| 230 |
+
```
|
| 231 |
+
gradio-huggingfacehub-search>=0.1.0
|
| 232 |
+
```
|
| 233 |
+
|
| 234 |
+
### 2. app.py
|
| 235 |
+
|
| 236 |
+
**Import section (~line 1-15):**
|
| 237 |
+
```python
|
| 238 |
+
from gradio_huggingfacehub_search import HuggingfaceHubSearch
|
| 239 |
+
```
|
| 240 |
+
|
| 241 |
+
**Remove functions (~lines 30-200):**
|
| 242 |
+
- Remove `get_popular_gguf_models()`
|
| 243 |
+
- Remove `search_gguf_models()`
|
| 244 |
+
- Remove `POPULAR_GGUF_MODELS` cache
|
| 245 |
+
- Remove `search_models_dynamic()`
|
| 246 |
+
- Remove `on_model_selected_from_search()`
|
| 247 |
+
- Keep `list_repo_gguf_files()` - still needed
|
| 248 |
+
- Keep `parse_quantization()` - still needed
|
| 249 |
+
- Keep `format_file_choice()` - still needed
|
| 250 |
+
- Keep `load_custom_model_from_hf()` - still needed
|
| 251 |
+
|
| 252 |
+
**UI section (~lines 1590-1610):**
|
| 253 |
+
Replace old components with new design
|
| 254 |
+
|
| 255 |
+
**Event handlers (~lines 1950-2050):**
|
| 256 |
+
Replace search event handlers with simplified version
|
| 257 |
+
|
| 258 |
+
---
|
| 259 |
+
|
| 260 |
+
## Migration Checklist
|
| 261 |
+
|
| 262 |
+
- [ ] Add `gradio-huggingfacehub-search` to requirements.txt
|
| 263 |
+
- [ ] Add import statement
|
| 264 |
+
- [ ] Remove unused search functions (3 functions)
|
| 265 |
+
- [ ] Remove unused cache variables
|
| 266 |
+
- [ ] Replace `custom_repo_id` + `model_search_results` with `HuggingfaceHubSearch`
|
| 267 |
+
- [ ] Update custom model UI group
|
| 268 |
+
- [ ] Simplify event handlers
|
| 269 |
+
- [ ] Test search and file discovery flow
|
| 270 |
+
- [ ] Verify model loading still works
|
| 271 |
+
- [ ] Update documentation/comments
|
| 272 |
+
|
| 273 |
+
---
|
| 274 |
+
|
| 275 |
+
## Benefits of This Redesign
|
| 276 |
+
|
| 277 |
+
1. **Better UX**: Native HF search component, professional look
|
| 278 |
+
2. **Less code**: Remove ~150 lines of custom search logic
|
| 279 |
+
3. **Better performance**: Component handles debouncing and caching
|
| 280 |
+
4. **Easier maintenance**: Community-maintained search component
|
| 281 |
+
5. **More reliable**: Uses official HF component
|
| 282 |
+
6. **Simpler flow**: One search box, auto-discovery, select and load
|
| 283 |
+
|
| 284 |
+
---
|
| 285 |
+
|
| 286 |
+
## Testing Plan
|
| 287 |
+
|
| 288 |
+
1. Select "🔧 Custom HF GGUF..." from model dropdown
|
| 289 |
+
2. Type "llama" in search box
|
| 290 |
+
3. Verify suggestions appear (any HF models with "llama" in name)
|
| 291 |
+
4. Select a model (e.g., "meta-llama/Llama-2-7b-hf")
|
| 292 |
+
5. Verify GGUF files auto-discover (if any exist)
|
| 293 |
+
6. Select a GGUF file
|
| 294 |
+
7. Click "Load Selected Model"
|
| 295 |
+
8. Verify model loads successfully
|
| 296 |
+
9. Test with models that have no GGUF files (should show error)
|
| 297 |
+
10. Test error handling for invalid repo IDs
|
| 298 |
+
|
| 299 |
+
---
|
| 300 |
+
|
| 301 |
+
## Questions Before Implementation
|
| 302 |
+
|
| 303 |
+
1. **Requirements check**: Should I add `gradio-huggingfacehub-search` to requirements.txt now, or will you handle dependencies?
|
| 304 |
+
|
| 305 |
+
2. **Component customization**: The HuggingfaceHubSearch component may allow custom filters. Should we try to filter for models that might have GGUF files, or search all models?
|
| 306 |
+
|
| 307 |
+
3. **Manual discovery button**: Keep the "Discover Files" button as a backup option, or remove it since search is now automatic?
|
| 308 |
+
|
| 309 |
+
4. **Ready to implement?** Say "implement the redesign" and I'll proceed with the refactoring.
|
GEMINI.md
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Tiny Scribe - Project Context
|
| 2 |
+
|
| 3 |
+
## Project Overview
|
| 4 |
+
**Tiny Scribe** is a lightweight, local LLM-powered transcript summarization tool. It is designed to run efficiently on standard hardware (including free CPU tiers on HuggingFace Spaces) using GGUF quantized models.
|
| 5 |
+
|
| 6 |
+
The project features a web interface (Gradio) and a CLI tool, supporting over 24 models ranging from 100M to 30B parameters. It includes specialized features like live streaming, reasoning mode (thinking) for supported models, and dual-language output (English/Traditional Chinese).
|
| 7 |
+
|
| 8 |
+
## Tech Stack
|
| 9 |
+
* **Language:** Python 3.10+
|
| 10 |
+
* **UI Framework:** Gradio (Web), `argparse` (CLI)
|
| 11 |
+
* **Inference Engine:** `llama-cpp-python` (Python bindings for `llama.cpp`)
|
| 12 |
+
* **Model Format:** GGUF (Quantized)
|
| 13 |
+
* **Containerization:** Docker (optimized for HuggingFace Spaces)
|
| 14 |
+
* **Utilities:** `opencc` (Chinese conversion), `huggingface_hub`
|
| 15 |
+
|
| 16 |
+
## Key Files & Directories
|
| 17 |
+
* `app.py`: The main entry point for the Gradio web application. Contains the UI layout, model loading logic, and generation pipeline.
|
| 18 |
+
* `summarize_transcript.py`: Command-line interface for batch processing or local summarization without the web UI.
|
| 19 |
+
* `Dockerfile`: Defines the build environment. **Crucial:** It installs a specific pre-compiled wheel for `llama-cpp-python` to ensure compatibility and performance on HF Spaces (Free CPU tier).
|
| 20 |
+
* `deploy.sh`: Helper script to stage, commit, and push changes to the HuggingFace Space. Enforces non-generic commit messages.
|
| 21 |
+
* `requirements.txt`: Python dependencies (excluding `llama-cpp-python` which is handled specially in Docker).
|
| 22 |
+
* `transcripts/`: Directory for storing input transcript files.
|
| 23 |
+
* `AGENTS.md` / `CLAUDE.md`: Existing context files for other AI assistants.
|
| 24 |
+
|
| 25 |
+
## Build & Run Instructions
|
| 26 |
+
|
| 27 |
+
### 1. Installation
|
| 28 |
+
The project relies on `llama-cpp-python`. For local development, you must install it separately, as it's not in `requirements.txt` to avoid build errors on systems without compilers.
|
| 29 |
+
|
| 30 |
+
```bash
|
| 31 |
+
# Install general dependencies
|
| 32 |
+
pip install -r requirements.txt
|
| 33 |
+
|
| 34 |
+
# Install llama-cpp-python (with CUDA support if available, otherwise CPU)
|
| 35 |
+
# See: https://github.com/abetlen/llama-cpp-python#installation
|
| 36 |
+
pip install llama-cpp-python
|
| 37 |
+
```
|
| 38 |
+
|
| 39 |
+
### 2. Running the Web UI
|
| 40 |
+
```bash
|
| 41 |
+
python app.py
|
| 42 |
+
# Access at http://localhost:7860
|
| 43 |
+
```
|
| 44 |
+
|
| 45 |
+
### 3. Running the CLI
|
| 46 |
+
```bash
|
| 47 |
+
# Basic English summary
|
| 48 |
+
python summarize_transcript.py -i transcripts/your_file.txt
|
| 49 |
+
|
| 50 |
+
# Traditional Chinese output
|
| 51 |
+
python summarize_transcript.py -i transcripts/your_file.txt -l zh-TW
|
| 52 |
+
|
| 53 |
+
# Use a specific model
|
| 54 |
+
python summarize_transcript.py -i transcripts/your_file.txt -m "unsloth/Qwen3-1.7B-GGUF"
|
| 55 |
+
```
|
| 56 |
+
|
| 57 |
+
### 4. Deployment (HuggingFace Spaces)
|
| 58 |
+
Always use the provided script to ensure clean commits and deployment:
|
| 59 |
+
```bash
|
| 60 |
+
./deploy.sh "Your descriptive commit message"
|
| 61 |
+
```
|
| 62 |
+
|
| 63 |
+
## Model Architecture & Categories
|
| 64 |
+
The project categorizes models to help users balance speed vs. quality:
|
| 65 |
+
* **Tiny (0.1-0.6B):** Extremely fast, good for simple formatting (e.g., Qwen3-0.6B).
|
| 66 |
+
* **Compact (1.5-2.6B):** Good balance for free tier (e.g., Granite-3.1-1B, Qwen3-1.7B).
|
| 67 |
+
* **Standard (3-7B):** Higher quality, slower on CPU (e.g., Llama-3-8B variants).
|
| 68 |
+
* **Medium (21-30B):** High performance, requires significant RAM (e.g., Command R, Qwen-30B).
|
| 69 |
+
|
| 70 |
+
## Development Conventions
|
| 71 |
+
* **Dependency Management:** `llama-cpp-python` is pinned in the `Dockerfile` via a custom wheel URL. Do not add it to `requirements.txt` unless you are changing the build strategy.
|
| 72 |
+
* **Code Style:** The project uses `ruff` for linting.
|
| 73 |
+
* **Git:** Use `deploy.sh` to push. Avoid generic commit messages like "update" or "fix".
|
| 74 |
+
* **Environment:** The app is optimized for Linux/Docker environments. Local Windows development may require extra setup for `llama-cpp-python` compilation.
|
app.py
CHANGED
|
@@ -1030,7 +1030,8 @@ def parse_thinking_blocks(content: str, streaming: bool = False) -> Tuple[str, s
|
|
| 1030 |
|
| 1031 |
def summarize_streaming(
|
| 1032 |
file_obj,
|
| 1033 |
-
|
|
|
|
| 1034 |
enable_reasoning: bool = True,
|
| 1035 |
max_tokens: int = 2048,
|
| 1036 |
temperature: float = 0.6,
|
|
@@ -1042,10 +1043,11 @@ def summarize_streaming(
|
|
| 1042 |
custom_model_state: Any = None,
|
| 1043 |
) -> Generator[Tuple[str, str, str, dict, str], None, None]:
|
| 1044 |
"""
|
| 1045 |
-
Stream summary generation from uploaded file.
|
| 1046 |
|
| 1047 |
Args:
|
| 1048 |
file_obj: Gradio file object
|
|
|
|
| 1049 |
model_key: Model identifier from AVAILABLE_MODELS
|
| 1050 |
enable_reasoning: Whether to use reasoning mode (/think) for Qwen3 models
|
| 1051 |
max_tokens: Maximum tokens to generate
|
|
@@ -1102,31 +1104,35 @@ def summarize_streaming(
|
|
| 1102 |
if max_tokens > usable_max - 512:
|
| 1103 |
max_tokens = usable_max - 512
|
| 1104 |
|
| 1105 |
-
# Read
|
| 1106 |
try:
|
| 1107 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1108 |
system_prompt_preview = build_system_prompt(output_language, False, enable_reasoning)
|
| 1109 |
-
yield ("", "Error: Please upload a
|
| 1110 |
return
|
| 1111 |
|
| 1112 |
-
|
| 1113 |
-
# Get file metadata
|
| 1114 |
-
import os
|
| 1115 |
-
file_size = os.path.getsize(path)
|
| 1116 |
-
file_name = os.path.basename(path)
|
| 1117 |
-
|
| 1118 |
-
with open(path, 'r', encoding='utf-8') as f:
|
| 1119 |
-
transcript = f.read()
|
| 1120 |
-
|
| 1121 |
-
# Store file info
|
| 1122 |
metrics["file_info"] = {
|
| 1123 |
-
"
|
| 1124 |
-
"size_bytes":
|
| 1125 |
"original_char_count": len(transcript),
|
| 1126 |
}
|
| 1127 |
except Exception as e:
|
| 1128 |
system_prompt_preview = build_system_prompt(output_language, False, enable_reasoning)
|
| 1129 |
-
yield ("", f"Error reading
|
| 1130 |
return
|
| 1131 |
|
| 1132 |
if not transcript.strip():
|
|
@@ -1348,387 +1354,247 @@ def summarize_streaming(
|
|
| 1348 |
# Custom CSS for better UI
|
| 1349 |
custom_css = """
|
| 1350 |
:root {
|
| 1351 |
-
--primary-color: #
|
| 1352 |
-
--primary-dark: #
|
| 1353 |
-
--primary-light: #
|
| 1354 |
-
--accent-color: #
|
| 1355 |
--bg-color: #f8fafc;
|
| 1356 |
-
--card-bg:
|
| 1357 |
--text-color: #1e293b;
|
| 1358 |
--text-muted: #64748b;
|
| 1359 |
--border-color: #e2e8f0;
|
| 1360 |
--border-light: #f1f5f9;
|
| 1361 |
-
|
| 1362 |
-
|
| 1363 |
-
--
|
| 1364 |
-
--
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1365 |
--shadow-sm: 0 1px 2px rgba(0, 0, 0, 0.05);
|
| 1366 |
--shadow-md: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
|
| 1367 |
--shadow-lg: 0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);
|
| 1368 |
-
--radius-sm:
|
| 1369 |
-
--radius-md:
|
| 1370 |
-
--radius-lg:
|
| 1371 |
}
|
| 1372 |
|
| 1373 |
/* ===== LAYOUT & BASE ===== */
|
| 1374 |
.gradio-container {
|
| 1375 |
max-width: 1400px !important;
|
|
|
|
| 1376 |
}
|
| 1377 |
|
| 1378 |
/* ===== HEADER ===== */
|
| 1379 |
.app-header {
|
| 1380 |
text-align: center;
|
| 1381 |
-
padding:
|
| 1382 |
background: linear-gradient(135deg, var(--primary-color) 0%, var(--accent-color) 100%);
|
| 1383 |
border-radius: var(--radius-lg);
|
| 1384 |
-
margin-bottom:
|
| 1385 |
color: white;
|
| 1386 |
box-shadow: var(--shadow-lg);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1387 |
}
|
| 1388 |
|
| 1389 |
.app-header h1 {
|
| 1390 |
margin: 0 0 0.5rem 0;
|
| 1391 |
-
font-size: 2.
|
| 1392 |
-
font-weight:
|
| 1393 |
-
letter-spacing: -0.
|
|
|
|
|
|
|
| 1394 |
}
|
| 1395 |
|
| 1396 |
.app-header p {
|
| 1397 |
margin: 0;
|
| 1398 |
opacity: 0.9;
|
| 1399 |
-
font-size: 1.
|
|
|
|
|
|
|
|
|
|
| 1400 |
}
|
| 1401 |
|
| 1402 |
.model-badge {
|
| 1403 |
display: inline-flex;
|
| 1404 |
align-items: center;
|
| 1405 |
gap: 0.5rem;
|
| 1406 |
-
background: rgba(255, 255, 255, 0.
|
| 1407 |
-
padding: 0.
|
| 1408 |
-
border-radius:
|
| 1409 |
-
font-size: 0.
|
| 1410 |
-
margin-top:
|
| 1411 |
-
backdrop-filter: blur(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1412 |
}
|
| 1413 |
|
| 1414 |
/* ===== INSTRUCTIONS ===== */
|
| 1415 |
.instructions {
|
| 1416 |
-
background:
|
| 1417 |
-
border-left:
|
| 1418 |
-
padding:
|
| 1419 |
-
border-radius:
|
| 1420 |
-
margin-bottom:
|
| 1421 |
box-shadow: var(--shadow-sm);
|
| 1422 |
-
|
| 1423 |
-
|
| 1424 |
-
.instructions ul {
|
| 1425 |
-
margin: 0.5rem 0 0 0;
|
| 1426 |
-
padding-left: 1.25rem;
|
| 1427 |
-
}
|
| 1428 |
-
|
| 1429 |
-
.instructions li {
|
| 1430 |
-
margin-bottom: 0.35rem;
|
| 1431 |
-
color: var(--text-color);
|
| 1432 |
}
|
| 1433 |
|
| 1434 |
/* ===== SECTION HEADERS ===== */
|
| 1435 |
.section-header {
|
| 1436 |
-
font-size:
|
| 1437 |
-
font-weight:
|
| 1438 |
color: var(--text-color);
|
| 1439 |
-
margin-bottom:
|
| 1440 |
display: flex;
|
| 1441 |
align-items: center;
|
| 1442 |
-
gap: 0.
|
| 1443 |
-
padding-bottom: 0.
|
| 1444 |
-
border-bottom:
|
|
|
|
|
|
|
| 1445 |
}
|
| 1446 |
|
| 1447 |
.section-icon {
|
| 1448 |
-
font-size: 1.
|
| 1449 |
}
|
| 1450 |
|
| 1451 |
/* ===== TABS STYLING ===== */
|
| 1452 |
.gradio-tabs {
|
| 1453 |
border: 1px solid var(--border-color) !important;
|
| 1454 |
-
border-radius: var(--radius-
|
| 1455 |
overflow: hidden;
|
| 1456 |
box-shadow: var(--shadow-sm);
|
| 1457 |
-
margin-bottom: 1rem;
|
| 1458 |
-
}
|
| 1459 |
-
|
| 1460 |
-
.gradio-tabitem {
|
| 1461 |
-
padding: 1rem !important;
|
| 1462 |
background: var(--card-bg) !important;
|
|
|
|
| 1463 |
}
|
| 1464 |
|
| 1465 |
.tab-nav {
|
| 1466 |
-
background:
|
| 1467 |
-
|
| 1468 |
-
|
| 1469 |
-
gap: 0 !important;
|
| 1470 |
}
|
| 1471 |
|
| 1472 |
.tab-nav button {
|
| 1473 |
-
|
| 1474 |
-
|
| 1475 |
-
color: var(--text-muted) !important;
|
| 1476 |
-
border: none !important;
|
| 1477 |
-
border-bottom: 3px solid transparent !important;
|
| 1478 |
-
background: transparent !important;
|
| 1479 |
-
transition: all 0.2s ease !important;
|
| 1480 |
-
margin: 0 !important;
|
| 1481 |
-
border-radius: 0 !important;
|
| 1482 |
-
}
|
| 1483 |
-
|
| 1484 |
-
.tab-nav button:hover {
|
| 1485 |
-
color: var(--primary-color) !important;
|
| 1486 |
-
background: rgba(102, 126, 234, 0.05) !important;
|
| 1487 |
-
}
|
| 1488 |
-
|
| 1489 |
-
.tab-nav button.selected {
|
| 1490 |
-
color: var(--primary-color) !important;
|
| 1491 |
-
border-bottom-color: var(--primary-color) !important;
|
| 1492 |
-
background: var(--card-bg) !important;
|
| 1493 |
-
font-weight: 600 !important;
|
| 1494 |
}
|
| 1495 |
|
| 1496 |
/* ===== GROUPS & CARDS ===== */
|
| 1497 |
.gradio-group {
|
| 1498 |
border: 1px solid var(--border-color) !important;
|
| 1499 |
border-radius: var(--radius-md) !important;
|
| 1500 |
-
padding:
|
| 1501 |
background: var(--card-bg) !important;
|
| 1502 |
box-shadow: var(--shadow-sm) !important;
|
| 1503 |
-
margin-bottom:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1504 |
}
|
| 1505 |
|
| 1506 |
/* ===== ACCORDION STYLING ===== */
|
| 1507 |
.gradio-accordion {
|
| 1508 |
border: 1px solid var(--border-color) !important;
|
| 1509 |
border-radius: var(--radius-md) !important;
|
| 1510 |
-
overflow: hidden;
|
| 1511 |
-
box-shadow: var(--shadow-sm);
|
| 1512 |
-
margin-bottom: 1rem;
|
| 1513 |
-
}
|
| 1514 |
-
|
| 1515 |
-
.gradio-accordion > .label-wrap {
|
| 1516 |
-
background: linear-gradient(180deg, #f8fafc 0%, #f1f5f9 100%) !important;
|
| 1517 |
-
padding: 0.875rem 1rem !important;
|
| 1518 |
-
border-bottom: 1px solid var(--border-color);
|
| 1519 |
-
}
|
| 1520 |
-
|
| 1521 |
-
.gradio-accordion > .label-wrap:hover {
|
| 1522 |
-
background: linear-gradient(180deg, #f1f5f9 0%, #e2e8f0 100%) !important;
|
| 1523 |
-
}
|
| 1524 |
-
|
| 1525 |
-
.gradio-accordion > .label-wrap span {
|
| 1526 |
-
font-weight: 600 !important;
|
| 1527 |
-
color: var(--text-color) !important;
|
| 1528 |
-
}
|
| 1529 |
-
|
| 1530 |
-
.gradio-accordion > div:last-child {
|
| 1531 |
-
padding: 1rem !important;
|
| 1532 |
background: var(--card-bg) !important;
|
| 1533 |
}
|
| 1534 |
|
| 1535 |
/* ===== BUTTONS ===== */
|
| 1536 |
-
/* Primary submit button */
|
| 1537 |
.submit-btn {
|
| 1538 |
background: linear-gradient(135deg, var(--primary-color) 0%, var(--accent-color) 100%) !important;
|
| 1539 |
border: none !important;
|
| 1540 |
color: white !important;
|
| 1541 |
-
font-weight:
|
| 1542 |
-
padding:
|
| 1543 |
border-radius: var(--radius-md) !important;
|
| 1544 |
cursor: pointer;
|
| 1545 |
-
transition: all 0.
|
| 1546 |
-
box-shadow:
|
| 1547 |
width: 100% !important;
|
| 1548 |
-
font-size: 1rem !important;
|
|
|
|
| 1549 |
}
|
| 1550 |
|
| 1551 |
.submit-btn:hover {
|
| 1552 |
-
transform: translateY(-
|
| 1553 |
-
box-shadow: 0
|
| 1554 |
-
}
|
| 1555 |
-
|
| 1556 |
-
.submit-btn:active {
|
| 1557 |
-
transform: translateY(0);
|
| 1558 |
-
}
|
| 1559 |
-
|
| 1560 |
-
/* Secondary buttons (Copy, Download, Load) */
|
| 1561 |
-
button.secondary,
|
| 1562 |
-
button[size="sm"] {
|
| 1563 |
-
background: var(--card-bg) !important;
|
| 1564 |
-
border: 1px solid var(--border-color) !important;
|
| 1565 |
-
color: var(--text-color) !important;
|
| 1566 |
-
font-weight: 500 !important;
|
| 1567 |
-
padding: 0.5rem 1rem !important;
|
| 1568 |
-
border-radius: var(--radius-sm) !important;
|
| 1569 |
-
transition: all 0.2s ease !important;
|
| 1570 |
-
box-shadow: var(--shadow-sm) !important;
|
| 1571 |
-
}
|
| 1572 |
-
|
| 1573 |
-
button.secondary:hover,
|
| 1574 |
-
button[size="sm"]:hover {
|
| 1575 |
-
background: var(--bg-color) !important;
|
| 1576 |
-
border-color: var(--primary-color) !important;
|
| 1577 |
-
color: var(--primary-color) !important;
|
| 1578 |
-
box-shadow: var(--shadow-md) !important;
|
| 1579 |
-
}
|
| 1580 |
-
|
| 1581 |
-
/* Small primary buttons (Load Model) */
|
| 1582 |
-
button.primary[size="sm"] {
|
| 1583 |
-
background: linear-gradient(135deg, var(--primary-color) 0%, var(--accent-color) 100%) !important;
|
| 1584 |
-
border: none !important;
|
| 1585 |
-
color: white !important;
|
| 1586 |
-
font-weight: 600 !important;
|
| 1587 |
-
}
|
| 1588 |
-
|
| 1589 |
-
button.primary[size="sm"]:hover {
|
| 1590 |
-
box-shadow: 0 4px 12px rgba(102, 126, 234, 0.4) !important;
|
| 1591 |
-
color: white !important;
|
| 1592 |
-
}
|
| 1593 |
-
|
| 1594 |
-
/* ===== INPUT COMPONENTS ===== */
|
| 1595 |
-
/* File upload area */
|
| 1596 |
-
.file-upload-area {
|
| 1597 |
-
border: 2px dashed var(--border-color) !important;
|
| 1598 |
-
border-radius: var(--radius-lg) !important;
|
| 1599 |
-
padding: 1.5rem !important;
|
| 1600 |
-
text-align: center;
|
| 1601 |
-
transition: all 0.3s ease !important;
|
| 1602 |
-
background: var(--bg-color) !important;
|
| 1603 |
-
}
|
| 1604 |
-
|
| 1605 |
-
.file-upload-area:hover {
|
| 1606 |
-
border-color: var(--primary-color) !important;
|
| 1607 |
-
background: rgba(102, 126, 234, 0.05) !important;
|
| 1608 |
-
}
|
| 1609 |
-
|
| 1610 |
-
/* Dropdowns */
|
| 1611 |
-
.gradio-dropdown {
|
| 1612 |
-
border-radius: var(--radius-sm) !important;
|
| 1613 |
-
}
|
| 1614 |
-
|
| 1615 |
-
.gradio-dropdown > div > input {
|
| 1616 |
-
border-radius: var(--radius-sm) !important;
|
| 1617 |
-
}
|
| 1618 |
-
|
| 1619 |
-
/* Sliders */
|
| 1620 |
-
input[type="range"] {
|
| 1621 |
-
accent-color: var(--primary-color);
|
| 1622 |
}
|
| 1623 |
|
| 1624 |
/* ===== OUTPUT BOXES ===== */
|
| 1625 |
.thinking-box {
|
| 1626 |
-
background: var(--
|
| 1627 |
-
border: 1px solid var(--
|
|
|
|
| 1628 |
border-radius: var(--radius-md) !important;
|
| 1629 |
-
font-family: '
|
| 1630 |
-
|
| 1631 |
}
|
| 1632 |
|
| 1633 |
-
.thinking-box
|
| 1634 |
-
|
| 1635 |
-
border: none !important;
|
| 1636 |
}
|
| 1637 |
|
| 1638 |
.summary-box {
|
| 1639 |
-
background: var(--
|
| 1640 |
-
border: 1px solid var(--
|
| 1641 |
border-radius: var(--radius-md) !important;
|
| 1642 |
-
padding:
|
| 1643 |
-
|
| 1644 |
-
|
| 1645 |
-
|
| 1646 |
-
|
| 1647 |
-
font-size: 0.9rem;
|
| 1648 |
-
}
|
| 1649 |
-
|
| 1650 |
-
.stats-grid table {
|
| 1651 |
-
width: 100%;
|
| 1652 |
-
border-collapse: collapse;
|
| 1653 |
-
}
|
| 1654 |
-
|
| 1655 |
-
.stats-grid th {
|
| 1656 |
-
text-align: left;
|
| 1657 |
-
padding: 0.5rem;
|
| 1658 |
-
background: var(--bg-color);
|
| 1659 |
-
font-weight: 600;
|
| 1660 |
-
color: var(--text-muted);
|
| 1661 |
-
font-size: 0.8rem;
|
| 1662 |
-
text-transform: uppercase;
|
| 1663 |
-
letter-spacing: 0.025em;
|
| 1664 |
-
}
|
| 1665 |
-
|
| 1666 |
-
.stats-grid td {
|
| 1667 |
-
padding: 0.5rem;
|
| 1668 |
-
border-bottom: 1px solid var(--border-light);
|
| 1669 |
-
}
|
| 1670 |
-
|
| 1671 |
-
/* ===== FOOTER ===== */
|
| 1672 |
-
.footer {
|
| 1673 |
-
text-align: center;
|
| 1674 |
-
margin-top: 2rem;
|
| 1675 |
-
padding: 1.25rem;
|
| 1676 |
-
color: var(--text-muted);
|
| 1677 |
-
font-size: 0.85rem;
|
| 1678 |
-
border-top: 1px solid var(--border-color);
|
| 1679 |
-
background: linear-gradient(180deg, var(--bg-color) 0%, #f1f5f9 100%);
|
| 1680 |
-
border-radius: 0 0 var(--radius-lg) var(--radius-lg);
|
| 1681 |
}
|
| 1682 |
|
| 1683 |
/* ===== RESPONSIVE ADJUSTMENTS ===== */
|
| 1684 |
-
@media (max-width:
|
| 1685 |
-
.
|
| 1686 |
-
|
| 1687 |
}
|
| 1688 |
-
|
| 1689 |
-
.app-header p {
|
| 1690 |
-
font-size: 1rem;
|
| 1691 |
-
}
|
| 1692 |
-
|
| 1693 |
.submit-btn {
|
| 1694 |
-
|
|
|
|
|
|
|
| 1695 |
}
|
| 1696 |
}
|
| 1697 |
|
| 1698 |
-
|
| 1699 |
-
|
| 1700 |
-
|
| 1701 |
-
|
| 1702 |
-
|
| 1703 |
-
|
| 1704 |
-
|
| 1705 |
-
}
|
| 1706 |
-
|
| 1707 |
-
/* ===== SCROLLBAR STYLING ===== */
|
| 1708 |
-
.thinking-box textarea::-webkit-scrollbar,
|
| 1709 |
-
.summary-box::-webkit-scrollbar {
|
| 1710 |
-
width: 8px;
|
| 1711 |
-
}
|
| 1712 |
-
|
| 1713 |
-
.thinking-box textarea::-webkit-scrollbar-track,
|
| 1714 |
-
.summary-box::-webkit-scrollbar-track {
|
| 1715 |
-
background: var(--border-light);
|
| 1716 |
-
border-radius: 4px;
|
| 1717 |
-
}
|
| 1718 |
-
|
| 1719 |
-
.thinking-box textarea::-webkit-scrollbar-thumb,
|
| 1720 |
-
.summary-box::-webkit-scrollbar-thumb {
|
| 1721 |
-
background: var(--border-color);
|
| 1722 |
-
border-radius: 4px;
|
| 1723 |
-
}
|
| 1724 |
-
|
| 1725 |
-
.thinking-box textarea::-webkit-scrollbar-thumb:hover,
|
| 1726 |
-
.summary-box::-webkit-scrollbar-thumb:hover {
|
| 1727 |
-
background: var(--text-muted);
|
| 1728 |
}
|
| 1729 |
"""
|
| 1730 |
|
| 1731 |
|
|
|
|
| 1732 |
# Create Gradio interface
|
| 1733 |
def create_interface():
|
| 1734 |
"""Create and configure the Gradio interface."""
|
|
@@ -1769,24 +1635,36 @@ def create_interface():
|
|
| 1769 |
with gr.Column(scale=1):
|
| 1770 |
|
| 1771 |
# ==========================================
|
| 1772 |
-
# Section 1: Input Configuration (Language +
|
| 1773 |
# ==========================================
|
| 1774 |
with gr.Group():
|
| 1775 |
-
gr.HTML('<div class="section-header"><span class="section-icon">
|
| 1776 |
|
| 1777 |
language_selector = gr.Dropdown(
|
| 1778 |
choices=[("English", "en"), ("Traditional Chinese (zh-TW)", "zh-TW")],
|
| 1779 |
value="en",
|
| 1780 |
-
label="
|
| 1781 |
-
info="
|
| 1782 |
)
|
|
|
|
|
|
|
|
|
|
| 1783 |
|
| 1784 |
-
|
| 1785 |
-
|
| 1786 |
-
|
| 1787 |
-
|
| 1788 |
-
|
| 1789 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1790 |
|
| 1791 |
# ==========================================
|
| 1792 |
# Section 2: Model Selection (Tabs)
|
|
@@ -2260,17 +2138,10 @@ def create_interface():
|
|
| 2260 |
outputs=[custom_info_output],
|
| 2261 |
)
|
| 2262 |
|
| 2263 |
-
# Also update submit button to use custom model state
|
| 2264 |
-
# Note: We'll modify the summarize_streaming function to accept custom_model_state
|
| 2265 |
-
|
| 2266 |
-
# ==========================================
|
| 2267 |
-
# END: Custom Model Loader Event Handlers
|
| 2268 |
-
# ==========================================
|
| 2269 |
-
|
| 2270 |
# Update submit button to include custom_model_state in inputs and system_prompt_debug in outputs
|
| 2271 |
submit_btn.click(
|
| 2272 |
fn=summarize_streaming,
|
| 2273 |
-
inputs=[file_input, model_dropdown, enable_reasoning, max_tokens, temperature_slider, top_p, top_k, language_selector, thread_config_dropdown, custom_threads_slider, custom_model_state],
|
| 2274 |
outputs=[thinking_output, summary_output, info_output, metrics_state, system_prompt_debug],
|
| 2275 |
show_progress="full"
|
| 2276 |
)
|
|
|
|
| 1030 |
|
| 1031 |
def summarize_streaming(
|
| 1032 |
file_obj,
|
| 1033 |
+
text_input: str = "",
|
| 1034 |
+
model_key: str = "qwen3_600m_q4",
|
| 1035 |
enable_reasoning: bool = True,
|
| 1036 |
max_tokens: int = 2048,
|
| 1037 |
temperature: float = 0.6,
|
|
|
|
| 1043 |
custom_model_state: Any = None,
|
| 1044 |
) -> Generator[Tuple[str, str, str, dict, str], None, None]:
|
| 1045 |
"""
|
| 1046 |
+
Stream summary generation from uploaded file or text input.
|
| 1047 |
|
| 1048 |
Args:
|
| 1049 |
file_obj: Gradio file object
|
| 1050 |
+
text_input: Direct text input from user
|
| 1051 |
model_key: Model identifier from AVAILABLE_MODELS
|
| 1052 |
enable_reasoning: Whether to use reasoning mode (/think) for Qwen3 models
|
| 1053 |
max_tokens: Maximum tokens to generate
|
|
|
|
| 1104 |
if max_tokens > usable_max - 512:
|
| 1105 |
max_tokens = usable_max - 512
|
| 1106 |
|
| 1107 |
+
# Read input source (prioritize text_input)
|
| 1108 |
try:
|
| 1109 |
+
transcript = ""
|
| 1110 |
+
source_name = "Direct Input"
|
| 1111 |
+
source_size = 0
|
| 1112 |
+
|
| 1113 |
+
if text_input and text_input.strip():
|
| 1114 |
+
transcript = text_input
|
| 1115 |
+
source_size = len(transcript.encode('utf-8'))
|
| 1116 |
+
elif file_obj is not None:
|
| 1117 |
+
path = file_obj.name if hasattr(file_obj, 'name') else file_obj
|
| 1118 |
+
source_name = os.path.basename(path)
|
| 1119 |
+
source_size = os.path.getsize(path)
|
| 1120 |
+
with open(path, 'r', encoding='utf-8') as f:
|
| 1121 |
+
transcript = f.read()
|
| 1122 |
+
else:
|
| 1123 |
system_prompt_preview = build_system_prompt(output_language, False, enable_reasoning)
|
| 1124 |
+
yield ("", "Error: Please upload a file or paste text first", "", metrics, system_prompt_preview)
|
| 1125 |
return
|
| 1126 |
|
| 1127 |
+
# Store input info
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1128 |
metrics["file_info"] = {
|
| 1129 |
+
"source": source_name,
|
| 1130 |
+
"size_bytes": source_size,
|
| 1131 |
"original_char_count": len(transcript),
|
| 1132 |
}
|
| 1133 |
except Exception as e:
|
| 1134 |
system_prompt_preview = build_system_prompt(output_language, False, enable_reasoning)
|
| 1135 |
+
yield ("", f"Error reading input: {e}", "", metrics, system_prompt_preview)
|
| 1136 |
return
|
| 1137 |
|
| 1138 |
if not transcript.strip():
|
|
|
|
| 1354 |
# Custom CSS for better UI
|
| 1355 |
custom_css = """
|
| 1356 |
:root {
|
| 1357 |
+
--primary-color: #6366f1;
|
| 1358 |
+
--primary-dark: #4f46e5;
|
| 1359 |
+
--primary-light: #c7d2fe;
|
| 1360 |
+
--accent-color: #8b5cf6;
|
| 1361 |
--bg-color: #f8fafc;
|
| 1362 |
+
--card-bg: rgba(255, 255, 255, 0.85);
|
| 1363 |
--text-color: #1e293b;
|
| 1364 |
--text-muted: #64748b;
|
| 1365 |
--border-color: #e2e8f0;
|
| 1366 |
--border-light: #f1f5f9;
|
| 1367 |
+
|
| 1368 |
+
/* Semantic Colors */
|
| 1369 |
+
--thinking-bg: #f5f3ff;
|
| 1370 |
+
--thinking-border: #ddd6fe;
|
| 1371 |
+
--thinking-accent: #8b5cf6;
|
| 1372 |
+
--summary-bg: #f0fdf4;
|
| 1373 |
+
--summary-border: #dcfce7;
|
| 1374 |
+
--summary-accent: #22c55e;
|
| 1375 |
+
|
| 1376 |
--shadow-sm: 0 1px 2px rgba(0, 0, 0, 0.05);
|
| 1377 |
--shadow-md: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
|
| 1378 |
--shadow-lg: 0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);
|
| 1379 |
+
--radius-sm: 8px;
|
| 1380 |
+
--radius-md: 12px;
|
| 1381 |
+
--radius-lg: 20px;
|
| 1382 |
}
|
| 1383 |
|
| 1384 |
/* ===== LAYOUT & BASE ===== */
|
| 1385 |
.gradio-container {
|
| 1386 |
max-width: 1400px !important;
|
| 1387 |
+
background: radial-gradient(circle at top right, #eef2ff 0%, #f8fafc 40%) !important;
|
| 1388 |
}
|
| 1389 |
|
| 1390 |
/* ===== HEADER ===== */
|
| 1391 |
.app-header {
|
| 1392 |
text-align: center;
|
| 1393 |
+
padding: 2.5rem 1.5rem;
|
| 1394 |
background: linear-gradient(135deg, var(--primary-color) 0%, var(--accent-color) 100%);
|
| 1395 |
border-radius: var(--radius-lg);
|
| 1396 |
+
margin-bottom: 2rem;
|
| 1397 |
color: white;
|
| 1398 |
box-shadow: var(--shadow-lg);
|
| 1399 |
+
position: relative;
|
| 1400 |
+
overflow: hidden;
|
| 1401 |
+
}
|
| 1402 |
+
|
| 1403 |
+
.app-header::before {
|
| 1404 |
+
content: "";
|
| 1405 |
+
position: absolute;
|
| 1406 |
+
top: -50%;
|
| 1407 |
+
left: -50%;
|
| 1408 |
+
width: 200%;
|
| 1409 |
+
height: 200%;
|
| 1410 |
+
background: radial-gradient(circle, rgba(255,255,255,0.1) 0%, transparent 60%);
|
| 1411 |
+
animation: rotate 20s linear infinite;
|
| 1412 |
+
}
|
| 1413 |
+
|
| 1414 |
+
@keyframes rotate {
|
| 1415 |
+
from { transform: rotate(0deg); }
|
| 1416 |
+
to { transform: rotate(360deg); }
|
| 1417 |
}
|
| 1418 |
|
| 1419 |
.app-header h1 {
|
| 1420 |
margin: 0 0 0.5rem 0;
|
| 1421 |
+
font-size: 2.5rem;
|
| 1422 |
+
font-weight: 800;
|
| 1423 |
+
letter-spacing: -0.04em;
|
| 1424 |
+
position: relative;
|
| 1425 |
+
z-index: 1;
|
| 1426 |
}
|
| 1427 |
|
| 1428 |
.app-header p {
|
| 1429 |
margin: 0;
|
| 1430 |
opacity: 0.9;
|
| 1431 |
+
font-size: 1.15rem;
|
| 1432 |
+
font-weight: 400;
|
| 1433 |
+
position: relative;
|
| 1434 |
+
z-index: 1;
|
| 1435 |
}
|
| 1436 |
|
| 1437 |
.model-badge {
|
| 1438 |
display: inline-flex;
|
| 1439 |
align-items: center;
|
| 1440 |
gap: 0.5rem;
|
| 1441 |
+
background: rgba(255, 255, 255, 0.15);
|
| 1442 |
+
padding: 0.6rem 1.25rem;
|
| 1443 |
+
border-radius: 30px;
|
| 1444 |
+
font-size: 0.9rem;
|
| 1445 |
+
margin-top: 1.25rem;
|
| 1446 |
+
backdrop-filter: blur(8px);
|
| 1447 |
+
border: 1px solid rgba(255, 255, 255, 0.2);
|
| 1448 |
+
position: relative;
|
| 1449 |
+
z-index: 1;
|
| 1450 |
+
font-weight: 500;
|
| 1451 |
}
|
| 1452 |
|
| 1453 |
/* ===== INSTRUCTIONS ===== */
|
| 1454 |
.instructions {
|
| 1455 |
+
background: var(--card-bg);
|
| 1456 |
+
border-left: 5px solid var(--primary-color);
|
| 1457 |
+
padding: 1.25rem 1.5rem;
|
| 1458 |
+
border-radius: var(--radius-sm) var(--radius-md) var(--radius-md) var(--radius-sm);
|
| 1459 |
+
margin-bottom: 2rem;
|
| 1460 |
box-shadow: var(--shadow-sm);
|
| 1461 |
+
backdrop-filter: blur(10px);
|
| 1462 |
+
border: 1px solid var(--border-color);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1463 |
}
|
| 1464 |
|
| 1465 |
/* ===== SECTION HEADERS ===== */
|
| 1466 |
.section-header {
|
| 1467 |
+
font-size: 0.95rem;
|
| 1468 |
+
font-weight: 700;
|
| 1469 |
color: var(--text-color);
|
| 1470 |
+
margin-bottom: 1rem;
|
| 1471 |
display: flex;
|
| 1472 |
align-items: center;
|
| 1473 |
+
gap: 0.6rem;
|
| 1474 |
+
padding-bottom: 0.6rem;
|
| 1475 |
+
border-bottom: 2px solid var(--border-light);
|
| 1476 |
+
text-transform: uppercase;
|
| 1477 |
+
letter-spacing: 0.05em;
|
| 1478 |
}
|
| 1479 |
|
| 1480 |
.section-icon {
|
| 1481 |
+
font-size: 1.2rem;
|
| 1482 |
}
|
| 1483 |
|
| 1484 |
/* ===== TABS STYLING ===== */
|
| 1485 |
.gradio-tabs {
|
| 1486 |
border: 1px solid var(--border-color) !important;
|
| 1487 |
+
border-radius: var(--radius-md) !important;
|
| 1488 |
overflow: hidden;
|
| 1489 |
box-shadow: var(--shadow-sm);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1490 |
background: var(--card-bg) !important;
|
| 1491 |
+
backdrop-filter: blur(10px);
|
| 1492 |
}
|
| 1493 |
|
| 1494 |
.tab-nav {
|
| 1495 |
+
background: #f1f5f9 !important;
|
| 1496 |
+
padding: 0.25rem 0.25rem 0 0.25rem !important;
|
| 1497 |
+
gap: 4px !important;
|
|
|
|
| 1498 |
}
|
| 1499 |
|
| 1500 |
.tab-nav button {
|
| 1501 |
+
border-radius: 8px 8px 0 0 !important;
|
| 1502 |
+
padding: 0.75rem 1rem !important;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1503 |
}
|
| 1504 |
|
| 1505 |
/* ===== GROUPS & CARDS ===== */
|
| 1506 |
.gradio-group {
|
| 1507 |
border: 1px solid var(--border-color) !important;
|
| 1508 |
border-radius: var(--radius-md) !important;
|
| 1509 |
+
padding: 1.25rem !important;
|
| 1510 |
background: var(--card-bg) !important;
|
| 1511 |
box-shadow: var(--shadow-sm) !important;
|
| 1512 |
+
margin-bottom: 1.5rem !important;
|
| 1513 |
+
backdrop-filter: blur(10px);
|
| 1514 |
+
transition: transform 0.2s ease, box-shadow 0.2s ease !important;
|
| 1515 |
+
}
|
| 1516 |
+
|
| 1517 |
+
.gradio-group:hover {
|
| 1518 |
+
box-shadow: var(--shadow-md) !important;
|
| 1519 |
}
|
| 1520 |
|
| 1521 |
/* ===== ACCORDION STYLING ===== */
|
| 1522 |
.gradio-accordion {
|
| 1523 |
border: 1px solid var(--border-color) !important;
|
| 1524 |
border-radius: var(--radius-md) !important;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1525 |
background: var(--card-bg) !important;
|
| 1526 |
}
|
| 1527 |
|
| 1528 |
/* ===== BUTTONS ===== */
|
|
|
|
| 1529 |
.submit-btn {
|
| 1530 |
background: linear-gradient(135deg, var(--primary-color) 0%, var(--accent-color) 100%) !important;
|
| 1531 |
border: none !important;
|
| 1532 |
color: white !important;
|
| 1533 |
+
font-weight: 700 !important;
|
| 1534 |
+
padding: 1rem 2rem !important;
|
| 1535 |
border-radius: var(--radius-md) !important;
|
| 1536 |
cursor: pointer;
|
| 1537 |
+
transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
|
| 1538 |
+
box-shadow: 0 4px 15px rgba(99, 102, 241, 0.4) !important;
|
| 1539 |
width: 100% !important;
|
| 1540 |
+
font-size: 1.1rem !important;
|
| 1541 |
+
letter-spacing: 0.02em;
|
| 1542 |
}
|
| 1543 |
|
| 1544 |
.submit-btn:hover {
|
| 1545 |
+
transform: translateY(-3px) scale(1.02);
|
| 1546 |
+
box-shadow: 0 8px 25px rgba(99, 102, 241, 0.5) !important;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1547 |
}
|
| 1548 |
|
| 1549 |
/* ===== OUTPUT BOXES ===== */
|
| 1550 |
.thinking-box {
|
| 1551 |
+
background: var(--thinking-bg) !important;
|
| 1552 |
+
border: 1px solid var(--thinking-border) !important;
|
| 1553 |
+
border-left: 4px solid var(--thinking-accent) !important;
|
| 1554 |
border-radius: var(--radius-md) !important;
|
| 1555 |
+
font-family: 'JetBrains Mono', 'Fira Code', monospace !important;
|
| 1556 |
+
transition: all 0.3s ease !important;
|
| 1557 |
}
|
| 1558 |
|
| 1559 |
+
.thinking-box:focus-within {
|
| 1560 |
+
box-shadow: 0 0 0 3px rgba(139, 92, 246, 0.1) !important;
|
|
|
|
| 1561 |
}
|
| 1562 |
|
| 1563 |
.summary-box {
|
| 1564 |
+
background: var(--summary-bg) !important;
|
| 1565 |
+
border: 1px solid var(--summary-border) !important;
|
| 1566 |
border-radius: var(--radius-md) !important;
|
| 1567 |
+
padding: 1.5rem !important;
|
| 1568 |
+
font-size: 1.1rem !important;
|
| 1569 |
+
line-height: 1.7 !important;
|
| 1570 |
+
color: #0f172a !important;
|
| 1571 |
+
box-shadow: var(--shadow-sm);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1572 |
}
|
| 1573 |
|
| 1574 |
/* ===== RESPONSIVE ADJUSTMENTS ===== */
|
| 1575 |
+
@media (max-width: 1024px) {
|
| 1576 |
+
.gradio-container {
|
| 1577 |
+
padding: 1rem !important;
|
| 1578 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1579 |
.submit-btn {
|
| 1580 |
+
position: sticky;
|
| 1581 |
+
bottom: 1rem;
|
| 1582 |
+
z-index: 100;
|
| 1583 |
}
|
| 1584 |
}
|
| 1585 |
|
| 1586 |
+
@media (max-width: 768px) {
|
| 1587 |
+
.app-header {
|
| 1588 |
+
padding: 1.5rem 1rem;
|
| 1589 |
+
}
|
| 1590 |
+
.app-header h1 {
|
| 1591 |
+
font-size: 1.8rem;
|
| 1592 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1593 |
}
|
| 1594 |
"""
|
| 1595 |
|
| 1596 |
|
| 1597 |
+
|
| 1598 |
# Create Gradio interface
|
| 1599 |
def create_interface():
|
| 1600 |
"""Create and configure the Gradio interface."""
|
|
|
|
| 1635 |
with gr.Column(scale=1):
|
| 1636 |
|
| 1637 |
# ==========================================
|
| 1638 |
+
# Section 1: Input Configuration (Language + Source)
|
| 1639 |
# ==========================================
|
| 1640 |
with gr.Group():
|
| 1641 |
+
gr.HTML('<div class="section-header"><span class="section-icon">๐</span> Global Settings</div>')
|
| 1642 |
|
| 1643 |
language_selector = gr.Dropdown(
|
| 1644 |
choices=[("English", "en"), ("Traditional Chinese (zh-TW)", "zh-TW")],
|
| 1645 |
value="en",
|
| 1646 |
+
label="Output Language",
|
| 1647 |
+
info="Target language for the summary"
|
| 1648 |
)
|
| 1649 |
+
|
| 1650 |
+
with gr.Group():
|
| 1651 |
+
gr.HTML('<div class="section-header"><span class="section-icon">๐ฅ</span> Input Content</div>')
|
| 1652 |
|
| 1653 |
+
with gr.Tabs() as input_tabs:
|
| 1654 |
+
with gr.TabItem("๐ Upload File", id=0):
|
| 1655 |
+
file_input = gr.File(
|
| 1656 |
+
label="Transcript (.txt)",
|
| 1657 |
+
file_types=[".txt"],
|
| 1658 |
+
type="filepath",
|
| 1659 |
+
elem_classes=["file-upload-area"]
|
| 1660 |
+
)
|
| 1661 |
+
with gr.TabItem("โ๏ธ Paste Text", id=1):
|
| 1662 |
+
text_input = gr.Textbox(
|
| 1663 |
+
label="Paste Transcript",
|
| 1664 |
+
placeholder="Paste your transcript content here...",
|
| 1665 |
+
lines=10,
|
| 1666 |
+
max_lines=20
|
| 1667 |
+
)
|
| 1668 |
|
| 1669 |
# ==========================================
|
| 1670 |
# Section 2: Model Selection (Tabs)
|
|
|
|
| 2138 |
outputs=[custom_info_output],
|
| 2139 |
)
|
| 2140 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2141 |
# Update submit button to include custom_model_state in inputs and system_prompt_debug in outputs
|
| 2142 |
submit_btn.click(
|
| 2143 |
fn=summarize_streaming,
|
| 2144 |
+
inputs=[file_input, text_input, model_dropdown, enable_reasoning, max_tokens, temperature_slider, top_p, top_k, language_selector, thread_config_dropdown, custom_threads_slider, custom_model_state],
|
| 2145 |
outputs=[thinking_output, summary_output, info_output, metrics_state, system_prompt_debug],
|
| 2146 |
show_progress="full"
|
| 2147 |
)
|