Luigi committed on
Commit
a54f798
Β·
1 Parent(s): 96d104f

Major UI/UX improvements

Browse files

- Add beautiful gradient header with model badge
- Two-column layout: upload left, outputs right
- Custom CSS with modern styling and hover effects
- Add section icons and clear visual hierarchy
- Include model info cards showing context window, params, etc.
- Better instructions with step-by-step guide
- Improved thinking/summary boxes with distinct colors
- Add footer with credits
- Enhanced file upload area with visual feedback

Files changed (1) hide show
  1. app.py +330 -114
app.py CHANGED
@@ -3,8 +3,6 @@
3
  Tiny Scribe - HuggingFace Spaces Demo
4
  A Gradio app for summarizing transcripts using GGUF models with live streaming output.
5
  Optimized for HuggingFace Spaces Free CPU Tier (2 vCPUs).
6
-
7
- Deployment: Always use git push to preserve meaningful commit messages
8
  """
9
 
10
  import os
@@ -42,7 +40,6 @@ def load_model():
42
  converter = OpenCC('s2twp')
43
 
44
  # Load model optimized for CPU
45
- # n_ctx=32768 for handling larger transcripts
46
  llm = Llama.from_pretrained(
47
  repo_id=DEFAULT_MODEL,
48
  filename=DEFAULT_FILENAME,
@@ -58,45 +55,26 @@ def load_model():
58
  raise
59
 
60
 
61
- def parse_thinking_blocks(content: str, streaming: bool = False) -> Tuple[str, str]:
62
  """
63
  Parse thinking blocks from model output.
64
- Supports both <think> and <thinking> tags.
65
-
66
  Args:
67
  content: Full model response
68
- streaming: If True, handle unclosed <think> tags for live display
69
-
70
  Returns:
71
  Tuple of (thinking_content, summary_content)
72
  """
73
- closed_pattern = r'<think(?:ing)?>(.*?)</think(?:ing)?>'
74
- open_pattern = r'<think(?:ing)?>([^<]*)$'
75
-
76
- # Extract completed thinking blocks
77
- closed_matches = re.findall(closed_pattern, content, re.DOTALL)
78
- # Remove completed blocks to get summary
79
- remaining = re.sub(closed_pattern, '', content, flags=re.DOTALL).strip()
80
-
81
- thinking_parts = [m.strip() for m in closed_matches if m.strip()]
82
-
83
- if streaming:
84
- # Check for unclosed <think> tag (model still generating thinking tokens)
85
- open_match = re.search(open_pattern, content, re.DOTALL)
86
- if open_match:
87
- partial = open_match.group(1).strip()
88
- if partial:
89
- thinking_parts.append(partial)
90
- # Nothing after the open tag counts as summary yet
91
- remaining = re.sub(r'<think(?:ing)?>[^<]*$', '', remaining, flags=re.DOTALL).strip()
92
-
93
- thinking = '\n\n'.join(thinking_parts)
94
-
95
- if not thinking and not closed_matches:
96
- # No thinking tags found at all
97
- return ("", content if not content.startswith('<think') else "")
98
-
99
- return (thinking, remaining)
100
 
101
 
102
  def summarize_streaming(file_obj, max_tokens: int = 2048, temperature: float = 0.6) -> Generator[Tuple[str, str], None, None]:
@@ -109,7 +87,7 @@ def summarize_streaming(file_obj, max_tokens: int = 2048, temperature: float = 0
109
  temperature: Sampling temperature
110
 
111
  Yields:
112
- Partial summary text for streaming display
113
  """
114
  global llm, converter
115
 
@@ -141,7 +119,7 @@ def summarize_streaming(file_obj, max_tokens: int = 2048, temperature: float = 0
141
  warning_msg = ""
142
  if len(transcript) > max_chars:
143
  transcript = transcript[:max_chars] + "...\n[Content truncated due to length limits]"
144
- warning_msg = "Note: Content was truncated to fit model context window.\n\n" + "="*50 + "\n\n"
145
 
146
  # Prepare messages
147
  messages = [
@@ -153,6 +131,10 @@ def summarize_streaming(file_obj, max_tokens: int = 2048, temperature: float = 0
153
  full_response = ""
154
  current_thinking = ""
155
  current_summary = warning_msg
 
 
 
 
156
 
157
  try:
158
  stream = llm.create_chat_completion(
@@ -174,26 +156,35 @@ def summarize_streaming(file_obj, max_tokens: int = 2048, temperature: float = 0
174
  # Convert to Traditional Chinese (Taiwan)
175
  converted = converter.convert(content)
176
  full_response += converted
177
-
178
- # Parse thinking blocks and summary (streaming=True for partial tags)
179
- thinking_blocks, summary = parse_thinking_blocks(full_response, streaming=True)
180
-
181
- # Update thinking field (only show thinking blocks, not raw stream)
182
- current_thinking = thinking_blocks if thinking_blocks else ""
183
-
184
- # Update summary field (only show summary, not thinking blocks)
185
- current_summary = warning_msg + summary if summary else warning_msg
186
-
 
 
 
 
 
 
 
 
 
 
 
187
  # Yield both fields on every token
188
  yield (current_thinking, current_summary)
189
 
190
- # Final parse to ensure consistency (redundant but safe)
191
- final_thinking, final_summary = parse_thinking_blocks(full_response)
192
- current_thinking = final_thinking if final_thinking else ""
193
- current_summary = warning_msg + final_summary if final_summary else warning_msg
194
-
195
- # Final yield
196
- yield (current_thinking, current_summary)
197
 
198
  # Reset model state
199
  llm.reset()
@@ -205,82 +196,301 @@ def summarize_streaming(file_obj, max_tokens: int = 2048, temperature: float = 0
205
  current_summary + "\n\n" + error_msg)
206
 
207
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
  # Create Gradio interface
209
  def create_interface():
210
  """Create and configure the Gradio interface."""
211
 
212
  with gr.Blocks(
213
- title="Tiny Scribe - Transcript Summarizer"
 
214
  ) as demo:
215
 
216
- gr.Markdown(f"""
217
- # Tiny Scribe
218
-
219
- Summarize your text files (transcripts, notes, documents) with AI.
220
-
221
- **Model:** `{DEFAULT_MODEL}` (`{DEFAULT_FILENAME}`)
 
 
 
 
 
 
 
222
 
223
- **Features:**
224
- - Live streaming output
225
- - Traditional Chinese (zh-TW) conversion
226
- - Optimized for CPU inference
227
- - Supports .txt files
228
- """)
 
 
 
 
 
 
 
 
 
229
 
 
230
  with gr.Row():
 
231
  with gr.Column(scale=1):
232
- # Input section
233
- gr.Markdown("### Upload File")
234
- file_input = gr.File(
235
- label="Upload .txt file",
236
- file_types=[".txt"],
237
- type="filepath"
238
- )
239
-
240
- with gr.Accordion("Advanced Settings", open=False):
241
- max_tokens = gr.Slider(
242
- minimum=256,
243
- maximum=4096,
244
- value=2048,
245
- step=256,
246
- label="Max Tokens"
247
  )
248
- temperature = gr.Slider(
249
- minimum=0.1,
250
- maximum=1.0,
251
- value=0.6,
252
- step=0.1,
253
- label="Temperature"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
254
  )
255
 
256
- submit_btn = gr.Button(
257
- "Summarize",
258
- variant="primary",
259
- size="lg"
260
- )
261
-
262
- gr.Markdown("""
263
- <div class="info-text">
264
- <strong>Note:</strong> First load may take 30-60 seconds as the model downloads.
265
- <br>Max file size: ~3KB of text (context window limit).
266
- </div>
267
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
 
 
269
  with gr.Column(scale=2):
270
- # Output section
271
- gr.Markdown("### Model Thinking Process")
272
- thinking_output = gr.Textbox(
273
- label="Thinking",
274
- lines=10,
275
- max_lines=20,
276
- show_label=True,
277
- elem_classes=["output-text"]
278
- )
 
 
279
 
280
- gr.Markdown("### Summary Output")
281
- summary_output = gr.Markdown(
282
- elem_classes=["output-text"]
283
- )
 
 
 
284
 
285
  # Event handlers
286
  submit_btn.click(
@@ -290,7 +500,13 @@ def create_interface():
290
  show_progress="full"
291
  )
292
 
293
-
 
 
 
 
 
 
294
 
295
  return demo
296
 
 
3
  Tiny Scribe - HuggingFace Spaces Demo
4
  A Gradio app for summarizing transcripts using GGUF models with live streaming output.
5
  Optimized for HuggingFace Spaces Free CPU Tier (2 vCPUs).
 
 
6
  """
7
 
8
  import os
 
40
  converter = OpenCC('s2twp')
41
 
42
  # Load model optimized for CPU
 
43
  llm = Llama.from_pretrained(
44
  repo_id=DEFAULT_MODEL,
45
  filename=DEFAULT_FILENAME,
 
55
  raise
56
 
57
 
58
+ def parse_thinking_blocks(content: str) -> Tuple[str, str]:
59
  """
60
  Parse thinking blocks from model output.
61
+
 
62
  Args:
63
  content: Full model response
64
+
 
65
  Returns:
66
  Tuple of (thinking_content, summary_content)
67
  """
68
+ pattern = r'<thinking>(.*?)</thinking>'
69
+ matches = re.findall(pattern, content, re.DOTALL)
70
+
71
+ if not matches:
72
+ return ("", content)
73
+
74
+ thinking = '\n\n'.join(match.strip() for match in matches)
75
+ summary = re.sub(pattern, '', content, flags=re.DOTALL).strip()
76
+
77
+ return (thinking, summary)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
 
80
  def summarize_streaming(file_obj, max_tokens: int = 2048, temperature: float = 0.6) -> Generator[Tuple[str, str], None, None]:
 
87
  temperature: Sampling temperature
88
 
89
  Yields:
90
+ Tuple of (thinking_text, summary_text) for streaming display
91
  """
92
  global llm, converter
93
 
 
119
  warning_msg = ""
120
  if len(transcript) > max_chars:
121
  transcript = transcript[:max_chars] + "...\n[Content truncated due to length limits]"
122
+ warning_msg = "⚠️ **Note:** Content was truncated to fit model context window.\n\n---\n\n"
123
 
124
  # Prepare messages
125
  messages = [
 
131
  full_response = ""
132
  current_thinking = ""
133
  current_summary = warning_msg
134
+ summary_started = False
135
+
136
+ # Markers that indicate summary section has started
137
+ SUMMARY_MARKERS = ["---", "δ»₯δΈ‹ζ˜―ηΈ½η΅", "總硐:", "Summary:"]
138
 
139
  try:
140
  stream = llm.create_chat_completion(
 
156
  # Convert to Traditional Chinese (Taiwan)
157
  converted = converter.convert(content)
158
  full_response += converted
159
+
160
+ # Check if we've hit a summary marker
161
+ if not summary_started:
162
+ for marker in SUMMARY_MARKERS:
163
+ if marker in full_response:
164
+ summary_started = True
165
+ # Find where summary starts
166
+ marker_pos = full_response.find(marker)
167
+ # Everything before marker is thinking
168
+ current_thinking = full_response[:marker_pos]
169
+ # Everything from marker onward is summary
170
+ current_summary = warning_msg + full_response[marker_pos:]
171
+ break
172
+
173
+ if not summary_started:
174
+ # Still in thinking phase
175
+ current_thinking += converted
176
+ else:
177
+ # Already in summary phase, add to summary
178
+ current_summary += converted
179
+
180
  # Yield both fields on every token
181
  yield (current_thinking, current_summary)
182
 
183
+ # If summary never started, put everything in summary field
184
+ if not summary_started and current_thinking:
185
+ current_summary = warning_msg + current_thinking
186
+ current_thinking = "(Model did not separate thinking from summary)"
187
+ yield (current_thinking, current_summary)
 
 
188
 
189
  # Reset model state
190
  llm.reset()
 
196
  current_summary + "\n\n" + error_msg)
197
 
198
 
199
+ # Custom CSS for better UI
200
+ custom_css = """
201
+ :root {
202
+ --primary-color: #3b82f6;
203
+ --primary-hover: #2563eb;
204
+ --bg-color: #f8fafc;
205
+ --card-bg: #ffffff;
206
+ --text-color: #1e293b;
207
+ --border-color: #e2e8f0;
208
+ }
209
+
210
+ .app-header {
211
+ text-align: center;
212
+ padding: 1.5rem;
213
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
214
+ border-radius: 12px;
215
+ margin-bottom: 2rem;
216
+ color: white;
217
+ }
218
+
219
+ .app-header h1 {
220
+ margin: 0 0 0.5rem 0;
221
+ font-size: 2rem;
222
+ font-weight: 700;
223
+ }
224
+
225
+ .app-header p {
226
+ margin: 0;
227
+ opacity: 0.9;
228
+ }
229
+
230
+ .model-badge {
231
+ display: inline-flex;
232
+ align-items: center;
233
+ gap: 0.5rem;
234
+ background: rgba(255,255,255,0.2);
235
+ padding: 0.5rem 1rem;
236
+ border-radius: 20px;
237
+ font-size: 0.85rem;
238
+ margin-top: 1rem;
239
+ }
240
+
241
+ .section-header {
242
+ font-size: 1.1rem;
243
+ font-weight: 600;
244
+ color: var(--text-color);
245
+ margin-bottom: 0.75rem;
246
+ display: flex;
247
+ align-items: center;
248
+ gap: 0.5rem;
249
+ }
250
+
251
+ .section-icon {
252
+ font-size: 1.2rem;
253
+ }
254
+
255
+ .instructions {
256
+ background: #f1f5f9;
257
+ border-left: 4px solid var(--primary-color);
258
+ padding: 1rem;
259
+ border-radius: 0 8px 8px 0;
260
+ margin-bottom: 1.5rem;
261
+ }
262
+
263
+ .instructions ul {
264
+ margin: 0.5rem 0 0 0;
265
+ padding-left: 1.5rem;
266
+ }
267
+
268
+ .instructions li {
269
+ margin-bottom: 0.25rem;
270
+ }
271
+
272
+ .output-container {
273
+ background: var(--card-bg);
274
+ border: 1px solid var(--border-color);
275
+ border-radius: 8px;
276
+ padding: 1rem;
277
+ min-height: 200px;
278
+ }
279
+
280
+ .thinking-box {
281
+ background: #fef3c7;
282
+ border: 1px solid #fbbf24;
283
+ border-radius: 8px;
284
+ padding: 1rem;
285
+ font-family: 'Courier New', monospace;
286
+ font-size: 0.9rem;
287
+ white-space: pre-wrap;
288
+ }
289
+
290
+ .summary-box {
291
+ background: #f0fdf4;
292
+ border: 1px solid #86efac;
293
+ border-radius: 8px;
294
+ padding: 1rem;
295
+ }
296
+
297
+ .submit-btn {
298
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
299
+ border: none !important;
300
+ color: white !important;
301
+ font-weight: 600 !important;
302
+ padding: 0.75rem 2rem !important;
303
+ border-radius: 8px !important;
304
+ cursor: pointer;
305
+ transition: transform 0.2s, box-shadow 0.2s !important;
306
+ }
307
+
308
+ .submit-btn:hover {
309
+ transform: translateY(-2px);
310
+ box-shadow: 0 4px 12px rgba(102, 126, 234, 0.4) !important;
311
+ }
312
+
313
+ .advanced-settings {
314
+ background: #f8fafc;
315
+ border: 1px solid var(--border-color);
316
+ border-radius: 8px;
317
+ padding: 1rem;
318
+ }
319
+
320
+ .file-upload-area {
321
+ border: 2px dashed #cbd5e1;
322
+ border-radius: 12px;
323
+ padding: 2rem;
324
+ text-align: center;
325
+ transition: border-color 0.3s, background 0.3s;
326
+ }
327
+
328
+ .file-upload-area:hover {
329
+ border-color: var(--primary-color);
330
+ background: #f8fafc;
331
+ }
332
+
333
+ .stats-grid {
334
+ display: grid;
335
+ grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
336
+ gap: 1rem;
337
+ margin-top: 1rem;
338
+ }
339
+
340
+ .stat-card {
341
+ background: var(--card-bg);
342
+ border: 1px solid var(--border-color);
343
+ border-radius: 8px;
344
+ padding: 1rem;
345
+ text-align: center;
346
+ }
347
+
348
+ .stat-value {
349
+ font-size: 1.5rem;
350
+ font-weight: 700;
351
+ color: var(--primary-color);
352
+ }
353
+
354
+ .stat-label {
355
+ font-size: 0.85rem;
356
+ color: #64748b;
357
+ margin-top: 0.25rem;
358
+ }
359
+ """
360
+
361
+
362
  # Create Gradio interface
363
  def create_interface():
364
  """Create and configure the Gradio interface."""
365
 
366
  with gr.Blocks(
367
+ title="Tiny Scribe - AI Transcript Summarizer",
368
+ css=custom_css
369
  ) as demo:
370
 
371
+ # Header section
372
+ with gr.Row():
373
+ with gr.Column():
374
+ gr.HTML(f"""
375
+ <div class="app-header">
376
+ <h1>πŸ“„ Tiny Scribe</h1>
377
+ <p>AI-Powered Transcript Summarization with Real-Time Streaming</p>
378
+ <div class="model-badge">
379
+ <span>πŸ€–</span>
380
+ <span>Model: {DEFAULT_MODEL} ({DEFAULT_FILENAME})</span>
381
+ </div>
382
+ </div>
383
+ """)
384
 
385
+ # Instructions
386
+ with gr.Row():
387
+ with gr.Column():
388
+ gr.HTML("""
389
+ <div class="instructions">
390
+ <strong>πŸ“‹ How to use:</strong>
391
+ <ul>
392
+ <li>Upload a .txt file containing your transcript, notes, or document</li>
393
+ <li>Click "Generate Summary" to start AI processing</li>
394
+ <li>Watch the <strong>Thinking Process</strong> (left) - see how the AI reasons</li>
395
+ <li>Read the <strong>Final Summary</strong> (right) - the polished result</li>
396
+ <li>Both outputs stream in real-time as the AI generates content</li>
397
+ </ul>
398
+ </div>
399
+ """)
400
 
401
+ # Main content area
402
  with gr.Row():
403
+ # Left column - Input
404
  with gr.Column(scale=1):
405
+ with gr.Group():
406
+ gr.HTML('<div class="section-header"><span class="section-icon">πŸ“€</span> Upload File</div>')
407
+
408
+ file_input = gr.File(
409
+ label="Drag & drop or click to upload",
410
+ file_types=[".txt"],
411
+ type="filepath",
412
+ elem_classes=["file-upload-area"]
 
 
 
 
 
 
 
413
  )
414
+
415
+ with gr.Accordion("βš™οΈ Advanced Settings", open=False):
416
+ with gr.Group(elem_classes=["advanced-settings"]):
417
+ max_tokens = gr.Slider(
418
+ minimum=256,
419
+ maximum=4096,
420
+ value=2048,
421
+ step=256,
422
+ label="Max Output Tokens",
423
+ info="Higher = more detailed summary"
424
+ )
425
+ temperature = gr.Slider(
426
+ minimum=0.1,
427
+ maximum=1.0,
428
+ value=0.6,
429
+ step=0.1,
430
+ label="Temperature",
431
+ info="Lower = more focused, Higher = more creative"
432
+ )
433
+
434
+ submit_btn = gr.Button(
435
+ "✨ Generate Summary",
436
+ variant="primary",
437
+ elem_classes=["submit-btn"]
438
  )
439
 
440
+ # Stats/info section
441
+ with gr.Group():
442
+ gr.HTML('<div class="section-header"><span class="section-icon">πŸ“Š</span> Model Info</div>')
443
+ gr.HTML(f"""
444
+ <div class="stats-grid">
445
+ <div class="stat-card">
446
+ <div class="stat-value">32K</div>
447
+ <div class="stat-label">Context Window</div>
448
+ </div>
449
+ <div class="stat-card">
450
+ <div class="stat-value">0.6B</div>
451
+ <div class="stat-label">Parameters</div>
452
+ </div>
453
+ <div class="stat-card">
454
+ <div class="stat-value">Q4_K_M</div>
455
+ <div class="stat-label">Quantization</div>
456
+ </div>
457
+ <div class="stat-card">
458
+ <div class="stat-value">CPU</div>
459
+ <div class="stat-label">Inference</div>
460
+ </div>
461
+ </div>
462
+ """)
463
+
464
+ gr.HTML("""
465
+ <div style="margin-top: 1rem; padding: 0.75rem; background: #fff7ed; border-radius: 8px; font-size: 0.9rem; color: #9a3412;">
466
+ <strong>⚑ Performance Tips:</strong><br>
467
+ β€’ First load: 30-60 seconds (model download)<br>
468
+ β€’ Max file size: ~24KB of text<br>
469
+ β€’ Output: Traditional Chinese (zh-TW)
470
+ </div>
471
+ """)
472
 
473
+ # Right column - Outputs
474
  with gr.Column(scale=2):
475
+ # Thinking Process
476
+ with gr.Group():
477
+ gr.HTML('<div class="section-header"><span class="section-icon">🧠</span> Model Thinking Process</div>')
478
+ thinking_output = gr.Textbox(
479
+ label="",
480
+ lines=12,
481
+ max_lines=20,
482
+ show_label=False,
483
+ placeholder="The AI's reasoning process will appear here in real-time...",
484
+ elem_classes=["thinking-box"]
485
+ )
486
 
487
+ # Summary Output
488
+ with gr.Group():
489
+ gr.HTML('<div class="section-header"><span class="section-icon">πŸ“</span> Final Summary</div>')
490
+ summary_output = gr.Markdown(
491
+ value="*Your summarized content will appear here...*",
492
+ elem_classes=["summary-box"]
493
+ )
494
 
495
  # Event handlers
496
  submit_btn.click(
 
500
  show_progress="full"
501
  )
502
 
503
+ # Footer
504
+ gr.HTML("""
505
+ <div style="text-align: center; margin-top: 2rem; padding: 1rem; color: #64748b; font-size: 0.85rem; border-top: 1px solid #e2e8f0;">
506
+ Powered by <strong>Qwen3-0.6B-GGUF</strong> β€’ Running on <strong>HuggingFace Spaces Free Tier</strong><br>
507
+ Traditional Chinese conversion via <strong>OpenCC</strong>
508
+ </div>
509
+ """)
510
 
511
  return demo
512