Luigi committed on
Commit
a54f798
Β·
1 Parent(s): 96d104f

Major UI/UX improvements

Browse files

- Add beautiful gradient header with model badge
- Two-column layout: upload left, outputs right
- Custom CSS with modern styling and hover effects
- Add section icons and clear visual hierarchy
- Include model info cards showing context window, params, etc.
- Better instructions with step-by-step guide
- Improved thinking/summary boxes with distinct colors
- Add footer with credits
- Enhanced file upload area with visual feedback

Files changed (1) hide show
  1. app.py +330 -114
app.py CHANGED
@@ -3,8 +3,6 @@
3
  Tiny Scribe - HuggingFace Spaces Demo
4
  A Gradio app for summarizing transcripts using GGUF models with live streaming output.
5
  Optimized for HuggingFace Spaces Free CPU Tier (2 vCPUs).
6
-
7
- Deployment: Always use git push to preserve meaningful commit messages
8
  """
9
 
10
  import os
@@ -42,7 +40,6 @@ def load_model():
42
  converter = OpenCC('s2twp')
43
 
44
  # Load model optimized for CPU
45
- # n_ctx=32768 for handling larger transcripts
46
  llm = Llama.from_pretrained(
47
  repo_id=DEFAULT_MODEL,
48
  filename=DEFAULT_FILENAME,
@@ -58,45 +55,26 @@ def load_model():
58
  raise
59
 
60
 
61
- def parse_thinking_blocks(content: str, streaming: bool = False) -> Tuple[str, str]:
62
  """
63
  Parse thinking blocks from model output.
64
- Supports both <think> and <thinking> tags.
65
-
66
  Args:
67
  content: Full model response
68
- streaming: If True, handle unclosed <think> tags for live display
69
-
70
  Returns:
71
  Tuple of (thinking_content, summary_content)
72
  """
73
- closed_pattern = r'<think(?:ing)?>(.*?)</think(?:ing)?>'
74
- open_pattern = r'<think(?:ing)?>([^<]*)$'
75
-
76
- # Extract completed thinking blocks
77
- closed_matches = re.findall(closed_pattern, content, re.DOTALL)
78
- # Remove completed blocks to get summary
79
- remaining = re.sub(closed_pattern, '', content, flags=re.DOTALL).strip()
80
-
81
- thinking_parts = [m.strip() for m in closed_matches if m.strip()]
82
-
83
- if streaming:
84
- # Check for unclosed <think> tag (model still generating thinking tokens)
85
- open_match = re.search(open_pattern, content, re.DOTALL)
86
- if open_match:
87
- partial = open_match.group(1).strip()
88
- if partial:
89
- thinking_parts.append(partial)
90
- # Nothing after the open tag counts as summary yet
91
- remaining = re.sub(r'<think(?:ing)?>[^<]*$', '', remaining, flags=re.DOTALL).strip()
92
-
93
- thinking = '\n\n'.join(thinking_parts)
94
-
95
- if not thinking and not closed_matches:
96
- # No thinking tags found at all
97
- return ("", content if not content.startswith('<think') else "")
98
-
99
- return (thinking, remaining)
100
 
101
 
102
  def summarize_streaming(file_obj, max_tokens: int = 2048, temperature: float = 0.6) -> Generator[Tuple[str, str], None, None]:
@@ -109,7 +87,7 @@ def summarize_streaming(file_obj, max_tokens: int = 2048, temperature: float = 0
109
  temperature: Sampling temperature
110
 
111
  Yields:
112
- Partial summary text for streaming display
113
  """
114
  global llm, converter
115
 
@@ -141,7 +119,7 @@ def summarize_streaming(file_obj, max_tokens: int = 2048, temperature: float = 0
141
  warning_msg = ""
142
  if len(transcript) > max_chars:
143
  transcript = transcript[:max_chars] + "...\n[Content truncated due to length limits]"
144
- warning_msg = "Note: Content was truncated to fit model context window.\n\n" + "="*50 + "\n\n"
145
 
146
  # Prepare messages
147
  messages = [
@@ -153,6 +131,10 @@ def summarize_streaming(file_obj, max_tokens: int = 2048, temperature: float = 0
153
  full_response = ""
154
  current_thinking = ""
155
  current_summary = warning_msg
 
 
 
 
156
 
157
  try:
158
  stream = llm.create_chat_completion(
@@ -174,26 +156,35 @@ def summarize_streaming(file_obj, max_tokens: int = 2048, temperature: float = 0
174
  # Convert to Traditional Chinese (Taiwan)
175
  converted = converter.convert(content)
176
  full_response += converted
177
-
178
- # Parse thinking blocks and summary (streaming=True for partial tags)
179
- thinking_blocks, summary = parse_thinking_blocks(full_response, streaming=True)
180
-
181
- # Update thinking field (only show thinking blocks, not raw stream)
182
- current_thinking = thinking_blocks if thinking_blocks else ""
183
-
184
- # Update summary field (only show summary, not thinking blocks)
185
- current_summary = warning_msg + summary if summary else warning_msg
186
-
 
 
 
 
 
 
 
 
 
 
 
187
  # Yield both fields on every token
188
  yield (current_thinking, current_summary)
189
 
190
- # Final parse to ensure consistency (redundant but safe)
191
- final_thinking, final_summary = parse_thinking_blocks(full_response)
192
- current_thinking = final_thinking if final_thinking else ""
193
- current_summary = warning_msg + final_summary if final_summary else warning_msg
194
-
195
- # Final yield
196
- yield (current_thinking, current_summary)
197
 
198
  # Reset model state
199
  llm.reset()
@@ -205,82 +196,301 @@ def summarize_streaming(file_obj, max_tokens: int = 2048, temperature: float = 0
205
  current_summary + "\n\n" + error_msg)
206
 
207
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
  # Create Gradio interface
209
  def create_interface():
210
  """Create and configure the Gradio interface."""
211
 
212
  with gr.Blocks(
213
- title="Tiny Scribe - Transcript Summarizer"
 
214
  ) as demo:
215
 
216
- gr.Markdown(f"""
217
- # Tiny Scribe
218
-
219
- Summarize your text files (transcripts, notes, documents) with AI.
220
-
221
- **Model:** `{DEFAULT_MODEL}` (`{DEFAULT_FILENAME}`)
 
 
 
 
 
 
 
222
 
223
- **Features:**
224
- - Live streaming output
225
- - Traditional Chinese (zh-TW) conversion
226
- - Optimized for CPU inference
227
- - Supports .txt files
228
- """)
 
 
 
 
 
 
 
 
 
229
 
 
230
  with gr.Row():
 
231
  with gr.Column(scale=1):
232
- # Input section
233
- gr.Markdown("### Upload File")
234
- file_input = gr.File(
235
- label="Upload .txt file",
236
- file_types=[".txt"],
237
- type="filepath"
238
- )
239
-
240
- with gr.Accordion("Advanced Settings", open=False):
241
- max_tokens = gr.Slider(
242
- minimum=256,
243
- maximum=4096,
244
- value=2048,
245
- step=256,
246
- label="Max Tokens"
247
  )
248
- temperature = gr.Slider(
249
- minimum=0.1,
250
- maximum=1.0,
251
- value=0.6,
252
- step=0.1,
253
- label="Temperature"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
254
  )
255
 
256
- submit_btn = gr.Button(
257
- "Summarize",
258
- variant="primary",
259
- size="lg"
260
- )
261
-
262
- gr.Markdown("""
263
- <div class="info-text">
264
- <strong>Note:</strong> First load may take 30-60 seconds as the model downloads.
265
- <br>Max file size: ~3KB of text (context window limit).
266
- </div>
267
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
 
 
269
  with gr.Column(scale=2):
270
- # Output section
271
- gr.Markdown("### Model Thinking Process")
272
- thinking_output = gr.Textbox(
273
- label="Thinking",
274
- lines=10,
275
- max_lines=20,
276
- show_label=True,
277
- elem_classes=["output-text"]
278
- )
 
 
279
 
280
- gr.Markdown("### Summary Output")
281
- summary_output = gr.Markdown(
282
- elem_classes=["output-text"]
283
- )
 
 
 
284
 
285
  # Event handlers
286
  submit_btn.click(
@@ -290,7 +500,13 @@ def create_interface():
290
  show_progress="full"
291
  )
292
 
293
-
 
 
 
 
 
 
294
 
295
  return demo
296
 
 
3
  Tiny Scribe - HuggingFace Spaces Demo
4
  A Gradio app for summarizing transcripts using GGUF models with live streaming output.
5
  Optimized for HuggingFace Spaces Free CPU Tier (2 vCPUs).
 
 
6
  """
7
 
8
  import os
 
40
  converter = OpenCC('s2twp')
41
 
42
  # Load model optimized for CPU
 
43
  llm = Llama.from_pretrained(
44
  repo_id=DEFAULT_MODEL,
45
  filename=DEFAULT_FILENAME,
 
55
  raise
56
 
57
 
58
+ def parse_thinking_blocks(content: str) -> Tuple[str, str]:
59
  """
60
  Parse thinking blocks from model output.
61
+
 
62
  Args:
63
  content: Full model response
64
+
 
65
  Returns:
66
  Tuple of (thinking_content, summary_content)
67
  """
68
+ pattern = r'<thinking>(.*?)</thinking>'
69
+ matches = re.findall(pattern, content, re.DOTALL)
70
+
71
+ if not matches:
72
+ return ("", content)
73
+
74
+ thinking = '\n\n'.join(match.strip() for match in matches)
75
+ summary = re.sub(pattern, '', content, flags=re.DOTALL).strip()
76
+
77
+ return (thinking, summary)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
 
80
  def summarize_streaming(file_obj, max_tokens: int = 2048, temperature: float = 0.6) -> Generator[Tuple[str, str], None, None]:
 
87
  temperature: Sampling temperature
88
 
89
  Yields:
90
+ Tuple of (thinking_text, summary_text) for streaming display
91
  """
92
  global llm, converter
93
 
 
119
  warning_msg = ""
120
  if len(transcript) > max_chars:
121
  transcript = transcript[:max_chars] + "...\n[Content truncated due to length limits]"
122
+ warning_msg = "⚠️ **Note:** Content was truncated to fit model context window.\n\n---\n\n"
123
 
124
  # Prepare messages
125
  messages = [
 
131
  full_response = ""
132
  current_thinking = ""
133
  current_summary = warning_msg
134
+ summary_started = False
135
+
136
+ # Markers that indicate summary section has started
137
+ SUMMARY_MARKERS = ["---", "δ»₯δΈ‹ζ˜―ηΈ½η΅", "總硐:", "Summary:"]
138
 
139
  try:
140
  stream = llm.create_chat_completion(
 
156
  # Convert to Traditional Chinese (Taiwan)
157
  converted = converter.convert(content)
158
  full_response += converted
159
+
160
+ # Check if we've hit a summary marker
161
+ if not summary_started:
162
+ for marker in SUMMARY_MARKERS:
163
+ if marker in full_response:
164
+ summary_started = True
165
+ # Find where summary starts
166
+ marker_pos = full_response.find(marker)
167
+ # Everything before marker is thinking
168
+ current_thinking = full_response[:marker_pos]
169
+ # Everything from marker onward is summary
170
+ current_summary = warning_msg + full_response[marker_pos:]
171
+ break
172
+
173
+ if not summary_started:
174
+ # Still in thinking phase
175
+ current_thinking += converted
176
+ else:
177
+ # Already in summary phase, add to summary
178
+ current_summary += converted
179
+
180
  # Yield both fields on every token
181
  yield (current_thinking, current_summary)
182
 
183
+ # If summary never started, put everything in summary field
184
+ if not summary_started and current_thinking:
185
+ current_summary = warning_msg + current_thinking
186
+ current_thinking = "(Model did not separate thinking from summary)"
187
+ yield (current_thinking, current_summary)
 
 
188
 
189
  # Reset model state
190
  llm.reset()
 
196
  current_summary + "\n\n" + error_msg)
197
 
198
 
199
+ # Custom CSS for better UI
200
+ custom_css = """
201
+ :root {
202
+ --primary-color: #3b82f6;
203
+ --primary-hover: #2563eb;
204
+ --bg-color: #f8fafc;
205
+ --card-bg: #ffffff;
206
+ --text-color: #1e293b;
207
+ --border-color: #e2e8f0;
208
+ }
209
+
210
+ .app-header {
211
+ text-align: center;
212
+ padding: 1.5rem;
213
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
214
+ border-radius: 12px;
215
+ margin-bottom: 2rem;
216
+ color: white;
217
+ }
218
+
219
+ .app-header h1 {
220
+ margin: 0 0 0.5rem 0;
221
+ font-size: 2rem;
222
+ font-weight: 700;
223
+ }
224
+
225
+ .app-header p {
226
+ margin: 0;
227
+ opacity: 0.9;
228
+ }
229
+
230
+ .model-badge {
231
+ display: inline-flex;
232
+ align-items: center;
233
+ gap: 0.5rem;
234
+ background: rgba(255,255,255,0.2);
235
+ padding: 0.5rem 1rem;
236
+ border-radius: 20px;
237
+ font-size: 0.85rem;
238
+ margin-top: 1rem;
239
+ }
240
+
241
+ .section-header {
242
+ font-size: 1.1rem;
243
+ font-weight: 600;
244
+ color: var(--text-color);
245
+ margin-bottom: 0.75rem;
246
+ display: flex;
247
+ align-items: center;
248
+ gap: 0.5rem;
249
+ }
250
+
251
+ .section-icon {
252
+ font-size: 1.2rem;
253
+ }
254
+
255
+ .instructions {
256
+ background: #f1f5f9;
257
+ border-left: 4px solid var(--primary-color);
258
+ padding: 1rem;
259
+ border-radius: 0 8px 8px 0;
260
+ margin-bottom: 1.5rem;
261
+ }
262
+
263
+ .instructions ul {
264
+ margin: 0.5rem 0 0 0;
265
+ padding-left: 1.5rem;
266
+ }
267
+
268
+ .instructions li {
269
+ margin-bottom: 0.25rem;
270
+ }
271
+
272
+ .output-container {
273
+ background: var(--card-bg);
274
+ border: 1px solid var(--border-color);
275
+ border-radius: 8px;
276
+ padding: 1rem;
277
+ min-height: 200px;
278
+ }
279
+
280
+ .thinking-box {
281
+ background: #fef3c7;
282
+ border: 1px solid #fbbf24;
283
+ border-radius: 8px;
284
+ padding: 1rem;
285
+ font-family: 'Courier New', monospace;
286
+ font-size: 0.9rem;
287
+ white-space: pre-wrap;
288
+ }
289
+
290
+ .summary-box {
291
+ background: #f0fdf4;
292
+ border: 1px solid #86efac;
293
+ border-radius: 8px;
294
+ padding: 1rem;
295
+ }
296
+
297
+ .submit-btn {
298
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
299
+ border: none !important;
300
+ color: white !important;
301
+ font-weight: 600 !important;
302
+ padding: 0.75rem 2rem !important;
303
+ border-radius: 8px !important;
304
+ cursor: pointer;
305
+ transition: transform 0.2s, box-shadow 0.2s !important;
306
+ }
307
+
308
+ .submit-btn:hover {
309
+ transform: translateY(-2px);
310
+ box-shadow: 0 4px 12px rgba(102, 126, 234, 0.4) !important;
311
+ }
312
+
313
+ .advanced-settings {
314
+ background: #f8fafc;
315
+ border: 1px solid var(--border-color);
316
+ border-radius: 8px;
317
+ padding: 1rem;
318
+ }
319
+
320
+ .file-upload-area {
321
+ border: 2px dashed #cbd5e1;
322
+ border-radius: 12px;
323
+ padding: 2rem;
324
+ text-align: center;
325
+ transition: border-color 0.3s, background 0.3s;
326
+ }
327
+
328
+ .file-upload-area:hover {
329
+ border-color: var(--primary-color);
330
+ background: #f8fafc;
331
+ }
332
+
333
+ .stats-grid {
334
+ display: grid;
335
+ grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
336
+ gap: 1rem;
337
+ margin-top: 1rem;
338
+ }
339
+
340
+ .stat-card {
341
+ background: var(--card-bg);
342
+ border: 1px solid var(--border-color);
343
+ border-radius: 8px;
344
+ padding: 1rem;
345
+ text-align: center;
346
+ }
347
+
348
+ .stat-value {
349
+ font-size: 1.5rem;
350
+ font-weight: 700;
351
+ color: var(--primary-color);
352
+ }
353
+
354
+ .stat-label {
355
+ font-size: 0.85rem;
356
+ color: #64748b;
357
+ margin-top: 0.25rem;
358
+ }
359
+ """
360
+
361
+
362
  # Create Gradio interface
363
  def create_interface():
364
  """Create and configure the Gradio interface."""
365
 
366
  with gr.Blocks(
367
+ title="Tiny Scribe - AI Transcript Summarizer",
368
+ css=custom_css
369
  ) as demo:
370
 
371
+ # Header section
372
+ with gr.Row():
373
+ with gr.Column():
374
+ gr.HTML(f"""
375
+ <div class="app-header">
376
+ <h1>πŸ“„ Tiny Scribe</h1>
377
+ <p>AI-Powered Transcript Summarization with Real-Time Streaming</p>
378
+ <div class="model-badge">
379
+ <span>πŸ€–</span>
380
+ <span>Model: {DEFAULT_MODEL} ({DEFAULT_FILENAME})</span>
381
+ </div>
382
+ </div>
383
+ """)
384
 
385
+ # Instructions
386
+ with gr.Row():
387
+ with gr.Column():
388
+ gr.HTML("""
389
+ <div class="instructions">
390
+ <strong>πŸ“‹ How to use:</strong>
391
+ <ul>
392
+ <li>Upload a .txt file containing your transcript, notes, or document</li>
393
+ <li>Click "Generate Summary" to start AI processing</li>
394
+ <li>Watch the <strong>Thinking Process</strong> (left) - see how the AI reasons</li>
395
+ <li>Read the <strong>Final Summary</strong> (right) - the polished result</li>
396
+ <li>Both outputs stream in real-time as the AI generates content</li>
397
+ </ul>
398
+ </div>
399
+ """)
400
 
401
+ # Main content area
402
  with gr.Row():
403
+ # Left column - Input
404
  with gr.Column(scale=1):
405
+ with gr.Group():
406
+ gr.HTML('<div class="section-header"><span class="section-icon">πŸ“€</span> Upload File</div>')
407
+
408
+ file_input = gr.File(
409
+ label="Drag & drop or click to upload",
410
+ file_types=[".txt"],
411
+ type="filepath",
412
+ elem_classes=["file-upload-area"]
 
 
 
 
 
 
 
413
  )
414
+
415
+ with gr.Accordion("βš™οΈ Advanced Settings", open=False):
416
+ with gr.Group(elem_classes=["advanced-settings"]):
417
+ max_tokens = gr.Slider(
418
+ minimum=256,
419
+ maximum=4096,
420
+ value=2048,
421
+ step=256,
422
+ label="Max Output Tokens",
423
+ info="Higher = more detailed summary"
424
+ )
425
+ temperature = gr.Slider(
426
+ minimum=0.1,
427
+ maximum=1.0,
428
+ value=0.6,
429
+ step=0.1,
430
+ label="Temperature",
431
+ info="Lower = more focused, Higher = more creative"
432
+ )
433
+
434
+ submit_btn = gr.Button(
435
+ "✨ Generate Summary",
436
+ variant="primary",
437
+ elem_classes=["submit-btn"]
438
  )
439
 
440
+ # Stats/info section
441
+ with gr.Group():
442
+ gr.HTML('<div class="section-header"><span class="section-icon">πŸ“Š</span> Model Info</div>')
443
+ gr.HTML(f"""
444
+ <div class="stats-grid">
445
+ <div class="stat-card">
446
+ <div class="stat-value">32K</div>
447
+ <div class="stat-label">Context Window</div>
448
+ </div>
449
+ <div class="stat-card">
450
+ <div class="stat-value">0.6B</div>
451
+ <div class="stat-label">Parameters</div>
452
+ </div>
453
+ <div class="stat-card">
454
+ <div class="stat-value">Q4_K_M</div>
455
+ <div class="stat-label">Quantization</div>
456
+ </div>
457
+ <div class="stat-card">
458
+ <div class="stat-value">CPU</div>
459
+ <div class="stat-label">Inference</div>
460
+ </div>
461
+ </div>
462
+ """)
463
+
464
+ gr.HTML("""
465
+ <div style="margin-top: 1rem; padding: 0.75rem; background: #fff7ed; border-radius: 8px; font-size: 0.9rem; color: #9a3412;">
466
+ <strong>⚑ Performance Tips:</strong><br>
467
+ β€’ First load: 30-60 seconds (model download)<br>
468
+ β€’ Max file size: ~24KB of text<br>
469
+ β€’ Output: Traditional Chinese (zh-TW)
470
+ </div>
471
+ """)
472
 
473
+ # Right column - Outputs
474
  with gr.Column(scale=2):
475
+ # Thinking Process
476
+ with gr.Group():
477
+ gr.HTML('<div class="section-header"><span class="section-icon">🧠</span> Model Thinking Process</div>')
478
+ thinking_output = gr.Textbox(
479
+ label="",
480
+ lines=12,
481
+ max_lines=20,
482
+ show_label=False,
483
+ placeholder="The AI's reasoning process will appear here in real-time...",
484
+ elem_classes=["thinking-box"]
485
+ )
486
 
487
+ # Summary Output
488
+ with gr.Group():
489
+ gr.HTML('<div class="section-header"><span class="section-icon">πŸ“</span> Final Summary</div>')
490
+ summary_output = gr.Markdown(
491
+ value="*Your summarized content will appear here...*",
492
+ elem_classes=["summary-box"]
493
+ )
494
 
495
  # Event handlers
496
  submit_btn.click(
 
500
  show_progress="full"
501
  )
502
 
503
+ # Footer
504
+ gr.HTML("""
505
+ <div style="text-align: center; margin-top: 2rem; padding: 1rem; color: #64748b; font-size: 0.85rem; border-top: 1px solid #e2e8f0;">
506
+ Powered by <strong>Qwen3-0.6B-GGUF</strong> β€’ Running on <strong>HuggingFace Spaces Free Tier</strong><br>
507
+ Traditional Chinese conversion via <strong>OpenCC</strong>
508
+ </div>
509
+ """)
510
 
511
  return demo
512