Luigi committed on
Commit
682522a
·
1 Parent(s): b27d02e

UI improvements: separate thinking/summary fields + markdown rendering

Browse files

- Increase max_tokens default from 512 to 2048 (more reasoning headroom)
- Add separate 'Thinking' text field showing model's reasoning process
- Change Summary Output to gr.Markdown for proper markdown rendering
- Update streaming function to yield tuple (thinking, summary)
- Max tokens slider: 256-4096 range with 2048 default

Files changed (1) hide show
  1. app.py +39 -17
app.py CHANGED
@@ -78,7 +78,7 @@ def parse_thinking_blocks(content: str) -> Tuple[str, str]:
78
  return (thinking, summary)
79
 
80
 
81
- def summarize_streaming(file_obj, max_tokens: int = 512, temperature: float = 0.6) -> Generator[str, None, None]:
82
  """
83
  Stream summary generation from uploaded file.
84
 
@@ -117,9 +117,10 @@ def summarize_streaming(file_obj, max_tokens: int = 512, temperature: float = 0.
117
 
118
  # Check length (rough estimate: 4 chars per token)
119
  max_chars = 24000 # Leave room for generation with 32K context
 
120
  if len(transcript) > max_chars:
121
  transcript = transcript[:max_chars] + "...\n[Content truncated due to length limits]"
122
- yield "Note: Content was truncated to fit model context window.\n\n" + "="*50 + "\n\n"
123
 
124
  # Prepare messages
125
  messages = [
@@ -131,6 +132,10 @@ def summarize_streaming(file_obj, max_tokens: int = 512, temperature: float = 0.
131
  full_response = ""
132
  buffer = ""
133
 
 
 
 
 
134
  try:
135
  stream = llm.create_chat_completion(
136
  messages=messages,
@@ -153,22 +158,33 @@ def summarize_streaming(file_obj, max_tokens: int = 512, temperature: float = 0.
153
  buffer += converted
154
  full_response += converted
155
 
156
- # Parse and clean thinking blocks for display
157
  thinking, summary = parse_thinking_blocks(buffer)
 
 
158
  if summary:
159
- yield summary
160
 
161
- # Final parse to remove any remaining thinking blocks
162
- thinking, final_summary = parse_thinking_blocks(full_response)
 
 
 
 
 
163
  if final_summary:
164
- yield final_summary
 
 
 
165
 
166
  # Reset model state
167
  llm.reset()
168
 
169
  except Exception as e:
170
  logger.error(f"Error during generation: {e}")
171
- yield f"\n\nError during generation: {str(e)}"
 
172
 
173
 
174
  # Create Gradio interface
@@ -205,10 +221,10 @@ def create_interface():
205
 
206
  with gr.Accordion("Advanced Settings", open=False):
207
  max_tokens = gr.Slider(
208
- minimum=128,
209
- maximum=1024,
210
- value=512,
211
- step=64,
212
  label="Max Tokens"
213
  )
214
  temperature = gr.Slider(
@@ -234,11 +250,17 @@ def create_interface():
234
 
235
  with gr.Column(scale=2):
236
  # Output section
 
 
 
 
 
 
 
 
 
237
  gr.Markdown("### Summary Output")
238
- output = gr.Textbox(
239
- label="Summary",
240
- lines=20,
241
- max_lines=50,
242
  elem_classes=["output-text"]
243
  )
244
 
@@ -246,7 +268,7 @@ def create_interface():
246
  submit_btn.click(
247
  fn=summarize_streaming,
248
  inputs=[file_input, max_tokens, temperature],
249
- outputs=output,
250
  show_progress="full"
251
  )
252
 
 
78
  return (thinking, summary)
79
 
80
 
81
+ def summarize_streaming(file_obj, max_tokens: int = 2048, temperature: float = 0.6) -> Generator[Tuple[str, str], None, None]:
82
  """
83
  Stream summary generation from uploaded file.
84
 
 
117
 
118
  # Check length (rough estimate: 4 chars per token)
119
  max_chars = 24000 # Leave room for generation with 32K context
120
+ warning_msg = ""
121
  if len(transcript) > max_chars:
122
  transcript = transcript[:max_chars] + "...\n[Content truncated due to length limits]"
123
+ warning_msg = "Note: Content was truncated to fit model context window.\n\n" + "="*50 + "\n\n"
124
 
125
  # Prepare messages
126
  messages = [
 
132
  full_response = ""
133
  buffer = ""
134
 
135
+ # Initialize outputs
136
+ current_thinking = ""
137
+ current_summary = warning_msg
138
+
139
  try:
140
  stream = llm.create_chat_completion(
141
  messages=messages,
 
158
  buffer += converted
159
  full_response += converted
160
 
161
+ # Parse thinking blocks
162
  thinking, summary = parse_thinking_blocks(buffer)
163
+ if thinking:
164
+ current_thinking = thinking
165
  if summary:
166
+ current_summary = warning_msg + summary
167
 
168
+ # Yield both thinking and summary
169
+ yield (current_thinking, current_summary)
170
+
171
+ # Final parse
172
+ final_thinking, final_summary = parse_thinking_blocks(full_response)
173
+ if final_thinking:
174
+ current_thinking = final_thinking
175
  if final_summary:
176
+ current_summary = warning_msg + final_summary
177
+
178
+ # Final yield with complete output
179
+ yield (current_thinking, current_summary)
180
 
181
  # Reset model state
182
  llm.reset()
183
 
184
  except Exception as e:
185
  logger.error(f"Error during generation: {e}")
186
+ error_msg = f"\n\nError during generation: {str(e)}"
187
+ yield (current_thinking, current_summary + error_msg)
188
 
189
 
190
  # Create Gradio interface
 
221
 
222
  with gr.Accordion("Advanced Settings", open=False):
223
  max_tokens = gr.Slider(
224
+ minimum=256,
225
+ maximum=4096,
226
+ value=2048,
227
+ step=256,
228
  label="Max Tokens"
229
  )
230
  temperature = gr.Slider(
 
250
 
251
  with gr.Column(scale=2):
252
  # Output section
253
+ gr.Markdown("### Model Thinking Process")
254
+ thinking_output = gr.Textbox(
255
+ label="Thinking",
256
+ lines=10,
257
+ max_lines=20,
258
+ show_label=True,
259
+ elem_classes=["output-text"]
260
+ )
261
+
262
  gr.Markdown("### Summary Output")
263
+ summary_output = gr.Markdown(
 
 
 
264
  elem_classes=["output-text"]
265
  )
266
 
 
268
  submit_btn.click(
269
  fn=summarize_streaming,
270
  inputs=[file_input, max_tokens, temperature],
271
+ outputs=[thinking_output, summary_output],
272
  show_progress="full"
273
  )
274