Spaces:

IAMTFRMZA
/

documentaitestv3

Sleeping

App Files Files Community

IAMTFRMZA committed on Apr 17, 2025

Commit

a86432c

verified ·

1 Parent(s): e0cd5da

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -27

app.py CHANGED Viewed

@@ -70,7 +70,7 @@ class WebSocketClient:
             if data["type"] == "conversation.item.input_audio_transcription.delta":
                 self.transcript += data["delta"]
-# Real-time transcription connection manager
 def create_ws():
     cid = str(uuid.uuid4())
     client = WebSocketClient(WS_URI, HEADERS, cid)
@@ -83,14 +83,17 @@ def send_audio(chunk, cid):
         return "Connecting..."
     sr, arr = chunk
     connections[cid].enqueue_audio_chunk(sr, arr)
-    return connections[cid].transcript
-def clear_transcript(cid):
     if cid in connections:
         connections[cid].transcript = ""
     return ""
-# ============ Chat Assistant ============
 def handle_chat(user_input, history, thread_id, image_url):
     if not OPENAI_API_KEY or not ASSISTANT_ID:
         return "❌ Missing secrets!", history, thread_id, image_url
@@ -105,7 +108,8 @@ def handle_chat(user_input, history, thread_id, image_url):
         while True:
             status = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run.id)
-            if status.status == "completed": break
             time.sleep(1)
         msgs = client.beta.threads.messages.list(thread_id=thread_id)
@@ -117,7 +121,8 @@ def handle_chat(user_input, history, thread_id, image_url):
                     r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',
                     content
                 )
-                if match: image_url = match.group(0)
                 break
         return "", history, thread_id, image_url
@@ -125,6 +130,20 @@ def handle_chat(user_input, history, thread_id, image_url):
     except Exception as e:
         return f"❌ {e}", history, thread_id, image_url
 # ============ Gradio UI ============
 with gr.Blocks(theme=gr.themes.Soft()) as app:
     gr.Markdown("# 📄 Document AI Assistant")
@@ -162,7 +181,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
                 user_prompt = gr.Textbox(placeholder="Ask your question...", show_label=False, scale=8)
                 send_btn = gr.Button("Send", variant="primary", scale=2)
-            # === Voice Transcription Section ===
             with gr.Column(elem_classes="voice-area"):
                 gr.Markdown("### 🎙️ Voice Input")
@@ -171,39 +189,35 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
                 with gr.Row():
                     voice_send_btn = gr.Button("🟢 Send Voice to Assistant", elem_classes="big-btn")
-                    voice_clear_btn = gr.Button("🧹 Clear", elem_classes="big-btn")
-    # ============ Functional Bindings ============
     send_btn.click(fn=handle_chat,
                    inputs=[user_prompt, chat_state, thread_state, image_state],
                    outputs=[user_prompt, chat, thread_state, image_state])
-    image_state.change(fn=lambda x: x, inputs=image_state, outputs=image_display)
     voice_input.stream(fn=send_audio,
                        inputs=[voice_input, client_id],
                        outputs=voice_transcript,
                        stream_every=0.5)
-    def feed_voice_to_assistant(transcript, history, thread_id, image_url, cid):
-        if not transcript.strip():
-            return gr.update(), history, thread_id, image_url
-        if cid in connections:
-            connections[cid].transcript = ""
-        return handle_chat(transcript, history, thread_id, image_url)
-    def clear_all(cid):
-        if cid in connections:
-            connections[cid].transcript = ""
-        return [], "", None, None
-    voice_send_btn.click(fn=feed_voice_to_assistant,
                          inputs=[voice_transcript, chat_state, thread_state, image_state, client_id],
                          outputs=[user_prompt, chat, thread_state, image_state])
-    voice_clear_btn.click(fn=clear_all,
-                          inputs=[client_id],
-                          outputs=[chat, voice_transcript, thread_state, image_state])
     app.load(fn=create_ws, outputs=[client_id])

             if data["type"] == "conversation.item.input_audio_transcription.delta":
                 self.transcript += data["delta"]
+# WebSocket Connection Manager
 def create_ws():
     cid = str(uuid.uuid4())
     client = WebSocketClient(WS_URI, HEADERS, cid)
         return "Connecting..."
     sr, arr = chunk
     connections[cid].enqueue_audio_chunk(sr, arr)
+    return connections[cid].transcript.strip()
+# Reset the stored transcript of connection `cid` when that connection is
+# registered in `connections`; return an empty string in either case.
+def clear_transcript_only(cid):
     if cid in connections:
         connections[cid].transcript = ""
     return ""
+# Return an empty list and two None values. The "Clear Chat" button binding
+# routes these to the chat, thread_state and image_state outputs.
+def clear_chat_only():
+    return [], None, None
+# Assistant chat handler
 def handle_chat(user_input, history, thread_id, image_url):
     if not OPENAI_API_KEY or not ASSISTANT_ID:
         return "❌ Missing secrets!", history, thread_id, image_url
         while True:
             status = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run.id)
+            if status.status == "completed":
+                break
             time.sleep(1)
         msgs = client.beta.threads.messages.list(thread_id=thread_id)
                     r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',
                     content
                 )
+                if match:
+                    image_url = match.group(0)
                 break
         return "", history, thread_id, image_url
     except Exception as e:
         return f"❌ {e}", history, thread_id, image_url
+# Feed the voice transcript to the assistant as if it were typed input.
+# An empty or whitespace-only transcript returns gr.update() together with the
+# unchanged history, thread_id and image_url. Otherwise the stored transcript
+# of connection `cid` is cleared (when that connection exists) and the result
+# of handle_chat() for the transcript is returned.
+def feed_transcript(transcript, history, thread_id, image_url, cid):
+    if not transcript.strip():
+        return gr.update(), history, thread_id, image_url
+    if cid in connections:
+        connections[cid].transcript = ""
+    return handle_chat(transcript, history, thread_id, image_url)
+# Fallback for image display: pass `image_url` through only when it is a
+# non-empty string starting with "http"; return None for anything else.
+def update_image_display(image_url):
+    if image_url and isinstance(image_url, str) and image_url.startswith("http"):
+        return image_url
+    return None
 # ============ Gradio UI ============
 with gr.Blocks(theme=gr.themes.Soft()) as app:
     gr.Markdown("# 📄 Document AI Assistant")
                 user_prompt = gr.Textbox(placeholder="Ask your question...", show_label=False, scale=8)
                 send_btn = gr.Button("Send", variant="primary", scale=2)
             with gr.Column(elem_classes="voice-area"):
                 gr.Markdown("### 🎙️ Voice Input")
                 with gr.Row():
                     voice_send_btn = gr.Button("🟢 Send Voice to Assistant", elem_classes="big-btn")
+                    clear_transcript_btn = gr.Button("🧹 Clear Transcript", elem_classes="big-btn")
+                with gr.Row():
+                    clear_chat_btn = gr.Button("🗑️ Clear Chat", elem_classes="big-btn")
+    # Bindings
     send_btn.click(fn=handle_chat,
                    inputs=[user_prompt, chat_state, thread_state, image_state],
                    outputs=[user_prompt, chat, thread_state, image_state])
     voice_input.stream(fn=send_audio,
                        inputs=[voice_input, client_id],
                        outputs=voice_transcript,
                        stream_every=0.5)
+    voice_send_btn.click(fn=feed_transcript,
                          inputs=[voice_transcript, chat_state, thread_state, image_state, client_id],
                          outputs=[user_prompt, chat, thread_state, image_state])
+    clear_transcript_btn.click(fn=clear_transcript_only,
+                               inputs=[client_id],
+                               outputs=voice_transcript)
+    clear_chat_btn.click(fn=clear_chat_only,
+                         outputs=[chat, thread_state, image_state])
+    image_state.change(fn=update_image_display,
+                       inputs=image_state,
+                       outputs=image_display)
     app.load(fn=create_ws, outputs=[client_id])