Spaces:

Agents-MCP-Hackathon
/

MedCodeMCP

Sleeping

App Files Files Community

gpaasch commited on Jun 7, 2025

Commit

7e29701

1 Parent(s): 00f30b1

switching from openaid whisper to gradio asr: https://www.gradio.app/guides/real-time-speech-recognition

Browse files

Files changed (1) hide show

src/app.py +25 -45

src/app.py CHANGED Viewed

@@ -1,77 +1,57 @@
 import os
 import gradio as gr
-import openai
-from transformers import pipeline
 from llama_index import SimpleDirectoryReader, GPTVectorStoreIndex
 from llama_index import HuggingFaceLLMPredictor
 from src.parse_tabular import symptom_index
-# --- Whisper ASR setup ---
-asr = pipeline(
-    "automatic-speech-recognition",
-    model="openai/whisper-small",
-    device=0,
-    chunk_length_s=30,
-)
 # --- LlamaIndex utils import ---
 from utils.llama_index_utils import get_llm_predictor, build_index, query_symptoms
 # --- System prompt ---
 SYSTEM_PROMPT = """
 You are a medical assistant helping a user narrow down to the most likely ICD-10 code.
-At each turn, EITHER ask one focused clarifying question (e.g. “Is your cough dry or productive?”)
 or, if you have enough info, output a final JSON with fields:
 {"diagnoses":[…], "confidences":[…]}.
 """
-def transcribe_and_respond(audio_chunk, state):
-    # Transcribe audio chunk
-    result = asr(audio_chunk)
-    text = result.get('text', '').strip()
-    if not text:
-        return state, []
-    # Append user message
-    state.append(("user", text))
-    # Build LLM predictor (you can swap OpenAI / HuggingFace here)
     llm_predictor = HuggingFaceLLMPredictor(model_name_or_path=os.getenv("HF_MODEL", "gpt2-medium"))
     # Query index with conversation
-    # (Assuming `symptom_index` is your GPTVectorStoreIndex)
-    # Prepare combined prompt from state
-    prompt = "\n".join([f"{role}: {msg}" for role, msg in state])
     response = symptom_index.as_query_engine(
         llm_predictor=llm_predictor
     ).query(prompt)
-    reply = response.response
-    # Append assistant message
-    state.append(("assistant", reply))
-    # Return updated state to chatbot
-    return state, state
 # Build Gradio interface
 demo = gr.Blocks()
 with demo:
     gr.Markdown("# Symptom to ICD-10 Code Lookup (Audio Input)")
     chatbot = gr.Chatbot(label="Conversation")
-    state = gr.State([])
-    # Use streaming audio input for real-time transcription
-    mic = gr.Audio(source="microphone", type="filepath", streaming=True, label="Describe your symptoms")
-    mic.stream(
-        fn=transcribe_and_respond,
-        inputs=[mic, state],
-        outputs=[chatbot, state],
-        time_limit=60,
-        stream_every=5,
-        concurrency_limit=1
     )
 if __name__ == "__main__":
     demo.launch(
-        server_name="0.0.0.0", server_port=7860, mcp_server=True
     )

 import os
 import gradio as gr
 from llama_index import SimpleDirectoryReader, GPTVectorStoreIndex
 from llama_index import HuggingFaceLLMPredictor
 from src.parse_tabular import symptom_index
 # --- LlamaIndex utils import ---
 from utils.llama_index_utils import get_llm_predictor, build_index, query_symptoms
 # --- System prompt ---
 SYSTEM_PROMPT = """
 You are a medical assistant helping a user narrow down to the most likely ICD-10 code.
+At each turn, EITHER ask one focused clarifying question (e.g. "Is your cough dry or productive?")
 or, if you have enough info, output a final JSON with fields:
 {"diagnoses":[…], "confidences":[…]}.
 """
+def process_speech(new_transcript, history):
+    # Skip if no new transcript
+    if not new_transcript:
+        return history
+    # Build LLM predictor
     llm_predictor = HuggingFaceLLMPredictor(model_name_or_path=os.getenv("HF_MODEL", "gpt2-medium"))
     # Query index with conversation
+    prompt = "\n".join([f"{role}: {msg}" for role, msg in history])
+    prompt += f"\nuser: {new_transcript}"
     response = symptom_index.as_query_engine(
         llm_predictor=llm_predictor
     ).query(prompt)
+    # Append the new exchange to history
+    history.append((new_transcript, response.response))
+    return history
 # Build Gradio interface
 demo = gr.Blocks()
 with demo:
     gr.Markdown("# Symptom to ICD-10 Code Lookup (Audio Input)")
     chatbot = gr.Chatbot(label="Conversation")
+    audio = gr.Audio(source="microphone", type="text", streaming=True)
+    audio.stream(
+        process_speech,
+        inputs=[audio, chatbot],
+        outputs=chatbot,
+        show_progress="hidden"
     )
 if __name__ == "__main__":
     demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        mcp_server=True
     )