File size: 2,414 Bytes
82d84c7
e8589a9
2d164e9
82d84c7
 
77640a8
 
82d84c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d164e9
 
 
 
 
 
82d84c7
2d164e9
 
82d84c7
2d164e9
 
82d84c7
2d164e9
 
 
 
 
 
 
 
82d84c7
2d164e9
 
82d84c7
2d164e9
 
82d84c7
2d164e9
 
 
 
82d84c7
2d164e9
 
 
82d84c7
2d164e9
82d84c7
 
2d164e9
 
 
 
82d84c7
40e71b1
 
82d84c7
2d164e9
82d84c7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import os
import gradio as gr
import openai
from transformers import pipeline
from llama_index import SimpleDirectoryReader, GPTVectorStoreIndex
from llama_index import HuggingFaceLLMPredictor
from src.parse_tabular import symptom_index

# --- Whisper ASR setup ---
# Module-level speech-to-text pipeline, created once at import time.
# NOTE(review): device=0 pins the model to the first GPU — this will raise on
# CPU-only hosts; confirm deployment target or use device=-1 / device_map.
asr = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-small",
    device=0,
    # Whisper processes audio in 30 s windows; chunking enables inputs
    # longer than a single window.
    chunk_length_s=30,
)

# --- LlamaIndex utils import ---
# NOTE(review): none of these three helpers are referenced in this chunk —
# verify they are used elsewhere or remove the import.
from utils.llama_index_utils import get_llm_predictor, build_index, query_symptoms

# --- System prompt ---
# Instruction block steering the LLM toward either one clarifying question
# per turn or a final {"diagnoses": [...], "confidences": [...]} JSON payload.
# NOTE(review): confirm this constant is actually injected into the LLM
# prompt somewhere — in this chunk it is defined but its use is not visible.
SYSTEM_PROMPT = """
You are a medical assistant helping a user narrow down to the most likely ICD-10 code.
At each turn, EITHER ask one focused clarifying question (e.g. “Is your cough dry or productive?”)
or, if you have enough info, output a final JSON with fields:
{"diagnoses":[…], "confidences":[…]}.
"""

def transcribe_and_respond(audio_chunk, state):
    """Transcribe one streamed audio chunk and advance the diagnostic chat.

    Parameters
    ----------
    audio_chunk : str
        Filepath of the recorded audio segment (``gr.Audio(type="filepath")``).
    state : list[tuple[str, str]]
        Running conversation as ``(role, message)`` pairs, mutated in place.

    Returns
    -------
    tuple[list, list]
        ``(chatbot_value, state)`` matching the ``outputs=[chatbot, state]``
        wiring of the ``mic.stream()`` event.
    """
    # Transcribe the incoming audio chunk.
    result = asr(audio_chunk)
    text = result.get('text', '').strip()
    if not text:
        # BUG FIX: the original returned (state, []), which mapped the empty
        # list onto the state output and silently wiped the conversation on
        # every silent chunk. Keep both outputs consistent instead.
        return state, state

    # Append the user's utterance.
    state.append(("user", text))

    # Build the LLM predictor (swap OpenAI / HuggingFace here).
    # NOTE(review): constructing this per chunk is wasteful — consider
    # hoisting to module scope once behavior is confirmed.
    llm_predictor = HuggingFaceLLMPredictor(
        model_name_or_path=os.getenv("HF_MODEL", "gpt2-medium")
    )

    # FIX: prepend SYSTEM_PROMPT — it was defined at module level but never
    # used, so the model had no instruction to follow the clarifying-question
    # / final-JSON protocol.
    transcript = "\n".join(f"{role}: {msg}" for role, msg in state)
    prompt = f"{SYSTEM_PROMPT.strip()}\n\n{transcript}"

    # Query the symptom vector index with the full conversation so far.
    response = symptom_index.as_query_engine(
        llm_predictor=llm_predictor
    ).query(prompt)
    reply = response.response

    # Append the assistant's reply.
    state.append(("assistant", reply))

    # NOTE(review): gr.Chatbot (tuple format) expects (user_msg, bot_msg)
    # pairs; feeding it (role, message) pairs renders the role label in the
    # user column — confirm intended display format.
    return state, state

# --- Gradio interface ---
# Assemble the Blocks app: a chatbot display, hidden conversation state, and
# a streaming microphone input that feeds transcribe_and_respond.
with gr.Blocks() as demo:
    gr.Markdown("# Symptom to ICD-10 Code Lookup (Audio Input)")

    chatbot = gr.Chatbot(label="Conversation")
    state = gr.State([])

    # Streaming microphone input for near-real-time transcription.
    # NOTE(review): `source=` was renamed `sources=[...]` in Gradio 4.x —
    # confirm the pinned gradio version.
    mic = gr.Audio(
        source="microphone",
        type="filepath",
        streaming=True,
        label="Describe your symptoms",
    )

    # Forward a chunk to the handler every 5 s, for at most 60 s per
    # recording, one concurrent session at a time.
    mic.stream(
        transcribe_and_respond,
        inputs=[mic, state],
        outputs=[chatbot, state],
        stream_every=5,
        time_limit=60,
        concurrency_limit=1,
    )

if __name__ == "__main__":
    # Serve on all interfaces at the conventional Gradio port.
    # NOTE(review): mcp_server=True requires a gradio build with MCP support
    # (gradio[mcp]) — confirm it is in the project's dependencies.
    demo.launch(
        server_name="0.0.0.0", server_port=7860, mcp_server=True
    )