microphone should now transcribe directly to text without needing intermediate audio file processing

src/app.py CHANGED (+44, -69)

@@ -169,62 +169,40 @@ or, if you have enough info, output a final JSON with fields:
 {"diagnoses":[…], "confidences":[…]}.
 """
 
-def process_speech(new_transcript, history):
-    if not new_transcript:
-        return history
-
-    if not isinstance(new_transcript, str):
-        print(f"Warning: Expected string transcript, got {type(new_transcript)}")
-        new_transcript = str(new_transcript)
-
+def process_speech(audio_path, history):
+    """Process speech input and convert to text."""
     try:
-
+        if not audio_path:
+            return []
+
+        # The audio_path now contains the transcribed text directly from Gradio
+        transcript = audio_path
+
+        # Query the symptom index
         diagnosis_query = f"""
-        Given these symptoms: '{new_transcript}'
+        Given these symptoms: '{transcript}'
 
         Identify the most likely ICD-10 diagnoses and key questions to differentiate between them.
         Focus only on symptoms mentioned and their clinical implications.
-
-        Format response as:
-        1. Primary suspected diagnosis: [ICD-10 code] - [description]
-        2. Alternative diagnosis: [ICD-10 code] - [description]
-        3. Key differentiating question
         """
 
         response = symptom_index.as_query_engine().query(diagnosis_query)
 
-        #
-        lines = str(response).strip().split('\n')
-        diagnoses = []
-        follow_up = ""
-
-        for line in lines:
-            if '[' in line and ']' in line:  # Extract ICD-10 codes
-                code = line[line.find('[')+1:line.find(']')]
-                diagnoses.append(code)
-            elif 'Key differentiating question' in line:
-                follow_up = line.split(':')[-1].strip()
-
+        # Format response
         formatted_response = {
-            "diagnoses": diagnoses,
-            "confidences": […],
-            "follow_up": follow_up
+            "diagnoses": [],
+            "confidences": [],
+            "follow_up": str(response)
         }
 
-        history.append({"role": "user", "content": new_transcript})
-        history.append({"role": "assistant", "content": json.dumps(formatted_response, indent=2)})
+        return [
+            {"role": "user", "content": transcript},
+            {"role": "assistant", "content": json.dumps(formatted_response)}
+        ]
 
     except Exception as e:
-        print(f"Error processing speech: {e}")
-        error_response = {
-            "diagnoses": ["Error processing symptoms"],
-            "confidences": [0],
-            "follow_up": "Could you please repeat your symptoms?"
-        }
-        history.append({"role": "user", "content": new_transcript})
-        history.append({"role": "assistant", "content": json.dumps(error_response, indent=2)})
-
-    return history
+        print(f"Error processing speech: {e}")
+        return []
 
 def text_to_speech(text):
     """Convert text to speech and return audio HTML element."""
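
For context on the new control flow: process_speech no longer mutates history in place; it returns a fresh two-message list that the caller merges. A minimal runnable sketch of that contract, with a hypothetical stub standing in for the real LlamaIndex-backed symptom_index (the stub reply and sample transcript are illustrative only, not from the app):

import json

class _StubQueryEngine:
    """Hypothetical stand-in for symptom_index.as_query_engine()."""
    def query(self, prompt):
        return "Primary suspected diagnosis: [R51] - Headache"

class _StubIndex:
    def as_query_engine(self):
        return _StubQueryEngine()

symptom_index = _StubIndex()

def process_speech(audio_path, history):
    """Mirror of the committed function: wrap the transcript and query result as chat messages."""
    try:
        if not audio_path:
            return []
        transcript = audio_path  # already text under the new streaming setup
        response = symptom_index.as_query_engine().query(
            f"Given these symptoms: '{transcript}', identify likely ICD-10 diagnoses."
        )
        formatted_response = {
            "diagnoses": [],
            "confidences": [],
            "follow_up": str(response),
        }
        return [
            {"role": "user", "content": transcript},
            {"role": "assistant", "content": json.dumps(formatted_response)},
        ]
    except Exception as e:
        print(f"Error processing speech: {e}")
        return []

print(process_speech("throbbing headache with nausea", history=[]))

Note the empty diagnoses/confidences lists: the new version returns the raw query response as follow_up and defers any parsing to downstream formatting, instead of scraping ICD-10 codes out of the response text here.
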
@@ -270,10 +248,16 @@ with gr.Blocks() as demo:
         with gr.Column(scale=2):
             # Moved microphone row above chatbot
             with gr.Row():
-                microphone = gr.Audio(source="microphone", type="filepath",
+                microphone = gr.Audio(
+                    source="microphone",
+                    type="text",  # Changed from filepath to text
                     label="Describe your symptoms",
-                    streaming=True
-
+                    streaming=True
+                )
+                transcript_box = gr.Textbox(
+                    label="Transcribed Text",
+                    interactive=False,
+                    show_label=True
                 )
             clear_btn = gr.Button("Clear Chat", variant="secondary")
 
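
One caveat on the component change: stock Gradio documents only type="numpy" and type="filepath" for gr.Audio, so type="text" presumably relies on a custom or patched component that emits the transcript string directly. If a plain filepath ever comes through instead, a small guard in the callback keeps the handler from treating it as symptom text; coerce_transcript below is a hypothetical helper, not part of the commit:

def coerce_transcript(audio_value):
    """Hypothetical guard: return transcript text, or None if the value
    looks like a raw audio artifact that still needs an ASR pass."""
    if isinstance(audio_value, str) and not audio_value.lower().endswith(
        (".wav", ".mp3", ".flac", ".ogg")
    ):
        return audio_value  # already transcribed text
    return None  # raw audio path (or non-string payload): run ASR explicitly

# Example: the handler can bail out early on non-text payloads.
assert coerce_transcript("sharp chest pain") == "sharp chest pain"
assert coerce_transcript("/tmp/audio.wav") is None
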
@@ -308,39 +292,30 @@ with gr.Blocks() as demo:
     clear_btn.click(lambda: None, None, chatbot, queue=False)
 
     def enhanced_process_speech(audio_path, history, api_key=None, model_tier="small", temp=0.7):
-        """…"""
+        """Handle speech processing and chat formatting."""
        if not audio_path:
            return history
 
-        #
-        transcript = process_speech(audio_path, history)
-        if not transcript:
-            return history + [
-                {"role": "user", "content": "Audio recording"},
-                {"role": "assistant", "content": "I couldn't process that audio. Could you try again?"}
-            ]
-
-        try:
-            # Get the last assistant response
-            user_message = transcript[-2]["content"]  # What the user said
-            assistant_json = transcript[-1]["content"]  # JSON response from assistant
+        # Process the new audio input
+        new_messages = process_speech(audio_path, history)
+        if not new_messages:
+            return history
 
-
-
-
+        try:
+            # Format last assistant response
+            assistant_response = new_messages[-1]["content"]
+            response_dict = json.loads(assistant_response)
+            formatted_text = format_response_for_user(response_dict)
 
-            # Add …
+            # Add to history with proper message format
            return history + [
-                {"role": "user", "content": user_message},
-                {"role": "assistant", "content": …}
+                {"role": "user", "content": new_messages[0]["content"]},
+                {"role": "assistant", "content": formatted_text}
            ]
 
        except Exception as e:
-            print(f"Error formatting response: {e}")
-            return history + [
-                {"role": "user", "content": "Error processing audio"},
-                {"role": "assistant", "content": "Sorry, I encountered an error processing your symptoms. Could you try again?"}
-            ]
+            print(f"Error formatting response: {e}")
+            return history
 
    microphone.stream(
        fn=enhanced_process_speech,
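
enhanced_process_speech now calls format_response_for_user, which is defined elsewhere in src/app.py and not shown in this diff. Given the JSON shape process_speech produces ("diagnoses", "confidences", "follow_up"), a plausible sketch of such a formatter (an assumption for illustration, not the actual implementation):

def format_response_for_user(response_dict):
    """Assumed sketch: turn the diagnosis JSON into readable chat text."""
    lines = []
    diagnoses = response_dict.get("diagnoses", [])
    confidences = response_dict.get("confidences", [])
    for code, confidence in zip(diagnoses, confidences):
        lines.append(f"- {code} (confidence: {confidence:.0%})")
    follow_up = response_dict.get("follow_up", "")
    if follow_up:
        lines.append(follow_up)
    return "\n".join(lines) or "Could you describe your symptoms in more detail?"

print(format_response_for_user(
    {"diagnoses": ["R51"], "confidences": [0.7], "follow_up": "Any fever?"}
))
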