rohanjain1648 committed on
Commit
1d00c45
·
verified ·
1 Parent(s): 6f8d350

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -0
app.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ from dotenv import load_dotenv
3
+ load_dotenv()
4
+
5
+ import os
6
+ import gradio as gr
7
+
8
+ from brain_of_the_doctor import encode_image, analyze_image_with_query
9
+ from voice_of_the_patient import transcribe_with_groq
10
+ from voice_of_the_doctor import text_to_speech_with_elevenlabs
11
+
12
# --- System prompt for the "doctor" ---
# Instruction text that process_inputs concatenates with the patient's
# transcribed speech to form the query given to analyze_image_with_query.
# NOTE(review): the text asks for both "one long paragraph" and
# "max 2 sentences" -- confirm which length constraint is intended.
system_prompt = """You have to act as a professional doctor, i know you are not but this is for learning purpose.
What's in this image?. Do you find anything wrong with it medically?
If you make a differential, suggest some remedies for them. Donot add any numbers or special characters in
your response. Your response should be in one long paragraph. Also always answer as if you are answering to a real person.
Donot say 'In the image I see' but say 'With what I see, I think you have ....'
Dont respond as an AI model in markdown, your answer should mimic that of an actual doctor not an AI bot,
Keep your answer concise (max 2 sentences). No preamble, start your answer right away please"""
20
+
21
+
22
def process_inputs(audio_filepath, image_filepath):
    """Run one voice-plus-image consultation and return the three UI outputs.

    The pipeline checks the required API keys, transcribes the patient's
    audio, sends the image together with the system prompt and the
    transcription to the vision model, and finally converts the model's
    reply to speech.

    Args:
        audio_filepath: Path to the recorded or uploaded audio file; a falsy
            or non-existent path yields an error message.
        image_filepath: Path to the uploaded image; a falsy or non-existent
            path yields an error message in the response slot.

    Returns:
        A 3-tuple ``(speech_to_text, doctor_response, doctor_voice)``:

        * On a missing key, missing audio, or transcription failure the
          tuple is ``(error_message, "", None)``.
        * Otherwise ``speech_to_text`` is the transcription and
          ``doctor_response`` is the model reply or an error message that
          starts with the cross-mark emoji.
        * ``doctor_voice`` is whatever ``text_to_speech_with_elevenlabs``
          returns (presumably the path of the generated audio -- confirm
          against that helper), or ``None`` when no speech was produced.
    """
    # --- Validate API keys ---
    groq_key = os.environ.get("GROQ_API_KEY")
    eleven_key = os.environ.get("ELEVENLABS_API_KEY")

    if not groq_key:
        return "❌ Error: Missing GROQ_API_KEY in Hugging Face Secrets.", "", None
    if not eleven_key:
        return "❌ Error: Missing ELEVENLABS_API_KEY in Hugging Face Secrets.", "", None

    # --- Speech to Text ---
    if not audio_filepath or not os.path.exists(audio_filepath):
        return "❌ Error: No valid audio file received.", "", None

    try:
        speech_to_text_output = transcribe_with_groq(
            GROQ_API_KEY=groq_key,
            audio_filepath=audio_filepath,
            stt_model="whisper-large-v3",
        )
    except Exception as e:
        return f"❌ STT Error: {e}", "", None

    # --- Image Analysis ---
    try:
        if not image_filepath or not os.path.exists(image_filepath):
            doctor_response = "❌ Error: No image file received for analysis."
        else:
            doctor_response = analyze_image_with_query(
                # Separate the instructions from the patient's words; plain
                # concatenation would fuse the last prompt word with the
                # first transcribed word.
                query=system_prompt + "\n\n" + speech_to_text_output,
                encoded_image=encode_image(image_filepath),
                model="meta-llama/llama-4-scout-17b-16e-instruct",
            )
    except Exception as e:
        doctor_response = f"❌ Image Analysis Error: {e}"

    # --- Text to Speech ---
    # Never voice an error message; leave the audio output empty instead.
    if doctor_response.startswith("❌"):
        return speech_to_text_output, doctor_response, None

    try:
        voice_of_doctor = text_to_speech_with_elevenlabs(
            input_text=doctor_response,
            output_filepath="final.mp3",
        )
    except Exception as e:
        # The third output feeds an audio component, which would treat an
        # error string as a file path and fail the whole request. Report the
        # problem in the text response and return no audio.
        return (
            speech_to_text_output,
            f"{doctor_response}\n\n❌ TTS Error: {e}",
            None,
        )

    return speech_to_text_output, doctor_response, voice_of_doctor
71
+
72
+
73
# --- Gradio Interface ---
# Input widgets, listed in the positional order of process_inputs' parameters
# (audio first, then image). Both hand the callback a file path.
_input_components = [
    gr.Audio(
        label="Patient's Voice",
        type="filepath",
        format="wav",
        sources=["microphone", "upload"],
    ),
    gr.Image(
        label="Medical Image",
        type="filepath",
    ),
]

# Output widgets, one per element of the tuple returned by process_inputs.
_output_components = [
    gr.Textbox(label="Speech to Text", lines=2, interactive=False),
    gr.Textbox(label="Doctor's Response", lines=4, interactive=False),
    gr.Audio(label="Doctor's Voice"),
]

iface = gr.Interface(
    title="AI Doctor with Vision and Voice",
    description="Upload or record your voice and image to get a doctor's opinion.",
    fn=process_inputs,
    inputs=_input_components,
    outputs=_output_components,
    live=False,
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch(debug=True)