Spaces:

rohanjain1648
/

doc

Sleeping

App Files Files Community

rohanjain1648 committed on Sep 1, 2025

Commit

1d00c45

verified ·

1 Parent(s): 6f8d350

Upload app.py

Browse files

Files changed (1) hide show

app.py +99 -0

app.py ADDED Viewed

	@@ -0,0 +1,99 @@

+# app.py
+from dotenv import load_dotenv
+load_dotenv()
+import os
+import gradio as gr
+from brain_of_the_doctor import encode_image, analyze_image_with_query
+from voice_of_the_patient import transcribe_with_groq
+from voice_of_the_doctor import text_to_speech_with_elevenlabs
+# --- System prompt for the "doctor" ---
+# Instruction text that process_inputs() places in front of the patient's
+# transcribed speech to form the query sent along with the image.
+# The leading spaces on the continuation lines are part of the string.
+# NOTE(review): the prompt asks for "one long paragraph" and also for
+# "max 2 sentences" -- confirm which length is actually intended.
+system_prompt = """You have to act as a professional doctor, i know you are not but this is for learning purpose.
+            What's in this image?. Do you find anything wrong with it medically?
+            If you make a differential, suggest some remedies for them. Donot add any numbers or special characters in
+            your response. Your response should be in one long paragraph. Also always answer as if you are answering to a real person.
+            Donot say 'In the image I see' but say 'With what I see, I think you have ....'
+            Dont respond as an AI model in markdown, your answer should mimic that of an actual doctor not an AI bot,
+            Keep your answer concise (max 2 sentences). No preamble, start your answer right away please"""
+def process_inputs(audio_filepath, image_filepath):
+    # --- Validate API keys ---
+    groq_key = os.environ.get("GROQ_API_KEY")
+    eleven_key = os.environ.get("ELEVENLABS_API_KEY")
+    if not groq_key:
+        return "❌ Error: Missing GROQ_API_KEY in Hugging Face Secrets.", "", None
+    if not eleven_key:
+        return "❌ Error: Missing ELEVENLABS_API_KEY in Hugging Face Secrets.", "", None
+    # --- Speech to Text ---
+    try:
+        if not audio_filepath or not os.path.exists(audio_filepath):
+            return "❌ Error: No valid audio file received.", "", None
+        speech_to_text_output = transcribe_with_groq(
+            GROQ_API_KEY=groq_key,
+            audio_filepath=audio_filepath,
+            stt_model="whisper-large-v3"
+        )
+    except Exception as e:
+        return f"❌ STT Error: {str(e)}", "", None
+    # --- Image Analysis ---
+    try:
+        if not image_filepath or not os.path.exists(image_filepath):
+            doctor_response = "❌ Error: No image file received for analysis."
+        else:
+            doctor_response = analyze_image_with_query(
+                query=system_prompt + speech_to_text_output,
+                encoded_image=encode_image(image_filepath),
+                model="meta-llama/llama-4-scout-17b-16e-instruct"
+            )
+    except Exception as e:
+        doctor_response = f"❌ Image Analysis Error: {str(e)}"
+    # --- Text to Speech ---
+    try:
+        if doctor_response.startswith("❌"):
+            voice_of_doctor = None
+        else:
+            voice_of_doctor = text_to_speech_with_elevenlabs(
+                input_text=doctor_response,
+                output_filepath="final.mp3"
+            )
+    except Exception as e:
+        return speech_to_text_output, doctor_response, f"❌ TTS Error: {str(e)}"
+    return speech_to_text_output, doctor_response, voice_of_doctor
+# --- Gradio Interface ---
+iface = gr.Interface(
+    fn=process_inputs,
+    inputs=[
+        gr.Audio(
+            sources=["microphone", "upload"],
+            type="filepath",
+            format="wav",
+            label="Patient's Voice"
+        ),
+        gr.Image(
+            type="filepath",
+            label="Medical Image"
+        )
+    ],
+    outputs=[
+        gr.Textbox(label="Speech to Text", lines=2, interactive=False),
+        gr.Textbox(label="Doctor's Response", lines=4, interactive=False),
+        gr.Audio(label="Doctor's Voice")
+    ],
+    live=False,
+    title="AI Doctor with Vision and Voice",
+    description="Upload or record your voice and image to get a doctor's opinion."
+)
+if __name__ == "__main__":
+    iface.launch(debug=True)