Spaces:

dkg-2
/

SymptoScanMD

Sleeping

App Files Files Community

dkg-2 committed on Aug 16, 2025

Commit

3ccb758

verified ·

1 Parent(s): 07ad692

Upload 6 files

Browse files

Files changed (6) hide show

ai_core.py +29 -0
app.py +104 -0
audio_utils.py +76 -0
image_utils.py +8 -0
packages.txt +2 -0
requirements.txt +8 -0

ai_core.py ADDED Viewed

	@@ -0,0 +1,29 @@

+from groq import Groq
+def analyze_image_with_query(query, model, encoded_image, groq_api_key, mime_type="image/jpeg"):
+    """
+    Send a text query plus one base64-encoded image to a Groq chat model.
+
+    Args:
+        query: Prompt text placed in the single user message.
+        model: Model identifier passed to the chat completions call.
+        encoded_image: Base64 text of the image bytes, without a ``data:`` prefix.
+        groq_api_key: API key handed to the ``Groq`` client.
+        mime_type: Media type written into the data URL. The default,
+            ``"image/jpeg"``, is the value this function always sent before the
+            parameter existed; pass e.g. ``"image/png"`` for PNG uploads.
+
+    Returns:
+        The ``content`` of the first choice's message.
+
+    Raises:
+        ValueError: If ``encoded_image`` is empty.
+    """
+    # Fail before spending a network round trip on a request with no image data.
+    if not encoded_image:
+        raise ValueError("encoded_image must be a non-empty base64 string.")
+    client = Groq(api_key=groq_api_key)
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "text",
+                    "text": query
+                },
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        # The image travels inline as a data URL, not as a remote link.
+                        "url": f"data:{mime_type};base64,{encoded_image}",
+                    },
+                },
+            ],
+        }
+    ]
+    chat_completion = client.chat.completions.create(
+        messages=messages,
+        model=model
+    )
+    return chat_completion.choices[0].message.content

app.py ADDED Viewed

	@@ -0,0 +1,104 @@

+import gradio as gr
+import os
+from dotenv import load_dotenv
+from ai_core import analyze_image_with_query
+from audio_utils import transcribe_with_groq, text_to_speech_with_gtts
+from image_utils import encode_image
+load_dotenv()
+# --- Configuration ---
+# Groq credential, looked up in the process environment (None when unset).
+GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+# Model identifiers: one for image+text chat, one for speech-to-text.
+VISION_MODEL = "meta-llama/llama-4-scout-17b-16e-instruct"
+STT_MODEL = "whisper-large-v3"
+# Instruction text that process_inputs puts in front of the transcribed symptoms.
+SYSTEM_PROMPT = """You are SymptoScan MD, an AI medical assistant. Your role is to act as a professional, empathetic, and knowledgeable doctor.
+When analyzing the user's symptoms and any provided medical images, please follow these guidelines:
+1.  **Analyze Symptoms:** Carefully consider the user's described symptoms from the transcript.
+2.  **Analyze Image:** If an image is provided, analyze it in detail for any visible signs related to the symptoms.
+3.  **Provide a Possible Diagnosis:** Based on the text and image, provide a potential diagnosis or a few possible explanations for the symptoms.
+4.  **Suggest Next Steps:** Recommend clear and safe next steps for the user. This could include seeing a specialist, trying over-the-counter remedies, or making lifestyle changes.
+5.  **Maintain a Professional Tone:** Your response should be clear, concise, and easy for a non-medical person to understand.
+6.  **Include a Disclaimer:** ALWAYS end your response with the following disclaimer: 'Disclaimer: I am an AI assistant and not a real doctor. This is not a real medical diagnosis. Please consult a qualified healthcare professional for any medical concerns.'
+Your primary goal is to be helpful and safe. Do not provide any information that could be dangerous or misleading.
+"""
+# --- Main Processing Function ---
+def process_inputs(audio_filepath, image_filepath):
+    """
+    Run transcription, image analysis and text-to-speech for one request.
+
+    Args:
+        audio_filepath: Path to the recorded audio, or a falsy value when the
+            user recorded nothing.
+        image_filepath: Path to the uploaded image, or a falsy value when no
+            image was supplied.
+
+    Returns:
+        A ``(transcript, doctor_response, voice_path)`` tuple, in the order of
+        the three output components bound to the submit button. Error text is
+        only ever placed in the two text slots; ``voice_path`` is ``None``
+        whenever no audio file was produced.
+    """
+    import tempfile  # local import: only needed for the per-request output path
+    transcript = "No audio was provided."
+    if audio_filepath:
+        try:
+            transcript = transcribe_with_groq(
+                stt_model=STT_MODEL,
+                audio_filepath=audio_filepath,
+                groq_api_key=GROQ_API_KEY
+            )
+        except Exception as e:
+            return f"Error in transcription: {e}", "", None
+    if image_filepath:
+        try:
+            encoded_image = encode_image(image_filepath)
+            query = f"{SYSTEM_PROMPT}\n\nUser symptoms: {transcript}"
+            doctor_response = analyze_image_with_query(
+                query=query,
+                model=VISION_MODEL,
+                encoded_image=encoded_image,
+                groq_api_key=GROQ_API_KEY
+            )
+        except Exception as e:
+            return transcript, f"Error in AI analysis: {e}", None
+    else:
+        doctor_response = "No image provided. Please upload an image for analysis."
+    # Every request gets its own output file: one fixed filename would let
+    # concurrent requests overwrite each other's audio.
+    # NOTE(review): successful requests leave their file in the temp directory;
+    # add periodic cleanup if that becomes a problem.
+    fd, voice_target = tempfile.mkstemp(prefix="symptoscan_md_response_", suffix=".mp3")
+    os.close(fd)  # only the path is needed; the TTS helper writes the file itself
+    try:
+        voice_path = text_to_speech_with_gtts(
+            input_text=doctor_response,
+            output_filepath=voice_target
+        )
+    except Exception as e:
+        # Best-effort removal of the unused placeholder file.
+        try:
+            os.remove(voice_target)
+        except OSError:
+            pass
+        # The third output feeds an audio component, so it must be a file path
+        # or None; the failure is reported in the text response instead.
+        return transcript, f"{doctor_response}\n\n[Error in generating audio: {e}]", None
+    return transcript, doctor_response, voice_path
+# --- Gradio UI ---
+# Soft base theme with custom hues, followed by a page-background override.
+_soft_theme = gr.themes.Soft(primary_hue="teal", secondary_hue="blue", neutral_hue="slate")
+professional_theme = _soft_theme.set(body_background_fill="#F0F4F8")
+# Stylesheet handed to the Blocks container.
+_container_css = ".gradio-container { max-width: 900px !important; margin: auto !important; }"
+with gr.Blocks(title="SymptoScan MD", theme=professional_theme, css=_container_css) as demo:
+    gr.Markdown(
+        """
+        # 🩺 SymptoScan MD
+        ### Your AI-Powered Visual Health Assistant
+        Upload a medical image (e.g., a skin condition) and describe your symptoms. Our AI will provide a preliminary analysis and suggest next steps.
+        """
+    )
+    with gr.Row(equal_height=True):
+        # First column: the two inputs and the trigger button.
+        with gr.Column(scale=1):
+            audio_input = gr.Audio(
+                label="🎤 Record Your Symptoms",
+                sources=["microphone"],
+                type="filepath",
+            )
+            image_input = gr.Image(
+                label="🖼️ Upload Medical Image",
+                type="filepath",
+            )
+            submit_btn = gr.Button("Analyze Symptoms", variant="primary")
+        # Second column: transcript, text answer and spoken answer.
+        with gr.Column(scale=2):
+            transcript_output = gr.Textbox(
+                label="📝 Your Symptoms (Transcribed)",
+                lines=4,
+                interactive=False,
+            )
+            response_output = gr.Textbox(
+                label="👩‍⚕️ AI Doctor's Analysis",
+                lines=8,
+                interactive=False,
+            )
+            audio_output = gr.Audio(
+                label="🔊 AI Voice Response",
+                interactive=False,
+            )
+    # --- Logic ---
+    # A click sends both inputs to process_inputs and maps its three return
+    # values onto the three output components, in order.
+    submit_btn.click(
+        fn=process_inputs,
+        api_name="analyze",
+        inputs=[audio_input, image_input],
+        outputs=[transcript_output, response_output, audio_output],
+    )
+if __name__ == "__main__":
+    demo.launch(debug=True)

audio_utils.py ADDED Viewed

	@@ -0,0 +1,76 @@

+import logging
+import os
+import platform
+import subprocess
+from io import BytesIO
+import speech_recognition as sr
+from gtts import gTTS
+from pydub import AudioSegment
+from groq import Groq
+# Module-wide logging setup: INFO threshold, records shown as "time - level - message".
+logging.basicConfig(
+    format='%(asctime)s - %(levelname)s - %(message)s',
+    level=logging.INFO,
+)
+def record_audio(file_path, timeout=20, phrase_time_limit=None):
+    """
+    Record from the microphone and save the result as an MP3 file.
+
+    Any failure is logged with its traceback instead of being raised, so the
+    function returns ``None`` whether or not a file was written.
+
+    Args:
+        file_path: Destination path for the exported MP3.
+        timeout: Forwarded to ``Recognizer.listen`` as ``timeout``.
+        phrase_time_limit: Forwarded to ``Recognizer.listen`` as
+            ``phrase_time_limit``.
+    """
+    recognizer = sr.Recognizer()
+    try:
+        with sr.Microphone() as source:
+            logging.info("Adjusting for ambient noise...")
+            recognizer.adjust_for_ambient_noise(source, duration=1)
+            logging.info("Start speaking now...")
+            audio_data = recognizer.listen(source, timeout=timeout, phrase_time_limit=phrase_time_limit)
+            logging.info("Recording complete.")
+        # Conversion works on the captured bytes only, so the microphone is
+        # released before the WAV -> MP3 export starts.
+        wav_data = audio_data.get_wav_data()
+        audio_segment = AudioSegment.from_wav(BytesIO(wav_data))
+        audio_segment.export(file_path, format="mp3", bitrate="128k")
+        logging.info("Audio saved to %s", file_path)
+    except Exception as e:
+        # Best-effort by design: keep the original message but attach the
+        # traceback so the failing step can be identified from the log.
+        logging.exception("An error occurred: %s", e)
+def transcribe_with_groq(stt_model, audio_filepath, groq_api_key, language="en"):
+    """
+    Transcribe an audio file using the Groq API.
+
+    Args:
+        stt_model: Speech-to-text model identifier passed to the API.
+        audio_filepath: Path of the audio file; it is opened in binary mode.
+        groq_api_key: API key handed to the ``Groq`` client.
+        language: Language code sent with the request. Defaults to ``"en"``,
+            the value that was previously hard-coded.
+
+    Returns:
+        The ``text`` attribute of the transcription response.
+    """
+    client = Groq(api_key=groq_api_key)
+    # The file handle stays open only for the duration of the upload.
+    with open(audio_filepath, "rb") as audio_file:
+        transcription = client.audio.transcriptions.create(
+            model=stt_model,
+            file=audio_file,
+            language=language
+        )
+    return transcription.text
+def text_to_speech_with_gtts(input_text, output_filepath="gtts_output.mp3", play_audio=True):
+    """
+    Convert text to an MP3 file with gTTS and optionally play it locally.
+
+    Args:
+        input_text: Text to synthesize (English, normal speed).
+        output_filepath: Where the MP3 is written.
+        play_audio: When true (the default, matching the earlier behaviour),
+            the saved file is played through a platform-specific command and
+            the call blocks until that command exits. Pass ``False`` to only
+            write the file, e.g. when running on a server.
+
+    Returns:
+        ``output_filepath``. Playback problems are logged and never raised.
+
+    Raises:
+        ValueError: If ``input_text`` is empty or whitespace only.
+    """
+    if not input_text or not input_text.strip():
+        raise ValueError("input_text must contain text to synthesize.")
+    tts = gTTS(text=input_text, lang="en", slow=False)
+    tts.save(output_filepath)
+    if not play_audio:
+        return output_filepath
+    os_name = platform.system()
+    try:
+        if os_name == "Darwin":  # macOS
+            command = ['afplay', output_filepath]
+        elif os_name == "Windows":  # Windows
+            # NOTE(review): Media.SoundPlayer is documented for WAV files;
+            # confirm it can play the MP3 written above or switch players.
+            command = [
+                'powershell',
+                '-c',
+                f'(New-Object Media.SoundPlayer "{output_filepath}").PlaySync();'
+            ]
+        elif os_name == "Linux":
+            # aplay does not decode MP3, so a decoding player is used instead
+            # (ffplay is part of ffmpeg on most Linux distributions).
+            command = ['ffplay', '-nodisp', '-autoexit', '-loglevel', 'quiet', output_filepath]
+        else:
+            raise OSError("Unsupported OS for audio playback.")
+        # check=True turns a failing player into an exception that is reported below.
+        subprocess.run(command, check=True)
+    except Exception as e:
+        # Playback is a convenience; the file on disk is the real result.
+        logging.warning("[Audio Playback Error] %s", e)
+    return output_filepath

image_utils.py ADDED Viewed

	@@ -0,0 +1,8 @@

+import base64
+def encode_image(image_path):
+    """
+    Read a file in binary mode and return its contents as base64 text.
+    """
+    with open(image_path, "rb") as handle:
+        raw_bytes = handle.read()
+    # b64encode yields bytes; decode them to obtain a str.
+    b64_bytes = base64.b64encode(raw_bytes)
+    return b64_bytes.decode('utf-8')

packages.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ portaudio19-dev
2	+ ffmpeg

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+groq
+python-dotenv
+speechrecognition
+pydub
+pyaudio
+gtts
+elevenlabs
+gradio