dkg-2 committed on
Commit
3ccb758
·
verified ·
1 Parent(s): 07ad692

Upload 6 files

Browse files
Files changed (6) hide show
  1. ai_core.py +29 -0
  2. app.py +104 -0
  3. audio_utils.py +76 -0
  4. image_utils.py +8 -0
  5. packages.txt +2 -0
  6. requirements.txt +8 -0
ai_core.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from groq import Groq
2
+
3
def analyze_image_with_query(query, model, encoded_image, groq_api_key):
    """Send a text query together with a base64-encoded JPEG to a Groq vision model.

    Args:
        query: Text prompt to pair with the image.
        model: Groq model identifier used for the chat completion.
        encoded_image: Base64-encoded JPEG bytes (no data-URI prefix).
        groq_api_key: API key used to authenticate with Groq.

    Returns:
        The text content of the first response choice.
    """
    # Build the multimodal user message: text part first, then the image
    # embedded as a data URI (order matches what the model expects here).
    text_part = {"type": "text", "text": query}
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"},
    }
    conversation = [{"role": "user", "content": [text_part, image_part]}]

    groq_client = Groq(api_key=groq_api_key)
    completion = groq_client.chat.completions.create(
        messages=conversation,
        model=model,
    )
    return completion.choices[0].message.content
app.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from dotenv import load_dotenv
4
+ from ai_core import analyze_image_with_query
5
+ from audio_utils import transcribe_with_groq, text_to_speech_with_gtts
6
+ from image_utils import encode_image
7
+
8
# Load environment variables from a local .env file (supplies GROQ_API_KEY in dev).
load_dotenv()

# --- Configuration ---
# Groq API key; None when unset — downstream calls will then fail with an auth error.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
# Speech-to-text model used for transcribing the user's recorded symptoms.
STT_MODEL = "whisper-large-v3"
# Vision-capable chat model used for the combined image + symptom analysis.
VISION_MODEL = "meta-llama/llama-4-scout-17b-16e-instruct"
# System prompt prepended to every analysis request. NOTE: the closing
# disclaimer is part of the product's safety contract — keep it intact.
SYSTEM_PROMPT = """You are SymptoScan MD, an AI medical assistant. Your role is to act as a professional, empathetic, and knowledgeable doctor.

When analyzing the user's symptoms and any provided medical images, please follow these guidelines:

1. **Analyze Symptoms:** Carefully consider the user's described symptoms from the transcript.
2. **Analyze Image:** If an image is provided, analyze it in detail for any visible signs related to the symptoms.
3. **Provide a Possible Diagnosis:** Based on the text and image, provide a potential diagnosis or a few possible explanations for the symptoms.
4. **Suggest Next Steps:** Recommend clear and safe next steps for the user. This could include seeing a specialist, trying over-the-counter remedies, or making lifestyle changes.
5. **Maintain a Professional Tone:** Your response should be clear, concise, and easy for a non-medical person to understand.
6. **Include a Disclaimer:** ALWAYS end your response with the following disclaimer: 'Disclaimer: I am an AI assistant and not a real doctor. This is not a real medical diagnosis. Please consult a qualified healthcare professional for any medical concerns.'

Your primary goal is to be helpful and safe. Do not provide any information that could be dangerous or misleading.
"""
27
+
28
# --- Main Processing Function ---
def process_inputs(audio_filepath, image_filepath):
    """Run the full pipeline: transcribe audio, analyze the image, voice the reply.

    Args:
        audio_filepath: Path to the user's recorded audio, or None/empty.
        image_filepath: Path to the uploaded medical image, or None/empty.

    Returns:
        tuple: (transcript, doctor_response, voice_path). When a stage fails,
        its slot carries an error message and later slots are empty/None.
    """
    # Fail fast with a readable message instead of letting the Groq client
    # raise an opaque authentication error deep inside the pipeline.
    if not GROQ_API_KEY:
        return "Error: GROQ_API_KEY is not configured.", "", None

    transcript = "No audio was provided."
    if audio_filepath:
        try:
            transcript = transcribe_with_groq(
                stt_model=STT_MODEL,
                audio_filepath=audio_filepath,
                groq_api_key=GROQ_API_KEY
            )
        except Exception as e:
            return f"Error in transcription: {e}", "", None

    if image_filepath:
        try:
            encoded_image = encode_image(image_filepath)
            # The system prompt and transcript are folded into one user query
            # because the vision call sends a single message.
            query = f"{SYSTEM_PROMPT}\n\nUser symptoms: {transcript}"
            doctor_response = analyze_image_with_query(
                query=query,
                model=VISION_MODEL,
                encoded_image=encoded_image,
                groq_api_key=GROQ_API_KEY
            )
        except Exception as e:
            return transcript, f"Error in AI analysis: {e}", None
    else:
        doctor_response = "No image provided. Please upload an image for analysis."

    try:
        voice_path = text_to_speech_with_gtts(
            input_text=doctor_response,
            output_filepath="symptoscan_md_response.mp3"
        )
    except Exception as e:
        # The third slot normally carries an audio file path; on TTS failure
        # it carries the error string instead (preserved original contract).
        return transcript, doctor_response, f"Error in generating audio: {e}"

    return transcript, doctor_response, voice_path
65
+
66
+ # --- Gradio UI ---
67
# --- Gradio UI ---
# Soft theme with a medical teal/blue palette and a light page background.
professional_theme = gr.themes.Soft(
    primary_hue="teal",
    secondary_hue="blue",
    neutral_hue="slate",
).set(
    body_background_fill="#F0F4F8",
)

# Constrain the app to a centered 900px column via inline CSS.
with gr.Blocks(title="SymptoScan MD", theme=professional_theme, css=".gradio-container { max-width: 900px !important; margin: auto !important; }") as demo:
    gr.Markdown(
        """
        # 🩺 SymptoScan MD
        ### Your AI-Powered Visual Health Assistant
        Upload a medical image (e.g., a skin condition) and describe your symptoms. Our AI will provide a preliminary analysis and suggest next steps.
        """
    )

    # Left column: inputs (microphone + image + submit); right column: outputs.
    with gr.Row(equal_height=True):
        with gr.Column(scale=1):
            # type="filepath" hands process_inputs a path string, not raw data.
            audio_input = gr.Audio(sources=["microphone"], type="filepath", label="🎤 Record Your Symptoms")
            image_input = gr.Image(type="filepath", label="🖼️ Upload Medical Image")
            submit_btn = gr.Button("Analyze Symptoms", variant="primary")

        with gr.Column(scale=2):
            # Outputs are read-only; they are populated by process_inputs.
            transcript_output = gr.Textbox(label="📝 Your Symptoms (Transcribed)", lines=4, interactive=False)
            response_output = gr.Textbox(label="👩‍⚕️ AI Doctor's Analysis", lines=8, interactive=False)
            audio_output = gr.Audio(label="🔊 AI Voice Response", interactive=False)

    # --- Logic ---
    # Wire the button to the pipeline; api_name exposes it as /analyze.
    submit_btn.click(
        fn=process_inputs,
        inputs=[audio_input, image_input],
        outputs=[transcript_output, response_output, audio_output],
        api_name="analyze"
    )

if __name__ == "__main__":
    # NOTE(review): debug=True is a development setting — confirm before deploying.
    demo.launch(debug=True)
audio_utils.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ import platform
4
+ import subprocess
5
+ from io import BytesIO
6
+
7
+ import speech_recognition as sr
8
+ from gtts import gTTS
9
+ from pydub import AudioSegment
10
+ from groq import Groq
11
+
12
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
13
+
14
def record_audio(file_path, timeout=20, phrase_time_limit=None):
    """Record microphone input and save it to *file_path* as a 128 kbps MP3.

    Args:
        file_path: Destination path for the MP3 file.
        timeout: Seconds to wait for speech to start before giving up.
        phrase_time_limit: Max seconds of speech to capture (None = unlimited).

    Errors are logged rather than raised (best-effort capture).
    """
    recognizer = sr.Recognizer()

    try:
        with sr.Microphone() as mic:
            logging.info("Adjusting for ambient noise...")
            recognizer.adjust_for_ambient_noise(mic, duration=1)
            logging.info("Start speaking now...")

            captured = recognizer.listen(mic, timeout=timeout, phrase_time_limit=phrase_time_limit)
            logging.info("Recording complete.")

            # Convert the captured WAV bytes to MP3 in memory via pydub.
            segment = AudioSegment.from_wav(BytesIO(captured.get_wav_data()))
            segment.export(file_path, format="mp3", bitrate="128k")

            logging.info(f"Audio saved to {file_path}")

    except Exception as e:
        logging.error(f"An error occurred: {e}")
37
+
38
def transcribe_with_groq(stt_model, audio_filepath, groq_api_key):
    """Transcribe an English-language audio file via the Groq STT endpoint.

    Args:
        stt_model: Speech-to-text model identifier (e.g. "whisper-large-v3").
        audio_filepath: Path to the audio file to transcribe.
        groq_api_key: API key used to authenticate with Groq.

    Returns:
        The transcription text.
    """
    groq_client = Groq(api_key=groq_api_key)

    # The file handle is passed straight through to the API client.
    with open(audio_filepath, "rb") as audio_handle:
        result = groq_client.audio.transcriptions.create(
            model=stt_model,
            file=audio_handle,
            language="en",
        )

    return result.text
51
+
52
def text_to_speech_with_gtts(input_text, output_filepath="gtts_output.mp3"):
    """Convert text to speech with gTTS, save as MP3, and attempt local playback.

    Args:
        input_text: Text to synthesize.
        output_filepath: Destination MP3 path (default "gtts_output.mp3").

    Returns:
        The path of the saved MP3 file — returned even if playback fails,
        since the Gradio UI plays the file itself.
    """
    tts = gTTS(text=input_text, lang="en", slow=False)
    tts.save(output_filepath)

    os_name = platform.system()
    try:
        if os_name == "Darwin":  # macOS: afplay decodes MP3 natively.
            subprocess.run(['afplay', output_filepath], check=True)
        elif os_name == "Windows":
            # NOTE(review): Media.SoundPlayer only supports WAV — this path
            # likely fails silently on MP3 input; verify on a Windows host.
            subprocess.run([
                'powershell',
                '-c',
                f'(New-Object Media.SoundPlayer "{output_filepath}").PlaySync();'
            ], check=True)
        elif os_name == "Linux":
            # BUG FIX: aplay only handles WAV/raw PCM and cannot decode MP3.
            # ffplay ships with ffmpeg, which is already declared in packages.txt.
            subprocess.run(['ffplay', '-nodisp', '-autoexit', output_filepath], check=True)
        else:
            raise OSError("Unsupported OS for audio playback.")
    except Exception as e:
        # Playback is best-effort; the saved file is still returned below.
        print(f"[Audio Playback Error] {e}")

    return output_filepath
image_utils.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+
3
def encode_image(image_path):
    """Read the file at *image_path* and return its contents as a base64 ASCII string.

    Args:
        image_path: Path to the image file on disk.

    Returns:
        str: Base64 encoding of the file's raw bytes.
    """
    with open(image_path, "rb") as image_handle:
        raw_bytes = image_handle.read()
    return base64.b64encode(raw_bytes).decode('utf-8')
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ portaudio19-dev
2
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ groq
2
+ python-dotenv
3
+ speechrecognition
4
+ pydub
5
+ pyaudio
6
+ gtts
7
+ elevenlabs
8
+ gradio