Spaces:
Sleeping
Sleeping
| import os | |
| import tempfile | |
| import base64 | |
| import io | |
| import librosa | |
| import librosa.display | |
| import matplotlib.pyplot as plt | |
| import numpy as np | |
| from gtts import gTTS | |
| from groq import Groq | |
| from dotenv import load_dotenv | |
| # 1. Load the environment variables FIRST, before the client below is constructed. | |
| load_dotenv() | |
| # 2. Initialize the module-level Groq client used by generate_medical_advice_from_vision. | |
| # NOTE(review): no arguments are passed, so credentials presumably come from the environment loaded above - confirm. | |
| client = Groq() | |
def get_spectrogram_base64(audio_path):
    """
    Render a Mel-spectrogram of an audio file and return it as a Base64 PNG.

    The audio is loaded with ``sr=None``, converted to a 128-band Mel power
    spectrogram capped at 2000 Hz, scaled to dB relative to its peak, and
    drawn with the 'magma' colormap on a small, low-DPI figure.

    Args:
        audio_path: Path of the audio file handed to ``librosa.load``.

    Returns:
        The PNG image as a Base64-encoded UTF-8 string, or ``None`` if any
        step fails (the error is printed).
    """
    fig = None
    try:
        y, sr = librosa.load(audio_path, sr=None)
        S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=2000)
        S_dB = librosa.power_to_db(S, ref=np.max)

        # Create the figure only once the numeric work has succeeded, so a
        # bad audio file never allocates a figure at all.
        fig, ax = plt.subplots(figsize=(6, 3))  # Slightly smaller dimensions
        librosa.display.specshow(S_dB, sr=sr, fmax=2000, ax=ax, cmap='magma')

        buf = io.BytesIO()
        # dpi=72 keeps the image file size small and well under Groq's 4MB limit.
        # Save through the figure object rather than pyplot's implicit
        # "current figure" so the right canvas is always the one written.
        fig.savefig(buf, format='png', bbox_inches='tight', dpi=72)
        return base64.b64encode(buf.getvalue()).decode('utf-8')
    except Exception as e:
        print(f"Error generating base64 spectrogram: {e}")
        return None
    finally:
        # Always release the figure, including on the error path; otherwise
        # pyplot keeps it registered and memory grows with every failed call.
        if fig is not None:
            plt.close(fig)
def generate_medical_advice_from_vision(base64_img):
    """
    Ask the Llama 4 Scout vision model (via Groq) to assess a heart-sound spectrogram.

    Args:
        base64_img: Base64-encoded PNG of the Mel-spectrogram. A falsy value
            short-circuits with an error message and no API call is made.

    Returns:
        The text content of the first completion choice, or a string starting
        with "Groq API Error:" if the request raises.
    """
    if not base64_img:
        return "Error: Could not process the audio into a visual spectrogram for the AI."

    # The prompt lines are kept flush-left so the text sent to the model
    # carries no extra leading indentation.
    prompt = """
You are an AI medical assistant specializing in cardiology. Look closely at this Mel-Spectrogram of a patient's Phonocardiogram (heart sound).
Based on the visual patterns in this spectrogram:
1. Does this look Normal or Abnormal?
2. What specific cardiovascular disease might this indicate (e.g., Aortic Stenosis, Mitral Regurgitation, Normal)?
3. Recommend general lifestyle or exercise advice based on your estimation.
4. Mention potential medication types usually associated with this.
Include a strict medical disclaimer stating that you are an AI and they must consult a doctor. Keep it under 200 words.
"""

    # One user turn carrying the instructions followed by the image as a data URL.
    text_part = {"type": "text", "text": prompt}
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/png;base64,{base64_img}"},
    }
    conversation = [{"role": "user", "content": [text_part, image_part]}]

    try:
        completion = client.chat.completions.create(
            # Groq's vision-capable model used for this request.
            model="meta-llama/llama-4-scout-17b-16e-instruct",
            messages=conversation,
            temperature=0.2,
            max_tokens=300,
        )
        return completion.choices[0].message.content
    except Exception as e:
        detail = str(e)
        print(f"Groq Vision API Error: {detail}")
        return f"Groq API Error: {detail}"
def text_to_speech(text):
    """
    Convert text into an MP3 file using gTTS and return the file's path.

    Args:
        text: The text to speak. Empty, whitespace-only, or non-string input
            yields ``None`` without creating a file.

    Returns:
        Path of a temporary ``.mp3`` file containing the speech, or ``None``
        if there was nothing to speak or synthesis failed (the error is
        printed). The caller owns the returned file; it is not auto-deleted.
    """
    if not isinstance(text, str) or not text.strip():
        print("gTTS Error: no text to convert")
        return None

    audio_path = None
    try:
        tts = gTTS(text=text, lang='en', slow=False)
        # Reserve a unique path, then close the handle before gTTS writes to
        # it: leaving it open leaks a descriptor and keeps the file open
        # while it is written by name.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
            audio_path = temp_file.name
        tts.save(audio_path)
        return audio_path
    except Exception as e:
        print(f"gTTS Error: {e}")
        # Do not leave an empty or partial temp file behind on failure.
        if audio_path is not None:
            try:
                os.remove(audio_path)
            except OSError:
                # Best-effort cleanup; the file may already be gone.
                pass
        return None
def evaluate_audio(audio_path):
    """
    Run the full pipeline triggered by the Gradio 'Evaluate' button.

    Args:
        audio_path: Path of the uploaded audio file; a falsy value means
            nothing was uploaded.

    Returns:
        A ``(advice_text, advice_audio_path)`` tuple. When no audio is given,
        the text is a prompt to upload a file and the audio path is ``None``.
    """
    if not audio_path:
        return "Please upload an audio file first.", None

    # Audio -> Base64 spectrogram image -> vision-model advice text.
    spectrogram_b64 = get_spectrogram_base64(audio_path)
    advice = generate_medical_advice_from_vision(spectrogram_b64)

    # Advice text -> spoken audio file.
    spoken_advice_path = text_to_speech(advice)
    return advice, spoken_advice_path