File size: 4,150 Bytes
61439fb
 
b822cb2
 
 
 
 
 
61439fb
 
596e46a
61439fb
596e46a
 
 
b822cb2
61439fb
 
b822cb2
61439fb
c5ca8bb
61439fb
 
b822cb2
c5ca8bb
b822cb2
 
 
 
 
 
c5ca8bb
 
 
b822cb2
 
 
 
61439fb
b822cb2
 
61439fb
b822cb2
61439fb
d2bfab4
61439fb
b822cb2
d2bfab4
61439fb
b822cb2
 
61439fb
b822cb2
 
c5ca8bb
b822cb2
 
61439fb
b822cb2
61439fb
 
 
 
d2bfab4
 
b822cb2
 
 
 
 
 
 
 
 
 
 
 
 
 
c5ca8bb
b822cb2
61439fb
 
 
c5ca8bb
 
d2bfab4
 
61439fb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b822cb2
61439fb
 
 
 
b822cb2
 
61439fb
b822cb2
 
61439fb
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import os
import tempfile
import base64
import io
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from gtts import gTTS
from groq import Groq
from dotenv import load_dotenv

# 1. Load environment variables from a local .env file FIRST, before the
#    client is constructed — Groq() reads GROQ_API_KEY from the environment.
load_dotenv()

# 2. Initialize the Groq client (shared by all request functions below).
client = Groq()

def get_spectrogram_base64(audio_path):
    """
    Generate a Mel-Spectrogram PNG for the given audio file, aggressively
    compress it, and return it as a Base64-encoded string.

    Args:
        audio_path: Path to an audio file readable by librosa.

    Returns:
        Base64-encoded PNG string, or None if loading/rendering fails.
    """
    try:
        # sr=None preserves the recording's native sampling rate.
        y, sr = librosa.load(audio_path, sr=None)
        fig, ax = plt.subplots(figsize=(6, 3))  # Slightly smaller dimensions
        try:
            # fmax=2000 focuses the plot on the low-frequency band relevant
            # to phonocardiogram (heart-sound) recordings.
            S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=2000)
            S_dB = librosa.power_to_db(S, ref=np.max)
            librosa.display.specshow(S_dB, sr=sr, fmax=2000, ax=ax, cmap='magma')

            buf = io.BytesIO()
            # dpi=72 ensures the image file size is extremely small and well
            # under Groq's 4MB limit.
            plt.savefig(buf, format='png', bbox_inches='tight', dpi=72)
        finally:
            # Always release the figure — even when rendering/saving raises —
            # so repeated calls don't leak matplotlib figures.
            plt.close(fig)
        buf.seek(0)

        base64_image = base64.b64encode(buf.read()).decode('utf-8')
        return base64_image
    except Exception as e:
        print(f"Error generating base64 spectrogram: {e}")
        return None

def generate_medical_advice_from_vision(base64_img):
    """
    Send the Base64 spectrogram image to Groq's Llama 4 Scout vision model
    and return its textual assessment (or an error message string).
    """
    # Guard clause: upstream spectrogram generation may have failed.
    if not base64_img:
        return "Error: Could not process the audio into a visual spectrogram for the AI."

    prompt = """
    You are an AI medical assistant specializing in cardiology. Look closely at this Mel-Spectrogram of a patient's Phonocardiogram (heart sound). 
    
    Based on the visual patterns in this spectrogram:
    1. Does this look Normal or Abnormal?
    2. What specific cardiovascular disease might this indicate (e.g., Aortic Stenosis, Mitral Regurgitation, Normal)?
    3. Recommend general lifestyle or exercise advice based on your estimation.
    4. Mention potential medication types usually associated with this.
    
    Include a strict medical disclaimer stating that you are an AI and they must consult a doctor. Keep it under 200 words.
    """

    # Build the multimodal user turn: instruction text plus an inline
    # data-URL image, per the OpenAI-compatible chat format Groq accepts.
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/png;base64,{base64_img}"},
    }
    user_turn = {
        "role": "user",
        "content": [{"type": "text", "text": prompt}, image_part],
    }

    try:
        response = client.chat.completions.create(
            # Groq's current active vision model.
            model="meta-llama/llama-4-scout-17b-16e-instruct",
            messages=[user_turn],
            temperature=0.2,
            max_tokens=300,
        )
        return response.choices[0].message.content
    except Exception as e:
        actual_error = str(e)
        print(f"Groq Vision API Error: {actual_error}")
        return f"Groq API Error: {actual_error}"

def text_to_speech(text):
    """
    Convert the generated text into a spoken MP3 file using gTTS.

    Args:
        text: The text to synthesize.

    Returns:
        Filesystem path of the generated .mp3 file, or None on failure.
    """
    try:
        tts = gTTS(text=text, lang='en', slow=False)
        # mkstemp returns an open OS-level descriptor; close it right away so
        # gTTS can write to the path. (The previous NamedTemporaryFile
        # approach leaked the open handle and can't re-open the file on
        # Windows while the handle is held.)
        fd, mp3_path = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)
        tts.save(mp3_path)
        return mp3_path
    except Exception as e:
        print(f"gTTS Error: {e}")
        return None

def evaluate_audio(audio_path):
    """
    Full pipeline behind the Gradio 'Evaluate' button:
    uploaded audio -> spectrogram image -> vision-model advice -> spoken advice.

    Returns:
        Tuple of (advice_text, advice_audio_path) for the Gradio outputs.
    """
    # Guard clause: nothing to evaluate without an uploaded recording.
    if not audio_path:
        return "Please upload an audio file first.", None

    # Render the recording as a Base64 spectrogram, have the Llama 4 Scout
    # vision model interpret it, then voice the resulting advice.
    spectrogram_b64 = get_spectrogram_base64(audio_path)
    advice = generate_medical_advice_from_vision(spectrogram_b64)
    return advice, text_to_speech(advice)