# Cardio-AI Assistant — backend.py
# (Hugging Face Space file header: last updated by MalikShehram, commit d2bfab4, verified)
import os
import tempfile
import base64
import io
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from gtts import gTTS
from groq import Groq
from dotenv import load_dotenv
# 1. Load the environment variables FIRST — GROQ_API_KEY must be present in
#    the environment before the Groq() client below is constructed.
load_dotenv()
# 2. Initialize the Groq client (reads GROQ_API_KEY from the environment).
client = Groq()
def get_spectrogram_base64(audio_path):
    """
    Generate a Mel-Spectrogram PNG of an audio file and return it Base64-encoded.

    Args:
        audio_path: Path to an audio file readable by librosa.

    Returns:
        Base64-encoded PNG string, or None if loading/rendering fails.
    """
    try:
        # sr=None keeps the recording's native sampling rate.
        y, sr = librosa.load(audio_path, sr=None)
        fig, ax = plt.subplots(figsize=(6, 3))  # Slightly smaller dimensions
        try:
            # fmax=2000: heart sounds occupy the low-frequency band.
            S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=2000)
            S_dB = librosa.power_to_db(S, ref=np.max)
            librosa.display.specshow(S_dB, sr=sr, fmax=2000, ax=ax, cmap='magma')
            buf = io.BytesIO()
            # dpi=72 ensures the image file size is extremely small and well under Groq's 4MB limit
            plt.savefig(buf, format='png', bbox_inches='tight', dpi=72)
        finally:
            # Always release the figure, even if rendering raises; otherwise
            # each failed request leaks a matplotlib figure.
            plt.close(fig)
        buf.seek(0)
        base64_image = base64.b64encode(buf.read()).decode('utf-8')
        return base64_image
    except Exception as e:
        print(f"Error generating base64 spectrogram: {e}")
        return None
def generate_medical_advice_from_vision(base64_img):
    """
    Ask Groq's Llama 4 Scout vision model to interpret a heart-sound
    Mel-Spectrogram image and produce cardiology-oriented guidance.

    Args:
        base64_img: Base64-encoded PNG of the spectrogram, or a falsy value
            when spectrogram generation failed upstream.

    Returns:
        The model's text answer, or an error-message string on failure.
    """
    if not base64_img:
        return "Error: Could not process the audio into a visual spectrogram for the AI."
    prompt = """
You are an AI medical assistant specializing in cardiology. Look closely at this Mel-Spectrogram of a patient's Phonocardiogram (heart sound).
Based on the visual patterns in this spectrogram:
1. Does this look Normal or Abnormal?
2. What specific cardiovascular disease might this indicate (e.g., Aortic Stenosis, Mitral Regurgitation, Normal)?
3. Recommend general lifestyle or exercise advice based on your estimation.
4. Mention potential medication types usually associated with this.
Include a strict medical disclaimer stating that you are an AI and they must consult a doctor. Keep it under 200 words.
"""
    # Build the multimodal user message: the instructions plus the inline
    # data-URL image.
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/png;base64,{base64_img}"},
    }
    user_message = {
        "role": "user",
        "content": [{"type": "text", "text": prompt}, image_part],
    }
    try:
        completion = client.chat.completions.create(
            # Groq's current active vision model.
            model="meta-llama/llama-4-scout-17b-16e-instruct",
            messages=[user_message],
            temperature=0.2,
            max_tokens=300,
        )
        return completion.choices[0].message.content
    except Exception as exc:
        message = str(exc)
        print(f"Groq Vision API Error: {message}")
        return f"Groq API Error: {message}"
def text_to_speech(text):
    """
    Convert text into a spoken MP3 file using gTTS.

    Args:
        text: The text to synthesize.

    Returns:
        Filesystem path to a temporary .mp3 file, or None on failure.
    """
    try:
        tts = gTTS(text=text, lang='en', slow=False)
        # Create the temp file and close our handle BEFORE gTTS writes to the
        # path: NamedTemporaryFile(delete=False) left the descriptor open,
        # which leaks it and fails on Windows (the open file can't be
        # re-opened for writing by gTTS).
        fd, path = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)
        tts.save(path)
        return path
    except Exception as e:
        print(f"gTTS Error: {e}")
        return None
def evaluate_audio(audio_path):
    """
    Main pipeline behind the Gradio 'Evaluate' button: audio file ->
    spectrogram image -> vision-model advice -> spoken advice.

    Args:
        audio_path: Path to the uploaded heart-sound recording, or a falsy
            value when nothing was uploaded.

    Returns:
        Tuple of (advice text, path to spoken-advice MP3 or None).
    """
    if not audio_path:
        return "Please upload an audio file first.", None
    # Step 1: render the recording as a Base64 spectrogram image.
    spectrogram_b64 = get_spectrogram_base64(audio_path)
    # Step 2: have the vision model read the spectrogram and write advice.
    advice = generate_medical_advice_from_vision(spectrogram_b64)
    # Step 3: narrate the advice for audio playback.
    return advice, text_to_speech(advice)