# Hugging Face Space app.py (commit 38b6368, author RathodHarish)
import logging
import os
import tempfile
from datetime import datetime, timezone

import google.generativeai as genai
import gradio as gr
import librosa
import numpy as np
import torch
import webrtcvad
from gtts import gTTS
from simple_salesforce import Salesforce
from transformers import WhisperProcessor, WhisperForConditionalGeneration
# Set up logging for usage metrics and debugging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)
# In-memory usage counter; resets on every process restart.
usage_metrics = {"total_assessments": 0}

# Environment variables for secure credentials
SF_USERNAME = os.getenv("SF_USERNAME")
SF_PASSWORD = os.getenv("SF_PASSWORD")
SF_SECURITY_TOKEN = os.getenv("SF_SECURITY_TOKEN")
SF_INSTANCE_URL = os.getenv("SF_INSTANCE_URL", "https://login.salesforce.com")
# SECURITY: no hard-coded fallback key. The previous default API key was
# committed to source control and must be considered compromised/rotated.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

# Initialize Salesforce; the app degrades gracefully (sf stays None) when
# credentials are absent or the connection fails.
sf = None
try:
    if all([SF_USERNAME, SF_PASSWORD, SF_SECURITY_TOKEN]):
        sf = Salesforce(
            username=SF_USERNAME,
            password=SF_PASSWORD,
            security_token=SF_SECURITY_TOKEN,
            instance_url=SF_INSTANCE_URL
        )
        logger.info("Connected to Salesforce for user management")
    else:
        logger.warning("Salesforce credentials missing; user management disabled")
except Exception as e:
    logger.error(f"Salesforce connection failed: {str(e)}")

# Initialize Google Gemini; chat stays None when the key is missing or the
# client fails, and get_chatbot_response handles that case.
chat = None
try:
    if not GEMINI_API_KEY:
        raise ValueError("GEMINI_API_KEY environment variable is not set")
    genai.configure(api_key=GEMINI_API_KEY)
    gemini_model = genai.GenerativeModel('gemini-1.5-flash')
    chat = gemini_model.start_chat(history=[])
    logger.info("Connected to Google Gemini for chatbot functionality")
except Exception as e:
    logger.error(f"Google Gemini initialization failed: {str(e)}")
    chat = None

# Load Whisper model for speech-to-text; forced decoder ids pin the output
# to English transcription.
whisper_processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
whisper_model.config.forced_decoder_ids = whisper_processor.get_decoder_prompt_ids(language="english", task="transcribe")

# Initialize VAD; mode 2 is a moderately aggressive speech filter.
vad = webrtcvad.Vad(mode=2)
# Chatbot knowledge base: prompt fragments that are joined and prepended to
# every user message sent to Gemini (see get_chatbot_response).
base_info = """
You are a highly advanced AI assistant named 'MindCare'.
Your role is to provide support in various aspects of health and well-being, including:
- **Mental health**: Emotional support, mindfulness, stress-relief exercises, anxiety management.
- **Medical guidance**: Basic symptom analysis, possible conditions, and medicine recommendations.
- **Decision-making support**: Helping users with personal, professional, and emotional choices.
- **General health advice**: Lifestyle improvements, nutrition, physical wellness, and mental well-being.
- **Emergency assistance**: If the user is in distress, suggest professional help or helpline numbers.
Your tone is always **empathetic, supportive, and informative**. You ensure users feel heard and cared for.
"""
# Guidance for stress/anxiety support and acute emotional distress.
mental_health = """
If the user is feeling stressed or anxious:
- Suggest mindfulness exercises, deep breathing techniques, or gratitude journaling.
- Encourage taking breaks, engaging in hobbies, and spending time in nature.
- Provide positive affirmations and self-care routines.
If the user is in distress:
- Offer emotional support and let them know they are not alone.
- Encourage them to reach out to a trusted person or professional.
- Provide emergency helpline numbers if needed.
"""
# Guidance for symptom analysis and medicine questions, with doctor caveats.
medical_assistance = """
If the user provides symptoms:
- Analyze symptoms and suggest possible conditions.
- Provide general advice but **never** replace a doctor’s consultation.
- Suggest lifestyle changes or basic home remedies if applicable.
- If symptoms are severe, advise them to visit a healthcare professional.
If the user asks about medicines:
- Suggest **common antibiotics** based on infection type (e.g., Amoxicillin for bacterial infections).
- Recommend **painkillers** like Paracetamol, Ibuprofen, or Diclofenac for pain relief.
- Mention precautions and possible side effects.
- Clearly **state that a doctor’s consultation is necessary before taking any medicine**.
"""
# Common medicine classes the bot may mention, always with a prescription caveat.
medicine_recommendation = """
If the user asks for a prescription, provide general guidance on **commonly used medicines**:
- **Antibiotics** (for bacterial infections): Amoxicillin, Azithromycin, Ciprofloxacin.
- **Painkillers**: Paracetamol (mild pain/fever), Ibuprofen (anti-inflammatory), Diclofenac (muscle pain).
- **Cold & Flu**: Antihistamines like Cetirizine, Cough syrups like Dextromethorphan.
- **Stomach Issues**: Antacids like Ranitidine, PPI like Omeprazole.
Always remind the user that **only a licensed doctor can prescribe medicines, and misuse can be harmful**.
"""
# Structured decision-making support guidance.
decision_guidance = """
If the user is struggling with a decision:
- Help them weigh pros and cons logically.
- Suggest considering their values, long-term goals, and emotions.
- Provide structured approaches like decision matrices or intuitive checks.
- Encourage seeking advice from trusted people if needed.
"""
# Crisis-response guidance for severe mental distress.
emergency_help = """
If the user mentions severe mental distress:
- Respond with immediate emotional support.
- Provide crisis helpline numbers (if applicable to the region).
- Encourage talking to a trusted friend, family member, or professional.
- Remind them that they are not alone and help is available.
"""
# All fragments are "\n".join()-ed into one system context per request.
context = [base_info, mental_health, medical_assistance, medicine_recommendation, decision_guidance, emergency_help]
def extract_health_features(audio, sr):
    """Extract coarse voice-health features from a mono waveform.

    Runs WebRTC VAD over 30 ms frames to keep only voiced audio, then
    derives pitch statistics (via librosa piptrack) and amplitude
    statistics (via RMS frames) on the voiced portion.

    Args:
        audio: 1-D float waveform (any scale; peak-normalized internally).
        sr: Sample rate in Hz — must be a rate WebRTC VAD accepts
            (e.g. 16000, which is what analyze_voice loads).

    Returns:
        Dict with "pitch" (Hz), "jitter" (percent), "shimmer" (percent),
        and "energy" (mean RMS amplitude).

    Raises:
        ValueError: If VAD finds no voiced frames.
    """
    try:
        # Peak-normalize once so the int16 conversion for VAD uses the
        # full range (original recomputed the max twice).
        peak = np.max(np.abs(audio))
        audio = audio / peak if peak != 0 else audio
        frame_duration = 30  # ms — a frame length WebRTC VAD supports
        frame_samples = int(sr * frame_duration / 1000)
        frames = [audio[i:i + frame_samples] for i in range(0, len(audio), frame_samples)]
        voiced_frames = [
            frame for frame in frames
            if len(frame) == frame_samples and vad.is_speech((frame * 32768).astype(np.int16).tobytes(), sr)
        ]
        if not voiced_frames:
            raise ValueError("No voiced segments detected")
        voiced_audio = np.concatenate(voiced_frames)
        # Pitch search restricted to the typical adult speech range.
        pitches, magnitudes = librosa.piptrack(y=voiced_audio, sr=sr, fmin=75, fmax=300)
        valid_pitches = [p for p in pitches[magnitudes > 0] if 75 <= p <= 300]
        pitch = np.mean(valid_pitches) if valid_pitches else 0
        # Jitter approximated as relative pitch variability; capped so a
        # pitch-tracking outlier cannot dominate the score.  Note the cap
        # is on the ratio (the returned percent can reach 1000%).
        jitter = np.std(valid_pitches) / pitch if pitch and valid_pitches else 0
        if jitter > 10:
            jitter = 10
            logger.warning("Jitter ratio capped at 10")
        amplitudes = librosa.feature.rms(y=voiced_audio, frame_length=2048, hop_length=512)[0]
        # Shimmer approximated as relative amplitude variability, same cap.
        shimmer = np.std(amplitudes) / np.mean(amplitudes) if np.mean(amplitudes) else 0
        if shimmer > 10:
            shimmer = 10
            logger.warning("Shimmer ratio capped at 10")
        # Reuse the RMS frames computed above instead of recomputing the
        # identical librosa.feature.rms call (original did it twice).
        energy = np.mean(amplitudes)
        return {
            "pitch": pitch,
            "jitter": jitter * 100,    # as percent
            "shimmer": shimmer * 100,  # as percent
            "energy": energy
        }
    except Exception as e:
        logger.error(f"Feature extraction failed: {str(e)}")
        raise
def transcribe_audio(audio):
    """Transcribe a 16 kHz waveform to English text with Whisper-tiny.

    Args:
        audio: 1-D float waveform sampled at 16 kHz.

    Returns:
        The decoded transcription string, or None if any step fails.
    """
    try:
        processed = whisper_processor(audio, sampling_rate=16000, return_tensors="pt")
        # Inference only — no gradients needed.
        with torch.no_grad():
            token_ids = whisper_model.generate(processed["input_features"])
        text = whisper_processor.batch_decode(token_ids, skip_special_tokens=True)[0]
        logger.info(f"Transcription: {text}")
        return text
    except Exception as err:
        logger.error(f"Transcription failed: {str(err)}")
        return None
def get_chatbot_response(message):
    """Generate a MindCare chatbot reply and a spoken MP3 version of it.

    Args:
        message: The user's free-text message.

    Returns:
        Tuple of (response text, path to an MP3 file or None).  The MP3
        path is handed to the Gradio Audio component; the file is not
        deleted here because Gradio still needs to read it.
    """
    if not chat or not message:
        return "Unable to generate chatbot response due to missing input or model.", None
    # Prepend the full knowledge-base context to every message.
    full_context = "\n".join(context) + f"\nUser: {message}\nMindCare:"
    try:
        response = chat.send_message(full_context).text
        # Create a *closed* temp path for gTTS.  The previous code called
        # tts.save() on a still-open NamedTemporaryFile, which fails on
        # Windows (the open handle blocks a second writer).
        fd, audio_path = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)
        gTTS(text=response, lang="en", slow=False).save(audio_path)
        return response, audio_path
    except Exception as e:
        logger.error(f"Chatbot response failed: {str(e)}")
        return "Error generating chatbot response.", None
def analyze_symptoms(text):
    """Map keyword mentions in transcribed speech to preliminary feedback.

    Args:
        text: Transcribed user speech (any casing).

    Returns:
        A single feedback sentence: respiratory advice, stress/fatigue
        advice, or a fallback asking for clearer symptoms.
    """
    text = text.lower()
    if "cough" in text or "difficulty breathing" in text:
        return "Based on your input, you may have a respiratory issue, such as bronchitis or asthma. Please consult a doctor."
    # "stressed" contains "stress", so one substring check covers both
    # (the original tested them separately).
    if any(keyword in text for keyword in ("stress", "tired", "fatigue")):
        return "Your description suggests possible stress or fatigue, potentially linked to anxiety or exhaustion. Consider seeking medical advice."
    return "Your input didn’t clearly indicate specific symptoms. Please describe any health concerns (e.g., cough, stress) and consult a healthcare provider."
def analyze_voice(audio_file=None):
    """Run the full voice health assessment pipeline on an audio file.

    Increments the usage counter, loads the audio at 16 kHz, extracts
    acoustic features, transcribes the speech, maps keywords to symptom
    feedback, optionally stores the result in Salesforce, then deletes
    the source file.

    Args:
        audio_file: Path to the recorded/uploaded audio file.

    Returns:
        A multi-line feedback string, or an "Error: ..." string on failure.
    """
    global usage_metrics
    usage_metrics["total_assessments"] += 1
    logger.info(f"Total assessments: {usage_metrics['total_assessments']}")
    try:
        if audio_file and os.path.exists(audio_file):
            # 16 kHz matches both the Whisper input rate and a rate VAD accepts.
            audio, sr = librosa.load(audio_file, sr=16000)
        else:
            raise ValueError("No valid audio file provided for analysis")
        # len(audio) < sr means less than one second of samples.
        if len(audio) < sr:
            raise ValueError("Audio too short (minimum 1 second)")
        features = extract_health_features(audio, sr)
        transcription = transcribe_audio(audio)
        symptom_feedback = analyze_symptoms(transcription) if transcription else "No transcription available. Please record again with clear speech."
        feedback = []
        # Jitter percent is used as a rough respiratory indicator and
        # shimmer percent as a rough stress indicator.
        respiratory_score = features["jitter"]
        mental_health_score = features["shimmer"]
        if respiratory_score > 1.0:
            feedback.append(f"Your voice indicates elevated jitter ({respiratory_score:.2f}%), which may suggest respiratory issues. Consult a doctor.")
        if mental_health_score > 5.0:
            feedback.append(f"Your voice shows elevated shimmer ({mental_health_score:.2f}%), possibly indicating stress or emotional strain. Consider a health check.")
        if features["energy"] < 0.01:
            feedback.append(f"Your vocal energy is low ({features['energy']:.4f}), which might point to fatigue. Seek medical advice if this persists.")
        # "No concerns" only when no acoustic flag fired AND transcription
        # succeeded — the startswith() check matches the fallback string
        # assigned to symptom_feedback above.
        if not feedback and not symptom_feedback.startswith("No transcription"):
            feedback.append("Your voice analysis shows no immediate health concerns based on current data.")
        feedback.append("\n**Symptom Feedback (Based on Your Input)**:")
        feedback.append(symptom_feedback)
        feedback.append("\n**Voice Analysis Details**:")
        feedback.append(f"Pitch: {features['pitch']:.2f} Hz (average fundamental frequency)")
        feedback.append(f"Jitter: {respiratory_score:.2f}% (pitch variation, higher values may indicate respiratory issues)")
        feedback.append(f"Shimmer: {mental_health_score:.2f}% (amplitude variation, higher values may indicate stress)")
        feedback.append(f"Energy: {features['energy']:.4f} (vocal intensity, lower values may indicate fatigue)")
        feedback.append(f"Transcription: {transcription if transcription else 'None'}")
        feedback.append("\n**Disclaimer**: This is a preliminary analysis, not a medical diagnosis. Always consult a healthcare provider for professional evaluation.")
        feedback_str = "\n".join(feedback)
        # Persist only when the Salesforce client connected at startup.
        if sf:
            store_in_salesforce(audio_file, feedback_str, respiratory_score, mental_health_score, features, transcription)
        # Delete the recording after processing for privacy/compliance;
        # deletion failure is logged but does not fail the assessment.
        if audio_file and os.path.exists(audio_file):
            try:
                os.remove(audio_file)
                logger.info(f"Deleted audio file: {audio_file} for compliance")
            except Exception as e:
                logger.error(f"Failed to delete audio file: {str(e)}")
        return feedback_str
    except Exception as e:
        logger.error(f"Audio processing failed: {str(e)}")
        return f"Error: {str(e)}"
def store_in_salesforce(audio_file, feedback, respiratory_score, mental_health_score, features, transcription):
    """Persist one assessment as a HealthAssessment__c record in Salesforce.

    Best-effort: any Salesforce error is logged and swallowed so a storage
    failure never breaks the user-facing assessment.

    Args:
        audio_file: Path of the analyzed file (basename stored), or None.
        feedback: The full feedback string shown to the user.
        respiratory_score: Jitter percent.
        mental_health_score: Shimmer percent.
        features: Dict from extract_health_features.
        transcription: Whisper transcription, or None.
    """
    try:
        sf.HealthAssessment__c.create({
            # Timezone-aware UTC timestamp; datetime.utcnow() is deprecated
            # and produces a naive datetime.
            "AssessmentDate__c": datetime.now(timezone.utc).isoformat(),
            "Feedback__c": feedback,
            "RespiratoryScore__c": float(respiratory_score),
            "MentalHealthScore__c": float(mental_health_score),
            "AudioFileName__c": os.path.basename(audio_file) if audio_file else "user_recorded_audio",
            "Pitch__c": float(features["pitch"]),
            "Jitter__c": float(features["jitter"]),
            "Shimmer__c": float(features["shimmer"]),
            "Energy__c": float(features["energy"]),
            "Transcription__c": transcription or "None"
        })
        logger.info("Stored assessment in Salesforce")
    except Exception as e:
        logger.error(f"Salesforce storage failed: {str(e)}")
# Gradio UI: a voice-analysis panel (left) and a text chatbot panel (right).
with gr.Blocks(title="MindCare Health Assistant") as demo:
    gr.Markdown("# MindCare Health Assistant")
    gr.Markdown("This tool is accessible via web and mobile. Use the sections below for health assessments and suggestions.")
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Voice Analysis")
            gr.Markdown("Record or upload your voice (minimum 1 second) to receive a preliminary health check. Speak clearly in English about your symptoms (e.g., 'I have a cough' or 'I feel stressed').")
            voice_in = gr.Audio(type="filepath", label="Record or Upload Your Voice (WAV, MP3, FLAC, 1+ sec)", format="wav")
            voice_results = gr.Textbox(label="Health Assessment Results", elem_id="health-results")
            analyze_btn = gr.Button("Submit")
            analyze_reset = gr.Button("Clear")
        with gr.Column():
            gr.Markdown("### Health Suggestions")
            gr.Markdown("Enter a message to get personalized health suggestions from MindCare.")
            chat_in = gr.Textbox(label="Enter your message")
            chat_out = gr.Textbox(label="Response")
            chat_audio = gr.Audio(label="Response Audio")
            chat_btn = gr.Button("Submit")
            chat_reset = gr.Button("Clear")
    # Voice-analysis wiring: run the pipeline, or reset both widgets.
    analyze_btn.click(fn=analyze_voice, inputs=[voice_in], outputs=[voice_results])
    analyze_reset.click(
        fn=lambda: (gr.update(value=None), gr.update(value="")),
        inputs=None,
        outputs=[voice_in, voice_results],
    )
    # Chatbot wiring: text + spoken reply, or reset all three widgets.
    chat_btn.click(fn=get_chatbot_response, inputs=[chat_in], outputs=[chat_out, chat_audio])
    chat_reset.click(
        fn=lambda: (gr.update(value=""), gr.update(value=""), gr.update(value=None)),
        inputs=None,
        outputs=[chat_in, chat_out, chat_audio],
    )
if __name__ == "__main__":
    # The previous log line hard-coded a fixed date/time, which was wrong
    # on every run; logging.basicConfig already stamps each record.
    logger.info("Starting MindCare Health Analyzer")
    # 0.0.0.0:7860 is the standard Hugging Face Spaces binding.
    demo.launch(server_name="0.0.0.0", server_port=7860)