import os
import json
import logging
from tempfile import NamedTemporaryFile

import gradio as gr
import requests
import speech_recognition as sr
from dotenv import load_dotenv

# Load environment variables from Hugging Face secrets or .env
load_dotenv()
HUGGINGFACE_API_TOKEN = os.getenv("HUGGING_FACE_API")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# Default to a full Groq model id — bare "llama3-8b" is not a served model name.
GROQ_MODEL = os.getenv("GROQ_MODEL", "llama3-8b-8192")  # alternative: mixtral-8x7b-32768

# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# API headers
groq_headers = {
    "Authorization": f"Bearer {GROQ_API_KEY}",
    "Content-Type": "application/json",
}
tts_headers = {
    "Authorization": f"Bearer {HUGGINGFACE_API_TOKEN}",
}

# API endpoints
GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
TTS_API_URL = "https://api-inference.huggingface.co/models/microsoft/speecht5_tts"

# Timeout (seconds) for outbound HTTP calls; without one, requests can hang forever.
REQUEST_TIMEOUT = 60

# Emotion dictionary: dropdown key -> human-readable description used in the prompt.
emotion_options = {
    "neutral": "Neutral or balanced mood",
    "positive": "Generally positive or optimistic",
    "happy": "Feeling joy or happiness",
    "excited": "Feeling enthusiastic or energetic",
    "sad": "Feeling down or unhappy",
    "angry": "Feeling frustrated or irritated",
    "negative": "Generally negative or pessimistic",
    "anxious": "Feeling worried or nervous",
}


def transcribe_audio(audio_path):
    """Transcribe the audio file at *audio_path* via Google's free recognizer.

    Returns the recognized text, or "" if the file cannot be read or
    recognition fails (the error is logged, never raised to the caller).
    """
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as source:
            audio_data = recognizer.record(source)
        return recognizer.recognize_google(audio_data)
    except Exception as e:
        logger.error(f"Audio transcription failed: {e}")
        return ""


def get_groq_response(prompt, chat_history):
    """Ask the Groq chat-completions API for a reply.

    *prompt* becomes the system message; *chat_history* is a sequence of
    (user_text, assistant_text) pairs — assistant_text may be None for the
    turn currently in flight. Returns the assistant's reply string, or a
    fallback apology on any API error.
    """
    messages = [{"role": "system", "content": prompt}]
    # BUGFIX: the original collapsed each pair to a single "user" message
    # (a non-empty tuple is always truthy), so the model never saw its own
    # previous replies. Replay both sides of every completed turn.
    for user_msg, assistant_msg in chat_history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Limit history, but always keep the system prompt (the original
    # messages[-20:] dropped it once the conversation grew long enough).
    trimmed = [messages[0]] + messages[1:][-20:]

    data = {
        "model": GROQ_MODEL,
        "messages": trimmed,
        "temperature": 0.7,
        "max_tokens": 1024,
    }
    try:
        res = requests.post(
            GROQ_API_URL, headers=groq_headers, json=data, timeout=REQUEST_TIMEOUT
        )
        res.raise_for_status()
        return res.json()["choices"][0]["message"]["content"]
    except Exception as e:
        logger.error(f"Groq API error: {e}")
        return "Sorry, I couldn't generate a response right now."


def generate_audio(text):
    """Synthesize *text* with the Hugging Face SpeechT5 TTS endpoint.

    Returns the path of a temporary .wav file on success, or None on
    failure (the error is logged). Note: the temp file is created with
    delete=False, so the OS/temp cleanup is responsible for removing it.
    """
    response = requests.post(
        TTS_API_URL,
        headers=tts_headers,
        json={"inputs": text},
        timeout=REQUEST_TIMEOUT,
    )
    if response.status_code == 200:
        with NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            tmp.write(response.content)
            return tmp.name
    else:
        logger.error(f"TTS generation failed: {response.text}")
        return None


# Conversation state (module-level: shared by all sessions of this process).
conversation_history = []


def chat_with_ai(audio, text_input, emotion, history):
    """Gradio handler: turn user audio/text into an AI reply plus TTS audio.

    Audio (if provided and transcribable) takes precedence over typed text.
    Returns (reply_text, audio_path_or_None, updated_history) matching the
    [status_box, output_audio, chat_history] outputs wired in the UI.
    """
    global conversation_history
    user_input = text_input.strip() if text_input else ""
    if audio:
        transcription = transcribe_audio(audio)
        if transcription:
            user_input = transcription
    if not user_input:
        return "Please provide a message or audio.", None, history

    # Record the user turn first; the assistant slot is filled in below.
    conversation_history.append((user_input, None))
    # .get() keeps an unexpected dropdown value from raising KeyError.
    prompt = f"""You are an empathetic AI assistant. The user feels {emotion} ({emotion_options.get(emotion, 'Unspecified mood')}). Respond supportively and helpfully in a concise manner."""
    ai_response = get_groq_response(prompt, conversation_history)
    conversation_history[-1] = (user_input, ai_response)
    audio_output_path = generate_audio(ai_response)
    return ai_response, audio_output_path, conversation_history


def clear_conversation():
    """Reset the shared conversation state and blank the UI widgets."""
    global conversation_history
    conversation_history = []
    return [], None, None, "Conversation cleared."
# ---------------------------------------------------------------------------
# Gradio interface: emotion selector, chat window, text/voice input, TTS out.
# ---------------------------------------------------------------------------
with gr.Blocks(title="Mind AID AI Assistant") as iface:
    gr.Markdown("# Mind AID: Emotion-Aware Conversational AI")
    gr.Markdown("AI assistant with emotion-awareness, powered by Groq and Hugging Face TTS.")

    with gr.Row():
        with gr.Column(scale=3):
            emotion = gr.Dropdown(
                label="How are you feeling?",
                choices=list(emotion_options.keys()),
                value="neutral",
            )
            emotion_description = gr.Markdown("**Current mood:** Neutral or balanced mood")
            # Keep the mood caption in sync with the dropdown selection.
            emotion.change(
                fn=lambda mood: f"**Current mood:** {emotion_options[mood]}",
                inputs=emotion,
                outputs=emotion_description,
            )
        with gr.Column(scale=1):
            clear_btn = gr.Button("Clear Conversation")
            status_box = gr.Textbox(label="Status", interactive=False)

    with gr.Row():
        chat_history = gr.Chatbot(label="Conversation", height=400)

    with gr.Row():
        with gr.Column(scale=4):
            text_input = gr.Textbox(label="Type your message here", lines=2)
        with gr.Column(scale=1):
            audio_input = gr.Audio(type="filepath", label="Or speak")

    with gr.Row():
        submit_btn = gr.Button("Send", variant="primary")

    with gr.Row():
        output_audio = gr.Audio(label="AI Voice")

    # Clicking "Send" and pressing Enter in the textbox run the same handler
    # with the same wiring, so register both triggers in one pass.
    chat_inputs = [audio_input, text_input, emotion, chat_history]
    chat_outputs = [status_box, output_audio, chat_history]
    for register in (submit_btn.click, text_input.submit):
        register(fn=chat_with_ai, inputs=chat_inputs, outputs=chat_outputs)

    clear_btn.click(
        fn=clear_conversation,
        inputs=[],
        outputs=[chat_history, audio_input, text_input, status_box],
    )

iface.launch(share=True)