Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| import requests | |
| import json | |
| import speech_recognition as sr | |
| from tempfile import NamedTemporaryFile | |
| import logging | |
| from dotenv import load_dotenv | |
| # Load environment variables from Hugging Face secrets or .env | |
| load_dotenv() | |
# --- Configuration -----------------------------------------------------------
# Credentials come from the environment (Space secrets or the .env loaded
# above). Either may be None when unset; that surfaces later as a 401 from
# the respective API rather than an error here.
HUGGINGFACE_API_TOKEN = os.getenv("HUGGING_FACE_API")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# FIX: "llama3-8b" is not a valid Groq model id — Groq model names carry a
# context-size suffix (e.g. llama3-8b-8192, mixtral-8x7b-32768), so the old
# default failed for every deployment that did not set GROQ_MODEL explicitly.
GROQ_MODEL = os.getenv("GROQ_MODEL", "llama3-8b-8192")

# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# API headers
groq_headers = {
    "Authorization": f"Bearer {GROQ_API_KEY}",
    "Content-Type": "application/json",
}
tts_headers = {
    "Authorization": f"Bearer {HUGGINGFACE_API_TOKEN}",
}

# API endpoints
GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
TTS_API_URL = "https://api-inference.huggingface.co/models/microsoft/speecht5_tts"

# Emotion labels offered in the UI dropdown, mapped to the human-readable
# description shown beneath it and interpolated into the system prompt.
emotion_options = {
    "neutral": "Neutral or balanced mood",
    "positive": "Generally positive or optimistic",
    "happy": "Feeling joy or happiness",
    "excited": "Feeling enthusiastic or energetic",
    "sad": "Feeling down or unhappy",
    "angry": "Feeling frustrated or irritated",
    "negative": "Generally negative or pessimistic",
    "anxious": "Feeling worried or nervous",
}
def transcribe_audio(audio_path):
    """Transcribe the audio file at *audio_path* to text.

    Uses the free Google Web Speech endpoint via the SpeechRecognition
    package. Best-effort: any failure (unreadable file, network error,
    unintelligible speech) is logged and an empty string is returned.
    """
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as source:
            return recognizer.recognize_google(recognizer.record(source))
    except Exception as err:
        logger.error(f"Audio transcription failed: {err}")
        return ""
def get_groq_response(prompt, chat_history):
    """Query the Groq chat-completions API and return the assistant's reply.

    Args:
        prompt: system prompt establishing the assistant's persona.
        chat_history: list of (user_message, assistant_message) pairs; the
            assistant slot may be None for the in-flight turn and is skipped.

    Returns:
        The reply text, or a fixed apology string on any network/API error
        (the error is logged).
    """
    messages = [{"role": "system", "content": prompt}]
    # BUG FIX: the original collapsed each (user, assistant) pair into a
    # single message — `("user", msg[0]) if msg else ("assistant", msg[1])`
    # always took the user half of a non-empty tuple, so assistant turns
    # were never sent. Emit both roles, skipping empty/None entries.
    for user_msg, assistant_msg in chat_history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    data = {
        "model": GROQ_MODEL,
        # Limit history; NOTE(review): truncation can drop the system prompt
        # once the conversation exceeds 20 messages — confirm acceptable.
        "messages": messages[-20:],
        "temperature": 0.7,
        "max_tokens": 1024,
    }
    try:
        # Timeout so a stalled upstream call cannot hang the UI forever.
        res = requests.post(GROQ_API_URL, headers=groq_headers, json=data, timeout=30)
        res.raise_for_status()
        return res.json()["choices"][0]["message"]["content"]
    except Exception as e:
        logger.error(f"Groq API error: {e}")
        return "Sorry, I couldn't generate a response right now."
def generate_audio(text):
    """Synthesize *text* to speech via the Hugging Face SpeechT5 endpoint.

    Returns the path of a persistent temporary .wav file holding the audio,
    or None when the API call fails (the failure body is logged).
    """
    reply = requests.post(TTS_API_URL, headers=tts_headers, json={"inputs": text})
    if reply.status_code != 200:
        logger.error(f"TTS generation failed: {reply.text}")
        return None
    # delete=False so Gradio can read the file after this handle closes.
    with NamedTemporaryFile(delete=False, suffix=".wav") as wav_file:
        wav_file.write(reply.content)
        return wav_file.name
# Conversation state: list of (user_message, assistant_message) pairs.
# NOTE(review): as a module-level global this is shared by every concurrent
# visitor of the app — per-session gr.State would isolate users; confirm
# whether shared state is intentional.
conversation_history = []
# Main chat logic
def chat_with_ai(audio, text_input, emotion, history):
    """Handle one chat turn: transcribe audio (if any), query Groq, TTS the reply.

    Args:
        audio: filepath of a recorded clip, or None. A successful
            transcription overrides any typed text.
        text_input: typed message (may be empty/None).
        emotion: key selected in the emotion dropdown.
        history: chatbot history supplied by Gradio. NOTE(review): currently
            unused — the module global conversation_history is authoritative;
            confirm this is intentional.

    Returns:
        (reply_text_or_error, tts_wav_path_or_None, updated_history)
    """
    global conversation_history
    user_input = text_input.strip() if text_input else ""
    if audio:
        transcription = transcribe_audio(audio)
        if transcription:
            user_input = transcription  # spoken input takes priority
    if not user_input:
        return "Please provide a message or audio.", None, history
    # Reserve a slot for this turn; filled in once the model responds.
    conversation_history.append((user_input, None))
    # FIX: look the emotion up defensively — an unexpected value previously
    # raised KeyError and crashed the handler.
    mood = emotion_options.get(emotion, "Unspecified mood")
    prompt = f"""You are an empathetic AI assistant. The user feels {emotion} ({mood}).
Respond supportively and helpfully in a concise manner."""
    ai_response = get_groq_response(prompt, conversation_history)
    conversation_history[-1] = (user_input, ai_response)
    audio_output_path = generate_audio(ai_response)
    return ai_response, audio_output_path, conversation_history
def clear_conversation():
    """Reset the shared conversation state and blank the UI widgets.

    Returns values for (chatbot, audio_input, text_input, status_box).
    """
    global conversation_history
    conversation_history = []
    blank_widgets = ([], None, None)
    return (*blank_widgets, "Conversation cleared.")
# Gradio Interface
# Layout: emotion dropdown + live mood blurb (left) and clear button + status
# box (right), then the chatbot, the text/audio inputs, the Send button, and
# the synthesized voice reply.
with gr.Blocks(title="Mind AID AI Assistant") as iface:
    gr.Markdown("# Mind AID: Emotion-Aware Conversational AI")
    gr.Markdown("AI assistant with emotion-awareness, powered by Groq and Hugging Face TTS.")
    with gr.Row():
        with gr.Column(scale=3):
            emotion = gr.Dropdown(
                label="How are you feeling?",
                choices=list(emotion_options.keys()),
                value="neutral"
            )
            emotion_description = gr.Markdown("**Current mood:** Neutral or balanced mood")
            # Keep the mood blurb in sync with the dropdown selection.
            emotion.change(
                fn=lambda e: f"**Current mood:** {emotion_options[e]}",
                inputs=emotion,
                outputs=emotion_description
            )
        with gr.Column(scale=1):
            clear_btn = gr.Button("Clear Conversation")
            status_box = gr.Textbox(label="Status", interactive=False)
    with gr.Row():
        chat_history = gr.Chatbot(label="Conversation", height=400)
    with gr.Row():
        with gr.Column(scale=4):
            text_input = gr.Textbox(label="Type your message here", lines=2)
        with gr.Column(scale=1):
            audio_input = gr.Audio(type="filepath", label="Or speak")
    with gr.Row():
        submit_btn = gr.Button("Send", variant="primary")
    with gr.Row():
        output_audio = gr.Audio(label="AI Voice")
    # NOTE(review): chat_with_ai's first return value is the AI reply text,
    # so the "Status" textbox doubles as the text-reply display, and the
    # typed message is never cleared after sending — confirm both are
    # intentional.
    submit_btn.click(
        fn=chat_with_ai,
        inputs=[audio_input, text_input, emotion, chat_history],
        outputs=[status_box, output_audio, chat_history]
    )
    # Pressing Enter in the textbox behaves like clicking Send.
    text_input.submit(
        fn=chat_with_ai,
        inputs=[audio_input, text_input, emotion, chat_history],
        outputs=[status_box, output_audio, chat_history]
    )
    clear_btn.click(
        fn=clear_conversation,
        inputs=[],
        outputs=[chat_history, audio_input, text_input, status_box]
    )

# share=True additionally exposes a public *.gradio.live URL.
iface.launch(share=True)