# NOTE: Hugging Face Space page header and git-blame/line-gutter scrape
# artifacts removed from the top of this file; runnable code starts below.
import os
import gradio as gr
import requests
import json
import speech_recognition as sr
from tempfile import NamedTemporaryFile
import logging
from dotenv import load_dotenv
# Load environment variables from Hugging Face secrets or .env
load_dotenv()
# API credentials; both are required at runtime (requests fail with auth
# errors if they are unset — there is no explicit startup check).
HUGGINGFACE_API_TOKEN = os.getenv("HUGGING_FACE_API")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# Overridable chat model; alternative previously used: "mixtral-8x7b-32768".
# NOTE(review): "llama3-8b" may not be a current Groq model id (the listed id
# is "llama3-8b-8192") — confirm against the Groq models endpoint.
GROQ_MODEL = os.getenv("GROQ_MODEL", "llama3-8b")
# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# API headers
# Groq expects an OpenAI-style bearer token plus JSON content type.
groq_headers = {
    "Authorization": f"Bearer {GROQ_API_KEY}",
    "Content-Type": "application/json"
}
# Hugging Face Inference API only needs the bearer token.
tts_headers = {
    "Authorization": f"Bearer {HUGGINGFACE_API_TOKEN}"
}
# API endpoints
GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
TTS_API_URL = "https://api-inference.huggingface.co/models/microsoft/speecht5_tts"
# Emotion dictionary
# Keys populate the UI dropdown; values are human-readable descriptions
# interpolated into the system prompt and the mood label.
emotion_options = {
    "neutral": "Neutral or balanced mood",
    "positive": "Generally positive or optimistic",
    "happy": "Feeling joy or happiness",
    "excited": "Feeling enthusiastic or energetic",
    "sad": "Feeling down or unhappy",
    "angry": "Feeling frustrated or irritated",
    "negative": "Generally negative or pessimistic",
    "anxious": "Feeling worried or nervous"
}
# Recognizer for audio input
def transcribe_audio(audio_path):
    """Transcribe an audio file to text using Google's free speech API.

    Args:
        audio_path: Path to a WAV/AIFF/FLAC file readable by SpeechRecognition.

    Returns:
        The recognized text, or an empty string if the file cannot be read
        or the recognition service fails (errors are logged, not raised).
    """
    engine = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as source:
            captured = engine.record(source)
        return engine.recognize_google(captured)
    except Exception as err:
        # Best-effort transcription: callers treat "" as "no speech input".
        logger.error(f"Audio transcription failed: {err}")
        return ""
# Groq response handler
def get_groq_response(prompt, chat_history):
    """Send the system prompt plus recent chat history to the Groq chat
    completions API and return the assistant's reply text.

    Args:
        prompt: System prompt describing the assistant persona/mood.
        chat_history: List of (user_message, assistant_message) tuples;
            assistant_message may be None for the in-flight turn.

    Returns:
        The generated reply string, or a fallback apology on any API error.
    """
    messages = [{"role": "system", "content": prompt}]
    # BUG FIX: the original `("user", msg[0]) if msg else ("assistant", msg[1])`
    # was always truthy for a non-empty tuple, so assistant turns were never
    # forwarded. Append both sides of every turn, skipping the pending None.
    for user_msg, assistant_msg in chat_history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    # Cap context at 20 messages while always preserving the system prompt
    # (the original `messages[-20:]` could slice the system message off).
    if len(messages) > 20:
        messages = [messages[0]] + messages[-19:]
    data = {
        "model": GROQ_MODEL,
        "messages": messages,
        "temperature": 0.7,
        "max_tokens": 1024
    }
    try:
        # Timeout keeps a hung request from blocking the UI indefinitely.
        res = requests.post(GROQ_API_URL, headers=groq_headers, json=data,
                            timeout=60)
        res.raise_for_status()
        return res.json()["choices"][0]["message"]["content"]
    except Exception as e:
        logger.error(f"Groq API error: {e}")
        return "Sorry, I couldn't generate a response right now."
# Hugging Face TTS
def generate_audio(text):
    """Synthesize speech for *text* via the SpeechT5 HF Inference API.

    Args:
        text: The reply string to vocalize.

    Returns:
        Path to a temporary .wav file (intentionally left on disk so Gradio
        can serve it), or None if synthesis failed.
    """
    try:
        # Timeout added: model cold-starts can otherwise hang the request.
        response = requests.post(TTS_API_URL, headers=tts_headers,
                                 json={"inputs": text}, timeout=60)
    except requests.RequestException as e:
        # BUG FIX: a connection error previously propagated uncaught out of
        # the Gradio handler instead of honoring the None-on-failure contract.
        logger.error(f"TTS request failed: {e}")
        return None
    if response.status_code == 200:
        with NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            tmp.write(response.content)
            return tmp.name
    else:
        logger.error(f"TTS generation failed: {response.text}")
        return None
# Conversation state
# Module-level list of (user_message, assistant_message) tuples shared by the
# chat and clear callbacks. NOTE(review): this is process-global, so all
# concurrent visitors to the Space share one conversation — confirm whether
# per-session gr.State is wanted instead.
conversation_history = []
# Main chat logic
def chat_with_ai(audio, text_input, emotion, history):
    """Handle one chat turn: resolve input, query Groq, synthesize the reply.

    Args:
        audio: Filepath of recorded audio, or None.
        text_input: Typed message; may be empty or None.
        emotion: Dropdown key expected to be in emotion_options.
        history: Current Chatbot value. NOTE(review): ignored as state — the
            module-level conversation_history is the source of truth.

    Returns:
        Tuple of (reply/status text, reply audio path or None, updated
        conversation history for the Chatbot).
    """
    global conversation_history
    user_input = text_input.strip() if text_input else ""
    # Spoken input, when successfully transcribed, overrides typed text.
    if audio:
        transcription = transcribe_audio(audio)
        if transcription:
            user_input = transcription
    if not user_input:
        return "Please provide a message or audio.", None, history
    # Record the user turn immediately; the assistant slot is filled below.
    conversation_history.append((user_input, None))
    # .get() guards against an unexpected dropdown value (the original
    # direct lookup could raise KeyError inside the handler).
    mood = emotion_options.get(emotion, "Unspecified mood")
    prompt = f"""You are an empathetic AI assistant. The user feels {emotion} ({mood}).
Respond supportively and helpfully in a concise manner."""
    ai_response = get_groq_response(prompt, conversation_history)
    conversation_history[-1] = (user_input, ai_response)
    audio_output_path = generate_audio(ai_response)
    return ai_response, audio_output_path, conversation_history
def clear_conversation():
    """Wipe the stored conversation and reset the UI widgets.

    Returns:
        Values in the clear-button wiring order:
        (chatbot value, audio input, text input, status message).
    """
    global conversation_history
    conversation_history = []
    widget_resets = ([], None, None)
    return (*widget_resets, "Conversation cleared.")
# Gradio Interface
# Layout: mood selector + clear/status on top, chatbot, then text/audio
# inputs, send button, and the synthesized reply player.
with gr.Blocks(title="Mind AID AI Assistant") as iface:
    gr.Markdown("# Mind AID: Emotion-Aware Conversational AI")
    gr.Markdown("AI assistant with emotion-awareness, powered by Groq and Hugging Face TTS.")
    with gr.Row():
        with gr.Column(scale=3):
            emotion = gr.Dropdown(
                label="How are you feeling?",
                choices=list(emotion_options.keys()),
                value="neutral"
            )
            emotion_description = gr.Markdown("**Current mood:** Neutral or balanced mood")
            # Live-update the mood caption when the dropdown changes.
            emotion.change(
                fn=lambda e: f"**Current mood:** {emotion_options[e]}",
                inputs=emotion,
                outputs=emotion_description
            )
        with gr.Column(scale=1):
            clear_btn = gr.Button("Clear Conversation")
            status_box = gr.Textbox(label="Status", interactive=False)
    with gr.Row():
        chat_history = gr.Chatbot(label="Conversation", height=400)
    with gr.Row():
        with gr.Column(scale=4):
            text_input = gr.Textbox(label="Type your message here", lines=2)
        with gr.Column(scale=1):
            audio_input = gr.Audio(type="filepath", label="Or speak")
    with gr.Row():
        submit_btn = gr.Button("Send", variant="primary")
    with gr.Row():
        output_audio = gr.Audio(label="AI Voice")
    # Both the Send button and pressing Enter in the textbox run a turn.
    submit_btn.click(
        fn=chat_with_ai,
        inputs=[audio_input, text_input, emotion, chat_history],
        outputs=[status_box, output_audio, chat_history]
    )
    text_input.submit(
        fn=chat_with_ai,
        inputs=[audio_input, text_input, emotion, chat_history],
        outputs=[status_box, output_audio, chat_history]
    )
    clear_btn.click(
        fn=clear_conversation,
        inputs=[],
        outputs=[chat_history, audio_input, text_input, status_box]
    )
# BUG FIX: removed the trailing " |" scrape artifact after launch(), which
# made the file a SyntaxError. share=True exposes a public Gradio link.
iface.launch(share=True)