File size: 5,749 Bytes
8229cac
ad19f16
968cfc0
 
8229cac
bbd08dd
3bd5cf8
6207088
bbd08dd
6207088
bbd08dd
6207088
 
7ae1b5e
968cfc0
6207088
 
 
8d36508
6207088
 
8d36508
 
 
6207088
 
 
bbd08dd
6207088
 
 
d129d99
6207088
3bd5cf8
 
 
 
 
 
 
 
 
 
 
6207088
8229cac
bbd08dd
3bd5cf8
 
 
6207088
3bd5cf8
6207088
3bd5cf8
 
6207088
 
968cfc0
6207088
 
 
 
968cfc0
 
6207088
968cfc0
 
 
6207088
968cfc0
6207088
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3bd5cf8
 
6207088
 
8229cac
6207088
 
 
bbd08dd
6207088
 
3bd5cf8
6207088
054fff2
6207088
 
8229cac
6207088
 
3bd5cf8
6207088
3bd5cf8
6207088
8229cac
3bd5cf8
 
 
 
8229cac
6207088
8229cac
3bd5cf8
6207088
 
8229cac
3bd5cf8
 
6207088
3bd5cf8
6207088
3bd5cf8
 
 
6207088
 
 
3bd5cf8
 
 
6207088
125beff
8229cac
6207088
bbd08dd
3bd5cf8
 
6207088
3bd5cf8
6207088
bbd08dd
3bd5cf8
6207088
3bd5cf8
6207088
3bd5cf8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6207088
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
import os
import gradio as gr
import requests
import json
import speech_recognition as sr
from tempfile import NamedTemporaryFile
import logging
from dotenv import load_dotenv

# Load environment variables from Hugging Face secrets or .env
load_dotenv()
HUGGINGFACE_API_TOKEN = os.getenv("HUGGING_FACE_API")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# NOTE(review): "llama3-8b" may not be a valid Groq model id (the catalog uses
# names like "llama3-8b-8192") — confirm against the Groq console.
GROQ_MODEL = os.getenv("GROQ_MODEL", "llama3-8b")  # earlier default was mixtral-8x7b-32768

# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# API headers
# Groq expects an OpenAI-style bearer token plus JSON body.
groq_headers = {
    "Authorization": f"Bearer {GROQ_API_KEY}",
    "Content-Type": "application/json"
}
# HF Inference API only needs the bearer token; body is JSON by convention.
tts_headers = {
    "Authorization": f"Bearer {HUGGINGFACE_API_TOKEN}"
}

# API endpoints
GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
TTS_API_URL = "https://api-inference.huggingface.co/models/microsoft/speecht5_tts"

# Emotion dictionary: dropdown key -> human-readable description shown in the
# UI and interpolated into the system prompt.
emotion_options = {
    "neutral": "Neutral or balanced mood",
    "positive": "Generally positive or optimistic",
    "happy": "Feeling joy or happiness",
    "excited": "Feeling enthusiastic or energetic",
    "sad": "Feeling down or unhappy",
    "angry": "Feeling frustrated or irritated",
    "negative": "Generally negative or pessimistic",
    "anxious": "Feeling worried or nervous"
}

# Recognizer for audio input
def transcribe_audio(audio_path):
    """Convert speech in the audio file at *audio_path* to text.

    Uses Google's free web speech API via the SpeechRecognition package.
    Returns the recognized text, or "" on any failure (unreadable file,
    network error, or unintelligible speech) — errors are logged, not raised.
    """
    rec = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as source_file:
            captured = rec.record(source_file)
            transcript = rec.recognize_google(captured)
        return transcript
    except Exception as exc:
        logger.error(f"Audio transcription failed: {exc}")
        return ""

# Groq response handler
def get_groq_response(prompt, chat_history):
    """Send the system prompt plus recent chat history to Groq, return the reply.

    Args:
        prompt: system-role instruction string.
        chat_history: list of (user_message, ai_reply) tuples; ai_reply may be
            None for the turn currently awaiting a response.

    Returns:
        The assistant's reply text, or a canned apology string on any failure.
    """
    messages = [{"role": "system", "content": prompt}]
    # BUG FIX: the old code did `("user", msg[0]) if msg else ("assistant", msg[1])`,
    # which sent every history tuple as a user message, never forwarded the
    # assistant's prior replies, and would IndexError on an empty tuple.
    # Unpack each turn into its user and assistant halves instead.
    for user_msg, ai_msg in chat_history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if ai_msg:
            messages.append({"role": "assistant", "content": ai_msg})

    data = {
        "model": GROQ_MODEL,
        # Cap context: always keep the system prompt, plus the 19 most recent
        # turns. (The old `messages[-20:]` dropped the system prompt entirely
        # once the history grew past 20 messages.)
        "messages": messages[:1] + messages[1:][-19:],
        "temperature": 0.7,
        "max_tokens": 1024
    }

    try:
        # timeout prevents a hung request from freezing the Gradio handler forever
        res = requests.post(GROQ_API_URL, headers=groq_headers, json=data, timeout=60)
        res.raise_for_status()
        return res.json()["choices"][0]["message"]["content"]
    except Exception as e:
        # Deliberate best-effort: any API/parse failure degrades to a fallback reply.
        logger.error(f"Groq API error: {e}")
        return "Sorry, I couldn't generate a response right now."

# Hugging Face TTS
def generate_audio(text):
    """Synthesize *text* to speech via the HF Inference API (SpeechT5).

    Returns the path to a temporary .wav file, or None on any failure.
    The temp file is created with delete=False, so the caller/OS owns cleanup.
    """
    try:
        # timeout added: TTS inference can be slow, but an unbounded request
        # would hang the chat handler indefinitely.
        response = requests.post(
            TTS_API_URL, headers=tts_headers, json={"inputs": text}, timeout=60
        )
    except requests.RequestException as e:
        # BUG FIX: network failures previously propagated out of this function
        # and crashed chat_with_ai; degrade to "no audio" like the non-200 path.
        logger.error(f"TTS request failed: {e}")
        return None

    if response.status_code == 200:
        with NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            tmp.write(response.content)
            return tmp.name
    logger.error(f"TTS generation failed: {response.text}")
    return None

# Conversation state: list of (user_message, ai_reply) tuples.
# NOTE(review): this is shared process-wide, so concurrent Gradio users share
# one transcript — consider gr.State for per-session history.
conversation_history = []

# Main chat logic
def chat_with_ai(audio, text_input, emotion, history):
    """Handle one chat turn: transcribe audio (if any), query Groq, synthesize speech.

    Args:
        audio: filepath of recorded audio, or None/falsy when absent.
        text_input: typed message; may be None or empty.
        emotion: key into emotion_options chosen in the dropdown.
        history: Gradio chatbot state — only echoed back on the empty-input
            path; the module-level conversation_history is the real transcript.

    Returns:
        (ai_response_text, tts_audio_path_or_None, updated_history)
    """
    global conversation_history
    user_input = text_input.strip() if text_input else ""

    # Spoken input, when it transcribes successfully, overrides the typed text.
    if audio:
        transcription = transcribe_audio(audio)
        if transcription:
            user_input = transcription

    if not user_input:
        return "Please provide a message or audio.", None, history

    # Record the user turn now; the AI half is filled in after the API call.
    conversation_history.append((user_input, None))

    # .get() keeps an unexpected emotion key from raising KeyError
    # (the old direct lookup crashed the handler on unknown keys).
    mood = emotion_options.get(emotion, "Unspecified mood")
    prompt = f"""You are an empathetic AI assistant. The user feels {emotion} ({mood}).
Respond supportively and helpfully in a concise manner."""

    ai_response = get_groq_response(prompt, conversation_history)
    conversation_history[-1] = (user_input, ai_response)

    audio_output_path = generate_audio(ai_response)

    return ai_response, audio_output_path, conversation_history

def clear_conversation():
    """Wipe the shared transcript and blank out the UI widgets.

    Returns fresh values for (chatbot, audio_input, text_input, status_box).
    """
    global conversation_history
    conversation_history = []
    cleared_chat, cleared_audio, cleared_text = [], None, None
    return cleared_chat, cleared_audio, cleared_text, "Conversation cleared."

# Gradio Interface
# Layout note: component creation order inside these context managers defines
# the on-screen layout, so statement order here is load-bearing.
with gr.Blocks(title="Mind AID AI Assistant") as iface:
    gr.Markdown("# Mind AID: Emotion-Aware Conversational AI")
    gr.Markdown("AI assistant with emotion-awareness, powered by Groq and Hugging Face TTS.")

    with gr.Row():
        with gr.Column(scale=3):
            # Mood selector; choices are the keys of emotion_options above.
            emotion = gr.Dropdown(
                label="How are you feeling?",
                choices=list(emotion_options.keys()),
                value="neutral"
            )
            emotion_description = gr.Markdown("**Current mood:** Neutral or balanced mood")
            # Live-update the mood blurb whenever the dropdown changes.
            emotion.change(
                fn=lambda e: f"**Current mood:** {emotion_options[e]}",
                inputs=emotion,
                outputs=emotion_description
            )
        with gr.Column(scale=1):
            clear_btn = gr.Button("Clear Conversation")
            status_box = gr.Textbox(label="Status", interactive=False)

    with gr.Row():
        chat_history = gr.Chatbot(label="Conversation", height=400)

    with gr.Row():
        with gr.Column(scale=4):
            text_input = gr.Textbox(label="Type your message here", lines=2)
        with gr.Column(scale=1):
            # Microphone/file input; chat_with_ai receives a filepath string.
            audio_input = gr.Audio(type="filepath", label="Or speak")

    with gr.Row():
        submit_btn = gr.Button("Send", variant="primary")
    with gr.Row():
        output_audio = gr.Audio(label="AI Voice")

    # The Send button and pressing Enter in the textbox run the same handler.
    # NOTE(review): chat_with_ai's first return value (the AI reply) is routed
    # to status_box rather than rendered only in the chatbot — confirm showing
    # the reply in "Status" is intentional.
    submit_btn.click(
        fn=chat_with_ai,
        inputs=[audio_input, text_input, emotion, chat_history],
        outputs=[status_box, output_audio, chat_history]
    )
    text_input.submit(
        fn=chat_with_ai,
        inputs=[audio_input, text_input, emotion, chat_history],
        outputs=[status_box, output_audio, chat_history]
    )
    clear_btn.click(
        fn=clear_conversation,
        inputs=[],
        outputs=[chat_history, audio_input, text_input, status_box]
    )

# NOTE(review): share=True exposes a public tunnel URL — fine for demos,
# review before production use.
iface.launch(share=True)