File size: 5,749 Bytes
8229cac
ad19f16
968cfc0
 
8229cac
bbd08dd
3bd5cf8
6207088
bbd08dd
6207088
bbd08dd
6207088
 
7ae1b5e
968cfc0
6207088
 
 
8d36508
6207088
 
8d36508
 
 
6207088
 
 
bbd08dd
6207088
 
 
d129d99
6207088
3bd5cf8
 
 
 
 
 
 
 
 
 
 
6207088
8229cac
bbd08dd
3bd5cf8
 
 
6207088
3bd5cf8
6207088
3bd5cf8
 
6207088
 
968cfc0
6207088
 
 
 
968cfc0
 
6207088
968cfc0
 
 
6207088
968cfc0
6207088
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3bd5cf8
 
6207088
 
8229cac
6207088
 
 
bbd08dd
6207088
 
3bd5cf8
6207088
054fff2
6207088
 
8229cac
6207088
 
3bd5cf8
6207088
3bd5cf8
6207088
8229cac
3bd5cf8
 
 
 
8229cac
6207088
8229cac
3bd5cf8
6207088
 
8229cac
3bd5cf8
 
6207088
3bd5cf8
6207088
3bd5cf8
 
 
6207088
 
 
3bd5cf8
 
 
6207088
125beff
8229cac
6207088
bbd08dd
3bd5cf8
 
6207088
3bd5cf8
6207088
bbd08dd
3bd5cf8
6207088
3bd5cf8
6207088
3bd5cf8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6207088
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
import os
import gradio as gr
import requests
import json
import speech_recognition as sr
from tempfile import NamedTemporaryFile
import logging
from dotenv import load_dotenv

# Load environment variables from Hugging Face secrets or .env
load_dotenv()
HUGGINGFACE_API_TOKEN = os.getenv("HUGGING_FACE_API")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# NOTE(review): "llama3-8b" may not be a valid Groq model id (the catalog uses
# names like "llama3-8b-8192") — confirm against the Groq console.
GROQ_MODEL = os.getenv("GROQ_MODEL", "llama3-8b")  # earlier default was mixtral-8x7b-32768

# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# API headers
# Groq expects an OpenAI-style bearer token plus JSON body.
groq_headers = {
    "Authorization": f"Bearer {GROQ_API_KEY}",
    "Content-Type": "application/json"
}
# HF Inference API only needs the bearer token; body is JSON by convention.
tts_headers = {
    "Authorization": f"Bearer {HUGGINGFACE_API_TOKEN}"
}

# API endpoints
GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
TTS_API_URL = "https://api-inference.huggingface.co/models/microsoft/speecht5_tts"

# Emotion dictionary: dropdown key -> human-readable description shown in the
# UI and interpolated into the system prompt.
emotion_options = {
    "neutral": "Neutral or balanced mood",
    "positive": "Generally positive or optimistic",
    "happy": "Feeling joy or happiness",
    "excited": "Feeling enthusiastic or energetic",
    "sad": "Feeling down or unhappy",
    "angry": "Feeling frustrated or irritated",
    "negative": "Generally negative or pessimistic",
    "anxious": "Feeling worried or nervous"
}

# Recognizer for audio input
def transcribe_audio(audio_path):
    """Convert speech in the audio file at *audio_path* to text.

    Uses Google's free web speech API via the SpeechRecognition package.
    Returns the recognized text, or "" on any failure (unreadable file,
    network error, or unintelligible speech) — errors are logged, not raised.
    """
    rec = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as source_file:
            captured = rec.record(source_file)
            transcript = rec.recognize_google(captured)
        return transcript
    except Exception as exc:
        logger.error(f"Audio transcription failed: {exc}")
        return ""

# Groq response handler
def get_groq_response(prompt, chat_history):
    """Send the system prompt plus recent chat history to Groq, return the reply.

    Args:
        prompt: system-role instruction string.
        chat_history: list of (user_message, ai_reply) tuples; ai_reply may be
            None for the turn currently awaiting a response.

    Returns:
        The assistant's reply text, or a canned apology string on any failure.
    """
    messages = [{"role": "system", "content": prompt}]
    # BUG FIX: the old code did `("user", msg[0]) if msg else ("assistant", msg[1])`,
    # which sent every history tuple as a user message, never forwarded the
    # assistant's prior replies, and would IndexError on an empty tuple.
    # Unpack each turn into its user and assistant halves instead.
    for user_msg, ai_msg in chat_history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if ai_msg:
            messages.append({"role": "assistant", "content": ai_msg})

    data = {
        "model": GROQ_MODEL,
        # Cap context: always keep the system prompt, plus the 19 most recent
        # turns. (The old `messages[-20:]` dropped the system prompt entirely
        # once the history grew past 20 messages.)
        "messages": messages[:1] + messages[1:][-19:],
        "temperature": 0.7,
        "max_tokens": 1024
    }

    try:
        # timeout prevents a hung request from freezing the Gradio handler forever
        res = requests.post(GROQ_API_URL, headers=groq_headers, json=data, timeout=60)
        res.raise_for_status()
        return res.json()["choices"][0]["message"]["content"]
    except Exception as e:
        # Deliberate best-effort: any API/parse failure degrades to a fallback reply.
        logger.error(f"Groq API error: {e}")
        return "Sorry, I couldn't generate a response right now."

# Hugging Face TTS
def generate_audio(text):
    """Synthesize *text* to speech via the HF Inference API (SpeechT5).

    Returns the path to a temporary .wav file, or None on any failure.
    The temp file is created with delete=False, so the caller/OS owns cleanup.
    """
    try:
        # timeout added: TTS inference can be slow, but an unbounded request
        # would hang the chat handler indefinitely.
        response = requests.post(
            TTS_API_URL, headers=tts_headers, json={"inputs": text}, timeout=60
        )
    except requests.RequestException as e:
        # BUG FIX: network failures previously propagated out of this function
        # and crashed chat_with_ai; degrade to "no audio" like the non-200 path.
        logger.error(f"TTS request failed: {e}")
        return None

    if response.status_code == 200:
        with NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            tmp.write(response.content)
            return tmp.name
    logger.error(f"TTS generation failed: {response.text}")
    return None

# Conversation state: list of (user_message, ai_reply) tuples.
# NOTE(review): this is shared process-wide, so concurrent Gradio users share
# one transcript — consider gr.State for per-session history.
conversation_history = []

# Main chat logic
def chat_with_ai(audio, text_input, emotion, history):
    """Handle one chat turn: transcribe audio (if any), query Groq, synthesize speech.

    Args:
        audio: filepath of recorded audio, or None/falsy when absent.
        text_input: typed message; may be None or empty.
        emotion: key into emotion_options chosen in the dropdown.
        history: Gradio chatbot state — only echoed back on the empty-input
            path; the module-level conversation_history is the real transcript.

    Returns:
        (ai_response_text, tts_audio_path_or_None, updated_history)
    """
    global conversation_history
    user_input = text_input.strip() if text_input else ""

    # Spoken input, when it transcribes successfully, overrides the typed text.
    if audio:
        transcription = transcribe_audio(audio)
        if transcription:
            user_input = transcription

    if not user_input:
        return "Please provide a message or audio.", None, history

    # Record the user turn now; the AI half is filled in after the API call.
    conversation_history.append((user_input, None))

    # .get() keeps an unexpected emotion key from raising KeyError
    # (the old direct lookup crashed the handler on unknown keys).
    mood = emotion_options.get(emotion, "Unspecified mood")
    prompt = f"""You are an empathetic AI assistant. The user feels {emotion} ({mood}).
Respond supportively and helpfully in a concise manner."""

    ai_response = get_groq_response(prompt, conversation_history)
    conversation_history[-1] = (user_input, ai_response)

    audio_output_path = generate_audio(ai_response)

    return ai_response, audio_output_path, conversation_history

def clear_conversation():
    """Wipe the shared transcript and blank out the UI widgets.

    Returns fresh values for (chatbot, audio_input, text_input, status_box).
    """
    global conversation_history
    conversation_history = []
    cleared_chat, cleared_audio, cleared_text = [], None, None
    return cleared_chat, cleared_audio, cleared_text, "Conversation cleared."

# Gradio Interface
# Layout note: component creation order inside these context managers defines
# the on-screen layout, so statement order here is load-bearing.
with gr.Blocks(title="Mind AID AI Assistant") as iface:
    gr.Markdown("# Mind AID: Emotion-Aware Conversational AI")
    gr.Markdown("AI assistant with emotion-awareness, powered by Groq and Hugging Face TTS.")

    with gr.Row():
        with gr.Column(scale=3):
            # Mood selector; choices are the keys of emotion_options above.
            emotion = gr.Dropdown(
                label="How are you feeling?",
                choices=list(emotion_options.keys()),
                value="neutral"
            )
            emotion_description = gr.Markdown("**Current mood:** Neutral or balanced mood")
            # Live-update the mood blurb whenever the dropdown changes.
            emotion.change(
                fn=lambda e: f"**Current mood:** {emotion_options[e]}",
                inputs=emotion,
                outputs=emotion_description
            )
        with gr.Column(scale=1):
            clear_btn = gr.Button("Clear Conversation")
            status_box = gr.Textbox(label="Status", interactive=False)

    with gr.Row():
        chat_history = gr.Chatbot(label="Conversation", height=400)

    with gr.Row():
        with gr.Column(scale=4):
            text_input = gr.Textbox(label="Type your message here", lines=2)
        with gr.Column(scale=1):
            # Microphone/file input; chat_with_ai receives a filepath string.
            audio_input = gr.Audio(type="filepath", label="Or speak")

    with gr.Row():
        submit_btn = gr.Button("Send", variant="primary")
    with gr.Row():
        output_audio = gr.Audio(label="AI Voice")

    # The Send button and pressing Enter in the textbox run the same handler.
    # NOTE(review): chat_with_ai's first return value (the AI reply) is routed
    # to status_box rather than rendered only in the chatbot — confirm showing
    # the reply in "Status" is intentional.
    submit_btn.click(
        fn=chat_with_ai,
        inputs=[audio_input, text_input, emotion, chat_history],
        outputs=[status_box, output_audio, chat_history]
    )
    text_input.submit(
        fn=chat_with_ai,
        inputs=[audio_input, text_input, emotion, chat_history],
        outputs=[status_box, output_audio, chat_history]
    )
    clear_btn.click(
        fn=clear_conversation,
        inputs=[],
        outputs=[chat_history, audio_input, text_input, status_box]
    )

# NOTE(review): share=True exposes a public tunnel URL — fine for demos,
# review before production use.
iface.launch(share=True)