# Fluospark128's picture
# Update app.py
# 7ae1b5e verified
import os
import gradio as gr
import requests
import json
import speech_recognition as sr
from tempfile import NamedTemporaryFile
import logging
from dotenv import load_dotenv
# Load environment variables from Hugging Face secrets or .env
load_dotenv()
# Credentials come from the environment; a missing value yields None here and
# surfaces later as an auth failure at request time, not at import time.
HUGGINGFACE_API_TOKEN = os.getenv("HUGGING_FACE_API")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# NOTE(review): Groq's published model id is "llama3-8b-8192"; confirm the bare
# "llama3-8b" default resolves. (Earlier default was "mixtral-8x7b-32768".)
GROQ_MODEL = os.getenv("GROQ_MODEL", "llama3-8b")
# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# API headers
groq_headers = {
    "Authorization": f"Bearer {GROQ_API_KEY}",
    "Content-Type": "application/json"
}
# The HF Inference API only needs the bearer token; requests sets Content-Type.
tts_headers = {
    "Authorization": f"Bearer {HUGGINGFACE_API_TOKEN}"
}
# API endpoints
GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"  # OpenAI-compatible chat endpoint
TTS_API_URL = "https://api-inference.huggingface.co/models/microsoft/speecht5_tts"
# Emotion dictionary: dropdown key -> human-readable description shown in the
# UI and interpolated into the system prompt.
emotion_options = {
    "neutral": "Neutral or balanced mood",
    "positive": "Generally positive or optimistic",
    "happy": "Feeling joy or happiness",
    "excited": "Feeling enthusiastic or energetic",
    "sad": "Feeling down or unhappy",
    "angry": "Feeling frustrated or irritated",
    "negative": "Generally negative or pessimistic",
    "anxious": "Feeling worried or nervous"
}
# Recognizer for audio input
def transcribe_audio(audio_path):
    """Transcribe a recorded audio file to text via Google Speech Recognition.

    Any failure (unreadable file, recognition or network error) is logged and
    results in an empty string rather than an exception.
    """
    rec = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as wav:
            captured = rec.record(wav)
            return rec.recognize_google(captured)
    except Exception as e:
        logger.error(f"Audio transcription failed: {e}")
        return ""
# Groq response handler
def get_groq_response(prompt, chat_history):
    """Send the conversation to Groq's chat-completions API and return the reply.

    Args:
        prompt: System prompt describing the assistant persona / user mood.
        chat_history: Sequence of (user_message, ai_message) pairs; ai_message
            may be None for the turn currently being answered.

    Returns:
        The assistant's reply text, or a fallback apology string on any error.
    """
    messages = [{"role": "system", "content": prompt}]
    # BUGFIX: the previous code appended only one message per history entry
    # (the condition was always true for a non-empty tuple), so assistant
    # turns were silently dropped from the context sent to the model.
    for user_msg, ai_msg in chat_history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if ai_msg:
            messages.append({"role": "assistant", "content": ai_msg})
    data = {
        "model": GROQ_MODEL,
        "messages": messages[-20:],  # Limit history to keep the request bounded
        "temperature": 0.7,
        "max_tokens": 1024
    }
    try:
        # timeout keeps a stalled connection from hanging the UI callback.
        res = requests.post(GROQ_API_URL, headers=groq_headers, json=data, timeout=30)
        res.raise_for_status()
        return res.json()["choices"][0]["message"]["content"]
    except Exception as e:
        logger.error(f"Groq API error: {e}")
        return "Sorry, I couldn't generate a response right now."
# Hugging Face TTS
def generate_audio(text):
    """Synthesize `text` to speech with the HF Inference API (SpeechT5).

    Returns:
        Path to a temporary .wav file holding the audio, or None on failure
        (the error is logged; callers treat None as "no audio").
    """
    try:
        # BUGFIX: added timeout so a stalled inference call cannot hang the UI,
        # and catch request errors which previously propagated uncaught into
        # the Gradio callback.
        response = requests.post(TTS_API_URL, headers=tts_headers,
                                 json={"inputs": text}, timeout=60)
    except requests.RequestException as e:
        logger.error(f"TTS generation failed: {e}")
        return None
    if response.status_code == 200:
        # delete=False: the file must outlive this function so Gradio can serve it.
        with NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            tmp.write(response.content)
            return tmp.name
    logger.error(f"TTS generation failed: {response.text}")
    return None
# Conversation state
# Module-level list of (user_message, ai_message) tuples, shared by all
# callbacks. NOTE(review): global state is shared across all concurrent
# Gradio sessions — confirm single-user deployment or move into gr.State.
conversation_history = []
# Main chat logic
def chat_with_ai(audio, text_input, emotion, history):
    """Handle one chat turn: resolve the input, query Groq, synthesize speech.

    A successful voice transcription takes precedence over typed text.

    Returns:
        (reply-or-status text, TTS audio path or None, updated history pairs).
    """
    global conversation_history
    message = text_input.strip() if text_input else ""
    if audio:
        spoken = transcribe_audio(audio)
        if spoken:
            message = spoken
    if not message:
        return "Please provide a message or audio.", None, history
    # Reserve this turn's slot before calling the model, then fill it below.
    conversation_history.append((message, None))
    prompt = f"""You are an empathetic AI assistant. The user feels {emotion} ({emotion_options[emotion]}).
Respond supportively and helpfully in a concise manner."""
    reply = get_groq_response(prompt, conversation_history)
    conversation_history[-1] = (message, reply)
    voice_path = generate_audio(reply)
    return reply, voice_path, conversation_history
def clear_conversation():
    """Reset the stored conversation and blank out the chat UI widgets.

    Returns:
        (empty chatbot history, cleared audio, cleared textbox, status text).
    """
    global conversation_history
    conversation_history = []
    reset_state = ([], None, None, "Conversation cleared.")
    return reset_state
# Gradio Interface
with gr.Blocks(title="Mind AID AI Assistant") as iface:
    gr.Markdown("# Mind AID: Emotion-Aware Conversational AI")
    gr.Markdown("AI assistant with emotion-awareness, powered by Groq and Hugging Face TTS.")
    # Top row: mood selector (left) and clear/status controls (right).
    with gr.Row():
        with gr.Column(scale=3):
            emotion = gr.Dropdown(
                label="How are you feeling?",
                choices=list(emotion_options.keys()),
                value="neutral"
            )
            emotion_description = gr.Markdown("**Current mood:** Neutral or balanced mood")
            # Live-update the mood description whenever the dropdown changes.
            emotion.change(
                fn=lambda e: f"**Current mood:** {emotion_options[e]}",
                inputs=emotion,
                outputs=emotion_description
            )
        with gr.Column(scale=1):
            clear_btn = gr.Button("Clear Conversation")
            status_box = gr.Textbox(label="Status", interactive=False)
    with gr.Row():
        chat_history = gr.Chatbot(label="Conversation", height=400)
    # Input row: typed text or recorded audio (chat_with_ai prefers audio).
    with gr.Row():
        with gr.Column(scale=4):
            text_input = gr.Textbox(label="Type your message here", lines=2)
        with gr.Column(scale=1):
            audio_input = gr.Audio(type="filepath", label="Or speak")
    with gr.Row():
        submit_btn = gr.Button("Send", variant="primary")
    with gr.Row():
        output_audio = gr.Audio(label="AI Voice")
    # Both the Send button and pressing Enter in the textbox trigger a turn.
    # NOTE(review): chat_with_ai's first return value (the AI reply) lands in
    # status_box; the chatbot itself is refreshed via the returned history.
    submit_btn.click(
        fn=chat_with_ai,
        inputs=[audio_input, text_input, emotion, chat_history],
        outputs=[status_box, output_audio, chat_history]
    )
    text_input.submit(
        fn=chat_with_ai,
        inputs=[audio_input, text_input, emotion, chat_history],
        outputs=[status_box, output_audio, chat_history]
    )
    clear_btn.click(
        fn=clear_conversation,
        inputs=[],
        outputs=[chat_history, audio_input, text_input, status_box]
    )
# share=True requests a public gradio.live tunnel (ignored on HF Spaces).
iface.launch(share=True)