Expression / app.py
Shahid0812's picture
Update app.py
47eb038 verified
import cv2
import face_recognition
import requests
import os
import gradio as gr
import numpy as np
from fer import FER
# --- CONFIG / API SETUP ---
# Endpoint that process_frame POSTs the bot reply to for speech synthesis.
KOKORO_API_URL = "https://shahid202-kokoro-api.hf.space/generate"
# Reference photo of the owner; a face encoding is extracted from it at module load.
OWNER_IMAGE_PATH = "faces/owner.jpg"
# Initialize Emotion Detector (Lighter than DeepFace)
# Built once at module load and reused by every process_frame call.
# NOTE(review): mtcnn=False presumably selects FER's non-MTCNN face detector - confirm against the FER docs.
emotion_detector = FER(mtcnn=False)
# Load Owner Face Encoding
# owner_encoding stays None whenever no usable reference face is available;
# process_frame then treats every visitor as a stranger.
owner_encoding = None
if os.path.exists(OWNER_IMAGE_PATH):
    img = face_recognition.load_image_file(OWNER_IMAGE_PATH)
    encodings = face_recognition.face_encodings(img)
    if encodings:
        # The first face found in the reference photo is taken as the owner.
        owner_encoding = encodings[0]
        print("Owner profile loaded.")
    else:
        # Previously this case was silent, which made "always a stranger"
        # failures hard to diagnose.
        print(
            f"Warning: no face detected in {OWNER_IMAGE_PATH}. "
            "Everyone will be a stranger."
        )
else:
    print(f"Warning: {OWNER_IMAGE_PATH} not found. Everyone will be a stranger.")
# Upper bound (seconds) on the speech-synthesis request so a stalled endpoint
# cannot hang the UI handler forever.
_KOKORO_TIMEOUT_SECONDS = 60


def _is_owner_present(image):
    """Return True when any face in *image* matches the stored owner encoding."""
    if owner_encoding is None:
        # Without a reference encoding nobody can match, so skip detection.
        return False
    locations = face_recognition.face_locations(image)
    for candidate in face_recognition.face_encodings(image, locations):
        if any(face_recognition.compare_faces([owner_encoding], candidate)):
            return True
    return False


def _detect_mood(frame):
    """Return the top-scoring emotion label of the first detected face."""
    # detect_emotions returns a list of dictionaries, one per detected face.
    detections = emotion_detector.detect_emotions(frame)
    if not detections:
        return "Neutral"
    scores = detections[0]["emotions"]
    return max(scores, key=scores.get)


def _synthesize_speech(text):
    """POST *text* to the Kokoro endpoint and return the saved audio path.

    Returns None when the request fails, the endpoint answers with a
    non-200 status, or the audio cannot be written to disk.
    """
    import tempfile

    payload = {"text": text, "voice": "af_bella", "speed": 1.0}
    try:
        response = requests.post(
            KOKORO_API_URL, json=payload, timeout=_KOKORO_TIMEOUT_SECONDS
        )
    except requests.RequestException as exc:
        print(f"Speech synthesis request failed: {exc}")
        return None
    if response.status_code != 200:
        print(f"Speech synthesis returned HTTP {response.status_code}.")
        return None
    try:
        # A unique file per call keeps concurrent requests from overwriting
        # each other's audio (a single shared "output.wav" would race).
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as audio_file:
            audio_file.write(response.content)
    except OSError as exc:
        print(f"Could not save synthesized audio: {exc}")
        return None
    return audio_file.name


def process_frame(image, user_msg):
    """Handle one webcam capture: verify the owner, read the mood, and reply.

    Args:
        image: Frame delivered by the Gradio image component (RGB array),
            or None when nothing was captured.
        user_msg: Text the user typed for the bot.

    Returns:
        A ``(reply_text, audio_path)`` tuple. ``audio_path`` is None when
        no image was captured, the owner was not recognized, or speech
        synthesis failed.
    """
    if image is None:
        return "No image captured.", None

    # 1. Face Recognition (face_recognition works on the RGB image directly)
    if not _is_owner_present(image):
        return "Stranger detected. Access Denied.", None

    # 2. Mood Detection
    # Convert Gradio image (RGB) to BGR for OpenCV
    frame = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    mood = _detect_mood(frame)

    # 3. Get Bot Reply (Placeholder for your logic)
    bot_reply = f"Hello Owner. I see you are {mood}. You said: {user_msg}"

    # 4. Speech Synthesis via Kokoro
    audio_path = _synthesize_speech(bot_reply)
    return bot_reply, audio_path
# --- GRADIO INTERFACE ---
# Two-column page: capture + message on the left, bot text + voice on the right.
with gr.Blocks() as demo:
    gr.Markdown("# AI Face & Mood Assistant")

    with gr.Row():
        # Left column: everything the user provides.
        with gr.Column():
            webcam_input = gr.Image(
                sources=["webcam"],
                type="numpy",
                label="Show your face",
            )
            message_input = gr.Textbox(
                label="Message for Bot",
                placeholder="Type something...",
            )
            send_button = gr.Button("Send to Bot")

        # Right column: what process_frame hands back.
        with gr.Column():
            reply_box = gr.Textbox(label="Bot Response")
            reply_audio = gr.Audio(label="Bot Voice", type="filepath")

    # Wire the button to the handler; the two outputs line up with the
    # (text, audio path) tuple that process_frame returns.
    send_button.click(
        fn=process_frame,
        inputs=[webcam_input, message_input],
        outputs=[reply_box, reply_audio],
    )


# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()