"""Gradio demo: recognise the owner's face, read their mood, and reply by voice.

A webcam frame and a text message are submitted through a Gradio UI. If the
frame contains the face stored at ``OWNER_IMAGE_PATH``, the dominant emotion
is detected with FER, a reply string is built, and the reply is sent to the
Kokoro text-to-speech endpoint so it can be played back in the browser.
"""
import os

import cv2
import face_recognition
import gradio as gr
import numpy as np
import requests
from fer import FER

# --- CONFIG / API SETUP ---
KOKORO_API_URL = "https://shahid202-kokoro-api.hf.space/generate"
OWNER_IMAGE_PATH = "faces/owner.jpg"
# Upper bound (seconds) on the TTS request so a stalled endpoint cannot hang
# the UI worker forever.
KOKORO_TIMEOUT_SECONDS = 60
# Synthesised speech is written here. NOTE: the path is shared by every
# request, so each reply overwrites the previous one.
AUDIO_OUTPUT_PATH = "output.wav"

# Initialize Emotion Detector (Lighter than DeepFace)
emotion_detector = FER(mtcnn=False)


def _load_owner_encoding(path):
    """Return the face encoding of the first face found in *path*, or None.

    None is returned (and a warning printed) when the file does not exist or
    when no face can be encoded from it.
    """
    if not os.path.exists(path):
        print(f"Warning: {path} not found. Everyone will be a stranger.")
        return None

    owner_image = face_recognition.load_image_file(path)
    encodings = face_recognition.face_encodings(owner_image)
    if not encodings:
        print(f"Warning: no face found in {path}. Everyone will be a stranger.")
        return None

    print("Owner profile loaded.")
    return encodings[0]


# Load Owner Face Encoding
owner_encoding = _load_owner_encoding(OWNER_IMAGE_PATH)


def _is_owner(image):
    """Return True if any face in *image* matches the loaded owner encoding."""
    if owner_encoding is None:
        # Nothing to compare against, so skip the (expensive) face detection.
        return False

    locations = face_recognition.face_locations(image)
    candidates = face_recognition.face_encodings(image, locations)
    return any(
        True in face_recognition.compare_faces([owner_encoding], candidate)
        for candidate in candidates
    )


def _detect_mood(frame):
    """Return the highest-scoring emotion label for the first face in *frame*.

    Falls back to "Neutral" when the detector reports no faces.
    """
    # detect_emotions returns a list of dictionaries, one per detected face.
    detections = emotion_detector.detect_emotions(frame)
    if not detections:
        return "Neutral"
    scores = detections[0]["emotions"]
    return max(scores, key=scores.get)


def _synthesize_speech(text):
    """Send *text* to the Kokoro API and return the saved audio path, or None.

    Synthesis is best-effort: any network error, non-200 response, or failure
    to write the file yields None so the text reply can still be shown.
    """
    payload = {"text": text, "voice": "af_bella", "speed": 1.0}
    try:
        response = requests.post(
            KOKORO_API_URL, json=payload, timeout=KOKORO_TIMEOUT_SECONDS
        )
    except requests.RequestException as err:
        print(f"Warning: Kokoro request failed: {err}")
        return None

    if response.status_code != 200:
        print(f"Warning: Kokoro returned HTTP {response.status_code}.")
        return None

    try:
        with open(AUDIO_OUTPUT_PATH, "wb") as audio_file:
            audio_file.write(response.content)
    except OSError as err:
        print(f"Warning: could not write {AUDIO_OUTPUT_PATH}: {err}")
        return None
    return AUDIO_OUTPUT_PATH


def process_frame(image, user_msg):
    """Handle one webcam capture: verify the owner, read mood, and reply.

    Args:
        image: The captured frame as delivered by the Gradio image component
            (configured with ``type="numpy"``), or None if nothing was
            captured. It is treated as RGB.
        user_msg: The text typed by the user; echoed back in the reply.

    Returns:
        A ``(reply_text, audio_path)`` tuple. ``audio_path`` is None when no
        image was captured, when the owner was not recognised, or when speech
        synthesis failed.
    """
    if image is None:
        return "No image captured.", None

    # 1. Face Recognition
    if not _is_owner(image):
        return "Stranger detected. Access Denied.", None

    # 2. Mood Detection
    # Convert Gradio image (RGB) to BGR for OpenCV; only needed from here on.
    frame = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    mood = _detect_mood(frame)

    # 3. Get Bot Reply (Placeholder for your logic)
    bot_reply = f"Hello Owner. I see you are {mood}. You said: {user_msg}"

    # 4. Speech Synthesis via Kokoro
    audio_path = _synthesize_speech(bot_reply)

    return bot_reply, audio_path


# --- GRADIO INTERFACE ---
with gr.Blocks() as demo:
    gr.Markdown("# AI Face & Mood Assistant")

    with gr.Row():
        with gr.Column():
            input_img = gr.Image(
                sources=["webcam"], type="numpy", label="Show your face"
            )
            input_text = gr.Textbox(
                label="Message for Bot", placeholder="Type something..."
            )
            btn = gr.Button("Send to Bot")

        with gr.Column():
            output_text = gr.Textbox(label="Bot Response")
            output_audio = gr.Audio(label="Bot Voice", type="filepath")

    btn.click(
        fn=process_frame,
        inputs=[input_img, input_text],
        outputs=[output_text, output_audio],
    )

if __name__ == "__main__":
    demo.launch()