File size: 3,161 Bytes
377ccda
 
 
 
47eb038
 
1418eff
377ccda
47eb038
1418eff
377ccda
1418eff
 
47eb038
377ccda
47eb038
 
1418eff
47eb038
 
 
 
 
1418eff
47eb038
377ccda
47eb038
 
 
 
 
 
 
377ccda
47eb038
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
377ccda
47eb038
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import cv2
import face_recognition
import requests
import os
import gradio as gr
import numpy as np
from fer import FER

# --- CONFIG / API SETUP ---
# HTTP endpoint that process_frame posts the bot reply to for speech synthesis.
KOKORO_API_URL = "https://shahid202-kokoro-api.hf.space/generate"
# Reference photo used to build the owner's face encoding at import time.
OWNER_IMAGE_PATH = "faces/owner.jpg"

# Initialize Emotion Detector (Lighter than DeepFace)
emotion_detector = FER(mtcnn=False)

# Load Owner Face Encoding
# owner_encoding stays None whenever no usable reference face is available;
# in that case every visitor is treated as a stranger.
owner_encoding = None
if os.path.exists(OWNER_IMAGE_PATH):
    img = face_recognition.load_image_file(OWNER_IMAGE_PATH)
    encodings = face_recognition.face_encodings(img)
    if encodings:
        # Only the first face found in the reference photo is used.
        owner_encoding = encodings[0]
        print("Owner profile loaded.")
    else:
        # The file exists but no face was detected in it; say so instead of
        # failing silently, otherwise the owner is locked out with no hint why.
        print(f"Warning: no face found in {OWNER_IMAGE_PATH}. Everyone will be a stranger.")
else:
    print(f"Warning: {OWNER_IMAGE_PATH} not found. Everyone will be a stranger.")

# Upper bound (seconds) on the speech-synthesis HTTP call so a stalled
# endpoint cannot block the UI handler forever.
_KOKORO_TIMEOUT_SECONDS = 60


def _is_owner_present(image):
    """Return True if any face detected in *image* matches the owner encoding."""
    if owner_encoding is None:
        # No reference profile was loaded, so nothing can match; skip the
        # face detection work entirely.
        return False

    face_locations = face_recognition.face_locations(image)
    face_encodings = face_recognition.face_encodings(image, face_locations)
    # compare_faces returns one boolean per known encoding; we pass exactly one.
    return any(
        face_recognition.compare_faces([owner_encoding], enc)[0]
        for enc in face_encodings
    )


def _detect_mood(image):
    """Return the top-scoring emotion label of the first detected face, or "Neutral"."""
    # Convert Gradio image (RGB) to BGR for OpenCV
    frame = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    # detect_emotions returns a list of dictionaries
    emotions = emotion_detector.detect_emotions(frame)
    if not emotions:
        return "Neutral"

    # Get the top emotion from the first face found
    scores = emotions[0]["emotions"]
    return max(scores, key=scores.get)


def _synthesize_speech(text, audio_path="output.wav"):
    """Post *text* to the Kokoro API and save the response body to *audio_path*.

    Returns *audio_path* on success, or None when the request fails, the
    server answers with a non-200 status, or the file cannot be written.
    """
    payload = {"text": text, "voice": "af_bella", "speed": 1.0}

    try:
        r = requests.post(
            KOKORO_API_URL, json=payload, timeout=_KOKORO_TIMEOUT_SECONDS
        )
        if r.status_code != 200:
            return None
        with open(audio_path, "wb") as f:
            f.write(r.content)
    except (requests.RequestException, OSError) as exc:
        # Speech is best-effort: report the problem and fall back to a
        # text-only reply rather than failing the whole request.
        print(f"Warning: speech synthesis failed: {exc}")
        return None

    return audio_path


def process_frame(image, user_msg):
    """
    Function to handle the image from the web cam,
    recognize face, detect mood, and talk to the API.

    Args:
        image: Frame captured by the UI, or None when nothing was captured.
            The RGB-to-BGR conversion below assumes an RGB array.
        user_msg: Text typed by the user; echoed back in the reply.

    Returns:
        A ``(reply_text, audio_path)`` tuple. ``audio_path`` is None when no
        image was captured, when the owner is not recognised, or when speech
        synthesis fails.
    """
    if image is None:
        return "No image captured.", None

    # 1. Face Recognition
    if not _is_owner_present(image):
        return "Stranger detected. Access Denied.", None

    # 2. Mood Detection
    mood = _detect_mood(image)

    # 3. Get Bot Reply (Placeholder for your logic)
    bot_reply = f"Hello Owner. I see you are {mood}. You said: {user_msg}"

    # 4. Speech Synthesis via Kokoro
    return bot_reply, _synthesize_speech(bot_reply)

# --- GRADIO INTERFACE ---
# Builds the web UI: a title, then a single row with two columns
# (inputs on the left, bot outputs on the right), and wires the button
# to process_frame.
with gr.Blocks() as demo:
    gr.Markdown("# AI Face & Mood Assistant")
    
    with gr.Row():
        # Left column: what the user provides.
        with gr.Column():
            # Webcam is the only allowed image source; type="numpy" is what
            # process_frame's cv2 / face_recognition calls operate on.
            input_img = gr.Image(sources=["webcam"], type="numpy", label="Show your face")
            input_text = gr.Textbox(label="Message for Bot", placeholder="Type something...")
            btn = gr.Button("Send to Bot")
        
        # Right column: what the bot returns.
        with gr.Column():
            output_text = gr.Textbox(label="Bot Response")
            # type="filepath" matches process_frame, which returns a path
            # to the saved audio file (or None).
            output_audio = gr.Audio(label="Bot Voice", type="filepath")

    # On click, process_frame receives (image, message) and its two return
    # values populate the response text box and the audio player, in order.
    btn.click(
        fn=process_frame, 
        inputs=[input_img, input_text], 
        outputs=[output_text, output_audio]
    )

# Start the Gradio server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()