File size: 6,347 Bytes
618cf4f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
import gradio as gr
import cv2
import numpy as np
import os
from utils import (
    register_new_face,
    process_video_frame,
    generate_gemini_response,
    draw_overlays
)

# --- Global State Initialization ---
# In a real deployment, you might use a database. 
# For this demo, we use Gradio State for session-specific storage.

def create_app():
    """Build and return the Gradio Blocks UI for the Gemini Live Identity Chat demo.

    The app has three tabs:
      1. Live Interaction -- streaming webcam face recognition plus voice chat.
      2. Registration     -- enroll a new face into the session-local database.
      3. Settings         -- Gemini API key and system-persona configuration.

    All mutable data (face encodings, current user, chat history, last webcam
    frame) lives in per-session ``gr.State`` objects, so nothing is shared
    between browser sessions. All event handlers are imported from ``utils``.

    Returns:
        gr.Blocks: the assembled (but not yet launched) Gradio app.
    """
    with gr.Blocks(title="Gemini Live Identity Chat", theme=gr.themes.Soft()) as demo:
        
        # --- State Variables (per-session; reset on page reload) ---
        # known_faces: dict {name: encoding}
        # (encoding format is whatever utils.register_new_face stores -- opaque here)
        known_faces_state = gr.State(value={}) 
        # current_user: display name of the most recently recognized person
        current_user_state = gr.State(value="Unknown")
        # chat_history: conversation record threaded through generate_gemini_response.
        # NOTE(review): the Chatbot below is type="messages", so entries are
        # presumably {"role": ..., "content": ...} dicts, not [user, bot] pairs --
        # confirm against the history handling in utils.
        history_state = gr.State(value=[])
        # current_frame: last webcam frame, attached to a query only when the
        # "allow Gemini to see" toggle is checked
        last_frame_state = gr.State(value=None)

        # --- Header ---
        with gr.Row(elem_classes="header"):
            gr.Markdown(
                """
                # πŸŽ™οΈ Gemini Live Identity Chat
                [Built with anycoder](https://huggingface.co/spaces/akhaliq/anycoder)
                """
            )

        # --- Main Layout ---
        with gr.Tabs():
            
            # TAB 1: Live Interaction
            with gr.Tab("πŸ’¬ Live Interaction"):
                with gr.Row():
                    # Left Column: Vision & Identity
                    with gr.Column(scale=1):
                        gr.Markdown("### πŸ‘οΈ Vision & Identity")
                        
                        # Webcam feed; streamed frames drive face recognition and
                        # the component is also reused as the annotated output.
                        input_webcam = gr.Image(
                            label="Live Feed", 
                            sources=["webcam"], 
                            streaming=True,
                            type="numpy"
                        )
                        
                        # Markdown status line updated by the recognition stream
                        user_status = gr.Markdown(
                            value="**πŸ‘€ Detected:** Unknown", 
                            elem_id="status-box"
                        )
                        
                        # Opt-in toggle: when checked, the last captured frame is
                        # sent along with the user's audio for multimodal queries.
                        use_vision_toggle = gr.Checkbox(
                            label="πŸ‘€ Allow Gemini to see this video frame",
                            value=False,
                            info="If checked, the current image will be sent with your audio."
                        )

                    # Right Column: Chat
                    with gr.Column(scale=2):
                        gr.Markdown("### πŸ—£οΈ Conversation")
                        
                        # type="messages" expects role/content dicts from the handler
                        chatbot = gr.Chatbot(
                            label="Chat History",
                            height=500,
                            type="messages",
                            avatar_images=(None, "https://www.gstatic.com/lamda/images/gemini_sparkle_v002_d4735304ff6292a690345.svg")
                        )
                        
                        with gr.Row():
                            # filepath type: the handler receives a path to the
                            # recorded audio file, not raw samples
                            audio_input = gr.Audio(
                                sources=["microphone"], 
                                type="filepath",
                                label="Voice Input (Recording stops automatically)",
                                editable=False
                            )
                        
                        clear_btn = gr.Button("Clear Conversation", variant="secondary")

            # TAB 2: Registration
            with gr.Tab("πŸ‘€ Registration"):
                gr.Markdown("### Register a New Face")
                with gr.Row():
                    with gr.Column():
                        reg_name = gr.Textbox(label="Name", placeholder="Enter your name")
                        reg_image = gr.Image(label="Upload Photo", sources=["upload", "webcam"], type="numpy")
                        reg_btn = gr.Button("Register Face", variant="primary")
                    
                    with gr.Column():
                        # Read-only view of the session's face database; updated
                        # by the registration handler below.
                        gr.Markdown("### Registered Users")
                        registered_list = gr.JSON(label="Database", value={})

            # TAB 3: Configuration
            with gr.Tab("βš™οΈ Settings"):
                gr.Markdown("### App Configuration")
                # Key is kept client-side in the component and passed to the
                # handler per request; it is never stored in server state.
                api_key_input = gr.Textbox(
                    label="Gemini API Key", 
                    type="password", 
                    placeholder="Paste your Google AI Studio Key here",
                    info="Required for chat functionality."
                )
                
                system_prompt_input = gr.Textbox(
                    label="System Persona", 
                    value="You are a helpful, conversational assistant. Keep responses concise.",
                    lines=3
                )

        # --- Event Wiring ---

        # 1. Face Recognition Loop
        # Each streamed frame goes through process_video_frame, which must return
        # (annotated_frame, user_name, status_markdown, raw_frame) to match the
        # four outputs below. The annotated frame is written back onto the same
        # webcam component.
        input_webcam.stream(
            fn=process_video_frame,
            inputs=[input_webcam, known_faces_state],
            outputs=[input_webcam, current_user_state, user_status, last_frame_state],
            time_limit=None,
            stream_every=0.1  # Limit FPS for performance
        )

        # 2. Audio Chat Interaction
        # Triggered when the user stops recording audio. The handler receives the
        # identity, API key, persona, and (optionally) the last frame, and must
        # return (new_history, chatbot_value, audio_reset).
        audio_input.stop_recording(
            fn=generate_gemini_response,
            inputs=[
                audio_input, 
                history_state, 
                current_user_state, 
                api_key_input, 
                system_prompt_input,
                use_vision_toggle,
                last_frame_state
            ],
            outputs=[history_state, chatbot, audio_input] # Clear audio input after sending
        )

        # 3. Registration Logic
        # register_new_face must return (updated_faces, json_view, name_reset,
        # image_reset) to match the outputs; it both updates state and clears
        # the input fields.
        reg_btn.click(
            fn=register_new_face,
            inputs=[reg_name, reg_image, known_faces_state],
            outputs=[known_faces_state, registered_list, reg_name, reg_image]
        )
        
        # 4. Clear Chat
        # Resets both the state history and the visible chatbot in one event.
        def clear_history():
            return [], []
        clear_btn.click(clear_history, None, [history_state, chatbot])

    return demo

# Launch the app with default server settings when run as a script.
if __name__ == "__main__":
    create_app().launch()