Spaces:
Build error
Build error
| import gradio as gr | |
| import cv2 | |
| import numpy as np | |
| import os | |
| from utils import ( | |
| register_new_face, | |
| process_video_frame, | |
| generate_gemini_response, | |
| draw_overlays | |
| ) | |
# --- Application State ---
# In a real deployment, you might use a database.
# For this demo, all state lives in per-session gr.State objects that are
# created inside create_app(); nothing is shared globally between sessions.
def create_app() -> gr.Blocks:
    """Build the Gradio UI for the Gemini Live Identity Chat demo.

    The interface has three tabs:

    * Live Interaction - a streaming webcam feed handled by
      ``process_video_frame`` next to a voice-driven chat handled by
      ``generate_gemini_response``.
    * Registration - a name/photo form handled by ``register_new_face``.
    * Settings - the Gemini API key and the system persona prompt.

    All mutable data (known faces, detected user, chat history, last webcam
    frame) is held in ``gr.State`` components created here.

    Returns:
        The assembled ``gr.Blocks`` app. It is not launched; the caller is
        expected to call ``.launch()`` on it.
    """
    with gr.Blocks(title="Gemini Live Identity Chat", theme=gr.themes.Soft()) as demo:
        # --- State Variables ---
        # known_faces: dict {name: encoding}
        known_faces_state = gr.State(value={})
        # current_user: str, name of the person currently detected
        current_user_state = gr.State(value="Unknown")
        # chat_history: conversation so far. Its element format is whatever
        # generate_gemini_response returns.
        # NOTE(review): the Chatbot below uses type="messages", which expects
        # {"role": ..., "content": ...} dicts rather than [user, bot] pairs -
        # confirm the handler in utils produces that shape.
        history_state = gr.State(value=[])
        # current_frame: the last frame, kept for multimodal queries
        last_frame_state = gr.State(value=None)

        # --- Header ---
        with gr.Row(elem_classes="header"):
            gr.Markdown(
                """
                # 👁️ Gemini Live Identity Chat
                [Built with anycoder](https://huggingface.co/spaces/akhaliq/anycoder)
                """
            )

        # --- Main Layout ---
        with gr.Tabs():
            # TAB 1: Live Interaction
            with gr.Tab("💬 Live Interaction"):
                with gr.Row():
                    # Left Column: Vision & Identity
                    with gr.Column(scale=1):
                        gr.Markdown("### 👁️ Vision & Identity")

                        # Input webcam for face recognition
                        input_webcam = gr.Image(
                            label="Live Feed",
                            sources=["webcam"],
                            streaming=True,
                            type="numpy",
                        )

                        # Status display
                        user_status = gr.Markdown(
                            value="**👤 Detected:** Unknown",
                            elem_id="status-box",
                        )

                        # Multimodal toggle
                        # NOTE(review): the leading emoji of this label could
                        # not be recovered from the mis-encoded source; the
                        # eyes emoji is a best guess - confirm.
                        use_vision_toggle = gr.Checkbox(
                            label="👀 Allow Gemini to see this video frame",
                            value=False,
                            info="If checked, the current image will be sent with your audio.",
                        )

                    # Right Column: Chat
                    with gr.Column(scale=2):
                        gr.Markdown("### 🗣️ Conversation")

                        chatbot = gr.Chatbot(
                            label="Chat History",
                            height=500,
                            type="messages",
                            avatar_images=(
                                None,
                                "https://www.gstatic.com/lamda/images/gemini_sparkle_v002_d4735304ff6292a690345.svg",
                            ),
                        )

                        # NOTE(review): the source lost its indentation, so
                        # placing the clear button inside this row (next to
                        # the recorder) is an assumption - confirm.
                        with gr.Row():
                            audio_input = gr.Audio(
                                sources=["microphone"],
                                type="filepath",
                                label="Voice Input (Recording stops automatically)",
                                editable=False,
                            )
                            clear_btn = gr.Button("Clear Conversation", variant="secondary")

            # TAB 2: Registration
            with gr.Tab("👤 Registration"):
                gr.Markdown("### Register a New Face")
                with gr.Row():
                    with gr.Column():
                        reg_name = gr.Textbox(label="Name", placeholder="Enter your name")
                        reg_image = gr.Image(
                            label="Upload Photo",
                            sources=["upload", "webcam"],
                            type="numpy",
                        )
                        reg_btn = gr.Button("Register Face", variant="primary")
                    with gr.Column():
                        gr.Markdown("### Registered Users")
                        registered_list = gr.JSON(label="Database", value={})

            # TAB 3: Configuration
            with gr.Tab("⚙️ Settings"):
                gr.Markdown("### App Configuration")
                api_key_input = gr.Textbox(
                    label="Gemini API Key",
                    type="password",
                    placeholder="Paste your Google AI Studio Key here",
                    info="Required for chat functionality.",
                )
                system_prompt_input = gr.Textbox(
                    label="System Persona",
                    value="You are a helpful, conversational assistant. Keep responses concise.",
                    lines=3,
                )

        # --- Event Wiring ---
        # Listeners are registered inside the Blocks context so they attach
        # to this app.

        # 1. Face Recognition Loop
        # Each streamed frame goes to process_video_frame together with the
        # known faces. Its four outputs are routed to: the webcam image, the
        # current-user state, the status text, and the last-frame state.
        input_webcam.stream(
            fn=process_video_frame,
            inputs=[input_webcam, known_faces_state],
            outputs=[input_webcam, current_user_state, user_status, last_frame_state],
            time_limit=None,
            stream_every=0.1,  # Limit FPS for performance
        )

        # 2. Audio Chat Interaction
        # Triggered when the user stops recording audio.
        audio_input.stop_recording(
            fn=generate_gemini_response,
            inputs=[
                audio_input,
                history_state,
                current_user_state,
                api_key_input,
                system_prompt_input,
                use_vision_toggle,
                last_frame_state,
            ],
            # audio_input is also an output so the recorder can be cleared
            # after sending.
            outputs=[history_state, chatbot, audio_input],
        )

        # 3. Registration Logic
        # reg_name and reg_image are outputs too, so the handler's return
        # values also overwrite the form fields.
        reg_btn.click(
            fn=register_new_face,
            inputs=[reg_name, reg_image, known_faces_state],
            outputs=[known_faces_state, registered_list, reg_name, reg_image],
        )

        # 4. Clear Chat
        def clear_history():
            """Return empty values for the history state and the chatbot."""
            return [], []

        clear_btn.click(clear_history, None, [history_state, chatbot])

    return demo
if __name__ == "__main__":
    # Script entry point: build the UI, keep it bound to the module-level
    # name `demo`, and start the Gradio server.
    demo = create_app()
    demo.launch()