# Hugging Face Space upload metadata (kamcio1989, commit 618cf4f verified),
# preserved as a comment so the file remains valid Python.
import gradio as gr
import cv2
import numpy as np
import os
from utils import (
register_new_face,
process_video_frame,
generate_gemini_response,
draw_overlays
)
# --- Global State Initialization ---
# In a real deployment, you might use a database.
# For this demo, we use Gradio State for session-specific storage.
def create_app():
    """Build and return the Gradio Blocks UI for the identity-aware chat demo.

    The app has three tabs:
      1. Live Interaction — webcam face recognition + voice chat with Gemini.
      2. Registration — enroll a new face under a name.
      3. Settings — Gemini API key and system persona.

    All mutable data lives in per-session ``gr.State`` objects, so nothing is
    shared between users.

    Returns:
        gr.Blocks: the assembled (unlaunched) Gradio app.
    """
    with gr.Blocks(title="Gemini Live Identity Chat", theme=gr.themes.Soft()) as demo:
        # --- State Variables ---
        # known_faces: dict {name: encoding}
        known_faces_state = gr.State(value={})
        # current_user: str — name of the most recently recognized person
        current_user_state = gr.State(value="Unknown")
        # chat_history: list of [user_msg, bot_msg]
        history_state = gr.State(value=[])
        # current_frame: stores the last webcam frame for multimodal queries
        last_frame_state = gr.State(value=None)

        # --- Header ---
        with gr.Row(elem_classes="header"):
            gr.Markdown(
                """
# 🎙️ Gemini Live Identity Chat
[Built with anycoder](https://huggingface.co/spaces/akhaliq/anycoder)
"""
            )

        # --- Main Layout ---
        with gr.Tabs():
            # TAB 1: Live Interaction
            with gr.Tab("💬 Live Interaction"):
                with gr.Row():
                    # Left Column: Vision & Identity
                    with gr.Column(scale=1):
                        gr.Markdown("### 👁️ Vision & Identity")
                        # Input webcam for face recognition
                        input_webcam = gr.Image(
                            label="Live Feed",
                            sources=["webcam"],
                            streaming=True,
                            type="numpy"
                        )
                        # Status display
                        user_status = gr.Markdown(
                            value="**👤 Detected:** Unknown",
                            elem_id="status-box"
                        )
                        # Multimodal toggle
                        use_vision_toggle = gr.Checkbox(
                            label="👀 Allow Gemini to see this video frame",
                            value=False,
                            info="If checked, the current image will be sent with your audio."
                        )
                    # Right Column: Chat
                    with gr.Column(scale=2):
                        gr.Markdown("### 🗣️ Conversation")
                        chatbot = gr.Chatbot(
                            label="Chat History",
                            height=500,
                            type="messages",
                            avatar_images=(None, "https://www.gstatic.com/lamda/images/gemini_sparkle_v002_d4735304ff6292a690345.svg")
                        )
                        with gr.Row():
                            audio_input = gr.Audio(
                                sources=["microphone"],
                                type="filepath",
                                label="Voice Input (Recording stops automatically)",
                                editable=False
                            )
                        clear_btn = gr.Button("Clear Conversation", variant="secondary")

            # TAB 2: Registration
            with gr.Tab("👤 Registration"):
                gr.Markdown("### Register a New Face")
                with gr.Row():
                    with gr.Column():
                        reg_name = gr.Textbox(label="Name", placeholder="Enter your name")
                        reg_image = gr.Image(label="Upload Photo", sources=["upload", "webcam"], type="numpy")
                        reg_btn = gr.Button("Register Face", variant="primary")
                    with gr.Column():
                        gr.Markdown("### Registered Users")
                        registered_list = gr.JSON(label="Database", value={})

            # TAB 3: Configuration
            with gr.Tab("⚙️ Settings"):
                gr.Markdown("### App Configuration")
                api_key_input = gr.Textbox(
                    label="Gemini API Key",
                    type="password",
                    placeholder="Paste your Google AI Studio Key here",
                    info="Required for chat functionality."
                )
                system_prompt_input = gr.Textbox(
                    label="System Persona",
                    value="You are a helpful, conversational assistant. Keep responses concise.",
                    lines=3
                )

        # --- Event Wiring ---
        # 1. Face Recognition Loop
        # This stream processes frames, updates the 'current_user', and
        # returns the annotated image.
        input_webcam.stream(
            fn=process_video_frame,
            inputs=[input_webcam, known_faces_state],
            outputs=[input_webcam, current_user_state, user_status, last_frame_state],
            time_limit=None,
            stream_every=0.1  # Limit FPS for performance
        )

        # 2. Audio Chat Interaction
        # Triggered when the user stops recording audio.
        audio_input.stop_recording(
            fn=generate_gemini_response,
            inputs=[
                audio_input,
                history_state,
                current_user_state,
                api_key_input,
                system_prompt_input,
                use_vision_toggle,
                last_frame_state
            ],
            outputs=[history_state, chatbot, audio_input]  # Clear audio input after sending
        )

        # 3. Registration Logic
        reg_btn.click(
            fn=register_new_face,
            inputs=[reg_name, reg_image, known_faces_state],
            outputs=[known_faces_state, registered_list, reg_name, reg_image]
        )

        # 4. Clear Chat
        def clear_history():
            # Reset both the server-side history state and the visible chatbot.
            return [], []

        clear_btn.click(clear_history, None, [history_state, chatbot])

    return demo
if __name__ == "__main__":
    # Build the UI and start the Gradio server in one step.
    create_app().launch()