"""Gradio app: caption-based video frame search plus a face-naming tab.

Tab 1 extracts frames from an uploaded video, captions and embeds them, and
answers text queries with the best-matching caption and frame.
Tab 2 extracts face crops from an uploaded video and lets the user type a
name for each one.
"""

import os
import tempfile
import uuid

import gradio as gr
import numpy as np

# The helpers used below (search_query, extract_unique_frames, caption_image,
# embed_func, build_vector_store, face_crop) come from this star import. It is
# kept last so that anything it exports still takes precedence, as before.
from method import *  # noqa: E402,F401,F403

# Number of face slots (image + name textbox pairs) rendered in the UI.
MAX_FACES = 20

# --- State for the "Video Upload & Query" tab -------------------------------
VECTOR_DB = None
CAPTIONS = []
FRAMES = []
caption_text = []
caption_embedding = []

# --- State for the "Face Extractor" tab -------------------------------------
extracted_face = []
face_frames = []

# Gradio UI components (populated while the Blocks layout is built below).
face_image_boxes = []
face_name_inputs = []


def handle_query(text):
    """Return the caption and frame that best match a text query.

    Args:
        text: Query string typed by the user.

    Returns:
        A ``(caption, frame)`` pair. When no video has been processed yet, or
        the query is empty, the first element is a hint message and the second
        is ``None``.
    """
    if VECTOR_DB is None:
        return "Please process a video first.", None
    if not text or not text.strip():
        return "Please enter a query.", None

    # search_query's result is used directly as a position into both lists.
    idx = search_query(text, VECTOR_DB)
    return CAPTIONS[idx], FRAMES[idx]


def save_face_names(*names):
    """Build and log the ``face_<i> -> name`` mapping from the name textboxes.

    Args:
        *names: One value per name textbox, in UI order.

    Returns:
        A status message for the status textbox.
    """
    # NOTE(review): the mapping is only printed, not persisted anywhere.
    face_name_map = {f"face_{i}": name for i, name in enumerate(names)}
    print("Saved Names Mapping:", face_name_map)
    return "✅ Names saved for all faces."


def prepare_faces_and_show():
    """Produce UI updates that reveal the extracted faces and their textboxes.

    Returns:
        A list of ``1 + 2 * MAX_FACES`` updates: the status textbox first,
        then one update per image slot, then one per name textbox. Slots
        without a face are hidden.
    """
    if not extracted_face:
        hidden = [gr.update(visible=False) for _ in range(2 * MAX_FACES)]
        return [gr.update(value="❌ No faces extracted yet.")] + hidden

    # Only the first MAX_FACES faces fit in the UI; the rest are not shown.
    shown = extracted_face[:MAX_FACES]
    unused_slots = MAX_FACES - len(shown)

    image_updates = [
        gr.update(value=face["face"], visible=True) for face in shown
    ]
    textbox_updates = [gr.update(visible=True, value="") for _ in shown]
    image_updates.extend(gr.update(visible=False) for _ in range(unused_slots))
    textbox_updates.extend(
        gr.update(visible=False) for _ in range(unused_slots)
    )

    status_update = [gr.update(value="✅ Faces ready. Enter names below.")]
    return status_update + image_updates + textbox_updates


def process_youtube_link(video_file):
    """Caption and index the frames of an uploaded video for querying.

    Despite its name, this handler receives the uploaded video file from the
    ``gr.Video`` input, not a YouTube URL.

    The new captions, frames and vector store are built in local variables
    and published to the module globals only after every step succeeded, so a
    failed run never leaves ``VECTOR_DB`` paired with stale or partial
    ``CAPTIONS`` / ``FRAMES``.

    Args:
        video_file: Value of the video input, or ``None`` if nothing was
            uploaded.

    Returns:
        A status message for the status textbox.
    """
    global VECTOR_DB, CAPTIONS, FRAMES

    if video_file is None:
        return "❌ Please upload a video file."

    try:
        gr.Info("Extracting frames...")
        frames = extract_unique_frames(video_file)
        print('extract_unique_frames done')
        # Assumes extract_unique_frames returns a sized sequence.
        if len(frames) == 0:
            return "❌ Error: no frames could be extracted from the video."

        gr.Info("Generating captions...")
        captions = [caption_image(frame) for frame in frames]
        print('caption done')

        gr.Info("Creating embeddings...")
        embeddings = [embed_func(caption) for caption in captions]

        gr.Info("Building vector store...")
        vector_db = build_vector_store(embed=np.array(embeddings))
        print('vector store done')
    except Exception as e:
        print('error: ', e)
        return f"❌ Error: {str(e)}"

    # Publish everything together. The working lists are updated in place so
    # any external reference to them stays valid.
    caption_text[:] = captions
    caption_embedding[:] = embeddings
    VECTOR_DB = vector_db
    CAPTIONS = captions
    FRAMES = frames
    return "✅ Processing complete. You can now enter a query."


def face_detect(video_file):
    """Extract face crops from an uploaded video into ``extracted_face``.

    Frames are sampled with ``interval_sec=5``. Face cropping is best-effort:
    a frame whose crop fails is logged and skipped, and extraction continues
    with the next frame.

    Args:
        video_file: Value of the video input, or ``None`` if nothing was
            uploaded.

    Returns:
        A status message for the status textbox.
    """
    global face_frames

    if video_file is None:
        return "❌ Please upload a video file."

    try:
        # Drop faces from any previous video before collecting new ones.
        extracted_face.clear()

        gr.Info("Extracting frames...")
        face_frames = extract_unique_frames(video_file, interval_sec=5)
        print('extract_unique_frames done')

        for i, frame in enumerate(face_frames):
            print('f frm:', i)
            try:
                # face_crop's result is extended into the list, so it is
                # expected to be an iterable of face records per frame.
                cropped_faces = face_crop(frame)
            except Exception as exc:
                print(f'face_crop failed on frame {i}: {exc}')
                continue
            extracted_face.extend(cropped_faces)
            print('extracted_face: ', len(extracted_face))

        print('extracted_face done')
        return "✅ Face extraction done. Click 'Show Faces' to review."
    except Exception as e:
        print('error: ', e)
        return f"❌ Error: {str(e)}"


with gr.Blocks() as demo:
    gr.Markdown("# 🎥 Upload Your Video, CHAT_RAG")

    with gr.Tabs():
        with gr.TabItem("Video Upload & Query"):
            video_input = gr.Video(label="Upload video file (MP4, AVI, etc.)")
            process_button = gr.Button("Submit")
            process_output = gr.Textbox(label="Status")

            query_input = gr.Textbox(label="Enter a query")
            query_button = gr.Button("Search")
            query_caption = gr.Textbox(label="Matching Caption")
            query_image = gr.Image(label="Matching Frame")

            process_button.click(
                fn=process_youtube_link,
                inputs=video_input,
                outputs=process_output,
            )
            query_button.click(
                fn=handle_query,
                inputs=query_input,
                outputs=[query_caption, query_image],
            )

        with gr.TabItem("Face Extractor"):
            gr.Markdown("### 🧠 Upload a video, detect faces, assign names")
            face_video_input = gr.Video(
                label="Upload video for face extraction"
            )
            face_process_button = gr.Button("Submit")
            face_status = gr.Textbox(label="Status")
            face_process_button.click(
                fn=face_detect,
                inputs=face_video_input,
                outputs=face_status,
            )

            show_faces_btn = gr.Button("Show Faces")

            # Row of face slots, each an image with a name textbox. Slots
            # start hidden and are revealed by prepare_faces_and_show.
            with gr.Row():
                for i in range(MAX_FACES):
                    with gr.Column(visible=True):
                        img_box = gr.Image(
                            label=f"Face {i+1}",
                            interactive=False,
                            visible=False,
                        )
                        txt_box = gr.Textbox(label="Enter Name", visible=False)
                        face_image_boxes.append(img_box)
                        face_name_inputs.append(txt_box)

            save_names_btn = gr.Button("Save Names")

            # The output order must match what prepare_faces_and_show returns:
            # status, then all images, then all textboxes.
            show_faces_btn.click(
                fn=prepare_faces_and_show,
                outputs=[face_status] + face_image_boxes + face_name_inputs,
            )
            save_names_btn.click(
                fn=save_face_names,
                inputs=face_name_inputs,
                outputs=face_status,
            )

demo.launch(debug=True)