Spaces:
Sleeping
Sleeping
File size: 5,805 Bytes
59ca75b 043b0a7 7e90d60 c31f079 043b0a7 fa0fce0 b5858b4 59ca75b ef053b6 043b0a7 59ca75b b5858b4 343e5d9 0a43963 b5858b4 f19bcb5 343e5d9 f19bcb5 343e5d9 f19bcb5 b5858b4 f19bcb5 b5858b4 f19bcb5 0a43963 343e5d9 fa0fce0 043b0a7 343e5d9 043b0a7 d3be97c 043b0a7 343e5d9 9ee530a 043b0a7 fa0fce0 76ef17b 043b0a7 fa0fce0 043b0a7 fa0fce0 76ef17b 043b0a7 fa0fce0 043b0a7 b5858b4 0a43963 075ef94 b5858b4 86e45fd b5858b4 0a43963 b5858b4 0a43963 b5858b4 0a43963 b5858b4 043b0a7 343e5d9 043b0a7 9febdc2 043b0a7 9febdc2 043b0a7 9febdc2 b5858b4 4aa95c4 b5858b4 9febdc2 b5858b4 043b0a7 0a43963 9febdc2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 | import gradio as gr
import os
import uuid
import tempfile
from method import *
# --- Retrieval state, written by process_youtube_link() ---
VECTOR_DB = None  # result of build_vector_store(); None until a video has been processed
CAPTIONS = []  # captions, indexed in step with FRAMES by handle_query()
FRAMES = []  # frames returned by extract_unique_frames()
caption_text = []  # one caption per frame
caption_embedding = []  # one embed_func() result per caption

# --- Face-extraction state, written by face_detect() ---
extracted_face = []  # items returned by face_crop(); each is read as item["face"]
face_frames = []  # frames sampled for face detection

# Gradio UI components (populated while the Blocks layout is built)
face_image_boxes = []
face_name_inputs = []
def handle_query(text):
    """Answer a text query against the processed video.

    Returns a ``(caption, frame)`` pair taken from ``CAPTIONS`` and ``FRAMES``
    at the index that ``search_query`` returns for *text*. When no vector
    store has been built yet, returns a prompt message and ``None`` instead.
    """
    if VECTOR_DB is not None:
        match_index = search_query(text, VECTOR_DB)
        return CAPTIONS[match_index], FRAMES[match_index]
    return "Please process a video first.", None
def save_face_names(*names):
    """Collect the names entered for the displayed faces.

    Args:
        *names: One value per name textbox, in the order the textboxes are
            wired as inputs.

    Returns:
        Status message shown in the UI.
    """
    face_name_map = {f"face_{i}": name for i, name in enumerate(names)}
    # The mapping is only printed here; it is not stored anywhere.
    print("Saved Names Mapping:", face_name_map)
    return "✅ Names saved for all faces."
def prepare_faces_and_show():
    """Build the component updates that reveal extracted faces for naming.

    Returns:
        A flat list of ``gr.update`` objects: one for the status textbox,
        then one per face image component, then one per name textbox. This
        is the order in which the "Show Faces" button lists its outputs.
    """
    # Derive the slot counts from the components that were actually created,
    # so the number of updates cannot drift from the layout.
    image_slots = len(face_image_boxes)
    name_slots = len(face_name_inputs)

    if not extracted_face:
        updates = [gr.update(value="❌ No faces extracted yet.")]
        updates.extend(
            gr.update(visible=False) for _ in range(image_slots + name_slots)
        )
        return updates

    # Faces beyond the available slots are not displayed.
    image_updates = []
    for i in range(image_slots):
        if i < len(extracted_face):
            face_img = extracted_face[i]["face"]
            image_updates.append(gr.update(value=face_img, visible=True))
        else:
            image_updates.append(gr.update(visible=False))

    textbox_updates = []
    for i in range(name_slots):
        if i < len(extracted_face):
            # Reset any previously typed name when the faces are (re)shown.
            textbox_updates.append(gr.update(visible=True, value=""))
        else:
            textbox_updates.append(gr.update(visible=False))

    status_update = [gr.update(value="✅ Faces ready. Enter names below.")]
    return status_update + image_updates + textbox_updates
def process_youtube_link(video_file):
    """Caption the frames of an uploaded video and build the search index.

    Despite the name, the argument is the value of the ``gr.Video`` upload
    component, not a URL.

    Args:
        video_file: Uploaded video as passed by Gradio; ``None`` when nothing
            was uploaded.

    Returns:
        Status message for the UI. Exceptions raised while processing are
        caught and reported in the message rather than propagated.
    """
    global VECTOR_DB, CAPTIONS, FRAMES
    try:
        if video_file is None:
            return "❌ Please upload a video file."

        gr.Info("Extracting frames...")
        frames = extract_unique_frames(video_file)
        print('extract_unique_frames done')
        if len(frames) == 0:
            return "❌ No frames could be extracted from the video."

        gr.Info("Generating captions...")
        captions = [caption_image(frame) for frame in frames]
        print('caption done')

        gr.Info("Creating embeddings...")
        embeddings = [embed_func(caption) for caption in captions]

        gr.Info("Building vector store...")
        vector_db = build_vector_store(embed=np.array(embeddings))
        print('vector store done')

        # Publish the new state only after every step succeeded, so a failed
        # run leaves the previous video's captions, frames and index intact
        # and consistent with each other.
        caption_text[:] = captions
        caption_embedding[:] = embeddings
        VECTOR_DB = vector_db
        CAPTIONS = caption_text
        FRAMES = frames
        return "✅ Processing complete. You can now enter a query."
    except Exception as e:
        return f"❌ Error: {e}"
def face_detect(video_file):
    """Sample frames from an uploaded video and collect the faces found.

    Results are accumulated in the module-level ``extracted_face`` list,
    which is cleared at the start of every run.

    Args:
        video_file: Uploaded video as passed by Gradio; ``None`` when nothing
            was uploaded.

    Returns:
        Status message for the UI. Exceptions are caught and reported in the
        message rather than propagated.
    """
    global face_frames
    try:
        if video_file is None:
            return "❌ Please upload a video file."

        extracted_face.clear()
        gr.Info("Extracting frames...")
        face_frames = extract_unique_frames(video_file, interval_sec=5)
        print('extract_unique_frames done')
        if len(face_frames) == 0:
            return "❌ No frames could be extracted from the video."

        for i, frame in enumerate(face_frames):
            print('f frm:', i)
            # face_crop returns list of dicts per image — loop over them
            try:
                cropped_faces = face_crop(frame)
                extracted_face.extend(cropped_faces)
                print('extracted_face: ', len(extracted_face))
            except Exception as e:
                # Best effort: a frame that fails is skipped, but the failure
                # is logged instead of being silently discarded.
                print('face_crop failed on frame', i, ':', e)

        print('extracted_face done')
        return "✅ Face extraction done. Click 'Show Faces' to review."
    except Exception as e:
        print('error: ', e)
        return f"❌ Error: {e}"
# Two-tab Gradio app: video captioning/search, and face extraction/naming.
with gr.Blocks() as demo:
    gr.Markdown("# 🎥 Upload Your Video, CHAT_RAG")

    with gr.Tabs():
        with gr.TabItem("Video Upload & Query"):
            video_input = gr.Video(label="Upload video file (MP4, AVI, etc.)")
            process_button = gr.Button("Submit")
            process_output = gr.Textbox(label="Status")

            query_input = gr.Textbox(label="Enter a query")
            query_button = gr.Button("Search")
            query_caption = gr.Textbox(label="Matching Caption")
            query_image = gr.Image(label="Matching Frame")

            process_button.click(
                fn=process_youtube_link,
                inputs=video_input,
                outputs=process_output,
            )
            query_button.click(
                fn=handle_query,
                inputs=query_input,
                outputs=[query_caption, query_image],
            )

        with gr.TabItem("Face Extractor"):
            gr.Markdown("### 🧠 Upload a video, detect faces, assign names")
            face_video_input = gr.Video(label="Upload video for face extraction")
            face_process_button = gr.Button("Submit")
            face_status = gr.Textbox(label="Status")
            face_process_button.click(
                fn=face_detect,
                inputs=face_video_input,
                outputs=face_status,
            )

            show_faces_btn = gr.Button("Show Faces")

            # Scrollable display (row of faces with textboxes).
            # Components start hidden and are revealed by "Show Faces";
            # prepare_faces_and_show() must return exactly one update per
            # component created in this loop, plus one for the status box.
            with gr.Row():
                for i in range(20):
                    with gr.Column(visible=True):
                        img_box = gr.Image(
                            label=f"Face {i+1}", interactive=False, visible=False
                        )
                        txt_box = gr.Textbox(label="Enter Name", visible=False)
                        face_image_boxes.append(img_box)
                        face_name_inputs.append(txt_box)

            save_names_btn = gr.Button("Save Names")

            show_faces_btn.click(
                fn=prepare_faces_and_show,
                outputs=[face_status] + face_image_boxes + face_name_inputs,
            )
            save_names_btn.click(
                fn=save_face_names,
                inputs=face_name_inputs,
                outputs=face_status,
            )

demo.launch(debug=True)
|