File size: 5,805 Bytes
59ca75b
043b0a7
 
 
7e90d60
c31f079
043b0a7
 
 
fa0fce0
 
b5858b4
 
 
59ca75b
ef053b6
 
 
 
043b0a7
 
 
 
 
59ca75b
b5858b4
 
 
 
343e5d9
0a43963
b5858b4
 
f19bcb5
 
 
 
343e5d9
f19bcb5
 
343e5d9
f19bcb5
b5858b4
 
f19bcb5
 
b5858b4
f19bcb5
 
 
 
0a43963
 
343e5d9
fa0fce0
043b0a7
 
343e5d9
 
 
 
 
 
043b0a7
d3be97c
043b0a7
343e5d9
9ee530a
043b0a7
 
fa0fce0
 
76ef17b
043b0a7
 
fa0fce0
 
043b0a7
 
fa0fce0
76ef17b
043b0a7
fa0fce0
043b0a7
 
 
 
 
 
 
b5858b4
 
 
 
 
 
 
 
 
0a43963
075ef94
b5858b4
86e45fd
b5858b4
 
0a43963
b5858b4
0a43963
 
 
 
 
 
b5858b4
 
 
 
0a43963
b5858b4
 
 
043b0a7
343e5d9
043b0a7
9febdc2
 
 
 
 
 
 
 
 
 
043b0a7
9febdc2
 
043b0a7
9febdc2
b5858b4
4aa95c4
b5858b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9febdc2
b5858b4
 
 
 
 
043b0a7
0a43963
9febdc2
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
import gradio as gr
import os
import uuid
import tempfile
from method import *

# --- Video search state (written by process_youtube_link, read by handle_query) ---
VECTOR_DB = None        # result of build_vector_store(); None until a video has been processed
CAPTIONS = []           # one caption per indexed frame
FRAMES = []             # the frames the captions were generated from
caption_text = []       # working list of captions for the current video
caption_embedding = []  # working list of caption embeddings for the current video

# --- Face extraction state (written by face_detect) ---
extracted_face = []     # face_crop() results; entries are indexed with ["face"] when displayed
face_frames = []        # frames sampled for face detection

# Gradio UI components, appended to while the Blocks layout is built below.
# (The original assigned face_name_inputs twice; the redundant first
# assignment has been dropped.)
face_image_boxes = []
face_name_inputs = []

def handle_query(text):
    """Look up the caption and frame that best match a text query.

    Returns a ``(caption, frame)`` pair for the index reported by
    ``search_query``. When no video has been processed yet (``VECTOR_DB``
    is still ``None``) a prompt message is returned with ``None`` in
    place of the frame.
    """
    if VECTOR_DB is not None:
        match_index = search_query(text, VECTOR_DB)
        best_caption = CAPTIONS[match_index]
        best_frame = FRAMES[match_index]
        return best_caption, best_frame

    return "Please process a video first.", None

def save_face_names(*names):
    """Map the names typed in the UI onto ``face_<index>`` keys.

    Args:
        *names: One value per name textbox, in slot order.

    Returns:
        A status message for the UI.

    Note:
        The mapping is only printed; nothing in this function stores it.
    """
    face_name_map = {f"face_{i}": name for i, name in enumerate(names)}
    print("Saved Names Mapping:", face_name_map)
    # The status previously contained a mis-decoded check-mark emoji.
    return "✅ Names saved for all faces."


def prepare_faces_and_show():
    """Build the Gradio updates that reveal extracted faces for naming.

    Returns:
        A flat list laid out as ``[status] + image updates + textbox
        updates``. One update is produced per component registered in
        ``face_image_boxes`` / ``face_name_inputs`` so the list always has
        the same length as the outputs wired to the "Show Faces" button.

    Note:
        Faces beyond the number of available image slots are not shown.
    """
    # Derive the slot counts from the registered components instead of
    # repeating the layout size here; the two cannot drift apart this way.
    image_slots = len(face_image_boxes)
    name_slots = len(face_name_inputs)

    if not extracted_face:
        hidden = [gr.update(visible=False) for _ in range(image_slots + name_slots)]
        return [gr.update(value="❌ No faces extracted yet.")] + hidden

    face_count = len(extracted_face)

    image_updates = [
        gr.update(value=extracted_face[slot]["face"], visible=True)
        if slot < face_count
        else gr.update(visible=False)
        for slot in range(image_slots)
    ]
    # Name boxes are shown (and reset to empty) only for slots that hold a face.
    textbox_updates = [
        gr.update(visible=True, value="")
        if slot < face_count
        else gr.update(visible=False)
        for slot in range(name_slots)
    ]

    status_update = [gr.update(value="✅ Faces ready. Enter names below.")]
    return status_update + image_updates + textbox_updates


def process_youtube_link(video_file):
    """Index an uploaded video so it can be searched by text query.

    Pipeline: extract frames, caption each frame, embed each caption, then
    build a vector store over the embeddings. Despite the name, the input
    is the uploaded video file handed over by the ``gr.Video`` component.

    Args:
        video_file: Value of the video input; ``None`` when nothing was
            uploaded.

    Returns:
        A status message for the UI. Failures are reported as an error
        string rather than raised.

    Side effects:
        On success, rebinds ``VECTOR_DB``, ``CAPTIONS``, ``FRAMES``,
        ``caption_text`` and ``caption_embedding``. On failure the state of
        the previously processed video is left untouched, so earlier
        results stay queryable and consistent.
    """
    global VECTOR_DB, CAPTIONS, FRAMES, caption_text, caption_embedding

    if video_file is None:
        return "❌ Please upload a video file."

    try:
        gr.Info("Extracting frames...")
        frames = extract_unique_frames(video_file)
        print('extract_unique_frames done')

        # len() rather than truthiness: the container type returned by
        # extract_unique_frames is not visible here and may be an array.
        if len(frames) == 0:
            return "❌ No frames could be extracted from the video."

        gr.Info("Generating captions...")
        captions = [caption_image(frame) for frame in frames]
        print('caption done')

        gr.Info("Creating embeddings...")
        embeddings = [embed_func(caption) for caption in captions]

        gr.Info("Building vector store...")
        # NOTE(review): `np` is not imported explicitly in this file; it is
        # presumably provided by `from method import *` -- confirm.
        vector_db = build_vector_store(embed=np.array(embeddings))
        print('vector store done')
    except Exception as e:
        return f"❌ Error: {e}"

    # Publish everything together only after the whole pipeline succeeded.
    # Working on the shared lists in place (as before) let a failed run
    # leave CAPTIONS out of sync with the old VECTOR_DB and FRAMES.
    caption_text = captions
    caption_embedding = embeddings
    CAPTIONS = captions
    FRAMES = frames
    VECTOR_DB = vector_db

    return "✅ Processing complete. You can now enter a query."


def face_detect(video_file):
    """Sample frames from a video and collect the faces found in them.

    Args:
        video_file: Value of the video input; ``None`` when nothing was
            uploaded.

    Returns:
        A status message for the UI. Failures are reported as an error
        string rather than raised.

    Side effects:
        Clears and refills ``extracted_face`` and rebinds ``face_frames``.
    """
    global face_frames, extracted_face

    if video_file is None:
        return "❌ Please upload a video file."

    try:
        extracted_face.clear()

        gr.Info("Extracting frames...")
        face_frames = extract_unique_frames(video_file, interval_sec=5)
        print('extract_unique_frames done')

        # Guard before indexing: an empty result used to surface as an
        # opaque "list index out of range" error in the status box.
        if len(face_frames) == 0:
            return "❌ No frames could be extracted from the video."
        print(type(face_frames[0]))

        for i, frame in enumerate(face_frames):
            print('f frm:', i)
            # face_crop is expected to return a list of dicts per image
            # (each later read via ["face"]); collect them all.
            try:
                cropped_faces = face_crop(frame)
                extracted_face.extend(cropped_faces)
                print('extracted_face: ', len(extracted_face))
            except Exception as e:
                # Best effort: a frame that cannot be processed is skipped,
                # but the reason is logged instead of silently discarded.
                print(f'face_crop failed on frame {i}: {e}')

        print('extracted_face done')
        return "✅ Face extraction done. Click 'Show Faces' to review."
    except Exception as e:
        print('error: ', e)
        return f"❌ Error: {e}"
        

# UI layout: two tabs, one for caption-based video search and one for face
# extraction and naming. Event handlers are wired right after the components
# they use are created.
with gr.Blocks() as demo:
    # The title previously contained a mis-decoded camera emoji.
    gr.Markdown("# 🎥 Upload Your Video, CHAT_RAG")

    with gr.Tabs():
        with gr.TabItem("Video Upload & Query"):
            video_input = gr.Video(label="Upload video file (MP4, AVI, etc.)")
            process_button = gr.Button("Submit")
            process_output = gr.Textbox(label="Status")

            query_input = gr.Textbox(label="Enter a query")
            query_button = gr.Button("Search")
            query_caption = gr.Textbox(label="Matching Caption")
            query_image = gr.Image(label="Matching Frame")

            process_button.click(fn=process_youtube_link, inputs=video_input, outputs=process_output)
            query_button.click(fn=handle_query, inputs=query_input, outputs=[query_caption, query_image])

        with gr.TabItem("Face Extractor"):
            gr.Markdown("### 🧠 Upload a video, detect faces, assign names")

            face_video_input = gr.Video(label="Upload video for face extraction")
            face_process_button = gr.Button("Submit")
            face_status = gr.Textbox(label="Status")

            face_process_button.click(fn=face_detect, inputs=face_video_input, outputs=face_status)

            show_faces_btn = gr.Button("Show Faces")

            # Row of face slots: each column pairs an image with a name box.
            # Both start hidden and are revealed by prepare_faces_and_show.
            with gr.Row():
                for i in range(20):
                    with gr.Column(visible=True):
                        img_box = gr.Image(label=f"Face {i+1}", interactive=False, visible=False)
                        txt_box = gr.Textbox(label="Enter Name", visible=False)
                        face_image_boxes.append(img_box)
                        face_name_inputs.append(txt_box)

            save_names_btn = gr.Button("Save Names")

            # Output order must match the list prepare_faces_and_show returns:
            # status first, then every image slot, then every name box.
            show_faces_btn.click(
                fn=prepare_faces_and_show,
                outputs=[face_status] + face_image_boxes + face_name_inputs
            )

            # Each name textbox is passed as one positional argument.
            save_names_btn.click(
                fn=save_face_names,
                inputs=face_name_inputs,
                outputs=face_status
            )

demo.launch(debug=True)