Safetensors
English
llava
video-retrieval
text-to-video-search
multimodal-embedding
File size: 8,618 Bytes
7daf628
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
"""Helpers for visual prompts."""
import os
import random
import numpy as np
import cv2
import decord

from PIL import Image, ImageDraw, ImageFont


def add_absolute_time_bar(video_path, save_path, bar_height=20, bar_color="red", max_duration=10.):
    from moviepy.editor import VideoFileClip
    from moviepy.video.fx.all import crop

    # Load the video clip
    video_clip = VideoFileClip(video_path)
    
    # Get video properties
    video_duration = video_clip.duration  # Total duration of the video in seconds
    video_width, video_height = video_clip.size
    
    # Convert bar color (e.g., "red") to RGB
    color_map = {"red": (255, 0, 0), "green": (0, 255, 0), "blue": (0, 0, 255)}  # Extendable
    bar_color_rgb = color_map.get(bar_color.lower(), (255, 0, 0))  # Default is red
    
    def add_bar_to_frame(get_frame, t):
        """ Function to add a time bar to each frame based on time t (in seconds). """
        frame = get_frame(t)
        current_time = min(t, max_duration)  # Cap the time at max_duration

        # Calculate the proportional bar length
        bar_length = int((current_time / max_duration) * video_width)
        
        # Create a new bar image with the same width as the frame
        bar = np.zeros((bar_height, video_width, 3), dtype=np.uint8)
        bar[:, :bar_length] = bar_color_rgb
        
        # Stack the bar on top of the current frame
        frame_with_bar = np.vstack([bar, frame])
        return frame_with_bar
    
    # Modify the video frames by adding the time bar
    video_with_bar = video_clip.fl(add_bar_to_frame)
    
    # Crop the extra height from the video (if needed)
    video_with_bar = crop(video_with_bar, height=video_height)  # Keeps original video height
    
    # Save the modified video to the output path
    video_with_bar.write_videofile(save_path, codec="libx264", verbose=False, logger=None)


def add_clock(video_path, save_path, clock_color='red', clock_location=None, clock_radius=25):
    from moviepy.editor import VideoFileClip
    from moviepy.video.fx.all import crop

    # Load the video clip
    video_clip = VideoFileClip(video_path)
    
    # Get video properties
    video_width, video_height = video_clip.size
    video_duration = video_clip.duration  # Total duration of the video in seconds
    
    # Default clock location if not provided: 10% height and width from the top-right corner
    if clock_location is None:
        clock_location = [int(video_width * 0.9), int(video_height * 0.1)]

        # Make sure the clock does not go out of the image
        clock_location[0] = min(clock_location[0], video_width - clock_radius)
        clock_location[1] = max(clock_location[1], clock_radius)
        clock_location = tuple(clock_location)
    
    # Convert clock_color from string to BGR for OpenCV
    color_map = {"red": (255, 0, 0), "green": (0, 255, 0), "blue": (0, 0, 255)}  # Extendable
    bar_color_rgb = color_map.get(clock_color.lower(), (255, 0, 0))  # Default is red

    def add_clock_to_frame(get_frame, t):
        """ Function to add a clock bar (circle) that fills clockwise as time progresses. """
        frame = get_frame(t)
        
        # Create a copy of the frame to modify
        frame = np.array(frame)
        
        # Calculate the angle based on the current time t relative to the total video duration
        angle = int((t / video_duration) * 360)  # From 0 to 360 degrees
        
        # Draw a partial filled circle (arc) to represent the clock progress
        center = clock_location  # The center of the clock
        thickness = -1  # Solid fill
        
        # Draw the circular progress using cv2.ellipse
        cv2.ellipse(
            frame, 
            center, 
            (clock_radius, clock_radius),  # Clock radius in both x and y direction
            -90,  # Start angle
            0,  # Starting point (0 degrees)
            angle,  # End angle based on progress
            bar_color_rgb,  # Clock color in BGR
            thickness  # Fill the shape
        )
        
        return frame

    # Apply the clock bar to each frame in the video
    video_with_clock = video_clip.fl(add_clock_to_frame)
    
    # Save the modified video with the clock to the output path
    video_with_clock.write_videofile(save_path, codec="libx264", verbose=False, logger=None)


def add_expanding_shape(
    video_path, 
    save_path, 
    shape='circle', 
    max_duration=None, 
    shape_color='blue', 
    shape_location=None, 
    min_size=10, 
    max_size=50,
    thickness=3,
):
    from moviepy.editor import VideoFileClip
    from moviepy.video.fx.all import crop

    # Load the video clip
    video_clip = VideoFileClip(video_path)
    
    # Get video properties
    video_width, video_height = video_clip.size
    video_duration = video_clip.duration if max_duration is None else max_duration
    
    # Default shape location if not provided: 10% width and height from the top-right corner
    if shape_location is None:
        shape_location = [int(video_width * 0.9), int(video_height * 0.1)]

        # # Make sure the shape does not go out of the image
        # shape_location[0] = min(shape_location[0], video_width - max_size)
        # shape_location[1] = max(shape_location[1], max_size)

        # Move the location if the shape goes out of the image
        if shape_location[0] + max_size > video_width:
            shape_location[0] = video_width - max_size
        if shape_location[1] - max_size < 0:
            shape_location[1] = max_size

        shape_location = tuple(shape_location)
    else:
        if isinstance(shape_location, str):
            shape_location = shape_location.lower()
            if shape_location == 'center':
                shape_location = (video_width // 2, video_height // 2)
            elif shape_location == 'top-left':
                shape_location = (max_size, max_size)
            elif shape_location == 'top-right':
                shape_location = (video_width - max_size, max_size)
            elif shape_location == 'bottom-left':
                shape_location = (max_size, video_height - max_size)
            elif shape_location == 'bottom-right':
                shape_location = (video_width - max_size, video_height - max_size)
            else:
                raise ValueError(f"Invalid shape_location: {shape_location}")
        else:
            assert len(shape_location) == 2, "shape_location must be a tuple of (x, y) coordinates."
            shape_location = tuple(shape_location)

    
    # Convert shape_color from string to BGR for OpenCV
    color_map = {"red": (0, 0, 255), "green": (0, 255, 0), "blue": (255, 0, 0)}  # Extendable
    shape_color_bgr = color_map.get(shape_color.lower(), (0, 0, 255))  # Default is red

    def add_shape_to_frame(get_frame, t):
        """ Function to add an expanding shape (circle, square, or triangle) to each frame based on time t. """
        frame = get_frame(t)
        
        # Create a copy of the frame to modify
        frame = np.array(frame)
        
        # Calculate the current size of the shape based on the time elapsed
        current_size = int(min_size + (max_size - min_size) * (t / video_duration))
        
        # Get the center for placing the shape
        center = shape_location
        
        # Draw the specified shape
        if shape == 'circle':
            cv2.circle(frame, center, current_size, shape_color_bgr, thickness)  # Filled circle
            
        elif shape == 'square':
            top_left = (center[0] - current_size, center[1] - current_size)
            bottom_right = (center[0] + current_size, center[1] + current_size)
            cv2.rectangle(frame, top_left, bottom_right, shape_color_bgr, thickness)  # Filled square
        
        elif shape == 'triangle':
            # Define triangle points
            p1 = (center[0], center[1] - current_size)  # Top point
            p2 = (center[0] - current_size, center[1] + current_size)  # Bottom-left point
            p3 = (center[0] + current_size, center[1] + current_size)  # Bottom-right point
            pts = np.array([p1, p2, p3], np.int32)
            pts = pts.reshape((-1, 1, 2))
            cv2.fillPoly(frame, [pts], shape_color_bgr)  # Filled triangle
        
        return frame

    # Apply the expanding shape to each frame in the video
    video_with_shape = video_clip.fl(add_shape_to_frame)
    
    # Save the modified video with the expanding shape to the output path
    video_with_shape.write_videofile(save_path, codec="libx264", verbose=False, logger=None)