Spaces:
No application file
No application file
| import zipfile | |
| import os | |
| import chromadb | |
| from chromadb.utils.embedding_functions import OpenCLIPEmbeddingFunction | |
| from chromadb.utils.data_loaders import ImageLoader | |
| import cv2 | |
# Location handed to the persistent Chroma client.
path = "mm_vdb2"
client = chromadb.PersistentClient(path=path)

# Helpers attached to the collection: the loader is passed as `data_loader`
# and the OpenCLIP function as `embedding_function`.
image_loader = ImageLoader()
CLIP = OpenCLIPEmbeddingFunction()

# Shared collection used by the functions in this module; it is reused when
# it already exists and created otherwise.
video_collection = client.get_or_create_collection(
    name="video_collection",
    data_loader=image_loader,
    embedding_function=CLIP,
)
def extract_frames(video_folder, output_folder, interval_seconds=5):
    """Save periodic still frames from every ``.mp4`` file in a folder.

    For each video, one JPEG is written for the first frame, for every frame
    that falls on a multiple of ``interval_seconds``, and for the last frame
    (as reported by the container's frame count). Frames are written to
    ``<output_folder>/<video name without extension>/frame_<second>.jpg``,
    where ``<second>`` is the frame's timestamp truncated to whole seconds.
    Two sampled frames that fall within the same second share a filename, so
    the later one overwrites the earlier one.

    Args:
        video_folder: Directory scanned (non-recursively) for ``.mp4`` files.
        output_folder: Directory that receives one sub-folder per video;
            created if it does not exist.
        interval_seconds: Spacing between sampled frames, in seconds.
            Defaults to 5.

    Videos that cannot be opened, or that report a non-positive frame rate,
    are skipped with a logged warning instead of aborting the whole batch.
    """
    # Local import: the module header does not import logging.
    import logging

    logger = logging.getLogger(__name__)
    os.makedirs(output_folder, exist_ok=True)

    for video_filename in os.listdir(video_folder):
        if not video_filename.endswith('.mp4'):
            continue

        video_path = os.path.join(video_folder, video_filename)
        video_capture = cv2.VideoCapture(video_path)
        try:
            fps = video_capture.get(cv2.CAP_PROP_FPS)
            # `not fps > 0` also rejects NaN, which `fps <= 0` would let through.
            if not video_capture.isOpened() or not fps > 0:
                logger.warning(
                    "Skipping %s: could not open video or read its frame rate",
                    video_path,
                )
                continue

            frame_count = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
            # Clamp to 1 so a very low frame rate cannot yield a zero modulus.
            step = max(1, int(fps * interval_seconds))

            output_subfolder = os.path.join(
                output_folder, os.path.splitext(video_filename)[0]
            )
            os.makedirs(output_subfolder, exist_ok=True)

            frame_number = 0
            success, image = video_capture.read()
            while success:
                # Frame 0 always satisfies the modulo test, so the first
                # frame needs no separate check.
                if frame_number % step == 0 or frame_number == frame_count - 1:
                    frame_time = frame_number / fps
                    output_frame_filename = os.path.join(
                        output_subfolder, f'frame_{int(frame_time)}.jpg'
                    )
                    cv2.imwrite(output_frame_filename, image)
                success, image = video_capture.read()
                frame_number += 1
        finally:
            # Release the capture even if reading or writing a frame fails.
            video_capture.release()
def add_frames_to_chromadb(video_dir, frames_dir):
    """Register extracted frame images in the module-level ``video_collection``.

    For every ``.mp4`` file in ``video_dir`` that has a matching sub-folder
    ``<frames_dir>/<video name without extension>``, each ``.jpg`` in that
    sub-folder is added to the collection with:

    * id ``<frame name without extension>_<video name without extension>``
    * uri set to the path of the frame image
    * metadata ``{'video_uri': <path of the source video>}``

    Args:
        video_dir: Directory containing the source ``.mp4`` files.
        frames_dir: Directory containing one sub-folder of frames per video.

    Returns:
        None. When no frames are found the collection is not touched.
    """
    ids = []
    uris = []
    metadatas = []

    for video_file in os.listdir(video_dir):
        if not video_file.endswith('.mp4'):
            continue

        video_title = os.path.splitext(video_file)[0]
        frame_folder = os.path.join(frames_dir, video_title)
        # Videos whose frames were never extracted have no folder; skip them.
        if not os.path.isdir(frame_folder):
            continue

        video_path = os.path.join(video_dir, video_file)
        for frame in os.listdir(frame_folder):
            if not frame.endswith('.jpg'):
                continue
            ids.append(f"{os.path.splitext(frame)[0]}_{video_title}")
            uris.append(os.path.join(frame_folder, frame))
            metadatas.append({'video_uri': video_path})

    # NOTE(review): Chroma is expected to reject an empty `ids` list, so skip
    # the call when there is nothing to add — confirm against the installed
    # chromadb version.
    if not ids:
        return

    video_collection.add(ids=ids, uris=uris, metadatas=metadatas)
def process_video_files(video_paths):
    """Extract frames for the given videos and index them in Chroma.

    Frames are written under the relative directory ``extracted_frames``
    (created if missing). Processing works per folder: for each distinct
    parent directory of the supplied paths, every ``.mp4`` file in that
    directory is sampled by ``extract_frames`` and then registered by
    ``add_frames_to_chromadb``, including files that were not listed in
    ``video_paths``.

    Args:
        video_paths: Iterable of paths to video files.

    Returns:
        The module-level ``video_collection``.
    """
    frames_output_folder = r"extracted_frames"
    os.makedirs(frames_output_folder, exist_ok=True)

    # Handle each parent folder once, in first-seen order: several paths in
    # the same folder would otherwise re-extract and re-add the same frames.
    # A bare filename has an empty dirname, which os.listdir() rejects, so
    # fall back to the current directory.
    video_folders = dict.fromkeys(
        os.path.dirname(video_path) or os.curdir for video_path in video_paths
    )

    for video_folder in video_folders:
        extract_frames(video_folder, frames_output_folder)
        add_frames_to_chromadb(video_folder, frames_output_folder)

    return video_collection
# Example usage:
# video_paths = [
#     "/path/to/video1.mp4",
#     "/path/to/video2.mp4",
#     "/path/to/video3.mp4",
# ]
# process_video_files(video_paths)