# Page residue carried over with the file (not code):
#   vicky4s4s's picture / Upload 76 files / 01e9350 verified
import os
from concurrent.futures import ThreadPoolExecutor

import clip
import cv2
import torch
from PIL import Image
# Device string handed to `clip.load` below; this module is CPU-only.
device = "cpu"
# Load the "ViT-B/16" CLIP checkpoint. `clip.load` returns the model together
# with the image preprocessing callable that the functions below apply to
# every PIL image before encoding.
model, preprocess = clip.load("ViT-B/16", device=device)
# Switch the model to evaluation mode; the visible code only runs inference.
model.eval()
# Lower-case file suffixes that `process_inputs` routes to the image pipeline.
SUPPORTED_IMAGE_EXTS = (".jpg", ".jpeg", ".png")
# Lower-case file suffixes that `process_inputs` routes to the video pipeline.
SUPPORTED_VIDEO_EXTS = (".mp4", ".avi", ".mov")
def preprocess_image(path):
    """Load one image file and apply the CLIP preprocessing transform.

    Args:
        path: Location of the image, in any form ``PIL.Image.open`` accepts.

    Returns:
        Whatever the module-level ``preprocess`` callable returns for the
        RGB-converted image (presumably a tensor, since callers pass the
        results to ``torch.stack``).

    Raises:
        Whatever ``Image.open`` / ``convert`` raise for a missing or
        undecodable file.
    """
    # The context manager closes the underlying file deterministically, even
    # when decoding fails part-way; this function is run from a thread pool,
    # so lingering handles could otherwise pile up until garbage collection.
    with Image.open(path) as img:
        return preprocess(img.convert("RGB"))
def load_images_parallel(image_paths, max_workers=4):
    """Preprocess several image files concurrently and stack the results.

    Args:
        image_paths: Iterable of image paths, each passed to
            ``preprocess_image``.
        max_workers: Size of the thread pool used for loading.

    Returns:
        A single tensor built with ``torch.stack`` from the per-image
        results, in the same order as ``image_paths``.
    """
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # Executor.map yields results in input order; draining it here also
        # re-raises any exception a worker hit.
        tensors = [*pool.map(preprocess_image, image_paths)]
    return torch.stack(tensors)
def images_to_embeddings_cpu(image_paths, batch_size=32):
    """Encode image files with the CLIP model, one batch at a time.

    Args:
        image_paths: Sliceable sequence of image paths.
        batch_size: Number of images loaded and encoded per forward pass.

    Returns:
        NumPy array holding the concatenated embeddings of every batch, each
        embedding divided by its norm along the last dimension.
    """

    def _encode(paths):
        # Load the batch in parallel, then run a gradient-free forward pass.
        pixels = load_images_parallel(paths)
        with torch.no_grad():
            feats = model.encode_image(pixels)
            return feats / feats.norm(dim=-1, keepdim=True)

    starts = range(0, len(image_paths), batch_size)
    chunks = [_encode(image_paths[s:s + batch_size]) for s in starts]
    return torch.cat(chunks).numpy()
def extract_frames(video_path, sample_rate=1):
    """Sample frames from a video at a fixed stride and return them as PIL images.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        sample_rate: Multiplied by the FPS reported by the capture to get the
            stride in frames, so it acts as "seconds between kept frames".
            The stride is never smaller than one frame.

    Returns:
        List of RGB ``PIL.Image`` objects, one per kept frame. The list is
        empty when the capture cannot be opened or yields no frames.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    # try/finally guarantees the capture is released even when colour
    # conversion or PIL wrapping raises on a frame.
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        # max(1, ...) keeps the stride usable when the reported FPS is 0.
        interval = int(max(1, fps * sample_rate))
        count = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            if count % interval == 0:
                # OpenCV delivers BGR; convert to RGB before wrapping in PIL.
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frames.append(Image.fromarray(frame))
            count += 1
    finally:
        cap.release()
    return frames
def video_to_embedding_cpu(video_path, batch_size=32):
    """Embed a video as the mean of its sampled frames' CLIP embeddings.

    Args:
        video_path: Path passed to ``extract_frames``.
        batch_size: Number of frames encoded per forward pass. Batching keeps
            peak memory bounded for long videos, mirroring how
            ``images_to_embeddings_cpu`` handles images.

    Returns:
        NumPy array with the mean over frames of the per-frame embeddings
        (each divided by its norm along the last dimension), or ``None`` when
        no frames could be extracted. The mean itself is returned as-is and
        is not re-normalised.
    """
    frames = extract_frames(video_path)
    if not frames:
        return None

    chunks = []
    with torch.no_grad():
        for start in range(0, len(frames), batch_size):
            batch = torch.stack(
                [preprocess(f) for f in frames[start:start + batch_size]]
            )
            emb = model.encode_image(batch)
            chunks.append(emb / emb.norm(dim=-1, keepdim=True))
    # Concatenate before averaging so every frame carries equal weight, even
    # when the last batch is shorter than the others.
    return torch.cat(chunks).mean(dim=0).numpy()
def process_videos_parallel(video_files, max_workers=2):
    """Embed several videos concurrently using a thread pool.

    Args:
        video_files: Iterable of video paths, each passed to
            ``video_to_embedding_cpu``.
        max_workers: Size of the thread pool.

    Returns:
        List with one entry per input video, in input order. Each entry is
        whatever ``video_to_embedding_cpu`` returned for that video.
    """
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        embeddings = [*pool.map(video_to_embedding_cpu, video_files)]
    return embeddings
def process_inputs(files):
    """Route files to the image or video pipeline by extension and embed them.

    Matching is case-insensitive against ``SUPPORTED_IMAGE_EXTS`` and
    ``SUPPORTED_VIDEO_EXTS``. Entries with any other suffix are skipped
    silently.

    Args:
        files: Iterable of paths given as ``str`` or ``os.PathLike`` objects
            (for example ``pathlib.Path``).

    Returns:
        Dict that contains ``"images"`` (the result of
        ``images_to_embeddings_cpu``) only when at least one image was found,
        and ``"videos"`` (the list from ``process_videos_parallel``) only
        when at least one video was found. It is empty when nothing matched.
    """
    image_files = []
    video_files = []
    for f in files:
        # os.fspath leaves str untouched and unwraps PathLike objects, which
        # have no .lower() of their own. Plain strings are passed downstream.
        path = os.fspath(f)
        path_lower = path.lower()
        if path_lower.endswith(SUPPORTED_IMAGE_EXTS):
            image_files.append(path)
        elif path_lower.endswith(SUPPORTED_VIDEO_EXTS):
            video_files.append(path)

    results = {}
    if image_files:
        results["images"] = images_to_embeddings_cpu(image_files)
    if video_files:
        results["videos"] = process_videos_parallel(video_files)
    return results