vicky4s4s
/

rocketship_data

Model card Files Files and versions

rocketship_data / all_combine_code /recommedation_engine /embeddings_.py

vicky4s4s's picture

Upload 76 files

01e9350 verified about 2 months ago

history blame contribute delete

2.57 kB

	import torch
	import clip
	import cv2
	from PIL import Image
	from concurrent.futures import ThreadPoolExecutor

	# Every helper in this module runs inference on the CPU.
	device = "cpu"

	# Load the CLIP ViT-B/16 checkpoint once, at import time. `model` is shared
	# by all embedding helpers below; `preprocess` is the companion callable
	# they apply to each PIL image before stacking the results into a batch.
	model, preprocess = clip.load("ViT-B/16", device=device)
	# The model is used for inference only, so put it in evaluation mode.
	model.eval()

	# Lower-case filename suffixes used by process_inputs to route each path to
	# the image or the video pipeline; any other suffix is skipped there.
	SUPPORTED_IMAGE_EXTS = (".jpg", ".jpeg", ".png")
	SUPPORTED_VIDEO_EXTS = (".mp4", ".avi", ".mov")

	def preprocess_image(path):
	    """Load one image file and run it through the CLIP preprocessing step.

	    Args:
	        path: Location of the image file, passed straight to ``Image.open``.

	    Returns:
	        Whatever the module-level ``preprocess`` callable returns for the
	        RGB-converted image (callers stack these with ``torch.stack``).
	    """
	    # Image.open is lazy and keeps the file handle open; the context manager
	    # guarantees it is closed even when decoding fails part-way through.
	    with Image.open(path) as img:
	        # convert() decodes the pixel data and returns an independent copy,
	        # so the result stays valid after the file has been closed.
	        rgb = img.convert("RGB")
	    return preprocess(rgb)

	def load_images_parallel(image_paths, max_workers=4):
	    """Preprocess several image files concurrently and stack the results.

	    Args:
	        image_paths: Iterable of image file paths; output order follows it.
	        max_workers: Size of the thread pool used for loading.

	    Returns:
	        A single tensor produced by ``torch.stack`` over the per-image
	        results of ``preprocess_image``.
	    """
	    with ThreadPoolExecutor(max_workers=max_workers) as pool:
	        # map() yields results in input order regardless of which worker
	        # finishes first.
	        tensors = pool.map(preprocess_image, image_paths)
	        batch = torch.stack(list(tensors))
	    return batch

	def images_to_embeddings_cpu(image_paths, batch_size=32):
	    """Embed image files with the shared CLIP model, one batch at a time.

	    Args:
	        image_paths: Sequence of image file paths (must support ``len`` and
	            slicing).
	        batch_size: Number of images loaded and encoded per forward pass.

	    Returns:
	        A NumPy array holding the concatenated, L2-normalised embeddings of
	        every batch, in input order.
	    """
	    chunks = []

	    for start in range(0, len(image_paths), batch_size):
	        stop = start + batch_size
	        pixels = load_images_parallel(image_paths[start:stop])

	        # Inference only: no autograd graph is needed.
	        with torch.no_grad():
	            features = model.encode_image(pixels)
	            # Scale each embedding to unit length along its last axis.
	            unit = features / features.norm(dim=-1, keepdim=True)
	        chunks.append(unit)

	    combined = torch.cat(chunks)
	    return combined.numpy()

	def extract_frames(video_path, sample_rate=1):
	    """Sample frames from a video at a fixed stride and return them as images.

	    Args:
	        video_path: Path of the video, passed to ``cv2.VideoCapture``.
	        sample_rate: Multiplied by the reported FPS to obtain the frame
	            stride, so it acts as "seconds between samples" (despite the
	            name, a larger value yields fewer frames).

	    Returns:
	        A list of RGB ``PIL.Image`` objects, one per sampled frame. The list
	        is empty when the capture cannot be opened or yields no frames.
	    """
	    cap = cv2.VideoCapture(video_path)
	    try:
	        fps = cap.get(cv2.CAP_PROP_FPS)
	        # Clamp to at least one frame so a missing/zero FPS value cannot
	        # produce a zero stride (which would break the modulo below).
	        interval = int(max(1, fps * sample_rate))

	        frames = []
	        count = 0

	        while cap.isOpened():
	            ret, frame = cap.read()
	            if not ret:
	                break

	            if count % interval == 0:
	                # The frame is converted from BGR to RGB channel order
	                # before it is wrapped in a PIL image.
	                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
	                frames.append(Image.fromarray(frame))

	            count += 1

	        return frames
	    finally:
	        # Always free the capture handle, even if a conversion above raises.
	        cap.release()


	def video_to_embedding_cpu(video_path, batch_size=32):
	    """Embed a video as the mean of its sampled frames' CLIP embeddings.

	    Frames are encoded in chunks of ``batch_size`` so that memory use does
	    not grow with the length of the video; the result matches encoding all
	    frames at once up to floating-point rounding.

	    Args:
	        video_path: Path of the video, forwarded to ``extract_frames``.
	        batch_size: Maximum number of frames per forward pass.

	    Returns:
	        A NumPy array with the mean (over frames) of the L2-normalised frame
	        embeddings, or ``None`` when no frame could be extracted.

	    Raises:
	        ValueError: If ``batch_size`` is smaller than 1.
	    """
	    if batch_size < 1:
	        raise ValueError("batch_size must be at least 1")

	    frames = extract_frames(video_path)

	    if not frames:
	        return None

	    chunks = []
	    # Inference only: no autograd graph is needed.
	    with torch.no_grad():
	        for start in range(0, len(frames), batch_size):
	            batch = torch.stack(
	                [preprocess(f) for f in frames[start:start + batch_size]]
	            )
	            emb = model.encode_image(batch)
	            # Scale each frame embedding to unit length before averaging.
	            chunks.append(emb / emb.norm(dim=-1, keepdim=True))

	    # NOTE(review): the mean of unit vectors is not itself unit length;
	    # confirm downstream similarity code does not assume normalised input.
	    return torch.cat(chunks).mean(dim=0).numpy()


	def process_videos_parallel(video_files, max_workers=2):
	with ThreadPoolExecutor(max_workers=max_workers) as executor:
	return list(executor.map(video_to_embedding_cpu, video_files))


	def process_inputs(files):
	    """Route each file to the image or video pipeline and collect embeddings.

	    Files are classified by their lower-cased suffix; anything that matches
	    neither supported list is skipped.

	    Args:
	        files: Iterable of file path strings.

	    Returns:
	        A dict that contains an ``"images"`` entry only when at least one
	        image was found, and a ``"videos"`` entry only when at least one
	        video was found.
	    """
	    images, videos = [], []

	    for path in files:
	        lowered = path.lower()
	        if lowered.endswith(SUPPORTED_IMAGE_EXTS):
	            images.append(path)
	            continue
	        if lowered.endswith(SUPPORTED_VIDEO_EXTS):
	            videos.append(path)

	    embeddings = {}
	    if images:
	        embeddings["images"] = images_to_embeddings_cpu(images)
	    if videos:
	        embeddings["videos"] = process_videos_parallel(videos)
	    return embeddings