import json
import numpy as np
import cv2
import torch
from dotenv import load_dotenv
from openai import OpenAI
from transformers import CLIPProcessor, CLIPModel
from PIL import Image
from skimage.metrics import structural_similarity as ssim

# Load env for OpenAI (expects OPENAI_API_KEY)
load_dotenv()
client = OpenAI()

# CLIP model/processor for image-text similarity scoring
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
def evaluate_with_gpt4(storyboard, video_description):
    system_prompt = (
        "You are a film critic evaluating how well a video matches a storyboard.\n"
        "Rate each of the following from 1 to 10:\n"
        "- Story Consistency: Does the video follow the scene and emotion described?\n"
        "- Shot Variety: Does it use interesting or varied camera angles?\n"
        "- Relevance: Does it suit the intended purpose (role, setting, emotion)?\n\n"
        "Provide scores and brief justifications for each.\n\n"
        "Format output as JSON:\n"
        "{\n"
        "  \"story_consistency\": <score>,\n"
        "  \"shot_variety\": <score>,\n"
        "  \"relevance\": <score>,\n"
        "  \"justification\": \"...\"\n"
        "}"
    )
    user_prompt = (
        f"Storyboard:\n"
        f"Scene: {storyboard['scene']}\n"
        f"Shot: {storyboard['shot_type']}\n"
        f"Emotion: {storyboard['emotion']}\n\n"
        f"Video Description:\n{video_description}"
    )
    response = client.chat.completions.create(
        model="gpt-4o",
        temperature=0.3,
        # JSON mode guarantees a parseable object, so json.loads below won't
        # choke on markdown fences around the reply
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]
    )
    content = response.choices[0].message.content.strip()
    return json.loads(content)
def compute_clip_similarity(image_path, text_prompt):
    image = Image.open(image_path).convert("RGB")
    inputs = clip_processor(text=[text_prompt], images=image, return_tensors="pt", padding=True)
    with torch.no_grad():
        outputs = clip_model(**inputs)
    # logits_per_image is the cosine similarity scaled by CLIP's learned
    # temperature; softmax over a single prompt would always return 1.0,
    # so undo the scaling and return the raw cosine similarity instead.
    similarity = outputs.logits_per_image.item() / clip_model.logit_scale.exp().item()
    return similarity
def compute_motion_score(video_path):
    cap = cv2.VideoCapture(video_path)
    prev_gray = None
    motion_values = []
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        if prev_gray is not None:
            # Dense optical flow between consecutive frames (Farneback)
            flow = cv2.calcOpticalFlowFarneback(prev_gray, gray, None,
                                                0.5, 3, 15, 3, 5, 1.2, 0)
            magnitude, _ = cv2.cartToPolar(flow[..., 0], flow[..., 1])
            motion_values.append(np.mean(magnitude))
        prev_gray = gray
    cap.release()
    # Mean flow magnitude across the clip; 0 if the video had fewer than 2 frames
    return float(np.mean(motion_values)) if motion_values else 0.0
def compute_temporal_coherence(video_path):
    cap = cv2.VideoCapture(video_path)
    prev_frame = None
    ssim_scores = []
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        if prev_frame is not None:
            # SSIM between consecutive grayscale frames; higher means smoother video
            score = ssim(prev_frame, gray)
            ssim_scores.append(score)
        prev_frame = gray
    cap.release()
    return float(np.mean(ssim_scores)) if ssim_scores else 0.0
def evaluate_video(storyboard, video_description, video_path, thumbnail_path, text_prompt):
    gpt_eval = evaluate_with_gpt4(storyboard, video_description)
    clip_score = compute_clip_similarity(thumbnail_path, text_prompt)
    motion_score = compute_motion_score(video_path)
    coherence_score = compute_temporal_coherence(video_path)
    return {
        "gpt_eval": gpt_eval,
        "metrics": {
            "clip_similarity": clip_score,
            "motion_score": motion_score,
            "temporal_coherence": coherence_score
        }
    }
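
# Minimal usage sketch. The file paths and storyboard fields below are
# illustrative assumptions, not part of the original Space; swap in real
# inputs to run the full pipeline end to end.
if __name__ == "__main__":
    storyboard = {
        "scene": "A lone astronaut crosses a red desert at dusk",
        "shot_type": "wide tracking shot",
        "emotion": "isolation",
    }
    result = evaluate_video(
        storyboard,
        video_description="An astronaut walks across red dunes in a slow wide shot.",
        video_path="video.mp4",      # hypothetical input clip
        thumbnail_path="thumb.jpg",  # hypothetical representative frame
        text_prompt="an astronaut walking across a red desert at dusk",
    )
    # default=float coerces any numpy scalars for JSON serialization
    print(json.dumps(result, indent=2, default=float))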