VideoScore

hexuan21 committed on Jun 23, 2024

Commit

0776a93

verified ·

1 Parent(s): 7dba304

Update README.md

Files changed (1) hide show

README.md CHANGED Viewed

@@ -78,6 +78,11 @@ pip install git+https://github.com/TIGER-AI-Lab/MantisScore.git
 ```python
 import av
 import numpy as np
 def _read_video_pyav(
     frame_paths:List[str],
     max_frames:int,
@@ -94,6 +99,7 @@ def _read_video_pyav(
     return np.stack([x.to_ndarray(format="rgb24") for x in frames])
 MAX_NUM_FRAMES=16
 REGRESSION_QUERY_PROMPT = """
 Suppose you are an expert in judging and evaluating the quality of AI-generated videos,
 please watch the following frames of a given video and see the text prompt for generating the video,
@@ -119,6 +125,12 @@ all the frames of video are as follows:
 """
 video_path="examples/video1.mp4"
 # uniformly sample frames from the video (up to MAX_NUM_FRAMES)
 container = av.open(video_path)
@@ -129,7 +141,7 @@ else:
     indices = np.arange(total_frames)
 frames = [Image.fromarray(x) for x in _read_video_pyav(container, indices)]
-eval_prompt = REGRESSION_QUERY_TEMPLATE.format(text_prompt=video_prompt)
 num_image_token = eval_prompt.count("<image>")
 if num_image_token < len(frames):
     eval_prompt += "<image> " * (len(frames) - num_image_token)

 ```python
 import av
 import numpy as np
+from typing import List
+import torch
+from transformers import AutoProcessor
+from models.idefics2 import Idefics2ForSequenceClassification
 def _read_video_pyav(
     frame_paths:List[str],
     max_frames:int,
     return np.stack([x.to_ndarray(format="rgb24") for x in frames])
 MAX_NUM_FRAMES=16
+ROUND_DIGIT=4
 REGRESSION_QUERY_PROMPT = """
 Suppose you are an expert in judging and evaluating the quality of AI-generated videos,
 please watch the following frames of a given video and see the text prompt for generating the video,
 """
 video_path="examples/video1.mp4"
+video_prompt=""
+processor = AutoProcessor.from_pretrained(f"TIGER-Lab/MantisScore",torch_dtype=torch.bfloat16)
+model = Idefics2ForSequenceClassification.from_pretrained(f"TIGER-Lab/MantisScore",torch_dtype=torch.bfloat16).eval()
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
 # uniformly sample frames from the video (up to MAX_NUM_FRAMES)
 container = av.open(video_path)
     indices = np.arange(total_frames)
 frames = [Image.fromarray(x) for x in _read_video_pyav(container, indices)]
+eval_prompt = REGRESSION_QUERY_PROMPT.format(text_prompt=video_prompt)
 num_image_token = eval_prompt.count("<image>")
 if num_image_token < len(frames):
     eval_prompt += "<image> " * (len(frames) - num_image_token)