Spaces:
Runtime error
Runtime error
| import argparse | |
| import csv | |
| import os | |
| import requests | |
| import tqdm | |
| from .utils import extract_frames, prompts, read_video_list | |
def get_caption(frame, prompt, api_key):
    """Request a caption for three frames from the OpenAI chat-completions API.

    Args:
        frame: Indexable collection; items 0, 1 and 2 are each embedded in a
            ``data:image/jpeg;base64,...`` URL (presumably base64-encoded
            JPEG strings -- confirm against the frame extractor).
        prompt: Text instruction sent together with the three images.
        api_key: Token placed in the ``Authorization: Bearer`` header.

    Returns:
        The message content of the first returned choice, with every newline
        replaced by a single space.

    Raises:
        IndexError: If ``frame`` is a sequence with fewer than three items.
        requests.HTTPError: If the API answers with a 4xx/5xx status.
    """
    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}
    # One image entry per frame; exactly the first three frames are sent.
    images = [
        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{frame[i]}"}}
        for i in range(3)
    ]
    payload = {
        # NOTE(review): confirm this model name is still served by the API.
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "user",
                "content": [{"type": "text", "text": prompt}, *images],
            }
        ],
        "max_tokens": 300,
    }
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=60,
    )
    # Fail with the real HTTP error (auth, rate limit, server fault) instead
    # of an opaque KeyError when the error body has no "choices" field.
    response.raise_for_status()
    caption = response.json()["choices"][0]["message"]["content"]
    return caption.replace("\n", " ")
def main(args):
    """Caption the listed videos and append one CSV row per captioned video.

    Args:
        args: Parsed command-line namespace providing ``video_folder``,
            ``output_file``, ``prompt`` (a key into ``prompts``) and ``key``
            (the API key handed to ``get_caption``).

    Raises:
        KeyError: If ``args.prompt`` is not a key of ``prompts``; raised
            before the output file is opened for appending.
    """
    # ======================================================
    # 1. read video list
    # ======================================================
    videos = read_video_list(args.video_folder, args.output_file)
    # The prompt is the same for every video, so resolve it once up front.
    prompt = prompts[args.prompt]

    # ======================================================
    # 2. generate captions
    # ======================================================
    # The context manager closes (and flushes) the file even when a caption
    # request raises mid-run; newline="" is what the csv module expects.
    with open(args.output_file, "a", newline="") as f:
        writer = csv.writer(f)
        for video in tqdm.tqdm(videos):
            video_path = os.path.join(args.video_folder, video)
            frame, length = extract_frames(video_path, base_64=True)
            # get_caption needs three frames; skip videos that yield fewer.
            if len(frame) < 3:
                continue
            caption = get_caption(frame, prompt, args.key)
            writer.writerow((video, caption, length))
if __name__ == "__main__":
    # Command-line entry point: parse the arguments and run the captioning job.
    parser = argparse.ArgumentParser()
    parser.add_argument("video_folder", type=str)
    parser.add_argument("output_file", type=str)
    parser.add_argument("--prompt", type=str, default="three_frames")
    # Fall back to the environment so the secret need not appear on the
    # command line (where it is visible in shell history and process lists).
    parser.add_argument("--key", type=str, default=os.environ.get("OPENAI_API_KEY"))
    args = parser.parse_args()
    # Without a key every request would be sent as "Bearer None"; stop early
    # with a usage error instead.
    if not args.key:
        parser.error("an API key is required: pass --key or set OPENAI_API_KEY")
    main(args)