Spaces: Runtime error
Update run.py
run.py CHANGED
@@ -5,6 +5,10 @@ from constraint import SYS_PROMPT, USER_PROMPT
 from datasets import load_dataset
 import tempfile
 import requests
+from huggingface_hub import hf_hub_download, snapshot_download
+import pyarrow.parquet as pq
+import hashlib
+
 
 def load_hf_dataset(dataset_path, auth_token):
     dataset = load_dataset(dataset_path, token=auth_token)
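
Of the new imports, hf_hub_download does the real work below: it fetches a single file from a Hub repo and returns the local cache path it was saved to (snapshot_download is imported but never used in this diff). A minimal sketch of the call in isolation, using the same repo and shard-naming scheme as the commit; the token value is a placeholder:

from huggingface_hub import hf_hub_download

# Download one parquet shard from the dataset repo. The return value is a
# filesystem path inside the local HF cache, not an open file object.
local_path = hf_hub_download(
    repo_id="OpenVideo/pexels-raw",
    filename="data/000000.parquet",
    repo_type="dataset",
    token="hf_xxx",  # placeholder; a real user token is required for gated repos
)
print(local_path)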
@@ -13,7 +17,7 @@ def load_hf_dataset(dataset_path, auth_token):
 
     return video_paths
 
-def fast_caption(sys_prompt, usr_prompt, temp, top_p, max_tokens, model, key, endpoint, video_src, video_hf, video_hf_auth, video_od, video_od_auth, video_gd, video_gd_auth, frame_format, frame_limit):
+def fast_caption(sys_prompt, usr_prompt, temp, top_p, max_tokens, model, key, endpoint, video_src, video_hf, video_hf_auth, parquet_index, video_od, video_od_auth, video_gd, video_gd_auth, frame_format, frame_limit):
     if video_src:
         video = video_src
         processor = VideoProcessor(frame_format=frame_format, frame_limit=frame_limit)
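
Since parquet_index comes from a gr.Text field, it arrives as a string; the download code in the next hunk builds the shard filename by zero-padding it to six digits. For example:

parquet_index = "42"  # value typed into the UI
filename = "data/" + str(parquet_index).zfill(6) + ".parquet"
assert filename == "data/000042.parquet"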
@@ -29,31 +33,36 @@ def fast_caption(sys_prompt, usr_prompt, temp, top_p, max_tokens, model, key, en
         caption = api.get_caption(sys_prompt, usr_prompt, base64_list)
         return f"{caption}", f"Using model '{model}' with {len(frames)} frames extracted.", debug_image
     elif video_hf and video_hf_auth:
-        # Handle Hugging Face dataset
-        video_paths = load_hf_dataset(video_hf, video_hf_auth)
-        video_paths = video_paths["train"]
         # Process all videos in the dataset
         all_captions = []
+        # hf_hub_download returns a local cache path, so no tempfile wrapper is needed
+        parquet_path = hf_hub_download(
+            repo_id="OpenVideo/pexels-raw",
+            filename="data/" + str(parquet_index).zfill(6) + ".parquet",
+            repo_type="dataset",
+            token=video_hf_auth,
+        )
+        parquet_file = pq.ParquetFile(parquet_path)
+
+        for batch in parquet_file.iter_batches(batch_size=1):
+            df = batch.to_pandas()
+            video = df['video'][0]
+
+            md5 = hashlib.md5(video).hexdigest()  # content hash of the raw bytes (currently unused)
+            with tempfile.NamedTemporaryFile(mode='wb', delete=True) as temp_video_file:
+                temp_video_file.write(video)
+                temp_video_file.flush()
                 video_path = temp_video_file.name
-        caption = api.get_caption(sys_prompt, usr_prompt, base64_list)
-        all_captions.append(caption)
-        return "\n\n\n".join(all_captions), f"Processed {len(video_paths)} videos.", None
+
+                processor = VideoProcessor(frame_format=frame_format, frame_limit=frame_limit)
+                frames = processor._decode(video_path)
+                base64_list = processor.to_base64_list(frames)
+                api = AzureAPI(key=key, endpoint=endpoint, model=model, temp=temp, top_p=top_p, max_tokens=max_tokens)
+                caption = api.get_caption(sys_prompt, usr_prompt, base64_list)
+                all_captions.append(caption)
+
+        return "\n\n\n".join(all_captions), f"Processed {len(all_captions)} videos.", None
     else:
         return "", "No video source selected.", None
 
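
Two patterns in the added block are worth isolating. ParquetFile.iter_batches(batch_size=1) streams the shard one record at a time instead of loading it whole, and a binary NamedTemporaryFile turns the raw bytes into a real path for the frame decoder. A self-contained sketch, assuming a local shard with a binary 'video' column (the filename is hypothetical):

import tempfile
import pyarrow.parquet as pq

parquet_file = pq.ParquetFile("shard.parquet")  # hypothetical local file
for batch in parquet_file.iter_batches(batch_size=1):
    video_bytes = batch.to_pandas()["video"][0]
    # 'wb' matters because the column holds raw bytes; flush before another
    # reader opens the path, or it may see a truncated file.
    with tempfile.NamedTemporaryFile(mode="wb", suffix=".mp4", delete=True) as f:
        f.write(video_bytes)
        f.flush()
        print(f.name)  # the path the Space hands to VideoProcessor._decode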
@@ -113,9 +122,7 @@ with gr.Blocks() as Core:
         with gr.Tab("HF"):
             video_hf = gr.Text(label="Huggingface File Path")
             video_hf_auth = gr.Text(label="Huggingface Token")
-
-            video_hf = gr.Text(label="Parquet_index")
-            video_hf_auth = gr.Text(label="Huggingface Token")
+            parquet_index = gr.Text(label="Parquet Index")
         with gr.Tab("Onedrive"):
             video_od = gr.Text("Microsoft Onedrive")
             video_od_auth = gr.Text(label="Microsoft Onedrive Token")
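
The deleted lines show why this hunk exists: assigning a second gr.Text to video_hf re-bound the Python name, so the Parquet-index value was sent to fast_caption in the slot meant for the file path. In plain Python terms:

video_hf = "path-field"
video_hf = "index-field"  # re-binding: the first value is no longer reachable
assert video_hf == "index-field"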
@@ -125,7 +132,7 @@ with gr.Blocks() as Core:
     caption_button = gr.Button("Caption", variant="primary", size="lg")
     caption_button.click(
         fast_caption,
-        inputs=[sys_prompt, usr_prompt, temp, top_p, max_tokens, model, key, endpoint, video_src, video_hf, video_hf_auth, video_od, video_od_auth, video_gd, video_gd_auth, frame_format, frame_limit],
+        inputs=[sys_prompt, usr_prompt, temp, top_p, max_tokens, model, key, endpoint, video_src, video_hf, video_hf_auth, parquet_index, video_od, video_od_auth, video_gd, video_gd_auth, frame_format, frame_limit],
         outputs=[result, info, frame]
     )
 
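
The inputs= fix here and the new signature in the second hunk have to move together: Gradio passes component values to the callback positionally, in list order, so a new component must occupy the same slot in both places. A toy illustration with hypothetical names:

import gradio as gr

def greet(name, punctuation):  # parameter order mirrors inputs= below
    return f"Hello, {name}{punctuation}"

with gr.Blocks() as demo:
    name = gr.Text(label="Name")
    punct = gr.Text(label="Punctuation")
    out = gr.Text(label="Greeting")
    gr.Button("Go").click(greet, inputs=[name, punct], outputs=[out])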