safe-challenge
/

safe-video-example-submission

Video Classification

Model card Files Files and versions

xet

Community

gmancino-ball committed on Aug 20, 2025

Commit

6f86bd8

verified ·

1 Parent(s): 2905f64

Update script.py

Browse files

Files changed (1) hide show

script.py +38 -13

script.py CHANGED Viewed

@@ -5,6 +5,7 @@ import tqdm.auto as tqdm
 import os
 import io
 import torch
 import time
 import av
 import torch
@@ -16,27 +17,53 @@ import numpy as np
 # So you must include everything you need in your model repo.
def preprocess(file_like):
    """Decode an in-memory video into a stacked float32 frame tensor.

    Every 10th frame of the first video stream is decoded as RGB and
    converted to a channels-first float32 tensor. Decoding stops once the
    accumulated frame size reaches 100 MB; the frame that crosses the budget
    is still kept, so any decodable video yields at least one frame.

    Args:
        file_like: Seekable file-like object holding the encoded video bytes.

    Returns:
        torch.Tensor: Frames stacked as (num_frames, 3, H, W), float32.

    Raises:
        ValueError: If no frame could be decoded from the video.
    """
    every = 10  # keep one frame out of every 10 decoded
    MAX_MEMORY = 100 * 1024 * 1024  # 100 MB maximum - some videos are large
    frames = []
    current_memory = 0

    file_like.seek(0)
    # The context manager closes the container (and its decoder resources)
    # even when we break out early or decoding raises.
    with av.open(file_like) as container:
        for i, frame in enumerate(container.decode(video=0)):
            if i % every != 0:
                continue
            frame_array = frame.to_ndarray(format="rgb24")
            # (H, W, 3) uint8 -> (3, H, W) float32
            frame_tensor = torch.from_numpy(frame_array).permute(2, 0, 1).float()
            frames.append(frame_tensor)
            # Memory check: float32 = 4 bytes per value
            current_memory += frame_tensor.numel() * 4
            if current_memory >= MAX_MEMORY:
                break

    # torch.stack fails with an opaque message on an empty list; fail clearly.
    if not frames:
        raise ValueError("no video frames could be decoded")
    video_tensor = torch.stack(frames)
    return video_tensor
 class Model(torch.nn.Module):
@@ -71,11 +98,9 @@ for el in tqdm.tqdm(dataset_remote):
     # el["video"]["path"] contains the filename. This is just for reference; you can't actually load it.
     # If you are using libraries that expect a file, you can use a BytesIO object.
-    # print("processing", el["id"])
-    raise ValueError
     try:
         file_like = io.BytesIO(el["video"]["bytes"])
-        tensor = preprocess(file_like)
         with torch.no_grad():
             # soft decision (such as log likelihood score)
@@ -90,11 +115,11 @@ for el in tqdm.tqdm(dataset_remote):
         # "id" and "pred" are required. "score" will not be used in scoring but we encourage you to include it. We'll use it for analysis of the results
         out.append(dict(id=el["id"], pred=pred, score=score))
     except Exception as e:
         print(e)
         print("failed", el["id"])
-        # raise e
         out.append(dict(id=el["id"]))
 # save the final result and that's it
-pd.DataFrame(out).to_csv("submission.csv", index=False)

 import os
 import io
 import torch
+from torchvision import transforms
 import time
 import av
 import torch
 # So you must include everything you need in your model repo.
def preprocess(
    file_like: io.BytesIO, crop_size: int = -1, max_memory: int = 50 * 1024 * 1024, device: str = "cpu"
) -> torch.Tensor:
    """Decode an in-memory video into a stacked float32 frame tensor.

    Every 10th frame of the first video stream is decoded as RGB, converted
    to a channels-first float32 tensor, optionally center-cropped, and moved
    to ``device``. Decoding stops once the accumulated frame size reaches
    ``max_memory``; the frame that crosses the budget is still kept, so any
    decodable video yields at least one frame.

    This is just a guide function: square center cropping may not be the most
    appropriate reduction, 50 MB per video may not be enough, etc.

    Args:
        file_like: Seekable in-memory video bytes.
        crop_size: Side length of the square center crop applied to each
            frame; values <= 0 disable cropping.
        max_memory: Byte budget for the sampled frames of one video, counted
            as float32 (4 bytes per value) after cropping.
        device: Device on which the returned frames are stored.

    Returns:
        Frames stacked as (num_frames, 3, H, W), float32, on ``device``.

    Raises:
        ValueError: If no frame could be decoded from the video.
    """
    sample_every = 10  # keep one frame out of every 10 decoded
    bytes_per_value = 4  # float32

    ## Define crop if applicable
    center_crop_transform = transforms.CenterCrop(crop_size) if crop_size > 0 else None

    frames = []
    current_memory = 0

    ## Open the video file
    file_like.seek(0)
    # The context manager closes the container (and its decoder resources)
    # even when we break out early or decoding raises.
    with av.open(file_like) as container:
        for i, frame in enumerate(container.decode(video=0)):
            if i % sample_every != 0:
                continue
            frame_array = frame.to_ndarray(format="rgb24")
            # (H, W, 3) uint8 -> (3, H, W) float32
            frame_tensor = torch.from_numpy(frame_array).permute(2, 0, 1).float()
            ## Crop
            if center_crop_transform is not None:
                frame_tensor = center_crop_transform(frame_tensor)
            ## Append to the list
            frames.append(frame_tensor.to(device))
            ## Memory check (after appending, so one frame is always kept)
            current_memory += frame_tensor.numel() * bytes_per_value
            if current_memory >= max_memory:
                break

    # torch.stack fails with an opaque message on an empty list; fail clearly.
    if not frames:
        raise ValueError("no video frames could be decoded")
    ## Stack as video
    return torch.stack(frames)
 class Model(torch.nn.Module):
     # el["video"]["path"] contains the filename. This is just for reference; you can't actually load it.
     # If you are using libraries that expect a file, you can use a BytesIO object.
     try:
         file_like = io.BytesIO(el["video"]["bytes"])
+        tensor = preprocess(file_like, device=device)
         with torch.no_grad():
             # soft decision (such as log likelihood score)
         # "id" and "pred" are required. "score" will not be used in scoring but we encourage you to include it. We'll use it for analysis of the results
         out.append(dict(id=el["id"], pred=pred, score=score))
     except Exception as e:
         print(e)
         print("failed", el["id"])
         out.append(dict(id=el["id"]))
 # save the final result and that's it
+pd.DataFrame(out).to_csv("submission.csv", index=False)