# Gabe Mancino-Ball
# Update pristine -> accuracy
# b704dbb
# raw
# history blame
# 3.11 kB
import pandas as pd
from datasets import load_dataset
import numpy as np
import tqdm.auto as tqdm
import os
import io
import torch
import time
import av
import torch
import numpy as np
# Import your model and anything else you want
# You can even install other packages included in your repo
# However, during the evaluation the container will not have access to the internet.
# So you must include everything you need in your model repo.
import torch
# from torchcodec.decoders import VideoDecoder
# def preprocess_v1(file_like):
# file_like.seek(0)
# decoder = VideoDecoder(file_like)
# frames = decoder[0:-1:20]
# frames = frames.float() / 255.0
# return frames
def preprocess(file_like, every=10):
    """Decode a video held in a binary file-like object into a frame tensor.

    Frames of the first video stream are decoded in order and every
    ``every``-th one (indices 0, ``every``, 2 * ``every``, ...) is converted
    to RGB and kept.

    Args:
        file_like: Seekable binary file-like object containing the encoded
            video (for example ``io.BytesIO``). It is rewound to the start
            before decoding.
        every: Sampling stride. Must be a positive integer. Defaults to 10.

    Returns:
        A float tensor stacking the sampled frames along a new leading
        dimension. Each frame is permuted from the decoder's
        (height, width, channel) layout to (channel, height, width). Pixel
        values are only cast to float; no rescaling is applied.

    Raises:
        ValueError: If ``every`` is smaller than 1, or if the stream yields
            no frames to sample.
    """
    if every < 1:
        raise ValueError(f"every must be a positive integer, got {every!r}")

    # Rewind so a buffer that was already read still decodes from the start.
    file_like.seek(0)

    frames = []
    # The context manager closes the container even when decoding raises,
    # so decoder resources are not leaked across the many videos processed.
    with av.open(file_like) as container:
        for i, frame in enumerate(container.decode(video=0)):
            if i % every == 0:
                frame_array = frame.to_ndarray(format="rgb24")
                frames.append(
                    torch.from_numpy(frame_array).permute(2, 0, 1).float()
                )

    # torch.stack rejects an empty list with a generic error; fail with a
    # message that names the actual problem instead.
    if not frames:
        raise ValueError("no video frames could be decoded")
    return torch.stack(frames)
class Model(torch.nn.Module):
    """Placeholder detector that returns one random score per batch element.

    The input content is ignored: only its leading dimension and its device
    are read. ``threshold`` is the cut-off the calling script compares the
    score against when making a hard decision.
    """

    def __init__(self):
        super().__init__()
        # Not used by forward().
        self.fc1 = torch.nn.Linear(in_features=10, out_features=5)
        self.threshold = 0.0

    def forward(self, x):
        """Return a 1-D tensor of standard-normal samples, one per row of ``x``."""
        batch_size = x.size(0)
        # Sample first, then move the result onto the device of the input.
        scores = torch.randn(batch_size)
        return scores.to(x.device)
# load the dataset. dataset will be automatically downloaded to /tmp/data during evaluation
DATASET_PATH = "/tmp/data"
dataset_remote = load_dataset(DATASET_PATH, split="test", streaming=True)

# load your model
# Use the first GPU when one is visible; otherwise fall back to the CPU so the
# script still runs (e.g. for a local dry run) instead of failing at .to(device).
device = "cuda:0" if torch.cuda.is_available() else "cpu"
model = Model().to(device)
# This script only runs inference, so switch the module to evaluation mode.
model.eval()
# iterate over the dataset, collecting one result row per element
out = []
for el in tqdm.tqdm(dataset_remote):
    # Each element is a dict:
    #   el["video"]["bytes"] holds the bytes read from the raw video file
    #   el["video"]["path"] holds the filename; it is for reference only and
    #   cannot be opened
    # Libraries that expect a file can be given a BytesIO wrapper instead.
    try:
        video_buffer = io.BytesIO(el["video"]["bytes"])
        frames = preprocess(video_buffer)
        # Add a leading batch dimension before handing the clip to the model.
        batch = frames.unsqueeze(0).to(device)

        with torch.no_grad():
            # Soft decision (such as a log-likelihood score):
            # a positive score corresponds to a synthetic prediction,
            # a negative score corresponds to a real prediction.
            score = model(batch).cpu().item()

        # A hard decision must be submitted, so the score is thresholded.
        if score > model.threshold:
            pred = "generated"
        else:
            pred = "real"

        # "id" and "pred" are required. "score" is not used in scoring, but
        # including it is encouraged because it is used to analyse results.
        out.append({"id": el["id"], "pred": pred, "score": score})
    except Exception as e:
        # Best effort: report the failure and still emit a row for this id.
        print(e)
        print("failed", el["id"])
        out.append({"id": el["id"]})
# save the final result and that's it
# Pin the columns so the CSV header is always "id,pred,score", even when every
# element failed (those rows carry only "id") or the dataset yielded nothing.
pd.DataFrame(out, columns=["id", "pred", "score"]).to_csv("submission.csv", index=False)