dsf-sandbox
/

safe-video-example-submission

Model card Files Files and versions

safe-video-example-submission / script.py

Gabe Mancino-Ball

Update pristine -> accuracy

b704dbb 7 months ago

3.11 kB

	import pandas as pd
	from datasets import load_dataset
	import numpy as np
	import tqdm.auto as tqdm
	import os
	import io
	import torch
	import time
	import av
	import torch
	import numpy as np

	# Import your model and anything else you want
	# You can even install other packages included in your repo
	# However, during the evaluation the container will not have access to the internet.
	# So you must include everything you need in your model repo.


	import torch

	# from torchcodec.decoders import VideoDecoder

	# def preprocess_v1(file_like):
	# file_like.seek(0)
	# decoder = VideoDecoder(file_like)
	# frames = decoder[0:-1:20]
	# frames = frames.float() / 255.0
	# return frames


	def preprocess(file_like, every=10):
	    """Decode a video from a file-like object into a stacked frame tensor.

	    Frames 0, ``every``, 2 * ``every``, ... of the first video stream are
	    converted to 8-bit RGB and kept; all other frames are decoded and dropped.

	    Args:
	        file_like: Seekable binary file-like object holding the encoded video
	            (the script passes an ``io.BytesIO``). It is rewound before use.
	        every: Sampling stride in frames, must be >= 1. Defaults to 10, the
	            stride this script previously hard-coded.

	    Returns:
	        A float tensor of stacked frames, channels-first per frame, i.e.
	        (num_kept_frames, 3, height, width). Values are the raw 0-255
	        intensities cast to float; no normalization is applied.

	    Raises:
	        ValueError: If ``every`` is smaller than 1, or if the stream yields
	            no frames at all.
	    """
	    if every < 1:
	        raise ValueError(f"every must be >= 1, got {every}")

	    file_like.seek(0)
	    frames = []
	    # The context manager closes the container (and its decoder state) even
	    # when decoding fails part-way; otherwise one handle leaks per video.
	    with av.open(file_like) as container:
	        for i, frame in enumerate(container.decode(video=0)):
	            if i % every == 0:
	                frame_array = frame.to_ndarray(format="rgb24")
	                # (H, W, C) -> (C, H, W)
	                frames.append(torch.from_numpy(frame_array).permute(2, 0, 1).float())

	    # torch.stack fails with an opaque error on an empty list; report the
	    # actual problem instead.
	    if not frames:
	        raise ValueError("no video frames could be decoded")

	    return torch.stack(frames)


	class Model(torch.nn.Module):
	    """Placeholder detector that returns one random score per batch element.

	    This is a stand-in to be replaced by a real model. ``fc1`` is created but
	    never used by ``forward``. ``threshold`` is the cut-off the calling script
	    compares a score against to turn it into a hard decision.
	    """

	    def __init__(self):
	        super().__init__()
	        self.fc1 = torch.nn.Linear(10, 5)
	        self.threshold = 0.0

	    def forward(self, x):
	        """Return a 1-D tensor of standard-normal scores, one per item in ``x``.

	        Only the size of the first dimension of ``x`` and its device are used;
	        the values of ``x`` are ignored. The scores are drawn on the CPU and
	        then moved to the device of ``x``.
	        """
	        batch_size = x.shape[0]
	        scores = torch.randn(batch_size)
	        return scores.to(x.device)


	# Load the dataset. During evaluation it is automatically downloaded to
	# /tmp/data. streaming=True yields elements one at a time instead of
	# materializing the whole test split first.
	DATASET_PATH = "/tmp/data"
	dataset_remote = load_dataset(DATASET_PATH, split="test", streaming=True)


	# Load your model. Use the first GPU when one is visible and fall back to the
	# CPU otherwise, so the script also runs on machines without CUDA.
	device = "cuda:0" if torch.cuda.is_available() else "cpu"
	model = Model().to(device)
	# Inference only: switch off training-time behaviour such as dropout.
	model.eval()


	# Iterate over the dataset and collect one result row per element.
	out = []
	for el in tqdm.tqdm(dataset_remote):
	    # Each element is a dict:
	    #   el["video"]["bytes"] contains the bytes read from the raw file
	    #   el["video"]["path"] contains the filename. This is just for reference;
	    #   you can't actually load it.
	    try:
	        # Libraries that expect a file can be given a BytesIO object.
	        file_like = io.BytesIO(el["video"]["bytes"])
	        tensor = preprocess(file_like)

	        with torch.no_grad():
	            # Soft decision (such as a log-likelihood score):
	            #   a positive score corresponds to a synthetic prediction,
	            #   a negative score corresponds to a real prediction.
	            # tensor[None] adds the leading batch dimension.
	            score = model(tensor[None].to(device)).cpu().item()
	    except Exception as e:
	        # Best effort: one undecodable video must not abort the whole run.
	        # The element is still reported, with pred and score left empty.
	        print(e)
	        print("failed", el["id"])
	        out.append(dict(id=el["id"]))
	        continue

	    # A hard decision must be submitted, so a threshold has to be picked.
	    pred = "generated" if score > model.threshold else "real"

	    # "id" and "pred" are required. "score" is not used in scoring, but you
	    # are encouraged to include it; it is used for analysis of the results.
	    out.append(dict(id=el["id"], pred=pred, score=score))

	# Save the final result and that's it. The columns are pinned so the CSV
	# always has the id/pred/score header, even when the dataset is empty or
	# every element failed.
	pd.DataFrame(out, columns=["id", "pred", "score"]).to_csv("submission.csv", index=False)