# Scoring script (commit c4306d5). Viewer metadata (file size, gutter line
# numbers) removed — it was extraction residue, not part of the script.
import io
import time
import os
os.environ['OMP_NUM_THREADS'] = '1'
os.environ['OPENBLAS_NUM_THREADS'] = '1'
os.environ['MKL_NUM_THREADS'] = '1'
os.environ['NUMEXPR_NUM_THREADS'] = '1'
import torch
import tqdm.auto as tqdm
from glob import glob
from datasets import load_dataset
import numpy as np
import pandas as pd
# from models import Model
from preprocess import preprocess
from src.rawnet_model import RawNet
# Import your model and anything else you want
# You can even install other packages included in your repo
# However, during the evaluation the container will not have access to the internet.
# So you must include everything you need in your model repo. Common python libraries will be installed.
# Feel free to contact us to add dependencies to the requirements.txt
# For testing, this is the docker image that will be used https://github.com/huggingface/competitions/blob/main/Dockerfile
# It can be pulled here https://hub.docker.com/r/huggingface/competitions/tags
print('imported packages')

# The evaluation harness places the dataset under /tmp/data before this
# script runs; we simply enumerate every file in that directory.
# (Streaming via `load_dataset` was the earlier approach:)
# dataset_remote = load_dataset('safe-challenge/safe-challenge-practice-dataset', split="test", streaming=True)
DATASET_PATH = "/tmp/data"
_glob_pattern = os.path.join(DATASET_PATH, '*')
dataset_remote = glob(_glob_pattern)
print('loaded dataset')

# Evaluation containers are CPU-only; keep the CUDA option for local runs.
# device = "cuda:0"
device = "cpu"
# DEFINE RAWNET2 MODEL
# Architecture hyperparameters for RawNet2 (raw-waveform anti-spoofing net):
# sinc-conv front end, residual filter blocks, GRU, and a 2-class head
# (class 0 = pristine, class 1 = synthetic).
config = {
    "first_conv": 1024,
    "in_channels": 1,
    "filts": [20, [20, 20], [20, 128], [128, 128]],
    "blocks": [2, 4],
    "nb_fc_node": 1024,
    "gru_node": 1024,
    "nb_gru_layer": 3,
    "nb_classes": 2
}
model = RawNet(config, device)
# Checkpoint is baked into the container image at this fixed path;
# map_location keeps the load working on CPU-only evaluation machines.
model_path = '/tmp/checkpoints/RAWNET_ASVSPOOF_FOR_INTHEWILD_PURDUE.pth'
model.load_state_dict(torch.load(model_path, map_location=device))
# BUG FIX: switch to inference mode. Without eval(), BatchNorm uses batch
# statistics and any dropout stays active, making scores wrong/unstable.
model.eval()
print('Loaded RawNet2 Weights')
# Score every file and collect one row per example for the submission CSV.
# Required columns: "id" and "pred"; "score" and "time" are extra and are
# only used for post-hoc analysis by the organizers.
out = []
for el in dataset_remote:
    start_time = time.time()
    # `el` is a file path returned by glob(); preprocess() is expected to
    # load the audio and return a batch of fixed-length waveform windows
    # (shape assumed (n_windows, samples) — TODO confirm against preprocess).
    try:
        tensor = preprocess(el)
        with torch.no_grad():
            # Column 1 of the model output is the "synthetic" class score:
            # higher => generated, lower => pristine.
            score = model(tensor.to(device))[:, 1].cpu()
        print(f'SCORE OUT: {score}')
        # Average the per-window scores into one scalar per file.
        score = score.mean().item()
        print(f'SCORE FINAL: {score}')
        # Hard decision required by the submission format; the threshold is
        # carried on the model (presumably tuned on a dev set — TODO confirm).
        pred = "generated" if score > model.threshold else "pristine"
        out.append(dict(id=el, pred=pred, score=score, time=time.time() - start_time))
    except Exception as e:
        # Best-effort boundary: one bad file must not kill the whole run.
        print(e)
        print("failed", el)
        # FIX: include "time" on failure rows too, so every row has the same
        # columns (previously failures produced NaN in the "time" column).
        out.append(dict(id=el, pred="none", score=None, time=time.time() - start_time))

# Save the final result (trailing extraction artifact removed) and that's it.
pd.DataFrame(out).to_csv("submission.csv", index=False)