# Scoring script (commit c4306d5). Viewer metadata (file size, gutter line
# numbers) removed — it was extraction residue, not part of the script.
import io
import time
import os
os.environ['OMP_NUM_THREADS'] = '1'
os.environ['OPENBLAS_NUM_THREADS'] = '1'
os.environ['MKL_NUM_THREADS'] = '1'
os.environ['NUMEXPR_NUM_THREADS'] = '1'
import torch
import tqdm.auto as tqdm
from glob import glob
from datasets import load_dataset
import numpy as np
import pandas as pd
# from models import Model
from preprocess import preprocess
from src.rawnet_model import RawNet
# Import your model and anything else you want
# You can even install other packages included in your repo
# However, during the evaluation the container will not have access to the internet.
# So you must include everything you need in your model repo. Common python libraries will be installed.
# Feel free to contact us to add dependencies to the requirements.txt
# For testing, this is the docker image that will be used https://github.com/huggingface/competitions/blob/main/Dockerfile
# It can be pulled here https://hub.docker.com/r/huggingface/competitions/tags
print('imported packages')

# The evaluation harness places the dataset under /tmp/data before this
# script runs; we simply enumerate every file in that directory.
# (Streaming via `load_dataset` was the earlier approach:)
# dataset_remote = load_dataset('safe-challenge/safe-challenge-practice-dataset', split="test", streaming=True)
DATASET_PATH = "/tmp/data"
_glob_pattern = os.path.join(DATASET_PATH, '*')
dataset_remote = glob(_glob_pattern)
print('loaded dataset')

# Evaluation containers are CPU-only; keep the CUDA option for local runs.
# device = "cuda:0"
device = "cpu"
# DEFINE RAWNET2 MODEL
# Architecture hyperparameters for RawNet2 (raw-waveform anti-spoofing net):
# sinc-conv front end, residual filter blocks, GRU, and a 2-class head
# (class 0 = pristine, class 1 = synthetic).
config = {
    "first_conv": 1024,
    "in_channels": 1,
    "filts": [20, [20, 20], [20, 128], [128, 128]],
    "blocks": [2, 4],
    "nb_fc_node": 1024,
    "gru_node": 1024,
    "nb_gru_layer": 3,
    "nb_classes": 2
}
model = RawNet(config, device)
# Checkpoint is baked into the container image at this fixed path;
# map_location keeps the load working on CPU-only evaluation machines.
model_path = '/tmp/checkpoints/RAWNET_ASVSPOOF_FOR_INTHEWILD_PURDUE.pth'
model.load_state_dict(torch.load(model_path, map_location=device))
# BUG FIX: switch to inference mode. Without eval(), BatchNorm uses batch
# statistics and any dropout stays active, making scores wrong/unstable.
model.eval()
print('Loaded RawNet2 Weights')
# Score every file and collect one row per example for the submission CSV.
# Required columns: "id" and "pred"; "score" and "time" are extra and are
# only used for post-hoc analysis by the organizers.
out = []
for el in dataset_remote:
    start_time = time.time()
    # `el` is a file path returned by glob(); preprocess() is expected to
    # load the audio and return a batch of fixed-length waveform windows
    # (shape assumed (n_windows, samples) — TODO confirm against preprocess).
    try:
        tensor = preprocess(el)
        with torch.no_grad():
            # Column 1 of the model output is the "synthetic" class score:
            # higher => generated, lower => pristine.
            score = model(tensor.to(device))[:, 1].cpu()
        print(f'SCORE OUT: {score}')
        # Average the per-window scores into one scalar per file.
        score = score.mean().item()
        print(f'SCORE FINAL: {score}')
        # Hard decision required by the submission format; the threshold is
        # carried on the model (presumably tuned on a dev set — TODO confirm).
        pred = "generated" if score > model.threshold else "pristine"
        out.append(dict(id=el, pred=pred, score=score, time=time.time() - start_time))
    except Exception as e:
        # Best-effort boundary: one bad file must not kill the whole run.
        print(e)
        print("failed", el)
        # FIX: include "time" on failure rows too, so every row has the same
        # columns (previously failures produced NaN in the "time" column).
        out.append(dict(id=el, pred="none", score=None, time=time.time() - start_time))

# Save the final result (trailing extraction artifact removed) and that's it.
pd.DataFrame(out).to_csv("submission.csv", index=False)