violangg committed on
Commit
04a07f1
·
verified ·
1 Parent(s): 8507b7d

template upload

Browse files
Files changed (4) hide show
  1. gitignore +2 -0
  2. models.py +11 -0
  3. preprocess.py +10 -0
  4. script.py +67 -0
gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ __pycache__
2
+ submission.csv
models.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+
3
class Model(torch.nn.Module):
    """Placeholder detector that scores every input with random noise.

    This is a template: replace the body of ``forward`` with a real model.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self):
        super().__init__()
        # Example layer only; ``forward`` does not use it.
        self.fc1 = torch.nn.Linear(10, 5)
        # Decision threshold that callers compare the returned score against.
        self.threshold = 0.

    def forward(self, x):
        """Return one random score per item along the first axis of ``x``.

        Args:
            x: Input tensor. Only ``x.shape[0]`` and ``x.device`` are read;
                the values of ``x`` are ignored.

        Returns:
            A 1-D tensor of length ``x.shape[0]`` drawn from a standard
            normal distribution, located on the same device as ``x``.
        """
        # Allocate directly on the target device instead of sampling on the
        # CPU and copying the result over.
        return torch.randn(x.shape[0], device=x.device)
preprocess.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import librosa
3
+ import torch
4
+
5
def preprocess(audio_file):
    """Turn an audio file into a batch-of-one MFCC feature tensor.

    Args:
        audio_file: Anything ``librosa.load`` accepts (the caller in this
            template passes an ``io.BytesIO`` object).

    Returns:
        A ``torch.Tensor`` holding 13 MFCC coefficients per frame with a
        leading axis of size 1 prepended.
        NOTE(review): presumably shaped (1, 13, frames) for mono audio;
        confirm against the librosa documentation.
    """
    # sr=None asks librosa to keep the file's own sampling rate.
    waveform, sample_rate = librosa.load(audio_file, sr=None)
    coefficients = librosa.feature.mfcc(y=waveform, sr=sample_rate, n_mfcc=13)
    features = torch.from_numpy(coefficients)
    # Indexing with None inserts the leading (batch) axis.
    return features[None]
script.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from datasets import load_dataset
3
+ import numpy as np
4
+ import tqdm.auto as tqdm
5
+ import os
6
+ import io
7
+ import torch
8
+ import time
9
+
10
+ # Import your model and anything else you want
11
+ # You can even install other packages included in your repo
12
+ # However, during the evaluation the container will not have access to the internet.
13
+ # So you must include everything you need in your model repo. Common python libraries will be installed.
14
+ # Feel free to contact us to add dependencies to the requirements.txt
15
+ # For testing, this is the docker image that will be used https://github.com/huggingface/competitions/blob/main/Dockerfile
16
+ # It can be pulled here https://hub.docker.com/r/huggingface/competitions/tags
17
+
18
+ from models import Model
19
+ from preprocess import preprocess
20
+
21
+
22
# Load the dataset. It will be automatically downloaded to /tmp/data during evaluation.
DATASET_PATH = "/tmp/data"
dataset_remote = load_dataset(DATASET_PATH, split="test", streaming=True)


# Load your model. Use the first GPU when one is present; otherwise fall back
# to the CPU so the script can still be smoke-tested without CUDA.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
model = Model().to(device)
# Inference only: switch layers such as dropout / batch-norm to eval behaviour.
model.eval()


# Columns written to submission.csv. Fixing them up front keeps the header
# stable even when the dataset is empty or every example fails.
COLUMNS = ["id", "pred", "score", "time"]

# Iterate over the dataset.
out = []
for el in tqdm.tqdm(dataset_remote):

    start_time = time.time()

    # Each element is a dict:
    #   el["id"]              id of the example
    #   el["audio"]           contains the audio file
    #   el["audio"]["bytes"]  bytes from reading the raw audio
    #   el["audio"]["path"]   the filename; this is just for reference and
    #                         you can't actually load it

    # If you are using libraries that expect a file, you can use a BytesIO object.
    try:
        file_like = io.BytesIO(el["audio"]["bytes"])
        tensor = preprocess(file_like)

        with torch.no_grad():
            # Soft decision (such as a log-likelihood score):
            #   positive score corresponds to a synthetic prediction,
            #   negative score corresponds to a pristine prediction.
            score = model(tensor.to(device)).cpu().item()

        # A hard decision must be submitted, so you need to pick a threshold.
        pred = "generated" if score > model.threshold else "pristine"

        # Append your prediction.
        # "id" and "pred" are required. "score" will not be used in scoring,
        # but we encourage you to include it; it is used for analysis of the results.
        out.append(dict(id=el["id"], pred=pred, score=score, time=time.time() - start_time))
    except Exception as e:
        # Deliberate best effort: one broken example must not abort the whole
        # run, so report it and record a placeholder row instead.
        print(e)
        print("failed", el["id"])
        out.append(dict(id=el["id"], pred="none", score=None, time=time.time() - start_time))

# Save the final result and that's it.
pd.DataFrame(out, columns=COLUMNS).to_csv("submission.csv", index=False)