HF ready
Browse files
- .gitignore +1 -0
- .idea/deployment.xml +2 -1
- .idea/workspace.xml +15 -12
- data/06704fa4-5a0c-540c-86e6-c98af1528478.wav +0 -0
- data/07bd0843-74a6-53ec-a3f0-00dfc31d6e2a.wav +0 -0
- data/9712245a-548d-584c-a82d-a543f1ea21ac.wav +0 -0
- data/c3e008aa-e4ba-5d2a-b37e-dd6d0ae640cb.wav +0 -0
- script.py +55 -53
- script_custom.py +109 -0
- script_orig.py +67 -0
.gitignore
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
# Ignore weights
|
| 2 |
checkpoints/RAWNET_ASVSPOOF_FOR_INTHEWILD_PURDUE.pth
|
| 3 |
debug.sh
|
|
|
|
| 4 |
|
| 5 |
.idea
|
| 6 |
|
|
|
|
| 1 |
# Ignore weights
|
| 2 |
checkpoints/RAWNET_ASVSPOOF_FOR_INTHEWILD_PURDUE.pth
|
| 3 |
debug.sh
|
| 4 |
+
data_test/
|
| 5 |
|
| 6 |
.idea
|
| 7 |
|
.idea/deployment.xml
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
<project version="4">
|
| 3 |
-
<component name="PublishConfigData" autoUpload="On explicit save action" autoUploadExternalChanges="true">
|
|
|
|
| 4 |
<serverData>
|
| 5 |
<paths name="Fermi">
|
| 6 |
<serverdata>
|
|
|
|
| 1 |
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
<project version="4">
|
| 3 |
+
<component name="PublishConfigData" autoUpload="On explicit save action" confirmBeforeUploading="false" autoUploadExternalChanges="true">
|
| 4 |
+
<option name="confirmBeforeUploading" value="false" />
|
| 5 |
<serverData>
|
| 6 |
<paths name="Fermi">
|
| 7 |
<serverdata>
|
.idea/workspace.xml
CHANGED
|
@@ -5,19 +5,15 @@
|
|
| 5 |
</component>
|
| 6 |
<component name="ChangeListManager">
|
| 7 |
<list default="true" id="23565123-73ab-4f40-a9ef-1086e0c9e1ec" name="Changes" comment="">
|
| 8 |
-
<change afterPath="$PROJECT_DIR$/
|
| 9 |
-
<change afterPath="$PROJECT_DIR$/
|
| 10 |
-
<change afterPath="$PROJECT_DIR$/data/07bd0843-74a6-53ec-a3f0-00dfc31d6e2a.wav" afterDir="false" />
|
| 11 |
-
<change afterPath="$PROJECT_DIR$/data/9712245a-548d-584c-a82d-a543f1ea21ac.wav" afterDir="false" />
|
| 12 |
-
<change afterPath="$PROJECT_DIR$/data/c3e008aa-e4ba-5d2a-b37e-dd6d0ae640cb.wav" afterDir="false" />
|
| 13 |
-
<change afterPath="$PROJECT_DIR$/debug.sh" afterDir="false" />
|
| 14 |
-
<change afterPath="$PROJECT_DIR$/requirements.txt" afterDir="false" />
|
| 15 |
<change beforePath="$PROJECT_DIR$/.gitignore" beforeDir="false" afterPath="$PROJECT_DIR$/.gitignore" afterDir="false" />
|
| 16 |
<change beforePath="$PROJECT_DIR$/.idea/deployment.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/deployment.xml" afterDir="false" />
|
| 17 |
-
<change beforePath="$PROJECT_DIR$/.idea/vcs.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/vcs.xml" afterDir="false" />
|
| 18 |
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
|
| 19 |
-
<change beforePath="$PROJECT_DIR$/
|
| 20 |
-
<change beforePath="$PROJECT_DIR$/
|
|
|
|
|
|
|
| 21 |
<change beforePath="$PROJECT_DIR$/script.py" beforeDir="false" afterPath="$PROJECT_DIR$/script.py" afterDir="false" />
|
| 22 |
</list>
|
| 23 |
<option name="SHOW_DIALOG" value="false" />
|
|
@@ -25,6 +21,13 @@
|
|
| 25 |
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
|
| 26 |
<option name="LAST_RESOLUTION" value="IGNORE" />
|
| 27 |
</component>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
<component name="FlaskConsoleOptions" custom-start-script="import sys sys.path.extend([WORKING_DIR_AND_PYTHON_PATHS]) from flask.cli import ScriptInfo locals().update(ScriptInfo(create_app=None).load_app().make_shell_context()) print("Python %s on %s\nApp: %s [%s]\nInstance: %s" % (sys.version, sys.platform, app.import_name, app.env, app.instance_path))">
|
| 29 |
<envs>
|
| 30 |
<env key="FLASK_APP" value="app" />
|
|
@@ -104,7 +107,7 @@
|
|
| 104 |
<updated>1742573353560</updated>
|
| 105 |
<workItem from="1742573355153" duration="587000" />
|
| 106 |
<workItem from="1742806974298" duration="2741000" />
|
| 107 |
-
<workItem from="1742810431420" duration="
|
| 108 |
</task>
|
| 109 |
<servers />
|
| 110 |
</component>
|
|
@@ -116,7 +119,7 @@
|
|
| 116 |
<breakpoints>
|
| 117 |
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
|
| 118 |
<url>file://$PROJECT_DIR$/script.py</url>
|
| 119 |
-
<line>
|
| 120 |
<option name="timeStamp" value="1" />
|
| 121 |
</line-breakpoint>
|
| 122 |
</breakpoints>
|
|
|
|
| 5 |
</component>
|
| 6 |
<component name="ChangeListManager">
|
| 7 |
<list default="true" id="23565123-73ab-4f40-a9ef-1086e0c9e1ec" name="Changes" comment="">
|
| 8 |
+
<change afterPath="$PROJECT_DIR$/script_custom.py" afterDir="false" />
|
| 9 |
+
<change afterPath="$PROJECT_DIR$/script_orig.py" afterDir="false" />
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
<change beforePath="$PROJECT_DIR$/.gitignore" beforeDir="false" afterPath="$PROJECT_DIR$/.gitignore" afterDir="false" />
|
| 11 |
<change beforePath="$PROJECT_DIR$/.idea/deployment.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/deployment.xml" afterDir="false" />
|
|
|
|
| 12 |
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
|
| 13 |
+
<change beforePath="$PROJECT_DIR$/data/06704fa4-5a0c-540c-86e6-c98af1528478.wav" beforeDir="false" />
|
| 14 |
+
<change beforePath="$PROJECT_DIR$/data/07bd0843-74a6-53ec-a3f0-00dfc31d6e2a.wav" beforeDir="false" />
|
| 15 |
+
<change beforePath="$PROJECT_DIR$/data/9712245a-548d-584c-a82d-a543f1ea21ac.wav" beforeDir="false" />
|
| 16 |
+
<change beforePath="$PROJECT_DIR$/data/c3e008aa-e4ba-5d2a-b37e-dd6d0ae640cb.wav" beforeDir="false" />
|
| 17 |
<change beforePath="$PROJECT_DIR$/script.py" beforeDir="false" afterPath="$PROJECT_DIR$/script.py" afterDir="false" />
|
| 18 |
</list>
|
| 19 |
<option name="SHOW_DIALOG" value="false" />
|
|
|
|
| 21 |
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
|
| 22 |
<option name="LAST_RESOLUTION" value="IGNORE" />
|
| 23 |
</component>
|
| 24 |
+
<component name="FileTemplateManagerImpl">
|
| 25 |
+
<option name="RECENT_TEMPLATES">
|
| 26 |
+
<list>
|
| 27 |
+
<option value="Python Script" />
|
| 28 |
+
</list>
|
| 29 |
+
</option>
|
| 30 |
+
</component>
|
| 31 |
<component name="FlaskConsoleOptions" custom-start-script="import sys sys.path.extend([WORKING_DIR_AND_PYTHON_PATHS]) from flask.cli import ScriptInfo locals().update(ScriptInfo(create_app=None).load_app().make_shell_context()) print("Python %s on %s\nApp: %s [%s]\nInstance: %s" % (sys.version, sys.platform, app.import_name, app.env, app.instance_path))">
|
| 32 |
<envs>
|
| 33 |
<env key="FLASK_APP" value="app" />
|
|
|
|
| 107 |
<updated>1742573353560</updated>
|
| 108 |
<workItem from="1742573355153" duration="587000" />
|
| 109 |
<workItem from="1742806974298" duration="2741000" />
|
| 110 |
+
<workItem from="1742810431420" duration="7858000" />
|
| 111 |
</task>
|
| 112 |
<servers />
|
| 113 |
</component>
|
|
|
|
| 119 |
<breakpoints>
|
| 120 |
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
|
| 121 |
<url>file://$PROJECT_DIR$/script.py</url>
|
| 122 |
+
<line>28</line>
|
| 123 |
<option name="timeStamp" value="1" />
|
| 124 |
</line-breakpoint>
|
| 125 |
</breakpoints>
|
data/06704fa4-5a0c-540c-86e6-c98af1528478.wav
DELETED
|
Binary file (530 kB)
|
|
|
data/07bd0843-74a6-53ec-a3f0-00dfc31d6e2a.wav
DELETED
|
Binary file (672 kB)
|
|
|
data/9712245a-548d-584c-a82d-a543f1ea21ac.wav
DELETED
|
Binary file (660 kB)
|
|
|
data/c3e008aa-e4ba-5d2a-b37e-dd6d0ae640cb.wav
DELETED
|
Binary file (683 kB)
|
|
|
script.py
CHANGED
|
@@ -1,24 +1,22 @@
|
|
| 1 |
-
import
|
| 2 |
-
import
|
|
|
|
| 3 |
import os
|
| 4 |
-
|
| 5 |
-
os.environ['OMP_NUM_THREADS'] = '1'
|
| 6 |
-
os.environ['OPENBLAS_NUM_THREADS'] = '1'
|
| 7 |
-
os.environ['MKL_NUM_THREADS'] = '1'
|
| 8 |
-
os.environ['NUMEXPR_NUM_THREADS'] = '1'
|
| 9 |
-
|
| 10 |
import torch
|
| 11 |
-
import
|
| 12 |
from glob import glob
|
| 13 |
-
from datasets import load_dataset
|
| 14 |
|
| 15 |
-
import numpy as np
|
| 16 |
-
import pandas as pd
|
| 17 |
|
| 18 |
# from models import Model
|
|
|
|
| 19 |
from preprocess import preprocess
|
| 20 |
from src.rawnet_model import RawNet
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
# Import your model and anything else you want
|
| 24 |
# You can even install other packages included in your repo
|
|
@@ -28,16 +26,15 @@ from src.rawnet_model import RawNet
|
|
| 28 |
# For testing, this is the docker image that will be used https://github.com/huggingface/competitions/blob/main/Dockerfile
|
| 29 |
# It can be pulled here https://hub.docker.com/r/huggingface/competitions/tags
|
| 30 |
|
| 31 |
-
|
| 32 |
# load the dataset. dataset will be automatically downloaded to /tmp/data during evaluation
|
|
|
|
|
|
|
| 33 |
DATASET_PATH = "/tmp/data"
|
| 34 |
-
dataset_remote =
|
| 35 |
-
# dataset_remote = load_dataset('safe-challenge/safe-challenge-practice-dataset', split="test", streaming=True)
|
| 36 |
-
print('loaded dataset')
|
| 37 |
-
|
| 38 |
|
| 39 |
-
|
| 40 |
-
device = "cpu"
|
| 41 |
|
| 42 |
# DEFINE RAWNET2 MODEL
|
| 43 |
config = {
|
|
@@ -55,7 +52,6 @@ model = RawNet(config, device)
|
|
| 55 |
|
| 56 |
model_path = '/tmp/checkpoints/RAWNET_ASVSPOOF_FOR_INTHEWILD_PURDUE.pth'
|
| 57 |
model.load_state_dict(torch.load(model_path, map_location=device))
|
| 58 |
-
# model = model.float()
|
| 59 |
|
| 60 |
print('Loaded RawNet2 Weights')
|
| 61 |
|
|
@@ -63,8 +59,8 @@ print('Loaded RawNet2 Weights')
|
|
| 63 |
|
| 64 |
# iterate over the dataset
|
| 65 |
out = []
|
| 66 |
-
|
| 67 |
-
for el in dataset_remote:
|
| 68 |
|
| 69 |
start_time = time.time()
|
| 70 |
|
|
@@ -75,37 +71,43 @@ for el in dataset_remote:
|
|
| 75 |
|
| 76 |
# if you are using libraries that expect a file. You can use BytesIO object
|
| 77 |
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
# save the final result and that's it
|
| 111 |
pd.DataFrame(out).to_csv("submission.csv", index=False)
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import tqdm.auto as tqdm
|
| 4 |
import os
|
| 5 |
+
import io
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
import torch
|
| 7 |
+
import time
|
| 8 |
from glob import glob
|
|
|
|
| 9 |
|
|
|
|
|
|
|
| 10 |
|
| 11 |
# from models import Model
|
| 12 |
+
from datasets import load_dataset
|
| 13 |
from preprocess import preprocess
|
| 14 |
from src.rawnet_model import RawNet
|
| 15 |
|
| 16 |
+
# os.environ['OMP_NUM_THREADS'] = '1'
|
| 17 |
+
# os.environ['OPENBLAS_NUM_THREADS'] = '1'
|
| 18 |
+
# os.environ['MKL_NUM_THREADS'] = '1'
|
| 19 |
+
# os.environ['NUMEXPR_NUM_THREADS'] = '1'
|
| 20 |
|
| 21 |
# Import your model and anything else you want
|
| 22 |
# You can even install other packages included in your repo
|
|
|
|
| 26 |
# For testing, this is the docker image that will be used https://github.com/huggingface/competitions/blob/main/Dockerfile
|
| 27 |
# It can be pulled here https://hub.docker.com/r/huggingface/competitions/tags
|
| 28 |
|
| 29 |
+
|
| 30 |
# load the dataset. dataset will be automatically downloaded to /tmp/data during evaluation
|
| 31 |
+
# DATASET_PATH = "/tmp/data_test"
|
| 32 |
+
# dataset_remote = glob(os.path.join(DATASET_PATH, '*'))
|
| 33 |
DATASET_PATH = "/tmp/data"
|
| 34 |
+
dataset_remote = load_dataset(DATASET_PATH,split = "test",streaming = True)
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
+
device = "cuda:0"
|
| 37 |
+
# device = "cpu"
|
| 38 |
|
| 39 |
# DEFINE RAWNET2 MODEL
|
| 40 |
config = {
|
|
|
|
| 52 |
|
| 53 |
model_path = '/tmp/checkpoints/RAWNET_ASVSPOOF_FOR_INTHEWILD_PURDUE.pth'
|
| 54 |
model.load_state_dict(torch.load(model_path, map_location=device))
|
|
|
|
| 55 |
|
| 56 |
print('Loaded RawNet2 Weights')
|
| 57 |
|
|
|
|
| 59 |
|
| 60 |
# iterate over the dataset
|
| 61 |
out = []
|
| 62 |
+
for el in tqdm.tqdm(dataset_remote):
|
| 63 |
+
# for el in dataset_remote:
|
| 64 |
|
| 65 |
start_time = time.time()
|
| 66 |
|
|
|
|
| 71 |
|
| 72 |
# if you are using libraries that expect a file. You can use BytesIO object
|
| 73 |
|
| 74 |
+
try:
|
| 75 |
+
# RUNNING ON HUGGINGFACE
|
| 76 |
+
file_like = io.BytesIO(el["audio"]["bytes"])
|
| 77 |
+
tensor = preprocess(file_like)
|
| 78 |
+
## RUNNING LOCALLY
|
| 79 |
+
# tensor = preprocess(el)
|
| 80 |
+
|
| 81 |
+
with torch.no_grad():
|
| 82 |
+
# soft decision (such as log likelihood score)
|
| 83 |
+
# positive score correspond to synthetic prediction
|
| 84 |
+
# negative score correspond to pristine prediction
|
| 85 |
+
|
| 86 |
+
# OLD MODEL
|
| 87 |
+
# score = model(tensor.to(device)).cpu().item()
|
| 88 |
+
# RAWNET2 MODEL
|
| 89 |
+
score = model(tensor.to(device))[:, 1].cpu()
|
| 90 |
+
print(f'SCORE OUT: {score}')
|
| 91 |
+
score = score.mean().item()
|
| 92 |
+
print(f'SCORE FINAL: {score}')
|
| 93 |
+
|
| 94 |
+
# we require a hard decision to be submitted, so you need to pick a threshold
|
| 95 |
+
pred = "generated" if score > model.threshold else "pristine"
|
| 96 |
+
|
| 97 |
+
# append your prediction
|
| 98 |
+
# "id" and "pred" are required. "score" will not be used in scoring but we encourage you to include it. We'll use it for analysis of the results
|
| 99 |
+
|
| 100 |
+
# RUNNING ON HUGGINGFACE
|
| 101 |
+
out.append(dict(id=el["id"], pred=pred, score=score, time=time.time() - start_time))
|
| 102 |
+
## RUNNING LOCALLY
|
| 103 |
+
# out.append(dict(id=el, pred=pred, score=score, time=time.time() - start_time))
|
| 104 |
+
|
| 105 |
+
except Exception as e:
|
| 106 |
+
print(e)
|
| 107 |
+
print("failed", el["id"])
|
| 108 |
+
out.append(dict(id=el["id"], pred="none", score=None))
|
| 109 |
+
# print("failed", el)
|
| 110 |
+
# out.append(dict(id=el, pred="none", score=None))
|
| 111 |
|
| 112 |
# save the final result and that's it
|
| 113 |
pd.DataFrame(out).to_csv("submission.csv", index=False)
|
script_custom.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import io
|
| 2 |
+
import time
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
+
os.environ['OMP_NUM_THREADS'] = '1'
|
| 6 |
+
os.environ['OPENBLAS_NUM_THREADS'] = '1'
|
| 7 |
+
os.environ['MKL_NUM_THREADS'] = '1'
|
| 8 |
+
os.environ['NUMEXPR_NUM_THREADS'] = '1'
|
| 9 |
+
|
| 10 |
+
import torch
|
| 11 |
+
import tqdm.auto as tqdm
|
| 12 |
+
from glob import glob
|
| 13 |
+
from datasets import load_dataset
|
| 14 |
+
|
| 15 |
+
import numpy as np
|
| 16 |
+
import pandas as pd
|
| 17 |
+
|
| 18 |
+
# from models import Model
|
| 19 |
+
from preprocess import preprocess
|
| 20 |
+
from src.rawnet_model import RawNet
|
| 21 |
+
|
| 22 |
+
# Import your model and anything else you want
|
| 23 |
+
# You can even install other packages included in your repo
|
| 24 |
+
# However, during the evaluation the container will not have access to the internet.
|
| 25 |
+
# So you must include everything you need in your model repo. Common python libraries will be installed.
|
| 26 |
+
# Feel free to contact us to add dependencies to the requirements.txt
|
| 27 |
+
# For testing, this is the docker image that will be used https://github.com/huggingface/competitions/blob/main/Dockerfile
|
| 28 |
+
# It can be pulled here https://hub.docker.com/r/huggingface/competitions/tags
|
| 29 |
+
|
| 30 |
+
print('imported packages')
|
| 31 |
+
# load the dataset. dataset will be automatically downloaded to /tmp/data during evaluation
|
| 32 |
+
DATASET_PATH = "/tmp/data"
|
| 33 |
+
dataset_remote = glob(os.path.join(DATASET_PATH, '*'))
|
| 34 |
+
# dataset_remote = load_dataset('safe-challenge/safe-challenge-practice-dataset', split="test", streaming=True)
|
| 35 |
+
print('loaded dataset')
|
| 36 |
+
|
| 37 |
+
# device = "cuda:0"
|
| 38 |
+
device = "cpu"
|
| 39 |
+
|
| 40 |
+
# DEFINE RAWNET2 MODEL
|
| 41 |
+
config = {
|
| 42 |
+
"first_conv": 1024,
|
| 43 |
+
"in_channels": 1,
|
| 44 |
+
"filts": [20, [20, 20], [20, 128], [128, 128]],
|
| 45 |
+
"blocks": [2, 4],
|
| 46 |
+
"nb_fc_node": 1024,
|
| 47 |
+
"gru_node": 1024,
|
| 48 |
+
"nb_gru_layer": 3,
|
| 49 |
+
"nb_classes": 2
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
model = RawNet(config, device)
|
| 53 |
+
|
| 54 |
+
model_path = '/tmp/checkpoints/RAWNET_ASVSPOOF_FOR_INTHEWILD_PURDUE.pth'
|
| 55 |
+
model.load_state_dict(torch.load(model_path, map_location=device))
|
| 56 |
+
# model = model.float()
|
| 57 |
+
|
| 58 |
+
print('Loaded RawNet2 Weights')
|
| 59 |
+
|
| 60 |
+
# model = Model().to(device) # OLD MODEL
|
| 61 |
+
|
| 62 |
+
# iterate over the dataset
|
| 63 |
+
out = []
|
| 64 |
+
# for el in tqdm.tqdm(dataset_remote):
|
| 65 |
+
for el in dataset_remote:
|
| 66 |
+
|
| 67 |
+
start_time = time.time()
|
| 68 |
+
|
| 69 |
+
# each element is a dict
|
| 70 |
+
# el["id"] id of example and el["audio"] contains the audio file
|
| 71 |
+
# el["audio"]["bytes"] contains bytes from reading the raw audio
|
| 72 |
+
# el["audio"]["path"] contains the filename. This is just for reference and you can't actually load it
|
| 73 |
+
|
| 74 |
+
# if you are using libraries that expect a file. You can use BytesIO object
|
| 75 |
+
|
| 76 |
+
try:
|
| 77 |
+
|
| 78 |
+
# file_like = io.BytesIO(el["audio"]["bytes"])
|
| 79 |
+
# tensor = preprocess(file_like)
|
| 80 |
+
tensor = preprocess(el)
|
| 81 |
+
|
| 82 |
+
with torch.no_grad():
|
| 83 |
+
# soft decision (such as log likelihood score)
|
| 84 |
+
# positive score correspond to synthetic prediction
|
| 85 |
+
# negative score correspond to pristine prediction
|
| 86 |
+
# score = model(tensor.to(device)).cpu().item()
|
| 87 |
+
score = model(tensor.to(device))[:, 1].cpu()
|
| 88 |
+
print(f'SCORE OUT: {score}')
|
| 89 |
+
score = score.mean().item()
|
| 90 |
+
print(f'SCORE FINAL: {score}')
|
| 91 |
+
|
| 92 |
+
# we require a hard decision to be submitted, so you need to pick a threshold
|
| 93 |
+
pred = "generated" if score > model.threshold else "pristine"
|
| 94 |
+
|
| 95 |
+
# append your prediction
|
| 96 |
+
# "id" and "pred" are required. "score" will not be used in scoring but we encourage you to include it. We'll use it for analysis of the results
|
| 97 |
+
|
| 98 |
+
# out.append(dict(id = el["id"], pred = pred, score = score, time = time.time() - start_time))
|
| 99 |
+
out.append(dict(id=el, pred=pred, score=score, time=time.time() - start_time))
|
| 100 |
+
|
| 101 |
+
except Exception as e:
|
| 102 |
+
print(e)
|
| 103 |
+
# print("failed", el["id"])
|
| 104 |
+
# out.append(dict(id = el["id"], pred = "none", score = None))
|
| 105 |
+
print("failed", el)
|
| 106 |
+
out.append(dict(id=el, pred="none", score=None))
|
| 107 |
+
|
| 108 |
+
# save the final result and that's it
|
| 109 |
+
pd.DataFrame(out).to_csv("submission.csv", index=False)
|
script_orig.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
from datasets import load_dataset
|
| 3 |
+
import numpy as np
|
| 4 |
+
import tqdm.auto as tqdm
|
| 5 |
+
import os
|
| 6 |
+
import io
|
| 7 |
+
import torch
|
| 8 |
+
import time
|
| 9 |
+
|
| 10 |
+
# Import your model and anything else you want
|
| 11 |
+
# You can even install other packages included in your repo
|
| 12 |
+
# However, during the evaluation the container will not have access to the internet.
|
| 13 |
+
# So you must include everything you need in your model repo. Common python libraries will be installed.
|
| 14 |
+
# Feel free to contact us to add dependencies to the requirements.txt
|
| 15 |
+
# For testing, this is the docker image that will be used https://github.com/huggingface/competitions/blob/main/Dockerfile
|
| 16 |
+
# It can be pulled here https://hub.docker.com/r/huggingface/competitions/tags
|
| 17 |
+
|
| 18 |
+
from models import Model
|
| 19 |
+
from preprocess import preprocess
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
# load the dataset. dataset will be automatically downloaded to /tmp/data during evaluation
|
| 23 |
+
DATASET_PATH = "/tmp/data"
|
| 24 |
+
dataset_remote = load_dataset(DATASET_PATH,split = "test",streaming = True)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
# load your model
|
| 28 |
+
device = "cuda:0"
|
| 29 |
+
model = Model().to(device)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
# iterate over the dataset
|
| 33 |
+
out = []
|
| 34 |
+
for el in tqdm.tqdm(dataset_remote):
|
| 35 |
+
|
| 36 |
+
start_time = time.time()
|
| 37 |
+
|
| 38 |
+
# each element is a dict
|
| 39 |
+
# el["id"] id of example and el["audio"] contains the audio file
|
| 40 |
+
# el["audio"]["bytes"] contains bytes from reading the raw audio
|
| 41 |
+
# el["audio"]["path"] contains the filename. This is just for reference and you can't actually load it
|
| 42 |
+
|
| 43 |
+
# if you are using libraries that expect a file. You can use BytesIO object
|
| 44 |
+
try:
|
| 45 |
+
file_like = io.BytesIO(el["audio"]["bytes"])
|
| 46 |
+
tensor = preprocess(file_like)
|
| 47 |
+
|
| 48 |
+
with torch.no_grad():
|
| 49 |
+
# soft decision (such as log likelihood score)
|
| 50 |
+
# positive score correspond to synthetic prediction
|
| 51 |
+
# negative score correspond to pristine prediction
|
| 52 |
+
score = model(tensor.to(device)).cpu().item()
|
| 53 |
+
|
| 54 |
+
# we require a hard decision to be submitted, so you need to pick a threshold
|
| 55 |
+
pred = "generated" if score > model.threshold else "pristine"
|
| 56 |
+
|
| 57 |
+
# append your prediction
|
| 58 |
+
# "id" and "pred" are required. "score" will not be used in scoring but we encourage you to include it. We'll use it for analysis of the results
|
| 59 |
+
|
| 60 |
+
out.append(dict(id = el["id"], pred = pred, score = score, time = time.time() - start_time))
|
| 61 |
+
except Exception as e:
|
| 62 |
+
print(e)
|
| 63 |
+
print("failed", el["id"])
|
| 64 |
+
out.append(dict(id = el["id"], pred = "none", score = None))
|
| 65 |
+
|
| 66 |
+
# save the final result and that's it
|
| 67 |
+
pd.DataFrame(out).to_csv("submission.csv",index = False)
|