HF ready

Browse files

Files changed (10) hide show

.gitignore +1 -0
.idea/deployment.xml +2 -1
.idea/workspace.xml +15 -12
data/06704fa4-5a0c-540c-86e6-c98af1528478.wav +0 -0
data/07bd0843-74a6-53ec-a3f0-00dfc31d6e2a.wav +0 -0
data/9712245a-548d-584c-a82d-a543f1ea21ac.wav +0 -0
data/c3e008aa-e4ba-5d2a-b37e-dd6d0ae640cb.wav +0 -0
script.py +55 -53
script_custom.py +109 -0
script_orig.py +67 -0

.gitignore CHANGED Viewed

@@ -1,6 +1,7 @@
 # Ignore weights
 checkpoints/RAWNET_ASVSPOOF_FOR_INTHEWILD_PURDUE.pth
 debug.sh
 .idea

 # Ignore weights
 checkpoints/RAWNET_ASVSPOOF_FOR_INTHEWILD_PURDUE.pth
 debug.sh
+data_test/
 .idea

.idea/deployment.xml CHANGED Viewed

@@ -1,6 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-  <component name="PublishConfigData" autoUpload="On explicit save action" autoUploadExternalChanges="true">
     <serverData>
       <paths name="Fermi">
         <serverdata>

 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
+  <component name="PublishConfigData" autoUpload="On explicit save action" confirmBeforeUploading="false" autoUploadExternalChanges="true">
+    <option name="confirmBeforeUploading" value="false" />
     <serverData>
       <paths name="Fermi">
         <serverdata>

.idea/workspace.xml CHANGED Viewed

@@ -5,19 +5,15 @@
   </component>
   <component name="ChangeListManager">
     <list default="true" id="23565123-73ab-4f40-a9ef-1086e0c9e1ec" name="Changes" comment="">
-      <change afterPath="$PROJECT_DIR$/Dockerfile" afterDir="false" />
-      <change afterPath="$PROJECT_DIR$/data/06704fa4-5a0c-540c-86e6-c98af1528478.wav" afterDir="false" />
-      <change afterPath="$PROJECT_DIR$/data/07bd0843-74a6-53ec-a3f0-00dfc31d6e2a.wav" afterDir="false" />
-      <change afterPath="$PROJECT_DIR$/data/9712245a-548d-584c-a82d-a543f1ea21ac.wav" afterDir="false" />
-      <change afterPath="$PROJECT_DIR$/data/c3e008aa-e4ba-5d2a-b37e-dd6d0ae640cb.wav" afterDir="false" />
-      <change afterPath="$PROJECT_DIR$/debug.sh" afterDir="false" />
-      <change afterPath="$PROJECT_DIR$/requirements.txt" afterDir="false" />
       <change beforePath="$PROJECT_DIR$/.gitignore" beforeDir="false" afterPath="$PROJECT_DIR$/.gitignore" afterDir="false" />
       <change beforePath="$PROJECT_DIR$/.idea/deployment.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/deployment.xml" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/.idea/vcs.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/vcs.xml" afterDir="false" />
       <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/checkpoints/RAWNET_ASVSPOOF_FOR_INTHEWILD_PURDUE.pth" beforeDir="false" />
-      <change beforePath="$PROJECT_DIR$/preprocess.py" beforeDir="false" afterPath="$PROJECT_DIR$/preprocess.py" afterDir="false" />
       <change beforePath="$PROJECT_DIR$/script.py" beforeDir="false" afterPath="$PROJECT_DIR$/script.py" afterDir="false" />
     </list>
     <option name="SHOW_DIALOG" value="false" />
@@ -25,6 +21,13 @@
     <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
     <option name="LAST_RESOLUTION" value="IGNORE" />
   </component>
   <component name="FlaskConsoleOptions" custom-start-script="import sys&#10;sys.path.extend([WORKING_DIR_AND_PYTHON_PATHS])&#10;from flask.cli import ScriptInfo&#10;locals().update(ScriptInfo(create_app=None).load_app().make_shell_context())&#10;print(&quot;Python %s on %s\nApp: %s [%s]\nInstance: %s&quot; % (sys.version, sys.platform, app.import_name, app.env, app.instance_path))">
     <envs>
       <env key="FLASK_APP" value="app" />
@@ -104,7 +107,7 @@
       <updated>1742573353560</updated>
       <workItem from="1742573355153" duration="587000" />
       <workItem from="1742806974298" duration="2741000" />
-      <workItem from="1742810431420" duration="7010000" />
     </task>
     <servers />
   </component>
@@ -116,7 +119,7 @@
       <breakpoints>
         <line-breakpoint enabled="true" suspend="THREAD" type="python-line">
           <url>file://$PROJECT_DIR$/script.py</url>
-          <line>30</line>
           <option name="timeStamp" value="1" />
         </line-breakpoint>
       </breakpoints>

   </component>
   <component name="ChangeListManager">
     <list default="true" id="23565123-73ab-4f40-a9ef-1086e0c9e1ec" name="Changes" comment="">
+      <change afterPath="$PROJECT_DIR$/script_custom.py" afterDir="false" />
+      <change afterPath="$PROJECT_DIR$/script_orig.py" afterDir="false" />
       <change beforePath="$PROJECT_DIR$/.gitignore" beforeDir="false" afterPath="$PROJECT_DIR$/.gitignore" afterDir="false" />
       <change beforePath="$PROJECT_DIR$/.idea/deployment.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/deployment.xml" afterDir="false" />
       <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
+      <change beforePath="$PROJECT_DIR$/data/06704fa4-5a0c-540c-86e6-c98af1528478.wav" beforeDir="false" />
+      <change beforePath="$PROJECT_DIR$/data/07bd0843-74a6-53ec-a3f0-00dfc31d6e2a.wav" beforeDir="false" />
+      <change beforePath="$PROJECT_DIR$/data/9712245a-548d-584c-a82d-a543f1ea21ac.wav" beforeDir="false" />
+      <change beforePath="$PROJECT_DIR$/data/c3e008aa-e4ba-5d2a-b37e-dd6d0ae640cb.wav" beforeDir="false" />
       <change beforePath="$PROJECT_DIR$/script.py" beforeDir="false" afterPath="$PROJECT_DIR$/script.py" afterDir="false" />
     </list>
     <option name="SHOW_DIALOG" value="false" />
     <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
     <option name="LAST_RESOLUTION" value="IGNORE" />
   </component>
+  <component name="FileTemplateManagerImpl">
+    <option name="RECENT_TEMPLATES">
+      <list>
+        <option value="Python Script" />
+      </list>
+    </option>
+  </component>
   <component name="FlaskConsoleOptions" custom-start-script="import sys&#10;sys.path.extend([WORKING_DIR_AND_PYTHON_PATHS])&#10;from flask.cli import ScriptInfo&#10;locals().update(ScriptInfo(create_app=None).load_app().make_shell_context())&#10;print(&quot;Python %s on %s\nApp: %s [%s]\nInstance: %s&quot; % (sys.version, sys.platform, app.import_name, app.env, app.instance_path))">
     <envs>
       <env key="FLASK_APP" value="app" />
       <updated>1742573353560</updated>
       <workItem from="1742573355153" duration="587000" />
       <workItem from="1742806974298" duration="2741000" />
+      <workItem from="1742810431420" duration="7858000" />
     </task>
     <servers />
   </component>
       <breakpoints>
         <line-breakpoint enabled="true" suspend="THREAD" type="python-line">
           <url>file://$PROJECT_DIR$/script.py</url>
+          <line>28</line>
           <option name="timeStamp" value="1" />
         </line-breakpoint>
       </breakpoints>

data/06704fa4-5a0c-540c-86e6-c98af1528478.wav DELETED Viewed

Binary file (530 kB)

data/07bd0843-74a6-53ec-a3f0-00dfc31d6e2a.wav DELETED Viewed

Binary file (672 kB)

data/9712245a-548d-584c-a82d-a543f1ea21ac.wav DELETED Viewed

Binary file (660 kB)

data/c3e008aa-e4ba-5d2a-b37e-dd6d0ae640cb.wav DELETED Viewed

Binary file (683 kB)

script.py CHANGED Viewed

@@ -1,24 +1,22 @@
-import io
-import time
 import os
-os.environ['OMP_NUM_THREADS'] = '1'
-os.environ['OPENBLAS_NUM_THREADS'] = '1'
-os.environ['MKL_NUM_THREADS'] = '1'
-os.environ['NUMEXPR_NUM_THREADS'] = '1'
 import torch
-import tqdm.auto as tqdm
 from glob import glob
-from datasets import load_dataset
-import numpy as np
-import pandas as pd
 # from models import Model
 from preprocess import preprocess
 from src.rawnet_model import RawNet
 # Import your model and anything else you want
 # You can even install other packages included in your repo
@@ -28,16 +26,15 @@ from src.rawnet_model import RawNet
 # For testing, this is the docker image that will be used https://github.com/huggingface/competitions/blob/main/Dockerfile
 # It can be pulled here https://hub.docker.com/r/huggingface/competitions/tags
-print('imported packages')
 # load the dataset. dataset will be automatically downloaded to /tmp/data during evaluation
 DATASET_PATH = "/tmp/data"
-dataset_remote = glob(os.path.join(DATASET_PATH, '*'))
-# dataset_remote = load_dataset('safe-challenge/safe-challenge-practice-dataset', split="test", streaming=True)
-print('loaded dataset')
-# device = "cuda:0"
-device = "cpu"
 # DEFINE RAWNET2 MODEL
 config = {
@@ -55,7 +52,6 @@ model = RawNet(config, device)
 model_path = '/tmp/checkpoints/RAWNET_ASVSPOOF_FOR_INTHEWILD_PURDUE.pth'
 model.load_state_dict(torch.load(model_path, map_location=device))
-# model = model.float()
 print('Loaded RawNet2 Weights')
@@ -63,8 +59,8 @@ print('Loaded RawNet2 Weights')
 # iterate over the dataset
 out = []
-# for el in tqdm.tqdm(dataset_remote):
-for el in dataset_remote:
     start_time = time.time()
@@ -75,37 +71,43 @@ for el in dataset_remote:
     # if you are using libraries that expect a file. You can use BytesIO object
-    # try:
-    # file_like = io.BytesIO(el["audio"]["bytes"])
-    # tensor = preprocess(file_like)
-    tensor = preprocess(el)
-    with torch.no_grad():
-        # soft decision (such as log likelihood score)
-        # positive score correspond to synthetic prediction
-        # negative score correspond to pristine prediction
-        # score = model(tensor.to(device)).cpu().item()
-        score = model(tensor.to(device))[:, 1].cpu()
-        print(f'SCORE OUT: {score}')
-        score = score.mean().item()
-        print(f'SCORE FINAL: {score}')
-        # we require a hard decision to be submited. so you need to pick a threshold
-        pred = "generated" if score > model.threshold else "pristine"
-    # append your prediction
-    # "id" and "pred" are required. "score" will not be used in scoring but we encourage you to include it. We'll use it for analysis of the results
-    # out.append(dict(id = el["id"], pred = pred, score = score, time = time.time() - start_time))
-    out.append(dict(id=el, pred=pred, score=score, time=time.time() - start_time))
-    # except Exception as e:
-    #     print(e)
-    #     # print("failed", el["id"])
-    #     # out.append(dict(id = el["id"], pred = "none", score = None))
-    #     print("failed", el)
-    #     out.append(dict(id = el, pred = "none", score = None))
 # save the final result and that's it
 pd.DataFrame(out).to_csv("submission.csv", index=False)

+import numpy as np
+import pandas as pd
+import tqdm.auto as tqdm
 import os
+import io
 import torch
+import time
 from glob import glob
 # from models import Model
+from datasets import load_dataset
 from preprocess import preprocess
 from src.rawnet_model import RawNet
+# os.environ['OMP_NUM_THREADS'] = '1'
+# os.environ['OPENBLAS_NUM_THREADS'] = '1'
+# os.environ['MKL_NUM_THREADS'] = '1'
+# os.environ['NUMEXPR_NUM_THREADS'] = '1'
 # Import your model and anything else you want
 # You can even install other packages included in your repo
 # For testing, this is the docker image that will be used https://github.com/huggingface/competitions/blob/main/Dockerfile
 # It can be pulled here https://hub.docker.com/r/huggingface/competitions/tags
 # load the dataset. dataset will be automatically downloaded to /tmp/data during evaluation
+# DATASET_PATH = "/tmp/data_test"
+# dataset_remote = glob(os.path.join(DATASET_PATH, '*'))
 DATASET_PATH = "/tmp/data"
+dataset_remote = load_dataset(DATASET_PATH,split = "test",streaming = True)
+device = "cuda:0"
+# device = "cpu"
 # DEFINE RAWNET2 MODEL
 config = {
 model_path = '/tmp/checkpoints/RAWNET_ASVSPOOF_FOR_INTHEWILD_PURDUE.pth'
 model.load_state_dict(torch.load(model_path, map_location=device))
 print('Loaded RawNet2 Weights')
 # iterate over the dataset
 out = []
+for el in tqdm.tqdm(dataset_remote):
+# for el in dataset_remote:
     start_time = time.time()
     # if you are using libraries that expect a file. You can use BytesIO object
+    try:
+        # RUNNING ON HUGGINGFACE
+        file_like = io.BytesIO(el["audio"]["bytes"])
+        tensor = preprocess(file_like)
+        ## RUNNING LOCALLY
+        # tensor = preprocess(el)
+        with torch.no_grad():
+            # soft decision (such as log likelihood score)
+            # positive score correspond to synthetic prediction
+            # negative score correspond to pristine prediction
+            # OLD MODEL
+            # score = model(tensor.to(device)).cpu().item()
+            # RAWNET2 MODEL
+            score = model(tensor.to(device))[:, 1].cpu()
+            print(f'SCORE OUT: {score}')
+            score = score.mean().item()
+            print(f'SCORE FINAL: {score}')
+            # we require a hard decision to be submitted, so you need to pick a threshold
+            pred = "generated" if score > model.threshold else "pristine"
+        # append your prediction
+        # "id" and "pred" are required. "score" will not be used in scoring but we encourage you to include it. We'll use it for analysis of the results
+        # RUNNING ON HUGGINGFACE
+        out.append(dict(id=el["id"], pred=pred, score=score, time=time.time() - start_time))
+        ## RUNNING LOCALLY
+        # out.append(dict(id=el, pred=pred, score=score, time=time.time() - start_time))
+    except Exception as e:
+        print(e)
+        print("failed", el["id"])
+        out.append(dict(id=el["id"], pred="none", score=None))
+        # print("failed", el)
+        # out.append(dict(id=el, pred="none", score=None))
 # save the final result and that's it
 pd.DataFrame(out).to_csv("submission.csv", index=False)

script_custom.py ADDED Viewed

	@@ -0,0 +1,109 @@

+import io
+import time
+import os
+os.environ['OMP_NUM_THREADS'] = '1'  # these four variables are assigned before torch/numpy are imported below; presumably to keep the native math libraries single-threaded (TODO confirm)
+os.environ['OPENBLAS_NUM_THREADS'] = '1'
+os.environ['MKL_NUM_THREADS'] = '1'
+os.environ['NUMEXPR_NUM_THREADS'] = '1'
+import torch
+import tqdm.auto as tqdm
+from glob import glob
+from datasets import load_dataset
+import numpy as np
+import pandas as pd
+# from models import Model
+from preprocess import preprocess
+from src.rawnet_model import RawNet
+# Import your model and anything else you want.
+# You can even install other packages included in your repo.
+# However, during the evaluation the container will not have access to the internet.
+# So you must include everything you need in your model repo. Common Python libraries will be installed.
+# Feel free to contact us to add dependencies to the requirements.txt
+# For testing, this is the Docker image that will be used: https://github.com/huggingface/competitions/blob/main/Dockerfile
+# It can be pulled here: https://hub.docker.com/r/huggingface/competitions/tags
+print('imported packages')
+# Load the dataset. The dataset will be automatically downloaded to /tmp/data during evaluation.
+DATASET_PATH = "/tmp/data"
+dataset_remote = glob(os.path.join(DATASET_PATH, '*'))  # local variant: a list of every path directly under DATASET_PATH
+# dataset_remote = load_dataset('safe-challenge/safe-challenge-practice-dataset', split="test", streaming=True)
+print('loaded dataset')
+# device = "cuda:0"
+device = "cpu"
+# Define the RawNet2 model: this config dict is passed to RawNet(config, device) below.
+config = {
+    "first_conv": 1024,
+    "in_channels": 1,
+    "filts": [20, [20, 20], [20, 128], [128, 128]],
+    "blocks": [2, 4],
+    "nb_fc_node": 1024,
+    "gru_node": 1024,
+    "nb_gru_layer": 3,
+    "nb_classes": 2
+}
+model = RawNet(config, device)
+model_path = '/tmp/checkpoints/RAWNET_ASVSPOOF_FOR_INTHEWILD_PURDUE.pth'
+model.load_state_dict(torch.load(model_path, map_location=device))
+# model = model.float()
+print('Loaded RawNet2 Weights')
+# model = Model().to(device) # OLD MODEL
+# Iterate over the dataset; in this script each `el` is a path string returned by glob above.
+out = []
+# for el in tqdm.tqdm(dataset_remote):
+for el in dataset_remote:
+    start_time = time.time()
+    # The next four notes describe the Hugging Face dataset variant (commented out above), not the glob paths used here:
+    # el["id"] is the id of the example and el["audio"] contains the audio file
+    # el["audio"]["bytes"] contains the bytes from reading the raw audio
+    # el["audio"]["path"] contains the filename. This is just for reference and you can't actually load it
+    # If you are using libraries that expect a file, you can use a BytesIO object
+    try:
+        # file_like = io.BytesIO(el["audio"]["bytes"])
+        # tensor = preprocess(file_like)
+        tensor = preprocess(el)
+        with torch.no_grad():
+            # soft decision (such as a log-likelihood score)
+            # template convention: a positive score corresponds to a synthetic prediction
+            # and a negative score corresponds to a pristine prediction
+            # score = model(tensor.to(device)).cpu().item()
+            score = model(tensor.to(device))[:, 1].cpu()  # keep column 1 of the model output; presumably the "generated" class score (TODO confirm)
+            print(f'SCORE OUT: {score}')
+            score = score.mean().item()  # average the kept values into one Python float
+            print(f'SCORE FINAL: {score}')
+            # we require a hard decision to be submitted, so you need to pick a threshold
+            pred = "generated" if score > model.threshold else "pristine"
+        # append your prediction
+        # "id" and "pred" are required. "score" will not be used in scoring but we encourage you to include it. We'll use it for analysis of the results
+        # out.append(dict(id = el["id"], pred = pred, score = score, time = time.time() - start_time))
+        out.append(dict(id=el, pred=pred, score=score, time=time.time() - start_time))  # the file path doubles as the id here
+    except Exception as e:
+        print(e)
+        # print("failed", el["id"])
+        # out.append(dict(id = el["id"], pred = "none", score = None))
+        print("failed", el)
+        out.append(dict(id=el, pred="none", score=None))  # failure record: no "time" field is written
+# Save the final result to submission.csv and that's it
+pd.DataFrame(out).to_csv("submission.csv", index=False)

script_orig.py ADDED Viewed

	@@ -0,0 +1,67 @@

+import pandas as pd
+from datasets import load_dataset
+import numpy as np
+import tqdm.auto as tqdm
+import os
+import io
+import torch
+import time
+# Import your model and anything else you want.
+# You can even install other packages included in your repo.
+# However, during the evaluation the container will not have access to the internet.
+# So you must include everything you need in your model repo. Common Python libraries will be installed.
+# Feel free to contact us to add dependencies to the requirements.txt
+# For testing, this is the Docker image that will be used: https://github.com/huggingface/competitions/blob/main/Dockerfile
+# It can be pulled here: https://hub.docker.com/r/huggingface/competitions/tags
+from models import Model
+from preprocess import preprocess
+# Load the dataset. The dataset will be automatically downloaded to /tmp/data during evaluation.
+DATASET_PATH = "/tmp/data"
+dataset_remote = load_dataset(DATASET_PATH,split = "test",streaming = True)
+# load your model
+device = "cuda:0"
+model = Model().to(device)
+# Iterate over the dataset, collecting one result dict per example in `out`
+out = []
+for el in tqdm.tqdm(dataset_remote):
+    start_time = time.time()
+    # each element is a dict
+    # el["id"] is the id of the example and el["audio"] contains the audio file
+    # el["audio"]["bytes"] contains the bytes from reading the raw audio
+    # el["audio"]["path"] contains the filename. This is just for reference and you can't actually load it
+    # If you are using libraries that expect a file, you can use a BytesIO object
+    try:
+        file_like = io.BytesIO(el["audio"]["bytes"])
+        tensor = preprocess(file_like)
+        with torch.no_grad():
+            # soft decision (such as a log-likelihood score)
+            # a positive score corresponds to a synthetic prediction
+            # a negative score corresponds to a pristine prediction
+            score = model(tensor.to(device)).cpu().item()  # .item() requires the model output to hold exactly one value
+            # we require a hard decision to be submitted, so you need to pick a threshold
+            pred = "generated" if score > model.threshold else "pristine"
+        # append your prediction
+        # "id" and "pred" are required. "score" will not be used in scoring but we encourage you to include it. We'll use it for analysis of the results
+        out.append(dict(id = el["id"], pred = pred, score = score, time = time.time() - start_time))
+    except Exception as e:
+        print(e)
+        print("failed", el["id"])
+        out.append(dict(id = el["id"], pred = "none", score = None))  # failure record: no "time" field is written
+# Save the final result to submission.csv and that's it
+pd.DataFrame(out).to_csv("submission.csv",index = False)