safe-challenge
/

safe-example-submission

Model card Files Files and versions

kt-test-account committed on Feb 24, 2025

Commit

7fce61a

·

1 Parent(s): 5ae57bc

Update script.py

Files changed (1) hide show

script.py +48 -3

script.py CHANGED Viewed

@@ -2,10 +2,55 @@ import pandas as pd
 from datasets import load_dataset
 import numpy as np
 import tqdm.auto as tqdm
-dataset_remote = load_dataset("/tmp/data",split = "test",streaming = True)
 out = []
 for el in tqdm.tqdm(dataset_remote):
-    print(el["id"], len(el["audio"]["bytes"]))
-    out.append(dict(id = el["id"], pred = np.random.choice(["generated","pristine"])))
 pd.DataFrame(out).to_csv("submission.csv",index = False)

 from datasets import load_dataset
 import numpy as np
 import tqdm.auto as tqdm
+import os
+import io
+import torch
+# Import your model and anything else you want
+# You can even install other packages included in your repo
+# However, during the evaluation the container will not have access to the internet.
+# So you must include everything you need in your model repo. Common python libraries will be installed.
+# Feel free to contact us to add dependencies to the requirements.txt
+from models import Model
+from preprocess import preproccess
+# load the dataset. dataset will be automatically downloaded to /tmp/data during evaluation
+SAFE_DATASET = os.environ.get("SAFE_DATASET","/tmp/data")
+dataset_remote = load_dataset(SAFE_DATASET,split = "test",streaming = True)
+# load your model
+# prefer the first GPU, but fall back to CPU so the script still runs on hosts without CUDA
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
+model = Model().to(device)
+# iterate over the dataset
 out = []
 for el in tqdm.tqdm(dataset_remote):
+    # each element is a dict
+    # el["id"] id of example and el["audio"] contains the audio file
+    # el["audio"]["bytes"] contains bytes from reading the raw audio
+    # el["audio"]["path"] contains the filename. This is just for reference and you can't actually load it
+    # if you are using libraries that expect a file, you can use a BytesIO object
+    file_like = io.BytesIO(el["audio"]["bytes"])
+    # the call must use the exact name imported from the preprocess module above
+    tensor = preproccess(file_like)
+    with torch.no_grad():
+        # soft decision (such as log likelihood score)
+        # positive score corresponds to synthetic prediction
+        # negative score corresponds to pristine prediction
+        score = model(tensor.to(device)).cpu().item()
+        # we require a hard decision to be submitted, so you need to pick a threshold
+        pred = "generated" if score > model.threshold else "pristine"
+    # append your prediction
+    # "id" and "pred" are required. "score" will not be used in scoring but we encourage you to include it. We'll use it for analysis of the results
+    out.append(dict(id = el["id"], pred = pred, score = score))
+# save the final result and that's it
 pd.DataFrame(out).to_csv("submission.csv",index = False)