Spaces:

AMR-KELEG
/

MLADI

Running

AMR-KELEG committed on Oct 25, 2024

Commit

465af14

1 Parent(s): 24cf6c5

Store the predictions

Files changed (2) hide show

app.py CHANGED Viewed

@@ -9,6 +9,7 @@ from transformers import AutoModelForSequenceClassification, AutoTokenizer
 from constants import DIALECTS_WITH_LABELS
 from inspect import getmembers, isfunction
 import eval_utils
 import numpy as np
 from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
@@ -42,7 +43,10 @@ with tab2:
             for sentence in tqdm(sentences)
         ]
-        # TODO: Store the predictions in a private dataset
         # Evaluate the model
         accuracy_scores = {}

 from constants import DIALECTS_WITH_LABELS
 from inspect import getmembers, isfunction
 import eval_utils
+import utils
 import numpy as np
 from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
             for sentence in tqdm(sentences)
         ]
+        # Store the predictions in a private dataset
+        utils.upload_predictions(
+            os.environ["PREDICTIONS_DATASET_NAME"], predictions, model_name
+        )
         # Evaluate the model
         accuracy_scores = {}

utils.py ADDED Viewed

+import re
+import json
+import time
+from huggingface_hub import HfApi
+def current_seconds_time():
+    return round(time.time())
+def upload_predictions(repo_id, predictions, model_name):
+    api = HfApi()
+    predictions_filename = (
+        f"predictions_{current_seconds_time()}_{re.sub('/', '_', model_name)}.json"
+    )
+    predictions_object = {"model_name": model_name, "predictions": predictions}
+    with open(predictions_filename, "w") as f:
+        json.dump(predictions_object, f)
+    future = api.upload_file(
+        path_or_fileobj=predictions_filename,
+        path_in_repo=predictions_filename,
+        repo_id=repo_id,
+        repo_type="dataset",
+        run_as_future=True,
+    )