Spaces:

AMR-KELEG
/

MLADI

Running

AMR-KELEG committed on Feb 10, 2025

Commit

9f4840a

1 Parent(s): 37b6ae3

Tweak the newly added evaluation method

Files changed (1) hide show

eval_utils.py CHANGED Viewed

@@ -81,7 +81,10 @@ def prompt_chat_LLM(model, tokenizer, text):
 def predict_binary_outcomes(model, tokenizer, texts, threshold=0.3):
     """Predict the validity in each dialect, by independently applying a sigmoid activation to each dialect's logit.
-    Dialects with probabilities (sigmoid activations) above a threshold (set by default to 0.3) are considered predicted.
     """
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -98,13 +101,13 @@ def predict_binary_outcomes(model, tokenizer, texts, threshold=0.3):
         logits = outputs.logits
     probabilities = torch.sigmoid(logits).cpu().numpy().reshape(-1)
-    predictions = (probabilities >= threshold).astype(int)
     # Map indices to actual labels
-    predicted_labels = [
         dialect
-        for dialect, dialect_probability in zip(DIALECTS, predictions)
-        if dialect_probability == 1
     ]
-    return predicted_labels

 def predict_binary_outcomes(model, tokenizer, texts, threshold=0.3):
     """Predict the validity in each dialect, by independently applying a sigmoid activation to each dialect's logit.
+    Dialects with probabilities (sigmoid activations) above a threshold (set by default to 0.3) are predicted as valid.
+    The model is expected to generate one logit for each of the following dialects, in the same order:
+    Algeria, Bahrain, Egypt, Iraq, Jordan, Kuwait, Lebanon, Libya, Morocco, Oman, Palestine, Qatar, Saudi_Arabia, Sudan, Syria, Tunisia, UAE, Yemen.
+    Credits: method proposed by Ali Mekky, Lara Hassan, and Mohamed ELZeftawy from MBZUAI.
     """
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         logits = outputs.logits
     probabilities = torch.sigmoid(logits).cpu().numpy().reshape(-1)
+    binary_predictions = (probabilities >= threshold).astype(int)
     # Map indices to actual labels
+    predicted_dialects = [
         dialect
+        for dialect, dialect_prediction in zip(DIALECTS, binary_predictions)
+        if dialect_prediction == 1
     ]
+    return predicted_dialects