eduardosanchez
/

test_model

Model card Files Files and versions

xet

Community

eduardosanchez committed on 19 days ago

Commit

5eb492b

verified ·

1 Parent(s): e9065c7

Update script.py

Browse files

Files changed (1) hide show

script.py +57 -7

script.py CHANGED Viewed

@@ -1,13 +1,20 @@
-import os
 import csv
 # ---------------------------------------------------------------------------
 # IOL-AI 2024 - organizer demonstration submission.
 #
-# This script is NOT a model. It contains a curated answer key for ~52% of the
-# competition points (spread across the public and private splits) so that
-# submitting it lands a mid-table 'pass' score (~50-something on both
-# leaderboards). Use it only to smoke-test the leaderboard end to end.
 #
 # !!! KEEP THE MODEL REPO YOU UPLOAD THIS TO PRIVATE !!!
 # It embeds gold answers; a public repo would leak them.
@@ -61,13 +68,56 @@ ANSWERS = {
     "012024030204": "nge yé"
 }
 TEST = "/tmp/data/test.csv"   # competition test set, mounted by the platform
 rows = []
 with open(TEST, newline="") as f:
     for r in csv.DictReader(f):
         rid = str(r["id"]).strip()
-        rows.append({"id": r["id"], "pred": ANSWERS.get(rid, "")})
 with open("submission.csv", "w", newline="") as f:
     w = csv.DictWriter(f, fieldnames=["id", "pred"])
@@ -75,4 +125,4 @@ with open("submission.csv", "w", newline="") as f:
     w.writerows(rows)
 print(f"Wrote submission.csv with {len(rows)} rows; "
-      f"{sum(1 for x in rows if x['pred'])} answered.")

 import csv
 # ---------------------------------------------------------------------------
 # IOL-AI 2024 - organizer demonstration submission.
 #
+# This script is NOT a model. It embeds a curated answer key and deliberately
+# produces THREE kinds of predictions so you can watch chrF, exact_match and the
+# geometric-mean score DIVERGE on the leaderboard:
+#
+#   EXACT  -> gold answer verbatim        -> exact_match = 1, chrF = 1
+#   NEAR   -> gold answer with a tiny typo -> exact_match = 0, chrF high (partial)
+#   blank  -> id not in the key            -> exact_match = 0, chrF = 0
+#
+# Because the NEAR bucket scores 0 on exact_match but high on chrF, you'll see
+#   chrF  >  score  >  exact_match
+# instead of the three collapsing to one number (which happens when every item
+# is all-or-nothing). Use it only to smoke-test the leaderboard end to end.
 #
 # !!! KEEP THE MODEL REPO YOU UPLOAD THIS TO PRIVATE !!!
 # It embeds gold answers; a public repo would leak them.
     "012024030204": "nge yé"
 }
+# Ids whose prediction is a deliberate NEAR-MISS (right idea, two adjacent characters swapped).
+# These score 0 on exact_match but high on chrF, so the two metrics diverge.
+# Chosen to be long enough that a single typo still leaves high character overlap.
+NEAR_MISS_IDS = {
+    "012024010102",   # "you(du) will bite me"
+    "012024010104",   # "I will wait for you(pl)"
+    "012024020201",   # "car (= short lorry)"
+    "012024020303",   # "ruubiitcha puphubii"
+    "012024020304",   # "mu’akoeta uhuyitibee"
+    "012024020305",   # "makuitcha eratibii"
+    "012024020202",   # "tall cooking pots"
+    "012024020301",   # "uphukwama gogogogo"
+    "012024020302",   # "shumukosa dongoko"
+    "012024020306",   # "wiribiisa pophoko"
+    "012024010103",   # "I caught them(pl)"
+    "012024010202",   # "mətəjgolan"
+}
+def near_miss(s):  # s: gold answer string; returns a slightly perturbed copy of it
+    """Return a near-miss copy of `s`: transpose the first interior pair of
+    differing letters. Guarantees exact_match = 0 while keeping nearly every
+    character (and most n-grams) intact, so chrF stays high."""
+    chars = list(s)
+    for i in range(1, len(chars) - 1):  # start at index 1 so the first character is never moved
+        a, b = chars[i], chars[i + 1]
+        if a != b and a.isalnum() and b.isalnum():  # equal chars would swap to the same string; spaces/punctuation stay put
+            chars[i], chars[i + 1] = b, a  # swap the adjacent pair in place
+            return "".join(chars)
+    # fallback (no swappable pair found, e.g. fewer than 3 characters): duplicate the last character
+    return s + s[-1] if s else s  # NOTE: an empty `s` is returned unchanged, so it is not perturbed
 TEST = "/tmp/data/test.csv"   # competition test set, mounted by the platform
 rows = []
+n_exact = n_near = n_blank = 0  # per-bucket counters, reported in the final summary line
 with open(TEST, newline="") as f:
     for r in csv.DictReader(f):
         rid = str(r["id"]).strip()  # stripped id is used for lookups only; the raw r["id"] is what gets written out
+        if rid in NEAR_MISS_IDS:  # checked first: these ids get a perturbed gold answer instead of the verbatim one
+            pred = near_miss(ANSWERS[rid])  # NOTE(review): raises KeyError if a NEAR_MISS_IDS entry is missing from ANSWERS
+            n_near += 1
+        elif rid in ANSWERS:
+            pred = ANSWERS[rid]  # gold answer verbatim
+            n_exact += 1
+        else:
+            pred = ""  # id not in the answer key: leave the prediction blank
+            n_blank += 1
+        rows.append({"id": r["id"], "pred": pred})
 with open("submission.csv", "w", newline="") as f:
     w = csv.DictWriter(f, fieldnames=["id", "pred"])
     w.writerows(rows)  # NOTE(review): the unified view shows a hunk boundary (@@ -75,4 +125,4 @@) just before this line, so lines are elided here; presumably the header row is written there - confirm
 print(f"Wrote submission.csv with {len(rows)} rows; "
+      f"{n_exact} exact, {n_near} near-miss, {n_blank} blank.")