Update script.py
Browse files
script.py
CHANGED
|
@@ -1,13 +1,20 @@
|
|
| 1 |
-
import os
|
| 2 |
import csv
|
| 3 |
|
| 4 |
# ---------------------------------------------------------------------------
|
| 5 |
# IOL-AI 2024 - organizer demonstration submission.
|
| 6 |
#
|
| 7 |
-
# This script is NOT a model. It
|
| 8 |
-
#
|
| 9 |
-
#
|
| 10 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
#
|
| 12 |
# !!! KEEP THE MODEL REPO YOU UPLOAD THIS TO PRIVATE !!!
|
| 13 |
# It embeds gold answers; a public repo would leak them.
|
|
@@ -61,13 +68,56 @@ ANSWERS = {
|
|
| 61 |
"012024030204": "nge yé"
|
| 62 |
}
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
TEST = "/tmp/data/test.csv" # competition test set, mounted by the platform
|
| 65 |
|
| 66 |
rows = []
|
|
|
|
| 67 |
with open(TEST, newline="") as f:
|
| 68 |
for r in csv.DictReader(f):
|
| 69 |
rid = str(r["id"]).strip()
|
| 70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
|
| 72 |
with open("submission.csv", "w", newline="") as f:
|
| 73 |
w = csv.DictWriter(f, fieldnames=["id", "pred"])
|
|
@@ -75,4 +125,4 @@ with open("submission.csv", "w", newline="") as f:
|
|
| 75 |
w.writerows(rows)
|
| 76 |
|
| 77 |
print(f"Wrote submission.csv with {len(rows)} rows; "
|
| 78 |
-
f"{
|
|
|
|
|
|
|
| 1 |
import csv
|
| 2 |
|
| 3 |
# ---------------------------------------------------------------------------
|
| 4 |
# IOL-AI 2024 - organizer demonstration submission.
|
| 5 |
#
|
| 6 |
+
# This script is NOT a model. It embeds a curated answer key and deliberately
|
| 7 |
+
# produces THREE kinds of predictions so you can watch chrF, exact_match and the
|
| 8 |
+
# geometric-mean score DIVERGE on the leaderboard:
|
| 9 |
+
#
|
| 10 |
+
# EXACT -> gold answer verbatim -> exact_match = 1, chrF = 1
|
| 11 |
+
# NEAR -> gold answer with a tiny typo -> exact_match = 0, chrF high (partial)
|
| 12 |
+
# blank -> id not in the key -> exact_match = 0, chrF = 0
|
| 13 |
+
#
|
| 14 |
+
# Because the NEAR bucket scores 0 on exact_match but high on chrF, you'll see
|
| 15 |
+
# chrF > score > exact_match
|
| 16 |
+
# instead of the three collapsing to one number (which happens when every item
|
| 17 |
+
# is all-or-nothing). Use it only to smoke-test the leaderboard end to end.
|
| 18 |
#
|
| 19 |
# !!! KEEP THE MODEL REPO YOU UPLOAD THIS TO PRIVATE !!!
|
| 20 |
# It embeds gold answers; a public repo would leak them.
|
|
|
|
| 68 |
"012024030204": "nge yé"
|
| 69 |
}
|
| 70 |
|
| 71 |
+
# Ids whose prediction is a deliberate NEAR-MISS (right idea, one-character typo).
|
| 72 |
+
# These score 0 on exact_match but high on chrF, so the two metrics diverge.
|
| 73 |
+
# Chosen to be long enough that a single typo still leaves high character overlap.
|
| 74 |
+
# Kept in ascending id order. The trailing comments presumably quote the
# un-perturbed answer for each id -- verify against ANSWERS.
NEAR_MISS_IDS = {
    "012024010102",  # "you(du) will bite me"
    "012024010103",  # "I caught them(pl)"
    "012024010104",  # "I will wait for you(pl)"
    "012024010202",  # "mətəjgolan"
    "012024020201",  # "car (= short lorry)"
    "012024020202",  # "tall cooking pots"
    "012024020301",  # "uphukwama gogogogo"
    "012024020302",  # "shumukosa dongoko"
    "012024020303",  # "ruubiitcha puphubii"
    "012024020304",  # "mu’akoeta uhuyitibee"
    "012024020305",  # "makuitcha eratibii"
    "012024020306",  # "wiribiisa pophoko"
}
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
def near_miss(s: str) -> str:
    """Return a copy of `s` that differs from it by one small edit.

    Scanning left to right and skipping index 0, the first pair of adjacent
    characters that differ from each other and are both alphanumeric is
    swapped. The first character of `s` is therefore never moved, but the
    last one can be. If no such pair exists (for example, strings shorter
    than three characters), the last character is duplicated instead.

    Args:
        s: The string to perturb.

    Returns:
        A string that is never equal to `s` when `s` is non-empty. An empty
        string is returned unchanged, because there is nothing to perturb.
    """
    # Pair each character from index 1 onward with its right-hand neighbour;
    # `start=1` keeps `i` equal to the index of `left` within `s`.
    for i, (left, right) in enumerate(zip(s[1:], s[2:]), start=1):
        if left != right and left.isalnum() and right.isalnum():
            return s[:i] + right + left + s[i + 2:]
    # fallback: duplicate the last character
    return s + s[-1] if s else s
|
| 102 |
+
|
| 103 |
+
|
| 104 |
TEST = "/tmp/data/test.csv"  # competition test set, mounted by the platform

# Build one output row per test row and tally how many fall in each bucket.
rows = []
n_exact = n_near = n_blank = 0
# Explicit UTF-8 so decoding does not depend on the host locale.
with open(TEST, newline="", encoding="utf-8") as f:
    for r in csv.DictReader(f):
        # Look up by the stripped id, but echo the id back exactly as read.
        rid = str(r["id"]).strip()
        gold = ANSWERS.get(rid)
        if gold is None:
            # Id not in the key. This also covers an id listed in
            # NEAR_MISS_IDS but missing from ANSWERS, which would otherwise
            # raise KeyError.
            pred = ""
            n_blank += 1
        elif rid in NEAR_MISS_IDS:
            pred = near_miss(gold)
            n_near += 1
        else:
            pred = gold
            n_exact += 1
        rows.append({"id": r["id"], "pred": pred})
|
| 121 |
|
| 122 |
with open("submission.csv", "w", newline="") as f:
|
| 123 |
w = csv.DictWriter(f, fieldnames=["id", "pred"])
|
|
|
|
| 125 |
w.writerows(rows)
|
| 126 |
|
| 127 |
print(f"Wrote submission.csv with {len(rows)} rows; "
|
| 128 |
+
f"{n_exact} exact, {n_near} near-miss, {n_blank} blank.")
|