eduardosanchez committed on
Commit
5eb492b
·
verified ·
1 Parent(s): e9065c7

Update script.py

Browse files
Files changed (1) hide show
  1. script.py +57 -7
script.py CHANGED
@@ -1,13 +1,20 @@
1
- import os
2
  import csv
3
 
4
  # ---------------------------------------------------------------------------
5
  # IOL-AI 2024 - organizer demonstration submission.
6
  #
7
- # This script is NOT a model. It contains a curated answer key for ~52% of the
8
- # competition points (spread across the public and private splits) so that
9
- # submitting it lands a mid-table 'pass' score (~50-something on both
10
- # leaderboards). Use it only to smoke-test the leaderboard end to end.
 
 
 
 
 
 
 
 
11
  #
12
  # !!! KEEP THE MODEL REPO YOU UPLOAD THIS TO PRIVATE !!!
13
  # It embeds gold answers; a public repo would leak them.
@@ -61,13 +68,56 @@ ANSWERS = {
61
  "012024030204": "nge yé"
62
  }
63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  TEST = "/tmp/data/test.csv" # competition test set, mounted by the platform
65
 
66
  rows = []
 
67
  with open(TEST, newline="") as f:
68
  for r in csv.DictReader(f):
69
  rid = str(r["id"]).strip()
70
- rows.append({"id": r["id"], "pred": ANSWERS.get(rid, "")})
 
 
 
 
 
 
 
 
 
71
 
72
  with open("submission.csv", "w", newline="") as f:
73
  w = csv.DictWriter(f, fieldnames=["id", "pred"])
@@ -75,4 +125,4 @@ with open("submission.csv", "w", newline="") as f:
75
  w.writerows(rows)
76
 
77
  print(f"Wrote submission.csv with {len(rows)} rows; "
78
- f"{sum(1 for x in rows if x['pred'])} answered.")
 
 
1
  import csv
2
 
3
  # ---------------------------------------------------------------------------
4
  # IOL-AI 2024 - organizer demonstration submission.
5
  #
6
+ # This script is NOT a model. It embeds a curated answer key and deliberately
7
+ # produces THREE kinds of predictions so you can watch chrF, exact_match and the
8
+ # geometric-mean score DIVERGE on the leaderboard:
9
+ #
10
+ # EXACT -> gold answer verbatim -> exact_match = 1, chrF = 1
11
+ # NEAR -> gold answer with a tiny typo -> exact_match = 0, chrF high (partial)
12
+ # blank -> id not in the key -> exact_match = 0, chrF = 0
13
+ #
14
+ # Because the NEAR bucket scores 0 on exact_match but high on chrF, you'll see
15
+ # chrF > score > exact_match
16
+ # instead of the three collapsing to one number (which happens when every item
17
+ # is all-or-nothing). Use it only to smoke-test the leaderboard end to end.
18
  #
19
  # !!! KEEP THE MODEL REPO YOU UPLOAD THIS TO PRIVATE !!!
20
  # It embeds gold answers; a public repo would leak them.
 
68
  "012024030204": "nge yé"
69
  }
70
 
71
+ # Ids whose prediction is a deliberate NEAR-MISS (right idea, one-character typo).
72
+ # These score 0 on exact_match but high on chrF, so the two metrics diverge.
73
+ # Chosen to be long enough that a single typo still leaves high character overlap.
74
# Ids that get a deliberately perturbed prediction instead of the verbatim
# answer. Kept in sorted order so the set is easy to audit against the answer
# key; the trailing comment on each id is the note attached to it in the
# original listing.
NEAR_MISS_IDS = {
    "012024010102",  # "you(du) will bite me"
    "012024010103",  # "I caught them(pl)"
    "012024010104",  # "I will wait for you(pl)"
    "012024010202",  # "mətəjgolan"
    "012024020201",  # "car (= short lorry)"
    "012024020202",  # "tall cooking pots"
    "012024020301",  # "uphukwama gogogogo"
    "012024020302",  # "shumukosa dongoko"
    "012024020303",  # "ruubiitcha puphubii"
    "012024020304",  # "mu’akoeta uhuyitibee"
    "012024020305",  # "makuitcha eratibii"
    "012024020306",  # "wiribiisa pophoko"
}
88
+
89
+
90
def near_miss(s: str) -> str:
    """Return a copy of `s` that differs from it by one small edit.

    Scans left to right, starting at index 1 (the first character is never
    moved), for the first pair of adjacent characters that differ and are
    both alphanumeric, and transposes that pair. The pair may include the
    final character of `s`. If no such pair exists (string too short, only
    repeated letters, or spaces/punctuation between letters), the last
    character is duplicated instead.

    Every non-empty `s` therefore maps to a different string, so an exact
    comparison against `s` fails, while almost all characters are kept; the
    intent is that character-overlap metrics such as chrF remain high.
    An empty `s` has nothing to perturb and is returned unchanged.
    """
    if not s:
        return s
    # Pair up s[i] with s[i + 1] for i = 1 .. len(s) - 2.
    for i, (left, right) in enumerate(zip(s[1:], s[2:]), start=1):
        if left != right and left.isalnum() and right.isalnum():
            return s[:i] + right + left + s[i + 2:]
    # Fallback: no swappable pair, so lengthen by repeating the last char.
    return s + s[-1]
102
+
103
+
104
TEST = "/tmp/data/test.csv"  # competition test set, mounted by the platform

# Build one prediction row per test row and tally how many fall into each
# bucket (verbatim answer / perturbed answer / blank).
rows = []
n_exact = n_near = n_blank = 0
# Explicit UTF-8 so reading does not depend on the locale's default encoding.
with open(TEST, newline="", encoding="utf-8") as f:
    for r in csv.DictReader(f):
        rid = str(r["id"]).strip()
        # Single lookup: an id listed in NEAR_MISS_IDS but absent from ANSWERS
        # is treated as blank instead of raising KeyError and aborting the run.
        gold = ANSWERS.get(rid)
        if gold is None:
            pred = ""
            n_blank += 1
        elif rid in NEAR_MISS_IDS:
            pred = near_miss(gold)
            n_near += 1
        else:
            pred = gold
            n_exact += 1
        # Echo the id exactly as it appeared in the test file (unstripped).
        rows.append({"id": r["id"], "pred": pred})
121
 
122
  with open("submission.csv", "w", newline="") as f:
123
  w = csv.DictWriter(f, fieldnames=["id", "pred"])
 
125
  w.writerows(rows)
126
 
127
  print(f"Wrote submission.csv with {len(rows)} rows; "
128
+ f"{n_exact} exact, {n_near} near-miss, {n_blank} blank.")