ravish5 committed on
Commit
baffc2d
·
verified ·
1 Parent(s): 4ba49bc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -13
app.py CHANGED
@@ -48,6 +48,19 @@ def normalize_text(s: str):
48
  s = re.sub(r"\s+"," ",s).strip()
49
  return s
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
  df = pd.read_csv(CSV_PATH)
53
  df["context_norm"] = df["context"].apply(normalize_text)
@@ -158,27 +171,25 @@ def kn_to_en(text):
158
 
159
 
160
 
161
- def exact_match(pred,gold):
162
-
163
- return int(normalize_text(pred)==normalize_text(gold))
164
-
165
 
166
- def token_f1(pred,gold):
167
 
168
- p=set(pred.split())
169
- g=set(gold.split())
170
 
171
- common=len(p & g)
172
 
173
- if common==0:
 
174
 
175
- return 0
176
 
177
- precision=common/len(p)
 
178
 
179
- recall=common/len(g)
 
180
 
181
- return 2*precision*recall/(precision+recall)
182
 
183
 
184
  def semantic_similarity(pred,gold):
 
48
  s = re.sub(r"\s+"," ",s).strip()
49
  return s
50
 
51
+ import string
52
+
53
def normalize_answer(text):
    """Canonicalise an answer string before metric comparison.

    The text is first passed through ``normalize_text`` (defined earlier in
    this file), then punctuation and symbols are removed, the result is
    lowercased, and whitespace is collapsed to single spaces.

    Args:
        text: Raw answer string (prediction or gold reference).

    Returns:
        The cleaned string, with no leading/trailing whitespace.
    """
    # Function-scope import keeps this block self-contained.
    import unicodedata

    text = normalize_text(text)

    def _is_kept(ch):
        # The regex class [^\w\s] would also delete Unicode combining marks
        # (category M*): Python's \w only covers alphanumerics and "_".
        # Devanagari/Kannada vowel signs, virama and anusvara are such marks,
        # so stripping them collapses different words to the same consonant
        # skeleton. Keep everything \w and \s keep, plus combining marks.
        return (
            ch.isalnum()
            or ch == "_"
            or ch.isspace()
            or unicodedata.category(ch).startswith("M")
        )

    # Remove punctuation and symbols.
    text = "".join(ch for ch in text if _is_kept(ch))

    # Lowercase; caseless scripts such as Devanagari/Kannada are unaffected.
    text = text.lower()

    # Dropping punctuation can leave runs of spaces ("a - b" -> "a  b");
    # collapse them so exact comparison is not thrown off. This also trims.
    return " ".join(text.split())
64
 
65
  df = pd.read_csv(CSV_PATH)
66
  df["context_norm"] = df["context"].apply(normalize_text)
 
171
 
172
 
173
 
174
def exact_match(pred, gold):
    """Return 1 when prediction and reference agree after normalisation.

    Both strings are passed through ``normalize_answer`` and compared for
    equality.

    Args:
        pred: Predicted answer string.
        gold: Reference answer string.

    Returns:
        1 if the normalised strings are equal, otherwise 0.
    """
    cleaned_pred = normalize_answer(pred)
    cleaned_gold = normalize_answer(gold)
    if cleaned_pred == cleaned_gold:
        return 1
    return 0
177
 
 
 
178
 
179
def token_f1(pred, gold):
    """Compute the token-level F1 score between a prediction and a reference.

    Both strings are normalised with ``normalize_answer`` and split on
    whitespace. The overlap is counted as a multiset intersection, so a token
    occurring k times in both strings contributes k matches.

    Args:
        pred: Predicted answer string.
        gold: Reference answer string.

    Returns:
        F1 as a float in [0.0, 1.0]; 0.0 when the two strings share no token
        (including when either one is empty after normalisation).
    """
    # Function-scope import keeps this block self-contained.
    from collections import Counter

    pred_tokens = normalize_answer(pred).split()
    gold_tokens = normalize_answer(gold).split()

    # Counting overlap on sets while dividing by list lengths (which include
    # duplicates) under-scores repeated tokens: pred == gold == "a a" would
    # yield 0.5. Counter & Counter takes the per-token minimum count, which
    # keeps numerator and denominators on the same footing.
    overlap = sum((Counter(pred_tokens) & Counter(gold_tokens)).values())

    if overlap == 0:
        return 0.0

    precision = overlap / len(pred_tokens)
    recall = overlap / len(gold_tokens)

    return 2 * precision * recall / (precision + recall)
193
 
194
 
195
  def semantic_similarity(pred,gold):