Update app.py
Browse files
app.py
CHANGED
|
@@ -48,6 +48,19 @@ def normalize_text(s: str):
|
|
| 48 |
s = re.sub(r"\s+"," ",s).strip()
|
| 49 |
return s
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
# Load the dataset from the CSV file at CSV_PATH into a DataFrame.
df = pd.read_csv(CSV_PATH)
|
| 53 |
# Add a "context_norm" column: normalize_text applied to every "context" value.
df["context_norm"] = df["context"].apply(normalize_text)
|
|
@@ -158,27 +171,25 @@ def kn_to_en(text):
|
|
| 158 |
|
| 159 |
|
| 160 |
|
| 161 |
-
def exact_match(pred,gold):
    """Return 1 when pred and gold are equal after normalize_text, else 0."""
    is_same = normalize_text(pred) == normalize_text(gold)
    return 1 if is_same else 0
|
| 164 |
-
|
| 165 |
|
| 166 |
-
|
| 167 |
|
| 168 |
-
p=set(pred.split())
|
| 169 |
-
g=set(gold.split())
|
| 170 |
|
| 171 |
-
|
| 172 |
|
| 173 |
-
|
|
|
|
| 174 |
|
| 175 |
-
|
| 176 |
|
| 177 |
-
|
|
|
|
| 178 |
|
| 179 |
-
|
|
|
|
| 180 |
|
| 181 |
-
return 2*precision*recall/(precision+recall)
|
| 182 |
|
| 183 |
|
| 184 |
def semantic_similarity(pred,gold):
|
|
|
|
| 48 |
s = re.sub(r"\s+"," ",s).strip()
|
| 49 |
return s
|
| 50 |
|
| 51 |
+
import string
|
| 52 |
+
|
| 53 |
+
def normalize_answer(text):
    """Normalize an answer string for exact-match / token-F1 comparison.

    Steps, in order: apply normalize_text, drop punctuation and symbols,
    lowercase, then collapse whitespace runs to a single space and trim.

    Args:
        text: The raw answer string.

    Returns:
        The normalized string.
    """
    import unicodedata

    text = normalize_text(text)

    # Remove punctuation/symbols. A plain ``[^\w\s]`` regex is NOT safe for
    # Hindi/Kannada: ``\w`` only covers ``str.isalnum()`` characters plus "_",
    # so it would also delete combining marks (vowel signs, virama, anusvara)
    # and mangle the words. Keep every character of Unicode category M*.
    text = "".join(
        ch
        for ch in text
        if ch.isalnum()
        or ch == "_"
        or ch.isspace()
        or unicodedata.category(ch).startswith("M")
    )

    # Lowercase; caseless scripts are left unchanged by str.lower().
    text = text.lower()

    # Removing a standalone punctuation token ("a - b") leaves a double space,
    # so whitespace has to be collapsed again after the removal step.
    return " ".join(text.split())
|
| 64 |
|
| 65 |
# Load the dataset from the CSV file at CSV_PATH into a DataFrame.
df = pd.read_csv(CSV_PATH)
|
| 66 |
# Add a "context_norm" column: normalize_text applied to every "context" value.
df["context_norm"] = df["context"].apply(normalize_text)
|
|
|
|
| 171 |
|
| 172 |
|
| 173 |
|
| 174 |
+
def exact_match(pred, gold):
    """Return 1 when pred and gold are equal after normalize_answer, else 0."""
    normalized_pred = normalize_answer(pred)
    normalized_gold = normalize_answer(gold)
    return 1 if normalized_pred == normalized_gold else 0
|
| 177 |
|
|
|
|
|
|
|
| 178 |
|
| 179 |
+
def token_f1(pred, gold):
    """Return the token-level F1 score between a prediction and a reference.

    Both strings are passed through normalize_answer and split on whitespace.
    Overlap is counted as a multiset intersection, so a token that occurs
    several times is matched once per occurrence present in both strings.

    Args:
        pred: The predicted answer string.
        gold: The reference answer string.

    Returns:
        The F1 score as a float in [0.0, 1.0]; 0.0 when the two token lists
        share no token (including when either one is empty).
    """
    from collections import Counter

    pred_tokens = normalize_answer(pred).split()
    gold_tokens = normalize_answer(gold).split()

    # Count shared tokens with multiplicity. A plain set intersection would
    # undercount against the list-length denominators below, so identical
    # answers with a repeated token ("a a" vs "a a") would score below 1.0.
    overlap = sum((Counter(pred_tokens) & Counter(gold_tokens)).values())

    # No shared token also covers empty inputs, so the divisions are safe.
    if overlap == 0:
        return 0.0

    precision = overlap / len(pred_tokens)
    recall = overlap / len(gold_tokens)

    return 2 * precision * recall / (precision + recall)
|
| 193 |
|
| 194 |
|
| 195 |
def semantic_similarity(pred,gold):
|