optimize sbert values
Browse files
app.py
CHANGED
|
@@ -84,8 +84,6 @@ def normalize_ar_orth(text: str) -> str:
|
|
| 84 |
# تطبيع عام للمحاذاة
|
| 85 |
text = re.sub(r"[ููููููููู]", "", text)
|
| 86 |
text = re.sub(r"[โโ\"',:ุุ.!()\[\]{}ุ\-โโ_]", " ", text)
|
| 87 |
-
text = re.sub(r"[ุฅุฃูฑุขุง]", "ุง", text)
|
| 88 |
-
text = text.replace("ุฉ", "ู").replace("ู", "ู")
|
| 89 |
text = re.sub(r"\s+", " ", text).strip()
|
| 90 |
return text
|
| 91 |
|
|
@@ -93,8 +91,6 @@ def _normalize_for_models(s: str) -> str:
|
|
| 93 |
# تطبيع خاص لمدخلات SBERT/MARBERT
|
| 94 |
s = re.sub(r"[ููููููููู]", "", s)
|
| 95 |
s = re.sub(r"[โโ\"',:ุุ.!()\[\]{}ุ\-โโ_]", " ", s)
|
| 96 |
-
s = re.sub(r"[ุฅุฃูฑุขุง]", "ุง", s)
|
| 97 |
-
s = s.replace("ุฉ", "ู").replace("ู", "ู")
|
| 98 |
s = re.sub(r"\s+", " ", s).strip()
|
| 99 |
return s
|
| 100 |
|
|
@@ -336,7 +332,7 @@ def classify_pair(ref_w, hyp_w, bert_scores, phon_sim, lev1, short_word,
|
|
| 336 |
return 'ASR error (short+lev1)'
|
| 337 |
|
| 338 |
# semantic/phonetic
|
| 339 |
-
sbert_ok = bert_scores["sbert"] >= 0.
|
| 340 |
avg_ok = bert_scores["avg"] >= bert_thresh
|
| 341 |
max_ok = (bert_scores["max"] > max_bert) and sbert_ok
|
| 342 |
disagree = (bert_scores.get("note") == "models_disagree")
|
|
@@ -349,7 +345,7 @@ def classify_pair(ref_w, hyp_w, bert_scores, phon_sim, lev1, short_word,
|
|
| 349 |
if sbert_ok and avg_ok:
|
| 350 |
return 'ASR error (semantic/phonetic)'
|
| 351 |
else:
|
| 352 |
-
if bert_scores["sbert"] >= 0.
|
| 353 |
return 'ASR error (semantic)'
|
| 354 |
|
| 355 |
return 'Memorization error'
|
|
|
|
| 84 |
# تطبيع عام للمحاذاة
|
| 85 |
text = re.sub(r"[ููููููููู]", "", text)
|
| 86 |
text = re.sub(r"[โโ\"',:ุุ.!()\[\]{}ุ\-โโ_]", " ", text)
|
|
|
|
|
|
|
| 87 |
text = re.sub(r"\s+", " ", text).strip()
|
| 88 |
return text
|
| 89 |
|
|
|
|
| 91 |
# تطبيع خاص لمدخلات SBERT/MARBERT
|
| 92 |
s = re.sub(r"[ููููููููู]", "", s)
|
| 93 |
s = re.sub(r"[โโ\"',:ุุ.!()\[\]{}ุ\-โโ_]", " ", s)
|
|
|
|
|
|
|
| 94 |
s = re.sub(r"\s+", " ", s).strip()
|
| 95 |
return s
|
| 96 |
|
|
|
|
| 332 |
return 'ASR error (short+lev1)'
|
| 333 |
|
| 334 |
# semantic/phonetic
|
| 335 |
+
sbert_ok = bert_scores["sbert"] >= 0.80
|
| 336 |
avg_ok = bert_scores["avg"] >= bert_thresh
|
| 337 |
max_ok = (bert_scores["max"] > max_bert) and sbert_ok
|
| 338 |
disagree = (bert_scores.get("note") == "models_disagree")
|
|
|
|
| 345 |
if sbert_ok and avg_ok:
|
| 346 |
return 'ASR error (semantic/phonetic)'
|
| 347 |
else:
|
| 348 |
+
if bert_scores["sbert"] >= 0.80:
|
| 349 |
return 'ASR error (semantic)'
|
| 350 |
|
| 351 |
return 'Memorization error'
|