MuhammadHijazii committed on
Commit
3d93605
·
verified ·
1 Parent(s): 5d519e9

optimize sbert values

Browse files
Files changed (1) hide show
  1. app.py +2 -6
app.py CHANGED
@@ -84,8 +84,6 @@ def normalize_ar_orth(text: str) -> str:
84
  # تطبيع عام للمحاذاة
85
  text = re.sub(r"[ู‘ูŽู‹ููŒููู’ู€]", "", text)
86
  text = re.sub(r"[โ€œโ€\"',:ุ›ุŸ.!()\[\]{}ุŒ\-โ€“โ€”_]", " ", text)
87
- text = re.sub(r"[ุฅุฃูฑุขุง]", "ุง", text)
88
- text = text.replace("ุฉ", "ู‡").replace("ู‰", "ูŠ")
89
  text = re.sub(r"\s+", " ", text).strip()
90
  return text
91
 
@@ -93,8 +91,6 @@ def _normalize_for_models(s: str) -> str:
93
  # تطبيع خاص لمدخلات SBERT/MARBERT
94
  s = re.sub(r"[ู‘ูŽู‹ููŒููู’ู€]", "", s)
95
  s = re.sub(r"[โ€œโ€\"',:ุ›ุŸ.!()\[\]{}ุŒ\-โ€“โ€”_]", " ", s)
96
- s = re.sub(r"[ุฅุฃูฑุขุง]", "ุง", s)
97
- s = s.replace("ุฉ", "ู‡").replace("ู‰", "ูŠ")
98
  s = re.sub(r"\s+", " ", s).strip()
99
  return s
100
 
@@ -336,7 +332,7 @@ def classify_pair(ref_w, hyp_w, bert_scores, phon_sim, lev1, short_word,
336
  return 'ASR error (short+lev1)'
337
 
338
  # semantic/phonetic
339
- sbert_ok = bert_scores["sbert"] >= 0.70
340
  avg_ok = bert_scores["avg"] >= bert_thresh
341
  max_ok = (bert_scores["max"] > max_bert) and sbert_ok
342
  disagree = (bert_scores.get("note") == "models_disagree")
@@ -349,7 +345,7 @@ def classify_pair(ref_w, hyp_w, bert_scores, phon_sim, lev1, short_word,
349
  if sbert_ok and avg_ok:
350
  return 'ASR error (semantic/phonetic)'
351
  else:
352
- if bert_scores["sbert"] >= 0.78:
353
  return 'ASR error (semantic)'
354
 
355
  return 'Memorization error'
 
84
  # تطبيع عام للمحاذاة
85
  text = re.sub(r"[ู‘ูŽู‹ููŒููู’ู€]", "", text)
86
  text = re.sub(r"[โ€œโ€\"',:ุ›ุŸ.!()\[\]{}ุŒ\-โ€“โ€”_]", " ", text)
 
 
87
  text = re.sub(r"\s+", " ", text).strip()
88
  return text
89
 
 
91
  # تطبيع خاص لمدخلات SBERT/MARBERT
92
  s = re.sub(r"[ู‘ูŽู‹ููŒููู’ู€]", "", s)
93
  s = re.sub(r"[โ€œโ€\"',:ุ›ุŸ.!()\[\]{}ุŒ\-โ€“โ€”_]", " ", s)
 
 
94
  s = re.sub(r"\s+", " ", s).strip()
95
  return s
96
 
 
332
  return 'ASR error (short+lev1)'
333
 
334
  # semantic/phonetic
335
+ sbert_ok = bert_scores["sbert"] >= 0.80
336
  avg_ok = bert_scores["avg"] >= bert_thresh
337
  max_ok = (bert_scores["max"] > max_bert) and sbert_ok
338
  disagree = (bert_scores.get("note") == "models_disagree")
 
345
  if sbert_ok and avg_ok:
346
  return 'ASR error (semantic/phonetic)'
347
  else:
348
+ if bert_scores["sbert"] >= 0.80:
349
  return 'ASR error (semantic)'
350
 
351
  return 'Memorization error'