KevSun committed on
Commit
1c79529
·
verified ·
1 Parent(s): 9725557

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -10
app.py CHANGED
@@ -17,12 +17,8 @@ def calculate_bertscore(translations, references, lang):
17
  P, R, F1 = bert_score(translations, references, lang=lang)
18
  return F1.mean().item()
19
 
20
- def tokenize_text(text, lang_code):
21
- if lang_code == "zh":
22
- return ' '.join(jieba.cut(text))
23
- # For other languages, we'll use a simple space-based tokenization
24
- # This might not be ideal for all languages, but it's a start
25
- return ' '.join(text.split())
26
 
27
  st.title("Machine Translation Quality Evaluation")
28
  st.write("Input the translated text and the reference translation to compute BLEU, TER, CHRF, and BERTScore metrics.")
@@ -48,10 +44,19 @@ reference_input = st.text_area("Reference Translation", height=200)
48
  if st.button("Evaluate"):
49
  if translation_input and reference_input:
50
  try:
51
- translations = [tokenize_text(translation_input.strip(), target_lang_code)]
52
- references = [tokenize_text(reference_input.strip(), target_lang_code)]
53
-
54
- st.write("Debug: Inputs received and tokenized")
 
 
 
 
 
 
 
 
 
55
  st.write(f"Translation: {translations}")
56
  st.write(f"Reference: {references}")
57
 
 
17
  P, R, F1 = bert_score(translations, references, lang=lang)
18
  return F1.mean().item()
19
 
20
+ def tokenize_chinese(text):
21
+ return ' '.join(jieba.cut(text))
 
 
 
 
22
 
23
  st.title("Machine Translation Quality Evaluation")
24
  st.write("Input the translated text and the reference translation to compute BLEU, TER, CHRF, and BERTScore metrics.")
 
44
  if st.button("Evaluate"):
45
  if translation_input and reference_input:
46
  try:
47
+ # Process translation input
48
+ if target_lang_code == "zh":
49
+ translations = [tokenize_chinese(translation_input.strip())]
50
+ else:
51
+ translations = [translation_input.strip()]
52
+
53
+ # Process reference input
54
+ if target_lang_code == "zh":
55
+ references = [tokenize_chinese(reference_input.strip())]
56
+ else:
57
+ references = [reference_input.strip()]
58
+
59
+ st.write("Debug: Inputs processed")
60
  st.write(f"Translation: {translations}")
61
  st.write(f"Reference: {references}")
62