work

Sleeping

App Files Community

hellokawei committed on Jun 28, 2025

Commit

0938f57

verified ·

1 Parent(s): 451b0b5

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -41

app.py CHANGED Viewed

@@ -18,15 +18,15 @@ MODEL_CONFIGS = {
         "max_length": 200, # 翻译输出的最大长度
         "color": "#FF6B6B"
     },
-    "Chinese-to-English (mBART-Large-50)": { # 替换为mBART模型
-        "model_name": "facebook/mbart-large-50-many-to-one-mmt",
-        "description": "中文到英文的机器翻译模型 (Facebook mBART-Large-50)",
         "max_length": 200, # 翻译输出的最大长度
         "color": "#4ECDC4"
     }
     # 如果需要第三个模型，可以取消注释下面这个，或替换成您想要的
     # "Chinese-to-English (Another Model)": {
-    #     "model_name": "facebook/mbart-large-50-one-to-many-mmt", # 另一个多语言模型，需要指定 src_lang/tgt_lang
     #     "description": "中文到英文的机器翻译模型 (Facebook mBART-Large-50)",
     #     "max_length": 200,
     #     "color": "#45B7D1"
@@ -45,26 +45,15 @@ class TranslationComparator:
             try:
                 print(f"加载 {model_key} ({config['model_name']})...")
-                # 对于翻译任务，使用 "translation" pipeline
-                # 注意：mBART模型需要指定 source_lang 和 target_lang
-                if "mbart-large-50" in config["model_name"]:
-                    self.models[model_key] = pipeline(
-                        "translation",
-                        model=config["model_name"],
-                        tokenizer=config["model_name"],
-                        src_lang="zh_CN",  # 源语言为中文
-                        tgt_lang="en_US",  # 目标语言为英文
-                        device=-1, # 使用CPU，避免GPU内存不足问题
-                        torch_dtype=torch.float32 # 保持一致，或根据模型精度调整
-                    )
-                else: # 对于Helsinki-NLP/opus-mt-zh-en等
-                    self.models[model_key] = pipeline(
-                        "translation", # 也可以用 "translation_zh_to_en" 如果 pipeline 支持
-                        model=config["model_name"],
-                        tokenizer=config["model_name"],
-                        device=-1, # 使用CPU
-                        torch_dtype=torch.float32
-                    )
                 print(f"✓ {model_key} 加载成功")
             except Exception as e:
                 print(f"✗ {model_key} 加载失败: {e}")
@@ -87,12 +76,19 @@ class TranslationComparator:
         try:
             start_time = time.time()
-            # 翻译文本
-            # pipeline("translation") 的返回格式是 [{"translation_text": "..."}]
-            result = model_entry( # 直接使用 model_entry，因为现在都是pipeline对象
-                text_to_translate,
-                max_length=max_length
-            )
             end_time = time.time()
@@ -163,12 +159,12 @@ def calculate_grace_scores_for_translation():
             "Consistency": 7.9,   # 翻译稳定性
             "Efficiency": 7.5     # 推理效率
         },
-        "Chinese-to-English (mBART-Large-50)": { # **这里已修改！**
-            "Generalization": 8.5, # 更大型多语言模型，泛化性通常更强
-            "Relevance": 8.8,
-            "Accuracy": 8.6,
-            "Consistency": 8.5,
-            "Efficiency": 6.0     # 模型较大，效率可能略低
         }
     }
     return grace_data
@@ -183,8 +179,7 @@ def create_translation_radar_chart():
     for i, (model_name, scores) in enumerate(grace_scores.items()):
         values = [scores[cat] for cat in categories]
-        # **这里使用 MODEL_CONFIGS[model_name]["color"] 依赖于 MODEL_CONFIGS 和 grace_scores 的键名一致**
-        # 这是导致之前 KeyError 的地方，现在应该已修复，因为 calculate_grace_scores_for_translation 的键名已更新
         color = MODEL_CONFIGS[model_name]["color"]
         fig.add_trace(go.Scatterpolar(
@@ -253,9 +248,9 @@ def create_model_info_table():
         if "opus-mt-zh-en" in config["model_name"]:
             params = "~3亿"
             size = "~1.2GB"
-        elif "mbart-large-50" in config["model_name"]: # 修改为mBART的参数
-            params = "~6.1亿" # mBART-Large-50 的实际参数量
-            size = "~2.4GB" # mBART-Large-50 的实际模型大小
         else: # 默认值
             params = "未知"
             size = "未知"

         "max_length": 200, # 翻译输出的最大长度
         "color": "#FF6B6B"
     },
+    "Chinese-to-English (T5-Small)": { # **更改为 T5-Small 模型**
+        "model_name": "google-t5/t5-small",
+        "description": "中文到英文的机器翻译模型 (Google T5-Small)",
         "max_length": 200, # 翻译输出的最大长度
         "color": "#4ECDC4"
     }
     # 如果需要第三个模型，可以取消注释下面这个，或替换成您想要的
     # "Chinese-to-English (Another Model)": {
+    #     "model_name": "facebook/mbart-large-50-one-to-many-mmt",
     #     "description": "中文到英文的机器翻译模型 (Facebook mBART-Large-50)",
     #     "max_length": 200,
     #     "color": "#45B7D1"
             try:
                 print(f"加载 {model_key} ({config['model_name']})...")
+                # T5模型通常用于多任务，这里我们明确指定它用于翻译
+                # pipeline("translation") 会尝试自动处理，但T5需要特定输入格式
+                self.models[model_key] = pipeline(
+                    "translation", # T5可以用'translation' task
+                    model=config["model_name"],
+                    tokenizer=config["model_name"],
+                    device=-1, # 使用CPU，避免GPU内存不足问题
+                    torch_dtype=torch.float32 # 保持一致，或根据模型精度调整
+                )
                 print(f"✓ {model_key} 加载成功")
             except Exception as e:
                 print(f"✗ {model_key} 加载失败: {e}")
         try:
             start_time = time.time()
+            # **针对 T5 模型添加输入格式化**
+            if "t5-small" in model_key.lower(): # 检查是否是T5-Small模型
+                # T5的翻译任务通常需要这样的前缀
+                formatted_text = f"translate Chinese to English: {text_to_translate}"
+                result = model_entry(
+                    formatted_text,
+                    max_length=max_length
+                )
+            else: # 对于Helsinki-NLP/opus-mt-zh-en等其他模型
+                result = model_entry( # 直接使用 model_entry，因为现在都是pipeline对象
+                    text_to_translate,
+                    max_length=max_length
+                )
             end_time = time.time()
             "Consistency": 7.9,   # 翻译稳定性
             "Efficiency": 7.5     # 推理效率
         },
+        "Chinese-to-English (T5-Small)": { # **T5-Small 的模拟 GRACE 分数**
+            "Generalization": 6.8, # 比T5-Base略低，泛化性可能稍弱
+            "Relevance": 7.0,
+            "Accuracy": 6.5,
+            "Consistency": 6.8,
+            "Efficiency": 9.0     # 模型更小，效率更高
         }
     }
     return grace_data
     for i, (model_name, scores) in enumerate(grace_scores.items()):
         values = [scores[cat] for cat in categories]
+        # 这里使用 MODEL_CONFIGS[model_name]["color"] 依赖于 MODEL_CONFIGS 和 grace_scores 的键名一致
         color = MODEL_CONFIGS[model_name]["color"]
         fig.add_trace(go.Scatterpolar(
         if "opus-mt-zh-en" in config["model_name"]:
             params = "~3亿"
             size = "~1.2GB"
+        elif "t5-small" in config["model_name"]: # **更新 T5-Small 的参数**
+            params = "~6千万" # T5-Small 实际参数量约 60 million
+            size = "~240MB" # T5-Small 实际模型大小约 240MB
         else: # 默认值
             params = "未知"
             size = "未知"