work

Sleeping

App Files Files Community

hellokawei committed on Jun 28, 2025

Commit

a70ed43

verified ·

1 Parent(s): 90d1820

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -41

app.py CHANGED Viewed

@@ -17,19 +17,12 @@ MODEL_CONFIGS = {
         "max_length": 200, # 翻译输出的最大长度
         "color": "#FF6B6B"
     },
-    "Chinese-to-English (M4-Small)": {
-        "model_name": "HuggingFaceM4/m4-small-en-zh", # 这是一个多语言模型，支持zh-en
-        "description": "中文到英文的机器翻译模型 (HuggingFaceM4 M4-Small)",
         "max_length": 200, # 翻译输出的最大长度
         "color": "#4ECDC4"
     }
-    # 如果需要第三个模型，可以取消注释下面这个，或替换成您想要的
-    # "Chinese-to-English (Another Model)": {
-    #     "model_name": "facebook/mbart-large-50-one-to-many-mmt", # 另一个多语言模型，需要指定 src_lang/tgt_lang
-    #     "description": "中文到英文的机器翻译模型 (Facebook mBART-Large-50)",
-    #     "max_length": 200,
-    #     "color": "#45B7D1"
-    # }
 }
 class TranslationComparator:
@@ -43,38 +36,27 @@ class TranslationComparator:
         for model_key, config in MODEL_CONFIGS.items():
             try:
                 print(f"加载 {model_key} ({config['model_name']})...")
                 # 对于翻译任务，使用 "translation" pipeline
-                # 注意：某些多语言模型（如 m4-small）可能需要显式指定源语言和目标语言
-                # 对于 Helsinki-NLP/opus-mt-zh-en，pipeline会自动处理
-                # 对于 HuggingFaceM4/m4-small-en-zh，虽然名字是en-zh，但它内部支持zh-en。
-                # 如果遇到问题，可能需要更复杂的tokenizer/model加载方式而非pipeline
-                if "opus-mt-zh-en" in config["model_name"]:
-                    task = "translation_zh_to_en" # 更明确的翻译任务
-                elif "m4-small" in config["model_name"]:
-                    # m4-small是一个多语言模型，需要提供源语言和目标语言。
-                    # pipeline("translation") 不直接支持 src_lang/tgt_lang 参数
-                    # 需要手动加载 AutoModelForSeq2SeqLM 和 AutoTokenizer
-                    print(f"特别加载 {model_key} 及其Tokenizer...")
-                    tokenizer = AutoTokenizer.from_pretrained(config["model_name"])
-                    model = AutoModelForSeq2SeqLM.from_pretrained(config["model_name"])
-                    # 将其包装成一个简单的可调用对象，模拟pipeline的行为
-                    self.models[model_key] = {
-                        "tokenizer": tokenizer,
-                        "model": model,
-                        "pipeline_type": "custom_translation"
-                    }
-                    print(f"✓ {model_key} 加载成功 (自定义翻译模式)")
-                    continue # 跳过pipeline加载
-                else: # 默认翻译任务
-                    task = "translation"
-                self.models[model_key] = pipeline(
-                    task,
-                    model=config["model_name"],
-                    tokenizer=config["model_name"],
-                    device=-1, # 使用CPU
-                    torch_dtype=torch.float32
-                )
                 print(f"✓ {model_key} 加载成功")
             except Exception as e:
                 print(f"✗ {model_key} 加载失败: {e}")

         "max_length": 200, # 翻译输出的最大长度
         "color": "#FF6B6B"
     },
+    "Chinese-to-English (mBART-Large-50)": { # 替换为mBART模型
+        "model_name": "facebook/mbart-large-50-many-to-one-mmt",
+        "description": "中文到英文的机器翻译模型 (Facebook mBART-Large-50)",
         "max_length": 200, # 翻译输出的最大长度
         "color": "#4ECDC4"
     }
 }
 class TranslationComparator:
         for model_key, config in MODEL_CONFIGS.items():
             try:
                 print(f"加载 {model_key} ({config['model_name']})...")
                 # 对于翻译任务，使用 "translation" pipeline
+                # 注意：mBART模型需要指定 src_lang 和 tgt_lang
+                if "mbart-large-50" in config["model_name"]:
+                    self.models[model_key] = pipeline(
+                        "translation",
+                        model=config["model_name"],
+                        tokenizer=config["model_name"],
+                        src_lang="zh_CN",  # 源语言为中文
+                        tgt_lang="en_US",  # 目标语言为英文
+                        device=-1, # 使用CPU
+                        torch_dtype=torch.float32
+                    )
+                else: # 对于Helsinki-NLP/opus-mt-zh-en等
+                    self.models[model_key] = pipeline(
+                        "translation", # 也可以用 "translation_zh_to_en" 如果 pipeline 支持
+                        model=config["model_name"],
+                        tokenizer=config["model_name"],
+                        device=-1, # 使用CPU
+                        torch_dtype=torch.float32
+                    )
                 print(f"✓ {model_key} 加载成功")
             except Exception as e:
                 print(f"✗ {model_key} 加载失败: {e}")