Spaces:

Iscte-Sintra
/

User-Interface_v1.0

Sleeping

ErikDaska committed on Jan 16

Commit

f2eb0c4

verified ·

1 Parent(s): 45fd469

Update src/streamlit_app.py

Files changed (1) hide show

src/streamlit_app.py CHANGED Viewed

@@ -47,18 +47,19 @@ def instantiate_translation_model(model_name, text, src_lg, tgt_lg):
         return pipe(text)[0]["translation_text"]
     # ---- M2M100 ----
-    elif "m2m100" in model_name:
-        pipe = pipeline(
-            "translation",
-            model=model_path,
-            tokenizer=model_path,
-            token=token
-        )
-        pipe.tokenizer.src_lang = src_lg
         result = pipe(
             text,
-            forced_bos_token_id=pipe.tokenizer.get_lang_id(tgt_lg)
         )
         return result[0]["translation_text"]
@@ -86,8 +87,8 @@ def build_translation_page(model_name):
     elif "m2m100" in model_name:
         lang_map = {
-            "Português": "pt",
-            "Kabuverdianu": "en"  # m2m100 does NOT support kea
         }
     else:  # mBART

         return pipe(text)[0]["translation_text"]
     # ---- M2M100 ----
+    elif "m2m100" in model_name:
+        pipe = load_pipeline("translation", model_path)
+        # Set the source language
+        pipe.tokenizer.src_lang = src_lg
+        # M2M100 requires the forced_bos_token_id to be the target lang token
+        tgt_lang_id = pipe.tokenizer.convert_tokens_to_ids(tgt_lg)
         result = pipe(
             text,
+            forced_bos_token_id=tgt_lang_id
         )
         return result[0]["translation_text"]
     elif "m2m100" in model_name:
         lang_map = {
+            "Português": "__pt__",
+            "Kabuverdianu": "__en__"  # Proxying kea as __en__
         }
     else:  # mBART