Spaces:

Gaoussin
/

bm-translator

Running

App Files Community

Gaoussin committed on Nov 17, 2025

Commit

dc0f0ce

verified ·

1 Parent(s): 993c4e8

Updated model and minimized code.

Browse files

Files changed (1) hide show

main.py +1 -32

main.py CHANGED Viewed

@@ -18,37 +18,9 @@ if HF_TOKEN is None:
 # 3️⃣ DEVICE
 device = "cuda" if torch.cuda.is_available() else "cpu"
-# 4️⃣ Load model + tokenizer (PRIVATE REPO)
-#model_name = "Gaoussin/bamalingua-bm-fr"
-#tokenizer = MBart50TokenizerFast.from_pretrained(model_name, token=HF_TOKEN)
-#model = MBartForConditionalGeneration.from_pretrained(model_name, token=HF_TOKEN).to(device)
-####
-# 3. Load tokenizer & add Bambara token
-# ========================================
 model_name = "Gaoussin/bamalingua-bm_ml-fr_XX"
-# Load the tokenizer with a default language and suppress the error
-try:
-    tokenizer = MBart50Tokenizer.from_pretrained(model_name, src_lang="fr_XX")
-except KeyError:
-    # If loading with en_XX fails, try without specifying src_lang and fix afterwards
-    tokenizer = MBart50Tokenizer.from_pretrained(model_name)
-# Add the new language as an additional special token and update mappings
-new_lang = 'bm_ml'
-if new_lang not in tokenizer.lang_code_to_id:
-    tokenizer.add_special_tokens({'additional_special_tokens': [new_lang]})
-    # Update the internal language code mappings
-    new_id = len(tokenizer) - 1
-    tokenizer.lang_code_to_id[new_lang] = new_id
-    tokenizer.id_to_lang_code[new_id] = new_lang
-    print(f"Added new language token '{new_lang}' with ID {new_id}")
-else:
-    print(f"Language token '{new_lang}' already exists in tokenizer.")
-# Load model
 model = MBartForConditionalGeneration.from_pretrained("Gaoussin/bamalingua-bm_ml-fr_XX")
-model.resize_token_embeddings(len(tokenizer))
 #####
@@ -71,9 +43,6 @@ class TranslationRequest(BaseModel):
 @app.post("/translate")
 def translate(request: TranslationRequest):
     output = translateTo(request.text, request.src_lang, request.tgt_lang)
-    # Remove the unwanted token if it's present
-    if "fr_XX" in output:
-        output = output.replace("fr_XX", "").strip()
     return {"translation": output}
 @app.get("/")

 # 3️⃣ DEVICE
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model_name = "Gaoussin/bamalingua-bm_ml-fr_XX"
+tokenizer = MBart50Tokenizer.from_pretrained(model_name)
 model = MBartForConditionalGeneration.from_pretrained("Gaoussin/bamalingua-bm_ml-fr_XX")
 #####
 @app.post("/translate")
 def translate(request: TranslationRequest):
     output = translateTo(request.text, request.src_lang, request.tgt_lang)
     return {"translation": output}
 @app.get("/")