import os
import torch
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
# Note: keep the imports together for clarity
from transformers import NllbTokenizer, AutoModelForSeq2SeqLM
# Local Bambara text-normalization helpers shipped with the Space
# (not yet wired into the endpoint below)
from normalize_bm_input import normalize_bm_input
from normalize_bm_output import normalize_bm_output
# =====================
# 1️⃣ Environment / Cache
# =====================
# Point the Hugging Face caches at a writable directory (/tmp is writable on
# Spaces). TRANSFORMERS_CACHE is deprecated in recent transformers releases
# but is kept here for compatibility with older ones.
os.environ["HF_HOME"] = "/tmp/hf"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf"
os.environ["HF_DATASETS_CACHE"] = "/tmp/hf"
os.makedirs("/tmp/hf", exist_ok=True)
# =====================
# 2️⃣ Device
# =====================
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
# =====================
# 3️⃣ Load Model & Tokenizer
# =====================
# Load the NLLB model and tokenizer
try:
    model_name = "Gaoussin/Bamalingua-2"
    tokenizer = NllbTokenizer.from_pretrained(model_name)
    # Move the model to the selected device (CPU or GPU)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
    print(f"Model '{model_name}' loaded successfully on {device}.")
except Exception as e:
    print(f"Error loading model or tokenizer: {e}")
    # Fail fast: the API cannot serve requests without a loaded model
    raise
# =====================
# 4️⃣ FastAPI setup - Define Input and Output Schemas
# =====================
app = FastAPI()
# Input schema
class TranslationRequest(BaseModel):
    text: str
    src_lang: str  # e.g., "bam_Latn"
    tgt_lang: str  # e.g., "fra_Latn"
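# Output schema (a minimal sketch: the field names are an assumption, since
# the original endpoint returned an unlabeled [result, model_name] list)
class TranslationResponse(BaseModel):
    translation: str
    model: str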
# =====================
# 5️⃣ Translation function
# =====================
def translateTo(text, src, tgt):
    tokenizer.src_lang = src
    tokenizer.tgt_lang = tgt
    print(f"Translating {text!r} from {src} to {tgt}")
    # Prepare the input and move it to the same device as the model
    inputs = tokenizer(text, return_tensors="pt").to(device)
    # Force the decoder to start with the target-language token; without this,
    # NLLB-style models do not reliably generate in the requested language
    output = model.generate(
        **inputs,
        forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt),
        max_length=128,
    )
    # Decode the generated tokens back into text
    return tokenizer.decode(output[0], skip_special_tokens=True)
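# Example call (illustrative only; the sample sentence is a Bambara greeting
# and the actual output depends on the fine-tuned checkpoint):
#   translateTo("I ni ce", "bam_Latn", "fra_Latn")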
# =====================
# 6️⃣ API Endpoints
# =====================
@app.post("/translate")
def translate(request: TranslationRequest):
try:
# --- 2. Core Translation ---
result = translateTo(request.text, request.src_lang, request.tgt_lang)
# --- 4. Final Output ---
translation_list = [result, model_name]
###
return [translation_list]
except Exception as e:
print(f"An error occurred during translation: {e}")
raise HTTPException(status_code=500, detail=f"Translation failed: {str(e)}")
@app.get("/")
def root():
return {"message": "API is running 🚀"}