import os
import torch
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
# Note: keep the imports together for clarity
from transformers import NllbTokenizer, AutoModelForSeq2SeqLM
# Local Bambara text-normalization helpers shipped with the Space
# (not yet wired into the endpoint below)
from normalize_bm_input import normalize_bm_input
from normalize_bm_output import normalize_bm_output
# =====================
# 1️⃣ Environment / Cache
# =====================
# Point the Hugging Face caches at a writable directory (/tmp is writable on
# Spaces). TRANSFORMERS_CACHE is deprecated in recent transformers releases
# but is kept here for compatibility with older ones.
os.environ["HF_HOME"] = "/tmp/hf"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf"
os.environ["HF_DATASETS_CACHE"] = "/tmp/hf"
os.makedirs("/tmp/hf", exist_ok=True)
# =====================
# 2️⃣ Device
# =====================
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
# =====================
# 3️⃣ Load Model & Tokenizer
# =====================
# Load the NLLB model and tokenizer
try:
    model_name = "Gaoussin/Bamalingua-2"
    tokenizer = NllbTokenizer.from_pretrained(model_name)
    # Move the model to the selected device (CPU or GPU)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
    print(f"Model '{model_name}' loaded successfully on {device}.")
except Exception as e:
    print(f"Error loading model or tokenizer: {e}")
    # Fail fast: the API cannot serve requests without a loaded model
    raise
# =====================
# 4️⃣ FastAPI setup - Define Input and Output Schemas
# =====================
app = FastAPI()
# Input schema
class TranslationRequest(BaseModel):
    text: str
    src_lang: str  # e.g., "bam_Latn"
    tgt_lang: str  # e.g., "fra_Latn"
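# Output schema (a minimal sketch: the field names are an assumption, since
# the original endpoint returned an unlabeled [result, model_name] list)
class TranslationResponse(BaseModel):
    translation: str
    model: str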
# =====================
# 5️⃣ Translation function
# =====================
def translateTo(text, src, tgt):
    tokenizer.src_lang = src
    tokenizer.tgt_lang = tgt
    print(f"Translating {text!r} from {src} to {tgt}")
    # Prepare the input and move it to the same device as the model
    inputs = tokenizer(text, return_tensors="pt").to(device)
    # Force the decoder to start with the target-language token; without this,
    # NLLB-style models do not reliably generate in the requested language
    output = model.generate(
        **inputs,
        forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt),
        max_length=128,
    )
    # Decode the generated tokens back into text
    return tokenizer.decode(output[0], skip_special_tokens=True)
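# Example call (illustrative only; the sample sentence is a Bambara greeting
# and the actual output depends on the fine-tuned checkpoint):
#   translateTo("I ni ce", "bam_Latn", "fra_Latn")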
# =====================
# 6️⃣ API Endpoints
# =====================
@app.post("/translate")
def translate(request: TranslationRequest):
try:
# --- 2. Core Translation ---
result = translateTo(request.text, request.src_lang, request.tgt_lang)
# --- 4. Final Output ---
translation_list = [result, model_name]
###
return [translation_list]
except Exception as e:
print(f"An error occurred during translation: {e}")
raise HTTPException(status_code=500, detail=f"Translation failed: {str(e)}")
@app.get("/")
def root():
return {"message": "API is running 🚀"}