import os

import torch
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
# Note: Keep the imports together for clarity
from transformers import AutoModelForSeq2SeqLM, NllbTokenizer

from normalize_bm_words import normalize_text

# =====================
# 1️⃣ Environment / Cache
# =====================
# Set the Hugging Face cache environment variables before any model download
os.environ["HF_HOME"] = "/tmp/hf"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf"
os.environ["HF_DATASETS_CACHE"] = "/tmp/hf"
os.makedirs("/tmp/hf", exist_ok=True)

# =====================
# 2️⃣ Device
# =====================
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# =====================
# 3️⃣ Load Model & Tokenizer
# =====================
# Load the NLLB model and tokenizer
try:
    model_name = "Gaoussin/bamalingua-4"
    tokenizer = NllbTokenizer.from_pretrained(model_name)
    # Move the model to the selected device (CPU or GPU)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
    print(f"Model '{model_name}' loaded successfully on {device}.")
except Exception as e:
    print(f"Error loading model or tokenizer: {e}")
    # Fail fast: without this, later requests would hit an undefined `model`.
    # In a real application, you might handle this more gracefully.
    raise

# =====================
# 4️⃣ FastAPI setup - Define Input and Output Schemas
# =====================
app = FastAPI()

# Input schema
class TranslationRequest(BaseModel):
    text: str
    src_lang: str  # e.g., "bam_Latn"
    tgt_lang: str  # e.g., "fra_Latn"

# Output schema
class TranslationResponse(BaseModel):
    """
    Ensures both the translated text and the app version ID
    are included in the response JSON.
    """
    translation: str
    appVersionId: str

# =====================
# 5️⃣ Translation function
# =====================
def translateTo(text, src, tgt):
    # src_lang controls the language token prepended during tokenization
    tokenizer.src_lang = src
    tokenizer.tgt_lang = tgt
    print(tokenizer.src_lang, tokenizer.tgt_lang)

    # Tokenize and move the inputs to the same device as the model
    inputs = tokenizer(text, return_tensors="pt").to(device)

    # For NLLB, the target language must be forced as the first generated
    # token; setting tokenizer.tgt_lang alone does not affect generation.
    output = model.generate(
        **inputs,
        forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt),
        max_length=128,
    )

    # Decode the generated tokens back into text
    return tokenizer.decode(output[0], skip_special_tokens=True)

# =====================
# 6️⃣ API Endpoints
# =====================
@app.post("/translate", response_model=TranslationResponse)
def translate(request: TranslationRequest):
    try:
        # normalize_text comes from the imported normalize_bm_words module
        text = normalize_text(request.text)
        result = translateTo(text, request.src_lang, request.tgt_lang)
        appVersionId = "App Version id = 2"

        # Return a dictionary matching the TranslationResponse schema
        return {"translation": result, "appVersionId": appVersionId}
    except Exception as e:
        print(f"An error occurred during translation: {e}")
        # When raising an HTTPException, the response model is bypassed
        # and a standard JSON error body is returned instead.
        raise HTTPException(
            status_code=500,
            detail=f"Translation failed: {str(e)}",
        )

@app.get("/")
def root():
    return {"message": "API is running 🚀"}
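
# =====================
# 7️⃣ Running the service (sketch)
# =====================
# A minimal way to exercise the endpoints above, assuming this file is saved
# as main.py and uvicorn is installed; the filename and port are assumptions,
# not part of the original code:
#
#   uvicorn main:app --host 0.0.0.0 --port 8000
#
# Example request using the NLLB-style language codes shown in the schema:
#
#   curl -X POST http://localhost:8000/translate \
#        -H "Content-Type: application/json" \
#        -d '{"text": "example text", "src_lang": "bam_Latn", "tgt_lang": "fra_Latn"}'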