from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

# --- Performance: Load models once at startup ---
model_id = "rrrr66254/Glossa-BART"

# This model loading now relies on the environment variables
# set in apiRoutes.py to correctly define the cache location.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id, trust_remote_code=True)
model.eval()

# Move the model to CUDA if available, once at startup
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)
# ----------------------------------------------------------------------


def translateGloss(gloss: str) -> str:
    """
    Translates a sign language gloss sequence into a natural language
    sentence using the pre-loaded Glossa-BART model.
    """
    # --- DEBUG STEP 1: Log Input ---
    print(f"DEBUG: Starting translation for gloss: '{gloss}'")

    inputs = tokenizer(gloss, return_tensors="pt", padding=True, truncation=True)

    # Move the inputs to the same device as the model
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Generate the translated sentence deterministically (greedy decoding)
    outputs = model.generate(**inputs, max_new_tokens=50, do_sample=False)

    # Decode the output and clean up any residual whitespace
    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    result = result.strip()

    # --- DEBUG STEP 2: Log Output ---
    print(f"DEBUG: Model generated raw result: '{result}'")

    return result
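

# --- Usage sketch (illustrative only) ---
# A minimal manual smoke test showing how translateGloss might be called
# when this module is run directly. The sample gloss below is hypothetical,
# chosen only to exercise the pipeline; real inputs depend on the gloss
# conventions the Glossa-BART model was trained on.
if __name__ == "__main__":
    sample_gloss = "YESTERDAY STORE I GO"  # hypothetical gloss sequence
    print(translateGloss(sample_gloss))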