from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

# --- Performance: Load models once at startup ---
model_id = "rrrr66254/Glossa-BART"

# This model loading now relies on the environment variables
# set in apiRoutes.py to correctly define the cache location.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id, trust_remote_code=True)
model.eval()

# Move the model to CUDA if available, once at startup
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)
# ----------------------------------------------------------------------


def translateGloss(gloss: str) -> str:
    """
    Translates a sign language gloss sequence into a natural language
    sentence using the pre-loaded Glossa-BART model.
    """
    # --- DEBUG STEP 1: Log Input ---
    print(f"DEBUG: Starting translation for gloss: '{gloss}'")

    inputs = tokenizer(gloss, return_tensors="pt", padding=True, truncation=True)

    # Move the inputs to the same device as the model
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Generate the translated sentence deterministically (greedy decoding)
    outputs = model.generate(**inputs, max_new_tokens=50, do_sample=False)

    # Decode the output and clean up any residual whitespace
    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    result = result.strip()

    # --- DEBUG STEP 2: Log Output ---
    print(f"DEBUG: Model generated raw result: '{result}'")

    return result
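

# --- Usage sketch (illustrative only) ---
# A minimal manual smoke test showing how translateGloss might be called
# when this module is run directly. The sample gloss below is hypothetical,
# chosen only to exercise the pipeline; real inputs depend on the gloss
# conventions the Glossa-BART model was trained on.
if __name__ == "__main__":
    sample_gloss = "YESTERDAY STORE I GO"  # hypothetical gloss sequence
    print(translateGloss(sample_gloss))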