prompt-compiler-api / src /runtime /translator.py
JairoDanielMT's picture
Upload src/runtime/translator.py with huggingface_hub
929984d verified
Raw
History Blame Contribute Delete
1.14 kB
import torch
from optimum.onnxruntime import ORTModelForSeq2SeqLM
from transformers import AutoTokenizer
import os
class OfflineTranslator:
    """Translate text with a lazily loaded ONNX seq2seq model.

    Nothing is loaded when the object is constructed. The tokenizer and the
    model are fetched with ``from_pretrained`` the first time ``translate``
    actually needs them and are then reused for later calls.
    """

    def __init__(self, model_id="Xenova/opus-mt-es-en"):
        """Store the model identifier; loading is deferred to first use.

        Args:
            model_id: Identifier handed to ``from_pretrained`` for both the
                tokenizer and the model. The default id suggests a
                Spanish-to-English Opus-MT model (inferred from the name).
        """
        self.model_id = model_id
        # Both are populated by _init_model() on the first real translation.
        self.tokenizer = None
        self.model = None

    def _init_model(self):
        """Load the tokenizer and the model if they are not loaded yet."""
        # Compare against None explicitly. A truthiness test would invoke the
        # loaded object's __bool__/__len__ on every translate() call, and an
        # already-loaded object that evaluates falsy would be reloaded.
        if self.tokenizer is None:
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
        if self.model is None:
            self.model = ORTModelForSeq2SeqLM.from_pretrained(self.model_id)

    def translate(self, text: str) -> str:
        """Return the model's translation of ``text``.

        Args:
            text: The text to translate.

        Returns:
            ``text`` unchanged when it is empty or contains no alphabetic
            character (nothing to translate, and the model is not loaded);
            otherwise the decoded first sequence produced by the model.
        """
        # Guard clause: skip model loading and inference entirely for empty
        # input or input made only of digits, punctuation, whitespace, etc.
        if not text or not any(c.isalpha() for c in text):
            return text

        self._init_model()

        # We always translate if the translator is active for the session;
        # the original author relies on the Opus-MT model leaving words that
        # are already English as they are.
        # NOTE: truncation=True lets the tokenizer cut input that exceeds its
        # maximum length, so very long text may only be partially translated.
        inputs = self.tokenizer(
            text, return_tensors="pt", padding=True, truncation=True
        )
        outputs = self.model.generate(**inputs)
        # A single string was tokenized, so only the first output sequence
        # is decoded.
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)