import datetime
import hashlib
import logging
import time
from typing import Optional

from fastapi import FastAPI
from pydantic import BaseModel, Field
from transformers import MarianMTModel, MarianTokenizer

# Service-wide logger.
# NOTE(review): only the level is set here; no handler is attached in the
# visible code, so INFO records are emitted only if logging is configured
# elsewhere (e.g. by the server entry point) - confirm.
logger = logging.getLogger("translate")
logger.setLevel(logging.INFO)

# ASGI application object that the route decorators in this file register on.
app = FastAPI(title="翻译服务")

# In-process translation cache, keyed by the SHA-256 hex digest of the source
# text. Each value is a (translated_text, expire_ts) tuple where expire_ts is
# a time.time() timestamp.
# NOTE(review): nothing caps the number of entries; they are only removed
# once expired, so distinct inputs keep accumulating until then.
cache = {}


def _hash_text(text: str) -> str:
    """Return the SHA-256 hex digest of *text* encoded as UTF-8."""
    return hashlib.sha256(text.encode("utf-8")).hexdigest()


def _clean_cache() -> None:
    """Delete every expired entry from the module-level ``cache``.

    Each cache value is unpacked as a two-item tuple whose second item is an
    expiry timestamp; an entry is removed when that timestamp is strictly
    less than the current ``time.time()``.
    """
    now = time.time()

    # Collect the keys first: deleting from a dict while iterating over it
    # raises RuntimeError.
    expired_keys = [key for key, (_, expire_ts) in cache.items() if expire_ts < now]
    for key in expired_keys:
        del cache[key]


# Checkpoint identifier handed to both from_pretrained() calls below.
MODEL_NAME = "Helsinki-NLP/opus-mt-tc-bible-big-zhx-en"

# The tokenizer and model are loaded once, at import time, and then shared by
# every request; importing this module therefore blocks until loading is done.
logger.info("%s Loading model %s...", datetime.datetime.now(), MODEL_NAME)
tokenizer = MarianTokenizer.from_pretrained(MODEL_NAME)
model = MarianMTModel.from_pretrained(MODEL_NAME)
logger.info("%s Model loaded.", datetime.datetime.now())


# Request body accepted by POST /api/translate.
# Documented with comments instead of a docstring on purpose: pydantic copies
# a model's docstring into the generated JSON schema as its description.
class TranslateRequest(BaseModel):
    # Text to translate. Required: ``...`` declares the field without a default.
    text: str = Field(..., description="待翻译的中文文本")


# Response body returned by POST /api/translate.
# Documented with comments instead of a docstring on purpose: pydantic copies
# a model's docstring into the generated JSON schema as its description.
class TranslateResponse(BaseModel):
    # Decoded model output for the submitted text.
    translated_text: str
    # Optional language tag; the endpoint visible in this file always leaves it None.
    detected_lang: Optional[str] = None


# How long a cached translation stays valid, in seconds (30 minutes).
_CACHE_TTL_SECONDS = 30 * 60


# POST /api/translate
#
# Translates ``req.text`` with the module-level tokenizer/model and returns a
# TranslateResponse. Results are cached in ``cache`` under the SHA-256 digest
# of the text for _CACHE_TTL_SECONDS; a live cache entry is returned without
# invoking the model.
#
# Documented with comments instead of a docstring on purpose: FastAPI
# publishes an endpoint's docstring as the operation description in the
# generated OpenAPI schema.
#
# NOTE(review): tokenization and model.generate() run synchronously inside
# this coroutine, so the event loop is blocked for the duration of each
# uncached translation. Consider offloading to a worker thread if concurrent
# requests matter.
@app.post("/api/translate", response_model=TranslateResponse)
async def translate(req: TranslateRequest):
    # Drop expired entries before looking anything up.
    _clean_cache()

    key = _hash_text(req.text)

    # Single lookup instead of `in` followed by indexing.
    cached = cache.get(key)
    if cached is not None:
        translated_text, expire_ts = cached
        # Re-check the expiry: an entry whose timestamp equals "now" survives
        # the cleanup above but must not be served.
        if expire_ts > time.time():
            logger.info("Cache hit: %s", key)
            return TranslateResponse(translated_text=translated_text)

    # truncation=True cuts inputs that exceed the tokenizer's configured
    # maximum length, so over-long text cannot exceed the model's input
    # limit; text within the limit is tokenized exactly as before.
    batch = tokenizer(
        [req.text],
        return_tensors="pt",
        padding=True,
        truncation=True,
    )
    translated = model.generate(**batch)
    output = tokenizer.decode(translated[0], skip_special_tokens=True)

    cache[key] = (output, time.time() + _CACHE_TTL_SECONDS)

    return TranslateResponse(
        translated_text=output,
        detected_lang=None,
    )