"""VoiceAura Translation API.

Models:
    1. SLPG/English_to_Urdu_Unsupervised_MT (en → ur)
    2. SLPG/Punjabi_Shahmukhi_to_Gurmukhi_Transliteration (pa-s → pa-g)
    3. SLPG/Punjabi_Gurmukhi_to_Shahmukhi_Transliteration (pa-g → pa-s)
"""
import argparse
import os
import re

import requests
import torch
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

# PyTorch 2.6 fix: allow-list argparse.Namespace for the safe unpickler and
# keep a handle on the real torch.load so it can be restored after patching.
torch.serialization.add_safe_globals([argparse.Namespace])
_original_torch_load = torch.load


def patched_torch_load(*args, **kwargs):
    """Call the original ``torch.load`` with ``weights_only`` forced to False.

    SECURITY NOTE: ``weights_only=False`` performs a full unpickle, which can
    execute arbitrary code embedded in the checkpoint. Only use this for
    checkpoints from a trusted source.
    """
    kwargs["weights_only"] = False
    return _original_torch_load(*args, **kwargs)


app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# ── Model configs ─────────────────────────────────────────
# Keyed by "<from_lang>-<to_lang>". Per entry:
#   files      – file name → download URL
#   dir        – local directory the files are stored in
#   checkpoint – checkpoint file name passed to fairseq
#   detokenize – whether translate output is passed through detokenize()
#   instance   – cached loaded model (None until load_model() succeeds)
MODELS_CONFIG = {
    "en-ur": {
        "files": {
            "checkpoint_8_96000.pt": "https://huggingface.co/SLPG/English_to_Urdu_Unsupervised_MT/resolve/main/checkpoint_8_96000.pt",
            "dict.en.txt": "https://huggingface.co/SLPG/English_to_Urdu_Unsupervised_MT/resolve/main/dict.en.txt",
            "dict.ur.txt": "https://huggingface.co/SLPG/English_to_Urdu_Unsupervised_MT/resolve/main/dict.ur.txt",
        },
        "dir": "models/en_ur",
        "checkpoint": "checkpoint_8_96000.pt",
        "detokenize": False,
        "instance": None,
    },
    "pa-s-pa-g": {
        "files": {
            "checkpoint_5_78000.pt": "https://huggingface.co/SLPG/Punjabi_Shahmukhi_to_Gurmukhi_Transliteration/resolve/main/checkpoint_5_78000.pt",
            "dict.pa.txt": "https://huggingface.co/SLPG/Punjabi_Shahmukhi_to_Gurmukhi_Transliteration/resolve/main/dict.pa.txt",
            "dict.pk.txt": "https://huggingface.co/SLPG/Punjabi_Shahmukhi_to_Gurmukhi_Transliteration/resolve/main/dict.pk.txt",
        },
        "dir": "models/pa_s_pa_g",
        "checkpoint": "checkpoint_5_78000.pt",
        "detokenize": True,
        "instance": None,
    },
    "pa-g-pa-s": {
        "files": {
            "checkpoint_13_129000.pt": "https://huggingface.co/SLPG/Punjabi_Gurmukhi_to_Shahmukhi_Transliteration/resolve/main/checkpoint_13_129000.pt",
            "dict.pa.txt": "https://huggingface.co/SLPG/Punjabi_Gurmukhi_to_Shahmukhi_Transliteration/resolve/main/dict.pa.txt",
            "dict.pk.txt": "https://huggingface.co/SLPG/Punjabi_Gurmukhi_to_Shahmukhi_Transliteration/resolve/main/dict.pk.txt",
        },
        "dir": "models/pa_g_pa_s",
        "checkpoint": "checkpoint_13_129000.pt",
        "detokenize": True,
        "instance": None,
    },
}


# ── Helpers ───────────────────────────────────────────────
def download_file(url: str, path: str, timeout: float = 60.0):
    """Download ``url`` to ``path`` unless ``path`` already exists.

    The payload is streamed into ``<path>.part`` and moved into place only
    after the transfer finishes, so an interrupted download never leaves a
    truncated file that a later call would mistake for a complete one.

    Args:
        url: Source URL.
        path: Destination file path.
        timeout: Connect/read timeout in seconds handed to ``requests.get``;
            it bounds each socket wait, not the total transfer time.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-2xx response (via ``raise_for_status``).
        OSError: If the destination cannot be written.
    """
    if os.path.exists(path):
        print(f"[✓] Exists: {path}")
        return

    print(f"[↓] Downloading: {path} ...")
    parent = os.path.dirname(path)
    if parent:
        # os.makedirs("") raises, so only create a directory when one is named.
        os.makedirs(parent, exist_ok=True)

    tmp_path = f"{path}.part"
    try:
        with requests.get(url, stream=True, timeout=timeout) as r:
            r.raise_for_status()
            with open(tmp_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        os.replace(tmp_path, path)
    finally:
        # After a successful os.replace the temp file is gone; this only
        # fires when the download failed part-way through.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    print(f"[✓] Done: {path}")


def detokenize(sentence: str) -> str:
    """Remove the ``▁`` word-start markers and surrounding whitespace.

    Per the original author's note, this mirrors the logic of the SLPG
    Streamlit app: ``▁`` marks the start of a word (e.g. 'ت ُس ِیں'), the
    spacing in the model output is already correct, so only the markers
    need to be removed.
    """
    return sentence.replace('▁', '').strip()


def load_model(pair: str):
    """Return the model for ``pair``, downloading and loading it on first use.

    The loaded model is cached in ``MODELS_CONFIG[pair]["instance"]`` and
    returned directly on subsequent calls.

    Args:
        pair: A key of ``MODELS_CONFIG`` (e.g. ``"en-ur"``).

    Raises:
        KeyError: If ``pair`` is not a key of ``MODELS_CONFIG``.
        Exception: Whatever the download or fairseq loading raises.
    """
    cfg = MODELS_CONFIG[pair]
    if cfg["instance"] is not None:
        return cfg["instance"]

    for fname, url in cfg["files"].items():
        download_file(url, os.path.join(cfg["dir"], fname))

    # torch.load is swapped only for the duration of the checkpoint load and
    # is always restored, even when loading fails, so a failed load cannot
    # leave the process-wide torch.load permanently patched.
    torch.load = patched_torch_load
    try:
        # Imported lazily so the module can be imported without fairseq.
        from fairseq.models.transformer import TransformerModel

        model = TransformerModel.from_pretrained(
            cfg["dir"],
            checkpoint_file=cfg["checkpoint"],
            data_name_or_path=cfg["dir"],
        )
    finally:
        torch.load = _original_torch_load

    model.eval()
    cfg["instance"] = model
    print(f"[✓] Model ready: {pair}")
    return model


# ── Startup ───────────────────────────────────────────────
@app.on_event("startup")
async def startup():
    """Eagerly load every configured model when the application starts."""
    for pair in MODELS_CONFIG:
        load_model(pair)


# ── API ───────────────────────────────────────────────────
class Req(BaseModel):
    """Request body for ``POST /translate``."""

    text: str
    from_lang: str = "en"
    to_lang: str = "ur"


@app.get("/")
def root():
    """Report service status and which models are currently loaded."""
    loaded = {k: MODELS_CONFIG[k]["instance"] is not None for k in MODELS_CONFIG}
    return {"status": "VoiceAura API ✓", "models_loaded": loaded}
@app.post("/translate")
def translate(req: Req):
    """Translate or transliterate ``req.text`` for the requested pair.

    The pair key is ``"<from_lang>-<to_lang>"`` and must be a key of
    ``MODELS_CONFIG``. Always returns a dict with ``success`` and
    ``translation``; on success it also carries ``pair`` and the model's
    ``raw`` output. Blank input and unsupported pairs yield
    ``success: False``, as does any exception raised while loading or
    running the model (its message is returned in ``translation``).
    """
    stripped = req.text.strip()
    if not stripped:
        return {"success": False, "translation": ""}

    pair = f"{req.from_lang}-{req.to_lang}"
    settings = MODELS_CONFIG.get(pair)
    if settings is None:
        return {"success": False, "translation": f"⚠️ Pair '{pair}' not supported."}

    try:
        translator = load_model(pair)
        raw = translator.translate(stripped)
        print(f"[DEBUG] pair={pair} | input={req.text} | raw={repr(raw)}")

        # Only pairs flagged in the config get the ▁-marker cleanup.
        if settings["detokenize"]:
            result = detokenize(raw)
        else:
            result = raw
        print(f"[DEBUG] final={repr(result)}")

        response = {
            "success": True,
            "translation": result,
            "pair": pair,
            "raw": raw,
        }
        return response
    except Exception as e:
        print(f"[ERROR] [{pair}]: {e}")
        return {"success": False, "translation": str(e)}