# Page header captured with the file (not part of the program):
# Spaces: Fatitommy / application — status: Sleeping
# File size: 5,919 Bytes

"""
VoiceAura Translation API
Models:
  1. SLPG/English_to_Urdu_Unsupervised_MT              (en    → ur)
  2. SLPG/Punjabi_Shahmukhi_to_Gurmukhi_Transliteration (pa-s → pa-g)
  3. SLPG/Punjabi_Gurmukhi_to_Shahmukhi_Transliteration (pa-g → pa-s)
"""

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import os, requests, argparse, torch, re

# ✅ PyTorch 2.6 fix
torch.serialization.add_safe_globals([argparse.Namespace])
_original_torch_load = torch.load

def patched_torch_load(*args, **kwargs):
    kwargs["weights_only"] = False
    return _original_torch_load(*args, **kwargs)

# Application object; the startup hook and the route handlers in this file
# register themselves on it through its decorators.
app = FastAPI()
# CORS is configured wide open: the wildcard is used for origins, HTTP methods
# and request headers alike.
# NOTE(review): presumably intentional so any front end can call the API —
# confirm before restricting.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# ── Model configs ─────────────────────────────────────────
# Registry of supported language pairs, keyed by "<from_lang>-<to_lang>"
# (the same string the /translate handler builds from the request).
# Each entry holds:
#   "files"      - local file name -> download URL; fetched into "dir" on first load
#   "dir"        - local directory holding the files; also handed to fairseq as the
#                  model directory and data path
#   "checkpoint" - file name passed to fairseq as the checkpoint file
#   "detokenize" - when True, the raw model output is passed through detokenize()
#   "instance"   - cache slot for the loaded model; None until load_model() fills it
MODELS_CONFIG = {
    # English -> Urdu translation
    "en-ur": {
        "files": {
            "checkpoint_8_96000.pt": "https://huggingface.co/SLPG/English_to_Urdu_Unsupervised_MT/resolve/main/checkpoint_8_96000.pt",
            "dict.en.txt":           "https://huggingface.co/SLPG/English_to_Urdu_Unsupervised_MT/resolve/main/dict.en.txt",
            "dict.ur.txt":           "https://huggingface.co/SLPG/English_to_Urdu_Unsupervised_MT/resolve/main/dict.ur.txt",
        },
        "dir":        "models/en_ur",
        "checkpoint": "checkpoint_8_96000.pt",
        "detokenize": False,
        "instance":   None,
    },
    # Punjabi Shahmukhi -> Gurmukhi transliteration
    "pa-s-pa-g": {
        "files": {
            "checkpoint_5_78000.pt": "https://huggingface.co/SLPG/Punjabi_Shahmukhi_to_Gurmukhi_Transliteration/resolve/main/checkpoint_5_78000.pt",
            "dict.pa.txt":           "https://huggingface.co/SLPG/Punjabi_Shahmukhi_to_Gurmukhi_Transliteration/resolve/main/dict.pa.txt",
            "dict.pk.txt":           "https://huggingface.co/SLPG/Punjabi_Shahmukhi_to_Gurmukhi_Transliteration/resolve/main/dict.pk.txt",
        },
        "dir":        "models/pa_s_pa_g",
        "checkpoint": "checkpoint_5_78000.pt",
        "detokenize": True,
        "instance":   None,
    },
    # Punjabi Gurmukhi -> Shahmukhi transliteration
    "pa-g-pa-s": {
        "files": {
            "checkpoint_13_129000.pt": "https://huggingface.co/SLPG/Punjabi_Gurmukhi_to_Shahmukhi_Transliteration/resolve/main/checkpoint_13_129000.pt",
            "dict.pa.txt":             "https://huggingface.co/SLPG/Punjabi_Gurmukhi_to_Shahmukhi_Transliteration/resolve/main/dict.pa.txt",
            "dict.pk.txt":             "https://huggingface.co/SLPG/Punjabi_Gurmukhi_to_Shahmukhi_Transliteration/resolve/main/dict.pk.txt",
        },
        "dir":        "models/pa_g_pa_s",
        "checkpoint": "checkpoint_13_129000.pt",
        "detokenize": True,
        "instance":   None,
    },
}

# ── Helpers ───────────────────────────────────────────────
def download_file(url: str, path: str):
    """Download ``url`` to ``path`` unless ``path`` already exists.

    The payload is streamed into ``<path>.part`` and moved onto ``path`` only
    once the transfer has finished. An interrupted download therefore never
    leaves a truncated file at ``path`` that a later call would mistake for a
    complete one.

    Args:
        url: HTTP(S) location of the file.
        path: Destination on the local filesystem. Missing parent directories
            are created; a bare file name (no directory part) is accepted.

    Raises:
        requests.HTTPError: The server answered with an error status.
        requests.RequestException: Connection failure or timeout.
        OSError: The destination could not be written.
    """
    if os.path.exists(path):
        print(f"[✓] Exists: {path}")
        return
    print(f"[↓] Downloading: {path} ...")

    parent_dir = os.path.dirname(path)
    if parent_dir:  # os.makedirs("") raises, so skip it for bare file names
        os.makedirs(parent_dir, exist_ok=True)

    partial_path = f"{path}.part"
    try:
        # (connect, read) timeouts: without them a stalled server blocks forever.
        with requests.get(url, stream=True, timeout=(10, 60)) as r:
            r.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:  # ignore empty keep-alive chunks
                        f.write(chunk)
        # Atomic move: ``path`` only ever appears fully written.
        os.replace(partial_path, path)
    finally:
        # After a successful move the partial file is gone; on any failure
        # remove the leftover so it cannot accumulate or be misread.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print(f"[✓] Done: {path}")

def detokenize(sentence: str) -> str:
    """
    Remove every '▁' word-start marker from ``sentence`` and trim both ends.

    Per the original author's note this mirrors the logic of SLPG's own
    Streamlit app: the spacing in the model output (e.g. 'ت ُس ِیں') is
    taken to be correct already, so only the markers are dropped.
    """
    without_markers = "".join(sentence.split("▁"))
    return without_markers.strip()

def load_model(pair: str):
    """Return the model for ``pair``, downloading and loading it on first use.

    The loaded model is cached in ``MODELS_CONFIG[pair]["instance"]``; later
    calls return that cached object without touching disk or network.

    Args:
        pair: Key of ``MODELS_CONFIG``, e.g. ``"en-ur"``.

    Returns:
        The fairseq model produced by ``TransformerModel.from_pretrained``,
        switched to eval mode.

    Raises:
        KeyError: ``pair`` is not a key of ``MODELS_CONFIG``.
        Exception: Whatever the download, the fairseq import or the
            checkpoint load raises is propagated; nothing is cached then.
    """
    cfg = MODELS_CONFIG[pair]
    if cfg["instance"] is not None:
        return cfg["instance"]

    for fname, url in cfg["files"].items():
        download_file(url, os.path.join(cfg["dir"], fname))

    # torch.load is swapped for the weights_only=False variant only while the
    # checkpoint is loaded. The finally clause guarantees the original is put
    # back even when the import or the load fails; otherwise the override
    # would silently stay active for the whole process.
    torch.load = patched_torch_load
    try:
        from fairseq.models.transformer import TransformerModel
        model = TransformerModel.from_pretrained(
            cfg["dir"],
            checkpoint_file=cfg["checkpoint"],
            data_name_or_path=cfg["dir"],
        )
    finally:
        torch.load = _original_torch_load

    model.eval()
    cfg["instance"] = model
    print(f"[✓] Model ready: {pair}")
    return model

# ── Startup ───────────────────────────────────────────────
@app.on_event("startup")
async def startup():
    """Load every model listed in ``MODELS_CONFIG`` when the app starts."""
    for language_pair in MODELS_CONFIG.keys():
        load_model(language_pair)

# ── API ───────────────────────────────────────────────────
# Request body accepted by POST /translate.
class Req(BaseModel):
    # Text to translate or transliterate.
    text: str
    # Source language code; combined with to_lang as "<from_lang>-<to_lang>"
    # to pick the entry in MODELS_CONFIG.
    from_lang: str = "en"
    # Target language code.
    to_lang: str   = "ur"

@app.get("/")
def root():
    # Status endpoint: reports, for each configured language pair, whether its
    # model instance has already been loaded into the cache slot.
    load_state = {}
    for pair_name, pair_cfg in MODELS_CONFIG.items():
        load_state[pair_name] = pair_cfg["instance"] is not None
    return {"status": "VoiceAura API ✓", "models_loaded": load_state}

@app.post("/translate")
def translate(req: Req):
    # Translate/transliterate req.text with the model selected by
    # "<from_lang>-<to_lang>". Every outcome is reported in the JSON body via
    # the "success" flag; failures carry their message in "translation".
    stripped_text = req.text.strip()
    if not stripped_text:
        # Nothing to translate once surrounding whitespace is removed.
        return {"success": False, "translation": ""}

    pair = f"{req.from_lang}-{req.to_lang}"
    if pair not in MODELS_CONFIG:
        return {"success": False, "translation": f"⚠️ Pair '{pair}' not supported."}

    try:
        cfg = MODELS_CONFIG[pair]
        model = load_model(pair)
        raw = model.translate(stripped_text)
        print(f"[DEBUG] pair={pair} | input={req.text} | raw={repr(raw)}")

        # Only pairs flagged with "detokenize" get the marker clean-up.
        if cfg["detokenize"]:
            result = detokenize(raw)
        else:
            result = raw
        print(f"[DEBUG] final={repr(result)}")

        response = {
            "success":     True,
            "translation": result,
            "pair":        pair,
            "raw":         raw,
        }
        return response
    except Exception as e:
        # Boundary handler: log the error and hand its message to the client.
        print(f"[ERROR] [{pair}]: {e}")
        return {"success": False, "translation": str(e)}