"""
VoiceAura Translation API
Models:
1. SLPG/English_to_Urdu_Unsupervised_MT (en → ur)
2. SLPG/Punjabi_Shahmukhi_to_Gurmukhi_Transliteration (pa-s → pa-g)
3. SLPG/Punjabi_Gurmukhi_to_Shahmukhi_Transliteration (pa-g → pa-s)
"""
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import os, requests, argparse, torch, re
# PyTorch 2.6 fix (torch.load defaults to weights_only=True since 2.6)
torch.serialization.add_safe_globals([argparse.Namespace])
_original_torch_load = torch.load
def patched_torch_load(*args, **kwargs):
kwargs["weights_only"] = False
return _original_torch_load(*args, **kwargs)
app = FastAPI()

# CORS is left fully open: any origin, any method and any header is accepted.
_cors_options = {
    "allow_origins": ["*"],
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_options)
# ── Model configs ─────────────────────────────────────────
# Every model lives in its own repository under the SLPG organisation on the
# Hugging Face Hub; individual files are fetched via the "resolve/main" URL.
_HF_URL_TEMPLATE = "https://huggingface.co/SLPG/{repo}/resolve/main/{fname}"


def _model_entry(repo, local_dir, checkpoint, dictionaries, strip_markers):
    """Build one ``MODELS_CONFIG`` entry.

    Args:
        repo: Repository name under the SLPG organisation.
        local_dir: Directory the model files are stored in locally.
        checkpoint: File name of the checkpoint inside the repository.
        dictionaries: File names of the dictionary files in the repository.
        strip_markers: Value stored under the ``"detokenize"`` key.

    Returns:
        A dict with the keys ``files`` (file name -> download URL, checkpoint
        first), ``dir``, ``checkpoint``, ``detokenize`` and ``instance``
        (always ``None`` here; ``load_model`` stores the loaded model there).
    """
    remote_files = (checkpoint, *dictionaries)
    return {
        "files": {
            fname: _HF_URL_TEMPLATE.format(repo=repo, fname=fname)
            for fname in remote_files
        },
        "dir": local_dir,
        "checkpoint": checkpoint,
        "detokenize": strip_markers,
        "instance": None,
    }


# Keyed by "<from_lang>-<to_lang>".
MODELS_CONFIG = {
    "en-ur": _model_entry(
        "English_to_Urdu_Unsupervised_MT",
        "models/en_ur",
        "checkpoint_8_96000.pt",
        ("dict.en.txt", "dict.ur.txt"),
        False,
    ),
    "pa-s-pa-g": _model_entry(
        "Punjabi_Shahmukhi_to_Gurmukhi_Transliteration",
        "models/pa_s_pa_g",
        "checkpoint_5_78000.pt",
        ("dict.pa.txt", "dict.pk.txt"),
        True,
    ),
    "pa-g-pa-s": _model_entry(
        "Punjabi_Gurmukhi_to_Shahmukhi_Transliteration",
        "models/pa_g_pa_s",
        "checkpoint_13_129000.pt",
        ("dict.pa.txt", "dict.pk.txt"),
        True,
    ),
}
# ── Helpers ───────────────────────────────────────────────
def download_file(url: str, path: str):
    """Download *url* to *path* unless *path* already exists.

    The body is streamed into a sibling ``<path>.part`` file and moved into
    place only after the transfer completed, so an interrupted download never
    leaves a truncated file that a later call would treat as "already there".

    Args:
        url: HTTP(S) location of the file.
        path: Destination file path; missing parent directories are created.

    Raises:
        requests.RequestException: on connection errors, timeouts or a
            non-success HTTP status (via ``raise_for_status``).
        OSError: if the destination cannot be written.
    """
    if os.path.exists(path):
        print(f"[β] Exists: {path}")
        return
    print(f"[β] Downloading: {path} ...")

    # os.makedirs("") raises, so only create a directory when path has one.
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    partial_path = f"{path}.part"
    try:
        # (connect, read) timeouts: the read timeout applies per socket read,
        # not to the whole transfer, so large checkpoints still download.
        with requests.get(url, stream=True, timeout=(10, 300)) as r:
            r.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        os.replace(partial_path, path)
    finally:
        # After a successful replace the partial file is gone; on any failure
        # this removes the incomplete data.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print(f"[β] Done: {path}")
def detokenize(sentence: str) -> str:
    """Remove word-start markers from a model output string.

    Follows the logic of SLPG's original Streamlit app: the marker character
    U+2581 (LOWER ONE EIGHTH BLOCK) flags the start of a word, and the spaces
    in the model output are already correct, so the markers are simply
    dropped and surrounding whitespace is trimmed.

    Args:
        sentence: Raw model output.

    Returns:
        *sentence* without U+2581 characters and without leading/trailing
        whitespace.
    """
    # Written as an escape so the marker cannot be corrupted by a bad
    # encoding round-trip of this source file.
    return sentence.replace("\u2581", "").strip()
def load_model(pair: str):
    """Return the model for *pair*, downloading and loading it on first use.

    The loaded model is cached in ``MODELS_CONFIG[pair]["instance"]``; later
    calls return that cached object.

    Args:
        pair: A key of ``MODELS_CONFIG``.

    Returns:
        The fairseq ``TransformerModel`` for the pair, switched to eval mode.

    Raises:
        KeyError: if *pair* is not a key of ``MODELS_CONFIG``.
    """
    cfg = MODELS_CONFIG[pair]
    if cfg["instance"] is not None:
        return cfg["instance"]

    for fname, url in cfg["files"].items():
        download_file(url, os.path.join(cfg["dir"], fname))

    # torch.load is swapped globally only for the duration of the load; the
    # finally clause restores it even when the import or the load fails, so
    # the rest of the process never keeps the weights_only=False override.
    torch.load = patched_torch_load
    try:
        from fairseq.models.transformer import TransformerModel

        model = TransformerModel.from_pretrained(
            cfg["dir"],
            checkpoint_file=cfg["checkpoint"],
            data_name_or_path=cfg["dir"],
        )
    finally:
        torch.load = _original_torch_load

    model.eval()
    cfg["instance"] = model
    print(f"[β] Model ready: {pair}")
    return model
# ── Startup ───────────────────────────────────────────────
@app.on_event("startup")
async def startup():
    """Load every model listed in ``MODELS_CONFIG`` when the app starts."""
    # Iterating the dict yields its keys, i.e. the language-pair names.
    for language_pair in MODELS_CONFIG:
        load_model(language_pair)
# ── API ───────────────────────────────────────────────────
class Req(BaseModel):
    """Request body of ``POST /translate``."""

    # Text to translate or transliterate.
    text: str

    # Source and target codes; the endpoint joins them as
    # "<from_lang>-<to_lang>" to select an entry of MODELS_CONFIG.
    from_lang: str = "en"
    to_lang: str = "ur"
@app.get("/")
def root():
    """Report service status and, per language pair, whether its model is loaded."""
    loaded = {
        pair: cfg["instance"] is not None
        for pair, cfg in MODELS_CONFIG.items()
    }
    return {"status": "VoiceAura API β", "models_loaded": loaded}
@app.post("/translate")
def translate(req: Req):
    """Translate or transliterate ``req.text`` with the model for the requested pair.

    The pair name is ``"<from_lang>-<to_lang>"``. Failures are reported in the
    response body rather than raised: blank input, an unsupported pair, or
    any exception during loading/translation all yield ``"success": False``.

    Returns:
        On success a dict with ``success``, ``translation``, ``pair`` and
        ``raw`` (the unprocessed model output); otherwise a dict with
        ``success`` set to False and a message (or empty string) in
        ``translation``.
    """
    text = req.text.strip()
    if not text:
        return {"success": False, "translation": ""}

    pair = f"{req.from_lang}-{req.to_lang}"
    cfg = MODELS_CONFIG.get(pair)
    if cfg is None:
        return {"success": False, "translation": f"β οΈ Pair '{pair}' not supported."}

    try:
        raw = load_model(pair).translate(text)
        print(f"[DEBUG] pair={pair} | input={req.text} | raw={raw!r}")
        # Only pairs flagged with "detokenize" have their output post-processed.
        result = raw
        if cfg["detokenize"]:
            result = detokenize(raw)
        print(f"[DEBUG] final={result!r}")
    except Exception as e:
        print(f"[ERROR] [{pair}]: {e}")
        return {"success": False, "translation": str(e)}
    else:
        return {
            "success": True,
            "translation": result,
            "pair": pair,
            "raw": raw,
        }
|