Fatitommy committed on
Commit
646911f
·
verified ·
1 Parent(s): deec1f2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -12
app.py CHANGED
@@ -1,7 +1,7 @@
1
  """
2
  VoiceAura Translation API
3
  Models:
4
- 1. SLPG/English_to_Urdu_Unsupervised_MT (en → ur)
5
  2. SLPG/Punjabi_Shahmukhi_to_Gurmukhi_Transliteration (pa-s → pa-g)
6
  3. SLPG/Punjabi_Gurmukhi_to_Shahmukhi_Transliteration (pa-g → pa-s)
7
  """
@@ -27,7 +27,7 @@ app.add_middleware(
27
  allow_headers=["*"],
28
  )
29
 
30
- # ── Model configs ────────────────────────────────────────
31
  MODELS_CONFIG = {
32
  "en-ur": {
33
  "files": {
@@ -64,7 +64,7 @@ MODELS_CONFIG = {
64
  },
65
  }
66
 
67
- # ── Helpers ──────────────────────────────────────────────
68
  def download_file(url: str, path: str):
69
  if os.path.exists(path):
70
  print(f"[βœ“] Exists: {path}")
@@ -79,8 +79,16 @@ def download_file(url: str, path: str):
79
  print(f"[βœ“] Done: {path}")
80
 
81
  def detokenize(text: str) -> str:
82
- """Remove fairseq BPE tokens (▁ symbols)"""
83
- return text.replace("▁", "").strip()
 
 
 
 
 
 
 
 
84
 
85
  def load_model(pair: str):
86
  cfg = MODELS_CONFIG[pair]
@@ -103,13 +111,13 @@ def load_model(pair: str):
103
  print(f"[βœ“] Model ready: {pair}")
104
  return model
105
 
106
- # ── Startup ──────────────────────────────────────────────
107
  @app.on_event("startup")
108
  async def startup():
109
  for pair in MODELS_CONFIG:
110
  load_model(pair)
111
 
112
- # ── API ──────────────────────────────────────────────────
113
  class Req(BaseModel):
114
  text: str
115
  from_lang: str = "en"
@@ -133,13 +141,13 @@ def translate(req: Req):
133
  try:
134
  cfg = MODELS_CONFIG[pair]
135
  model = load_model(pair)
136
- result = model.translate(req.text.strip())
137
 
138
- # Detokenize if needed (Punjabi models)
139
- if cfg["detokenize"]:
140
- result = detokenize(result)
 
141
 
142
- return {"success": True, "translation": result, "pair": pair}
143
  except Exception as e:
144
  print(f"Error [{pair}]: {e}")
145
  return {"success": False, "translation": str(e)}
 
1
  """
2
  VoiceAura Translation API
3
  Models:
4
+ 1. SLPG/English_to_Urdu_Unsupervised_MT (en → ur)
5
  2. SLPG/Punjabi_Shahmukhi_to_Gurmukhi_Transliteration (pa-s → pa-g)
6
  3. SLPG/Punjabi_Gurmukhi_to_Shahmukhi_Transliteration (pa-g → pa-s)
7
  """
 
27
  allow_headers=["*"],
28
  )
29
 
30
+ # ── Model configs ─────────────────────────────────────────
31
  MODELS_CONFIG = {
32
  "en-ur": {
33
  "files": {
 
64
  },
65
  }
66
 
67
+ # ── Helpers ───────────────────────────────────────────────
68
  def download_file(url: str, path: str):
69
  if os.path.exists(path):
70
  print(f"[βœ“] Exists: {path}")
 
79
  print(f"[βœ“] Done: {path}")
80
 
81
  def detokenize(text: str) -> str:
82
+ """
83
+ Fairseq BPE output fix:
84
+ 'β–Ψ³Ψ§ΪˆΨ§ ▁گھر β–Ψ±Ψ§ΩˆΩ„ΩΎΩ†ΪˆΫŒ β–ΩˆΪ† ▁ہے' β†’ 'ساڈا Ϊ―ΪΎΨ± Ψ±Ψ§ΩˆΩ„ΩΎΩ†ΪˆΫŒ ΩˆΪ† ہے'
85
+ ▁ = word boundary marker β€” replace with space, then clean up
86
+ """
87
+ # ▁ at start of word = space before it
88
+ result = text.replace(" ▁", " ").replace("▁", "")
89
+ # Clean extra spaces
90
+ result = " ".join(result.split())
91
+ return result.strip()
92
 
93
  def load_model(pair: str):
94
  cfg = MODELS_CONFIG[pair]
 
111
  print(f"[βœ“] Model ready: {pair}")
112
  return model
113
 
114
+ # ── Startup ───────────────────────────────────────────────
115
  @app.on_event("startup")
116
  async def startup():
117
  for pair in MODELS_CONFIG:
118
  load_model(pair)
119
 
120
+ # ── API ───────────────────────────────────────────────────
121
  class Req(BaseModel):
122
  text: str
123
  from_lang: str = "en"
 
141
  try:
142
  cfg = MODELS_CONFIG[pair]
143
  model = load_model(pair)
144
+ raw = model.translate(req.text.strip())
145
 
146
+ print(f"[DEBUG] raw output [{pair}]: {raw}") # debug log
147
+
148
+ result = detokenize(raw) if cfg["detokenize"] else raw
149
+ return {"success": True, "translation": result, "pair": pair, "raw": raw}
150
 
 
151
  except Exception as e:
152
  print(f"Error [{pair}]: {e}")
153
  return {"success": False, "translation": str(e)}