Fatitommy committed on
Commit
6a4685e
·
verified ·
1 Parent(s): f9aa059

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -35
app.py CHANGED
@@ -9,7 +9,7 @@ Models:
9
  from fastapi import FastAPI
10
  from fastapi.middleware.cors import CORSMiddleware
11
  from pydantic import BaseModel
12
- import os, requests, argparse, torch
13
 
14
  # ✅ PyTorch 2.6 fix
15
  torch.serialization.add_safe_globals([argparse.Namespace])
@@ -78,34 +78,14 @@ def download_file(url: str, path: str):
78
  f.write(chunk)
79
  print(f"[✓] Done: {path}")
80
 
81
- def detokenize(text: str) -> str:
82
  """
83
- Fairseq sentencepiece/BPE output clean karo.
84
-
85
- Possible formats:
86
- 1. "ساڈا ▁گھر ▁راولپنڈی" → spaces ke saath
87
- 2. "ت ُس ِیں ک ِو ے ہ ِو" → characters alag alag
88
- 3. "▁ت▁و▁س▁ی▁ں" → chipke hue
89
-
90
- Fix: pehle ▁ ko space se replace karo, phir
91
- sirf word boundaries pe space rakho
92
  """
93
- # Step 1: ▁ ko space se replace karo
94
- text = text.replace("▁", " ")
95
-
96
- # Step 2: multiple spaces ko single space karo
97
- text = " ".join(text.split())
98
-
99
- # Step 3: characters ke darmiyan wali extra spaces hatao
100
- # (jab BPE ne har character ko alag token banaya ho)
101
- # Unicode ranges: Shahmukhi (0600-06FF), Gurmukhi (0A00-0A7F)
102
- import re
103
- # Shahmukhi characters ke beech space hatao
104
- text = re.sub(r'(?<=[\u0600-\u06FF])\s(?=[\u0600-\u06FF\u0610-\u061A\u064B-\u065F])', '', text)
105
- # Gurmukhi characters ke beech space hatao
106
- text = re.sub(r'(?<=[\u0A00-\u0A7F])\s(?=[\u0A00-\u0A7F])', '', text)
107
-
108
- return text.strip()
109
 
110
  def load_model(pair: str):
111
  cfg = MODELS_CONFIG[pair]
@@ -160,22 +140,19 @@ def translate(req: Req):
160
  model = load_model(pair)
161
  raw = model.translate(req.text.strip())
162
 
163
- # Debug logs mein dikhega
164
- print(f"[DEBUG] pair={pair}")
165
- print(f"[DEBUG] input={req.text}")
166
- print(f"[DEBUG] raw={repr(raw)}")
167
 
168
  result = detokenize(raw) if cfg["detokenize"] else raw
169
 
170
- print(f"[DEBUG] final={result}")
171
 
172
  return {
173
  "success": True,
174
  "translation": result,
175
  "pair": pair,
176
- "raw": raw, # debug ke liye
177
  }
178
 
179
  except Exception as e:
180
- print(f"Error [{pair}]: {e}")
181
- return {"success": False, "translation": str(e)}
 
9
  from fastapi import FastAPI
10
  from fastapi.middleware.cors import CORSMiddleware
11
  from pydantic import BaseModel
12
+ import os, requests, argparse, torch, re
13
 
14
  # ✅ PyTorch 2.6 fix
15
  torch.serialization.add_safe_globals([argparse.Namespace])
 
78
  f.write(chunk)
79
  print(f"[✓] Done: {path}")
80
 
81
+ def detokenize(sentence: str) -> str:
82
  """
83
+ SLPG original logic exactly same as their Streamlit app:
84
+ ▁ = word start marker
85
+ 'ت ُس ِیں' spaces already sahi hain model ke output mein
86
+ bas ▁ remove karo
 
 
 
 
 
87
  """
88
+ return sentence.replace('▁', '').strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
  def load_model(pair: str):
91
  cfg = MODELS_CONFIG[pair]
 
140
  model = load_model(pair)
141
  raw = model.translate(req.text.strip())
142
 
143
+ print(f"[DEBUG] pair={pair} | input={req.text} | raw={repr(raw)}")
 
 
 
144
 
145
  result = detokenize(raw) if cfg["detokenize"] else raw
146
 
147
+ print(f"[DEBUG] final={repr(result)}")
148
 
149
  return {
150
  "success": True,
151
  "translation": result,
152
  "pair": pair,
153
+ "raw": raw,
154
  }
155
 
156
  except Exception as e:
157
+ print(f"[ERROR] [{pair}]: {e}")
158
+ return {"success": False, "translation": str(e)}