Spaces:

Fatitommy
/

application

Sleeping

App Files Files Community

Fatitommy committed 14 days ago

Commit

6a4685e

verified ·

1 Parent(s): f9aa059

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -35

app.py CHANGED Viewed

@@ -9,7 +9,7 @@ Models:
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
-import os, requests, argparse, torch
 # ✅ PyTorch 2.6 fix
 torch.serialization.add_safe_globals([argparse.Namespace])
@@ -78,34 +78,14 @@ def download_file(url: str, path: str):
                 f.write(chunk)
     print(f"[✓] Done: {path}")
-def detokenize(text: str) -> str:
     """
-    Clean up Fairseq sentencepiece/BPE output.
-
-    Possible input formats:
-    1. "▁ساڈا ▁گھر ▁راولپنڈی"  → pieces separated by spaces
-    2. "ت ُس ِیں ک ِو ے ہ ِو"   → characters split apart
-    3. "▁ت▁و▁س▁ی▁ں"            → pieces stuck together
-
-    Approach: first replace ▁ with a space, then try to keep
-    spaces only at word boundaries.
     """
-    # Step 1: replace the ▁ marker with a space
-    text = text.replace("▁", " ")
-    # Step 2: collapse runs of whitespace into a single space
-    text = " ".join(text.split())
-    # Step 3: remove the extra spaces between characters
-    # (for the case where BPE made every character its own token)
-    # Unicode ranges: Shahmukhi (0600-06FF), Gurmukhi (0A00-0A7F)
-    import re
-    # Remove a whitespace character that sits between two Shahmukhi characters.
-    # NOTE(review): after step 2 every remaining space is a single space, so this
-    # also matches spaces at real word boundaries between two characters in
-    # 0600-06FF; confirm that merging whole words is acceptable.
-    text = re.sub(r'(?<=[\u0600-\u06FF])\s(?=[\u0600-\u06FF\u0610-\u061A\u064B-\u065F])', '', text)
-    # Remove a whitespace character that sits between two Gurmukhi characters
-    text = re.sub(r'(?<=[\u0A00-\u0A7F])\s(?=[\u0A00-\u0A7F])', '', text)
-    return text.strip()
 def load_model(pair: str):
     cfg = MODELS_CONFIG[pair]
@@ -160,22 +140,19 @@ def translate(req: Req):
         model  = load_model(pair)
         raw    = model.translate(req.text.strip())
-        # Debug — logs mein dikhega
-        print(f"[DEBUG] pair={pair}")
-        print(f"[DEBUG] input={req.text}")
-        print(f"[DEBUG] raw={repr(raw)}")
         result = detokenize(raw) if cfg["detokenize"] else raw
-        print(f"[DEBUG] final={result}")
         return {
             "success":     True,
             "translation": result,
             "pair":        pair,
-            "raw":         raw,      # debug ke liye
         }
     except Exception as e:
-        print(f"Error [{pair}]: {e}")
-        return {"success": False, "translation": str(e)}

 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
+import os, requests, argparse, torch, re
 # ✅ PyTorch 2.6 fix
 torch.serialization.add_safe_globals([argparse.Namespace])
                 f.write(chunk)
     print(f"[✓] Done: {path}")
+def detokenize(sentence: str) -> str:
     """
+    Remove the '▁' word-start markers from a translated sentence.
+
+    Per the author's note, this follows the original SLPG logic used in
+    their Streamlit app: the spaces in the model output (e.g. 'ت ُس ِیں')
+    are already correct, so only the '▁' characters are removed.
+
+    Returns the sentence with every '▁' deleted and leading/trailing
+    whitespace stripped; interior spaces are left untouched.
     """
+    return sentence.replace('▁', '').strip()
 def load_model(pair: str):
     cfg = MODELS_CONFIG[pair]
         model  = load_model(pair)
         raw    = model.translate(req.text.strip())
+        print(f"[DEBUG] pair={pair} | input={req.text} | raw={repr(raw)}")
         result = detokenize(raw) if cfg["detokenize"] else raw
+        print(f"[DEBUG] final={repr(result)}")
         return {
             "success":     True,
             "translation": result,
             "pair":        pair,
+            "raw":         raw,
         }
     except Exception as e:
+        print(f"[ERROR] [{pair}]: {e}")
+        return {"success": False, "translation": str(e)}