Spaces:

kingabzpro
/

Transcribed-Urdu

Sleeping

App Files Files Community

Abid Ali Awan committed on Jul 6, 2025

Commit

8fa375c

1 Parent(s): 75f4da9

Add Urdu punctuation support in app.py by implementing a function to add full stops and optional commas, enhancing text output from the transcription process.

Browse files

Files changed (1) hide show

app.py +31 -1

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import os
 import warnings
 import gradio as gr
@@ -46,6 +47,32 @@ transcriber = pipeline(
 )
 def transcribe(audio):
     if audio is None:
         return "No audio provided. Please record or upload an audio file."
@@ -64,7 +91,10 @@ def transcribe(audio):
     # Inference under no_grad
     with torch.no_grad():
         result = transcriber({"sampling_rate": sr, "raw": y})
-    return result.get("text", "")
 # —— Gradio UI ——

 import os
+import re
 import warnings
 import gradio as gr
 )
def add_urdu_punctuation(text: str) -> str:
    """Add heuristic Urdu punctuation to raw transcription text.

    Each non-empty line of *text* is treated as one sentence. An Urdu comma
    (،) is inserted after common conjunctions, and an Urdu full stop (۔) is
    appended to every sentence that does not already end with a terminator.
    The processed sentences are joined with single spaces.

    This is a simple heuristic and will not be correct for every sentence.

    Args:
        text: Transcribed text, possibly spanning several lines. Empty or
            ``None`` input yields an empty string.

    Returns:
        The punctuated text as a single line.
    """
    if not text:
        return ""

    # Common Urdu conjunctions that receive a trailing comma.
    conjunctions = ("اور", "لیکن", "مگر", "یا", "پھر", "جبکہ", "کیونکہ", "تاہم")
    # Marks that already close a sentence; no full stop is added after them.
    terminators = ("۔", "؟", "!", "?", ".")

    # One alternation instead of one substitution pass per conjunction.
    # The negative lookahead skips a conjunction that is already followed by
    # punctuation, and one that is the last word of the sentence: adding a
    # comma there would produce "،۔" once the full stop is appended.
    conjunction_re = re.compile(
        r"\b(" + "|".join(re.escape(c) for c in conjunctions) + r")\b"
        r"(?!\s*(?:[،۔؟؛!?.,:;]|$))"
    )

    processed = []
    # Newlines are the only sentence boundary this heuristic recognises.
    for line in re.split(r"\n+", text):
        sentence = line.strip()
        if not sentence:
            continue
        sentence = conjunction_re.sub(r"\1،", sentence)
        # str.endswith accepts a tuple, so one call covers every terminator.
        if not sentence.endswith(terminators):
            sentence += "۔"
        processed.append(sentence)
    return " ".join(processed)
 def transcribe(audio):
     if audio is None:
         return "No audio provided. Please record or upload an audio file."
     # Inference under no_grad
     with torch.no_grad():
         result = transcriber({"sampling_rate": sr, "raw": y})
+    text = result.get("text", "")
+    # Add Urdu punctuation
+    text = add_urdu_punctuation(text)
+    return text
 # —— Gradio UI ——