Spaces:

mkfallah
/

pasr

Sleeping

mkfallah committed on Sep 4, 2025

Commit

ddf950c

verified ·

1 Parent(s): 61c5f65

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -21,10 +21,6 @@ custom_vocab_map = {
 }
 def replace_fuzzy(text, vocab_map, threshold=85):
-    """
-    Replace words/phrases in text using fuzzy matching with high threshold.
-    Supports multiple alternatives per word/phrase.
-    """
     for target, alternatives in vocab_map.items():
         match, score = process.extractOne(text, alternatives, scorer=fuzz.partial_ratio)
         if score >= threshold:
@@ -32,15 +28,25 @@ def replace_fuzzy(text, vocab_map, threshold=85):
     return text
 def transcribe(audio):
-    # audio is a tuple (numpy array, sample_rate)
-    data, sr = audio
-    # --- convert mono to 2D for soundfile ---
-    if data.ndim == 1:
-        data = np.expand_dims(data, axis=1)
-    with tempfile.NamedTemporaryFile(suffix=".wav") as tmp:
-        sf.write(tmp.name, data, samplerate=sr)
-        # ASR with chunking for long audios
-        result = asr(tmp.name, chunk_length_s=30, stride_length_s=[5,5])
     text = result["text"]
     final_text = replace_fuzzy(text, custom_vocab_map, threshold=85)
     return final_text
@@ -48,10 +54,10 @@ def transcribe(audio):
 # --- Gradio interface ---
 iface = gr.Interface(
     fn=transcribe,
-    inputs=gr.Audio(type="numpy"),
     outputs="text",
     title="Persian ASR with High Accuracy Vocabulary",
-    description="Upload a Persian audio file; recognized words are corrected using a custom high-accuracy vocabulary."
 )
 iface.launch()

 }
 def replace_fuzzy(text, vocab_map, threshold=85):
     for target, alternatives in vocab_map.items():
         match, score = process.extractOne(text, alternatives, scorer=fuzz.partial_ratio)
         if score >= threshold:
     return text
 def transcribe(audio):
     """Transcribe Gradio audio input and apply the custom-vocabulary correction.

     Args:
         audio: ``None``, a two-element tuple as produced by
             ``gr.Audio(type="numpy")``, or any other value, which is handed
             to ``asr`` unchanged (assumed to be a file path).

     Returns:
         The transcript after ``replace_fuzzy`` has been applied, or a short
         message when there is no input or the tuple holds no sample array.
     """
     if audio is None:
         return "No audio input detected."
     if not isinstance(audio, tuple):
         # assume audio is a file path
         result = asr(audio, chunk_length_s=30, stride_length_s=[5, 5])
     else:
         if len(audio) != 2:
             return "Invalid audio data."
         # Gradio documents the numpy format as (sample_rate, data). The earlier
         # unpacking was (data, sample_rate), which bound the integer rate to
         # `data` and rejected every recording. The reversed order is still
         # accepted so existing callers keep working.
         sr, data = audio
         if isinstance(sr, np.ndarray) and not isinstance(data, np.ndarray):
             sr, data = data, sr
         if not isinstance(data, np.ndarray):
             return "Invalid audio data."
         # convert to 2D if mono
         if data.ndim == 1:
             data = np.expand_dims(data, axis=1)
         # The file is removed when the `with` block exits, so the ASR call
         # has to happen inside it.
         with tempfile.NamedTemporaryFile(suffix=".wav") as tmp:
             sf.write(tmp.name, data, samplerate=sr)
             result = asr(tmp.name, chunk_length_s=30, stride_length_s=[5, 5])
     text = result["text"]
     final_text = replace_fuzzy(text, custom_vocab_map, threshold=85)
     return final_text
 # --- Gradio interface ---
 # Builds a Gradio Interface that sends the audio input to `transcribe` and
 # shows the returned string as text, then starts the app with `launch()`.
 # NOTE(review): the `source=` keyword of gr.Audio appears to have been replaced
 # by `sources=[...]` in Gradio 4 — confirm against the installed version.
 iface = gr.Interface(
     fn=transcribe,
+    inputs=gr.Audio(source="microphone", type="numpy"),  # live mic recording
     outputs="text",
     title="Persian ASR with High Accuracy Vocabulary",
+    description="Speak in Persian; recognized words are corrected using a custom high-accuracy vocabulary."
 )
 iface.launch()