Spaces:

mkfallah
/

pasr

Sleeping

App Files Files Community

mkfallah committed on Sep 4, 2025

Commit

9f13d0c

verified ·

1 Parent(s): 5afd83b

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -19

app.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import gradio as gr
 from transformers import pipeline
 from rapidfuzz import process, fuzz
-import tempfile
 import soundfile as sf
 import numpy as np
@@ -31,27 +30,19 @@ def replace_fuzzy(text, vocab_map, threshold=85):
             text = text.replace(match, target)
     return text
-def transcribe(audio):
     """
-    Handle audio input from Gradio: tuple (numpy array, sample_rate) or file path.
     """
-    if audio is None:
         return "No audio input detected."
-    # If audio is a tuple (numpy array, sample_rate)
-    if isinstance(audio, tuple) and len(audio) == 2:
-        data, sr = audio
-        data = np.asarray(data)
-        if data.ndim == 1:
-            data = np.expand_dims(data, axis=1)
-        with tempfile.NamedTemporaryFile(suffix=".wav") as tmp:
-            sf.write(tmp.name, data, samplerate=sr)
-            result = asr(tmp.name, chunk_length_s=30, stride_length_s=[5,5])
-    elif isinstance(audio, str):
-        # If audio is a file path
-        result = asr(audio, chunk_length_s=30, stride_length_s=[5,5])
-    else:
-        return "Unsupported audio input type."
     text = result.get("text", "")
     final_text = replace_fuzzy(text, custom_vocab_map, threshold=85)
@@ -60,7 +51,7 @@ def transcribe(audio):
 # Gradio interface
 iface = gr.Interface(
     fn=transcribe,
-    inputs=gr.Audio(type="numpy", label="Record or upload audio"),
     outputs="text",
     title="Persian ASR with High Accuracy Vocabulary",
     description="""Speak in Persian or upload an audio file;

 import gradio as gr
 from transformers import pipeline
 from rapidfuzz import process, fuzz
 import soundfile as sf
 import numpy as np
             text = text.replace(match, target)
     return text
+def transcribe(audio_file):
     """
+    Handle audio input from Gradio.
+    audio_file: path to WAV file (Gradio sends file path for mic and upload)
     """
+    if audio_file is None:
         return "No audio input detected."
+    # Run ASR directly on file path
+    try:
+        result = asr(audio_file, chunk_length_s=30, stride_length_s=[5,5])
+    except Exception as e:
+        return f"ASR error: {e}"
     text = result.get("text", "")
     final_text = replace_fuzzy(text, custom_vocab_map, threshold=85)
 # Gradio interface
 iface = gr.Interface(
     fn=transcribe,
+    inputs=gr.Audio(type="filepath", label="Record or upload audio"),
     outputs="text",
     title="Persian ASR with High Accuracy Vocabulary",
     description="""Speak in Persian or upload an audio file;