Spaces:

mkfallah
/

pasr

Sleeping

mkfallah committed on Sep 4, 2025

Commit

11dabbc

verified ·

1 Parent(s): 98b2436

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -9,7 +9,7 @@ import numpy as np
 asr = pipeline(
     task="automatic-speech-recognition",
     model="vhdm/whisper-large-fa-v1",
-    device=-1  # CPU; for GPU device=0
 )
 # --- Custom vocabulary with multiple forms for accuracy ---
@@ -33,29 +33,27 @@ def replace_fuzzy(text, vocab_map, threshold=85):
 def transcribe(audio):
     """
-    audio: tuple(numpy array, sample_rate) from Gradio
     """
     if audio is None:
         return "No audio input detected."
-    # Handle audio input
     if isinstance(audio, tuple):
         data, sr = audio
         # Convert mono to 2D array for soundfile
-        if isinstance(data, int):
-            return "Invalid audio data."
         if data.ndim == 1:
             data = np.expand_dims(data, axis=1)
-        # Write temporary WAV file
         with tempfile.NamedTemporaryFile(suffix=".wav") as tmp:
             sf.write(tmp.name, data, samplerate=sr)
-            # Run ASR with chunking
             result = asr(tmp.name, chunk_length_s=30, stride_length_s=[5,5])
     else:
-        # If audio is a file path
         result = asr(audio, chunk_length_s=30, stride_length_s=[5,5])
-    text = result["text"]
     final_text = replace_fuzzy(text, custom_vocab_map, threshold=85)
     return final_text

 asr = pipeline(
     task="automatic-speech-recognition",
     model="vhdm/whisper-large-fa-v1",
+    device=-1  # CPU; برای GPU device=0
 )
 # --- Custom vocabulary with multiple forms for accuracy ---
 def transcribe(audio):
     """
+    Handle audio input from Gradio: tuple (numpy array, sample_rate) or file path
     """
     if audio is None:
         return "No audio input detected."
+    # If tuple (numpy array + sample_rate)
     if isinstance(audio, tuple):
         data, sr = audio
+        data = np.asarray(data)
         # Convert mono to 2D array for soundfile
         if data.ndim == 1:
             data = np.expand_dims(data, axis=1)
         with tempfile.NamedTemporaryFile(suffix=".wav") as tmp:
             sf.write(tmp.name, data, samplerate=sr)
+            # Run ASR with chunking for long audio
             result = asr(tmp.name, chunk_length_s=30, stride_length_s=[5,5])
     else:
+        # If file path
         result = asr(audio, chunk_length_s=30, stride_length_s=[5,5])
+    text = result.get("text", "")
     final_text = replace_fuzzy(text, custom_vocab_map, threshold=85)
     return final_text