mkfallah committed on
Commit
11dabbc
·
verified ·
1 Parent(s): 98b2436

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -9
app.py CHANGED
@@ -9,7 +9,7 @@ import numpy as np
9
  asr = pipeline(
10
  task="automatic-speech-recognition",
11
  model="vhdm/whisper-large-fa-v1",
12
- device=-1 # CPU; for GPU device=0
13
  )
14
 
15
  # --- Custom vocabulary with multiple forms for accuracy ---
@@ -33,29 +33,27 @@ def replace_fuzzy(text, vocab_map, threshold=85):
33
 
34
  def transcribe(audio):
35
  """
36
- audio: tuple(numpy array, sample_rate) from Gradio
37
  """
38
  if audio is None:
39
  return "No audio input detected."
40
 
41
- # Handle audio input
42
  if isinstance(audio, tuple):
43
  data, sr = audio
 
44
  # Convert mono to 2D array for soundfile
45
- if isinstance(data, int):
46
- return "Invalid audio data."
47
  if data.ndim == 1:
48
  data = np.expand_dims(data, axis=1)
49
- # Write temporary WAV file
50
  with tempfile.NamedTemporaryFile(suffix=".wav") as tmp:
51
  sf.write(tmp.name, data, samplerate=sr)
52
- # Run ASR with chunking
53
  result = asr(tmp.name, chunk_length_s=30, stride_length_s=[5,5])
54
  else:
55
- # If audio is a file path
56
  result = asr(audio, chunk_length_s=30, stride_length_s=[5,5])
57
 
58
- text = result["text"]
59
  final_text = replace_fuzzy(text, custom_vocab_map, threshold=85)
60
  return final_text
61
 
 
9
  asr = pipeline(
10
  task="automatic-speech-recognition",
11
  model="vhdm/whisper-large-fa-v1",
12
+ device=-1 # CPU; برای GPU device=0
13
  )
14
 
15
  # --- Custom vocabulary with multiple forms for accuracy ---
 
33
 
34
  def transcribe(audio):
35
  """
36
+ Handle audio input from Gradio: tuple (numpy array, sample_rate) or file path
37
  """
38
  if audio is None:
39
  return "No audio input detected."
40
 
41
+ # If tuple (numpy array + sample_rate)
42
  if isinstance(audio, tuple):
43
  data, sr = audio
44
+ data = np.asarray(data)
45
  # Convert mono to 2D array for soundfile
 
 
46
  if data.ndim == 1:
47
  data = np.expand_dims(data, axis=1)
 
48
  with tempfile.NamedTemporaryFile(suffix=".wav") as tmp:
49
  sf.write(tmp.name, data, samplerate=sr)
50
+ # Run ASR with chunking for long audio
51
  result = asr(tmp.name, chunk_length_s=30, stride_length_s=[5,5])
52
  else:
53
+ # If file path
54
  result = asr(audio, chunk_length_s=30, stride_length_s=[5,5])
55
 
56
+ text = result.get("text", "")
57
  final_text = replace_fuzzy(text, custom_vocab_map, threshold=85)
58
  return final_text
59