mkfallah committed on
Commit
9f13d0c
·
verified ·
1 Parent(s): 5afd83b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -19
app.py CHANGED
@@ -1,7 +1,6 @@
1
  import gradio as gr
2
  from transformers import pipeline
3
  from rapidfuzz import process, fuzz
4
- import tempfile
5
  import soundfile as sf
6
  import numpy as np
7
 
@@ -31,27 +30,19 @@ def replace_fuzzy(text, vocab_map, threshold=85):
31
  text = text.replace(match, target)
32
  return text
33
 
34
- def transcribe(audio):
35
  """
36
- Handle audio input from Gradio: tuple (numpy array, sample_rate) or file path.
 
37
  """
38
- if audio is None:
39
  return "No audio input detected."
40
 
41
- # If audio is a tuple (numpy array, sample_rate)
42
- if isinstance(audio, tuple) and len(audio) == 2:
43
- data, sr = audio
44
- data = np.asarray(data)
45
- if data.ndim == 1:
46
- data = np.expand_dims(data, axis=1)
47
- with tempfile.NamedTemporaryFile(suffix=".wav") as tmp:
48
- sf.write(tmp.name, data, samplerate=sr)
49
- result = asr(tmp.name, chunk_length_s=30, stride_length_s=[5,5])
50
- elif isinstance(audio, str):
51
- # If audio is a file path
52
- result = asr(audio, chunk_length_s=30, stride_length_s=[5,5])
53
- else:
54
- return "Unsupported audio input type."
55
 
56
  text = result.get("text", "")
57
  final_text = replace_fuzzy(text, custom_vocab_map, threshold=85)
@@ -60,7 +51,7 @@ def transcribe(audio):
60
  # Gradio interface
61
  iface = gr.Interface(
62
  fn=transcribe,
63
- inputs=gr.Audio(type="numpy", label="Record or upload audio"),
64
  outputs="text",
65
  title="Persian ASR with High Accuracy Vocabulary",
66
  description="""Speak in Persian or upload an audio file;
 
1
  import gradio as gr
2
  from transformers import pipeline
3
  from rapidfuzz import process, fuzz
 
4
  import soundfile as sf
5
  import numpy as np
6
 
 
30
  text = text.replace(match, target)
31
  return text
32
 
33
+ def transcribe(audio_file):
34
  """
35
+ Handle audio input from Gradio.
36
+ audio_file: path to WAV file (Gradio sends file path for mic and upload)
37
  """
38
+ if audio_file is None:
39
  return "No audio input detected."
40
 
41
+ # Run ASR directly on file path
42
+ try:
43
+ result = asr(audio_file, chunk_length_s=30, stride_length_s=[5,5])
44
+ except Exception as e:
45
+ return f"ASR error: {e}"
 
 
 
 
 
 
 
 
 
46
 
47
  text = result.get("text", "")
48
  final_text = replace_fuzzy(text, custom_vocab_map, threshold=85)
 
51
  # Gradio interface
52
  iface = gr.Interface(
53
  fn=transcribe,
54
+ inputs=gr.Audio(type="filepath", label="Record or upload audio"),
55
  outputs="text",
56
  title="Persian ASR with High Accuracy Vocabulary",
57
  description="""Speak in Persian or upload an audio file;