mkfallah committed on
Commit
93c6594
·
verified ·
1 Parent(s): 5901b41

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -32
app.py CHANGED
@@ -2,38 +2,13 @@ import gradio as gr
2
  from transformers import pipeline
3
  from rapidfuzz import process, fuzz
4
 
5
- # Initialize ASR pipeline
6
  asr = pipeline(
7
  task="automatic-speech-recognition",
8
  model="vhdm/whisper-large-fa-v1",
9
  device=-1 # CPU; set device=0 for GPU
10
  )
11
 
12
- # Custom vocabulary with multiple forms
13
- custom_vocab_map = {
14
- "نرد": ["نرد", "نِرد", "نَرد"],
15
- "کامپیوتر": ["کامپیوتر", "کامپیوتره"],
16
- "هوش مصنوعی": ["هوش مصنوعی", "هوش صنعتی"],
17
- "ماشین": ["ماشین", "ماشینه"]
18
- }
19
-
20
- def replace_fuzzy(text, vocab_map, threshold=85):
21
- """
22
- Replace words/phrases in text using fuzzy matching with a high threshold.
23
- """
24
- for target, alternatives in vocab_map.items():
25
- result = process.extractOne(text, alternatives, scorer=fuzz.partial_ratio)
26
- if result is None:
27
- continue
28
- if hasattr(result, 'score') and hasattr(result, 'value'):
29
- score = result.score
30
- match = result.value
31
- else:
32
- match, score = result[:2]
33
- if score >= threshold:
34
- text = text.replace(match, target)
35
- return text
36
-
37
  def transcribe(audio_file):
38
  """
39
  audio_file: path to WAV file (Gradio mic or upload)
@@ -42,23 +17,21 @@ def transcribe(audio_file):
42
  return "No audio input detected."
43
 
44
  try:
45
- # Run ASR
46
  result = asr(audio_file, chunk_length_s=30, stride_length_s=[5,5])
47
  except Exception as e:
48
  return f"ASR error: {e}"
49
 
50
  text = result.get("text", "")
51
- final_text = replace_fuzzy(text, custom_vocab_map, threshold=80)
52
- return final_text
53
 
54
- # Gradio interface
55
  iface = gr.Interface(
56
  fn=transcribe,
57
  inputs=gr.Audio(type="filepath", label="Record or upload audio"),
58
  outputs="text",
59
  title="Persian ASR with High Accuracy Vocabulary",
60
- description=""" Speak in Persian or upload an audio file; recognized words
61
- are corrected using a custom high-accuracy vocabulary."""
62
  )
63
 
64
  if __name__ == "__main__":
 
2
  from transformers import pipeline
3
  from rapidfuzz import process, fuzz
4
 
5
+ # initialize ASR pipeline
6
  asr = pipeline(
7
  task="automatic-speech-recognition",
8
  model="vhdm/whisper-large-fa-v1",
9
  device=-1 # CPU; set device=0 for GPU
10
  )
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  def transcribe(audio_file):
13
  """
14
  audio_file: path to WAV file (Gradio mic or upload)
 
17
  return "No audio input detected."
18
 
19
  try:
20
+ # run ASR
21
  result = asr(audio_file, chunk_length_s=30, stride_length_s=[5,5])
22
  except Exception as e:
23
  return f"ASR error: {e}"
24
 
25
  text = result.get("text", "")
26
+ return text
 
27
 
28
+ # gradio interface
29
  iface = gr.Interface(
30
  fn=transcribe,
31
  inputs=gr.Audio(type="filepath", label="Record or upload audio"),
32
  outputs="text",
33
  title="Persian ASR with High Accuracy Vocabulary",
34
+ description=""" Speak in Persian or upload an audio file."""
 
35
  )
36
 
37
  if __name__ == "__main__":