mkfallah committed on
Commit
9f5d540
·
verified ·
1 Parent(s): 68f2a89

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -0
app.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ from rapidfuzz import process, fuzz
4
+ import tempfile
5
+ import soundfile as sf
6
+
7
# --- Speech-recognition pipeline, built once when the module is loaded ---
asr = pipeline(
    "automatic-speech-recognition",
    model="vhdm/whisper-large-fa-v1",
    device=-1,  # -1 keeps inference on the CPU
)
13
+
14
# --- Custom vocabulary ---
# Maps each preferred spelling (the replacement target) to the list of
# alternative forms that replace_fuzzy() looks for in a transcript.
custom_vocab_map = {
    "نرد": [
        "نرد",
        "نِرد",
        "نَرد",
    ],
    "کامپیوتر": [
        "کامپیوتر",
        "کامپیوتره",
    ],
    "هوش مصنوعی": [
        "هوش مصنوعی",
        "هوش صنعتی",
    ],
    "ماشین": [
        "ماشین",
        "ماشینه",
    ],
}
21
+
22
def replace_fuzzy(text, vocab_map, threshold=85):
    """Normalize known words/phrases in ``text`` to their preferred spelling.

    For every ``target`` in ``vocab_map`` the alternative that best matches
    ``text`` (scored with ``fuzz.partial_ratio``) is selected. When its score
    reaches ``threshold``, every exact occurrence of that alternative in
    ``text`` is replaced by ``target``.

    Args:
        text: Transcript to post-process.
        vocab_map: Mapping of preferred form -> list of alternative forms.
        threshold: Minimum score (0-100) the best alternative must reach.

    Returns:
        The text with matching alternatives replaced. ``text`` is returned
        unchanged when it is empty or no alternative scores high enough.
    """
    if not text:
        # Nothing to match against; skip scoring entirely.
        return text

    for target, alternatives in vocab_map.items():
        if not alternatives:
            # extractOne() yields no result for an empty choice list.
            continue

        best = process.extractOne(text, alternatives, scorer=fuzz.partial_ratio)
        if best is None:
            continue

        # RapidFuzz returns (choice, score, index). Indexing instead of
        # two-name unpacking avoids "too many values to unpack" and still
        # works with libraries that return a plain (choice, score) pair.
        match, score = best[0], best[1]

        # Replacing a string with itself is a no-op, so skip it.
        if score >= threshold and match != target:
            text = text.replace(match, target)
    return text
33
+
34
def transcribe(audio):
    """Transcribe a Gradio audio input and apply the custom vocabulary.

    Args:
        audio: Value delivered by ``gr.Audio(type="numpy")``: a
            ``(sample_rate, samples)`` tuple, or ``None`` when the user
            submitted no audio.

    Returns:
        The transcript after vocabulary correction, or an empty string when
        no audio was provided.
    """
    if audio is None:
        # Submitting the form without a recording/upload passes None.
        return ""

    # Gradio's numpy format puts the sample rate first and the samples second.
    sample_rate, samples = audio

    with tempfile.NamedTemporaryFile(suffix=".wav") as tmp:
        sf.write(tmp.name, samples, samplerate=sample_rate)
        # Chunked inference so long recordings are processed in 30 s windows
        # with 5 s of overlap on each side.
        result = asr(tmp.name, chunk_length_s=30, stride_length_s=[5, 5])

    return replace_fuzzy(result["text"], custom_vocab_map, threshold=85)
43
+
44
# --- Gradio UI: one audio input, plain-text transcript as output ---
iface = gr.Interface(
    transcribe,
    gr.Audio(type="numpy"),
    "text",
    title="Persian ASR with High Accuracy Vocabulary",
    description=(
        "Upload a Persian audio file; recognized words are corrected "
        "using a custom high-accuracy vocabulary."
    ),
)

# Start the web server for the interface.
iface.launch()