Update app.py
Browse files
app.py
CHANGED
|
@@ -6,6 +6,7 @@ import sox
|
|
| 6 |
import subprocess
|
| 7 |
from fuzzywuzzy import fuzz
|
| 8 |
|
|
|
|
| 9 |
def read_file_and_process(wav_file):
|
| 10 |
filename = wav_file.split('.')[0]
|
| 11 |
filename_16k = filename + "16k.wav"
|
|
@@ -36,29 +37,28 @@ def parse(wav_file):
|
|
| 36 |
logits = model(**input_values).logits
|
| 37 |
return parse_transcription(logits)
|
| 38 |
|
| 39 |
-
|
| 40 |
model_id = "jonatasgrosman/wav2vec2-large-xlsr-53-persian"
|
| 41 |
processor = Wav2Vec2Processor.from_pretrained(model_id)
|
| 42 |
model = Wav2Vec2ForCTC.from_pretrained(model_id)
|
| 43 |
|
| 44 |
-
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
txtbox = gr.Textbox(
|
| 47 |
-
label="
|
| 48 |
lines=5,
|
| 49 |
-
placeholder="متن نوشتاری گفتار شما",
|
| 50 |
-
show_label=True,
|
| 51 |
-
container=True,
|
| 52 |
text_align="right",
|
|
|
|
| 53 |
show_copy_button=True,
|
| 54 |
)
|
| 55 |
|
| 56 |
title = "Speech-to-Text (persian)"
|
| 57 |
-
description = "
|
| 58 |
article = "<p style='text-align: center'><a href='https://github.com/nimaprgrmr'>Large-Scale Self- and Semi-Supervised Learning for Speech Translation</a></p>"
|
| 59 |
|
| 60 |
-
|
| 61 |
-
|
| 62 |
demo = gr.Interface(fn=parse, inputs = input_, outputs=txtbox, title=title, description=description, article = article,
|
| 63 |
streaming=True, interactive=True,
|
| 64 |
analytics_enabled=False, show_tips=False, enable_queue=True)
|
|
|
|
| 6 |
import subprocess
|
| 7 |
from fuzzywuzzy import fuzz
|
| 8 |
|
| 9 |
+
|
| 10 |
def read_file_and_process(wav_file):
|
| 11 |
filename = wav_file.split('.')[0]
|
| 12 |
filename_16k = filename + "16k.wav"
|
|
|
|
| 37 |
logits = model(**input_values).logits
|
| 38 |
return parse_transcription(logits)
|
| 39 |
|
|
|
|
| 40 |
model_id = "jonatasgrosman/wav2vec2-large-xlsr-53-persian"
|
| 41 |
processor = Wav2Vec2Processor.from_pretrained(model_id)
|
| 42 |
model = Wav2Vec2ForCTC.from_pretrained(model_id)
|
| 43 |
|
| 44 |
+
input_ = gr.Audio(source="microphone",
|
| 45 |
+
type="filepath",
|
| 46 |
+
label="لطفا دکمه ضبط صدا را بزنید و شروع به صحبت کنید و بعد از اتمام صحبت دوباره دکمه ضبط را فشار دهید.",
|
| 47 |
+
show_download_button=True,
|
| 48 |
+
show_edit_button=True,
|
| 49 |
+
)
|
| 50 |
txtbox = gr.Textbox(
|
| 51 |
+
label="متن گفتار شما: ",
|
| 52 |
lines=5,
|
|
|
|
|
|
|
|
|
|
| 53 |
text_align="right",
|
| 54 |
+
show_label=True,
|
| 55 |
show_copy_button=True,
|
| 56 |
)
|
| 57 |
|
| 58 |
title = "Speech-to-Text (persian)"
|
| 59 |
+
description = "توجه داشته باشید که هرچه گفتار شما شمرده تر باشد خروجی با کیفیت تری دارید. روی دکمه ضبط صدا کلیک کنید و سپس دسترسی مرورگر خود را به میکروفون دستگاه بدهید، سپس شروع به صحبت کنید و برای اتمام ضبط دوباره روی دکمه کلیک کنید"
|
| 60 |
article = "<p style='text-align: center'><a href='https://github.com/nimaprgrmr'>Large-Scale Self- and Semi-Supervised Learning for Speech Translation</a></p>"
|
| 61 |
|
|
|
|
|
|
|
| 62 |
demo = gr.Interface(fn=parse, inputs = input_, outputs=txtbox, title=title, description=description, article = article,
|
| 63 |
streaming=True, interactive=True,
|
| 64 |
analytics_enabled=False, show_tips=False, enable_queue=True)
|