Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,10 +4,14 @@ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
|
|
| 4 |
import gradio as gr
|
| 5 |
import sox
|
| 6 |
import subprocess
|
| 7 |
-
from google_spell_checker import GoogleSpellChecker
|
|
|
|
| 8 |
|
|
|
|
|
|
|
| 9 |
|
| 10 |
-
|
|
|
|
| 11 |
|
| 12 |
def read_file_and_process(wav_file):
|
| 13 |
filename = wav_file.split('.')[0]
|
|
@@ -34,22 +38,33 @@ def parse_transcription(logits):
|
|
| 34 |
return transcription
|
| 35 |
|
| 36 |
|
| 37 |
-
def corrector(sentence):
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
def parse(wav_file):
|
| 46 |
input_values = read_file_and_process(wav_file)
|
| 47 |
with torch.no_grad():
|
| 48 |
logits = model(**input_values).logits
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
|
| 54 |
# def parse(wav_file):
|
| 55 |
# check_spell = ''
|
|
@@ -84,7 +99,7 @@ txtbox = gr.Textbox(
|
|
| 84 |
|
| 85 |
title = "Speech-to-Text (persian)"
|
| 86 |
description = "، توجه داشته باشید که هرچه گفتار شما شمرده تر باشد خروجی با کیفیت تری دارید.روی دکمه ضبط صدا کلیک کنید و سپس دسترسی مرورگر خود را به میکروفون دستگاه بدهید، سپس شروع به صحبت کنید و برای اتمام ضبط دوباره روی دکمه کلیک کنید"
|
| 87 |
-
article = "<p style='text-align: center'><a href='https://github.com/nimaprgrmr'>Large-Scale Self- and Semi-Supervised Learning for Speech Translation</a></p>"
|
| 88 |
|
| 89 |
demo = gr.Interface(fn=parse, inputs = input_, outputs=txtbox, title=title, description=description, article = article,
|
| 90 |
streaming=True, interactive=True,
|
|
|
|
| 4 |
import gradio as gr
|
| 5 |
import sox
|
| 6 |
import subprocess
|
| 7 |
+
# from google_spell_checker import GoogleSpellChecker
|
| 8 |
+
import openai
|
| 9 |
|
| 10 |
+
# Read the OpenAI API key from the environment so the secret never lives in
# source control. The deployment must define OPENAI_API_KEY (e.g. as a Space
# secret); when it is missing the key is an empty string and API calls will
# fail with an authentication error instead of using a leaked credential.
import os

api_key = os.environ.get("OPENAI_API_KEY", "")
|
| 12 |
|
| 13 |
+
|
| 14 |
+
# spell_checker = GoogleSpellChecker(lang="fa")
|
| 15 |
|
| 16 |
def read_file_and_process(wav_file):
|
| 17 |
filename = wav_file.split('.')[0]
|
|
|
|
| 38 |
return transcription
|
| 39 |
|
| 40 |
|
| 41 |
+
# def corrector(sentence):
|
| 42 |
+
# check_spell = spell_checker.check(sentence)
|
| 43 |
+
# if check_spell[1] is None:
|
| 44 |
+
# return sentence
|
| 45 |
+
# else:
|
| 46 |
+
# return check_spell[1]
|
| 47 |
+
def correct_text_with_gpt(text):
    """Ask the OpenAI completion endpoint to correct a piece of text.

    Args:
        text: The raw text (here, a speech transcription) to be corrected.

    Returns:
        The text of the first completion choice with surrounding whitespace
        stripped. When ``text`` is ``None``, empty or only whitespace, an
        empty string is returned and no request is sent.
    """
    # Nothing to correct: skip the billed round-trip and avoid the model
    # inventing content for an empty prompt.
    if text is None or not text.strip():
        return ""

    import os

    # Prefer a key supplied through the environment; fall back to the
    # module-level ``api_key`` so existing deployments keep working.
    openai.api_key = os.environ.get("OPENAI_API_KEY") or api_key
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=f"Please correct the following text: '{text}'\n\nCorrected text:",
        max_tokens=1000,
        # Sampling temperature: higher values (e.g. 1.0) make the output more
        # random, lower values (e.g. 0.2) make it more deterministic.
        temperature=0.5,
        # Nucleus sampling threshold: only tokens within this cumulative
        # probability mass are considered; 1.0 keeps the full distribution.
        top_p=1.0,
        # Penalizes tokens in proportion to how often they have already
        # appeared in the text so far, reducing verbatim repetition.
        frequency_penalty=0.2,
        # Penalizes any token that has already appeared at least once,
        # nudging the model toward introducing new content.
        presence_penalty=0.5,
    )
    return response.choices[0].text.strip()
|
| 59 |
+
|
| 60 |
+
|
| 61 |
def parse(wav_file):
    """Transcribe an audio file and return the GPT-corrected text.

    The file is preprocessed by ``read_file_and_process``, run through the
    module-level ``model`` with gradient tracking disabled, decoded by
    ``parse_transcription`` and finally passed to ``correct_text_with_gpt``.
    """
    features = read_file_and_process(wav_file)
    # Inference only: no gradients are needed for the forward pass.
    with torch.no_grad():
        model_output = model(**features)
    raw_transcription = parse_transcription(model_output.logits)
    return correct_text_with_gpt(raw_transcription)
|
| 66 |
+
|
| 67 |
+
|
|
|
|
| 68 |
|
| 69 |
# def parse(wav_file):
|
| 70 |
# check_spell = ''
|
|
|
|
| 99 |
|
| 100 |
title = "Speech-to-Text (persian)"
|
| 101 |
description = "، توجه داشته باشید که هرچه گفتار شما شمرده تر باشد خروجی با کیفیت تری دارید.روی دکمه ضبط صدا کلیک کنید و سپس دسترسی مرورگر خود را به میکروفون دستگاه بدهید، سپس شروع به صحبت کنید و برای اتمام ضبط دوباره روی دکمه کلیک کنید"
|
| 102 |
+
# article = "<p style='text-align: center'><a href='https://github.com/nimaprgrmr'>Large-Scale Self- and Semi-Supervised Learning for Speech Translation</a></p>"
# The article footer is disabled, but the gr.Interface(...) call still passes
# ``article = article``; keep the name bound so the module does not raise
# NameError at import. None means no article is rendered.
article = None
|
| 103 |
|
| 104 |
demo = gr.Interface(fn=parse, inputs = input_, outputs=txtbox, title=title, description=description, article = article,
|
| 105 |
streaming=True, interactive=True,
|