Speech-to-text and text-to-speech functions.
Browse files
app.py
CHANGED
|
@@ -4,7 +4,7 @@ import MAIAI
|
|
| 4 |
|
| 5 |
# from deep_translator import GoogleTranslator
|
| 6 |
|
| 7 |
-
import speech_recognition as sr
|
| 8 |
# import assemblyai as aai
|
| 9 |
|
| 10 |
# import pyttsx3
|
|
@@ -170,30 +170,30 @@ def gtts(input_text,language='English'):
|
|
| 170 |
# transcript = transcriber.transcribe(audio_file_path)
|
| 171 |
# return transcript.text
|
| 172 |
|
| 173 |
-
def google_speech_to_text(audio_file_path):
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
|
| 186 |
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
|
| 198 |
def chat(text, history, native_language, language, persona, tone = "Casual", model = "gpt-4o-mini"):
|
| 199 |
|
|
@@ -274,7 +274,7 @@ You MUST give your feedback in {native_language}.
|
|
| 274 |
|
| 275 |
# Gradio Custom Chatbot -------------------------------------------------------
|
| 276 |
|
| 277 |
-
with gr.Blocks(fill_height=True) as demo:
|
| 278 |
|
| 279 |
chatbot = gr.Chatbot(
|
| 280 |
elem_id="chatbot",
|
|
@@ -286,7 +286,7 @@ with gr.Blocks(fill_height=True) as demo:
|
|
| 286 |
|
| 287 |
chat_input = gr.Textbox(interactive=True, scale=8,
|
| 288 |
inputs=[gr.components.Audio(sources="microphone", type="filepath", label="Speak or upload audio")],
|
| 289 |
-
value=
|
| 290 |
submit_button = gr.Button("Submit", scale=1)
|
| 291 |
|
| 292 |
with gr.Row():
|
|
|
|
| 4 |
|
| 5 |
# from deep_translator import GoogleTranslator
|
| 6 |
|
| 7 |
+
# import speech_recognition as sr
|
| 8 |
# import assemblyai as aai
|
| 9 |
|
| 10 |
# import pyttsx3
|
|
|
|
| 170 |
# transcript = transcriber.transcribe(audio_file_path)
|
| 171 |
# return transcript.text
|
| 172 |
|
| 173 |
+
# def google_speech_to_text(audio_file_path):
|
| 174 |
+
# if audio_file_path:
|
| 175 |
+
# recognizer = sr.Recognizer()
|
| 176 |
+
# with sr.AudioFile(audio_file_path) as source:
|
| 177 |
+
# audio_data = recognizer.record(source)
|
| 178 |
+
# try:
|
| 179 |
+
# text = recognizer.recognize_google(audio_data)
|
| 180 |
+
# return text
|
| 181 |
+
# except sr.UnknownValueError:
|
| 182 |
+
# return "Google Speech Recognition could not understand audio"
|
| 183 |
+
# except sr.RequestError as e:
|
| 184 |
+
# return f"Could not request results from Google Speech Recognition service; {e}"
|
| 185 |
|
| 186 |
|
| 187 |
+
def openai_speech_to_text(audio_file_path):
|
| 188 |
+
if audio_file_path:
|
| 189 |
+
client = OpenAI()
|
| 190 |
+
audio_file= open(audio_file_path, "rb")
|
| 191 |
+
transcription = client.audio.transcriptions.create(
|
| 192 |
+
model="whisper-1",
|
| 193 |
+
response_format="text",
|
| 194 |
+
file=audio_file
|
| 195 |
+
)
|
| 196 |
+
return transcription
|
| 197 |
|
| 198 |
def chat(text, history, native_language, language, persona, tone = "Casual", model = "gpt-4o-mini"):
|
| 199 |
|
|
|
|
| 274 |
|
| 275 |
# Gradio Custom Chatbot -------------------------------------------------------
|
| 276 |
|
| 277 |
+
with gr.Blocks(fill_height=True, theme=Base()) as demo:
|
| 278 |
|
| 279 |
chatbot = gr.Chatbot(
|
| 280 |
elem_id="chatbot",
|
|
|
|
| 286 |
|
| 287 |
chat_input = gr.Textbox(interactive=True, scale=8,
|
| 288 |
inputs=[gr.components.Audio(sources="microphone", type="filepath", label="Speak or upload audio")],
|
| 289 |
+
value=openai_speech_to_text)
|
| 290 |
submit_button = gr.Button("Submit", scale=1)
|
| 291 |
|
| 292 |
with gr.Row():
|