Spaces:
No application file
No application file
gent committed on
Commit ·
d809d42
1
Parent(s): d6b147b
init
Browse files- .gitignore +162 -0
- app.py +88 -0
- requirements.txt +3 -0
- utils/__init__.py +3 -0
- utils/asr.py +29 -0
- utils/bark_example.py +23 -0
- utils/llm.py +39 -0
- utils/tts.py +40 -0
.gitignore
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
apps
|
| 2 |
+
|
| 3 |
+
# Byte-compiled / optimized / DLL files
|
| 4 |
+
__pycache__/
|
| 5 |
+
*.py[cod]
|
| 6 |
+
*$py.class
|
| 7 |
+
|
| 8 |
+
# C extensions
|
| 9 |
+
*.so
|
| 10 |
+
|
| 11 |
+
# Distribution / packaging
|
| 12 |
+
.Python
|
| 13 |
+
build/
|
| 14 |
+
develop-eggs/
|
| 15 |
+
dist/
|
| 16 |
+
downloads/
|
| 17 |
+
eggs/
|
| 18 |
+
.eggs/
|
| 19 |
+
lib/
|
| 20 |
+
lib64/
|
| 21 |
+
parts/
|
| 22 |
+
sdist/
|
| 23 |
+
var/
|
| 24 |
+
wheels/
|
| 25 |
+
share/python-wheels/
|
| 26 |
+
*.egg-info/
|
| 27 |
+
.installed.cfg
|
| 28 |
+
*.egg
|
| 29 |
+
MANIFEST
|
| 30 |
+
|
| 31 |
+
# PyInstaller
|
| 32 |
+
# Usually these files are written by a python script from a template
|
| 33 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 34 |
+
*.manifest
|
| 35 |
+
*.spec
|
| 36 |
+
|
| 37 |
+
# Installer logs
|
| 38 |
+
pip-log.txt
|
| 39 |
+
pip-delete-this-directory.txt
|
| 40 |
+
|
| 41 |
+
# Unit test / coverage reports
|
| 42 |
+
htmlcov/
|
| 43 |
+
.tox/
|
| 44 |
+
.nox/
|
| 45 |
+
.coverage
|
| 46 |
+
.coverage.*
|
| 47 |
+
.cache
|
| 48 |
+
nosetests.xml
|
| 49 |
+
coverage.xml
|
| 50 |
+
*.cover
|
| 51 |
+
*.py,cover
|
| 52 |
+
.hypothesis/
|
| 53 |
+
.pytest_cache/
|
| 54 |
+
cover/
|
| 55 |
+
|
| 56 |
+
# Translations
|
| 57 |
+
*.mo
|
| 58 |
+
*.pot
|
| 59 |
+
|
| 60 |
+
# Django stuff:
|
| 61 |
+
*.log
|
| 62 |
+
local_settings.py
|
| 63 |
+
db.sqlite3
|
| 64 |
+
db.sqlite3-journal
|
| 65 |
+
|
| 66 |
+
# Flask stuff:
|
| 67 |
+
instance/
|
| 68 |
+
.webassets-cache
|
| 69 |
+
|
| 70 |
+
# Scrapy stuff:
|
| 71 |
+
.scrapy
|
| 72 |
+
|
| 73 |
+
# Sphinx documentation
|
| 74 |
+
docs/_build/
|
| 75 |
+
|
| 76 |
+
# PyBuilder
|
| 77 |
+
.pybuilder/
|
| 78 |
+
target/
|
| 79 |
+
|
| 80 |
+
# Jupyter Notebook
|
| 81 |
+
.ipynb_checkpoints
|
| 82 |
+
|
| 83 |
+
# IPython
|
| 84 |
+
profile_default/
|
| 85 |
+
ipython_config.py
|
| 86 |
+
|
| 87 |
+
# pyenv
|
| 88 |
+
# For a library or package, you might want to ignore these files since the code is
|
| 89 |
+
# intended to run in multiple environments; otherwise, check them in:
|
| 90 |
+
# .python-version
|
| 91 |
+
|
| 92 |
+
# pipenv
|
| 93 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 94 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 95 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 96 |
+
# install all needed dependencies.
|
| 97 |
+
#Pipfile.lock
|
| 98 |
+
|
| 99 |
+
# poetry
|
| 100 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
| 101 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 102 |
+
# commonly ignored for libraries.
|
| 103 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
| 104 |
+
#poetry.lock
|
| 105 |
+
|
| 106 |
+
# pdm
|
| 107 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
| 108 |
+
#pdm.lock
|
| 109 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
| 110 |
+
# in version control.
|
| 111 |
+
# https://pdm.fming.dev/#use-with-ide
|
| 112 |
+
.pdm.toml
|
| 113 |
+
|
| 114 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
| 115 |
+
__pypackages__/
|
| 116 |
+
|
| 117 |
+
# Celery stuff
|
| 118 |
+
celerybeat-schedule
|
| 119 |
+
celerybeat.pid
|
| 120 |
+
|
| 121 |
+
# SageMath parsed files
|
| 122 |
+
*.sage.py
|
| 123 |
+
|
| 124 |
+
# Environments
|
| 125 |
+
.env
|
| 126 |
+
.venv
|
| 127 |
+
env/
|
| 128 |
+
venv/
|
| 129 |
+
ENV/
|
| 130 |
+
env.bak/
|
| 131 |
+
venv.bak/
|
| 132 |
+
|
| 133 |
+
# Spyder project settings
|
| 134 |
+
.spyderproject
|
| 135 |
+
.spyproject
|
| 136 |
+
|
| 137 |
+
# Rope project settings
|
| 138 |
+
.ropeproject
|
| 139 |
+
|
| 140 |
+
# mkdocs documentation
|
| 141 |
+
/site
|
| 142 |
+
|
| 143 |
+
# mypy
|
| 144 |
+
.mypy_cache/
|
| 145 |
+
.dmypy.json
|
| 146 |
+
dmypy.json
|
| 147 |
+
|
| 148 |
+
# Pyre type checker
|
| 149 |
+
.pyre/
|
| 150 |
+
|
| 151 |
+
# pytype static type analyzer
|
| 152 |
+
.pytype/
|
| 153 |
+
|
| 154 |
+
# Cython debug symbols
|
| 155 |
+
cython_debug/
|
| 156 |
+
|
| 157 |
+
# PyCharm
|
| 158 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
| 159 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
| 160 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
| 161 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
| 162 |
+
#.idea/
|
app.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import time
|
| 3 |
+
from utils import *
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
# Instruction text prepended to the user's sentences when the "Summary"
# button asks the model for grammar corrections.
grammar_prompt = """
I want you to act as a grammar mistake checker and make the sentence more fluent. You take all the user input and auto correct it. Just reply to user input with correct grammar and reasons, DO NOT reply the context of the question of the user input. If the user input is grammatically correct and fluent, just ignore it. Sample of the conversation will show below:

Correct: today is a good day.

Original: today is a good day.
Corrected: Today is a good day.
Reason: Capitalize the first letter of the sentence.

###

"""

# Running conversation sent to the chat model. It starts with the system
# prompt, which can be overridden through the SECRET_PROMPT environment
# variable.
_system_prompt = os.environ.get("SECRET_PROMPT", "You are a chat bot. Talk to me!")
chat_history = [
    {"role": "system", "content": _system_prompt},
]
|
| 22 |
+
def convert_chatbox(chat_history):
    """Render every chat message as a ``"<role>: <content>"`` string.

    ``chat_history`` is an iterable of mappings that each provide the
    ``'role'`` and ``'content'`` keys. The result is a list with one
    formatted string per message, in the same order.
    """
    rendered = []
    for entry in chat_history:
        rendered.append(f"{entry['role']}: {entry['content']}")
    return rendered
|
| 24 |
+
|
| 25 |
+
with gr.Blocks() as demo:
|
| 26 |
+
chatbot = gr.Chatbot()
|
| 27 |
+
with gr.Row():
|
| 28 |
+
msg = gr.Textbox()
|
| 29 |
+
audio = gr.Audio(source="microphone", type="filepath", streaming=False)
|
| 30 |
+
player = gr.Audio( type="filepath", label="Speaker",interactive=False)
|
| 31 |
+
summary = gr.Button("Summary")
|
| 32 |
+
summary_box = gr.Textbox(label="Summary")
|
| 33 |
+
# functions
|
| 34 |
+
def respond(message):
    """Send one user message to the chat model and voice the reply.

    The message is sent together with the module-level ``chat_history``.
    The reply text is synthesised with ``tts`` and written to ``temp.wav``.

    Args:
        message: The user's text (typed, or transcribed from audio).

    Returns:
        A 3-tuple ``(input_value, audio_path, chat_pairs)`` matching the
        three output components this handler is wired to. ``input_value``
        is always ``None``; ``audio_path`` is ``"temp.wav"``, or ``None``
        when synthesis produced no audio.
    """
    # NOTE(review): chat_history and chatbot.value are module-level objects,
    # so the conversation appears to be shared by every session of the app.
    if not message or not message.strip():
        # Nothing to send; leave the player and the chat log as they are.
        return None, gr.update(), chatbot.value

    user_turn = {'role': 'user', 'content': message}
    # Build the request from a copy so that a failing API call does not
    # leave an unanswered user turn in the shared history.
    completion = generate_response(chat_history + [user_turn])
    mesg = completion['choices'][0]['message']
    print("recv: ", mesg)

    response = mesg['content']
    chat_history.append(user_turn)
    chat_history.append(mesg)

    # write to file
    speech = tts(response)
    audio_path = None
    if speech.audio_data:
        with open("temp.wav", "wb") as audio_file:
            audio_file.write(speech.audio_data)
        print("write to temp.wav")
        audio_path = "temp.wav"
    # else: synthesis yielded no bytes (tts() has already printed the
    # reason), so do not hand an empty file to the player.

    chatbot.value.append((message, response))
    print("chat_history: ", chatbot.value)

    return None, audio_path, chatbot.value
|
| 55 |
+
|
| 56 |
+
msg.submit(respond, [msg], [msg, player,chatbot])
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def transcribe(audio_file):
    """Transcribe a recorded audio file and answer it like typed input.

    Args:
        audio_file: Path of the recording, or ``None`` when the audio
            component has been cleared.

    Returns:
        The 3-tuple produced by ``respond`` for the recognised text, or
        "no change" updates when there is nothing to process.
    """
    if not audio_file:
        # The change event also arrives when the recording is cleared
        # (presumably including the clear caused by our own ``None``
        # output); there is no file to recognise in that case.
        return gr.update(), gr.update(), gr.update()

    print("start transcribe, ", audio_file)

    start = time.time()
    text = recognize_from_file(audio_file)
    print("use ", time.time()-start)

    print("transcribe done, ", text)
    if not text:
        # Nothing was recognised: reset the recorder but do not send an
        # empty message to the chat model.
        return None, gr.update(), gr.update()
    return respond(text)
|
| 68 |
+
|
| 69 |
+
audio.change(transcribe, [audio], [audio, player, chatbot])
|
| 70 |
+
|
| 71 |
+
def summary_response():
    """Ask the chat model for grammar corrections of the user's sentences.

    Every user message stored in ``chatbot.value`` is prefixed with
    ``"Correct: "``. The lines are appended to ``grammar_prompt`` and sent
    as a single user message.

    Returns:
        The model's reply text, or an empty string when there are no user
        sentences to check yet.
    """
    sentences = ["Correct: " + user for user, _assistant in chatbot.value if user]
    if not sentences:
        # Avoid an API call that would carry only the instructions.
        return ""

    messages = [
        {'role': 'user', 'content': grammar_prompt + "\n".join(sentences)},
    ]

    result = generate_response(messages)
    mesg = result['choices'][0]['message']
    print("recv: ", mesg)

    return mesg['content']
|
| 85 |
+
|
| 86 |
+
summary.click(summary_response, None, summary_box, queue=False)
|
| 87 |
+
|
| 88 |
+
demo.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openai>=0.27.7
|
| 2 |
+
azure-cognitiveservices-speech==1.28.0
|
| 3 |
+
gradio==3.23.0
|
utils/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .asr import *
|
| 2 |
+
from .tts import *
|
| 3 |
+
from .llm import *
|
utils/asr.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import azure.cognitiveservices.speech as speechsdk
|
| 2 |
+
import os
|
| 3 |
+
import time
|
| 4 |
+
# Replace with your own subscription key and service region
|
| 5 |
+
|
| 6 |
+
def get_recoginizer(**kwargs):
    """Build an en-US ``SpeechRecognizer``.

    The subscription key and region are read from the ``SPEECH_KEY`` and
    ``SPEECH_REGION`` environment variables. All keyword arguments are
    forwarded unchanged to ``speechsdk.audio.AudioConfig``.
    """
    key = os.environ.get('SPEECH_KEY')
    region = os.environ.get('SPEECH_REGION')

    config = speechsdk.SpeechConfig(subscription=key, region=region)
    config.speech_recognition_language = "en-US"

    audio_source = speechsdk.audio.AudioConfig(**kwargs)
    return speechsdk.SpeechRecognizer(speech_config=config, audio_config=audio_source)
|
| 11 |
+
|
| 12 |
+
def recognize_from_file(file=None):
    """Recognise a single utterance from an audio file.

    This requires environment variables named "SPEECH_KEY" and
    "SPEECH_REGION".

    Args:
        file: Path of the audio file to transcribe.

    Returns:
        ``result.text`` as reported by the recogniser. When recognition
        does not succeed, the reason is printed before returning.
    """
    # Reuse the shared factory so the key/region/language setup lives in
    # one place.
    speech_recognizer = get_recoginizer(filename=file)

    result = speech_recognizer.recognize_once_async().get()

    # Report unsuccessful outcomes instead of failing silently, in the
    # same style as the synthesis helper in tts.py.
    if result.reason == speechsdk.ResultReason.NoMatch:
        print("No speech could be recognized: {}".format(result.no_match_details))
    elif result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        print("Speech recognition canceled: {}".format(cancellation_details.reason))
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            print("Error details: {}".format(cancellation_details.error_details))
            print("Did you set the speech resource key and region values?")
    return result.text
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
if __name__ == '__main__':
    # Manual smoke test: transcribe every file found in ./audio_samples
    # and print the text together with the elapsed wall-clock seconds.
    for sample_name in os.listdir("audio_samples"):
        started_at = time.time()
        transcript = recognize_from_file(f"audio_samples/{sample_name}")
        print(transcript, " in ", time.time() - started_at)
|
| 29 |
+
|
utils/bark_example.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os

# These switches are set before ``bark`` is imported
# (presumably the package reads them at import time - confirm).
os.environ["SUNO_USE_SMALL_MODELS"] = "True"
os.environ["SUNO_OFFLOAD_CPU"] = "True"

import torch
from bark import SAMPLE_RATE, generate_audio, preload_models
from IPython.display import Audio
from scipy.io.wavfile import write as write_wav

# Download and load all models.
preload_models()

# Text to be spoken.
text_prompt = """
Of course! Here is our menu. We have a wide variety of authentic Chinese dishes to choose from.
"""

# Generate the waveform without tracking gradients.
with torch.no_grad():
    audio_array = generate_audio(text_prompt, history_prompt="v2/en_speaker_1")

# Save the audio to disk.
write_wav("bark_generation.wav", SAMPLE_RATE, audio_array)

# Play the audio when run inside a notebook.
Audio(audio_array, rate=SAMPLE_RATE)
|
utils/llm.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Import necessary libraries
|
| 2 |
+
import gradio as gr
|
| 3 |
+
import openai
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
# Set up OpenAI API key
|
| 7 |
+
openai.api_key = os.environ['OPENAI_API_KEY']
|
| 8 |
+
openai.proxy = os.getenv('HTTP_PROXY', "")
|
| 9 |
+
|
| 10 |
+
# Define function to generate bot response
|
| 11 |
+
# messages=[
|
| 12 |
+
# {"role": "system", "content": "You are a helpful assistant."},
|
| 13 |
+
# {"role": "user", "content": "Who won the world series in 2020?"},
|
| 14 |
+
# {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."},
|
| 15 |
+
# {"role": "user", "content": "Where was it played?"}
|
| 16 |
+
# ]
|
| 17 |
+
|
| 18 |
+
# Call OpenAI GPT-3 API to generate a response
|
| 19 |
+
def generate_response(messages, model = "gpt-3.5-turbo"):
    """Request a chat completion and return the raw API response.

    Args:
        messages: Conversation so far, as a list of
            ``{"role": ..., "content": ...}`` dictionaries.
        model: Name of the chat model to query.

    Returns:
        Whatever ``openai.ChatCompletion.create`` returns; callers in this
        project read ``response['choices'][0]['message']`` from it.
    """
    # NOTE(review): ``openai.ChatCompletion`` looks like the pre-1.0 client
    # interface, while requirements.txt only sets a lower bound - confirm
    # the installed version still provides it.
    request = {"model": model, "messages": messages}
    return openai.ChatCompletion.create(**request)
|
| 28 |
+
|
| 29 |
+
if __name__ == '__main__':
    # Manual smoke test: replay a short canned conversation and print both
    # the full response object and the reply text.
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Who won the world series in 2020?"},
        {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."},
        {"role": "user", "content": "Where was it played?"},
    ]
    response = generate_response(messages)
    print(response)
    reply = response['choices'][0]['message']
    print(reply['content'])
|
| 39 |
+
|
utils/tts.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import azure.cognitiveservices.speech as speechsdk
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
+
# Create a speech synthesizer object
|
| 5 |
+
from urllib.parse import urlsplit as _urlsplit

# Credentials come from the SPEECH_KEY / SPEECH_REGION environment variables.
speech_config = speechsdk.SpeechConfig(subscription=os.environ.get('SPEECH_KEY'), region=os.environ.get('SPEECH_REGION'))
speech_config.speech_synthesis_voice_name = "en-GB-ElliotNeural"  # Set the desired voice here

# Only route through a proxy when one is configured. This reads the same
# HTTP_PROXY variable as llm.py instead of a hard-coded local
# development proxy, which fails wherever that proxy is not running.
_proxy_url = os.getenv('HTTP_PROXY', "")
if _proxy_url:
    # Accept both "host:port" and "scheme://[user:pass@]host:port".
    _proxy = _urlsplit(_proxy_url if "://" in _proxy_url else "http://" + _proxy_url)
    if _proxy.hostname and _proxy.port:
        speech_config.set_proxy(
            _proxy.hostname,
            _proxy.port,
            username=_proxy.username,
            password=_proxy.password,
        )
    else:
        print("Ignoring HTTP_PROXY for speech synthesis: host and port are required")

# NOTE(review): no audio_config is passed, so the SDK's default output is
# used - confirm this works on a host without an audio device.
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config)
|
| 9 |
+
|
| 10 |
+
def tts(text)-> speechsdk.SpeechSynthesisResult:
    """Synthesise ``text`` with the module-level ``speech_synthesizer``.

    A short status line is printed for completed or cancelled synthesis.
    The SDK result object is returned unchanged in every case; callers
    read the audio bytes from its ``audio_data`` attribute.
    """
    outcome = speech_synthesizer.speak_text(text)

    if outcome.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        print("Speech synthesized for text [{}]".format(text))
        return outcome

    if outcome.reason != speechsdk.ResultReason.Canceled:
        # Any other outcome is passed back without a message.
        return outcome

    details = outcome.cancellation_details
    print("Speech synthesis canceled: {}".format(details.reason))
    failed = details.reason == speechsdk.CancellationReason.Error
    if failed and details.error_details:
        print("Error details: {}".format(details.error_details))
        print("Did you set the speech resource key and region values?")
    return outcome
|
| 24 |
+
|
| 25 |
+
if __name__ == '__main__':
    # Manual smoke test: synthesise a few sample sentences, save each one
    # as a WAV file and print how long the synthesis took.
    import time

    # Make sure the output directory exists before the first write.
    os.makedirs("audio_samples", exist_ok=True)

    for i, text in enumerate([
        "Welcome to Jade Palace. My name is Jack, I'll be your server tonight. How can I help you?",
        "We have a variety of traditional Chinese dishes. Some of our most popular items are Kung Pao Chicken, Sweet and Sour Pork, Beef with Broccoli, and Egg Foo Young. We also offer Dim Sum, fresh seafood, noodle soups, and of course classic dishes like Spring Rolls, Dumplings, and Won Ton Soup. Would you like to see our full menu?",
        "Absolutely, my apologies. Please, right this way. Here we are, I have a nice quiet table for you in the corner. Please have a seat. Would you like to start with some tea or a drink while you look over the menu? We have jasmine tea, oolong tea, Tsingtao beer or hot sake if you prefer. Just let me know when you are ready to order. Thank you."
    ]):
        start = time.time()
        result = tts(text)
        # Save the audio to file
        with open(f"audio_samples/audo_{i}.wav", "wb") as audio_file:
            audio_file.write(result.audio_data)
        print(result, " in ", time.time()-start)
|
| 40 |
+
|