gent committed on
Commit
d809d42
·
1 Parent(s): d6b147b
Files changed (8) hide show
  1. .gitignore +162 -0
  2. app.py +88 -0
  3. requirements.txt +3 -0
  4. utils/__init__.py +3 -0
  5. utils/asr.py +29 -0
  6. utils/bark_example.py +23 -0
  7. utils/llm.py +39 -0
  8. utils/tts.py +40 -0
.gitignore ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ apps
2
+
3
+ # Byte-compiled / optimized / DLL files
4
+ __pycache__/
5
+ *.py[cod]
6
+ *$py.class
7
+
8
+ # C extensions
9
+ *.so
10
+
11
+ # Distribution / packaging
12
+ .Python
13
+ build/
14
+ develop-eggs/
15
+ dist/
16
+ downloads/
17
+ eggs/
18
+ .eggs/
19
+ lib/
20
+ lib64/
21
+ parts/
22
+ sdist/
23
+ var/
24
+ wheels/
25
+ share/python-wheels/
26
+ *.egg-info/
27
+ .installed.cfg
28
+ *.egg
29
+ MANIFEST
30
+
31
+ # PyInstaller
32
+ # Usually these files are written by a python script from a template
33
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
34
+ *.manifest
35
+ *.spec
36
+
37
+ # Installer logs
38
+ pip-log.txt
39
+ pip-delete-this-directory.txt
40
+
41
+ # Unit test / coverage reports
42
+ htmlcov/
43
+ .tox/
44
+ .nox/
45
+ .coverage
46
+ .coverage.*
47
+ .cache
48
+ nosetests.xml
49
+ coverage.xml
50
+ *.cover
51
+ *.py,cover
52
+ .hypothesis/
53
+ .pytest_cache/
54
+ cover/
55
+
56
+ # Translations
57
+ *.mo
58
+ *.pot
59
+
60
+ # Django stuff:
61
+ *.log
62
+ local_settings.py
63
+ db.sqlite3
64
+ db.sqlite3-journal
65
+
66
+ # Flask stuff:
67
+ instance/
68
+ .webassets-cache
69
+
70
+ # Scrapy stuff:
71
+ .scrapy
72
+
73
+ # Sphinx documentation
74
+ docs/_build/
75
+
76
+ # PyBuilder
77
+ .pybuilder/
78
+ target/
79
+
80
+ # Jupyter Notebook
81
+ .ipynb_checkpoints
82
+
83
+ # IPython
84
+ profile_default/
85
+ ipython_config.py
86
+
87
+ # pyenv
88
+ # For a library or package, you might want to ignore these files since the code is
89
+ # intended to run in multiple environments; otherwise, check them in:
90
+ # .python-version
91
+
92
+ # pipenv
93
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
94
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
95
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
96
+ # install all needed dependencies.
97
+ #Pipfile.lock
98
+
99
+ # poetry
100
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
101
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
102
+ # commonly ignored for libraries.
103
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
104
+ #poetry.lock
105
+
106
+ # pdm
107
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
108
+ #pdm.lock
109
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
110
+ # in version control.
111
+ # https://pdm.fming.dev/#use-with-ide
112
+ .pdm.toml
113
+
114
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
115
+ __pypackages__/
116
+
117
+ # Celery stuff
118
+ celerybeat-schedule
119
+ celerybeat.pid
120
+
121
+ # SageMath parsed files
122
+ *.sage.py
123
+
124
+ # Environments
125
+ .env
126
+ .venv
127
+ env/
128
+ venv/
129
+ ENV/
130
+ env.bak/
131
+ venv.bak/
132
+
133
+ # Spyder project settings
134
+ .spyderproject
135
+ .spyproject
136
+
137
+ # Rope project settings
138
+ .ropeproject
139
+
140
+ # mkdocs documentation
141
+ /site
142
+
143
+ # mypy
144
+ .mypy_cache/
145
+ .dmypy.json
146
+ dmypy.json
147
+
148
+ # Pyre type checker
149
+ .pyre/
150
+
151
+ # pytype static type analyzer
152
+ .pytype/
153
+
154
+ # Cython debug symbols
155
+ cython_debug/
156
+
157
+ # PyCharm
158
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
159
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
160
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
161
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
162
+ #.idea/
app.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import time
3
+ from utils import *
4
+ import os
5
+
6
# Instruction prefix used by the "Summary" button: it asks the model to act as
# a grammar checker. The user's sentences are appended after the "###" marker.
grammar_prompt = """
I want you to act as a grammar mistake checker and make the sentence more fluent. You take all the user input and auto correct it. Just reply to user input with correct grammar and reasons, DO NOT reply the context of the question of the user input. If the user input is grammatically correct and fluent, just ignore it. Sample of the conversation will show below:

Correct: today is a good day.

Original: today is a good day.
Corrected: Today is a good day.
Reason: Capitalize the first letter of the sentence.

###

"""

# Seed conversation for the chat model. The system prompt is taken from the
# SECRET_PROMPT environment variable, with a generic fallback when it is unset.
_system_prompt = os.environ.get("SECRET_PROMPT", "You are a chat bot. Talk to me!")
chat_history = [{"role": "system", "content": _system_prompt}]
22
def convert_chatbox(chat_history):
    """Render each chat message as a single ``"<role>: <content>"`` string.

    Args:
        chat_history: Iterable of message dicts that each have a ``role``
            and a ``content`` key.

    Returns:
        A list with one formatted string per message, in input order.
    """
    rendered = []
    for message in chat_history:
        rendered.append("{}: {}".format(message['role'], message['content']))
    return rendered
24
+
25
def _chat_pairs(messages):
    """Turn a chat-completion message log into ``(user, assistant)`` pairs.

    Only ``user`` messages that are followed by an ``assistant`` message are
    emitted; system messages (and any dangling user message) are skipped, so
    the system prompt is never shown in the chat window.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` dicts.

    Returns:
        A list of ``(user_text, assistant_text)`` tuples for ``gr.Chatbot``.
    """
    pairs = []
    pending_user = None
    for entry in messages:
        role = entry['role']
        if role == 'user':
            pending_user = entry['content']
        elif role == 'assistant' and pending_user is not None:
            pairs.append((pending_user, entry['content']))
            pending_user = None
    return pairs


# NOTE(review): indentation was lost in the reviewed view of this file; the
# assumption here is that only the text box and the microphone share the row.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    with gr.Row():
        msg = gr.Textbox()
        audio = gr.Audio(source="microphone", type="filepath", streaming=False)
    player = gr.Audio(type="filepath", label="Speaker", interactive=False)
    summary = gr.Button("Summary")
    summary_box = gr.Textbox(label="Summary")

    # The conversation is kept in a gr.State seeded from the module-level
    # ``chat_history`` instead of mutating that global list and
    # ``chatbot.value``: those are shared by everyone connected to the app,
    # so different visitors would read and extend the same conversation.
    session = gr.State(chat_history)

    def _reply(message, messages):
        """Ask the model to answer ``message`` and synthesize the answer.

        Args:
            message: The user's text.
            messages: The session's message log (not modified in place).

        Returns:
            ``(audio_path, updated_messages)``. ``audio_path`` is ``None``
            when speech synthesis produced no audio.
        """
        # Work on a copy so a failing API call leaves the session untouched
        # (no dangling user turn without an answer).
        updated = list(messages) + [{'role': 'user', 'content': message}]
        result = generate_response(updated)
        mesg = result['choices'][0]['message']
        print("recv: ", mesg)

        response = mesg['content']
        updated.append({'role': mesg['role'], 'content': response})

        # write to file
        speech = tts(response)
        audio_path = None
        if speech.audio_data:
            # NOTE(review): the fixed file name is shared by all sessions, so
            # concurrent replies can overwrite each other.
            with open("temp.wav", "wb") as audio_file:
                audio_file.write(speech.audio_data)
            print("write to temp.wav")
            audio_path = "temp.wav"
        return audio_path, updated

    def respond(message, messages):
        """Handle a submitted text message.

        Returns values for ``[msg, player, chatbot, session]``: the text box
        is cleared, the player gets the synthesized answer, and the chat
        window / session state get the extended conversation.
        """
        if not message or not message.strip():
            # Nothing to send: leave every component as it is.
            return gr.update(), gr.update(), gr.update(), messages

        audio_path, messages = _reply(message, messages)
        history = _chat_pairs(messages)
        print("chat_history: ", history)
        return None, audio_path, history, messages

    msg.submit(respond, [msg, session], [msg, player, chatbot, session])

    def transcribe(audio_file, messages):
        """Transcribe a finished recording and answer it like a typed message.

        Returns values for ``[audio, player, chatbot, session]``; the
        recording is cleared once it has been processed.
        """
        if audio_file is None:
            # The change event is also expected to fire when the recording is
            # cleared (including by this handler's own ``None`` output);
            # there is nothing to transcribe in that case.
            return gr.update(), gr.update(), gr.update(), messages

        print("start transcribe, ", audio_file)

        start = time.time()
        text = recognize_from_file(audio_file)
        print("use ", time.time()-start)

        print("transcribe done, ", text)
        if not text or not text.strip():
            # No speech recognized: drop the recording, keep everything else.
            return None, gr.update(), gr.update(), messages
        return respond(text, messages)

    audio.change(transcribe, [audio, session], [audio, player, chatbot, session])

    def summary_response(messages):
        """Ask the model for grammar corrections of this session's user turns.

        Returns:
            The model's reply text, or an empty string when the user has not
            said anything yet.
        """
        sentences = [
            "Correct: " + entry['content']
            for entry in messages
            if entry['role'] == 'user'
        ]
        if not sentences:
            return ""

        request = [{'role': 'user', 'content': grammar_prompt + "\n".join(sentences)}]
        result = generate_response(request)
        mesg = result['choices'][0]['message']
        corrected = mesg['content']
        print("recv: ", mesg)

        return corrected

    summary.click(summary_response, [session], summary_box, queue=False)

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ openai>=0.27.7
2
+ azure-cognitiveservices-speech==1.28.0
3
+ gradio==3.23.0
utils/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .asr import *
2
+ from .tts import *
3
+ from .llm import *
utils/asr.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import azure.cognitiveservices.speech as speechsdk
2
+ import os
3
+ import time
4
+ # The subscription key and service region are read from the SPEECH_KEY and SPEECH_REGION environment variables.
5
+
6
def get_recoginizer(**kwargs):
    """Build an en-US speech recognizer configured from the environment.

    The subscription key and region are read from the ``SPEECH_KEY`` and
    ``SPEECH_REGION`` environment variables.

    Args:
        **kwargs: Forwarded unchanged to ``speechsdk.audio.AudioConfig`` to
            select the audio input.

    Returns:
        A ``speechsdk.SpeechRecognizer`` bound to that audio input.
    """
    key = os.environ.get('SPEECH_KEY')
    region = os.environ.get('SPEECH_REGION')

    config = speechsdk.SpeechConfig(subscription=key, region=region)
    config.speech_recognition_language = "en-US"

    audio_input = speechsdk.audio.AudioConfig(**kwargs)
    return speechsdk.SpeechRecognizer(speech_config=config, audio_config=audio_input)
11
+
12
def recognize_from_file(file=None):
    """Transcribe one utterance from an audio file with Azure Speech.

    Requires the ``SPEECH_KEY`` and ``SPEECH_REGION`` environment variables
    (read by ``get_recoginizer``).

    Args:
        file: Path of the audio file to transcribe.

    Returns:
        ``result.text`` as reported by the SDK. NOTE(review): expected to be
        an empty string when nothing was recognized or the request was
        canceled; confirm against the SDK documentation.

    Raises:
        ValueError: If ``file`` is ``None`` or empty.
    """
    if not file:
        # Fail early with a clear message instead of an SDK-level error.
        raise ValueError("recognize_from_file() requires the path of an audio file")

    # Reuse the shared factory rather than duplicating its configuration.
    speech_recognizer = get_recoginizer(filename=file)

    result = speech_recognizer.recognize_once_async().get()

    # Report failures the same way tts() does, so an empty transcript can be
    # diagnosed from the log.
    if result.reason == speechsdk.ResultReason.NoMatch:
        print("No speech could be recognized: {}".format(result.no_match_details))
    elif result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        print("Speech recognition canceled: {}".format(cancellation_details.reason))
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            if cancellation_details.error_details:
                print("Error details: {}".format(cancellation_details.error_details))
                print("Did you set the speech resource key and region values?")
    return result.text
23
+
24
+
25
if __name__ == '__main__':
    # Manual smoke test: transcribe every file found in ./audio_samples and
    # print the transcript together with the time the call took.
    for sample_name in os.listdir("audio_samples"):
        began = time.time()
        transcript = recognize_from_file(f"audio_samples/{sample_name}")
        elapsed = time.time() - began
        print(transcript, " in ", elapsed)
29
+
utils/bark_example.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Stand-alone example: synthesize one sentence with Bark, save it as a WAV
# file and display it in a notebook.
import os
# These flags are set before ``bark`` is imported below; presumably bark reads
# them when it is imported or when models load, so keep this ordering.
os.environ["SUNO_USE_SMALL_MODELS"] = "True"
os.environ["SUNO_OFFLOAD_CPU"] = "True"
from bark import SAMPLE_RATE, generate_audio, preload_models
from scipy.io.wavfile import write as write_wav
from IPython.display import Audio

# download and load all models
preload_models()

# generate audio from text
text_prompt = """
Of course! Here is our menu. We have a wide variety of authentic Chinese dishes to choose from.
"""
import torch
# Generation runs with gradient tracking disabled.
with torch.no_grad():
    audio_array = generate_audio(text_prompt, history_prompt="v2/en_speaker_1")

# save audio to disk
write_wav("bark_generation.wav", SAMPLE_RATE, audio_array)

# play text in notebook
# NOTE(review): as a bare expression this is only rendered when it is the last
# expression of a notebook cell; run as a plain script the value is discarded.
Audio(audio_array, rate=SAMPLE_RATE)
utils/llm.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import necessary libraries
2
+ import gradio as gr
3
+ import openai
4
+ import os
5
+
6
# Configure the OpenAI client from the environment. OPENAI_API_KEY is
# mandatory (a missing variable raises KeyError at import time); HTTP_PROXY is
# optional and falls back to an empty string.
_environment = os.environ
openai.api_key = _environment['OPENAI_API_KEY']
openai.proxy = _environment.get('HTTP_PROXY', "")
9
+
10
+ # Example of the `messages` list that generate_response() expects:
11
+ # messages=[
12
+ # {"role": "system", "content": "You are a helpful assistant."},
13
+ # {"role": "user", "content": "Who won the world series in 2020?"},
14
+ # {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."},
15
+ # {"role": "user", "content": "Where was it played?"}
16
+ # ]
17
+
18
def generate_response(messages, model = "gpt-3.5-turbo"):
    """Request a chat completion from OpenAI for the given conversation.

    Args:
        messages: Conversation so far, as a list of
            ``{"role": ..., "content": ...}`` dicts.
        model: Name of the chat model to query.

    Returns:
        The complete response object returned by
        ``openai.ChatCompletion.create``; the reply text is found at
        ``response['choices'][0]['message']['content']``.
    """
    request = {"model": model, "messages": messages}
    return openai.ChatCompletion.create(**request)
28
+
29
if __name__ == '__main__':
    # Manual check: send a short canned conversation and print both the raw
    # response and the extracted reply text.
    conversation = [
        ("system", "You are a helpful assistant."),
        ("user", "Who won the world series in 2020?"),
        ("assistant", "The Los Angeles Dodgers won the World Series in 2020."),
        ("user", "Where was it played?"),
    ]
    messages = [{"role": role, "content": content} for role, content in conversation]
    response = generate_response(messages)
    print(response)
    reply = response['choices'][0]['message']['content']
    print(reply)
39
+
utils/tts.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import azure.cognitiveservices.speech as speechsdk
2
+ import os
3
+
4
def _parse_proxy_url(url):
    """Split a proxy URL into the arguments for ``SpeechConfig.set_proxy``.

    Args:
        url: Proxy address such as ``http://user:pw@127.0.0.1:2080`` or
            ``127.0.0.1:2080``; may be empty.

    Returns:
        ``(hostname, port, username, password)``, or ``None`` when ``url`` is
        empty or no host/port can be extracted from it.
    """
    from urllib.parse import urlsplit

    if not url:
        return None
    if "://" not in url:
        # urlsplit only recognizes the host when a scheme is present.
        url = "http://" + url
    parts = urlsplit(url)
    try:
        port = parts.port
    except ValueError:
        # Port is not a valid number.
        return None
    if not parts.hostname:
        return None
    if port is None:
        port = 443 if parts.scheme == "https" else 80
    return parts.hostname, port, parts.username, parts.password


# Create a speech synthesizer object
speech_config = speechsdk.SpeechConfig(subscription=os.environ.get('SPEECH_KEY'), region=os.environ.get('SPEECH_REGION'))
speech_config.speech_synthesis_voice_name = "en-GB-ElliotNeural"  # Set the desired voice here
# The proxy used to be hard-coded to 127.0.0.1:2080, which makes every request
# fail on hosts without that local proxy. It is now taken from HTTP_PROXY (the
# variable llm.py already uses) and skipped when that variable is unset.
_proxy = _parse_proxy_url(os.getenv('HTTP_PROXY', ""))
if _proxy is not None:
    speech_config.set_proxy(*_proxy)
# NOTE(review): no audio_config is passed here; per the Speech SDK docs this
# means the synthesizer also plays through the host's default speaker. Pass
# audio_config=None if only result.audio_data is wanted (confirm intent).
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config)
9
+
10
def tts(text)-> speechsdk.SpeechSynthesisResult:
    """Synthesize ``text`` with the module-level synthesizer and log the outcome.

    Args:
        text: The text to speak.

    Returns:
        The SDK result object, whether or not synthesis succeeded; callers
        read the audio bytes from its ``audio_data`` attribute.
    """
    outcome = speech_synthesizer.speak_text(text)
    reason = outcome.reason

    if reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        print(f"Speech synthesized for text [{text}]")
        return outcome
    if reason != speechsdk.ResultReason.Canceled:
        # Any other outcome is returned without logging.
        return outcome

    details = outcome.cancellation_details
    print(f"Speech synthesis canceled: {details.reason}")
    if details.reason == speechsdk.CancellationReason.Error and details.error_details:
        print(f"Error details: {details.error_details}")
        print("Did you set the speech resource key and region values?")
    return outcome
24
+
25
if __name__ == '__main__':
    # Manual smoke test: synthesize a few sample sentences, save each one as
    # a WAV file under ./audio_samples and print the time each call took.
    import time

    samples = [
        "Welcome to Jade Palace. My name is Jack, I'll be your server tonight. How can I help you?",
        "We have a variety of traditional Chinese dishes. Some of our most popular items are Kung Pao Chicken, Sweet and Sour Pork, Beef with Broccoli, and Egg Foo Young. We also offer Dim Sum, fresh seafood, noodle soups, and of course classic dishes like Spring Rolls, Dumplings, and Won Ton Soup. Would you like to see our full menu?",
        "Absolutely, my apologies. Please, right this way. Here we are, I have a nice quiet table for you in the corner. Please have a seat. Would you like to start with some tea or a drink while you look over the menu? We have jasmine tea, oolong tea, Tsingtao beer or hot sake if you prefer. Just let me know when you are ready to order. Thank you.",
    ]

    # The output directory is not part of the repository; create it so that
    # open() below does not fail with FileNotFoundError on a fresh checkout.
    os.makedirs("audio_samples", exist_ok=True)

    for i, text in enumerate(samples):
        start = time.time()
        result = tts(text)
        # NOTE(review): "audo_" looks like a typo for "audio_"; the name is
        # kept so previously generated sample files keep matching.
        with open(f"audio_samples/audo_{i}.wav", "wb") as audio_file:
            audio_file.write(result.audio_data)
        print(result, " in ", time.time()-start)
40
+