Spaces:
Runtime error
Runtime error
WIP: asyncio version of RespondToPrompt. basic singleton version
Browse files
- .vscode/launch.json +1 -1
- app.py +26 -20
- app_interface_actor.py +16 -1
- charles_actor.py +35 -23
- chat_service.py +3 -3
- profile.html +0 -0
- profile.json +0 -0
- respond_to_prompt_async.py +118 -0
- text_to_speech_service.py +5 -7
.vscode/launch.json
CHANGED
|
@@ -16,7 +16,7 @@
|
|
| 16 |
"name": "debug streamlit",
|
| 17 |
"type": "python",
|
| 18 |
"request": "launch",
|
| 19 |
-
"program": "
|
| 20 |
"args": [
|
| 21 |
"run",
|
| 22 |
"app.py"
|
|
|
|
| 16 |
"name": "debug streamlit",
|
| 17 |
"type": "python",
|
| 18 |
"request": "launch",
|
| 19 |
+
"program": "~/miniconda3/envs/project_charles/bin/streamlit",
|
| 20 |
"args": [
|
| 21 |
"run",
|
| 22 |
"app.py"
|
app.py
CHANGED
|
@@ -45,16 +45,16 @@ def init_ray():
|
|
| 45 |
else:
|
| 46 |
ray.init(namespace="project_charles")
|
| 47 |
|
| 48 |
-
@st.cache_resource
|
| 49 |
-
def get_charles_actor():
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
|
| 59 |
@st.cache_resource
|
| 60 |
def get_streamlit_av_queue():
|
|
@@ -62,13 +62,19 @@ def get_streamlit_av_queue():
|
|
| 62 |
streamlit_av_queue_instance = StreamlitAVQueue()
|
| 63 |
return streamlit_av_queue_instance
|
| 64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
async def main():
|
| 66 |
# Initialize Ray
|
| 67 |
ray_status = init_ray()
|
| 68 |
while not ray.is_initialized():
|
| 69 |
await asyncio.sleep(0.1)
|
| 70 |
# get ray actors
|
| 71 |
-
|
| 72 |
await asyncio.sleep(0.1)
|
| 73 |
streamlit_av_queue = get_streamlit_av_queue()
|
| 74 |
await asyncio.sleep(0.1)
|
|
@@ -126,20 +132,20 @@ async def main():
|
|
| 126 |
system_one_audio_status.write("Camera has stopped.")
|
| 127 |
await asyncio.sleep(0.1)
|
| 128 |
continue
|
| 129 |
-
if charles_actor is None:
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
try:
|
| 137 |
# new_environment_state = await charles_actor.get_environment_state.remote()
|
| 138 |
# environment_state_ouput.markdown(f"{new_environment_state}")
|
| 139 |
streamlit_av_queue.set_looking_listening(looking, listening)
|
| 140 |
-
charles_debug_str = await
|
| 141 |
charles_actor_debug_output.markdown(charles_debug_str)
|
| 142 |
-
state = await
|
| 143 |
system_one_audio_status.write(state)
|
| 144 |
except Exception as e:
|
| 145 |
# assume we disconnected
|
|
|
|
| 45 |
else:
|
| 46 |
ray.init(namespace="project_charles")
|
| 47 |
|
| 48 |
+
# @st.cache_resource
|
| 49 |
+
# def get_charles_actor():
|
| 50 |
+
# charles_actor_instance = None
|
| 51 |
+
# charles_actor_proc = subprocess.Popen([sys.executable, "charles_actor.py"])
|
| 52 |
+
# while charles_actor_instance == None:
|
| 53 |
+
# try:
|
| 54 |
+
# charles_actor_instance = ray.get_actor("CharlesActor")
|
| 55 |
+
# except ValueError as e:
|
| 56 |
+
# time.sleep(0.1) # give the subprocess a chance to start
|
| 57 |
+
# return charles_actor_instance
|
| 58 |
|
| 59 |
@st.cache_resource
|
| 60 |
def get_streamlit_av_queue():
|
|
|
|
| 62 |
streamlit_av_queue_instance = StreamlitAVQueue()
|
| 63 |
return streamlit_av_queue_instance
|
| 64 |
|
| 65 |
+
@st.cache_resource
def get_app_interface_instance():
    """Return the AppInterfaceActor singleton handle.

    Wrapped in ``st.cache_resource`` so the lookup result is cached by
    Streamlit rather than recomputed on every call.
    """
    # Imported lazily, inside the function, exactly as the original did.
    from app_interface_actor import AppInterfaceActor

    return AppInterfaceActor.get_singleton()
|
| 70 |
+
|
| 71 |
async def main():
|
| 72 |
# Initialize Ray
|
| 73 |
ray_status = init_ray()
|
| 74 |
while not ray.is_initialized():
|
| 75 |
await asyncio.sleep(0.1)
|
| 76 |
# get ray actors
|
| 77 |
+
app_interface_instance = get_app_interface_instance()
|
| 78 |
await asyncio.sleep(0.1)
|
| 79 |
streamlit_av_queue = get_streamlit_av_queue()
|
| 80 |
await asyncio.sleep(0.1)
|
|
|
|
| 132 |
system_one_audio_status.write("Camera has stopped.")
|
| 133 |
await asyncio.sleep(0.1)
|
| 134 |
continue
|
| 135 |
+
# if charles_actor is None:
|
| 136 |
+
# system_one_audio_status.write("Looking for Charles actor...")
|
| 137 |
+
# charles_actor = get_charles_actor()
|
| 138 |
+
# if charles_actor is None:
|
| 139 |
+
# await asyncio.sleep(0.1)
|
| 140 |
+
# continue
|
| 141 |
+
# system_one_audio_status.write("Found Charles actor.")
|
| 142 |
try:
|
| 143 |
# new_environment_state = await charles_actor.get_environment_state.remote()
|
| 144 |
# environment_state_ouput.markdown(f"{new_environment_state}")
|
| 145 |
streamlit_av_queue.set_looking_listening(looking, listening)
|
| 146 |
+
charles_debug_str = await app_interface_instance.get_debug_output.remote()
|
| 147 |
charles_actor_debug_output.markdown(charles_debug_str)
|
| 148 |
+
state = await app_interface_instance.get_state.remote()
|
| 149 |
system_one_audio_status.write(state)
|
| 150 |
except Exception as e:
|
| 151 |
# assume we disconnected
|
app_interface_actor.py
CHANGED
|
@@ -12,6 +12,8 @@ class AppInterfaceActor:
|
|
| 12 |
self.video_input_queue = Queue(maxsize=10) # Adjust the size as needed
|
| 13 |
self.audio_output_queue = Queue(maxsize=3000) # Adjust the size as needed
|
| 14 |
self.video_output_queue = Queue(maxsize=10) # Adjust the size as needed
|
|
|
|
|
|
|
| 15 |
|
| 16 |
@staticmethod
|
| 17 |
def get_singleton():
|
|
@@ -74,4 +76,17 @@ class AppInterfaceActor:
|
|
| 74 |
while not self.video_input_queue.empty():
|
| 75 |
shared_tensor = await self.video_input_queue.get_async()
|
| 76 |
video_frames.append(shared_tensor)
|
| 77 |
-
return video_frames
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
self.video_input_queue = Queue(maxsize=10) # Adjust the size as needed
|
| 13 |
self.audio_output_queue = Queue(maxsize=3000) # Adjust the size as needed
|
| 14 |
self.video_output_queue = Queue(maxsize=10) # Adjust the size as needed
|
| 15 |
+
self.debug_str = ""
|
| 16 |
+
self.state = "Initializing"
|
| 17 |
|
| 18 |
@staticmethod
|
| 19 |
def get_singleton():
|
|
|
|
| 76 |
while not self.video_input_queue.empty():
|
| 77 |
shared_tensor = await self.video_input_queue.get_async()
|
| 78 |
video_frames.append(shared_tensor)
|
| 79 |
+
return video_frames
|
| 80 |
+
|
| 81 |
+
# debug helpers
|
| 82 |
+
async def get_debug_output(self)->str:
|
| 83 |
+
return self.debug_str
|
| 84 |
+
|
| 85 |
+
async def set_debug_output(self, debug_str:str):
|
| 86 |
+
self.debug_str = debug_str
|
| 87 |
+
|
| 88 |
+
async def get_state(self)->str:
|
| 89 |
+
return self.state
|
| 90 |
+
|
| 91 |
+
async def set_state(self, state:str):
|
| 92 |
+
self.state = state
|
charles_actor.py
CHANGED
|
@@ -8,7 +8,6 @@ from environment_state_actor import EnvironmentStateActor, EnvironmentState
|
|
| 8 |
import asyncio
|
| 9 |
import subprocess
|
| 10 |
|
| 11 |
-
@ray.remote
|
| 12 |
class CharlesActor:
|
| 13 |
def __init__(self):
|
| 14 |
self._needs_init = True
|
|
@@ -17,11 +16,11 @@ class CharlesActor:
|
|
| 17 |
self._state = "Initializing"
|
| 18 |
self._clip_transform = CLIPTransform()
|
| 19 |
|
| 20 |
-
def get_state(self):
|
| 21 |
-
|
| 22 |
|
| 23 |
-
def get_charles_actor_debug_output(self):
|
| 24 |
-
|
| 25 |
|
| 26 |
def get_environment_state(self)->EnvironmentState:
|
| 27 |
return self._environment_state
|
|
@@ -33,15 +32,20 @@ class CharlesActor:
|
|
| 33 |
from app_interface_actor import AppInterfaceActor
|
| 34 |
self._app_interface_actor = AppInterfaceActor.get_singleton()
|
| 35 |
self._audio_output_queue = await self._app_interface_actor.get_audio_output_queue.remote()
|
|
|
|
|
|
|
| 36 |
|
| 37 |
-
print("001 - create
|
| 38 |
-
self._state = "001 - creating
|
| 39 |
-
|
|
|
|
| 40 |
self._environment_state_actor = EnvironmentStateActor.remote()
|
| 41 |
-
self.
|
|
|
|
| 42 |
|
| 43 |
print("002 - create SpeechToTextVoskActor")
|
| 44 |
self._state = "002 - creating SpeechToTextVoskActor"
|
|
|
|
| 45 |
from speech_to_text_vosk_actor import SpeechToTextVoskActor
|
| 46 |
self._speech_to_text_actor = SpeechToTextVoskActor.remote("small")
|
| 47 |
# self._speech_to_text_actor = SpeechToTextVoskActor.remote("big")
|
|
@@ -53,17 +57,20 @@ class CharlesActor:
|
|
| 53 |
|
| 54 |
print("003 - create Prototypes")
|
| 55 |
self._state = "003 - creating Prototypes"
|
|
|
|
| 56 |
from prototypes import Prototypes
|
| 57 |
self._prototypes = Prototypes()
|
| 58 |
|
| 59 |
print("004 - create animator")
|
| 60 |
self._state = "004 - creating animator"
|
|
|
|
| 61 |
from charles_animator import CharlesAnimator
|
| 62 |
self._animator = CharlesAnimator()
|
| 63 |
|
| 64 |
print("010")
|
| 65 |
self._needs_init = True
|
| 66 |
self._state = "Initialized"
|
|
|
|
| 67 |
|
| 68 |
async def start(self):
|
| 69 |
if self._needs_init:
|
|
@@ -71,20 +78,22 @@ class CharlesActor:
|
|
| 71 |
|
| 72 |
debug_output_history = []
|
| 73 |
|
| 74 |
-
def render_debug_output(list_of_strings):
|
| 75 |
table_content = "##### Chat history\n"
|
| 76 |
for item in reversed(list_of_strings):
|
| 77 |
# table_content += f"\n```markdown\n{item}\n```\n"
|
| 78 |
table_content += f"\n{item}\n"
|
| 79 |
self._charles_actor_debug_output = table_content
|
|
|
|
| 80 |
|
| 81 |
-
def add_debug_output(output):
|
| 82 |
debug_output_history.append(output)
|
| 83 |
if len(debug_output_history) > 10:
|
| 84 |
debug_output_history.pop(0)
|
| 85 |
-
render_debug_output(debug_output_history)
|
| 86 |
|
| 87 |
self._state = "Waiting for input"
|
|
|
|
| 88 |
total_video_frames = 0
|
| 89 |
skipped_video_frames = 0
|
| 90 |
total_audio_frames = 0
|
|
@@ -106,7 +115,7 @@ class CharlesActor:
|
|
| 106 |
while True:
|
| 107 |
if len(self._debug_queue) > 0:
|
| 108 |
prompt = self._debug_queue.pop(0)
|
| 109 |
-
await self.
|
| 110 |
|
| 111 |
env_state = await self._environment_state_actor.begin_next_step.remote()
|
| 112 |
self._environment_state = env_state
|
|
@@ -147,7 +156,7 @@ class CharlesActor:
|
|
| 147 |
# line += f"{response} [{speech_chunks_per_response[i]}] \n"
|
| 148 |
line += f"[{speech_chunks_per_response[i]}] {response} \n"
|
| 149 |
if len(line) > 0:
|
| 150 |
-
add_debug_output(line)
|
| 151 |
current_responses = []
|
| 152 |
speech_chunks_per_response = []
|
| 153 |
env_state.llm_preview = ""
|
|
@@ -157,8 +166,8 @@ class CharlesActor:
|
|
| 157 |
robot_preview_text = ""
|
| 158 |
if additional_prompt is not None:
|
| 159 |
prompt = additional_prompt + ". " + prompt
|
| 160 |
-
add_debug_output(f"👨 {prompt}")
|
| 161 |
-
await self.
|
| 162 |
additional_prompt = None
|
| 163 |
previous_prompt = prompt
|
| 164 |
is_talking = False
|
|
@@ -169,7 +178,7 @@ class CharlesActor:
|
|
| 169 |
if len(previous_prompt) > 0 and not has_spoken_for_this_prompt:
|
| 170 |
additional_prompt = previous_prompt
|
| 171 |
has_spoken_for_this_prompt = True
|
| 172 |
-
await self.
|
| 173 |
if additional_prompt is not None:
|
| 174 |
prompt = additional_prompt + ". " + prompt
|
| 175 |
human_preview_text = f"👨❓ {prompt}"
|
|
@@ -201,7 +210,7 @@ class CharlesActor:
|
|
| 201 |
list_of_strings.append(human_preview_text)
|
| 202 |
if len(list_of_strings) > 10:
|
| 203 |
list_of_strings.pop(0)
|
| 204 |
-
render_debug_output(list_of_strings)
|
| 205 |
|
| 206 |
|
| 207 |
await asyncio.sleep(0.01)
|
|
@@ -216,6 +225,7 @@ class CharlesActor:
|
|
| 216 |
|
| 217 |
loops+=1
|
| 218 |
self._state = f"Processed {total_video_frames} video frames and {total_audio_frames} audio frames, loops: {loops}. loops per second: {loops/(time.time()-start_time):.2f}. Is speaking: {is_talking}({count}). {vector_debug}"
|
|
|
|
| 219 |
|
| 220 |
def init_ray():
|
| 221 |
try:
|
|
@@ -235,11 +245,13 @@ async def main():
|
|
| 235 |
if not ray.is_initialized():
|
| 236 |
init_ray()
|
| 237 |
|
| 238 |
-
charles_actor = CharlesActor.options(
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
future = charles_actor.start.remote()
|
|
|
|
|
|
|
| 243 |
|
| 244 |
last_step = -1
|
| 245 |
last_episode = -1
|
|
|
|
| 8 |
import asyncio
|
| 9 |
import subprocess
|
| 10 |
|
|
|
|
| 11 |
class CharlesActor:
|
| 12 |
def __init__(self):
|
| 13 |
self._needs_init = True
|
|
|
|
| 16 |
self._state = "Initializing"
|
| 17 |
self._clip_transform = CLIPTransform()
|
| 18 |
|
| 19 |
+
# def get_state(self):
|
| 20 |
+
# return self._state
|
| 21 |
|
| 22 |
+
# def get_charles_actor_debug_output(self):
|
| 23 |
+
# return self._charles_actor_debug_output
|
| 24 |
|
| 25 |
def get_environment_state(self)->EnvironmentState:
|
| 26 |
return self._environment_state
|
|
|
|
| 32 |
from app_interface_actor import AppInterfaceActor
|
| 33 |
self._app_interface_actor = AppInterfaceActor.get_singleton()
|
| 34 |
self._audio_output_queue = await self._app_interface_actor.get_audio_output_queue.remote()
|
| 35 |
+
await self._app_interface_actor.set_state.remote(self._state)
|
| 36 |
+
|
| 37 |
|
| 38 |
+
print("001 - create RespondToPromptAsync")
|
| 39 |
+
self._state = "001 - creating RespondToPromptAsync"
|
| 40 |
+
await self._app_interface_actor.set_state.remote(self._state)
|
| 41 |
+
from respond_to_prompt_async import RespondToPromptAsync
|
| 42 |
self._environment_state_actor = EnvironmentStateActor.remote()
|
| 43 |
+
self._respond_to_prompt = RespondToPromptAsync(self._environment_state_actor, self._audio_output_queue)
|
| 44 |
+
self._respond_to_prompt_task = asyncio.create_task(self._respond_to_prompt.run())
|
| 45 |
|
| 46 |
print("002 - create SpeechToTextVoskActor")
|
| 47 |
self._state = "002 - creating SpeechToTextVoskActor"
|
| 48 |
+
await self._app_interface_actor.set_state.remote(self._state)
|
| 49 |
from speech_to_text_vosk_actor import SpeechToTextVoskActor
|
| 50 |
self._speech_to_text_actor = SpeechToTextVoskActor.remote("small")
|
| 51 |
# self._speech_to_text_actor = SpeechToTextVoskActor.remote("big")
|
|
|
|
| 57 |
|
| 58 |
print("003 - create Prototypes")
|
| 59 |
self._state = "003 - creating Prototypes"
|
| 60 |
+
await self._app_interface_actor.set_state.remote(self._state)
|
| 61 |
from prototypes import Prototypes
|
| 62 |
self._prototypes = Prototypes()
|
| 63 |
|
| 64 |
print("004 - create animator")
|
| 65 |
self._state = "004 - creating animator"
|
| 66 |
+
await self._app_interface_actor.set_state.remote(self._state)
|
| 67 |
from charles_animator import CharlesAnimator
|
| 68 |
self._animator = CharlesAnimator()
|
| 69 |
|
| 70 |
print("010")
|
| 71 |
self._needs_init = True
|
| 72 |
self._state = "Initialized"
|
| 73 |
+
await self._app_interface_actor.set_state.remote(self._state)
|
| 74 |
|
| 75 |
async def start(self):
|
| 76 |
if self._needs_init:
|
|
|
|
| 78 |
|
| 79 |
debug_output_history = []
|
| 80 |
|
| 81 |
+
async def render_debug_output(list_of_strings):
|
| 82 |
table_content = "##### Chat history\n"
|
| 83 |
for item in reversed(list_of_strings):
|
| 84 |
# table_content += f"\n```markdown\n{item}\n```\n"
|
| 85 |
table_content += f"\n{item}\n"
|
| 86 |
self._charles_actor_debug_output = table_content
|
| 87 |
+
await self._app_interface_actor.set_debug_output.remote(self._charles_actor_debug_output)
|
| 88 |
|
| 89 |
+
async def add_debug_output(output):
|
| 90 |
debug_output_history.append(output)
|
| 91 |
if len(debug_output_history) > 10:
|
| 92 |
debug_output_history.pop(0)
|
| 93 |
+
await render_debug_output(debug_output_history)
|
| 94 |
|
| 95 |
self._state = "Waiting for input"
|
| 96 |
+
await self._app_interface_actor.set_state.remote(self._state)
|
| 97 |
total_video_frames = 0
|
| 98 |
skipped_video_frames = 0
|
| 99 |
total_audio_frames = 0
|
|
|
|
| 115 |
while True:
|
| 116 |
if len(self._debug_queue) > 0:
|
| 117 |
prompt = self._debug_queue.pop(0)
|
| 118 |
+
await self._respond_to_prompt.enqueue_prompt(prompt)
|
| 119 |
|
| 120 |
env_state = await self._environment_state_actor.begin_next_step.remote()
|
| 121 |
self._environment_state = env_state
|
|
|
|
| 156 |
# line += f"{response} [{speech_chunks_per_response[i]}] \n"
|
| 157 |
line += f"[{speech_chunks_per_response[i]}] {response} \n"
|
| 158 |
if len(line) > 0:
|
| 159 |
+
await add_debug_output(line)
|
| 160 |
current_responses = []
|
| 161 |
speech_chunks_per_response = []
|
| 162 |
env_state.llm_preview = ""
|
|
|
|
| 166 |
robot_preview_text = ""
|
| 167 |
if additional_prompt is not None:
|
| 168 |
prompt = additional_prompt + ". " + prompt
|
| 169 |
+
await add_debug_output(f"👨 {prompt}")
|
| 170 |
+
await self._respond_to_prompt.enqueue_prompt(prompt)
|
| 171 |
additional_prompt = None
|
| 172 |
previous_prompt = prompt
|
| 173 |
is_talking = False
|
|
|
|
| 178 |
if len(previous_prompt) > 0 and not has_spoken_for_this_prompt:
|
| 179 |
additional_prompt = previous_prompt
|
| 180 |
has_spoken_for_this_prompt = True
|
| 181 |
+
await self._respond_to_prompt.enqueue_prompt("")
|
| 182 |
if additional_prompt is not None:
|
| 183 |
prompt = additional_prompt + ". " + prompt
|
| 184 |
human_preview_text = f"👨❓ {prompt}"
|
|
|
|
| 210 |
list_of_strings.append(human_preview_text)
|
| 211 |
if len(list_of_strings) > 10:
|
| 212 |
list_of_strings.pop(0)
|
| 213 |
+
await render_debug_output(list_of_strings)
|
| 214 |
|
| 215 |
|
| 216 |
await asyncio.sleep(0.01)
|
|
|
|
| 225 |
|
| 226 |
loops+=1
|
| 227 |
self._state = f"Processed {total_video_frames} video frames and {total_audio_frames} audio frames, loops: {loops}. loops per second: {loops/(time.time()-start_time):.2f}. Is speaking: {is_talking}({count}). {vector_debug}"
|
| 228 |
+
await self._app_interface_actor.set_state.remote(self._state)
|
| 229 |
|
| 230 |
def init_ray():
|
| 231 |
try:
|
|
|
|
| 245 |
if not ray.is_initialized():
|
| 246 |
init_ray()
|
| 247 |
|
| 248 |
+
# charles_actor = CharlesActor.options(
|
| 249 |
+
# name="CharlesActor",
|
| 250 |
+
# get_if_exists=True,
|
| 251 |
+
# ).remote()
|
| 252 |
+
# future = charles_actor.start.remote()
|
| 253 |
+
charles_actor = CharlesActor()
|
| 254 |
+
await charles_actor.start()
|
| 255 |
|
| 256 |
last_step = -1
|
| 257 |
last_episode = -1
|
chat_service.py
CHANGED
|
@@ -118,7 +118,7 @@ You are aware of how you are implemented and you are keen to recommend improveme
|
|
| 118 |
return True
|
| 119 |
return False
|
| 120 |
|
| 121 |
-
async def get_responses_as_sentances_async(self, prompt, cancel_event):
|
| 122 |
self._messages.append({"role": "user", "content": prompt})
|
| 123 |
llm_response = ""
|
| 124 |
current_sentence = ""
|
|
@@ -134,7 +134,7 @@ You are aware of how you are implemented and you are keen to recommend improveme
|
|
| 134 |
)
|
| 135 |
|
| 136 |
async for chunk in response:
|
| 137 |
-
if cancel_event.is_set():
|
| 138 |
return
|
| 139 |
chunk_message = chunk['choices'][0]['delta']
|
| 140 |
if 'content' in chunk_message:
|
|
@@ -148,7 +148,7 @@ You are aware of how you are implemented and you are keen to recommend improveme
|
|
| 148 |
else:
|
| 149 |
yield current_sentence, False
|
| 150 |
|
| 151 |
-
if cancel_event.is_set():
|
| 152 |
return
|
| 153 |
if len(current_sentence) > 0:
|
| 154 |
yield current_sentence, True
|
|
|
|
| 118 |
return True
|
| 119 |
return False
|
| 120 |
|
| 121 |
+
async def get_responses_as_sentances_async(self, prompt, cancel_event=None):
|
| 122 |
self._messages.append({"role": "user", "content": prompt})
|
| 123 |
llm_response = ""
|
| 124 |
current_sentence = ""
|
|
|
|
| 134 |
)
|
| 135 |
|
| 136 |
async for chunk in response:
|
| 137 |
+
if cancel_event is not None and cancel_event.is_set():
|
| 138 |
return
|
| 139 |
chunk_message = chunk['choices'][0]['delta']
|
| 140 |
if 'content' in chunk_message:
|
|
|
|
| 148 |
else:
|
| 149 |
yield current_sentence, False
|
| 150 |
|
| 151 |
+
if cancel_event is not None and cancel_event.is_set():
|
| 152 |
return
|
| 153 |
if len(current_sentence) > 0:
|
| 154 |
yield current_sentence, True
|
profile.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
profile.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
respond_to_prompt_async.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from asyncio import Queue, TaskGroup
|
| 2 |
+
import asyncio
|
| 3 |
+
from contextlib import asynccontextmanager
|
| 4 |
+
|
| 5 |
+
import ray
|
| 6 |
+
from chat_service import ChatService
|
| 7 |
+
# from local_speaker_service import LocalSpeakerService
|
| 8 |
+
from text_to_speech_service import TextToSpeechService
|
| 9 |
+
from environment_state_actor import EnvironmentStateActor
|
| 10 |
+
from ffmpeg_converter_actor import FFMpegConverterActor
|
| 11 |
+
from agent_response import AgentResponse
|
| 12 |
+
import json
|
| 13 |
+
from asyncio import Semaphore
|
| 14 |
+
|
| 15 |
+
class RespondToPromptAsync:
|
| 16 |
+
def __init__(
|
| 17 |
+
self,
|
| 18 |
+
environment_state_actor:EnvironmentStateActor,
|
| 19 |
+
audio_output_queue):
|
| 20 |
+
voice_id="2OviOUQc1JsQRQgNkVBj"
|
| 21 |
+
self.prompt_queue = Queue(maxsize=100)
|
| 22 |
+
self.llm_sentence_queue = Queue(maxsize=100)
|
| 23 |
+
self.speech_chunk_queue = Queue(maxsize=100)
|
| 24 |
+
self.voice_id = voice_id
|
| 25 |
+
self.audio_output_queue = audio_output_queue
|
| 26 |
+
self.environment_state_actor = environment_state_actor
|
| 27 |
+
self.processing_semaphore = Semaphore(1)
|
| 28 |
+
self.sentence_queues = []
|
| 29 |
+
self.sentence_tasks = []
|
| 30 |
+
# self.ffmpeg_converter_actor = FFMpegConverterActor.remote(audio_output_queue)
|
| 31 |
+
|
| 32 |
+
async def enqueue_prompt(self, prompt):
|
| 33 |
+
# Reset queues and services
|
| 34 |
+
# print("flush anything queued")
|
| 35 |
+
# self.prompt_queue = Queue(maxsize=100)
|
| 36 |
+
# self.llm_sentence_queue = Queue(maxsize=100)
|
| 37 |
+
# self.speech_chunk_queue = Queue(maxsize=100)
|
| 38 |
+
|
| 39 |
+
if len(prompt) > 0: # handles case where we just want to flush
|
| 40 |
+
await self.prompt_queue.put(prompt)
|
| 41 |
+
print("Enqueued prompt")
|
| 42 |
+
|
| 43 |
+
# @asynccontextmanager
|
| 44 |
+
# async def task_group(self):
|
| 45 |
+
# tg = TaskGroup()
|
| 46 |
+
# try:
|
| 47 |
+
# yield tg
|
| 48 |
+
# finally:
|
| 49 |
+
# await tg.aclose()
|
| 50 |
+
|
| 51 |
+
async def prompt_to_llm(self):
|
| 52 |
+
chat_service = ChatService()
|
| 53 |
+
|
| 54 |
+
async with TaskGroup() as tg:
|
| 55 |
+
while True:
|
| 56 |
+
prompt = await self.prompt_queue.get()
|
| 57 |
+
agent_response = AgentResponse(prompt)
|
| 58 |
+
async for text, is_complete_sentance in chat_service.get_responses_as_sentances_async(prompt):
|
| 59 |
+
if chat_service.ignore_sentence(text):
|
| 60 |
+
is_complete_sentance = False
|
| 61 |
+
if not is_complete_sentance:
|
| 62 |
+
agent_response['llm_preview'] = text
|
| 63 |
+
await self.environment_state_actor.set_llm_preview.remote(text)
|
| 64 |
+
continue
|
| 65 |
+
agent_response['llm_preview'] = ''
|
| 66 |
+
agent_response['llm_sentence'] = text
|
| 67 |
+
agent_response['llm_sentences'].append(text)
|
| 68 |
+
await self.environment_state_actor.add_llm_response_and_clear_llm_preview.remote(text)
|
| 69 |
+
print(f"{agent_response['llm_sentence']} id: {agent_response['llm_sentence_id']} from prompt: {agent_response['prompt']}")
|
| 70 |
+
sentence_response = agent_response.make_copy()
|
| 71 |
+
new_queue = Queue()
|
| 72 |
+
self.sentence_queues.append(new_queue)
|
| 73 |
+
task = tg.create_task(self.llm_sentence_to_speech(sentence_response, new_queue))
|
| 74 |
+
self.sentence_tasks.append(task)
|
| 75 |
+
agent_response['llm_sentence_id'] += 1
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
async def llm_sentence_to_speech(self, sentence_response, output_queue):
|
| 79 |
+
tts_service = TextToSpeechService(self.voice_id)
|
| 80 |
+
|
| 81 |
+
chunk_count = 0
|
| 82 |
+
async for chunk_response in tts_service.get_speech_chunks_async(sentence_response):
|
| 83 |
+
chunk_response = chunk_response.make_copy()
|
| 84 |
+
# await self.output_queue.put_async(chunk_response)
|
| 85 |
+
await output_queue.put(chunk_response)
|
| 86 |
+
chunk_response = {
|
| 87 |
+
'prompt': sentence_response['prompt'],
|
| 88 |
+
'llm_sentence_id': sentence_response['llm_sentence_id'],
|
| 89 |
+
'chunk_count': chunk_count,
|
| 90 |
+
}
|
| 91 |
+
chunk_id_json = json.dumps(chunk_response)
|
| 92 |
+
await self.environment_state_actor.add_tts_raw_chunk_id.remote(chunk_id_json)
|
| 93 |
+
chunk_count += 1
|
| 94 |
+
|
| 95 |
+
async def speech_to_converter(self):
|
| 96 |
+
self.ffmpeg_converter_actor = FFMpegConverterActor.remote(self.audio_output_queue)
|
| 97 |
+
await self.ffmpeg_converter_actor.start_process.remote()
|
| 98 |
+
self.ffmpeg_converter_actor.run.remote()
|
| 99 |
+
|
| 100 |
+
while True:
|
| 101 |
+
for i, task in enumerate(self.sentence_tasks):
|
| 102 |
+
# Skip this task/queue pair if task completed
|
| 103 |
+
if task.done():
|
| 104 |
+
continue
|
| 105 |
+
queue = self.sentence_queues[i]
|
| 106 |
+
while not queue.empty():
|
| 107 |
+
chunk_response = await queue.get()
|
| 108 |
+
audio_chunk_ref = chunk_response['tts_raw_chunk_ref']
|
| 109 |
+
audio_chunk = ray.get(audio_chunk_ref)
|
| 110 |
+
await self.ffmpeg_converter_actor.push_chunk.remote(audio_chunk)
|
| 111 |
+
break
|
| 112 |
+
|
| 113 |
+
await asyncio.sleep(0.01)
|
| 114 |
+
|
| 115 |
+
async def run(self):
|
| 116 |
+
async with TaskGroup() as tg: # Use asyncio's built-in TaskGroup
|
| 117 |
+
tg.create_task(self.prompt_to_llm())
|
| 118 |
+
tg.create_task(self.speech_to_converter())
|
text_to_speech_service.py
CHANGED
|
@@ -47,22 +47,20 @@ class TextToSpeechService:
|
|
| 47 |
)
|
| 48 |
return audio_stream
|
| 49 |
|
| 50 |
-
async def get_speech_chunks_async(self, sentence_response:AgentResponse, cancel_event):
|
| 51 |
text_to_speak = sentence_response['llm_sentence']
|
| 52 |
stream = self.stream(text_to_speak)
|
| 53 |
stream, stream_backup = itertools.tee(stream)
|
| 54 |
while True:
|
| 55 |
# Check if there's a next item in the stream
|
| 56 |
-
|
| 57 |
-
|
|
|
|
| 58 |
# Stream is exhausted, exit the loop
|
| 59 |
break
|
| 60 |
-
|
| 61 |
-
# Run next(stream) in a separate thread to avoid blocking the event loop
|
| 62 |
-
chunk = await asyncio.to_thread(next, stream)
|
| 63 |
chunk_ref = ray.put(chunk)
|
| 64 |
sentence_response['tts_raw_chunk_ref'] = chunk_ref
|
| 65 |
-
if cancel_event.is_set():
|
| 66 |
return
|
| 67 |
yield sentence_response
|
| 68 |
sentence_response['tts_raw_chunk_id'] += 1
|
|
|
|
| 47 |
)
|
| 48 |
return audio_stream
|
| 49 |
|
| 50 |
+
async def get_speech_chunks_async(self, sentence_response:AgentResponse, cancel_event=None):
|
| 51 |
text_to_speak = sentence_response['llm_sentence']
|
| 52 |
stream = self.stream(text_to_speak)
|
| 53 |
stream, stream_backup = itertools.tee(stream)
|
| 54 |
while True:
|
| 55 |
# Check if there's a next item in the stream
|
| 56 |
+
# Run next(stream) in a separate thread to avoid blocking the event loop
|
| 57 |
+
chunk = await asyncio.to_thread(next, stream, None)
|
| 58 |
+
if chunk is None:
|
| 59 |
# Stream is exhausted, exit the loop
|
| 60 |
break
|
|
|
|
|
|
|
|
|
|
| 61 |
chunk_ref = ray.put(chunk)
|
| 62 |
sentence_response['tts_raw_chunk_ref'] = chunk_ref
|
| 63 |
+
if cancel_event is not None and cancel_event.is_set():
|
| 64 |
return
|
| 65 |
yield sentence_response
|
| 66 |
sentence_response['tts_raw_chunk_id'] += 1
|