Spaces: Runtime error
Commit: slight improvement on history
Browse files:
- app.py +2 -2
- charles_actor.py +47 -16
app.py
CHANGED
|
@@ -114,8 +114,8 @@ async def main():
|
|
| 114 |
pass
|
| 115 |
if charles_actor is not None:
|
| 116 |
try:
|
| 117 |
-
new_environment_state = await charles_actor.get_environment_state.remote()
|
| 118 |
-
environment_state_ouput.markdown(f"{new_environment_state}")
|
| 119 |
charles_debug_str = await charles_actor.get_charles_actor_debug_output.remote()
|
| 120 |
charles_actor_debug_output.markdown(charles_debug_str)
|
| 121 |
except Exception as e:
|
|
|
|
| 114 |
pass
|
| 115 |
if charles_actor is not None:
|
| 116 |
try:
|
| 117 |
+
# new_environment_state = await charles_actor.get_environment_state.remote()
|
| 118 |
+
# environment_state_ouput.markdown(f"{new_environment_state}")
|
| 119 |
charles_debug_str = await charles_actor.get_charles_actor_debug_output.remote()
|
| 120 |
charles_actor_debug_output.markdown(charles_debug_str)
|
| 121 |
except Exception as e:
|
charles_actor.py
CHANGED
|
@@ -68,12 +68,6 @@ class CharlesActor:
|
|
| 68 |
table_content = "| Charles Actor debug history |\n| --- |\n"
|
| 69 |
table_content += "\n".join([f"| {item} |" for item in reversed(debug_output_history)])
|
| 70 |
self._charles_actor_debug_output = table_content
|
| 71 |
-
def preview_debug_output(output):
|
| 72 |
-
table_content = "| Charles Actor debug history |\n| --- |\n"
|
| 73 |
-
debug_output_history_copy = debug_output_history.copy()
|
| 74 |
-
debug_output_history_copy.append(output)
|
| 75 |
-
table_content += "\n".join([f"| {item} |" for item in reversed(debug_output_history_copy)])
|
| 76 |
-
self._charles_actor_debug_output = table_content
|
| 77 |
|
| 78 |
self._state = "Waiting for input"
|
| 79 |
total_video_frames = 0
|
|
@@ -84,6 +78,11 @@ class CharlesActor:
|
|
| 84 |
vector_debug = "--n/a--"
|
| 85 |
|
| 86 |
process_speech_to_text_future = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
while True:
|
| 89 |
if len(self._debug_queue) > 0:
|
|
@@ -114,10 +113,6 @@ class CharlesActor:
|
|
| 114 |
distances, closest_item_key, distance_debug_str = self._prototypes.get_distances(image_vector)
|
| 115 |
vector_debug = f"{closest_item_key} {distance_debug_str}"
|
| 116 |
|
| 117 |
-
|
| 118 |
-
human_preview_text = ""
|
| 119 |
-
robot_preview_text = ""
|
| 120 |
-
|
| 121 |
if len(process_speech_to_text_future) > 0:
|
| 122 |
ready, _ = ray.wait([process_speech_to_text_future[0]], timeout=0)
|
| 123 |
if ready:
|
|
@@ -128,22 +123,58 @@ class CharlesActor:
|
|
| 128 |
|
| 129 |
if speaker_finished and len(prompt) > 0 and prompt not in prompts_to_ignore:
|
| 130 |
print(f"Prompt: {prompt}")
|
| 131 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
add_debug_output(f"🗨 {prompt}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
await self._respond_to_prompt_actor.enqueue_prompt.remote(prompt)
|
| 134 |
elif len(prompt) > 0 and prompt not in prompts_to_ignore:
|
| 135 |
human_preview_text = f"🗨✏ {prompt}"
|
| 136 |
|
| 137 |
for new_response in env_state.llm_responses:
|
| 138 |
-
add_debug_output(f"🤖 {new_response}")
|
|
|
|
|
|
|
|
|
|
| 139 |
if len(env_state.llm_preview):
|
| 140 |
robot_preview_text = f"🤖✏ {env_state.llm_preview}"
|
| 141 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
if len(human_preview_text) > 0:
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
|
|
|
|
|
|
|
|
|
| 147 |
await asyncio.sleep(0.01)
|
| 148 |
loops+=1
|
| 149 |
self._state = f"Processed {total_video_frames} video frames and {total_audio_frames} audio frames, loops: {loops}. loops per second: {loops/(time.time()-start_time):.2f}. {vector_debug}"
|
|
|
|
| 68 |
table_content = "| Charles Actor debug history |\n| --- |\n"
|
| 69 |
table_content += "\n".join([f"| {item} |" for item in reversed(debug_output_history)])
|
| 70 |
self._charles_actor_debug_output = table_content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
|
| 72 |
self._state = "Waiting for input"
|
| 73 |
total_video_frames = 0
|
|
|
|
| 78 |
vector_debug = "--n/a--"
|
| 79 |
|
| 80 |
process_speech_to_text_future = []
|
| 81 |
+
current_responses = []
|
| 82 |
+
speech_chunks_per_response = []
|
| 83 |
+
human_preview_text = ""
|
| 84 |
+
robot_preview_text = ""
|
| 85 |
+
|
| 86 |
|
| 87 |
while True:
|
| 88 |
if len(self._debug_queue) > 0:
|
|
|
|
| 113 |
distances, closest_item_key, distance_debug_str = self._prototypes.get_distances(image_vector)
|
| 114 |
vector_debug = f"{closest_item_key} {distance_debug_str}"
|
| 115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
if len(process_speech_to_text_future) > 0:
|
| 117 |
ready, _ = ray.wait([process_speech_to_text_future[0]], timeout=0)
|
| 118 |
if ready:
|
|
|
|
| 123 |
|
| 124 |
if speaker_finished and len(prompt) > 0 and prompt not in prompts_to_ignore:
|
| 125 |
print(f"Prompt: {prompt}")
|
| 126 |
+
lines = []
|
| 127 |
+
for i, response in enumerate(current_responses):
|
| 128 |
+
line = "🤖 " if len(lines) == 0 else "... "
|
| 129 |
+
line += f"{response} [{speech_chunks_per_response[i]}]"
|
| 130 |
+
lines.append(line)
|
| 131 |
+
for line in reversed(lines):
|
| 132 |
+
add_debug_output(line)
|
| 133 |
add_debug_output(f"🗨 {prompt}")
|
| 134 |
+
current_responses = []
|
| 135 |
+
speech_chunks_per_response = []
|
| 136 |
+
env_state.llm_preview = ""
|
| 137 |
+
env_state.llm_responses = []
|
| 138 |
+
env_state.tts_raw_chunk_ids = []
|
| 139 |
+
human_preview_text = ""
|
| 140 |
+
robot_preview_text = ""
|
| 141 |
await self._respond_to_prompt_actor.enqueue_prompt.remote(prompt)
|
| 142 |
elif len(prompt) > 0 and prompt not in prompts_to_ignore:
|
| 143 |
human_preview_text = f"🗨✏ {prompt}"
|
| 144 |
|
| 145 |
for new_response in env_state.llm_responses:
|
| 146 |
+
# add_debug_output(f"🤖 {new_response}")
|
| 147 |
+
current_responses.append(new_response)
|
| 148 |
+
speech_chunks_per_response.append(0)
|
| 149 |
+
robot_preview_text = ""
|
| 150 |
if len(env_state.llm_preview):
|
| 151 |
robot_preview_text = f"🤖✏ {env_state.llm_preview}"
|
| 152 |
|
| 153 |
+
for chunk in env_state.tts_raw_chunk_ids:
|
| 154 |
+
chunk = json.loads(chunk)
|
| 155 |
+
# prompt = chunk['prompt']
|
| 156 |
+
response_id = chunk['llm_sentence_id']
|
| 157 |
+
speech_chunks_per_response[response_id] += 1
|
| 158 |
+
|
| 159 |
+
table_content = "| Charles Actor debug history |\n| --- |\n"
|
| 160 |
+
debug_output_history_copy = debug_output_history.copy()
|
| 161 |
+
if len(robot_preview_text) > 0:
|
| 162 |
+
debug_output_history_copy.append(robot_preview_text)
|
| 163 |
+
lines = []
|
| 164 |
+
for i, response in enumerate(current_responses):
|
| 165 |
+
line = "🤖 " if len(lines) == 0 else "... "
|
| 166 |
+
line += f"{response} [{speech_chunks_per_response[i]}]"
|
| 167 |
+
lines.append(line)
|
| 168 |
+
for line in reversed(lines):
|
| 169 |
+
debug_output_history_copy.append(line)
|
| 170 |
if len(human_preview_text) > 0:
|
| 171 |
+
debug_output_history_copy.append(human_preview_text)
|
| 172 |
+
if len(debug_output_history_copy) > 10:
|
| 173 |
+
debug_output_history_copy.pop(0)
|
| 174 |
+
table_content += "\n".join([f"| {item} |" for item in reversed(debug_output_history_copy)])
|
| 175 |
+
self._charles_actor_debug_output = table_content
|
| 176 |
+
|
| 177 |
+
|
| 178 |
await asyncio.sleep(0.01)
|
| 179 |
loops+=1
|
| 180 |
self._state = f"Processed {total_video_frames} video frames and {total_audio_frames} audio frames, loops: {loops}. loops per second: {loops/(time.time()-start_time):.2f}. {vector_debug}"
|