Spaces:
Sleeping
Sleeping
Commit ·
e75cf4f
1
Parent(s): b1bd3b9
fix: replace dict State with flat primitive States to fix Gradio API schema TypeError
Browse files- app.py +80 -69
- build-errors/build_errors.md +16 -0
app.py
CHANGED
|
@@ -4,6 +4,10 @@ Replicates the exact frontend flow:
|
|
| 4 |
Language select → 3-phrase enrollment → chat (Claude Haiku replies in cloned voice) → wall at turn 7
|
| 5 |
|
| 6 |
RTF is shown after each AI turn so you can verify the GPU is keeping up.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
"""
|
| 8 |
|
| 9 |
import math
|
|
@@ -80,7 +84,7 @@ L2_OPTIONS = [("Spanish (es)", "es"), ("English (en)", "en"), ("French (fr)", "
|
|
| 80 |
("Korean (ko)", "ko"), ("Chinese (zh)", "zh")]
|
| 81 |
|
| 82 |
# ββ Audio helpers βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 83 |
-
def _to_mono_16k(audio_input)
|
| 84 |
if audio_input is None:
|
| 85 |
return None
|
| 86 |
sr, samples = audio_input
|
|
@@ -97,10 +101,15 @@ def _to_mono_16k(audio_input) -> np.ndarray | None:
|
|
| 97 |
|
| 98 |
|
| 99 |
# ββ GPU functions βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
|
|
|
| 100 |
|
| 101 |
@spaces.GPU
|
| 102 |
def gpu_enroll_and_greet(audio1, audio2, audio3, l2):
|
| 103 |
-
"""WavLM enrollment + synthesize first AI
|
|
|
|
|
|
|
|
|
|
| 104 |
chunks, ref_texts = [], []
|
| 105 |
for i, a in enumerate([audio1, audio2, audio3]):
|
| 106 |
chunk = _to_mono_16k(a)
|
|
@@ -109,7 +118,7 @@ def gpu_enroll_and_greet(audio1, audio2, audio3, l2):
|
|
| 109 |
ref_texts.append(ENROLLMENT_PHRASES[i])
|
| 110 |
|
| 111 |
if not chunks:
|
| 112 |
-
return
|
| 113 |
|
| 114 |
ref = np.concatenate(chunks)
|
| 115 |
ref_text = " ".join(ref_texts)
|
|
@@ -125,20 +134,24 @@ def gpu_enroll_and_greet(audio1, audio2, audio3, l2):
|
|
| 125 |
|
| 126 |
status = "PASS β" if rtf < 1.0 else "FAIL β"
|
| 127 |
rtf_text = f"Greeting β RTF: {rtf:.3f} | {status} | {elapsed*1000:.0f} ms"
|
| 128 |
-
#
|
| 129 |
return ref.tolist(), (16_000, greeting_audio), rtf_text
|
| 130 |
|
| 131 |
|
| 132 |
@spaces.GPU
|
| 133 |
def gpu_chat_turn(audio_input, ref_list, history, turn_count, l1, l2):
|
| 134 |
-
"""ASR β Claude Haiku reply β TTS.
|
| 135 |
-
ref_list is a plain Python list (
|
|
|
|
| 136 |
"""
|
| 137 |
samples = _to_mono_16k(audio_input)
|
| 138 |
if samples is None or len(samples) == 0:
|
| 139 |
return None, None, None, history, "β οΈ No audio"
|
| 140 |
|
| 141 |
-
|
|
|
|
|
|
|
|
|
|
| 142 |
ref = np.array(ref_list, dtype=np.float32)
|
| 143 |
|
| 144 |
# ASR β Whisper tiny (CPU)
|
|
@@ -155,9 +168,9 @@ def gpu_chat_turn(audio_input, ref_list, history, turn_count, l1, l2):
|
|
| 155 |
turn_number=turn_count + 1,
|
| 156 |
whisper_signals=whisper_signals if whisper_signals else None,
|
| 157 |
)
|
| 158 |
-
reply_text
|
| 159 |
-
new_history
|
| 160 |
-
lang_name
|
| 161 |
|
| 162 |
# TTS β hybrid router (OpenVoice short / Qwen3 long)
|
| 163 |
t0 = time.perf_counter()
|
|
@@ -172,12 +185,20 @@ def gpu_chat_turn(audio_input, ref_list, history, turn_count, l1, l2):
|
|
| 172 |
|
| 173 |
|
| 174 |
# ββ Gradio UI βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 175 |
-
|
| 176 |
-
|
|
|
|
|
|
|
|
|
|
| 177 |
|
| 178 |
with gr.Blocks(title="Vocal Mirror") as demo:
|
| 179 |
|
| 180 |
-
state
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
|
| 182 |
# ββ Screen 1: Language select βββββββββββββββββββββββββββββββββββββββββββββ
|
| 183 |
with gr.Column(visible=True) as screen_lang:
|
|
@@ -204,18 +225,18 @@ with gr.Blocks(title="Vocal Mirror") as demo:
|
|
| 204 |
with gr.Column():
|
| 205 |
gr.Markdown(f'**Phrase 3**\n\n*"{ENROLLMENT_PHRASES[2]}"*')
|
| 206 |
enroll_a3 = gr.Audio(label="Phrase 3", sources=["microphone"], type="numpy")
|
| 207 |
-
enroll_btn
|
| 208 |
enroll_status = gr.Textbox(label="Status", interactive=False, visible=False)
|
| 209 |
|
| 210 |
# ββ Screen 3: Chat ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 211 |
with gr.Column(visible=False) as screen_chat:
|
| 212 |
gr.Markdown("## Chat")
|
| 213 |
-
chatbot
|
| 214 |
ai_audio = gr.Audio(label="AI reply (cloned voice)", type="numpy", autoplay=True)
|
| 215 |
rtf_box = gr.Textbox(label="RTF", interactive=False)
|
| 216 |
gr.Markdown("### Your turn β record your reply")
|
| 217 |
-
user_mic
|
| 218 |
-
send_btn
|
| 219 |
|
| 220 |
# ββ Screen 4: Wall ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 221 |
with gr.Column(visible=False) as screen_wall:
|
|
@@ -225,53 +246,50 @@ with gr.Blocks(title="Vocal Mirror") as demo:
|
|
| 225 |
"in your own voice.\n\n"
|
| 226 |
"Join the waitlist to get early access when we launch."
|
| 227 |
)
|
| 228 |
-
gr.Markdown(
|
| 229 |
|
| 230 |
|
| 231 |
# ββ Callbacks βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 232 |
|
| 233 |
-
def on_start(l1, l2
|
| 234 |
-
state = state.copy()
|
| 235 |
-
state["l1"] = l1
|
| 236 |
-
state["l2"] = l2
|
| 237 |
-
state["history"] = []
|
| 238 |
-
state["turn_count"] = 0
|
| 239 |
-
state["ref"] = None
|
| 240 |
return (
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
)
|
| 245 |
|
| 246 |
start_btn.click(
|
| 247 |
fn=on_start,
|
| 248 |
-
inputs=[l1_dd, l2_dd
|
| 249 |
-
outputs=[
|
|
|
|
| 250 |
)
|
| 251 |
|
| 252 |
|
| 253 |
-
def on_enroll(a1, a2, a3,
|
| 254 |
-
|
| 255 |
-
l2 = state.get("l2", "es")
|
| 256 |
-
|
| 257 |
-
ref, greeting, rtf_text = gpu_enroll_and_greet(a1, a2, a3, l2)
|
| 258 |
|
| 259 |
-
if
|
| 260 |
return (
|
| 261 |
-
|
| 262 |
-
gr.update(visible=True),
|
| 263 |
-
gr.update(value=rtf_text, visible=True),
|
| 264 |
-
gr.update(visible=False),
|
| 265 |
-
|
|
|
|
|
|
|
|
|
|
| 266 |
)
|
| 267 |
|
| 268 |
fluent_text = FLUENT_PHRASES.get(l2, FLUENT_PHRASES["en"])
|
| 269 |
-
state["ref"] = ref
|
| 270 |
-
# Greeting counts as AI turn 0 β history stays empty until user speaks
|
| 271 |
messages = [{"role": "assistant", "content": fluent_text}]
|
| 272 |
|
| 273 |
return (
|
| 274 |
-
|
| 275 |
gr.update(visible=False), # screen_enroll
|
| 276 |
gr.update(visible=False), # enroll_status
|
| 277 |
gr.update(visible=True), # screen_chat
|
|
@@ -283,45 +301,35 @@ with gr.Blocks(title="Vocal Mirror") as demo:
|
|
| 283 |
|
| 284 |
enroll_btn.click(
|
| 285 |
fn=on_enroll,
|
| 286 |
-
inputs=[enroll_a1, enroll_a2, enroll_a3,
|
| 287 |
-
outputs=[
|
| 288 |
chatbot, ai_audio, rtf_box],
|
| 289 |
)
|
| 290 |
|
| 291 |
|
| 292 |
-
def on_send(audio,
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
history = state.get("history", [])
|
| 296 |
-
turn_count = state.get("turn_count", 0)
|
| 297 |
-
l1 = state.get("l1", "en")
|
| 298 |
-
l2 = state.get("l2", "es")
|
| 299 |
-
|
| 300 |
-
if ref is None:
|
| 301 |
-
return state, gr.update(), None, "β οΈ Not enrolled", gr.update(), gr.update()
|
| 302 |
|
| 303 |
user_text, reply_text, audio_out, new_history, rtf_text = gpu_chat_turn(
|
| 304 |
-
audio,
|
| 305 |
)
|
| 306 |
|
| 307 |
if reply_text is None:
|
| 308 |
-
|
| 309 |
-
return state, gr.update(), None, rtf_text, gr.update(), gr.update()
|
| 310 |
|
| 311 |
-
turn_count +
|
| 312 |
-
state["history"] = new_history
|
| 313 |
-
state["turn_count"] = turn_count
|
| 314 |
|
| 315 |
-
# Build display messages (show only current conversation, not enrollment greeting)
|
| 316 |
-
# Prepend greeting so it stays at top
|
| 317 |
fluent_text = FLUENT_PHRASES.get(l2, FLUENT_PHRASES["en"])
|
| 318 |
messages = [{"role": "assistant", "content": fluent_text}]
|
| 319 |
for msg in new_history:
|
| 320 |
messages.append({"role": msg["role"], "content": msg["content"]})
|
| 321 |
|
| 322 |
-
if
|
| 323 |
return (
|
| 324 |
-
|
|
|
|
|
|
|
| 325 |
messages,
|
| 326 |
audio_out,
|
| 327 |
rtf_text,
|
|
@@ -330,7 +338,9 @@ with gr.Blocks(title="Vocal Mirror") as demo:
|
|
| 330 |
)
|
| 331 |
|
| 332 |
return (
|
| 333 |
-
|
|
|
|
|
|
|
| 334 |
messages,
|
| 335 |
audio_out,
|
| 336 |
rtf_text,
|
|
@@ -340,8 +350,9 @@ with gr.Blocks(title="Vocal Mirror") as demo:
|
|
| 340 |
|
| 341 |
send_btn.click(
|
| 342 |
fn=on_send,
|
| 343 |
-
inputs=[user_mic,
|
| 344 |
-
outputs=[
|
|
|
|
| 345 |
)
|
| 346 |
|
| 347 |
demo.queue()
|
|
|
|
| 4 |
Language select → 3-phrase enrollment → chat (Claude Haiku replies in cloned voice) → wall at turn 7
|
| 5 |
|
| 6 |
RTF is shown after each AI turn so you can verify the GPU is keeping up.
|
| 7 |
+
|
| 8 |
+
Iteration 13: replaced single gr.State(dict) with flat primitive States (str/int/list) to fix
|
| 9 |
+
Gradio API schema crash — gradio_client.utils._json_schema_to_python_type cannot handle dict
|
| 10 |
+
additionalProperties=True (a bool), causing TypeError: argument of type 'bool' is not iterable.
|
| 11 |
"""
|
| 12 |
|
| 13 |
import math
|
|
|
|
| 84 |
("Korean (ko)", "ko"), ("Chinese (zh)", "zh")]
|
| 85 |
|
| 86 |
# ββ Audio helpers βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 87 |
+
def _to_mono_16k(audio_input):
|
| 88 |
if audio_input is None:
|
| 89 |
return None
|
| 90 |
sr, samples = audio_input
|
|
|
|
| 101 |
|
| 102 |
|
| 103 |
# ββ GPU functions βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 104 |
+
# NOTE: No type hints on parameters β Gradio's json_schema_to_python_type crashes
|
| 105 |
+
# on np.ndarray and dict types. State is stored as flat primitives only (str/int/list).
|
| 106 |
|
| 107 |
@spaces.GPU
|
| 108 |
def gpu_enroll_and_greet(audio1, audio2, audio3, l2):
|
| 109 |
+
"""WavLM enrollment + synthesize first AI greeting.
|
| 110 |
+
Returns (ref_list, greeting_audio, rtf_text).
|
| 111 |
+
ref_list is a plain Python list so it survives JSON serialization through gr.State.
|
| 112 |
+
"""
|
| 113 |
chunks, ref_texts = [], []
|
| 114 |
for i, a in enumerate([audio1, audio2, audio3]):
|
| 115 |
chunk = _to_mono_16k(a)
|
|
|
|
| 118 |
ref_texts.append(ENROLLMENT_PHRASES[i])
|
| 119 |
|
| 120 |
if not chunks:
|
| 121 |
+
return [], None, "β οΈ No audio recorded"
|
| 122 |
|
| 123 |
ref = np.concatenate(chunks)
|
| 124 |
ref_text = " ".join(ref_texts)
|
|
|
|
| 134 |
|
| 135 |
status = "PASS β" if rtf < 1.0 else "FAIL β"
|
| 136 |
rtf_text = f"Greeting β RTF: {rtf:.3f} | {status} | {elapsed*1000:.0f} ms"
|
| 137 |
+
# Convert np.ndarray β list so gr.State stays JSON-serializable
|
| 138 |
return ref.tolist(), (16_000, greeting_audio), rtf_text
|
| 139 |
|
| 140 |
|
| 141 |
@spaces.GPU
|
| 142 |
def gpu_chat_turn(audio_input, ref_list, history, turn_count, l1, l2):
|
| 143 |
+
"""ASR β Claude Haiku reply β TTS.
|
| 144 |
+
ref_list is a plain Python list (from gr.State); converted to np.ndarray here.
|
| 145 |
+
Returns (user_text, reply_text, audio_out, new_history, rtf_text).
|
| 146 |
"""
|
| 147 |
samples = _to_mono_16k(audio_input)
|
| 148 |
if samples is None or len(samples) == 0:
|
| 149 |
return None, None, None, history, "β οΈ No audio"
|
| 150 |
|
| 151 |
+
if not ref_list:
|
| 152 |
+
return None, None, None, history, "β οΈ Not enrolled"
|
| 153 |
+
|
| 154 |
+
# Reconstruct np.ndarray from list stored in State
|
| 155 |
ref = np.array(ref_list, dtype=np.float32)
|
| 156 |
|
| 157 |
# ASR β Whisper tiny (CPU)
|
|
|
|
| 168 |
turn_number=turn_count + 1,
|
| 169 |
whisper_signals=whisper_signals if whisper_signals else None,
|
| 170 |
)
|
| 171 |
+
reply_text = reply_obj.l2_text
|
| 172 |
+
new_history = reply_obj.updated_history
|
| 173 |
+
lang_name = LANG_NAMES.get(l2, "English")
|
| 174 |
|
| 175 |
# TTS β hybrid router (OpenVoice short / Qwen3 long)
|
| 176 |
t0 = time.perf_counter()
|
|
|
|
| 185 |
|
| 186 |
|
| 187 |
# ββ Gradio UI βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 188 |
+
# Use FLAT, PRIMITIVE gr.State objects β NOT a single gr.State(dict).
|
| 189 |
+
# Gradio's API schema generator (gradio_client.utils._json_schema_to_python_type)
|
| 190 |
+
# crashes on dict additionalProperties=True (a bool) with:
|
| 191 |
+
# TypeError: argument of type 'bool' is not iterable
|
| 192 |
+
# Flat primitives (str, int, list) are safe.
|
| 193 |
|
| 194 |
with gr.Blocks(title="Vocal Mirror") as demo:
|
| 195 |
|
| 196 |
+
# Flat state β each piece of session state is its own gr.State
|
| 197 |
+
state_l1 = gr.State("en") # native language code
|
| 198 |
+
state_l2 = gr.State("es") # target language code
|
| 199 |
+
state_ref = gr.State([]) # voice ref as plain float list
|
| 200 |
+
state_history = gr.State([]) # conversation history (list of dicts)
|
| 201 |
+
state_turn_count = gr.State(0) # number of completed turns
|
| 202 |
|
| 203 |
# ββ Screen 1: Language select βββββββββββββββββββββββββββββββββββββββββββββ
|
| 204 |
with gr.Column(visible=True) as screen_lang:
|
|
|
|
| 225 |
with gr.Column():
|
| 226 |
gr.Markdown(f'**Phrase 3**\n\n*"{ENROLLMENT_PHRASES[2]}"*')
|
| 227 |
enroll_a3 = gr.Audio(label="Phrase 3", sources=["microphone"], type="numpy")
|
| 228 |
+
enroll_btn = gr.Button("Clone my voice & start β", variant="primary", size="lg")
|
| 229 |
enroll_status = gr.Textbox(label="Status", interactive=False, visible=False)
|
| 230 |
|
| 231 |
# ββ Screen 3: Chat ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 232 |
with gr.Column(visible=False) as screen_chat:
|
| 233 |
gr.Markdown("## Chat")
|
| 234 |
+
chatbot = gr.Chatbot(label="Conversation", type="messages", height=400)
|
| 235 |
ai_audio = gr.Audio(label="AI reply (cloned voice)", type="numpy", autoplay=True)
|
| 236 |
rtf_box = gr.Textbox(label="RTF", interactive=False)
|
| 237 |
gr.Markdown("### Your turn β record your reply")
|
| 238 |
+
user_mic = gr.Audio(label="Your voice", sources=["microphone"], type="numpy")
|
| 239 |
+
send_btn = gr.Button("Send β", variant="primary")
|
| 240 |
|
| 241 |
# ββ Screen 4: Wall ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 242 |
with gr.Column(visible=False) as screen_wall:
|
|
|
|
| 246 |
"in your own voice.\n\n"
|
| 247 |
"Join the waitlist to get early access when we launch."
|
| 248 |
)
|
| 249 |
+
gr.Markdown("_RTF benchmark ran throughout β all turns are real-time capable on A10G GPU._")
|
| 250 |
|
| 251 |
|
| 252 |
# ββ Callbacks βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 253 |
|
| 254 |
+
def on_start(l1, l2):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 255 |
return (
|
| 256 |
+
l1, # state_l1
|
| 257 |
+
l2, # state_l2
|
| 258 |
+
[], # state_ref (reset)
|
| 259 |
+
[], # state_history (reset)
|
| 260 |
+
0, # state_turn_count (reset)
|
| 261 |
+
gr.update(visible=False), # screen_lang
|
| 262 |
+
gr.update(visible=True), # screen_enroll
|
| 263 |
)
|
| 264 |
|
| 265 |
start_btn.click(
|
| 266 |
fn=on_start,
|
| 267 |
+
inputs=[l1_dd, l2_dd],
|
| 268 |
+
outputs=[state_l1, state_l2, state_ref, state_history, state_turn_count,
|
| 269 |
+
screen_lang, screen_enroll],
|
| 270 |
)
|
| 271 |
|
| 272 |
|
| 273 |
+
def on_enroll(a1, a2, a3, l2):
|
| 274 |
+
ref_list, greeting, rtf_text = gpu_enroll_and_greet(a1, a2, a3, l2)
|
|
|
|
|
|
|
|
|
|
| 275 |
|
| 276 |
+
if not ref_list:
|
| 277 |
return (
|
| 278 |
+
[], # state_ref unchanged
|
| 279 |
+
gr.update(visible=True), # screen_enroll stays
|
| 280 |
+
gr.update(value=rtf_text, visible=True), # enroll_status
|
| 281 |
+
gr.update(visible=False), # screen_chat
|
| 282 |
+
gr.update(visible=False), # screen_wall
|
| 283 |
+
[], # chatbot
|
| 284 |
+
None, # ai_audio
|
| 285 |
+
"", # rtf_box
|
| 286 |
)
|
| 287 |
|
| 288 |
fluent_text = FLUENT_PHRASES.get(l2, FLUENT_PHRASES["en"])
|
|
|
|
|
|
|
| 289 |
messages = [{"role": "assistant", "content": fluent_text}]
|
| 290 |
|
| 291 |
return (
|
| 292 |
+
ref_list, # state_ref
|
| 293 |
gr.update(visible=False), # screen_enroll
|
| 294 |
gr.update(visible=False), # enroll_status
|
| 295 |
gr.update(visible=True), # screen_chat
|
|
|
|
| 301 |
|
| 302 |
enroll_btn.click(
|
| 303 |
fn=on_enroll,
|
| 304 |
+
inputs=[enroll_a1, enroll_a2, enroll_a3, state_l2],
|
| 305 |
+
outputs=[state_ref, screen_enroll, enroll_status, screen_chat, screen_wall,
|
| 306 |
chatbot, ai_audio, rtf_box],
|
| 307 |
)
|
| 308 |
|
| 309 |
|
| 310 |
+
def on_send(audio, ref_list, history, turn_count, l1, l2):
|
| 311 |
+
if not ref_list:
|
| 312 |
+
return ref_list, history, turn_count, gr.update(), None, "β οΈ Not enrolled", gr.update(), gr.update()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 313 |
|
| 314 |
user_text, reply_text, audio_out, new_history, rtf_text = gpu_chat_turn(
|
| 315 |
+
audio, ref_list, history, turn_count, l1, l2
|
| 316 |
)
|
| 317 |
|
| 318 |
if reply_text is None:
|
| 319 |
+
return ref_list, history, turn_count, gr.update(), None, rtf_text, gr.update(), gr.update()
|
|
|
|
| 320 |
|
| 321 |
+
new_turn_count = turn_count + 1
|
|
|
|
|
|
|
| 322 |
|
|
|
|
|
|
|
| 323 |
fluent_text = FLUENT_PHRASES.get(l2, FLUENT_PHRASES["en"])
|
| 324 |
messages = [{"role": "assistant", "content": fluent_text}]
|
| 325 |
for msg in new_history:
|
| 326 |
messages.append({"role": msg["role"], "content": msg["content"]})
|
| 327 |
|
| 328 |
+
if new_turn_count >= WALL_TURN_COUNT:
|
| 329 |
return (
|
| 330 |
+
ref_list,
|
| 331 |
+
new_history,
|
| 332 |
+
new_turn_count,
|
| 333 |
messages,
|
| 334 |
audio_out,
|
| 335 |
rtf_text,
|
|
|
|
| 338 |
)
|
| 339 |
|
| 340 |
return (
|
| 341 |
+
ref_list,
|
| 342 |
+
new_history,
|
| 343 |
+
new_turn_count,
|
| 344 |
messages,
|
| 345 |
audio_out,
|
| 346 |
rtf_text,
|
|
|
|
| 350 |
|
| 351 |
send_btn.click(
|
| 352 |
fn=on_send,
|
| 353 |
+
inputs=[user_mic, state_ref, state_history, state_turn_count, state_l1, state_l2],
|
| 354 |
+
outputs=[state_ref, state_history, state_turn_count,
|
| 355 |
+
chatbot, ai_audio, rtf_box, screen_chat, screen_wall],
|
| 356 |
)
|
| 357 |
|
| 358 |
demo.queue()
|
build-errors/build_errors.md
CHANGED
|
@@ -115,5 +115,21 @@ This file is committed alongside every fix so the repo retains full context of w
|
|
| 115 |
- Changed `gpu_chat_turn` to accept `ref_list` (plain list) and convert to `np.ndarray` internally via `np.array(ref_list, dtype=np.float32)` before passing to `synthesize()`
|
| 116 |
- No changes to callbacks — `on_enroll` stores whatever the function returns; `on_send` passes it through unchanged
|
| 117 |
**Files changed:** `app.py` only.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
**Result:** Pending — pushed, awaiting rebuild.
|
| 119 |
|
|
|
|
| 115 |
- Changed `gpu_chat_turn` to accept `ref_list` (plain list) and convert to `np.ndarray` internally via `np.array(ref_list, dtype=np.float32)` before passing to `synthesize()`
|
| 116 |
- No changes to callbacks — `on_enroll` stores whatever the function returns; `on_send` passes it through unchanged
|
| 117 |
**Files changed:** `app.py` only.
|
| 118 |
+
**Result:** FAIL — same crash persists. Removing np.ndarray type hints did not resolve it. Root cause was actually the gr.State(dict) itself, not the function signature. See Iteration 13.
|
| 119 |
+
|
| 120 |
+
---
|
| 121 |
+
|
| 122 |
+
## Iteration 13 — 2026-04-13
|
| 123 |
+
**Stage:** RUNNING but `/gradio_api/info` still returns 500
|
| 124 |
+
**Error:** `TypeError: argument of type 'bool' is not iterable` at `gradio_client/utils.py:882 → get_type → if "const" in schema`
|
| 125 |
+
**Root cause:** Removing np.ndarray type hints in Iteration 12 did not fix the crash. The actual source is `gr.State({"l1": "en", "l2": "es", "ref": None, "history": [], "turn_count": 0})`. When Gradio generates the API schema for this State, it calls `_json_schema_to_python_type` on the dict schema. The dict's JSON Schema representation has `additionalProperties: True` (a Python bool, per JSON Schema spec). The schema generator then does `if "const" in schema` where `schema` is already a Python bool `True`, causing `TypeError: argument of type 'bool' is not iterable`. This happens in `gradio_client/utils.py` at line 882 regardless of function type hints — it's triggered by the State type itself.
|
| 126 |
+
**Fix applied:** Replaced single `gr.State(dict)` with **5 flat, primitive `gr.State` objects**:
|
| 127 |
+
- `state_l1 = gr.State("en")` — string, safe
|
| 128 |
+
- `state_l2 = gr.State("es")` — string, safe
|
| 129 |
+
- `state_ref = gr.State([])` — empty list (no numpy), safe
|
| 130 |
+
- `state_history = gr.State([])` — list of dicts (plain JSON), safe
|
| 131 |
+
- `state_turn_count = gr.State(0)` — int, safe
|
| 132 |
+
All callbacks updated to accept/return these flat states. `ref_list` (a Python list) is passed as `state_ref` and converted to `np.ndarray` inside `gpu_chat_turn` only. Full `app.py` rewrite.
|
| 133 |
+
**Files changed:** `app.py` only.
|
| 134 |
**Result:** Pending — pushed, awaiting rebuild.
|
| 135 |
|