Spaces:
Running on Zero
Running on Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -1379,8 +1379,7 @@ def _build_demo(
|
|
| 1379 |
first_audio_time: float | None = None
|
| 1380 |
sample_rate = SAMPLE_RATE
|
| 1381 |
rtf_logger: StreamRTFLogger | None = None
|
| 1382 |
-
|
| 1383 |
-
|
| 1384 |
request = _build_request(
|
| 1385 |
args,
|
| 1386 |
user_text=user_text_value,
|
|
@@ -1404,22 +1403,24 @@ def _build_demo(
|
|
| 1404 |
chunk_duration=float(chunk_duration_value),
|
| 1405 |
prebuffer_seconds=float(stream_prebuffer_seconds_value),
|
| 1406 |
)
|
|
|
|
| 1407 |
rtf_logger = StreamRTFLogger.create(request, started_at)
|
| 1408 |
-
|
| 1409 |
for event in tts_demo.run_stream(request):
|
| 1410 |
if event.audio is None:
|
| 1411 |
-
yield gr.update(), gr.update(), event.message
|
| 1412 |
continue
|
| 1413 |
-
|
| 1414 |
sr, chunk = event.audio
|
| 1415 |
chunk = np.asarray(chunk).reshape(-1)
|
| 1416 |
if chunk.size == 0:
|
| 1417 |
continue
|
|
|
|
| 1418 |
full_chunks.append(chunk)
|
| 1419 |
sample_rate = sr
|
| 1420 |
-
|
| 1421 |
if first_audio_time is None:
|
| 1422 |
first_audio_time = time.monotonic()
|
|
|
|
| 1423 |
if rtf_logger is not None:
|
| 1424 |
rtf_logger.log_chunk(
|
| 1425 |
event_message=event.message,
|
|
@@ -1427,15 +1428,13 @@ def _build_demo(
|
|
| 1427 |
chunk=chunk,
|
| 1428 |
first_audio_time=first_audio_time,
|
| 1429 |
)
|
| 1430 |
-
|
| 1431 |
-
ttfa_ms = (first_audio_time - started_at) * 1000.0 if first_audio_time is not None else float("nan")
|
| 1432 |
-
status_msg = f"{event.message} | chunks={idx} | ttfa={ttfa_ms:.0f}ms"
|
| 1433 |
-
yield payload, gr.update(), status_msg
|
| 1434 |
-
|
| 1435 |
if full_chunks:
|
| 1436 |
full_audio = np.concatenate(full_chunks)
|
|
|
|
| 1437 |
if rtf_logger is not None:
|
| 1438 |
rtf_logger.log_completion(first_audio_time=first_audio_time)
|
|
|
|
| 1439 |
done_msg = _format_completion_status(
|
| 1440 |
len(full_chunks),
|
| 1441 |
sample_rate,
|
|
@@ -1443,17 +1442,22 @@ def _build_demo(
|
|
| 1443 |
started_at,
|
| 1444 |
first_audio_time,
|
| 1445 |
)
|
| 1446 |
-
|
| 1447 |
-
|
| 1448 |
-
|
| 1449 |
-
rtf_logger.log_no_audio()
|
| 1450 |
-
yield gr.update(), gr.update(), "Done | no audio chunks emitted"
|
| 1451 |
-
except Exception as exc:
|
| 1452 |
-
import traceback
|
| 1453 |
-
traceback.print_exc()
|
| 1454 |
if rtf_logger is not None:
|
| 1455 |
-
rtf_logger.
|
| 1456 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1457 |
|
| 1458 |
run_btn.click(
|
| 1459 |
_on_generate,
|
|
|
|
| 1379 |
first_audio_time: float | None = None
|
| 1380 |
sample_rate = SAMPLE_RATE
|
| 1381 |
rtf_logger: StreamRTFLogger | None = None
|
| 1382 |
+
|
|
|
|
| 1383 |
request = _build_request(
|
| 1384 |
args,
|
| 1385 |
user_text=user_text_value,
|
|
|
|
| 1403 |
chunk_duration=float(chunk_duration_value),
|
| 1404 |
prebuffer_seconds=float(stream_prebuffer_seconds_value),
|
| 1405 |
)
|
| 1406 |
+
|
| 1407 |
rtf_logger = StreamRTFLogger.create(request, started_at)
|
| 1408 |
+
|
| 1409 |
for event in tts_demo.run_stream(request):
|
| 1410 |
if event.audio is None:
|
|
|
|
| 1411 |
continue
|
| 1412 |
+
|
| 1413 |
sr, chunk = event.audio
|
| 1414 |
chunk = np.asarray(chunk).reshape(-1)
|
| 1415 |
if chunk.size == 0:
|
| 1416 |
continue
|
| 1417 |
+
|
| 1418 |
full_chunks.append(chunk)
|
| 1419 |
sample_rate = sr
|
| 1420 |
+
|
| 1421 |
if first_audio_time is None:
|
| 1422 |
first_audio_time = time.monotonic()
|
| 1423 |
+
|
| 1424 |
if rtf_logger is not None:
|
| 1425 |
rtf_logger.log_chunk(
|
| 1426 |
event_message=event.message,
|
|
|
|
| 1428 |
chunk=chunk,
|
| 1429 |
first_audio_time=first_audio_time,
|
| 1430 |
)
|
| 1431 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1432 |
if full_chunks:
|
| 1433 |
full_audio = np.concatenate(full_chunks)
|
| 1434 |
+
|
| 1435 |
if rtf_logger is not None:
|
| 1436 |
rtf_logger.log_completion(first_audio_time=first_audio_time)
|
| 1437 |
+
|
| 1438 |
done_msg = _format_completion_status(
|
| 1439 |
len(full_chunks),
|
| 1440 |
sample_rate,
|
|
|
|
| 1442 |
started_at,
|
| 1443 |
first_audio_time,
|
| 1444 |
)
|
| 1445 |
+
|
| 1446 |
+
return "", (sample_rate, full_audio), done_msg
|
| 1447 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1448 |
if rtf_logger is not None:
|
| 1449 |
+
rtf_logger.log_no_audio()
|
| 1450 |
+
|
| 1451 |
+
return "", None, "Done | no audio chunks emitted"
|
| 1452 |
+
|
| 1453 |
+
except Exception as exc:
|
| 1454 |
+
import traceback
|
| 1455 |
+
traceback.print_exc()
|
| 1456 |
+
|
| 1457 |
+
if rtf_logger is not None:
|
| 1458 |
+
rtf_logger.log_error(exc, first_audio_time=first_audio_time)
|
| 1459 |
+
|
| 1460 |
+
return "", None, f"Error: {exc}"
|
| 1461 |
|
| 1462 |
run_btn.click(
|
| 1463 |
_on_generate,
|