Update README.md
Browse files
README.md
CHANGED
|
@@ -1362,6 +1362,7 @@ user_audio, _ = librosa.load("user_audio.wav", sr=16000, mono=True)
|
|
| 1362 |
IN_SAMPLE_RATE = 16000 # input audio sample rate, fixed value
|
| 1363 |
CHUNK_SAMPLES = IN_SAMPLE_RATE # samples per 1-second chunk (16000 samples at 16 kHz)
|
| 1364 |
OUT_SAMPLE_RATE = 24000 # output audio sample rate, fixed value
|
|
|
|
| 1365 |
|
| 1366 |
total_samples = len(user_audio)
|
| 1367 |
num_chunks = (total_samples + CHUNK_SAMPLES - 1) // CHUNK_SAMPLES
|
|
@@ -1372,7 +1373,9 @@ for chunk_idx in range(num_chunks):
|
|
| 1372 |
chunk_audio = user_audio[start:end]
|
| 1373 |
|
| 1374 |
is_last_chunk = (chunk_idx == num_chunks - 1)
|
| 1375 |
-
|
|
|
|
|
|
|
| 1376 |
user_msg = {"role": "user", "content": [chunk_audio]}
|
| 1377 |
|
| 1378 |
# For each 1s audio chunk, perform streaming_prefill once to reduce first-token latency
|
|
@@ -1398,6 +1401,7 @@ iter_gen = model.streaming_generate(
|
|
| 1398 |
audios = []
|
| 1399 |
text = ""
|
| 1400 |
|
|
|
|
| 1401 |
if generate_audio:
|
| 1402 |
for wav_chunk, text_chunk in iter_gen:
|
| 1403 |
audios.append(wav_chunk)
|
|
|
|
| 1362 |
IN_SAMPLE_RATE = 16000 # input audio sample rate, fixed value
|
| 1363 |
CHUNK_SAMPLES = IN_SAMPLE_RATE # samples per 1-second chunk (16000 samples at 16 kHz)
|
| 1364 |
OUT_SAMPLE_RATE = 24000 # output audio sample rate, fixed value
|
| 1365 |
+
MIN_AUDIO_SAMPLES = 1600
|
| 1366 |
|
| 1367 |
total_samples = len(user_audio)
|
| 1368 |
num_chunks = (total_samples + CHUNK_SAMPLES - 1) // CHUNK_SAMPLES
|
|
|
|
| 1373 |
chunk_audio = user_audio[start:end]
|
| 1374 |
|
| 1375 |
is_last_chunk = (chunk_idx == num_chunks - 1)
|
| 1376 |
+
if is_last_chunk and len(chunk_audio) < MIN_AUDIO_SAMPLES:
|
| 1377 |
+
chunk_audio = np.concatenate([chunk_audio, np.zeros(MIN_AUDIO_SAMPLES - len(chunk_audio), dtype=chunk_audio.dtype)])
|
| 1378 |
+
|
| 1379 |
user_msg = {"role": "user", "content": [chunk_audio]}
|
| 1380 |
|
| 1381 |
# For each 1s audio chunk, perform streaming_prefill once to reduce first-token latency
|
|
|
|
| 1401 |
audios = []
|
| 1402 |
text = ""
|
| 1403 |
|
| 1404 |
+
output_audio_path = ...
|
| 1405 |
if generate_audio:
|
| 1406 |
for wav_chunk, text_chunk in iter_gen:
|
| 1407 |
audios.append(wav_chunk)
|