Spaces:
Runtime error
Runtime error
Update app.py
Browse files
Adaptation of TTS using timestamped speech for cloning
app.py
CHANGED
|
@@ -23,6 +23,10 @@ ASR_API = "http://astarwiz.com:9998/asr"
|
|
| 23 |
TTS_SPEAK_SERVICE = 'http://astarwiz.com:9603/speak'
|
| 24 |
TTS_WAVE_SERVICE = 'http://astarwiz.com:9603/wave'
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
LANGUAGE_MAP = {
|
| 27 |
"en": "English",
|
| 28 |
"ma": "Malay",
|
|
@@ -40,10 +44,12 @@ AVAILABLE_SPEAKERS = {
|
|
| 40 |
"zh": ["childChinese2"]
|
| 41 |
}
|
| 42 |
|
|
|
|
| 43 |
audio_update_event = asyncio.Event()
|
| 44 |
acc_cosy_audio = None
|
| 45 |
# CosyVoice TTS related settings
|
| 46 |
-
TTS_SOCKET_SERVER = "http://
|
|
|
|
| 47 |
|
| 48 |
sio = socketio.AsyncClient()
|
| 49 |
|
|
@@ -209,7 +215,9 @@ async def download_youtube_audio(youtube_url: str, output_dir: Optional[str] = N
|
|
| 209 |
if url.get('isBundle'):
|
| 210 |
audio_url = url['url']
|
| 211 |
extension = url['extension']
|
|
|
|
| 212 |
async with session.get(audio_url) as audio_response:
|
|
|
|
| 213 |
if audio_response.status == 200:
|
| 214 |
content = await audio_response.read()
|
| 215 |
temp_filename = os.path.join(output_dir, f"{video_id}.{extension}")
|
|
@@ -320,18 +328,17 @@ async def upload_file(file_path, upload_url):
|
|
| 320 |
with open(file_path, 'rb') as f:
|
| 321 |
form_data = aiohttp.FormData()
|
| 322 |
form_data.add_field('file', f, filename=os.path.basename(file_path))
|
| 323 |
-
|
| 324 |
async with session.post(upload_url, data=form_data) as response:
|
| 325 |
print(f"5. Client receives headers: {time.time()}")
|
| 326 |
print(f"Status: {response.status}")
|
| 327 |
-
|
| 328 |
result = await response.json()
|
| 329 |
print(f"7. Client fully received and parsed response: {time.time()}")
|
| 330 |
if response.status == 200:
|
| 331 |
return result
|
| 332 |
else:
|
| 333 |
return {"file_id",""}
|
| 334 |
-
|
| 335 |
async def transcribe_and_speak(audio, source_lang, target_lang, youtube_url=None, target_speaker=None, progress_tracker=None):
|
| 336 |
global transcription_update, translation_update, audio_update, acc_cosy_audio,audio_update_event
|
| 337 |
transcription_update = {"content": "", "new": True}
|
|
@@ -357,9 +364,12 @@ async def transcribe_and_speak(audio, source_lang, target_lang, youtube_url=None
|
|
| 357 |
data = aiohttp.FormData()
|
| 358 |
data.add_field('file', open(audio, 'rb'))
|
| 359 |
data.add_field('language', 'ms' if source_lang == 'ma' else source_lang)
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
|
|
|
|
|
|
|
|
|
| 363 |
|
| 364 |
async with aiohttp.ClientSession() as session:
|
| 365 |
async with session.post(ASR_API, data=data) as asr_response:
|
|
@@ -382,8 +392,7 @@ async def transcribe_and_speak(audio, source_lang, target_lang, youtube_url=None
|
|
| 382 |
server_url = TTS_SOCKET_SERVER
|
| 383 |
await sio.connect(server_url)
|
| 384 |
print(f"Connected to {server_url}")
|
| 385 |
-
|
| 386 |
-
|
| 387 |
# Handle the audio file
|
| 388 |
file_id=""
|
| 389 |
if audio and os.path.exists(audio):
|
|
@@ -395,6 +404,7 @@ async def transcribe_and_speak(audio, source_lang, target_lang, youtube_url=None
|
|
| 395 |
print ("upload_result:", upload_result)
|
| 396 |
file_id = upload_result['file_id']
|
| 397 |
|
|
|
|
| 398 |
# use default voice
|
| 399 |
tts_request = {
|
| 400 |
'text': transcription,
|
|
@@ -418,15 +428,20 @@ async def transcribe_and_speak(audio, source_lang, target_lang, youtube_url=None
|
|
| 418 |
|
| 419 |
|
| 420 |
|
| 421 |
-
|
| 422 |
-
|
|
|
|
|
|
|
|
|
|
| 423 |
translate_segments = []
|
| 424 |
accumulated_audio = None
|
| 425 |
sample_rate = 22050
|
| 426 |
global is_playing
|
| 427 |
for i, segment in enumerate(split_result):
|
| 428 |
-
|
| 429 |
-
|
|
|
|
|
|
|
| 430 |
translated_seg_txt = await inference_via_llm_api(translation_prompt)
|
| 431 |
translate_segments.append(translated_seg_txt)
|
| 432 |
print(f"Translation: {translated_seg_txt}")
|
|
@@ -454,8 +469,8 @@ async def transcribe_and_speak(audio, source_lang, target_lang, youtube_url=None
|
|
| 454 |
content = await response.read()
|
| 455 |
audio_chunk, sr = sf.read(BytesIO(content))
|
| 456 |
#print ('audio_chunk:', type(audio_chunk),audio_chunk)
|
| 457 |
-
print ('audio_chunk:, src:', segment['end'] -segment['start'], ' tts:', len(audio_chunk)/sr)
|
| 458 |
-
|
| 459 |
|
| 460 |
|
| 461 |
if accumulated_audio is None:
|
|
@@ -513,6 +528,10 @@ async def update_audio():
|
|
| 513 |
return content
|
| 514 |
return gr.update()
|
| 515 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 516 |
with gr.Blocks() as demo:
|
| 517 |
gr.Markdown("# Speech Translation")
|
| 518 |
|
|
@@ -533,15 +552,14 @@ with gr.Blocks() as demo:
|
|
| 533 |
with gr.Row():
|
| 534 |
user_transcription_output = gr.Textbox(label="Transcription")
|
| 535 |
user_translation_output = gr.Textbox(label="Translation")
|
| 536 |
-
user_audio_output = gr.Audio(label="Translated Speech")
|
| 537 |
user_audio_final = gr.Audio(label="Final total Speech")
|
| 538 |
-
progress_bar = gr.Textbox(label="progress", interactive=False)
|
| 539 |
status_message = gr.Textbox(label="Status", interactive=False)
|
| 540 |
|
| 541 |
user_video_output = gr.HTML(label="YouTube Video")
|
| 542 |
|
| 543 |
-
replace_audio_button = gr.Button("Replace Audio", interactive=False)
|
| 544 |
-
final_video_output = gr.Video(label="Video with Replaced Audio")
|
| 545 |
|
| 546 |
temp_video_path = gr.State()
|
| 547 |
translation_progress = gr.State(0.0)
|
|
@@ -549,6 +567,7 @@ with gr.Blocks() as demo:
|
|
| 549 |
async def update_button_state(audio, youtube_url, progress):
|
| 550 |
print(audio, youtube_url, progress)
|
| 551 |
# Button is interactive if there's input and progress is 0 or 1 (not in progress)
|
|
|
|
| 552 |
return gr.Button(interactive=(bool(audio) or bool(youtube_url)) and (progress == 0 or progress == 1))
|
| 553 |
|
| 554 |
user_audio_input.change(
|
|
@@ -562,31 +581,23 @@ with gr.Blocks() as demo:
|
|
| 562 |
outputs=user_button
|
| 563 |
)
|
| 564 |
|
| 565 |
-
async def run_speech_translation_wrapper(audio, source_lang, target_lang, youtube_url, target_speaker):
|
| 566 |
-
|
| 567 |
-
|
| 568 |
-
#audio_data, sample_rate = sf.read(audio)
|
| 569 |
-
#print ("user_audio_input:", audio, audio_data, sample_rate)
|
| 570 |
-
|
| 571 |
|
| 572 |
-
|
| 573 |
-
gr.update(interactive=False),
|
| 574 |
-
gr.update(), gr.update(), gr.update(), gr.update(),
|
| 575 |
-
"Translation in progress...",None)
|
| 576 |
|
| 577 |
-
|
| 578 |
temp_video_path = None
|
| 579 |
transcription, translated_text, audio_chunksr, temp_video_path, accumulated_aud_buf = await transcribe_and_speak(audio, source_lang, target_lang, youtube_url, target_speaker)
|
| 580 |
-
|
| 581 |
-
|
| 582 |
-
gr.update(interactive=True),
|
| 583 |
-
transcription, translated_text, audio_chunksr, temp_video_path,
|
| 584 |
-
"Translation complete", accumulated_aud_buf)
|
| 585 |
|
| 586 |
user_button.click(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 587 |
fn=run_speech_translation_wrapper,
|
| 588 |
-
inputs=[user_audio_input, user_source_lang, user_target_lang, user_youtube_url, user_target_speaker],
|
| 589 |
-
outputs=[
|
| 590 |
)
|
| 591 |
|
| 592 |
async def update_replace_audio_button(audio_url, video_path):
|
|
@@ -601,8 +612,8 @@ with gr.Blocks() as demo:
|
|
| 601 |
|
| 602 |
replace_audio_button.click(
|
| 603 |
fn=replace_audio_and_generate_video,
|
| 604 |
-
inputs=[temp_video_path,
|
| 605 |
-
outputs=[
|
| 606 |
)
|
| 607 |
|
| 608 |
async def update_video_embed(youtube_url):
|
|
@@ -659,13 +670,16 @@ with gr.Blocks() as demo:
|
|
| 659 |
async (audioFilePath) => {
|
| 660 |
// Debug: Log received audio file path
|
| 661 |
console.log("Received audio file path:", audioFilePath);
|
|
|
|
| 662 |
if (!window.audioQueue) {
|
| 663 |
window.audioQueue = [];
|
| 664 |
window.isPlaying = false;
|
| 665 |
}
|
|
|
|
| 666 |
// Ensure the correct URL for the audio file is available
|
| 667 |
if (audioFilePath && audioFilePath.url) {
|
| 668 |
console.log("Processing audio file...");
|
|
|
|
| 669 |
try {
|
| 670 |
// Fetch and decode the audio file
|
| 671 |
const response = await fetch(audioFilePath.url);
|
|
@@ -673,51 +687,64 @@ with gr.Blocks() as demo:
|
|
| 673 |
console.error("Failed to fetch audio file:", response.statusText);
|
| 674 |
return;
|
| 675 |
}
|
|
|
|
| 676 |
const audioData = await response.arrayBuffer();
|
| 677 |
const audioContext = new AudioContext();
|
| 678 |
const decodedData = await audioContext.decodeAudioData(audioData);
|
|
|
|
| 679 |
// Split the decoded audio buffer into two chunks
|
| 680 |
const totalDuration = decodedData.duration;
|
| 681 |
const midPoint = Math.floor(decodedData.length / 2); // Midpoint for splitting
|
| 682 |
const sampleRate = decodedData.sampleRate;
|
|
|
|
| 683 |
// Create two separate AudioBuffers for each chunk
|
| 684 |
const firstHalfBuffer = audioContext.createBuffer(decodedData.numberOfChannels, midPoint, sampleRate);
|
| 685 |
const secondHalfBuffer = audioContext.createBuffer(decodedData.numberOfChannels, decodedData.length - midPoint, sampleRate);
|
|
|
|
| 686 |
// Copy data from original buffer to the two new buffers
|
| 687 |
for (let channel = 0; channel < decodedData.numberOfChannels; channel++) {
|
| 688 |
firstHalfBuffer.copyToChannel(decodedData.getChannelData(channel).slice(0, midPoint), channel, 0);
|
| 689 |
secondHalfBuffer.copyToChannel(decodedData.getChannelData(channel).slice(midPoint), channel, 0);
|
| 690 |
}
|
|
|
|
| 691 |
// Add both chunks to the queue
|
| 692 |
window.audioQueue.push(firstHalfBuffer);
|
| 693 |
window.audioQueue.push(secondHalfBuffer);
|
| 694 |
console.log("Two audio chunks added to queue. Queue length:", window.audioQueue.length);
|
|
|
|
| 695 |
// Function to play the next audio chunk from the queue
|
| 696 |
const playNextChunk = async () => {
|
| 697 |
console.log("Attempting to play next chunk. isPlaying:", window.isPlaying);
|
|
|
|
| 698 |
if (!window.isPlaying && window.audioQueue.length > 0) {
|
| 699 |
console.log("Starting playback...");
|
| 700 |
window.isPlaying = true;
|
|
|
|
| 701 |
// Get the next audio buffer from the queue
|
| 702 |
const audioBuffer = window.audioQueue.shift();
|
| 703 |
console.log("Playing audio chunk from buffer.");
|
|
|
|
| 704 |
const source = audioContext.createBufferSource();
|
| 705 |
source.buffer = audioBuffer;
|
| 706 |
source.connect(audioContext.destination);
|
|
|
|
| 707 |
// When the audio finishes playing, play the next chunk
|
| 708 |
source.onended = () => {
|
| 709 |
console.log("Audio chunk finished playing.");
|
| 710 |
window.isPlaying = false;
|
| 711 |
playNextChunk(); // Play the next audio chunk in the queue
|
| 712 |
};
|
|
|
|
| 713 |
source.start(0); // Start playing the current chunk
|
| 714 |
console.log("Audio chunk started.");
|
| 715 |
} else {
|
| 716 |
console.log("Already playing or queue is empty.");
|
| 717 |
}
|
| 718 |
};
|
|
|
|
| 719 |
// Start playing the next chunk if not already playing
|
| 720 |
playNextChunk();
|
|
|
|
| 721 |
} catch (error) {
|
| 722 |
console.error("Error during audio playback:", error);
|
| 723 |
window.isPlaying = false;
|
|
@@ -733,3 +760,4 @@ demo.queue()
|
|
| 733 |
|
| 734 |
#demo.launch(auth=(os.getenv("DEV_USER"), os.getenv("DEV_PWD")))
|
| 735 |
asyncio.run(demo.launch(auth=(os.getenv("DEV_USER"), os.getenv("DEV_PWD"))))
|
|
|
|
|
|
| 23 |
TTS_SPEAK_SERVICE = 'http://astarwiz.com:9603/speak'
|
| 24 |
TTS_WAVE_SERVICE = 'http://astarwiz.com:9603/wave'
|
| 25 |
|
| 26 |
+
|
| 27 |
+
#bSegByPunct = True
|
| 28 |
+
bSegByPunct = False
|
| 29 |
+
|
| 30 |
LANGUAGE_MAP = {
|
| 31 |
"en": "English",
|
| 32 |
"ma": "Malay",
|
|
|
|
| 44 |
"zh": ["childChinese2"]
|
| 45 |
}
|
| 46 |
|
| 47 |
+
|
| 48 |
audio_update_event = asyncio.Event()
|
| 49 |
acc_cosy_audio = None
|
| 50 |
# CosyVoice TTS related settings
|
| 51 |
+
#TTS_SOCKET_SERVER = "http://localhost:9244"
|
| 52 |
+
TTS_SOCKET_SERVER = "http://astarwiz.com:9244"
|
| 53 |
|
| 54 |
sio = socketio.AsyncClient()
|
| 55 |
|
|
|
|
| 215 |
if url.get('isBundle'):
|
| 216 |
audio_url = url['url']
|
| 217 |
extension = url['extension']
|
| 218 |
+
print ("audio_url :", audio_url)
|
| 219 |
async with session.get(audio_url) as audio_response:
|
| 220 |
+
print ("audio_response:", audio_response)
|
| 221 |
if audio_response.status == 200:
|
| 222 |
content = await audio_response.read()
|
| 223 |
temp_filename = os.path.join(output_dir, f"{video_id}.{extension}")
|
|
|
|
| 328 |
with open(file_path, 'rb') as f:
|
| 329 |
form_data = aiohttp.FormData()
|
| 330 |
form_data.add_field('file', f, filename=os.path.basename(file_path))
|
| 331 |
+
|
| 332 |
async with session.post(upload_url, data=form_data) as response:
|
| 333 |
print(f"5. Client receives headers: {time.time()}")
|
| 334 |
print(f"Status: {response.status}")
|
| 335 |
+
|
| 336 |
result = await response.json()
|
| 337 |
print(f"7. Client fully received and parsed response: {time.time()}")
|
| 338 |
if response.status == 200:
|
| 339 |
return result
|
| 340 |
else:
|
| 341 |
return {"file_id",""}
|
|
|
|
| 342 |
async def transcribe_and_speak(audio, source_lang, target_lang, youtube_url=None, target_speaker=None, progress_tracker=None):
|
| 343 |
global transcription_update, translation_update, audio_update, acc_cosy_audio,audio_update_event
|
| 344 |
transcription_update = {"content": "", "new": True}
|
|
|
|
| 364 |
data = aiohttp.FormData()
|
| 365 |
data.add_field('file', open(audio, 'rb'))
|
| 366 |
data.add_field('language', 'ms' if source_lang == 'ma' else source_lang)
|
| 367 |
+
if bSegByPunct:
|
| 368 |
+
data.add_field('model_name', 'whisper-large-v2-local-cs')
|
| 369 |
+
data.add_field('with_timestamp', 'false')
|
| 370 |
+
else:
|
| 371 |
+
data.add_field('model_name', 'official-v3')
|
| 372 |
+
data.add_field('with_timestamp', 'true')
|
| 373 |
|
| 374 |
async with aiohttp.ClientSession() as session:
|
| 375 |
async with session.post(ASR_API, data=data) as asr_response:
|
|
|
|
| 392 |
server_url = TTS_SOCKET_SERVER
|
| 393 |
await sio.connect(server_url)
|
| 394 |
print(f"Connected to {server_url}")
|
| 395 |
+
|
|
|
|
| 396 |
# Handle the audio file
|
| 397 |
file_id=""
|
| 398 |
if audio and os.path.exists(audio):
|
|
|
|
| 404 |
print ("upload_result:", upload_result)
|
| 405 |
file_id = upload_result['file_id']
|
| 406 |
|
| 407 |
+
|
| 408 |
# use default voice
|
| 409 |
tts_request = {
|
| 410 |
'text': transcription,
|
|
|
|
| 428 |
|
| 429 |
|
| 430 |
|
| 431 |
+
if bSegByPunct:
|
| 432 |
+
split_result = split_text_with_punctuation(transcription)
|
| 433 |
+
else:
|
| 434 |
+
split_result = extract_segments(transcription);
|
| 435 |
+
|
| 436 |
translate_segments = []
|
| 437 |
accumulated_audio = None
|
| 438 |
sample_rate = 22050
|
| 439 |
global is_playing
|
| 440 |
for i, segment in enumerate(split_result):
|
| 441 |
+
if bSegByPunct:
|
| 442 |
+
translation_prompt = f"Translate the following text from {LANGUAGE_MAP[source_lang]} to {LANGUAGE_MAP[target_lang]}: {segment}"
|
| 443 |
+
else:
|
| 444 |
+
translation_prompt = f"Translate the following text from {LANGUAGE_MAP[source_lang]} to {LANGUAGE_MAP[target_lang]}: {segment['text']}"
|
| 445 |
translated_seg_txt = await inference_via_llm_api(translation_prompt)
|
| 446 |
translate_segments.append(translated_seg_txt)
|
| 447 |
print(f"Translation: {translated_seg_txt}")
|
|
|
|
| 469 |
content = await response.read()
|
| 470 |
audio_chunk, sr = sf.read(BytesIO(content))
|
| 471 |
#print ('audio_chunk:', type(audio_chunk),audio_chunk)
|
| 472 |
+
#print ('audio_chunk:, src:', segment['end'] -segment['start'], ' tts:', len(audio_chunk)/sr)
|
| 473 |
+
# _, audio_chunk = adjust_tempo_pysox_array( (sr, audio_chunk), segment['end'] -segment['start'])
|
| 474 |
|
| 475 |
|
| 476 |
if accumulated_audio is None:
|
|
|
|
| 528 |
return content
|
| 529 |
return gr.update()
|
| 530 |
|
| 531 |
+
def disable_button():
|
| 532 |
+
# Disable the button during processing
|
| 533 |
+
return gr.update(interactive=False)
|
| 534 |
+
|
| 535 |
with gr.Blocks() as demo:
|
| 536 |
gr.Markdown("# Speech Translation")
|
| 537 |
|
|
|
|
| 552 |
with gr.Row():
|
| 553 |
user_transcription_output = gr.Textbox(label="Transcription")
|
| 554 |
user_translation_output = gr.Textbox(label="Translation")
|
| 555 |
+
user_audio_output = gr.Audio(label="Translated Speech", visible =False)
|
| 556 |
user_audio_final = gr.Audio(label="Final total Speech")
|
|
|
|
| 557 |
status_message = gr.Textbox(label="Status", interactive=False)
|
| 558 |
|
| 559 |
user_video_output = gr.HTML(label="YouTube Video")
|
| 560 |
|
| 561 |
+
replace_audio_button = gr.Button("Replace Audio", interactive=False, visible =False)
|
| 562 |
+
final_video_output = gr.Video(label="Video with Replaced Audio",visible=False)
|
| 563 |
|
| 564 |
temp_video_path = gr.State()
|
| 565 |
translation_progress = gr.State(0.0)
|
|
|
|
| 567 |
async def update_button_state(audio, youtube_url, progress):
|
| 568 |
print(audio, youtube_url, progress)
|
| 569 |
# Button is interactive if there's input and progress is 0 or 1 (not in progress)
|
| 570 |
+
print ("progress:", audio, youtube_url,bool(audio) , bool(youtube_url), progress == 0 or progress == 1)
|
| 571 |
return gr.Button(interactive=(bool(audio) or bool(youtube_url)) and (progress == 0 or progress == 1))
|
| 572 |
|
| 573 |
user_audio_input.change(
|
|
|
|
| 581 |
outputs=user_button
|
| 582 |
)
|
| 583 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 584 |
|
| 585 |
+
async def run_speech_translation_wrapper(audio, source_lang, target_lang, youtube_url, target_speaker,progress):
|
|
|
|
|
|
|
|
|
|
| 586 |
|
| 587 |
+
progress = 0.1
|
| 588 |
temp_video_path = None
|
| 589 |
transcription, translated_text, audio_chunksr, temp_video_path, accumulated_aud_buf = await transcribe_and_speak(audio, source_lang, target_lang, youtube_url, target_speaker)
|
| 590 |
+
progress = 1
|
| 591 |
+
return transcription, translated_text, audio_chunksr, temp_video_path, "Translation complete", accumulated_aud_buf, gr.update(interactive=True)
|
|
|
|
|
|
|
|
|
|
| 592 |
|
| 593 |
user_button.click(
|
| 594 |
+
fn=disable_button,
|
| 595 |
+
inputs=[],
|
| 596 |
+
outputs=[user_button] # Disable the button during processing
|
| 597 |
+
).then(
|
| 598 |
fn=run_speech_translation_wrapper,
|
| 599 |
+
inputs=[user_audio_input, user_source_lang, user_target_lang, user_youtube_url, user_target_speaker, translation_progress],
|
| 600 |
+
outputs=[user_transcription_output, user_translation_output, user_audio_output, temp_video_path, status_message,user_audio_final,user_button]
|
| 601 |
)
|
| 602 |
|
| 603 |
async def update_replace_audio_button(audio_url, video_path):
|
|
|
|
| 612 |
|
| 613 |
replace_audio_button.click(
|
| 614 |
fn=replace_audio_and_generate_video,
|
| 615 |
+
inputs=[temp_video_path, user_audio_final],
|
| 616 |
+
outputs=[status_message, final_video_output]
|
| 617 |
)
|
| 618 |
|
| 619 |
async def update_video_embed(youtube_url):
|
|
|
|
| 670 |
async (audioFilePath) => {
|
| 671 |
// Debug: Log received audio file path
|
| 672 |
console.log("Received audio file path:", audioFilePath);
|
| 673 |
+
|
| 674 |
if (!window.audioQueue) {
|
| 675 |
window.audioQueue = [];
|
| 676 |
window.isPlaying = false;
|
| 677 |
}
|
| 678 |
+
|
| 679 |
// Ensure the correct URL for the audio file is available
|
| 680 |
if (audioFilePath && audioFilePath.url) {
|
| 681 |
console.log("Processing audio file...");
|
| 682 |
+
|
| 683 |
try {
|
| 684 |
// Fetch and decode the audio file
|
| 685 |
const response = await fetch(audioFilePath.url);
|
|
|
|
| 687 |
console.error("Failed to fetch audio file:", response.statusText);
|
| 688 |
return;
|
| 689 |
}
|
| 690 |
+
|
| 691 |
const audioData = await response.arrayBuffer();
|
| 692 |
const audioContext = new AudioContext();
|
| 693 |
const decodedData = await audioContext.decodeAudioData(audioData);
|
| 694 |
+
|
| 695 |
// Split the decoded audio buffer into two chunks
|
| 696 |
const totalDuration = decodedData.duration;
|
| 697 |
const midPoint = Math.floor(decodedData.length / 2); // Midpoint for splitting
|
| 698 |
const sampleRate = decodedData.sampleRate;
|
| 699 |
+
|
| 700 |
// Create two separate AudioBuffers for each chunk
|
| 701 |
const firstHalfBuffer = audioContext.createBuffer(decodedData.numberOfChannels, midPoint, sampleRate);
|
| 702 |
const secondHalfBuffer = audioContext.createBuffer(decodedData.numberOfChannels, decodedData.length - midPoint, sampleRate);
|
| 703 |
+
|
| 704 |
// Copy data from original buffer to the two new buffers
|
| 705 |
for (let channel = 0; channel < decodedData.numberOfChannels; channel++) {
|
| 706 |
firstHalfBuffer.copyToChannel(decodedData.getChannelData(channel).slice(0, midPoint), channel, 0);
|
| 707 |
secondHalfBuffer.copyToChannel(decodedData.getChannelData(channel).slice(midPoint), channel, 0);
|
| 708 |
}
|
| 709 |
+
|
| 710 |
// Add both chunks to the queue
|
| 711 |
window.audioQueue.push(firstHalfBuffer);
|
| 712 |
window.audioQueue.push(secondHalfBuffer);
|
| 713 |
console.log("Two audio chunks added to queue. Queue length:", window.audioQueue.length);
|
| 714 |
+
|
| 715 |
// Function to play the next audio chunk from the queue
|
| 716 |
const playNextChunk = async () => {
|
| 717 |
console.log("Attempting to play next chunk. isPlaying:", window.isPlaying);
|
| 718 |
+
|
| 719 |
if (!window.isPlaying && window.audioQueue.length > 0) {
|
| 720 |
console.log("Starting playback...");
|
| 721 |
window.isPlaying = true;
|
| 722 |
+
|
| 723 |
// Get the next audio buffer from the queue
|
| 724 |
const audioBuffer = window.audioQueue.shift();
|
| 725 |
console.log("Playing audio chunk from buffer.");
|
| 726 |
+
|
| 727 |
const source = audioContext.createBufferSource();
|
| 728 |
source.buffer = audioBuffer;
|
| 729 |
source.connect(audioContext.destination);
|
| 730 |
+
|
| 731 |
// When the audio finishes playing, play the next chunk
|
| 732 |
source.onended = () => {
|
| 733 |
console.log("Audio chunk finished playing.");
|
| 734 |
window.isPlaying = false;
|
| 735 |
playNextChunk(); // Play the next audio chunk in the queue
|
| 736 |
};
|
| 737 |
+
|
| 738 |
source.start(0); // Start playing the current chunk
|
| 739 |
console.log("Audio chunk started.");
|
| 740 |
} else {
|
| 741 |
console.log("Already playing or queue is empty.");
|
| 742 |
}
|
| 743 |
};
|
| 744 |
+
|
| 745 |
// Start playing the next chunk if not already playing
|
| 746 |
playNextChunk();
|
| 747 |
+
|
| 748 |
} catch (error) {
|
| 749 |
console.error("Error during audio playback:", error);
|
| 750 |
window.isPlaying = false;
|
|
|
|
| 760 |
|
| 761 |
#demo.launch(auth=(os.getenv("DEV_USER"), os.getenv("DEV_PWD")))
|
| 762 |
asyncio.run(demo.launch(auth=(os.getenv("DEV_USER"), os.getenv("DEV_PWD"))))
|
| 763 |
+
|