Update app.py
Browse files
app.py
CHANGED
|
@@ -381,8 +381,6 @@ class KittenTTSGradio:
|
|
| 381 |
chunk_label = "chunk" if chunk_size == 1 else f"chunk ({chunk_size} sentences each)"
|
| 382 |
progress(0, desc=f"Processing {total_sentences} sentences in {total_chunks} {chunk_label}s...")
|
| 383 |
|
| 384 |
-
audio_chunks = []
|
| 385 |
-
|
| 386 |
# Reset model state before starting
|
| 387 |
if hasattr(self.model, 'session'):
|
| 388 |
try:
|
|
@@ -397,22 +395,26 @@ class KittenTTSGradio:
|
|
| 397 |
except:
|
| 398 |
pass
|
| 399 |
|
|
|
|
|
|
|
|
|
|
| 400 |
if use_multithreading and total_chunks > 1:
|
| 401 |
# Process chunks in parallel with limited workers
|
| 402 |
with ThreadPoolExecutor(max_workers=min(self.max_workers, 4)) as executor:
|
| 403 |
-
|
|
|
|
| 404 |
executor.submit(self.process_single_sentence, chunk, voice, speed): i
|
| 405 |
for i, chunk in enumerate(chunks)
|
| 406 |
}
|
| 407 |
|
| 408 |
-
results = {}
|
| 409 |
completed = 0
|
| 410 |
|
| 411 |
-
|
|
|
|
|
|
|
| 412 |
try:
|
| 413 |
-
idx = futures[future]
|
| 414 |
audio = future.result()
|
| 415 |
-
|
| 416 |
completed += 1
|
| 417 |
progress(completed / total_chunks,
|
| 418 |
desc=f"Processed {completed}/{total_chunks} {chunk_label}s")
|
|
@@ -431,17 +433,21 @@ class KittenTTSGradio:
|
|
| 431 |
except:
|
| 432 |
pass
|
| 433 |
except Exception as e:
|
| 434 |
-
print(f"Error processing chunk: {e}")
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 439 |
else:
|
| 440 |
# Process chunks sequentially
|
| 441 |
for i, chunk in enumerate(chunks):
|
| 442 |
try:
|
| 443 |
audio = self.process_single_sentence(chunk, voice, speed)
|
| 444 |
-
audio_chunks
|
| 445 |
progress((i + 1) / total_chunks,
|
| 446 |
desc=f"Processed {i + 1}/{total_chunks} {chunk_label}s")
|
| 447 |
|
|
@@ -459,11 +465,25 @@ class KittenTTSGradio:
|
|
| 459 |
except:
|
| 460 |
pass
|
| 461 |
except Exception as e:
|
| 462 |
-
print(f"Error processing chunk: {e}")
|
| 463 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 464 |
|
| 465 |
-
if
|
| 466 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 467 |
|
| 468 |
progress(0.9, desc="Concatenating audio...")
|
| 469 |
|
|
|
|
| 381 |
chunk_label = "chunk" if chunk_size == 1 else f"chunk ({chunk_size} sentences each)"
|
| 382 |
progress(0, desc=f"Processing {total_sentences} sentences in {total_chunks} {chunk_label}s...")
|
| 383 |
|
|
|
|
|
|
|
| 384 |
# Reset model state before starting
|
| 385 |
if hasattr(self.model, 'session'):
|
| 386 |
try:
|
|
|
|
| 395 |
except:
|
| 396 |
pass
|
| 397 |
|
| 398 |
+
# Create a list to hold results in the correct order
|
| 399 |
+
audio_chunks = [None] * total_chunks
|
| 400 |
+
|
| 401 |
if use_multithreading and total_chunks > 1:
|
| 402 |
# Process chunks in parallel with limited workers
|
| 403 |
with ThreadPoolExecutor(max_workers=min(self.max_workers, 4)) as executor:
|
| 404 |
+
# Submit all tasks
|
| 405 |
+
future_to_index = {
|
| 406 |
executor.submit(self.process_single_sentence, chunk, voice, speed): i
|
| 407 |
for i, chunk in enumerate(chunks)
|
| 408 |
}
|
| 409 |
|
|
|
|
| 410 |
completed = 0
|
| 411 |
|
| 412 |
+
# Process as they complete
|
| 413 |
+
for future in as_completed(future_to_index):
|
| 414 |
+
index = future_to_index[future]
|
| 415 |
try:
|
|
|
|
| 416 |
audio = future.result()
|
| 417 |
+
audio_chunks[index] = audio # Place at the correct index
|
| 418 |
completed += 1
|
| 419 |
progress(completed / total_chunks,
|
| 420 |
desc=f"Processed {completed}/{total_chunks} {chunk_label}s")
|
|
|
|
| 433 |
except:
|
| 434 |
pass
|
| 435 |
except Exception as e:
|
| 436 |
+
print(f"Error processing chunk at index {index}: {e}")
|
| 437 |
+
# Generate silence for failed chunks
|
| 438 |
+
sample_rate = 24000
|
| 439 |
+
silence_duration = 0.5
|
| 440 |
+
silence = np.zeros(int(sample_rate * silence_duration))
|
| 441 |
+
audio_chunks[index] = silence
|
| 442 |
+
completed += 1
|
| 443 |
+
progress(completed / total_chunks,
|
| 444 |
+
desc=f"Processed {completed}/{total_chunks} {chunk_label}s")
|
| 445 |
else:
|
| 446 |
# Process chunks sequentially
|
| 447 |
for i, chunk in enumerate(chunks):
|
| 448 |
try:
|
| 449 |
audio = self.process_single_sentence(chunk, voice, speed)
|
| 450 |
+
audio_chunks[i] = audio
|
| 451 |
progress((i + 1) / total_chunks,
|
| 452 |
desc=f"Processed {i + 1}/{total_chunks} {chunk_label}s")
|
| 453 |
|
|
|
|
| 465 |
except:
|
| 466 |
pass
|
| 467 |
except Exception as e:
|
| 468 |
+
print(f"Error processing chunk at index {i}: {e}")
|
| 469 |
+
# Generate silence for failed chunks
|
| 470 |
+
sample_rate = 24000
|
| 471 |
+
silence_duration = 0.5
|
| 472 |
+
silence = np.zeros(int(sample_rate * silence_duration))
|
| 473 |
+
audio_chunks[i] = silence
|
| 474 |
+
progress((i + 1) / total_chunks,
|
| 475 |
+
desc=f"Processed {i + 1}/{total_chunks} {chunk_label}s")
|
| 476 |
|
| 477 |
+
# Check if we have any None values (shouldn't happen with the error handling)
|
| 478 |
+
if any(chunk is None for chunk in audio_chunks):
|
| 479 |
+
print("Warning: Some audio chunks were not generated properly")
|
| 480 |
+
# Replace any None values with silence
|
| 481 |
+
for i, chunk in enumerate(audio_chunks):
|
| 482 |
+
if chunk is None:
|
| 483 |
+
sample_rate = 24000
|
| 484 |
+
silence_duration = 0.5
|
| 485 |
+
silence = np.zeros(int(sample_rate * silence_duration))
|
| 486 |
+
audio_chunks[i] = silence
|
| 487 |
|
| 488 |
progress(0.9, desc="Concatenating audio...")
|
| 489 |
|