Update app.py
Browse files
app.py
CHANGED
|
@@ -481,7 +481,7 @@ def generate_voiceover_clone(translated_json, desired_duration, target_language,
|
|
| 481 |
# logger.warning(f"⚠️ Text too long for TTS model ({len(tokens)} tokens). Truncating to {MAX_TTS_TOKENS} tokens.")
|
| 482 |
# full_text = " ".join(tokens[:MAX_TTS_TOKENS])
|
| 483 |
|
| 484 |
-
speed_tts =
|
| 485 |
tts.tts_to_file(
|
| 486 |
text=full_text,
|
| 487 |
speaker_wav=speaker_wav_path,
|
|
@@ -506,25 +506,27 @@ def generate_voiceover_clone(translated_json, desired_duration, target_language,
|
|
| 506 |
logger.error(traceback.format_exc())
|
| 507 |
return None, err_msg, err_msg
|
| 508 |
|
| 509 |
-
def
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 514 |
else:
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
return
|
| 518 |
-
|
| 519 |
-
def calculate_speed(text, desired_duration):
|
| 520 |
-
# Calculate characters per second
|
| 521 |
-
char_count = len(text)
|
| 522 |
-
chars_per_second = char_count / (desired_duration + 0.001)
|
| 523 |
-
|
| 524 |
-
# Apply truncated linear function to get speed
|
| 525 |
-
speed = truncated_linear(chars_per_second)
|
| 526 |
-
|
| 527 |
-
return speed
|
| 528 |
|
| 529 |
def upload_and_manage(file, target_language, mode="transcription"):
|
| 530 |
if file is None:
|
|
|
|
| 481 |
# logger.warning(f"⚠️ Text too long for TTS model ({len(tokens)} tokens). Truncating to {MAX_TTS_TOKENS} tokens.")
|
| 482 |
# full_text = " ".join(tokens[:MAX_TTS_TOKENS])
|
| 483 |
|
| 484 |
+
speed_tts = calibrated_speed(full_text, desired_duration)
|
| 485 |
tts.tts_to_file(
|
| 486 |
text=full_text,
|
| 487 |
speaker_wav=speaker_wav_path,
|
|
|
|
| 506 |
logger.error(traceback.format_exc())
|
| 507 |
return None, err_msg, err_msg
|
| 508 |
|
| 509 |
+
def calibrated_speed(text, desired_duration):
|
| 510 |
+
"""
|
| 511 |
+
Compute a speed factor to help TTS fit audio into desired duration,
|
| 512 |
+
using a simple truncated linear function of characters per second.
|
| 513 |
+
"""
|
| 514 |
+
char_count = len(text.strip())
|
| 515 |
+
if char_count == 0 or desired_duration <= 0:
|
| 516 |
+
return 1.0 # fallback
|
| 517 |
+
|
| 518 |
+
cps = char_count / desired_duration # characters per second
|
| 519 |
+
|
| 520 |
+
# Truncated linear mapping
|
| 521 |
+
if cps < 10:
|
| 522 |
+
return 1.0
|
| 523 |
+
elif cps > 25:
|
| 524 |
+
return 1.4
|
| 525 |
else:
|
| 526 |
+
# Linearly scale between cps 10 -> 25 and speed 1.0 -> 1.4
|
| 527 |
+
slope = (1.4 - 1.0) / (25 - 10)
|
| 528 |
+
return 1.0 + slope * (cps - 10)
|
| 529 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 530 |
|
| 531 |
def upload_and_manage(file, target_language, mode="transcription"):
|
| 532 |
if file is None:
|