Spaces:
Running
Running
Update src/audio/generator.py
Browse files- src/audio/generator.py +57 -61
src/audio/generator.py
CHANGED
|
@@ -76,11 +76,13 @@ def generate_translated_audio(srt_path, target_lang, video_duration=180):
|
|
| 76 |
retry_count = 0
|
| 77 |
while retry_count < MAX_RETRY_ATTEMPTS:
|
| 78 |
try:
|
| 79 |
-
# For certain languages, use slower speed
|
| 80 |
slow_option = target_lang in ["hi", "ja", "zh-CN", "ar"]
|
| 81 |
tts = gTTS(text=text, lang=target_lang, slow=slow_option)
|
| 82 |
tts.save(str(audio_file))
|
| 83 |
|
|
|
|
|
|
|
| 84 |
if audio_file.exists() and audio_file.stat().st_size > 0:
|
| 85 |
break
|
| 86 |
else:
|
|
@@ -89,9 +91,9 @@ def generate_translated_audio(srt_path, target_lang, video_duration=180):
|
|
| 89 |
except Exception as e:
|
| 90 |
retry_count += 1
|
| 91 |
logger.warning(f"TTS attempt {retry_count} failed for {target_lang}: {str(e)}")
|
| 92 |
-
time.sleep(1)
|
| 93 |
|
| 94 |
-
#
|
| 95 |
if retry_count == MAX_RETRY_ATTEMPTS - 1 and len(text) > 100:
|
| 96 |
logger.warning(f"Trying with shortened text for {target_lang}")
|
| 97 |
shortened_text = text[:100] + "..."
|
|
@@ -104,83 +106,77 @@ def generate_translated_audio(srt_path, target_lang, video_duration=180):
|
|
| 104 |
else:
|
| 105 |
logger.warning(f"Failed to generate audio for subtitle {i}")
|
| 106 |
|
| 107 |
-
#
|
| 108 |
if not audio_files:
|
| 109 |
-
logger.warning(f"No audio files
|
| 110 |
-
# Create a silent audio file as fallback
|
| 111 |
silent_audio = OUTPUT_DIR / f"translated_audio_{target_lang}.wav"
|
| 112 |
create_silent_audio(video_duration, silent_audio)
|
| 113 |
return silent_audio
|
| 114 |
-
|
| 115 |
-
#
|
|
|
|
| 116 |
silence_file = temp_dir / "silence.wav"
|
| 117 |
create_silent_audio(video_duration, silence_file)
|
| 118 |
-
|
| 119 |
-
#
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
# Add each audio segment
|
| 127 |
-
for start_time, end_time, duration, audio_file in timings:
|
| 128 |
-
delay_ms = int(start_time * 1000)
|
| 129 |
-
filter_parts.append(f"[{input_count}:a]adelay={delay_ms}|{delay_ms}")
|
| 130 |
-
input_count += 1
|
| 131 |
-
|
| 132 |
-
# Mix all audio tracks
|
| 133 |
-
filter_parts.append(f"amix=inputs={input_count}:dropout_transition=0:normalize=0[aout]")
|
| 134 |
-
filter_complex = ";".join(filter_parts)
|
| 135 |
-
|
| 136 |
-
# Build the ffmpeg command
|
| 137 |
cmd = ['ffmpeg', '-y']
|
|
|
|
| 138 |
|
| 139 |
-
# Add
|
| 140 |
-
cmd.extend(['-i', str(silence_file)])
|
| 141 |
-
|
| 142 |
-
# Add all audio chunks
|
| 143 |
for audio_file in audio_files:
|
| 144 |
-
cmd
|
| 145 |
-
|
| 146 |
-
#
|
| 147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
|
| 149 |
-
|
| 150 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
'-filter_complex', filter_complex,
|
| 152 |
'-map', '[aout]',
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
logger.debug(f"Running command: {' '.join(cmd)}")
|
|
|
|
|
|
|
| 159 |
process = subprocess.run(cmd, capture_output=True, text=True)
|
| 160 |
|
| 161 |
if process.returncode != 0:
|
| 162 |
-
logger.error(f"Audio
|
| 163 |
-
# Create a fallback silent audio
|
| 164 |
silent_audio = OUTPUT_DIR / f"translated_audio_{target_lang}.wav"
|
| 165 |
create_silent_audio(video_duration, silent_audio)
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
|
|
|
|
|
|
| 169 |
try:
|
| 170 |
shutil.rmtree(temp_dir)
|
| 171 |
-
logger.debug(f"Cleaned
|
| 172 |
except Exception as e:
|
| 173 |
-
logger.warning(f"
|
| 174 |
-
|
| 175 |
-
logger.info(f"Successfully created translated audio: {output_audio}")
|
| 176 |
return output_audio
|
|
|
|
| 177 |
except Exception as e:
|
| 178 |
-
logger.error(f"Audio
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
silent_audio = OUTPUT_DIR / f"translated_audio_{target_lang}.wav"
|
| 183 |
-
create_silent_audio(video_duration, silent_audio)
|
| 184 |
-
return silent_audio
|
| 185 |
-
except:
|
| 186 |
-
raise Exception(f"Audio translation failed: {str(e)}")
|
|
|
|
| 76 |
retry_count = 0
|
| 77 |
while retry_count < MAX_RETRY_ATTEMPTS:
|
| 78 |
try:
|
| 79 |
+
# For certain languages, use slower speed
|
| 80 |
slow_option = target_lang in ["hi", "ja", "zh-CN", "ar"]
|
| 81 |
tts = gTTS(text=text, lang=target_lang, slow=slow_option)
|
| 82 |
tts.save(str(audio_file))
|
| 83 |
|
| 84 |
+
logger.info(f"Generated TTS file size for chunk {i}: {audio_file.stat().st_size} bytes")
|
| 85 |
+
|
| 86 |
if audio_file.exists() and audio_file.stat().st_size > 0:
|
| 87 |
break
|
| 88 |
else:
|
|
|
|
| 91 |
except Exception as e:
|
| 92 |
retry_count += 1
|
| 93 |
logger.warning(f"TTS attempt {retry_count} failed for {target_lang}: {str(e)}")
|
| 94 |
+
time.sleep(1)
|
| 95 |
|
| 96 |
+
# Fallback to shortened text
|
| 97 |
if retry_count == MAX_RETRY_ATTEMPTS - 1 and len(text) > 100:
|
| 98 |
logger.warning(f"Trying with shortened text for {target_lang}")
|
| 99 |
shortened_text = text[:100] + "..."
|
|
|
|
| 106 |
else:
|
| 107 |
logger.warning(f"Failed to generate audio for subtitle {i}")
|
| 108 |
|
| 109 |
+
# Fallback if no audio generated
|
| 110 |
if not audio_files:
|
| 111 |
+
logger.warning(f"No audio files generated for {target_lang}")
|
|
|
|
| 112 |
silent_audio = OUTPUT_DIR / f"translated_audio_{target_lang}.wav"
|
| 113 |
create_silent_audio(video_duration, silent_audio)
|
| 114 |
return silent_audio
|
| 115 |
+
|
| 116 |
+
# Output configuration
|
| 117 |
+
output_audio = OUTPUT_DIR / f"translated_audio_{target_lang}.mp3"
|
| 118 |
silence_file = temp_dir / "silence.wav"
|
| 119 |
create_silent_audio(video_duration, silence_file)
|
| 120 |
+
|
| 121 |
+
# Validate input files
|
| 122 |
+
for f in [silence_file, *audio_files]:
|
| 123 |
+
if not f.exists():
|
| 124 |
+
logger.error(f"Missing input file: {f}")
|
| 125 |
+
return create_silent_audio(video_duration, output_audio)
|
| 126 |
+
|
| 127 |
+
# Build FFmpeg command with volume boost and timing
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
cmd = ['ffmpeg', '-y']
|
| 129 |
+
cmd += ['-i', str(silence_file)]
|
| 130 |
|
| 131 |
+
# Add all audio chunks as inputs
|
|
|
|
|
|
|
|
|
|
| 132 |
for audio_file in audio_files:
|
| 133 |
+
cmd += ['-i', str(audio_file)]
|
| 134 |
+
|
| 135 |
+
# Create filter chain for each audio chunk
|
| 136 |
+
filter_chains = []
|
| 137 |
+
for i, (start_time, _, _, _) in enumerate(timings):
|
| 138 |
+
delay_ms = int(start_time * 1000)
|
| 139 |
+
filter_chains.append(
|
| 140 |
+
f"[{i+1}:a]volume=12dB,adelay={delay_ms}|{delay_ms},apad=whole_dur={video_duration}[a{i}]"
|
| 141 |
+
)
|
| 142 |
|
| 143 |
+
# Mix all audio streams with normalization
|
| 144 |
+
mix_inputs = ''.join([f"[a{i}]" for i in range(len(timings))])
|
| 145 |
+
filter_complex = ";".join(filter_chains) + \
|
| 146 |
+
f";{mix_inputs}amix=inputs={len(timings)}:duration=longest:normalize=0,volume=3dB[aout]"
|
| 147 |
+
|
| 148 |
+
cmd += [
|
| 149 |
'-filter_complex', filter_complex,
|
| 150 |
'-map', '[aout]',
|
| 151 |
+
'-c:a', 'libmp3lame', # Changed to MP3 codec
|
| 152 |
+
'-b:a', '192k',
|
| 153 |
+
str(output_audio)
|
| 154 |
+
]
|
| 155 |
+
|
| 156 |
+
logger.debug(f"Running FFmpeg command: {' '.join(cmd)}")
|
| 157 |
+
|
| 158 |
+
# Execute audio mixing
|
| 159 |
process = subprocess.run(cmd, capture_output=True, text=True)
|
| 160 |
|
| 161 |
if process.returncode != 0:
|
| 162 |
+
logger.error(f"Audio mixing failed: {process.stderr}")
|
|
|
|
| 163 |
silent_audio = OUTPUT_DIR / f"translated_audio_{target_lang}.wav"
|
| 164 |
create_silent_audio(video_duration, silent_audio)
|
| 165 |
+
return silent_audio
|
| 166 |
+
|
| 167 |
+
logger.info(f"Final audio file size: {output_audio.stat().st_size} bytes")
|
| 168 |
+
|
| 169 |
+
# Cleanup temporary files
|
| 170 |
try:
|
| 171 |
shutil.rmtree(temp_dir)
|
| 172 |
+
logger.debug(f"Cleaned temporary directory: {temp_dir}")
|
| 173 |
except Exception as e:
|
| 174 |
+
logger.warning(f"Temp cleanup failed: {str(e)}")
|
| 175 |
+
|
|
|
|
| 176 |
return output_audio
|
| 177 |
+
|
| 178 |
except Exception as e:
|
| 179 |
+
logger.error(f"Audio generation failed: {str(e)}", exc_info=True)
|
| 180 |
+
silent_audio = OUTPUT_DIR / f"translated_audio_{target_lang}.wav"
|
| 181 |
+
create_silent_audio(video_duration, silent_audio)
|
| 182 |
+
return silent_audio
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|