Update srt_utils.py
Browse files- srt_utils.py +69 -1
srt_utils.py
CHANGED
|
@@ -292,4 +292,72 @@ def apply_netflix_style_filter(srt_content):
|
|
| 292 |
|
| 293 |
output_srt += f"{i}\n{start_time} --> {end_time}\n{formatted_text}\n\n"
|
| 294 |
|
| 295 |
-
return output_srt.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 292 |
|
| 293 |
output_srt += f"{i}\n{start_time} --> {end_time}\n{formatted_text}\n\n"
|
| 294 |
|
| 295 |
+
return output_srt.strip()
|
| 296 |
+
|
| 297 |
+
import subprocess
|
| 298 |
+
import shutil
|
| 299 |
+
|
| 300 |
+
def process_audio_for_transcription(input_file: str) -> str:
    """Pre-process audio with FFmpeg to maximize speech clarity for transcription.

    The input is re-encoded to ``<input_file>.processed.mp3`` with:

    1. Video streams dropped (``-vn``).
    2. Mono downmix (``-ac 1``).
    3. Resampling to 16 kHz (``-ar 16000``), the rate targeted for Whisper.
    4. High-pass at 200 Hz to remove rumble and low-pass at 3000 Hz to focus
       on the speech band (telephone quality is sufficient for text).
    5. ``dynaudnorm`` (Dynamic Audio Normalizer) to boost quiet speech.

    FFT denoising (``afftdn``) and generic compression are deliberately NOT
    applied: ``afftdn`` may be missing from some FFmpeg builds and simple
    filters are less likely to introduce artifacts. Background music cannot
    be removed reliably without a source-separation model; the band-pass
    only attenuates it.

    This is a best-effort step: it never raises because of FFmpeg problems.

    Args:
        input_file: Path to the audio (or video) file to process.

    Returns:
        Path to the processed ``.mp3`` file on success; otherwise
        ``input_file`` unchanged (FFmpeg not installed, FFmpeg failed, or
        no usable output was produced).
    """
    # Imported locally so the function does not depend on `os` having been
    # imported elsewhere in the module.
    import os

    ffmpeg_cmd = shutil.which("ffmpeg")
    if not ffmpeg_cmd:
        print("⚠️ FFmpeg não encontrado. Pulando processamento de áudio.")
        return input_file

    output_file = input_file + ".processed.mp3"

    # Band-pass (200-3000 Hz) followed by dynamic loudness normalization.
    filters = "highpass=f=200,lowpass=f=3000,dynaudnorm=f=150:g=15"

    command = [
        ffmpeg_cmd,
        "-y",  # Overwrite an existing output file
        "-i", input_file,
        "-vn",  # No video
        "-ar", "16000",  # 16kHz
        "-ac", "1",  # Mono
        "-af", filters,
        "-c:a", "libmp3lame",
        "-q:a", "2",  # High quality VBR
        output_file,
    ]

    print(f"🔊 Processando áudio com FFmpeg: {' '.join(command)}")

    try:
        subprocess.run(
            command,
            check=True,
            # Do not let FFmpeg read (or block on) the caller's stdin.
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            # Keep stderr so a failure can be diagnosed.
            stderr=subprocess.PIPE,
        )
    except subprocess.CalledProcessError as e:
        print(f"⚠️ Falha ao processar áudio: {e}")
        stderr_text = (e.stderr or b"").decode("utf-8", errors="replace")
        # Only the last few lines: FFmpeg prints its banner first and the
        # actual error at the end.
        for line in stderr_text.strip().splitlines()[-5:]:
            print(f"   FFmpeg: {line}")
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        # OSError: binary could not be executed; ValueError: invalid
        # argument (e.g. embedded NUL byte in the path).
        print(f"⚠️ Falha ao processar áudio: {e}")
    else:
        # An empty file is not a usable result even if FFmpeg exited with 0.
        if os.path.isfile(output_file) and os.path.getsize(output_file) > 0:
            return output_file

    # Failure path: do not leave a partial or empty output file behind.
    try:
        os.remove(output_file)
    except OSError:
        pass

    return input_file
|