habulaj commited on
Commit
cfa7b66
·
verified ·
1 Parent(s): 8c42e32

Update srt_utils.py

Browse files
Files changed (1) hide show
  1. srt_utils.py +69 -1
srt_utils.py CHANGED
@@ -292,4 +292,72 @@ def apply_netflix_style_filter(srt_content):
292
 
293
  output_srt += f"{i}\n{start_time} --> {end_time}\n{formatted_text}\n\n"
294
 
295
- return output_srt.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
292
 
293
  output_srt += f"{i}\n{start_time} --> {end_time}\n{formatted_text}\n\n"
294
 
295
+ return output_srt.strip()
296
+
297
+ import subprocess
298
+ import shutil
299
+
300
def process_audio_for_transcription(input_file: str) -> str:
    """
    Pre-process a media file with FFmpeg to improve speech clarity before
    transcription.

    The FFmpeg invocation built below:
      1. Drops any video stream (-vn)
      2. Downmixes to mono (-ac 1)
      3. Resamples to 16 kHz (-ar 16000), the rate the author targets for Whisper
      4. Applies a 200 Hz highpass to remove low-frequency rumble
      5. Applies a 3000 Hz lowpass to keep only the core speech band
      6. Applies dynaudnorm (f=150, g=15) to even out loudness
      7. Encodes the result as VBR MP3 (libmp3lame, -q:a 2)

    No FFT denoiser (afftdn), compressor or silence removal is applied: the
    chain is deliberately limited to filters expected in every FFmpeg build.
    Background music is not removed; the band-pass only attenuates it.

    This function is best-effort. If FFmpeg is missing, fails, or produces no
    output file, a warning is printed and the original path is returned so the
    caller can still transcribe the unprocessed audio.

    Args:
        input_file: Path to the source audio or video file.

    Returns:
        Path to the processed file (``input_file + ".processed.mp3"``) on
        success, otherwise ``input_file`` unchanged.
    """
    # Local import: the existence check below must not depend on whether the
    # rest of the module happens to import `os`. With a blanket handler a
    # missing import would silently turn every call into a no-op.
    import os

    ffmpeg_cmd = shutil.which("ffmpeg")
    if not ffmpeg_cmd:
        print("⚠️ FFmpeg não encontrado. Pulando processamento de áudio.")
        return input_file

    output_file = input_file + ".processed.mp3"

    # Band-pass (200-3000 Hz) followed by dynamic loudness normalisation.
    filters = "highpass=f=200,lowpass=f=3000,dynaudnorm=f=150:g=15"

    command = [
        ffmpeg_cmd,
        "-hide_banner",
        "-loglevel", "error",  # keep captured stderr limited to real errors
        "-y",  # Overwrite
        "-i", input_file,
        "-vn",  # No video
        "-ar", "16000",  # 16kHz
        "-ac", "1",  # Mono
        "-af", filters,
        "-c:a", "libmp3lame",
        "-q:a", "2",  # High quality VBR
        output_file,
    ]

    print(f"🔊 Processando áudio com FFmpeg: {' '.join(command)}")

    try:
        subprocess.run(
            command,
            check=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,  # captured so failures can be diagnosed
        )
    except subprocess.CalledProcessError as e:
        print(f"⚠️ Falha ao processar áudio: {e}")
        # Surface the last few lines FFmpeg wrote; the exit code alone rarely
        # explains what went wrong.
        details = (e.stderr or b"").decode("utf-8", errors="replace").strip()
        if details:
            print("\n".join(details.splitlines()[-5:]))
        return input_file
    except (subprocess.SubprocessError, OSError, ValueError) as e:
        # OSError: binary vanished or is not executable.
        # ValueError: invalid argument (e.g. embedded NUL byte in the path).
        print(f"⚠️ Falha ao processar áudio: {e}")
        return input_file

    # FFmpeg exited 0; still confirm the file is really there before handing
    # its path to the caller.
    if os.path.exists(output_file):
        return output_file

    return input_file