#!/usr/bin/env python3
"""
download_real_samples.py - Download real (public-domain) audio files for examples.

Each entry in ``URLS`` is fetched into ``sample_inputs/``.  Entries whose file
name ends in ``.ogg`` are transcoded to MP3 with ffmpeg, after which the
downloaded ``.ogg`` is removed.
"""

import subprocess
from pathlib import Path
from urllib.request import urlretrieve

BASE_DIR = Path(__file__).parent
INPUTS_DIR = BASE_DIR / "sample_inputs"
OUTPUTS_DIR = BASE_DIR / "sample_outputs"

# Destination file name (inside INPUTS_DIR) -> source URL.
URLS = {
    "real_sample_1.wav": "https://www.voiptroubleshooter.com/open_speech/american/OSR_us_000_0010_8k.wav",
    "real_sample_2.wav": "https://www.voiptroubleshooter.com/open_speech/american/OSR_us_000_0011_8k.wav",
    "real_sample_3.ogg": "https://upload.wikimedia.org/wikipedia/commons/9/99/Short_melody_from_%22Pa%27s_Fiddle%22.ogg",
}


def _remove_if_present(path):
    """Best-effort delete of *path*; never raises on filesystem errors."""
    try:
        path.unlink()
    except OSError:
        # Cleanup only: a missing or undeletable leftover must not mask the
        # error that is already being reported by the caller.
        pass


def download_file(url, dest_path):
    """Download *url* to *dest_path* unless the destination already exists.

    The payload is first written to a sibling ``<name>.part`` file and only
    renamed onto *dest_path* once the transfer has finished, so an interrupted
    download can never be mistaken for a complete file on a later run.

    Args:
        url: Source URL understood by ``urllib.request.urlretrieve``.
        dest_path: ``pathlib.Path`` of the file to create.

    Returns:
        True if the file already existed or was downloaded successfully,
        False if the download failed (the error is printed, not raised).
    """
    if dest_path.exists():
        print(f"⏭️ Already exists: {dest_path.name}")
        return True

    print(f"⬇️ Downloading {dest_path.name} ...")
    partial_path = dest_path.with_name(dest_path.name + ".part")
    try:
        urlretrieve(url, partial_path)
        partial_path.replace(dest_path)
    except Exception as e:
        # Deliberately broad: this is the script's reporting boundary and any
        # failure (network, HTTP, malformed URL, disk) means "not downloaded".
        _remove_if_present(partial_path)
        print(f" ❌ Failed: {e}")
        return False

    print(f" ✅ Done: {dest_path.name}")
    return True


def _convert_to_mp3(source_path, mp3_path):
    """Transcode *source_path* to a 192 kbit/s MP3 at *mp3_path* using ffmpeg.

    Returns:
        True on success; False if ffmpeg could not be started or exited with
        an error (the reason is printed and any partial output is removed).
    """
    command = [
        "ffmpeg", "-y", "-i", str(source_path),
        "-acodec", "libmp3lame", "-b:a", "192k",
        str(mp3_path),
    ]
    try:
        subprocess.run(command, check=True, capture_output=True)
    except OSError as e:
        # Typically FileNotFoundError when ffmpeg is not installed / on PATH.
        print(f" ❌ Failed: could not run ffmpeg: {e}")
        return False
    except subprocess.CalledProcessError as e:
        # Do not leave a truncated MP3 behind: later runs would trust it.
        _remove_if_present(mp3_path)
        stderr_lines = (e.stderr or b"").decode(errors="replace").strip().splitlines()
        reason = stderr_lines[-1] if stderr_lines else f"exit code {e.returncode}"
        print(f" ❌ Failed: ffmpeg: {reason}")
        return False

    print(f" ✅ Converted to MP3: {mp3_path.name}")
    return True


def main():
    """Fetch every sample in ``URLS`` and convert ``.ogg`` samples to MP3.

    Returns:
        0 if every sample is available afterwards, 1 if any download or
        conversion failed.
    """
    INPUTS_DIR.mkdir(parents=True, exist_ok=True)
    failed = []

    for name, url in URLS.items():
        dest = INPUTS_DIR / name

        if dest.suffix != ".ogg":
            if not download_file(url, dest):
                failed.append(name)
            continue

        mp3_path = dest.with_suffix(".mp3")
        if mp3_path.exists():
            # The .ogg is deleted after conversion, so without this check it
            # would be downloaded again on every subsequent run.
            print(f"⏭️ Already exists: {mp3_path.name}")
            continue

        if not download_file(url, dest):
            failed.append(name)
            continue

        if _convert_to_mp3(dest, mp3_path):
            dest.unlink()
        else:
            failed.append(name)

    if failed:
        print(f"❌ Could not prepare: {', '.join(failed)}")
        return 1

    print("✅ Real samples downloaded.")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())