esc50-model / src /data /download.py
mateo496's picture
OOP complete and functional rewrite
a3ea780
import requests
import zipfile
import io
import os
import shutil
from pathlib import Path
from tqdm import tqdm
import sys
from src.config.config import DownloadConfig
# Shared download settings, instantiated once at import time. ESC50Downloader
# reads its constructor defaults (repo_url, repo_dst_dir) and its path
# attributes (audio_dst_dir, paths_to_delete, extracted_dir, audio_src_dir)
# from this object.
config = DownloadConfig()
class ESC50Downloader:
    """Download a zip archive, extract it, prune it, and collect its audio files.

    The archive URL and every filesystem location come from the module-level
    ``config`` object; only the URL and the extraction directory can be
    overridden per instance.

    Attributes:
        repo_url: URL of the zip archive to download.
        repo_dst_dir: Directory the archive is extracted into.
        timeout: Seconds passed to ``requests.get`` as ``timeout``.
        audio_dst_dir: Directory that audio files are moved into.
        paths_to_delete: Paths, relative to ``extracted_dir``, removed by
            ``clean_files``.
        extracted_dir: Base directory that ``paths_to_delete`` is resolved
            against.
        audio_src_dir: Directory whose files ``move_audio_files`` relocates.
    """

    def __init__(
        self,
        repo_url: str = config.repo_url,
        repo_dst_dir: str = config.repo_dst_dir,
        *,
        timeout: float = 30.0,
    ) -> None:
        """Store the download source, destination, and configured paths.

        Args:
            repo_url: URL of the zip archive to download.
            repo_dst_dir: Directory to extract the archive into.
            timeout: Seconds to wait for the server to connect and for each
                subsequent piece of data before giving up. With a streamed
                response this does not cap the total download time.
        """
        self.repo_url = repo_url
        self.repo_dst_dir = Path(repo_dst_dir)
        self.timeout = timeout
        self.audio_dst_dir = config.audio_dst_dir
        self.paths_to_delete = config.paths_to_delete
        self.extracted_dir = config.extracted_dir
        self.audio_src_dir = config.audio_src_dir

    def download_and_extract(self) -> None:
        """Download the archive at ``repo_url`` and extract it to ``repo_dst_dir``.

        The whole archive is buffered in memory before extraction because
        ``zipfile`` needs a seekable file object.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            requests.RequestException: On connection failures or timeouts.
            zipfile.BadZipFile: If the downloaded payload is not a valid zip.
        """
        os.makedirs(self.repo_dst_dir, exist_ok=True)
        print(f"Downloading from {self.repo_url}")

        buffer = io.BytesIO()
        # The context manager releases the streamed connection even when the
        # status check or the read loop raises; the timeout keeps a stalled
        # server from blocking forever.
        with requests.get(
            self.repo_url, stream=True, timeout=self.timeout
        ) as response:
            response.raise_for_status()
            # A missing Content-Length header yields 0, i.e. an open-ended bar.
            total = int(response.headers.get("content-length", 0))
            with tqdm(
                total=total, unit="B", unit_scale=True, desc="Downloading"
            ) as bar:
                for chunk in response.iter_content(chunk_size=8192):
                    buffer.write(chunk)
                    bar.update(len(chunk))

        buffer.seek(0)
        with zipfile.ZipFile(buffer) as z:
            print(f"Extracting to {self.repo_dst_dir}")
            z.extractall(self.repo_dst_dir)
        print("Done extracting.")

    def clean_files(self) -> None:
        """Delete every entry of ``paths_to_delete`` found under ``extracted_dir``.

        Files are removed with ``os.remove`` and directories with
        ``shutil.rmtree``. Entries that are neither an existing file nor an
        existing directory are skipped silently.
        """
        for f in self.paths_to_delete:
            path = os.path.join(self.extracted_dir, f)
            if os.path.isfile(path):
                os.remove(path)
                print(f"Deleted file: {path}")
            elif os.path.isdir(path):
                shutil.rmtree(path)
                print(f"Deleted directory: {path}")

    def move_audio_files(self) -> None:
        """Move every regular file from ``audio_src_dir`` into ``audio_dst_dir``.

        The destination directory is created if needed. Subdirectories of the
        source directory are left where they are.

        Raises:
            FileNotFoundError: If ``audio_src_dir`` does not exist.
        """
        os.makedirs(self.audio_dst_dir, exist_ok=True)
        print(f"Moving audio files from {self.audio_src_dir} to {self.audio_dst_dir}")
        for filename in os.listdir(self.audio_src_dir):
            src_file = os.path.join(self.audio_src_dir, filename)
            dst_file = os.path.join(self.audio_dst_dir, filename)
            if os.path.isfile(src_file):
                shutil.move(src_file, dst_file)
        print(f"Moved all audio files to {self.audio_dst_dir}")

    def download_clean(self) -> None:
        """Run the full pipeline: download and extract, prune, then move audio."""
        self.download_and_extract()
        self.clean_files()
        self.move_audio_files()