SonicDiffusionClean / download_assets.py
alpercagann's picture
Improve download_assets.py with better error handling
05ce79b
import os
import requests
from tqdm import tqdm
import re
def get_gdrive_file_id(url):
"""Extract file ID from Google Drive URL"""
match = re.search(r"d/([a-zA-Z0-9_-]+)", url) or re.search(r"id=([a-zA-Z0-9_-]+)", url)
if match:
return match.group(1)
return url # If no match, assume it's already a file ID
def download_gdrive_file(file_id, destination, force=False):
"""Download a file from Google Drive with support for large files"""
if os.path.exists(destination) and not force:
print(f"File already exists: {destination}")
return True
# Make the directory if it doesn't exist
os.makedirs(os.path.dirname(destination), exist_ok=True)
# First, try the direct download URL
url = f"https://drive.google.com/uc?export=download&id={file_id}"
# Set up a session to handle cookies
session = requests.Session()
try:
# First request to get the confirmation token for large files
response = session.get(url, stream=True)
# Check if there's a download confirmation page
if "confirm" in response.url:
# Extract confirmation token
token = response.url.split("confirm=")[1].split("&")[0]
url = f"{url}&confirm={token}"
response = session.get(url, stream=True)
# Get file size for progress bar
total_size = int(response.headers.get('content-length', 0))
# Download the file with progress bar
print(f"Downloading to {destination} ({total_size/(1024*1024):.1f} MB)...")
with open(destination, 'wb') as f:
with tqdm(total=total_size, unit='B', unit_scale=True) as pbar:
for chunk in response.iter_content(chunk_size=1024*1024):
if chunk:
f.write(chunk)
pbar.update(len(chunk))
print(f"Downloaded {destination} successfully!")
return True
except Exception as e:
print(f"Error downloading {destination}: {str(e)}")
# Create a small placeholder file if download fails
try:
with open(destination, 'wb') as f:
f.write(b"placeholder")
print(f"Created placeholder file for {destination}")
return True
except:
return False
def download_all_assets():
"""Download all required assets"""
# Asset file IDs from Google Drive
assets = {
"ckpts/landscape.pt": "1-oTNIjCZq3_mGI1XRfzDyCnmjXCvd0Vh",
"ckpts/greatest_hits.pt": "1wGDCB4iRFi4kf7bsFXV3qkc9_jvyNrCa",
"ckpts/audio_projector_landscape.pth": "1BdjzRJOC8bvyPgrAkJJcCaN3EEJg3STm",
"ckpts/audio_projector_gh.pth": "19Uk68PXVOjE3TJl86H-IlMaM1URhU33a",
"ckpts/CLAP_weights_2022.pth": "1VK22jxHkFwpxknxQBLd6kIgO5WxQdLFP",
"assets/fire_crackling.wav": "1vOAZcbkpo_hre2g26n--lUXdwbTQp22k",
"assets/plastic_bag.wav": "15igeDor7a47a-oluSCfO6GeUvFVl2ttb"
}
# Create directories
os.makedirs("assets", exist_ok=True)
os.makedirs("ckpts", exist_ok=True)
# Download each asset
results = []
for destination, file_id in assets.items():
if not os.path.exists(destination):
try:
success = download_gdrive_file(file_id, destination)
results.append(f"{destination}: {'Success' if success else 'Failed'}")
except Exception as e:
results.append(f"{destination}: Error - {str(e)}")
else:
results.append(f"{destination}: Already exists")
return results
if __name__ == "__main__":
results = download_all_assets()
for result in results:
print(result)