# orchid-ncd/backend/scripts/download_cache.py
# marcellorusso's picture
# Sync from GitHub: a283597
# eaf6401 verified
import os
import shutil
from huggingface_hub import snapshot_download
def _resolve_token(secret_path="/run/secrets/HF_TOKEN"):
    """Return the HuggingFace token to use, or ``None`` if none is configured.

    The Docker BuildKit secret file at *secret_path* is tried first; when it
    is missing, unreadable, or blank, the ``HF_TOKEN`` environment variable is
    used instead (e.g. when running locally without secrets).

    Blank values are normalised to ``None`` so that callers never hand an
    empty string to ``snapshot_download`` as if it were a real token.
    """
    try:
        with open(secret_path, "r", encoding="utf-8") as f:
            token = f.read().strip()
    except OSError:
        # Any failure to read the secret is treated as "secret not provided";
        # this script is best-effort and must not abort on it.
        token = ""
    if not token:
        token = os.environ.get("HF_TOKEN", "").strip()
    return token or None


def _flatten_subdirs(src, dest):
    """Merge every sub-directory of *src* into *dest*, then delete *src*.

    Does nothing when *src* is not a directory. Existing directories under
    *dest* are merged into rather than replaced. Plain files located directly
    in *src* are not copied and are removed together with *src*.
    """
    if not os.path.isdir(src):
        return
    for name in os.listdir(src):
        child = os.path.join(src, name)
        if os.path.isdir(child):
            shutil.copytree(child, os.path.join(dest, name), dirs_exist_ok=True)
            print(f"Merged {name}")
    shutil.rmtree(src)


def main():
    """Download the clean dataset and the model weights from HuggingFace.

    The dataset snapshot is written to ``dataset/`` and the model snapshot to
    ``experiments/``. After the model download, the ``serie1_ce/<exp>/``
    directories are flattened to ``experiments/<exp>/`` and the marker file
    ``experiments/.models_synced`` is created.

    Both steps are best-effort: a failure is printed and the function carries
    on, so it never raises because a download failed. The marker file is only
    written when the model step completed without an exception.
    """
    token = _resolve_token()
    if not token:
        print("Warning: HF_TOKEN not found in secrets or environment. Downloads might hit rate limits or fail if repositories are private.")

    print("Downloading clean dataset from HuggingFace...")
    try:
        snapshot_download(
            'marcellorusso/orchid-ncd-dataset',
            repo_type='dataset',
            local_dir='dataset',
            allow_patterns=['train_clean/*', 'test_clean/*', 'split_manifest.json'],
            token=token,
        )
        print("Dataset downloaded successfully.")
    except Exception as e:
        # Deliberately broad: keep going so the model download is still tried.
        print(f"Dataset download failed: {e}")

    print("\nDownloading model weights from HuggingFace...")
    try:
        snapshot_download(
            'marcellorusso/orchid-ncd-models',
            repo_type='model',
            local_dir='experiments',
            allow_patterns=['serie1_ce/**/*.pt', 'serie1_ce/**/*.json', 'serie1_ce/**/*.safetensors'],
            token=token,
        )
        # Flatten: move serie1_ce/exp6_*/ to experiments/exp6_*/
        _flatten_subdirs('experiments/serie1_ce', 'experiments')
        # Create marker file to signal successful sync
        with open('experiments/.models_synced', 'w'):
            pass
        print("Models synced successfully.")
    except Exception as e:
        # Deliberately broad: a failed sync leaves no marker file behind.
        print(f"Model download failed: {e}")
# Run the download only when executed as a script, so that importing this
# module has no side effects.
if __name__ == "__main__":
    main()