PixDLM / scripts /download_assets.py
WhynotHug's picture
Upload folder using huggingface_hub
3334467 verified
Raw
History Blame Contribute Delete
1.77 kB
#!/usr/bin/env python3
import argparse
from pathlib import Path
from huggingface_hub import snapshot_download
def main():
    """Download the PixDLM checkpoint files and the DRSeg dataset repo.

    Command-line options choose the model repo, the dataset repo, the
    output directory, and whether image/archive files of the dataset repo
    are fetched (``--with-data``) or skipped.  The model snapshot is placed
    in ``<output-dir>/pretrained/pixdlm-7b`` and the dataset snapshot in
    ``<output-dir>/data/DRSeg``; both locations are printed at the end.
    """
    cli = argparse.ArgumentParser(
        description="Download PixDLM model assets and DRSeg metadata."
    )
    # The three string-valued options differ only in flag name and default.
    for flag, fallback in (
        ("--model-repo", "WhynotHug/PixDLM"),
        ("--dataset-repo", "WhynotHug/DRSeg"),
        ("--output-dir", "."),
    ):
        cli.add_argument(flag, default=fallback)
    cli.add_argument(
        "--with-data",
        action="store_true",
        help="Download full DRSeg data files when hosted in the dataset repo.",
    )
    options = cli.parse_args()

    # Resolve once so every derived (and printed) path is absolute.
    output_root = Path(options.output_dir).resolve()
    model_dir = output_root / "pretrained" / "pixdlm-7b"
    datasets_dir = output_root / "data"
    drseg_dir = datasets_dir / "DRSeg"

    model_dir.mkdir(parents=True, exist_ok=True)
    datasets_dir.mkdir(parents=True, exist_ok=True)

    # Only files matching these patterns are requested from the model repo.
    wanted_files = [
        "added_tokens.json",
        "config.json",
        "generation_config.json",
        "model*.safetensors",
        "model.safetensors.index.json",
        "pytorch_model*.bin",
        "pytorch_model.bin.index.json",
        "special_tokens_map.json",
        "tokenizer.json",
        "tokenizer.model",
        "tokenizer_config.json",
    ]
    snapshot_download(
        repo_id=options.model_repo,
        repo_type="model",
        local_dir=model_dir,
        allow_patterns=wanted_files,
    )

    # Without --with-data, image and archive files are excluded from the
    # dataset download; with it, nothing is filtered out.
    if options.with_data:
        skipped_patterns = None
    else:
        skipped_patterns = ["*.jpg", "*.png", "*.zip", "*.tar", "*.tar.gz"]
    snapshot_download(
        repo_id=options.dataset_repo,
        repo_type="dataset",
        local_dir=drseg_dir,
        ignore_patterns=skipped_patterns,
    )

    print(f"PixDLM model snapshot: {model_dir}")
    print(f"DRSeg files: {drseg_dir}")
# Run the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()