#!/usr/bin/env python3
"""Download PixDLM model files and a DRSeg dataset snapshot.

Both downloads go through ``huggingface_hub.snapshot_download`` and land
under ``<output-dir>/pretrained/pixdlm-7b`` and ``<output-dir>/data/DRSeg``.
"""
import argparse
from pathlib import Path

from huggingface_hub import snapshot_download

# Files requested from the model repo; anything not matching is not fetched.
_MODEL_FILE_PATTERNS = [
    "added_tokens.json",
    "config.json",
    "generation_config.json",
    "model*.safetensors",
    "model.safetensors.index.json",
    "pytorch_model*.bin",
    "pytorch_model.bin.index.json",
    "special_tokens_map.json",
    "tokenizer.json",
    "tokenizer.model",
    "tokenizer_config.json",
]

# Image/archive globs excluded from the dataset download unless --with-data.
_BULK_DATA_PATTERNS = ["*.jpg", "*.png", "*.zip", "*.tar", "*.tar.gz"]


def _parse_cli():
    """Build the command-line parser and return the parsed arguments."""
    cli = argparse.ArgumentParser(
        description="Download PixDLM model assets and DRSeg metadata."
    )
    string_options = (
        ("--model-repo", "WhynotHug/PixDLM"),
        ("--dataset-repo", "WhynotHug/DRSeg"),
        ("--output-dir", "."),
    )
    for flag, fallback in string_options:
        cli.add_argument(flag, default=fallback)
    cli.add_argument(
        "--with-data",
        action="store_true",
        help="Download full DRSeg data files when hosted in the dataset repo.",
    )
    return cli.parse_args()


def main():
    """Create the target directories, then download the model and dataset.

    The model download is restricted to the checkpoint/tokenizer file
    patterns above.  The dataset download skips image and archive files
    unless ``--with-data`` was given, in which case nothing is ignored.
    Finally the two destination paths are printed.
    """
    options = _parse_cli()

    base = Path(options.output_dir).resolve()
    model_dir = base / "pretrained" / "pixdlm-7b"
    datasets_dir = base / "data"
    drseg_dir = datasets_dir / "DRSeg"

    for folder in (model_dir, datasets_dir):
        folder.mkdir(parents=True, exist_ok=True)

    snapshot_download(
        repo_id=options.model_repo,
        repo_type="model",
        local_dir=model_dir,
        allow_patterns=_MODEL_FILE_PATTERNS,
    )

    # ignore_patterns=None means "ignore nothing", i.e. fetch the full repo.
    if options.with_data:
        skipped = None
    else:
        skipped = _BULK_DATA_PATTERNS

    snapshot_download(
        repo_id=options.dataset_repo,
        repo_type="dataset",
        local_dir=drseg_dir,
        ignore_patterns=skipped,
    )

    print(f"PixDLM model snapshot: {model_dir}")
    print(f"DRSeg files: {drseg_dir}")


if __name__ == "__main__":
    main()