# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""
Prepare offline package for FastGen deployment.

This script downloads all dependencies needed to run FastGen on an isolated machine:
- Python wheel packages
- HuggingFace models (Wan-T2V-1.3B)
- Self-Forcing checkpoint

Usage:
    python scripts/prepare_offline_package.py --output-dir ./offline_package

    # Skip model downloads (if you already have them)
    python scripts/prepare_offline_package.py --output-dir ./offline_package --skip-models

    # Specify Python version and platform
    python scripts/prepare_offline_package.py --output-dir ./offline_package \\
        --python-version 3.12 --platform manylinux2014_x86_64
"""

import argparse
import hashlib
import json
import os
import shutil
import subprocess
import sys
from datetime import datetime, timezone
from pathlib import Path

# Directory/file names that are never copied into the packaged source tree.
_EXCLUDED_NAMES = frozenset(
    {
        "__pycache__",
        ".git",
        ".pytest_cache",
        ".eggs",
        "dist",
        "build",
        "outputs",
        "FASTGEN_OUTPUT",
        ".venv",
        "venv",
        "offline_package",
    }
)

# Name suffixes that are never copied (the original "*.pyc"-style patterns).
_EXCLUDED_SUFFIXES = (".pyc", ".pyo", ".egg-info")

# Read size used when hashing; model files can be several GB, so 4 KiB reads
# would spend most of the time in Python-level loop overhead.
_HASH_CHUNK_SIZE = 1024 * 1024


def run_command(cmd: list[str], check: bool = True, capture_output: bool = False) -> subprocess.CompletedProcess:
    """Echo and run an external command (argument list, no shell).

    Args:
        cmd: Program and arguments, passed to ``subprocess.run`` as a list.
        check: When true, a non-zero exit status raises ``CalledProcessError``.
        capture_output: When true, stdout/stderr are captured on the result.

    Returns:
        The ``subprocess.CompletedProcess`` for the finished command.

    Raises:
        subprocess.CalledProcessError: If ``check`` is true and the command fails.
        OSError: If the executable cannot be started.
    """
    print(f" Running: {' '.join(cmd)}")
    return subprocess.run(cmd, check=check, capture_output=capture_output, text=True)


def compute_sha256(filepath: Path) -> str:
    """Return the hex SHA-256 digest of the file at ``filepath``.

    The file is streamed in fixed-size chunks so large files are never held
    in memory in full.
    """
    digest = hashlib.sha256()
    with open(filepath, "rb") as f:
        while chunk := f.read(_HASH_CHUNK_SIZE):
            digest.update(chunk)
    return digest.hexdigest()


def get_dir_size(path: Path) -> int:
    """Return the summed size in bytes of all regular files below ``path``."""
    return sum(entry.stat().st_size for entry in path.rglob("*") if entry.is_file())


def format_size(size_bytes: int) -> str:
    """Format a byte count with two decimals and a B/KB/MB/GB/TB unit.

    Units step by 1024; anything of 1024 GB or more is reported in TB.
    """
    size = float(size_bytes)
    for unit in ("B", "KB", "MB", "GB"):
        if size < 1024:
            return f"{size:.2f} {unit}"
        size /= 1024
    return f"{size:.2f} TB"


def download_pip_wheels(
    output_dir: Path,
    requirements_file: Path,
    python_version: str = "312",
    platform: str = "manylinux2014_x86_64",
) -> bool:
    """Download pip wheels for all dependencies into ``output_dir/pip_wheels``.

    A binary-only download for the requested platform/Python version is tried
    first. If that fails, the download is retried without the target
    restrictions so that source distributions are allowed.

    Args:
        output_dir: Root directory of the offline package.
        requirements_file: Requirements file passed to ``pip download -r``.
        python_version: Value for pip's ``--python-version``.
        platform: Value for pip's ``--platform``.

    Returns:
        True if either attempt succeeded, False if both failed.
    """
    wheels_dir = output_dir / "pip_wheels"
    wheels_dir.mkdir(parents=True, exist_ok=True)

    print(f"\n[1/4] Downloading pip wheels to {wheels_dir}")
    print(f" Python version: {python_version}")
    print(f" Platform: {platform}")

    base_cmd = [
        sys.executable,
        "-m",
        "pip",
        "download",
        "-r",
        str(requirements_file),
        "-d",
        str(wheels_dir),
    ]
    # Download wheels with platform specification
    targeted_cmd = [
        *base_cmd,
        "--platform",
        platform,
        "--python-version",
        python_version,
        "--only-binary=:all:",
    ]

    try:
        run_command(targeted_cmd)
    except subprocess.CalledProcessError as primary_error:
        print(" Warning: Some wheels may require source builds.")
        print(" Try running without --only-binary flag for those packages.")
        print(f" Binary-only download failed: {primary_error}")
        # The retry drops --platform/--python-version together with
        # --only-binary, so pip resolves for the interpreter running this
        # script. Say so: the result may not match the requested target.
        print(
            " Note: retrying without --platform/--python-version; files are resolved "
            "for the running interpreter and may not match the requested target."
        )
        try:
            run_command(base_cmd)
        except subprocess.CalledProcessError as fallback_error:
            print(f" Error downloading wheels: {fallback_error}")
            return False

    print(f" Wheels downloaded to: {wheels_dir}")
    print(f" Total size: {format_size(get_dir_size(wheels_dir))}")
    return True


def _download_hf_snapshot(repo_id: str, dest_dir: Path, label: str) -> bool:
    """Download the HuggingFace repo ``repo_id`` into ``dest_dir``.

    ``label`` ("Model", "Checkpoint") is only used in the progress messages.
    Returns True on success; any failure, including ``huggingface_hub`` not
    being installed, is reported and turned into False.
    """
    try:
        # Imported lazily so the rest of the script works without huggingface_hub.
        from huggingface_hub import snapshot_download

        snapshot_download(
            repo_id=repo_id,
            local_dir=str(dest_dir),
            local_dir_use_symlinks=False,
        )
        print(f" {label} downloaded successfully")
        print(f" Total size: {format_size(get_dir_size(dest_dir))}")
        return True
    except Exception as e:  # best-effort step: report and let the caller continue
        print(f" Error downloading {label.lower()}: {e}")
        return False


def download_huggingface_model(output_dir: Path, model_id: str, local_name: str) -> bool:
    """Download a HuggingFace model into ``output_dir/hf_models/<local_name>``.

    Args:
        output_dir: Root directory of the offline package.
        model_id: HuggingFace repository id to download.
        local_name: Directory name to store the model under.

    Returns:
        True on success, False if the download failed.
    """
    models_dir = output_dir / "hf_models" / local_name
    models_dir.parent.mkdir(parents=True, exist_ok=True)

    print(f"\n[2/4] Downloading HuggingFace model: {model_id}")
    print(f" Destination: {models_dir}")

    return _download_hf_snapshot(model_id, models_dir, "Model")


def download_self_forcing_checkpoint(output_dir: Path) -> bool:
    """Download the Self-Forcing checkpoint into ``output_dir/checkpoints/Self-Forcing``.

    Returns:
        True on success, False if the download failed.
    """
    ckpt_dir = output_dir / "checkpoints" / "Self-Forcing"
    ckpt_dir.parent.mkdir(parents=True, exist_ok=True)

    print("\n[3/4] Downloading Self-Forcing checkpoint")
    print(f" Destination: {ckpt_dir}")

    return _download_hf_snapshot("gdhe17/Self-Forcing", ckpt_dir, "Checkpoint")


def _is_excluded(name: str) -> bool:
    """Return True if a file or directory with this name must not be packaged."""
    return name in _EXCLUDED_NAMES or name.endswith(_EXCLUDED_SUFFIXES)


def copy_fastgen_source(output_dir: Path, source_dir: Path) -> bool:
    """Copy the FastGen source tree to ``output_dir/FastGen``.

    Caches, VCS data, build artefacts and virtual environments are left out.
    The output directory itself is also skipped, whatever its name, so a
    package directory located inside the source tree is never copied into
    itself.

    Args:
        output_dir: Root directory of the offline package.
        source_dir: FastGen repository root to copy from.

    Returns:
        True on success, False if copying failed.
    """
    dest_dir = output_dir / "FastGen"

    print("\n[4/4] Copying FastGen source code")
    print(f" Source: {source_dir}")
    print(f" Destination: {dest_dir}")

    output_root = output_dir.resolve()

    def ignore(directory: str, names: list[str]) -> set[str]:
        skipped = set()
        for name in names:
            if _is_excluded(name):
                skipped.add(name)
            # Only resolve entries that could be the output directory; this
            # keeps the check cheap on large trees.
            elif name == output_root.name and (Path(directory) / name).resolve() == output_root:
                skipped.add(name)
        return skipped

    try:
        if dest_dir.exists():
            shutil.rmtree(dest_dir)
        shutil.copytree(source_dir, dest_dir, ignore=ignore)
        print(" Source code copied successfully")
        print(f" Total size: {format_size(get_dir_size(dest_dir))}")
        return True
    except (OSError, shutil.Error) as e:
        print(f" Error copying source: {e}")
        return False


def create_manifest(output_dir: Path, include_checksums: bool = True) -> bool:
    """Write ``manifest.json`` describing every component of the package.

    Each sub-directory of ``output_dir`` is one component. For each component
    the manifest records the file count, total size and a per-file list with
    the path (relative to ``output_dir``, POSIX separators), the size and,
    unless disabled, the SHA-256 digest.

    Args:
        output_dir: Root directory of the offline package.
        include_checksums: Hash every file. Disable to skip the potentially
            slow pass over multi-GB model files.

    Returns:
        True if the manifest was written, False on any I/O or encoding error.
    """
    manifest_path = output_dir / "manifest.json"

    print(f"\nCreating manifest: {manifest_path}")

    manifest = {
        "created_at": datetime.now(timezone.utc).isoformat(),
        "python_version": f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
        "components": {},
    }

    try:
        # Sorted traversal keeps the manifest reproducible across runs.
        component_dirs = sorted(p for p in output_dir.iterdir() if p.is_dir())
        for component_dir in component_dirs:
            files = []
            total_size = 0
            for filepath in sorted(component_dir.rglob("*")):
                if not filepath.is_file():
                    continue
                file_size = filepath.stat().st_size
                total_size += file_size
                record = {
                    "path": filepath.relative_to(output_dir).as_posix(),
                    "size": file_size,
                }
                if include_checksums:
                    record["sha256"] = compute_sha256(filepath)
                files.append(record)

            manifest["components"][component_dir.name] = {
                "file_count": len(files),
                "total_size": total_size,
                "total_size_human": format_size(total_size),
                "files": files,
            }

        # Calculate total package size
        total_package_size = sum(c["total_size"] for c in manifest["components"].values())
        manifest["total_size"] = total_package_size
        manifest["total_size_human"] = format_size(total_package_size)

        with open(manifest_path, "w", encoding="utf-8") as f:
            json.dump(manifest, f, indent=2)
        print(" Manifest created successfully")
        return True
    except (OSError, TypeError, ValueError) as e:
        print(f" Error creating manifest: {e}")
        return False


def create_archive(output_dir: Path, archive_name: str = "fastgen_wan_offline.tar.gz") -> bool:
    """Create a tar.gz archive of the package next to ``output_dir``.

    The archive is written to ``output_dir.parent / archive_name`` using the
    external ``tar`` command.

    Returns:
        True if the archive was created, False if ``tar`` is missing or failed.
    """
    archive_path = output_dir.parent / archive_name

    print(f"\nCreating archive: {archive_path}")
    print(" This may take a while for large packages...")

    try:
        # -C makes the archive contain a single top-level directory named
        # after the package rather than an absolute path.
        cmd = [
            "tar",
            "-czvf",
            str(archive_path),
            "-C",
            str(output_dir.parent),
            output_dir.name,
        ]
        run_command(cmd)

        archive_size = archive_path.stat().st_size
        print(" Archive created successfully")
        print(f" Archive size: {format_size(archive_size)}")
        return True
    except (subprocess.CalledProcessError, OSError) as e:
        print(f" Error creating archive: {e}")
        return False


def _build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for this script."""
    parser = argparse.ArgumentParser(
        description="Prepare offline package for FastGen deployment",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
    # Full package with all components
    python scripts/prepare_offline_package.py --output-dir ./offline_package

    # Skip model downloads (if already downloaded)
    python scripts/prepare_offline_package.py --output-dir ./offline_package --skip-models

    # Custom Python version and platform
    python scripts/prepare_offline_package.py --output-dir ./offline_package \\
        --python-version 3.11 --platform manylinux2014_x86_64

    # Create archive after preparation
    python scripts/prepare_offline_package.py --output-dir ./offline_package --create-archive
        """,
    )

    parser.add_argument(
        "--output-dir",
        type=Path,
        default=Path("./offline_package"),
        help="Directory to store the offline package (default: ./offline_package)",
    )
    parser.add_argument(
        "--python-version",
        type=str,
        default="312",
        help="Python version for wheels (e.g., 312 for Python 3.12)",
    )
    parser.add_argument(
        "--platform",
        type=str,
        default="manylinux2014_x86_64",
        help="Platform for wheels (default: manylinux2014_x86_64)",
    )
    parser.add_argument(
        "--skip-wheels",
        action="store_true",
        help="Skip downloading pip wheels",
    )
    parser.add_argument(
        "--skip-models",
        action="store_true",
        help="Skip downloading HuggingFace models and checkpoints",
    )
    parser.add_argument(
        "--skip-source",
        action="store_true",
        help="Skip copying FastGen source code",
    )
    parser.add_argument(
        "--skip-checksums",
        action="store_true",
        help="Do not compute SHA256 checksums for the manifest (faster for large models)",
    )
    parser.add_argument(
        "--create-archive",
        action="store_true",
        help="Create a tar.gz archive after preparation",
    )
    parser.add_argument(
        "--archive-name",
        type=str,
        default="fastgen_wan_offline.tar.gz",
        help="Name of the archive file (default: fastgen_wan_offline.tar.gz)",
    )
    return parser


def main() -> int:
    """Run the four packaging steps, then write the manifest and optional archive.

    Every step is attempted even if an earlier one failed, so one run reports
    all problems.

    Returns:
        0 if every requested step succeeded, 1 otherwise.
    """
    args = _build_parser().parse_args()

    # Determine FastGen root directory
    script_dir = Path(__file__).resolve().parent
    fastgen_root = script_dir.parent
    requirements_file = fastgen_root / "requirements.txt"

    # requirements.txt is only needed for the wheel step.
    if not args.skip_wheels and not requirements_file.exists():
        print(f"Error: requirements.txt not found at {requirements_file}")
        sys.exit(1)

    # Create output directory
    output_dir = args.output_dir.resolve()
    output_dir.mkdir(parents=True, exist_ok=True)

    print("=" * 60)
    print("FastGen Offline Package Preparation")
    print("=" * 60)
    print(f"Output directory: {output_dir}")
    print(f"FastGen root: {fastgen_root}")

    success = True

    # Step 1: Download pip wheels
    if args.skip_wheels:
        print("\n[1/4] Skipping pip wheels download")
    elif not download_pip_wheels(output_dir, requirements_file, args.python_version, args.platform):
        print("\nWarning: Wheel download had issues, continuing...")
        success = False

    # Step 2: Download HuggingFace model
    if args.skip_models:
        print("\n[2/4] Skipping HuggingFace model download")
    elif not download_huggingface_model(
        output_dir, "Wan-AI/Wan2.1-T2V-1.3B-Diffusers", "Wan2.1-T2V-1.3B-Diffusers"
    ):
        print("\nWarning: HuggingFace model download failed")
        success = False

    # Step 3: Download Self-Forcing checkpoint
    if args.skip_models:
        print("\n[3/4] Skipping Self-Forcing checkpoint download")
    elif not download_self_forcing_checkpoint(output_dir):
        print("\nWarning: Self-Forcing checkpoint download failed")
        success = False

    # Step 4: Copy FastGen source
    if args.skip_source:
        print("\n[4/4] Skipping FastGen source copy")
    elif not copy_fastgen_source(output_dir, fastgen_root):
        success = False

    # Create manifest
    if not create_manifest(output_dir, include_checksums=not args.skip_checksums):
        success = False

    # Create archive if requested
    archive_created = False
    if args.create_archive:
        archive_created = create_archive(output_dir, args.archive_name)
        if not archive_created:
            success = False

    print("\n" + "=" * 60)
    if success:
        print("Package preparation completed successfully!")
    else:
        print("Package preparation completed with some warnings.")

    print("\nNext steps:")
    print(" 1. Transfer the package to the offline machine:")
    # Only point at the archive when it actually exists.
    if archive_created:
        print(f" scp {output_dir.parent / args.archive_name} user@offline-machine:/path/to/")
    else:
        print(f" rsync -avz {output_dir}/ user@offline-machine:/path/to/offline_package/")
    print(" 2. On the offline machine, run:")
    print(" bash setup_offline_env.sh")
    print("=" * 60)

    return 0 if success else 1


if __name__ == "__main__":
    sys.exit(main())