| |
| """ |
| Clean up stale HuggingFace cache artifacts from previous failed downloads. |
| |
| This script removes incomplete marker files, temporary files, and lock files |
| from the HuggingFace cache directory. These artifacts can accumulate from |
| interrupted or failed downloads and may interfere with future downloads. |
| """ |
|
|
| import os |
| import sys |
| from pathlib import Path |
| from typing import List |
|
|
| try: |
| from huggingface_hub import constants |
|
|
| HF_HUB_AVAILABLE = True |
| except ImportError: |
| print("Warning: huggingface_hub not available") |
| HF_HUB_AVAILABLE = False |
|
|
|
|
| def get_hf_cache_dir() -> str: |
| """Get the HuggingFace cache directory.""" |
| if HF_HUB_AVAILABLE: |
| return constants.HF_HUB_CACHE |
|
|
| |
| hf_home = os.environ.get("HF_HOME", os.path.expanduser("~/.cache/huggingface")) |
| return os.path.join(hf_home, "hub") |
|
|
|
|
| def find_stale_artifacts(cache_dir: str) -> List[Path]: |
| """ |
| Find stale artifact files in the HuggingFace cache. |
| |
| Args: |
| cache_dir: HuggingFace cache directory |
| |
| Returns: |
| List of paths to stale artifact files |
| """ |
| cache_path = Path(cache_dir) |
|
|
| if not cache_path.exists(): |
| return [] |
|
|
| |
| patterns = [ |
| "**/*.incomplete", |
| "**/*.tmp", |
| "**/*.lock", |
| ] |
|
|
| stale_files = [] |
| for pattern in patterns: |
| stale_files.extend(cache_path.glob(pattern)) |
|
|
| return stale_files |
|
|
|
|
| def cleanup_artifacts(artifacts: List[Path]) -> tuple[int, int]: |
| """ |
| Remove stale artifact files. |
| |
| Args: |
| artifacts: List of file paths to remove |
| |
| Returns: |
| Tuple of (successful_removals, failed_removals) |
| """ |
| successful = 0 |
| failed = 0 |
|
|
| for file_path in artifacts: |
| try: |
| file_path.unlink() |
| print(f" Removed: {file_path}") |
| successful += 1 |
| except Exception as e: |
| print(f" Warning: Could not remove {file_path}: {e}") |
| failed += 1 |
|
|
| return successful, failed |
|
|
|
|
| def main() -> int: |
| """ |
| Main cleanup logic. |
| |
| Returns: |
| Always returns 0 (cleanup is best-effort and should not fail CI) |
| """ |
| print("=" * 70) |
| print("HuggingFace Cache Cleanup") |
| print("=" * 70) |
|
|
| |
| cache_dir = get_hf_cache_dir() |
| print(f"Cache directory: {cache_dir}") |
|
|
| if not os.path.exists(cache_dir): |
| print("Cache directory does not exist - nothing to clean") |
| return 0 |
|
|
| print("-" * 70) |
|
|
| |
| print("Scanning for stale artifacts...") |
| stale_artifacts = find_stale_artifacts(cache_dir) |
|
|
| if not stale_artifacts: |
| print("✓ No stale cache artifacts found") |
| return 0 |
|
|
| |
| print(f"Found {len(stale_artifacts)} stale artifact(s) to remove:") |
| successful, failed = cleanup_artifacts(stale_artifacts) |
|
|
| print("-" * 70) |
|
|
| |
| if failed > 0: |
| print(f"⚠ Cleaned up {successful} file(s), {failed} removal(s) failed") |
| else: |
| print(f"✓ Successfully cleaned up {successful} stale file(s)") |
|
|
| |
| return 0 |
|
|
|
|
| if __name__ == "__main__": |
| try: |
| exit_code = main() |
| sys.exit(exit_code) |
| except KeyboardInterrupt: |
| print("\nInterrupted by user") |
| sys.exit(0) |
| except Exception as e: |
| print(f"ERROR: Unexpected error during cleanup: {e}") |
| import traceback |
|
|
| traceback.print_exc() |
| |
| sys.exit(0) |
|
|