File size: 3,729 Bytes
61ba51e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#!/usr/bin/env python3
"""
Clean up stale HuggingFace cache artifacts from previous failed downloads.

This script removes incomplete marker files, temporary files, and lock files
from the HuggingFace cache directory. These artifacts can accumulate from
interrupted or failed downloads and may interfere with future downloads.
"""

import os
import sys
from pathlib import Path
from typing import List

# huggingface_hub is treated as an optional dependency: when it is installed,
# its `constants` module is used by get_hf_cache_dir() to locate the cache;
# when it is missing, a warning is printed and HF_HUB_AVAILABLE is set to
# False so that get_hf_cache_dir() takes its environment-variable fallback.
try:
    from huggingface_hub import constants

    HF_HUB_AVAILABLE = True
except ImportError:
    # Not fatal: the script keeps running without the library.
    print("Warning: huggingface_hub not available")
    HF_HUB_AVAILABLE = False


def get_hf_cache_dir() -> str:
    """
    Get the HuggingFace hub cache directory.

    When ``huggingface_hub`` could be imported, the library's own
    ``constants.HF_HUB_CACHE`` value is returned unchanged.

    Otherwise the location is resolved from the environment, in this order:

    1. ``HF_HUB_CACHE`` (or the legacy ``HUGGINGFACE_HUB_CACHE``) - an explicit
       override of the hub cache directory itself.
    2. ``HF_HOME`` - the hub cache is its ``hub`` subdirectory.
    3. ``XDG_CACHE_HOME`` - the hub cache is ``huggingface/hub`` below it.
    4. ``~/.cache/huggingface/hub`` as the final default.

    A leading ``~`` in any of these values is expanded to the user's home
    directory. Empty values are treated as unset.

    Returns:
        Path of the hub cache directory (it is not checked for existence)
    """
    if HF_HUB_AVAILABLE:
        return constants.HF_HUB_CACHE

    env = os.environ

    # An explicit hub-cache override wins over anything derived from HF_HOME;
    # ignoring it would make the cleanup scan a directory that is not in use.
    explicit_cache = env.get("HF_HUB_CACHE") or env.get("HUGGINGFACE_HUB_CACHE")
    if explicit_cache:
        return os.path.expanduser(explicit_cache)

    # Fall back to HF_HOME, then to the XDG cache root, then to ~/.cache
    xdg_cache_home = env.get("XDG_CACHE_HOME") or os.path.join("~", ".cache")
    hf_home = env.get("HF_HOME") or os.path.join(xdg_cache_home, "huggingface")
    return os.path.join(os.path.expanduser(hf_home), "hub")


def find_stale_artifacts(cache_dir: str) -> List[Path]:
    """
    Find stale artifact files in the HuggingFace cache.

    The cache is searched recursively for entries whose names end in
    ``.incomplete``, ``.tmp`` or ``.lock``. Real directories that happen to
    match one of the patterns are skipped: they are not files, cannot be
    removed with ``Path.unlink()``, and would otherwise only show up as
    failed removals. Symbolic links are kept, since unlinking one removes
    just the link.

    Args:
        cache_dir: HuggingFace cache directory

    Returns:
        List of paths to stale artifact files, grouped by pattern in the
        order ``.incomplete``, ``.tmp``, ``.lock``. The list is empty when
        ``cache_dir`` does not exist or is not a directory.
    """
    cache_path = Path(cache_dir)

    # Nothing can be found below a missing path or a plain file
    if not cache_path.is_dir():
        return []

    # Patterns for stale files to clean up
    patterns = (
        "**/*.incomplete",  # Incomplete download markers
        "**/*.tmp",  # Temporary files
        "**/*.lock",  # Lock files from interrupted downloads
    )

    stale_files: List[Path] = []
    for pattern in patterns:
        stale_files.extend(
            candidate
            for candidate in cache_path.glob(pattern)
            # Keep files and symlinks; drop real directories
            if candidate.is_symlink() or not candidate.is_dir()
        )

    return stale_files


def cleanup_artifacts(artifacts: List[Path]) -> tuple[int, int]:
    """
    Delete each given artifact, reporting the outcome per file.

    Removal is best-effort: any error raised while deleting (or reporting)
    one entry is printed as a warning and counted, and processing continues
    with the next entry.

    Args:
        artifacts: List of file paths to remove

    Returns:
        Tuple of (successful_removals, failed_removals)
    """
    removed_count = 0
    error_count = 0

    for target in artifacts:
        removed = True
        try:
            target.unlink()
            print(f"  Removed: {target}")
        except Exception as err:
            removed = False
            print(f"  Warning: Could not remove {target}: {err}")

        # Tally exactly one outcome per entry
        if removed:
            removed_count += 1
        else:
            error_count += 1

    return removed_count, error_count


def main() -> int:
    """
    Run the cache cleanup and print a report.

    Resolves the cache directory, scans it for stale artifacts, removes
    whatever was found and prints a summary of the outcome.

    Returns:
        Always returns 0 (cleanup is best-effort and should not fail CI)
    """
    banner_rule = "=" * 70
    section_rule = "-" * 70

    print(banner_rule)
    print("HuggingFace Cache Cleanup")
    print(banner_rule)

    # Locate the cache; a missing directory means there is nothing to do
    hub_cache = get_hf_cache_dir()
    print(f"Cache directory: {hub_cache}")
    if not os.path.exists(hub_cache):
        print("Cache directory does not exist - nothing to clean")
        return 0

    print(section_rule)

    # Collect leftovers from earlier downloads
    print("Scanning for stale artifacts...")
    leftovers = find_stale_artifacts(hub_cache)
    if not leftovers:
        print("✓ No stale cache artifacts found")
        return 0

    # Remove them one by one
    print(f"Found {len(leftovers)} stale artifact(s) to remove:")
    removed, errors = cleanup_artifacts(leftovers)

    print(section_rule)

    # Summary line depends on whether any removal failed
    summary = (
        f"⚠ Cleaned up {removed} file(s), {errors} removal(s) failed"
        if errors > 0
        else f"✓ Successfully cleaned up {removed} stale file(s)"
    )
    print(summary)

    # Removal failures are reported above but never turn into a failing exit code
    return 0


if __name__ == "__main__":
    # Script entry point. The exit status is main()'s return value on a
    # normal run, and 0 after an interrupt or an unexpected error, so that
    # this best-effort cleanup never fails CI.
    try:
        sys.exit(main())
    except KeyboardInterrupt:
        print("\nInterrupted by user")
    except Exception as exc:
        import traceback

        print(f"ERROR: Unexpected error during cleanup: {exc}")
        traceback.print_exc()

    # Only reached after one of the handlers above ran
    sys.exit(0)