#!/usr/bin/env python3
"""
Storage cleanup script for Hugging Face Spaces
Removes old/unused models and cache to prevent storage limit errors
"""
import os
import shutil
from pathlib import Path

def cleanup_storage():
    """Remove unnecessary files to reduce storage usage"""

    # Define paths
    cache_dir = Path("/data/.cache/huggingface")
    models_dir = Path("/data/models")

    # 1. Clean up stale model snapshots in the Hugging Face cache
    if cache_dir.exists():
        for subdir in ["models", "hub"]:
            target_dir = cache_dir / subdir
            if target_dir.exists():
                # Sort every cached snapshot directory by modification time and
                # keep only the two most recent. Note this is a global cutoff
                # across all cached models, not a per-model one, so rarely used
                # models may lose their only snapshot.
                model_dirs = sorted(target_dir.glob("**/snapshots/*"), key=os.path.getmtime, reverse=True)
                for old_model in model_dirs[2:]:  # delete everything but the 2 newest
                    if old_model.is_dir():
                        try:
                            shutil.rmtree(old_model)
                            print(f"Cleaned up old model cache: {old_model}")
                        except Exception as e:
                            print(f"Error cleaning {old_model}: {e}")

    # 2. Clean up old fine-tuned models (keep only active ones)
    if models_dir.exists():
        finetuned_dir = models_dir / "finetuned"
        if finetuned_dir.exists():
            # Deciding which fine-tuned models are safe to delete requires
            # database access to know which ones are still active; for now,
            # just report the directory size (see the prune_inactive_models
            # sketch below for how deletion could work once that list exists)
            total_size = sum(f.stat().st_size for f in finetuned_dir.rglob('*') if f.is_file())
            print(f"Fine-tuned models size: {total_size / (1024**3):.2f} GB")

    # 3. Report storage usage
    if Path("/data").exists():
        total_size = sum(f.stat().st_size for f in Path("/data").rglob('*') if f.is_file())
        print(f"Total /data storage: {total_size / (1024**3):.2f} GB")

        # Breakdown by directory
        for subdir in [".cache", "models"]:
            dir_path = Path("/data") / subdir
            if dir_path.exists():
                dir_size = sum(f.stat().st_size for f in dir_path.rglob('*') if f.is_file())
                print(f"  {subdir}: {dir_size / (1024**3):.2f} GB")

if __name__ == "__main__":
    cleanup_storage()