File size: 3,231 Bytes
e3e5444
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
"""
File cleanup and retention policy utilities.
"""
import os
import time
import logging
from typing import Tuple

# Module-scoped logger, named after this module.
logger = logging.getLogger(__name__)

# Retention windows, in hours. Each one is read from the environment variable
# of the same name and falls back to the default shown when it is unset.
UPLOAD_RETENTION_HOURS = int(os.environ.get("UPLOAD_RETENTION_HOURS", "72"))  # 72 h = 3 days
REPORT_RETENTION_HOURS = int(os.environ.get("REPORT_RETENTION_HOURS", "720"))  # 720 h = 30 days
COMPLETED_JOB_RETENTION_HOURS = int(
    os.environ.get("COMPLETED_JOB_RETENTION_HOURS", "1440")
)  # 1440 h = 60 days


def cleanup_old_files(directory: str, cutoff_hours: int, dry_run: bool = False) -> Tuple[int, int]:
    """
    Clean up files older than cutoff_hours.
    Returns (deleted_count, freed_bytes)

    Only regular files directly inside ``directory`` are considered
    (symlinks to regular files are followed); subdirectories are never
    entered or removed. A file is eligible when its modification time is
    more than ``cutoff_hours`` hours in the past.

    The cleanup is best-effort: a file that cannot be inspected or deleted
    is logged and skipped, and the remaining files are still processed.
    Errors are logged rather than raised.

    Args:
        directory: Path to clean. A path that is not an existing directory
            yields (0, 0).
        cutoff_hours: Delete files older than this many hours
        dry_run: If True, don't actually delete; the returned totals then
            describe what would have been removed.
    """
    if not os.path.isdir(directory):
        return 0, 0

    now = time.time()
    cutoff_seconds = cutoff_hours * 3600
    deleted_count = 0
    freed_bytes = 0

    try:
        # Snapshot the listing first so no file is removed while the
        # directory iterator is still open.
        with os.scandir(directory) as scan:
            entries = list(scan)

        for entry in entries:
            # A file may vanish or become unreadable between listing and
            # inspection; that must only skip this entry, not end the run.
            try:
                if not entry.is_file():
                    continue
                info = entry.stat()
            except OSError as e:
                logger.warning(f"Failed to inspect {entry.name}: {e}")
                continue

            if now - info.st_mtime <= cutoff_seconds:
                continue

            if not dry_run:
                try:
                    os.remove(entry.path)
                except OSError as e:
                    logger.warning(f"Failed to delete {entry.name}: {e}")
                    continue
                logger.info(f"Deleted old file: {entry.name} ({info.st_size} bytes)")

            # Reached for real deletions that succeeded and for every
            # eligible file in dry-run mode.
            deleted_count += 1
            freed_bytes += info.st_size

    except Exception as e:
        # Boundary handler: callers rely on this function never raising.
        logger.error(f"Error during cleanup of {directory}: {e}")

    return deleted_count, freed_bytes


def cleanup_uploads(dry_run: bool = False) -> Tuple[int, int]:
    """Remove files in ``data/uploads`` older than UPLOAD_RETENTION_HOURS.

    Args:
        dry_run: Passed through to cleanup_old_files.

    Returns the (deleted_count, freed_bytes) pair from cleanup_old_files.
    """
    return cleanup_old_files(
        "data/uploads",
        cutoff_hours=UPLOAD_RETENTION_HOURS,
        dry_run=dry_run,
    )


def cleanup_reports(dry_run: bool = False) -> Tuple[int, int]:
    """Remove files in ``data/reports`` older than REPORT_RETENTION_HOURS.

    Args:
        dry_run: Passed through to cleanup_old_files.

    Returns the (deleted_count, freed_bytes) pair from cleanup_old_files.
    """
    return cleanup_old_files(
        "data/reports",
        cutoff_hours=REPORT_RETENTION_HOURS,
        dry_run=dry_run,
    )


def get_directory_size(directory: str) -> int:
    """
    Get total size of all files in directory (recursively).

    The result is best-effort: a file whose size cannot be read (for
    example because it was removed while the tree was being walked) is
    logged and left out of the total instead of ending the walk early.

    Args:
        directory: Root of the tree to measure. A path that does not exist
            yields 0.

    Returns:
        Sum, in bytes, of the sizes of the files that could be read.
    """
    total_size = 0
    try:
        for dirpath, _dirnames, filenames in os.walk(directory):
            for filename in filenames:
                filepath = os.path.join(dirpath, filename)
                if not os.path.isfile(filepath):
                    continue
                # Guard each file on its own so one failure does not
                # silently truncate the total for the whole tree.
                try:
                    total_size += os.path.getsize(filepath)
                except OSError as e:
                    logger.warning(f"Could not read size of {filepath}: {e}")
    except Exception as e:
        # Boundary handler: callers rely on this function never raising.
        logger.error(f"Error calculating directory size: {e}")

    return total_size


def log_storage_stats() -> None:
    """Emit one INFO record with the upload, report and combined sizes in MB.

    Sizes are obtained from get_directory_size for ``data/uploads`` and
    ``data/reports`` and converted from bytes to mebibyte-sized "MB" units.
    """
    bytes_per_mb = 1024 * 1024
    # Measured in this order: uploads first, then reports.
    uploads_size, reports_size = (
        get_directory_size(path) / bytes_per_mb
        for path in ("data/uploads", "data/reports")
    )

    message = (
        f"Storage usage - Uploads: {uploads_size:.2f} MB, Reports: {reports_size:.2f} MB, "
        f"Total: {uploads_size + reports_size:.2f} MB"
    )
    logger.info(message)