# DataAnalyst-Agent / app / utils / cleanup.py
# shoaib4045's picture
# Initial production-grade commit
# e3e5444
"""
File cleanup and retention policy utilities.
"""
import os
import time
import logging
from typing import Tuple
logger = logging.getLogger(__name__)
# Retention windows, expressed in hours. Each value is read once, at import
# time, from the environment variable of the same name; when that variable is
# unset the quoted fallback is used instead.
UPLOAD_RETENTION_HOURS = int(os.environ.get("UPLOAD_RETENTION_HOURS", "72"))  # 3 days
REPORT_RETENTION_HOURS = int(os.environ.get("REPORT_RETENTION_HOURS", "720"))  # 30 days
COMPLETED_JOB_RETENTION_HOURS = int(
    os.environ.get("COMPLETED_JOB_RETENTION_HOURS", "1440")
)  # 60 days
def cleanup_old_files(directory: str, cutoff_hours: int, dry_run: bool = False) -> Tuple[int, int]:
    """
    Clean up files older than cutoff_hours.

    Only regular files directly inside ``directory`` are considered;
    subdirectories are neither entered nor removed. A file's age is measured
    from its last-modification time.

    The sweep is best-effort: a file that cannot be inspected or deleted
    (for example because it disappeared while the sweep was running) is
    logged and skipped, and the remaining files are still processed.

    Args:
        directory: Path to clean
        cutoff_hours: Delete files older than this many hours
        dry_run: If True, don't actually delete

    Returns:
        (deleted_count, freed_bytes). In dry-run mode these are the totals
        for the files that would have been deleted. Returns (0, 0) when
        ``directory`` is not an existing directory.
    """
    if not os.path.isdir(directory):
        return 0, 0

    now = time.time()
    cutoff_seconds = cutoff_hours * 3600
    deleted_count = 0
    freed_bytes = 0

    try:
        with os.scandir(directory) as entries:
            for entry in entries:
                # Inspect each entry in its own guard so that one vanished or
                # unreadable file does not abort the sweep for all the others.
                try:
                    if not entry.is_file():
                        continue
                    info = entry.stat()
                except OSError as e:
                    logger.warning(f"Failed to inspect {entry.name}: {e}")
                    continue

                if now - info.st_mtime <= cutoff_seconds:
                    continue  # still inside the retention window

                file_size = info.st_size
                if not dry_run:
                    try:
                        os.remove(entry.path)
                    except OSError as e:
                        logger.warning(f"Failed to delete {entry.name}: {e}")
                        continue
                    logger.info(f"Deleted old file: {entry.name} ({file_size} bytes)")

                # Reached for real deletions that succeeded and for every
                # expired file in dry-run mode.
                deleted_count += 1
                freed_bytes += file_size
    except Exception as e:
        # Last-resort boundary (e.g. the directory itself became unreadable):
        # report it and return whatever was accumulated so far.
        logger.error(f"Error during cleanup of {directory}: {e}")

    return deleted_count, freed_bytes
def cleanup_uploads(dry_run: bool = False) -> Tuple[int, int]:
    """Remove expired files from the relative ``data/uploads`` directory.

    Delegates to ``cleanup_old_files`` using ``UPLOAD_RETENTION_HOURS`` as the
    age limit and returns its ``(deleted_count, freed_bytes)`` result.
    """
    return cleanup_old_files(
        directory="data/uploads",
        cutoff_hours=UPLOAD_RETENTION_HOURS,
        dry_run=dry_run,
    )
def cleanup_reports(dry_run: bool = False) -> Tuple[int, int]:
    """Remove expired files from the relative ``data/reports`` directory.

    Delegates to ``cleanup_old_files`` using ``REPORT_RETENTION_HOURS`` as the
    age limit and returns its ``(deleted_count, freed_bytes)`` result.
    """
    return cleanup_old_files(
        directory="data/reports",
        cutoff_hours=REPORT_RETENTION_HOURS,
        dry_run=dry_run,
    )
def get_directory_size(directory: str) -> int:
    """Get total size of all files in directory (recursively).

    The result is best-effort: a file whose size cannot be read (for example
    because it was removed while the walk was in progress) is logged and left
    out of the total, and the walk continues with the remaining files.

    Args:
        directory: Root of the tree to measure.

    Returns:
        Sum of the sizes, in bytes, of the files found under ``directory``.
        A path that does not exist yields 0.
    """
    total_size = 0
    try:
        for dirpath, _dirnames, filenames in os.walk(directory):
            for filename in filenames:
                filepath = os.path.join(dirpath, filename)
                if not os.path.isfile(filepath):
                    continue
                # Guard each file separately: without this, one file that
                # vanishes between the check above and the size lookup would
                # end the whole walk and silently truncate the total.
                try:
                    total_size += os.path.getsize(filepath)
                except OSError as e:
                    logger.warning(f"Failed to read size of {filepath}: {e}")
    except Exception as e:
        # Last-resort boundary so callers always get a number back.
        logger.error(f"Error calculating directory size: {e}")
    return total_size
def log_storage_stats() -> None:
    """Write one INFO log line with the disk usage of the data directories.

    Measures ``data/uploads`` and ``data/reports`` with
    ``get_directory_size`` and reports each, plus their sum, in megabytes
    (1 MB = 1024 * 1024 bytes) to two decimal places.
    """
    bytes_per_mb = 1024 * 1024
    uploads_mb = get_directory_size("data/uploads") / bytes_per_mb
    reports_mb = get_directory_size("data/reports") / bytes_per_mb
    total_mb = uploads_mb + reports_mb
    logger.info(
        f"Storage usage - Uploads: {uploads_mb:.2f} MB, Reports: {reports_mb:.2f} MB, "
        f"Total: {total_mb:.2f} MB"
    )