"""Maintenance script: keep the most recent job and purge every older one.

For each older job the script removes the database row, the job's vectors
(via ``delete_job_vectors``) and the job's upload folder under
``data/uploads``.
"""

import asyncio
import os
import shutil
from pathlib import Path

from sqlalchemy import select

from app.database import AsyncSessionLocal, init_db
from app.models import Job
from app.services.chroma_service import delete_job_vectors

# Root folder that holds one sub-folder per job id. The path is relative, so
# it is resolved against the current working directory of the process.
UPLOADS_ROOT = Path("data/uploads")


def _delete_upload_dir(job_id) -> None:
    """Remove the upload folder for *job_id* if it exists (best effort).

    A failure to remove the folder is reported and swallowed, mirroring how
    vector-store errors are handled, so that one bad folder does not abort
    the whole cleanup run before the database changes are committed.
    """
    # str() keeps the path join working when the id is not a plain string
    # (e.g. an int or UUID primary key); it is a no-op for string ids.
    uploads_dir = UPLOADS_ROOT / str(job_id)
    if not uploads_dir.exists():
        return
    try:
        shutil.rmtree(uploads_dir)
    except OSError as e:
        print(f" Upload folder deletion error: {e}")
    else:
        print(f" Deleted upload folder: {uploads_dir}")


async def run_cleanup():
    """Delete every job except the most recently created one.

    Jobs are ordered by ``created_at`` descending; the first one is kept and
    all others are deleted from the database, the vector store and the
    uploads folder. Vector-store and upload-folder failures are printed and
    skipped; the database deletions are committed once, after the loop.
    """
    await init_db()

    async with AsyncSessionLocal() as session:
        result = await session.execute(
            select(Job).order_by(Job.created_at.desc())
        )
        jobs = list(result.scalars().all())

        if not jobs:
            print("No jobs found in the database. Nothing to clean.")
            return

        latest_job, *old_jobs = jobs
        print(f"Keeping latest job: {latest_job.id} ({latest_job.original_filename})")
        print(f"Found {len(old_jobs)} old jobs to delete.")

        for job in old_jobs:
            # Read the id once, before the row is marked for deletion.
            job_id = job.id
            print(f"Deleting job {job_id}...")

            # 1. Delete from SQLite (takes effect at commit below)
            await session.delete(job)

            # 2. Delete from Chroma (best effort)
            try:
                delete_job_vectors(job_id)
            except Exception as e:
                print(f" Chroma deletion error: {e}")

            # 3. Delete from Uploads (best effort)
            _delete_upload_dir(job_id)

        await session.commit()
        print("Cleanup complete!")


if __name__ == "__main__":
    asyncio.run(run_cleanup())