| import asyncio |
| import os |
| import shutil |
| from pathlib import Path |
| from sqlalchemy import select |
| from app.database import AsyncSessionLocal, init_db |
| from app.models import Job |
| from app.services.chroma_service import delete_job_vectors |
|
|
async def run_cleanup():
    """Delete every job except the most recently created one.

    Jobs are loaded newest-first by ``created_at``; the first one is kept and
    each remaining job has:

    * its database row marked for deletion (committed once, at the end),
    * ``delete_job_vectors`` called with its id (presumably removing the
      job's vectors from the Chroma store -- confirm in ``chroma_service``),
    * its upload folder ``data/uploads/<job id>`` removed. The path is
      relative to the current working directory.

    Vector and folder removal are best-effort: a failure is printed and the
    loop continues, so one problematic job cannot abort the run before the
    database changes are committed.
    """
    await init_db()
    async with AsyncSessionLocal() as session:
        result = await session.execute(select(Job).order_by(Job.created_at.desc()))
        jobs = list(result.scalars().all())

        if not jobs:
            print("No jobs found in the database. Nothing to clean.")
            return

        # Newest job first, so the head of the list is the one to keep.
        latest_job, *old_jobs = jobs

        print(f"Keeping latest job: {latest_job.id} ({latest_job.original_filename})")
        print(f"Found {len(old_jobs)} old jobs to delete.")

        uploads_root = Path("data/uploads")

        for job in old_jobs:
            print(f"Deleting job {job.id}...")

            # Only marks the row for deletion; nothing is persisted until the
            # single commit after the loop.
            await session.delete(job)

            try:
                delete_job_vectors(job.id)
            except Exception as e:
                # Deliberately broad: vector cleanup must never block the rest
                # of the cleanup.
                print(f" Chroma deletion error: {e}")

            # str() keeps the path join valid for non-string ids (int, UUID).
            uploads_dir = uploads_root / str(job.id)
            try:
                shutil.rmtree(uploads_dir)
            except FileNotFoundError:
                # No upload folder for this job -- nothing to remove.
                pass
            except OSError as e:
                # Report and continue so the database commit below still runs.
                print(f" Upload folder deletion error: {e}")
            else:
                print(f" Deleted upload folder: {uploads_dir}")

        await session.commit()
        print("Cleanup complete!")
|
|
if __name__ == "__main__":
    # Script entry point: build the cleanup coroutine and drive it to
    # completion on a fresh event loop.
    cleanup_coro = run_cleanup()
    asyncio.run(cleanup_coro)
|
|