# well/scripts/cleanup_data.py
# zarox's picture
# Upload 23 files
# 1c167a4 verified
import asyncio
import os
import shutil
from pathlib import Path
from sqlalchemy import select
from app.database import AsyncSessionLocal, init_db
from app.models import Job
from app.services.chroma_service import delete_job_vectors
async def run_cleanup() -> None:
    """Delete every job except the most recently created one.

    Jobs are loaded newest-first (ordered by ``Job.created_at`` descending).
    The first row is kept; for each remaining job this removes:

    1. the database row (marked for deletion, committed once at the end),
    2. its vectors via ``delete_job_vectors``,
    3. its upload folder under ``data/uploads/<job id>`` (resolved relative
       to the current working directory).

    Chroma and filesystem failures are reported and skipped so that a single
    bad job cannot abort the run before the database commit, which would
    otherwise leave already-purged jobs still listed in the database.
    """
    await init_db()

    async with AsyncSessionLocal() as session:
        result = await session.execute(
            select(Job).order_by(Job.created_at.desc())
        )
        jobs = list(result.scalars().all())

        if not jobs:
            print("No jobs found in the database. Nothing to clean.")
            return

        latest_job, *old_jobs = jobs

        print(f"Keeping latest job: {latest_job.id} ({latest_job.original_filename})")
        print(f"Found {len(old_jobs)} old jobs to delete.")

        uploads_root = Path("data/uploads")

        for job in old_jobs:
            print(f"Deleting job {job.id}...")

            # 1. Delete from SQLite (takes effect at the commit below)
            await session.delete(job)

            # 2. Delete from Chroma (best effort: report and carry on)
            try:
                delete_job_vectors(job.id)
            except Exception as e:
                print(f" Chroma deletion error: {e}")

            # 3. Delete from Uploads
            # str() keeps the path join valid for non-string ids (int, UUID).
            uploads_dir = uploads_root / str(job.id)
            if uploads_dir.exists():
                try:
                    shutil.rmtree(uploads_dir)
                except OSError as e:
                    # Same best-effort policy as the Chroma step: an
                    # undeletable folder must not prevent the commit.
                    print(f" Upload folder deletion error: {e}")
                else:
                    print(f" Deleted upload folder: {uploads_dir}")

        await session.commit()
        print("Cleanup complete!")
if __name__ == "__main__":
    # Script entry point: build the cleanup coroutine and drive it to
    # completion on a fresh event loop.
    cleanup_coro = run_cleanup()
    asyncio.run(cleanup_coro)