Spaces:
Paused
Paused
| import datetime | |
| import time | |
| import click | |
| from sqlalchemy import text | |
| from werkzeug.exceptions import NotFound | |
| import app | |
| from configs import dify_config | |
| from extensions.ext_database import db | |
| from models.dataset import Embedding | |
| def clean_embedding_cache_task(): | |
| click.echo(click.style("Start clean embedding cache.", fg="green")) | |
| clean_days = int(dify_config.PLAN_SANDBOX_CLEAN_DAY_SETTING) | |
| start_at = time.perf_counter() | |
| thirty_days_ago = datetime.datetime.now() - datetime.timedelta(days=clean_days) | |
| while True: | |
| try: | |
| embedding_ids = ( | |
| db.session.query(Embedding.id) | |
| .filter(Embedding.created_at < thirty_days_ago) | |
| .order_by(Embedding.created_at.desc()) | |
| .limit(100) | |
| .all() | |
| ) | |
| embedding_ids = [embedding_id[0] for embedding_id in embedding_ids] | |
| except NotFound: | |
| break | |
| if embedding_ids: | |
| for embedding_id in embedding_ids: | |
| db.session.execute( | |
| text("DELETE FROM embeddings WHERE id = :embedding_id"), {"embedding_id": embedding_id} | |
| ) | |
| db.session.commit() | |
| else: | |
| break | |
| end_at = time.perf_counter() | |
| click.echo(click.style("Cleaned embedding cache from db success latency: {}".format(end_at - start_at), fg="green")) | |