"""Remove MongoDB documents dated after a cutoff so the scraper can be re-tested.

Run as a script to delete every document whose "Reported Date" is later than
``DEFAULT_CUTOFF_DATE``. Pass a different ``YYYY-MM-DD`` date as the first
command-line argument to override the cutoff.
"""

import sys
from datetime import datetime

from src.agri_predict.config import get_collections

DEFAULT_CUTOFF_DATE = "2025-10-25"
DATE_FORMAT = "%Y-%m-%d"
BANNER = "=" * 60


def _format_reported_date(value):
    """Render a stored "Reported Date" value for display."""
    if not value:
        return "Unknown"
    if isinstance(value, datetime):
        return value.strftime(DATE_FORMAT)
    # Not a datetime (e.g. the field was stored as a string): show it as-is
    # instead of crashing on a missing ``strftime``.
    return str(value)


def remove_data_after_date(cutoff_date_str=DEFAULT_CUTOFF_DATE):
    """Delete all documents whose "Reported Date" is after the given date.

    Prints the document counts before the deletion, the number of documents
    deleted, and the total count and latest "Reported Date" afterwards.

    Args:
        cutoff_date_str: Cutoff date in ``YYYY-MM-DD`` format. Documents with a
            "Reported Date" strictly greater than midnight of this date are
            removed.

    Raises:
        ValueError: If ``cutoff_date_str`` is not in ``YYYY-MM-DD`` format.
            Raised before the database is touched.
    """
    cutoff_date = datetime.strptime(cutoff_date_str, DATE_FORMAT)

    collection = get_collections()["collection"]

    # Build the filter once so the count and the delete can never disagree.
    # NOTE(review): the comparison is against midnight of the cutoff day, so a
    # document stamped later on that same day would also match - confirm that
    # "Reported Date" values carry no time component.
    after_cutoff = {"Reported Date": {"$gt": cutoff_date}}

    before_count = collection.count_documents({})
    after_cutoff_count = collection.count_documents(after_cutoff)

    print("📊 Database Status:")
    print(f" Total documents: {before_count}")
    print(f" Documents after {cutoff_date_str}: {after_cutoff_count}")

    if after_cutoff_count == 0:
        print(f"✅ No documents found after {cutoff_date_str}")
        return

    result = collection.delete_many(after_cutoff)

    print("\n🗑️ Deletion Results:")
    print(f" Deleted {result.deleted_count} documents")

    # Verify the deletion by re-reading the collection state.
    remaining_count = collection.count_documents({})
    latest_doc = collection.find_one(sort=[("Reported Date", -1)])

    print("\n✅ After Deletion:")
    print(f" Total documents: {remaining_count}")
    if latest_doc:
        latest_date = _format_reported_date(latest_doc.get("Reported Date"))
        print(f" Latest date in database: {latest_date}")
    else:
        print(" Database is empty")


def main(argv=None):
    """Run the cleanup from the command line.

    Args:
        argv: Command-line arguments excluding the program name; defaults to
            ``sys.argv[1:]``. The first argument, if present, is used as the
            cutoff date instead of ``DEFAULT_CUTOFF_DATE``.
    """
    args = sys.argv[1:] if argv is None else argv
    # One value drives both the banner and the deletion, so the message can
    # never advertise a different cutoff than the one actually applied.
    cutoff = args[0] if args else DEFAULT_CUTOFF_DATE

    print(BANNER)
    print(f"🧹 Cleaning MongoDB Data After {cutoff}")
    print(BANNER + "\n")

    remove_data_after_date(cutoff)

    print("\n" + BANNER)
    print("✅ Cleanup Complete - Ready to test scraper!")
    print(BANNER)


if __name__ == "__main__":
    main()