Spaces:
Sleeping
Sleeping
File size: 1,955 Bytes
3029a46 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
"""Script to remove data after October 25, 2025 from MongoDB for testing the scraper."""
from datetime import datetime
from src.agri_predict.config import get_collections
def remove_data_after_date(cutoff_date_str="2025-10-25"):
    """Delete every document whose "Reported Date" is later than the cutoff.

    Prints the total document count and the number of documents past the
    cutoff, deletes those documents, then prints the remaining total and the
    newest remaining "Reported Date".

    Args:
        cutoff_date_str: Date string in format YYYY-MM-DD. It is parsed into a
            datetime (midnight of that day) and compared with ``$gt``.

    Raises:
        ValueError: If ``cutoff_date_str`` is not in YYYY-MM-DD format.
    """
    cutoff_date = datetime.strptime(cutoff_date_str, "%Y-%m-%d")
    collection = get_collections()['collection']

    # Build the filter once so the count and the delete can never drift apart.
    # NOTE(review): the cutoff is midnight, so a document stamped later on the
    # cutoff day itself would also match "$gt" - confirm that "Reported Date"
    # values carry no time-of-day component.
    after_cutoff = {"Reported Date": {"$gt": cutoff_date}}

    # Count documents before deletion
    before_count = collection.count_documents({})
    after_cutoff_count = collection.count_documents(after_cutoff)

    print("📊 Database Status:")
    print(f" Total documents: {before_count}")
    print(f" Documents after {cutoff_date_str}: {after_cutoff_count}")

    if after_cutoff_count == 0:
        print(f"✅ No documents found after {cutoff_date_str}")
        return

    # Delete documents
    result = collection.delete_many(after_cutoff)
    print("\n🗑️ Deletion Results:")
    print(f" Deleted {result.deleted_count} documents")

    # Verify deletion
    remaining_count = collection.count_documents({})
    latest_doc = collection.find_one(sort=[("Reported Date", -1)])

    print("\n✅ After Deletion:")
    print(f" Total documents: {remaining_count}")

    if not latest_doc:
        print(" Database is empty")
        return

    latest_date = latest_doc.get("Reported Date")
    if not latest_date:
        latest_text = "Unknown"
    elif hasattr(latest_date, "strftime"):
        latest_text = latest_date.strftime("%Y-%m-%d")
    else:
        # The field may hold a non-datetime value (e.g. a string); report it
        # as-is instead of crashing after the deletion has already happened.
        latest_text = str(latest_date)
    print(f" Latest date in database: {latest_text}")
if __name__ == "__main__":
    # Single source of truth for the cutoff, so the banner always names the
    # date that is actually passed to the destructive delete below.
    cutoff = "2025-10-10"

    print("=" * 60)
    print(f"🧹 Cleaning MongoDB Data After {cutoff}")
    print("=" * 60 + "\n")

    remove_data_after_date(cutoff)

    print("\n" + "=" * 60)
    print("✅ Cleanup Complete - Ready to test scraper!")
    print("=" * 60)
|