File size: 1,955 Bytes
3029a46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
"""Script to remove data after October 25, 2025 from MongoDB for testing the scraper."""

from datetime import datetime
from src.agri_predict.config import get_collections

def remove_data_after_date(cutoff_date_str="2025-10-25"):
    """Delete every document whose "Reported Date" is later than the cutoff.

    Prints the collection's document counts before and after the deletion,
    followed by the most recent "Reported Date" left in the collection.
    Nothing is deleted (and the function returns early) when no document
    matches.

    Args:
        cutoff_date_str: Cutoff date string in format YYYY-MM-DD. It is
            parsed to midnight of that day and compared with ``$gt``, so a
            value stamped exactly at midnight of the cutoff day is kept,
            while any later value -- including one later on the cutoff day
            itself -- is removed.

    Raises:
        ValueError: If ``cutoff_date_str`` does not match YYYY-MM-DD.
    """
    cutoff_date = datetime.strptime(cutoff_date_str, "%Y-%m-%d")

    cols = get_collections()
    collection = cols['collection']

    # Build the filter once so the preview count and the actual delete can
    # never drift apart and target different documents.
    after_cutoff_filter = {"Reported Date": {"$gt": cutoff_date}}

    # Count documents before deletion
    before_count = collection.count_documents({})
    after_cutoff_count = collection.count_documents(after_cutoff_filter)

    print("📊 Database Status:")
    print(f"   Total documents: {before_count}")
    print(f"   Documents after {cutoff_date_str}: {after_cutoff_count}")

    if after_cutoff_count == 0:
        print(f"✅ No documents found after {cutoff_date_str}")
        return

    # Delete documents
    result = collection.delete_many(after_cutoff_filter)

    print("\n🗑️  Deletion Results:")
    print(f"   Deleted {result.deleted_count} documents")

    # Verify deletion
    remaining_count = collection.count_documents({})
    latest_doc = collection.find_one(sort=[("Reported Date", -1)])

    print("\n✅ After Deletion:")
    print(f"   Total documents: {remaining_count}")
    if latest_doc:
        latest_date = latest_doc.get("Reported Date")
        # The delete has already happened at this point, so the report must
        # not crash on a value that is present but not a datetime (e.g. a
        # date stored as a string); fall back to its plain text form.
        if isinstance(latest_date, datetime):
            latest_label = latest_date.strftime('%Y-%m-%d')
        elif latest_date:
            latest_label = str(latest_date)
        else:
            latest_label = 'Unknown'
        print(f"   Latest date in database: {latest_label}")
    else:
        print("   Database is empty")

if __name__ == "__main__":
    # Single source of truth for the cutoff: the banner previously announced
    # 2025-10-25 while the call below deleted everything after 2025-10-10.
    # For a destructive script the banner must report the date really used.
    cutoff = "2025-10-10"

    print("=" * 60)
    print(f"🧹 Cleaning MongoDB Data After {cutoff}")
    print("=" * 60 + "\n")

    remove_data_after_date(cutoff)

    print("\n" + "=" * 60)
    print("✅ Cleanup Complete - Ready to test scraper!")
    print("=" * 60)