"""
MongoDB Sample Data Insertion Script for Sparrow Logistics
Run this script to populate your MongoDB database with sample data for testing.

Usage: python insert_sample_data.py
"""

import logging
import os
import random
import sys
from datetime import datetime, timedelta, timezone

from dotenv import load_dotenv
from pymongo import MongoClient
from pymongo.errors import ConnectionFailure, PyMongoError

# Load environment variables
load_dotenv()

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Single source of truth for the sample users so that the `users` and
# `packages` collections always agree on ids and names.
# Columns: user_id, name, full_name, email, phone, days since joining,
#          delivery preference, notification channel.
_SAMPLE_USER_PROFILES = (
    ("USER001", "John Smith", "John Smith",
     "john.smith@email.com", "+1-555-0101", 365, "Standard", "Email"),
    ("USER002", "Sarah Johnson", "Sarah Johnson",
     "sarah.johnson@email.com", "+1-555-0102", 180, "Express", "SMS"),
    ("USER003", "Mike Wilson", "Michael Wilson",
     "mike.wilson@email.com", "+1-555-0103", 90, "Priority", "Email"),
    ("USER004", "Emma Davis", "Emma Davis",
     "emma.davis@email.com", "+1-555-0104", 30, "Standard", "Email"),
)


def _utc_now():
    """Return the current time as a timezone-aware UTC datetime.

    MongoDB stores datetimes as UTC; a naive ``datetime.now()`` (local time)
    would be written as if it were UTC and end up shifted by the local offset.
    """
    return datetime.now(timezone.utc)


def get_mongodb_connection():
    """Get MongoDB connection from environment variables.

    The connection string is read from ``MONGODB_URL``, ``MONGO_URL`` or
    ``DATABASE_URL`` (first one that is set). The database name is read from
    ``MONGODB_DATABASE`` and defaults to ``sparrow_logistics``.

    Returns:
        A ``(client, db)`` tuple. The caller owns ``client`` and must close it.

    Raises:
        ValueError: If no connection string is configured.
        Exception: Any error raised while connecting or pinging the server is
            logged and re-raised unchanged.
    """
    client = None
    try:
        mongodb_url = (
            os.getenv('MONGODB_URL')
            or os.getenv('MONGO_URL')
            or os.getenv('DATABASE_URL')
        )

        if not mongodb_url:
            raise ValueError("No MongoDB URL found in environment variables. Please set MONGODB_URL in your .env file.")

        client = MongoClient(mongodb_url, serverSelectionTimeoutMS=5000)

        # Test the connection (MongoClient connects lazily, so force a round trip)
        client.admin.command('ping')

        # Database name comes from MONGODB_DATABASE, falling back to the default
        db_name = os.getenv('MONGODB_DATABASE', 'sparrow_logistics')
        db = client[db_name]

        logger.info("Successfully connected to MongoDB database: %s", db_name)
        return client, db

    except Exception as e:
        logger.error("Failed to connect to MongoDB: %s", e)
        # Do not leak the client (and its background threads) when the
        # connection test fails after the client object was created.
        if client is not None:
            client.close()
        raise


def create_sample_users() -> list:
    """Create sample user data.

    Returns:
        A list of four user documents. ``_id`` is set to the ``user_id`` so
        re-running the script yields stable primary keys.
    """
    # Capture the clock once so `created_at` and `join_date` are identical.
    now = _utc_now()

    users = []
    for (user_id, name, full_name, email, phone,
         days_since_join, delivery_preference, notifications) in _SAMPLE_USER_PROFILES:
        joined = now - timedelta(days=days_since_join)
        users.append({
            "user_id": user_id,
            "_id": user_id,
            "name": name,
            "full_name": full_name,
            "email": email,
            "phone": phone,
            "status": "active",
            "created_at": joined,
            "join_date": joined,
            "preferences": {
                "delivery_preference": delivery_preference,
                "notifications": notifications,
            },
        })
    return users


def create_sample_packages() -> list:
    """Create sample package data.

    Status, origin, destination and the numeric attributes are randomised on
    every call; tracking numbers and the owning users are fixed.

    Returns:
        A list of eight package documents, assigned round-robin to the four
        sample users.
    """
    statuses = ["delivered", "in_transit", "pending", "shipped", "out_for_delivery", "processing"]
    origins = ["New York, NY", "Los Angeles, CA", "Chicago, IL", "Houston, TX", "Phoenix, AZ"]
    destinations = ["Miami, FL", "Seattle, WA", "Boston, MA", "Atlanta, GA", "Denver, CO", "Las Vegas, NV"]
    tracking_numbers = ["TRK001", "TRK002", "TRK003", "ABC123", "XYZ999", "DEF456", "GHI789", "JKL012"]

    now = _utc_now()
    packages = []

    for i, tracking_num in enumerate(tracking_numbers):
        profile = _SAMPLE_USER_PROFILES[i % len(_SAMPLE_USER_PROFILES)]
        user_id, customer_name = profile[0], profile[1]

        status = random.choice(statuses)
        origin = random.choice(origins)
        destination = random.choice(destinations)

        # Create realistic tracking events
        tracking_events = []
        base_date = now - timedelta(days=random.randint(1, 10))

        # Only packages that have physically moved get an event trail;
        # pending/shipped/processing packages keep an empty list.
        if status in ("delivered", "in_transit", "out_for_delivery"):
            tracking_events = [
                {
                    "date": base_date,
                    "location": origin,
                    "description": "Package picked up",
                    "status": "picked_up",
                },
                {
                    "date": base_date + timedelta(hours=6),
                    "location": "Sorting Facility",
                    "description": "Arrived at sorting facility",
                    "status": "in_facility",
                },
            ]

            if status in ("delivered", "out_for_delivery"):
                tracking_events.append({
                    "date": base_date + timedelta(days=1),
                    "location": "Local Distribution Center",
                    "description": "Out for delivery",
                    "status": "out_for_delivery",
                })

            if status == "delivered":
                tracking_events.append({
                    "date": base_date + timedelta(days=1, hours=4),
                    "location": destination,
                    "description": "Package delivered",
                    "status": "delivered",
                })

        packages.append({
            "tracking_number": tracking_num,
            "tracking_id": tracking_num,
            "reference_number": tracking_num,
            "user_id": user_id,
            "customer_name": customer_name,
            "recipient_name": customer_name,
            "status": status,
            "origin": origin,
            "destination": destination,
            # Last known event location, or the origin if nothing happened yet
            "current_location": tracking_events[-1]["location"] if tracking_events else origin,
            "estimated_delivery": (now + timedelta(days=random.randint(1, 5))).strftime("%Y-%m-%d"),
            "last_updated": (now - timedelta(hours=random.randint(1, 24))).strftime("%Y-%m-%d %H:%M:%S"),
            "created_at": base_date,
            "delivery_time_days": random.randint(1, 7) if status == "delivered" else None,
            "tracking_events": tracking_events,
            "description": f"Package from {origin} to {destination}",
            "weight": f"{random.randint(1, 50)} lbs",
            "dimensions": f"{random.randint(6, 24)}x{random.randint(6, 24)}x{random.randint(6, 24)} inches",
        })

    return packages


def create_sample_delivery_routes() -> list:
    """Create sample delivery route data.

    Returns:
        A list of five static route documents.
    """
    routes = [
        {
            "origin": "New York",
            "destination": "Miami",
            "route_name": "NYC-MIA Express",
            "estimated_days": 2,
            "service_type": "Express",
            "distance_miles": 1280,
            "active": True,
        },
        {
            "origin": "Los Angeles",
            "destination": "Seattle",
            "route_name": "LAX-SEA Standard",
            "estimated_days": 3,
            "service_type": "Standard",
            "distance_miles": 1135,
            "active": True,
        },
        {
            "origin": "Chicago",
            "destination": "Boston",
            "route_name": "CHI-BOS Priority",
            "estimated_days": 2,
            "service_type": "Priority",
            "distance_miles": 983,
            "active": True,
        },
        {
            "origin": "Houston",
            "destination": "Atlanta",
            "route_name": "HOU-ATL Standard",
            "estimated_days": 3,
            "service_type": "Standard",
            "distance_miles": 789,
            "active": True,
        },
        {
            "origin": "Phoenix",
            "destination": "Denver",
            "route_name": "PHX-DEN Express",
            "estimated_days": 1,
            "service_type": "Express",
            "distance_miles": 602,
            "active": True,
        },
    ]
    return routes


def create_sample_tracking_history() -> list:
    """Create sample tracking history data.

    Returns:
        A list of five archived, delivered records numbered ``OLD001`` to
        ``OLD005`` with a random delivery date 30-365 days in the past.
    """
    now = _utc_now()
    history = []

    for i in range(5):
        # One date for both fields: an archived record was last updated when
        # it was delivered, so the two values must not contradict each other.
        delivered_on = (now - timedelta(days=random.randint(30, 365))).strftime("%Y-%m-%d")
        history.append({
            "tracking_number": f"OLD{i + 1:03d}",
            "status": "delivered",
            "last_updated": delivered_on,
            "final_location": random.choice(["Miami, FL", "Seattle, WA", "Boston, MA"]),
            "delivery_date": delivered_on,
            "archived": True,
        })

    return history


def create_sample_service_alerts() -> list:
    """Create sample service alert data.

    Returns:
        A list of three alert documents (two active, one resolved).
    """
    now = _utc_now()
    alerts = [
        {
            "title": "Weather Delay - Northeast Region",
            "description": "Heavy snow affecting deliveries in New York, Boston, and surrounding areas. Expect 1-2 day delays.",
            "status": "active",
            "severity": "High",
            "affected_locations": ["New York", "Boston", "Albany", "Hartford"],
            "estimated_delay_days": 2,
            "estimated_resolution": "2024-01-15",
            "priority": 3,
            "created_at": now - timedelta(days=1),
        },
        {
            "title": "Road Construction - I-95 Corridor",
            "description": "Ongoing road construction between Miami and Jacksonville causing minor delays.",
            "status": "active",
            "severity": "Medium",
            "affected_locations": ["Miami", "Jacksonville", "Fort Lauderdale"],
            "estimated_delay_days": 1,
            "estimated_resolution": "2024-02-01",
            "priority": 2,
            "created_at": now - timedelta(days=7),
        },
        {
            "title": "Holiday Schedule - Thanksgiving Week",
            "description": "Modified delivery schedule during Thanksgiving week. Some delays expected.",
            "status": "resolved",
            "severity": "Low",
            "affected_locations": ["Nationwide"],
            "estimated_delay_days": 1,
            "estimated_resolution": "2023-11-27",
            "priority": 1,
            "created_at": now - timedelta(days=60),
        },
    ]
    return alerts


def _create_indexes(db):
    """Create the lookup indexes used when querying the sample collections."""
    logger.info("Creating indexes for better performance...")

    # Indexes for packages collection
    db.packages.create_index("tracking_number")
    db.packages.create_index("user_id")
    db.packages.create_index("status")
    db.packages.create_index([("origin", 1), ("destination", 1)])

    # Indexes for users collection
    db.users.create_index("user_id")
    db.users.create_index("email")
    db.users.create_index("phone")

    # Indexes for delivery_routes collection
    db.delivery_routes.create_index([("origin", 1), ("destination", 1)])

    # Indexes for service_alerts collection
    db.service_alerts.create_index("status")
    db.service_alerts.create_index("affected_locations")

    logger.info("Successfully created all indexes")


def _print_summary(db, collection_names):
    """Print the per-collection document counts and usage hints."""
    print("\n" + "="*60)
    print("SAMPLE DATA INSERTION COMPLETE!")
    print("="*60)

    for collection_name in collection_names:
        count = db[collection_name].count_documents({})
        print(f"{collection_name.upper()}: {count} documents")

    print("="*60)
    print("\nYour MongoDB database is now ready for testing!")
    print("You can now run your chatbot and test with sample tracking numbers like:")
    print("- TRK001, TRK002, ABC123, XYZ999")
    print("- User IDs: USER001, USER002, USER003, USER004")
    print("- Or search by email: john.smith@email.com")
    print("\n")


def insert_sample_data():
    """Main function to insert all sample data.

    WARNING: every target collection is emptied before the sample documents
    are inserted.

    Raises:
        Exception: Any connection or write error is logged and re-raised. The
            client is closed in all cases.
    """
    client = None
    try:
        client, db = get_mongodb_connection()

        # Collections to populate
        collections_data = {
            'users': create_sample_users(),
            'packages': create_sample_packages(),
            'delivery_routes': create_sample_delivery_routes(),
            'tracking_history': create_sample_tracking_history(),
            'service_alerts': create_sample_service_alerts(),
        }

        # Insert data into each collection
        for collection_name, data in collections_data.items():
            collection = db[collection_name]

            # Clear existing data (optional - remove this call to keep existing data)
            result = collection.delete_many({})
            logger.info("Cleared %s existing documents from %s", result.deleted_count, collection_name)

            # Insert new data (insert_many rejects an empty list, hence the guard)
            if data:
                result = collection.insert_many(data)
                logger.info("Inserted %s documents into %s", len(result.inserted_ids), collection_name)
            else:
                logger.info("No data to insert into %s", collection_name)

        # Create useful indexes for better performance
        _create_indexes(db)

        # Print summary
        _print_summary(db, collections_data)

    except Exception as e:
        logger.error("Error inserting sample data: %s", e)
        raise
    finally:
        # Close the client on failure too, not only on the success path.
        if client is not None:
            client.close()


def verify_data() -> bool:
    """Verify that the data was inserted correctly.

    Runs one representative lookup against the packages, users,
    delivery_routes and service_alerts collections and prints the outcome of
    each.

    Returns:
        True if every lookup found a document, False otherwise.

    Raises:
        Exception: Any connection or query error is logged and re-raised. The
            client is closed in all cases.
    """
    client = None
    try:
        client, db = get_mongodb_connection()

        print("\n" + "="*60)
        print("DATA VERIFICATION")
        print("="*60)

        # Test some sample queries that your tools will use
        print("Testing sample queries:")
        failed_checks = 0

        # Test tracking
        package = db.packages.find_one({"tracking_number": "TRK001"})
        if package:
            print(f"✅ Found package TRK001: {package['status']} - {package['destination']}")
        else:
            print("❌ Package TRK001 not found")
            failed_checks += 1

        # Test user lookup
        user = db.users.find_one({"email": "john.smith@email.com"})
        if user:
            print(f"✅ Found user: {user['name']} ({user['email']})")
        else:
            print("❌ User john.smith@email.com not found")
            failed_checks += 1

        # Test route lookup
        route = db.delivery_routes.find_one({"origin": {"$regex": "New York", "$options": "i"}})
        if route:
            print(f"✅ Found route: {route['route_name']} - {route['estimated_days']} days")
        else:
            print("❌ No route found with origin New York")
            failed_checks += 1

        # Test service alerts
        alert = db.service_alerts.find_one({"status": "active"})
        if alert:
            print(f"✅ Found active alert: {alert['title']}")
        else:
            print("❌ No active service alert found")
            failed_checks += 1

        print("="*60)
        # Only claim success when every lookup actually returned a document.
        if failed_checks:
            print(f"❌ {failed_checks} verification check(s) failed!")
        else:
            print("✅ All verification tests passed!")

        return failed_checks == 0

    except Exception as e:
        logger.error("Error during verification: %s", e)
        raise
    finally:
        if client is not None:
            client.close()


def main():
    """Insert the sample data, verify it, and exit non-zero on any failure."""
    print("Sparrow Logistics MongoDB Sample Data Insertion")
    print("=" * 50)

    try:
        # Insert sample data
        insert_sample_data()

        # Verify the data
        if not verify_data():
            raise RuntimeError("Sample data verification failed")

        print("🎉 Sample data setup completed successfully!")
        print("Your chatbot is now ready to test with real MongoDB data.")

    except Exception as e:
        print(f"❌ Error setting up sample data: {e}")
        print("Please check your .env file and MongoDB connection.")
        # sys.exit is always available; the bare `exit` helper is injected by
        # the `site` module and is not guaranteed to exist.
        sys.exit(1)


if __name__ == "__main__":
    main()