Spaces:
Sleeping
Sleeping
| """ | |
| MongoDB Sample Data Insertion Script for Sparrow Logistics | |
| Run this script to populate your MongoDB database with sample data for testing. | |
| Usage: python insert_sample_data.py | |
| """ | |
| import os | |
| import logging | |
| from datetime import datetime, timedelta | |
| from pymongo import MongoClient | |
| from pymongo.errors import ConnectionFailure, PyMongoError | |
| from dotenv import load_dotenv | |
| import random | |
# Load environment variables (the connection settings are expected in a .env file)
load_dotenv()
# Configure logging: INFO level on the root logger, plus a logger named after this module
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def get_mongodb_connection():
    """Open a MongoDB connection using settings from the environment.

    The connection string is the first of ``MONGODB_URL``, ``MONGO_URL`` or
    ``DATABASE_URL`` that is set.  The database name is read from
    ``MONGODB_DATABASE`` and defaults to ``sparrow_logistics``; it is not
    parsed out of the connection string.

    Returns:
        tuple: ``(client, db)`` - the ``MongoClient`` and the selected
        database handle.  The caller is responsible for closing the client.

    Raises:
        ValueError: If none of the URL environment variables is set.
        Exception: Any error raised while creating the client or pinging
            the server is logged and re-raised unchanged.
    """
    client = None
    try:
        mongodb_url = (
            os.getenv('MONGODB_URL')
            or os.getenv('MONGO_URL')
            or os.getenv('DATABASE_URL')
        )
        if not mongodb_url:
            raise ValueError("No MongoDB URL found in environment variables. Please set MONGODB_URL in your .env file.")

        client = MongoClient(mongodb_url, serverSelectionTimeoutMS=5000)
        # Test the connection now so an unreachable server is reported here
        # rather than on the caller's first query
        client.admin.command('ping')

        # Database name comes from its own environment variable (with a default)
        db_name = os.getenv('MONGODB_DATABASE', 'sparrow_logistics')
        db = client[db_name]
        logger.info(f"Successfully connected to MongoDB database: {db_name}")
        return client, db
    except Exception as e:
        # The client was created but is never handed to the caller on this
        # path, so close it here instead of leaking it
        if client is not None:
            client.close()
        logger.error(f"Failed to connect to MongoDB: {e}")
        raise
def create_sample_users():
    """Build the four sample user documents.

    Each document uses its ``user_id`` as the MongoDB ``_id`` and carries
    identical ``created_at`` / ``join_date`` timestamps computed from a
    single clock reading.

    Returns:
        list[dict]: User documents for USER001 through USER004.
    """
    # Read the clock once: calling datetime.now() per field made created_at
    # and join_date differ by a few microseconds
    now = datetime.now()

    # (user_id, name, full_name, email, phone, account age in days,
    #  delivery preference, notification channel)
    profiles = [
        ("USER001", "John Smith", "John Smith", "john.smith@email.com", "+1-555-0101", 365, "Standard", "Email"),
        ("USER002", "Sarah Johnson", "Sarah Johnson", "sarah.johnson@email.com", "+1-555-0102", 180, "Express", "SMS"),
        ("USER003", "Mike Wilson", "Michael Wilson", "mike.wilson@email.com", "+1-555-0103", 90, "Priority", "Email"),
        ("USER004", "Emma Davis", "Emma Davis", "emma.davis@email.com", "+1-555-0104", 30, "Standard", "Email"),
    ]

    users = []
    for user_id, name, full_name, email, phone, age_days, delivery, notify in profiles:
        joined = now - timedelta(days=age_days)
        users.append({
            "user_id": user_id,
            "_id": user_id,
            "name": name,
            "full_name": full_name,
            "email": email,
            "phone": phone,
            "status": "active",
            "created_at": joined,
            "join_date": joined,
            "preferences": {
                "delivery_preference": delivery,
                "notifications": notify
            }
        })
    return users
def _build_tracking_events(status, origin, destination, base_date):
    """Return the tracking events implied by *status*, oldest first."""
    if status not in ("delivered", "in_transit", "out_for_delivery"):
        # pending / shipped / processing packages have no scan history yet
        return []

    events = [
        {
            "date": base_date,
            "location": origin,
            "description": "Package picked up",
            "status": "picked_up"
        },
        {
            "date": base_date + timedelta(hours=6),
            "location": "Sorting Facility",
            "description": "Arrived at sorting facility",
            "status": "in_facility"
        }
    ]
    if status in ("delivered", "out_for_delivery"):
        events.append({
            "date": base_date + timedelta(days=1),
            "location": "Local Distribution Center",
            "description": "Out for delivery",
            "status": "out_for_delivery"
        })
    if status == "delivered":
        events.append({
            "date": base_date + timedelta(days=1, hours=4),
            "location": destination,
            "description": "Package delivered",
            "status": "delivered"
        })
    return events


def create_sample_packages(rng=None):
    """Build eight sample package documents with randomised details.

    Packages are assigned round-robin to USER001..USER004.  Status, origin,
    destination, dates, weight and dimensions are drawn at random.

    Args:
        rng: Optional object providing ``choice()`` and ``randint()`` (for
            example a seeded ``random.Random``) for reproducible output.
            Defaults to the shared ``random`` module.

    Returns:
        list[dict]: One document per hard-coded tracking number.
    """
    rng = random if rng is None else rng

    statuses = ["delivered", "in_transit", "pending", "shipped", "out_for_delivery", "processing"]
    origins = ["New York, NY", "Los Angeles, CA", "Chicago, IL", "Houston, TX", "Phoenix, AZ"]
    destinations = ["Miami, FL", "Seattle, WA", "Boston, MA", "Atlanta, GA", "Denver, CO", "Las Vegas, NV"]
    customers = ["John Smith", "Sarah Johnson", "Mike Wilson", "Emma Davis"]
    tracking_numbers = ["TRK001", "TRK002", "TRK003", "ABC123", "XYZ999", "DEF456", "GHI789", "JKL012"]

    now = datetime.now()
    packages = []
    for i, tracking_num in enumerate(tracking_numbers):
        owner = i % len(customers)
        status = rng.choice(statuses)
        origin = rng.choice(origins)
        destination = rng.choice(destinations)

        # Start at least two days back: the last event is stamped 1 day 4 hours
        # after pickup, so a one-day offset put "delivered" scans in the future
        base_date = now - timedelta(days=rng.randint(2, 10))
        tracking_events = _build_tracking_events(status, origin, destination, base_date)

        packages.append({
            "tracking_number": tracking_num,
            "tracking_id": tracking_num,
            "reference_number": tracking_num,
            "user_id": f"USER{owner + 1:03d}",
            "customer_name": customers[owner],
            "recipient_name": customers[owner],
            "status": status,
            "origin": origin,
            "destination": destination,
            "current_location": tracking_events[-1]["location"] if tracking_events else origin,
            "estimated_delivery": (now + timedelta(days=rng.randint(1, 5))).strftime("%Y-%m-%d"),
            "last_updated": (now - timedelta(hours=rng.randint(1, 24))).strftime("%Y-%m-%d %H:%M:%S"),
            "created_at": base_date,
            "delivery_time_days": rng.randint(1, 7) if status == "delivered" else None,
            "tracking_events": tracking_events,
            "description": f"Package from {origin} to {destination}",
            "weight": f"{rng.randint(1, 50)} lbs",
            "dimensions": f"{rng.randint(6, 24)}x{rng.randint(6, 24)}x{rng.randint(6, 24)} inches"
        })
    return packages
def create_sample_delivery_routes():
    """Return the five fixed sample delivery routes, all marked active."""
    field_names = (
        "origin",
        "destination",
        "route_name",
        "estimated_days",
        "service_type",
        "distance_miles",
    )
    # One row per route, in the same order as field_names
    rows = (
        ("New York", "Miami", "NYC-MIA Express", 2, "Express", 1280),
        ("Los Angeles", "Seattle", "LAX-SEA Standard", 3, "Standard", 1135),
        ("Chicago", "Boston", "CHI-BOS Priority", 2, "Priority", 983),
        ("Houston", "Atlanta", "HOU-ATL Standard", 3, "Standard", 789),
        ("Phoenix", "Denver", "PHX-DEN Express", 1, "Express", 602),
    )
    return [{**dict(zip(field_names, row)), "active": True} for row in rows]
def create_sample_tracking_history(rng=None):
    """Build five archived, already-delivered tracking records (OLD001..OLD005).

    Args:
        rng: Optional object providing ``choice()`` and ``randint()`` (for
            example a seeded ``random.Random``) for reproducible output.
            Defaults to the shared ``random`` module.

    Returns:
        list[dict]: Archived tracking-history documents with a delivery date
        between 30 and 365 days in the past.
    """
    rng = random if rng is None else rng
    final_locations = ["Miami, FL", "Seattle, WA", "Boston, MA"]
    now = datetime.now()

    history = []
    for n in range(1, 6):
        # Draw the date once: two independent draws could record a last
        # update that is earlier than the delivery it describes
        delivered_on = (now - timedelta(days=rng.randint(30, 365))).strftime("%Y-%m-%d")
        history.append({
            "tracking_number": f"OLD{n:03d}",
            "status": "delivered",
            "last_updated": delivered_on,
            "final_location": rng.choice(final_locations),
            "delivery_date": delivered_on,
            "archived": True
        })
    return history
def create_sample_service_alerts():
    """Build three sample service alerts (two active, one resolved).

    ``created_at`` and ``estimated_resolution`` are both derived from the
    current time so the data stays coherent whenever the script is run:
    active alerts resolve in the future, the resolved one in the past.

    Returns:
        list[dict]: Service alert documents.
    """
    now = datetime.now()

    def resolution_date(days_from_now):
        # Same "YYYY-MM-DD" string format the fixed dates used before
        return (now + timedelta(days=days_from_now)).strftime("%Y-%m-%d")

    alerts = [
        {
            "title": "Weather Delay - Northeast Region",
            "description": "Heavy snow affecting deliveries in New York, Boston, and surrounding areas. Expect 1-2 day delays.",
            "status": "active",
            "severity": "High",
            "affected_locations": ["New York", "Boston", "Albany", "Hartford"],
            "estimated_delay_days": 2,
            "estimated_resolution": resolution_date(2),
            "priority": 3,
            "created_at": now - timedelta(days=1)
        },
        {
            "title": "Road Construction - I-95 Corridor",
            "description": "Ongoing road construction between Miami and Jacksonville causing minor delays.",
            "status": "active",
            "severity": "Medium",
            "affected_locations": ["Miami", "Jacksonville", "Fort Lauderdale"],
            "estimated_delay_days": 1,
            "estimated_resolution": resolution_date(14),
            "priority": 2,
            "created_at": now - timedelta(days=7)
        },
        {
            "title": "Holiday Schedule - Thanksgiving Week",
            "description": "Modified delivery schedule during Thanksgiving week. Some delays expected.",
            "status": "resolved",
            "severity": "Low",
            "affected_locations": ["Nationwide"],
            "estimated_delay_days": 1,
            # Resolved one week after it was raised (raised 60 days ago)
            "estimated_resolution": resolution_date(-53),
            "priority": 1,
            "created_at": now - timedelta(days=60)
        }
    ]
    return alerts
def insert_sample_data():
    """Replace the sample collections with fresh data and create indexes.

    WARNING: every existing document in ``users``, ``packages``,
    ``delivery_routes``, ``tracking_history`` and ``service_alerts`` is
    deleted before the new sample documents are inserted.

    Raises:
        Exception: Any connection or database error is logged and re-raised.
            The client is closed in either case.
    """
    client = None
    try:
        client, db = get_mongodb_connection()

        # Collections to populate
        collections_data = {
            'users': create_sample_users(),
            'packages': create_sample_packages(),
            'delivery_routes': create_sample_delivery_routes(),
            'tracking_history': create_sample_tracking_history(),
            'service_alerts': create_sample_service_alerts()
        }

        # Insert data into each collection
        for collection_name, data in collections_data.items():
            collection = db[collection_name]

            # Clear existing data (optional - remove these lines to keep existing data)
            result = collection.delete_many({})
            logger.info(f"Cleared {result.deleted_count} existing documents from {collection_name}")

            # insert_many() rejects an empty list, hence the guard
            if data:
                result = collection.insert_many(data)
                logger.info(f"Inserted {len(result.inserted_ids)} documents into {collection_name}")
            else:
                logger.info(f"No data to insert into {collection_name}")

        # Create useful indexes for better performance
        logger.info("Creating indexes for better performance...")
        index_specs = {
            'packages': [
                "tracking_number",
                "user_id",
                "status",
                [("origin", 1), ("destination", 1)],
            ],
            'users': ["user_id", "email", "phone"],
            'delivery_routes': [[("origin", 1), ("destination", 1)]],
            'service_alerts': ["status", "affected_locations"],
        }
        for collection_name, specs in index_specs.items():
            for spec in specs:
                db[collection_name].create_index(spec)
        logger.info("Successfully created all indexes")

        # Print summary
        print("\n" + "="*60)
        print("SAMPLE DATA INSERTION COMPLETE!")
        print("="*60)
        for collection_name in collections_data:
            count = db[collection_name].count_documents({})
            print(f"{collection_name.upper()}: {count} documents")
        print("="*60)
        print("\nYour MongoDB database is now ready for testing!")
        print("You can now run your chatbot and test with sample tracking numbers like:")
        print("- TRK001, TRK002, ABC123, XYZ999")
        print("- User IDs: USER001, USER002, USER003, USER004")
        print("- Or search by email: john.smith@email.com")
        print("\n")
    except Exception as e:
        logger.error(f"Error inserting sample data: {e}")
        raise
    finally:
        # Close on every path; previously a mid-run failure skipped close()
        if client is not None:
            client.close()
def verify_data():
    """Run a few lookups against the sample data and report the outcome.

    Checks that package ``TRK001``, the user ``john.smith@email.com``, a
    route whose origin matches "New York" and at least one active service
    alert can be found.

    Raises:
        RuntimeError: If one or more of the lookups finds nothing.
        Exception: Any connection or query error is logged and re-raised.
            The client is closed in either case.
    """
    client = None
    try:
        client, db = get_mongodb_connection()
        print("\n" + "="*60)
        print("DATA VERIFICATION")
        print("="*60)

        # Test some sample queries that your tools will use
        print("Testing sample queries:")
        missing = []

        # Test tracking
        package = db.packages.find_one({"tracking_number": "TRK001"})
        if package:
            print(f"✅ Found package TRK001: {package['status']} - {package['destination']}")
        else:
            missing.append("package TRK001")

        # Test user lookup
        user = db.users.find_one({"email": "john.smith@email.com"})
        if user:
            print(f"✅ Found user: {user['name']} ({user['email']})")
        else:
            missing.append("user john.smith@email.com")

        # Test route lookup
        route = db.delivery_routes.find_one({"origin": {"$regex": "New York", "$options": "i"}})
        if route:
            print(f"✅ Found route: {route['route_name']} - {route['estimated_days']} days")
        else:
            missing.append("route from New York")

        # Test service alerts
        alert = db.service_alerts.find_one({"status": "active"})
        if alert:
            print(f"✅ Found active alert: {alert['title']}")
        else:
            missing.append("active service alert")

        for item in missing:
            print(f"❌ Not found: {item}")
        print("="*60)

        # Only claim success when every lookup actually returned a document
        if missing:
            raise RuntimeError("Verification failed - not found: " + ", ".join(missing))
        print("✅ All verification tests passed!")
    except Exception as e:
        logger.error(f"Error during verification: {e}")
        raise
    finally:
        # Close on every path; previously an error skipped close()
        if client is not None:
            client.close()
if __name__ == "__main__":
    # Script entry point: populate the database, then run the verification
    # queries.  Exits with status 1 if either step raises.
    print("Sparrow Logistics MongoDB Sample Data Insertion")
    print("=" * 50)
    try:
        # Insert sample data
        insert_sample_data()
        # Verify the data
        verify_data()
    except Exception as e:
        print(f"❌ Error setting up sample data: {e}")
        print("Please check your .env file and MongoDB connection.")
        # exit() is a helper added by the site module for interactive use;
        # raising SystemExit works in every run mode
        raise SystemExit(1)
    else:
        print("🎉 Sample data setup completed successfully!")
        print("Your chatbot is now ready to test with real MongoDB data.")