SparrowAgent / mongodb /insert_sample_data.py
nivakaran's picture
Create mongodb/insert_sample_data.py
44f1933 verified
"""
MongoDB Sample Data Insertion Script for Sparrow Logistics
Run this script to populate your MongoDB database with sample data for testing.
Usage: python insert_sample_data.py
"""
import os
import logging
from datetime import datetime, timedelta
from pymongo import MongoClient
from pymongo.errors import ConnectionFailure, PyMongoError
from dotenv import load_dotenv
import random
# Load environment variables via python-dotenv (presumably from a local .env
# file, which is where get_mongodb_connection() tells users to set MONGODB_URL).
# This must run before any os.getenv() lookup below.
load_dotenv()
# Configure logging at INFO level so the logger.info() progress messages
# emitted by this script are shown.
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module, shared by every function here.
logger = logging.getLogger(__name__)
def get_mongodb_connection():
    """Open a MongoDB connection configured from environment variables.

    The connection string is taken from the first of ``MONGODB_URL``,
    ``MONGO_URL`` or ``DATABASE_URL`` that is set. The database name comes
    from ``MONGODB_DATABASE`` and defaults to ``sparrow_logistics``.

    Returns:
        A ``(client, db)`` tuple. The caller owns ``client`` and is
        responsible for closing it.

    Raises:
        ValueError: if none of the URL environment variables is set.
        Exception: any error raised while connecting or pinging the server
            is logged and re-raised unchanged.
    """
    client = None
    try:
        mongodb_url = (
            os.getenv('MONGODB_URL')
            or os.getenv('MONGO_URL')
            or os.getenv('DATABASE_URL')
        )
        if not mongodb_url:
            raise ValueError("No MongoDB URL found in environment variables. Please set MONGODB_URL in your .env file.")

        client = MongoClient(mongodb_url, serverSelectionTimeoutMS=5000)
        # Test the connection: fail here (within the 5 s selection timeout)
        # rather than on the first real query.
        client.admin.command('ping')

        # Database name comes from the environment, not from the URL.
        db_name = os.getenv('MONGODB_DATABASE', 'sparrow_logistics')
        db = client[db_name]
        logger.info(f"Successfully connected to MongoDB database: {db_name}")
        return client, db
    except Exception as e:
        logger.error(f"Failed to connect to MongoDB: {e}")
        # Do not leak the client when the ping (or anything after the client
        # was created) fails: the caller never receives it and cannot close it.
        if client is not None:
            client.close()
        raise
def create_sample_users():
    """Build the four sample user documents.

    Each document uses its ``user_id`` as the MongoDB ``_id`` and carries
    ``created_at`` / ``join_date`` timestamps expressed as "N days ago".

    Returns:
        A list of four dicts ready to be inserted into the ``users``
        collection.
    """
    # Take one clock reading so that created_at and join_date of a user are
    # exactly equal, and all users are offset from the same instant.
    now = datetime.now()

    # (number, name, full_name, email, phone, days since joining,
    #  delivery preference, notification channel)
    profiles = [
        (1, "John Smith", "John Smith", "john.smith@email.com", "+1-555-0101", 365, "Standard", "Email"),
        (2, "Sarah Johnson", "Sarah Johnson", "sarah.johnson@email.com", "+1-555-0102", 180, "Express", "SMS"),
        (3, "Mike Wilson", "Michael Wilson", "mike.wilson@email.com", "+1-555-0103", 90, "Priority", "Email"),
        (4, "Emma Davis", "Emma Davis", "emma.davis@email.com", "+1-555-0104", 30, "Standard", "Email"),
    ]

    users = []
    for number, name, full_name, email, phone, days_ago, delivery, notify in profiles:
        user_id = f"USER{number:03d}"
        joined = now - timedelta(days=days_ago)
        users.append({
            "user_id": user_id,
            "_id": user_id,
            "name": name,
            "full_name": full_name,
            "email": email,
            "phone": phone,
            "status": "active",
            "created_at": joined,
            "join_date": joined,
            "preferences": {
                "delivery_preference": delivery,
                "notifications": notify,
            },
        })
    return users
def create_sample_packages(seed=None):
    """Build eight sample package documents with randomized details.

    Status, origin, destination, dates, weight and dimensions are chosen at
    random. Packages are assigned round-robin to the four sample users
    (``USER001`` .. ``USER004``).

    Args:
        seed: Optional seed. When given, a private ``random.Random(seed)`` is
            used so the random choices are reproducible; when ``None`` (the
            default) the module-level ``random`` generator is used.

    Returns:
        A list of eight dicts ready to be inserted into the ``packages``
        collection.
    """
    rng = random.Random(seed) if seed is not None else random
    # One clock reading so every timestamp in the batch is mutually consistent.
    now = datetime.now()

    statuses = ["delivered", "in_transit", "pending", "shipped", "out_for_delivery", "processing"]
    origins = ["New York, NY", "Los Angeles, CA", "Chicago, IL", "Houston, TX", "Phoenix, AZ"]
    destinations = ["Miami, FL", "Seattle, WA", "Boston, MA", "Atlanta, GA", "Denver, CO", "Las Vegas, NV"]
    tracking_numbers = ["TRK001", "TRK002", "TRK003", "ABC123", "XYZ999", "DEF456", "GHI789", "JKL012"]
    customer_names = ["John Smith", "Sarah Johnson", "Mike Wilson", "Emma Davis"]

    packages = []
    for i, tracking_num in enumerate(tracking_numbers):
        owner = i % len(customer_names)
        user_id = f"USER{owner + 1:03d}"
        status = rng.choice(statuses)
        origin = rng.choice(origins)
        destination = rng.choice(destinations)

        days_ago = rng.randint(1, 10)
        if status == "delivered":
            # The delivery event is stamped at pickup + 1 day 4 hours, so a
            # delivered package must have been picked up at least 2 days ago;
            # otherwise it would be "delivered" at a time still in the future.
            days_ago = max(days_ago, 2)
        base_date = now - timedelta(days=days_ago)

        # Create realistic tracking events: only statuses that imply the
        # package has physically moved get a history.
        tracking_events = []
        if status in ("delivered", "in_transit", "out_for_delivery"):
            tracking_events = [
                {
                    "date": base_date,
                    "location": origin,
                    "description": "Package picked up",
                    "status": "picked_up",
                },
                {
                    "date": base_date + timedelta(hours=6),
                    "location": "Sorting Facility",
                    "description": "Arrived at sorting facility",
                    "status": "in_facility",
                },
            ]
            if status in ("delivered", "out_for_delivery"):
                tracking_events.append({
                    "date": base_date + timedelta(days=1),
                    "location": "Local Distribution Center",
                    "description": "Out for delivery",
                    "status": "out_for_delivery",
                })
            if status == "delivered":
                tracking_events.append({
                    "date": base_date + timedelta(days=1, hours=4),
                    "location": destination,
                    "description": "Package delivered",
                    "status": "delivered",
                })

        package = {
            # The same identifier is stored under three keys.
            "tracking_number": tracking_num,
            "tracking_id": tracking_num,
            "reference_number": tracking_num,
            "user_id": user_id,
            "customer_name": customer_names[owner],
            "recipient_name": customer_names[owner],
            "status": status,
            "origin": origin,
            "destination": destination,
            # Location of the most recent event, or the origin if none yet.
            "current_location": tracking_events[-1]["location"] if tracking_events else origin,
            "estimated_delivery": (now + timedelta(days=rng.randint(1, 5))).strftime("%Y-%m-%d"),
            "last_updated": (now - timedelta(hours=rng.randint(1, 24))).strftime("%Y-%m-%d %H:%M:%S"),
            "created_at": base_date,
            "delivery_time_days": rng.randint(1, 7) if status == "delivered" else None,
            "tracking_events": tracking_events,
            "description": f"Package from {origin} to {destination}",
            "weight": f"{rng.randint(1, 50)} lbs",
            "dimensions": f"{rng.randint(6, 24)}x{rng.randint(6, 24)}x{rng.randint(6, 24)} inches",
        }
        packages.append(package)
    return packages
def create_sample_delivery_routes():
    """Return the fixed set of sample delivery-route documents.

    Every route records an origin city, a destination city, a display name,
    the estimated transit time in days, the service tier and the distance in
    miles, and is flagged as active.

    Returns:
        A list of five dicts ready to be inserted into the
        ``delivery_routes`` collection.
    """
    columns = (
        "origin",
        "destination",
        "route_name",
        "estimated_days",
        "service_type",
        "distance_miles",
    )
    rows = (
        ("New York", "Miami", "NYC-MIA Express", 2, "Express", 1280),
        ("Los Angeles", "Seattle", "LAX-SEA Standard", 3, "Standard", 1135),
        ("Chicago", "Boston", "CHI-BOS Priority", 2, "Priority", 983),
        ("Houston", "Atlanta", "HOU-ATL Standard", 3, "Standard", 789),
        ("Phoenix", "Denver", "PHX-DEN Express", 1, "Express", 602),
    )

    routes = []
    for row in rows:
        route = dict(zip(columns, row))
        # All sample routes are in service.
        route["active"] = True
        routes.append(route)
    return routes
def create_sample_tracking_history():
    """Build five archived, already-delivered tracking records.

    Records are numbered ``OLD001`` .. ``OLD005``. Each gets a random
    delivery date between 30 and 365 days ago and a random final location.

    Returns:
        A list of five dicts ready to be inserted into the
        ``tracking_history`` collection.
    """
    now = datetime.now()
    final_locations = ["Miami, FL", "Seattle, WA", "Boston, MA"]

    history = []
    for i in range(5):
        # Draw the date once and use it for both fields: an archived record's
        # last update is its delivery, so "last_updated" must never fall
        # before "delivery_date" (independent draws allowed that).
        delivered_on = (now - timedelta(days=random.randint(30, 365))).strftime("%Y-%m-%d")
        history.append({
            "tracking_number": f"OLD{i + 1:03d}",
            "status": "delivered",
            "last_updated": delivered_on,
            "final_location": random.choice(final_locations),
            "delivery_date": delivered_on,
            "archived": True,
        })
    return history
def create_sample_service_alerts():
    """Build three sample service-alert documents (two active, one resolved).

    ``created_at`` and ``estimated_resolution`` are both derived from the
    current time, so an alert never shows a resolution date earlier than its
    creation and an active alert never shows a resolution date already past.

    Returns:
        A list of three dicts ready to be inserted into the
        ``service_alerts`` collection. ``estimated_resolution`` is a
        ``YYYY-MM-DD`` string; ``created_at`` is a ``datetime``.
    """
    now = datetime.now()

    def day(offset_days):
        """Format the date ``offset_days`` from now as YYYY-MM-DD."""
        return (now + timedelta(days=offset_days)).strftime("%Y-%m-%d")

    alerts = [
        {
            "title": "Weather Delay - Northeast Region",
            "description": "Heavy snow affecting deliveries in New York, Boston, and surrounding areas. Expect 1-2 day delays.",
            "status": "active",
            "severity": "High",
            "affected_locations": ["New York", "Boston", "Albany", "Hartford"],
            "estimated_delay_days": 2,
            # Active: expected to clear a couple of days from now.
            "estimated_resolution": day(2),
            "priority": 3,
            "created_at": now - timedelta(days=1),
        },
        {
            "title": "Road Construction - I-95 Corridor",
            "description": "Ongoing road construction between Miami and Jacksonville causing minor delays.",
            "status": "active",
            "severity": "Medium",
            "affected_locations": ["Miami", "Jacksonville", "Fort Lauderdale"],
            "estimated_delay_days": 1,
            # Active and ongoing: resolution is still two weeks out.
            "estimated_resolution": day(14),
            "priority": 2,
            "created_at": now - timedelta(days=7),
        },
        {
            "title": "Holiday Schedule - Thanksgiving Week",
            "description": "Modified delivery schedule during Thanksgiving week. Some delays expected.",
            "status": "resolved",
            "severity": "Low",
            "affected_locations": ["Nationwide"],
            "estimated_delay_days": 1,
            # Resolved: ended a few days after it was raised, well in the past.
            "estimated_resolution": day(-56),
            "priority": 1,
            "created_at": now - timedelta(days=60),
        },
    ]
    return alerts
def insert_sample_data():
    """Replace the contents of the sample collections and (re)create indexes.

    For each of ``users``, ``packages``, ``delivery_routes``,
    ``tracking_history`` and ``service_alerts`` this deletes every existing
    document, inserts the freshly generated sample documents, then creates
    the lookup indexes and prints a per-collection document count.

    WARNING: existing documents in those collections are deleted.

    Raises:
        Exception: any connection or database error is logged and re-raised.
    """
    try:
        client, db = get_mongodb_connection()
        try:
            # Collections to populate
            collections_data = {
                'users': create_sample_users(),
                'packages': create_sample_packages(),
                'delivery_routes': create_sample_delivery_routes(),
                'tracking_history': create_sample_tracking_history(),
                'service_alerts': create_sample_service_alerts()
            }

            # Insert data into each collection
            for collection_name, data in collections_data.items():
                collection = db[collection_name]
                # Clear existing data (optional - remove this line to keep existing data)
                result = collection.delete_many({})
                logger.info(f"Cleared {result.deleted_count} existing documents from {collection_name}")
                # Insert new data
                if data:
                    result = collection.insert_many(data)
                    logger.info(f"Inserted {len(result.inserted_ids)} documents into {collection_name}")
                else:
                    logger.info(f"No data to insert into {collection_name}")

            # Create useful indexes for better performance
            logger.info("Creating indexes for better performance...")
            # Indexes for packages collection
            db.packages.create_index("tracking_number")
            db.packages.create_index("user_id")
            db.packages.create_index("status")
            db.packages.create_index([("origin", 1), ("destination", 1)])
            # Indexes for users collection
            db.users.create_index("user_id")
            db.users.create_index("email")
            db.users.create_index("phone")
            # Indexes for delivery_routes collection
            db.delivery_routes.create_index([("origin", 1), ("destination", 1)])
            # Indexes for service_alerts collection
            db.service_alerts.create_index("status")
            db.service_alerts.create_index("affected_locations")
            logger.info("Successfully created all indexes")

            # Print summary
            print("\n" + "="*60)
            print("SAMPLE DATA INSERTION COMPLETE!")
            print("="*60)
            for collection_name in collections_data:
                count = db[collection_name].count_documents({})
                print(f"{collection_name.upper()}: {count} documents")
            print("="*60)
            print("\nYour MongoDB database is now ready for testing!")
            print("You can now run your chatbot and test with sample tracking numbers like:")
            print("- TRK001, TRK002, ABC123, XYZ999")
            print("- User IDs: USER001, USER002, USER003, USER004")
            print("- Or search by email: john.smith@email.com")
            print("\n")
        finally:
            # Always release the connection, including when an insert or
            # index step above raised.
            client.close()
    except Exception as e:
        logger.error(f"Error inserting sample data: {e}")
        raise
def verify_data():
    """Run representative read queries to confirm the sample data is present.

    Looks up package ``TRK001``, the user with e-mail
    ``john.smith@email.com``, a delivery route whose origin matches
    "New York" (case-insensitive) and one active service alert, printing a
    line for each result.

    Raises:
        RuntimeError: if one or more of the lookups finds no document, so
            that a missing data set is never reported as a pass.
        Exception: any connection or query error is logged and re-raised.
    """
    try:
        client, db = get_mongodb_connection()
        try:
            print("\n" + "="*60)
            print("DATA VERIFICATION")
            print("="*60)
            # Test some sample queries that your tools will use
            print("Testing sample queries:")
            missing = []

            # Test tracking
            package = db.packages.find_one({"tracking_number": "TRK001"})
            if package:
                print(f"✅ Found package TRK001: {package['status']} - {package['destination']}")
            else:
                print("❌ Package TRK001 not found")
                missing.append("package TRK001")

            # Test user lookup
            user = db.users.find_one({"email": "john.smith@email.com"})
            if user:
                print(f"✅ Found user: {user['name']} ({user['email']})")
            else:
                print("❌ User john.smith@email.com not found")
                missing.append("user john.smith@email.com")

            # Test route lookup
            route = db.delivery_routes.find_one({"origin": {"$regex": "New York", "$options": "i"}})
            if route:
                print(f"✅ Found route: {route['route_name']} - {route['estimated_days']} days")
            else:
                print("❌ No route with origin New York found")
                missing.append("route from New York")

            # Test service alerts
            alert = db.service_alerts.find_one({"status": "active"})
            if alert:
                print(f"✅ Found active alert: {alert['title']}")
            else:
                print("❌ No active service alert found")
                missing.append("active service alert")

            print("="*60)
            if missing:
                # Only claim success when every lookup returned a document.
                raise RuntimeError("Verification failed; not found: " + ", ".join(missing))
            print("✅ All verification tests passed!")
        finally:
            # Release the connection on both the success and failure paths.
            client.close()
    except Exception as e:
        logger.error(f"Error during verification: {e}")
        raise
# Script entry point: populate the database, then verify it with sample queries.
if __name__ == "__main__":
    print("Sparrow Logistics MongoDB Sample Data Insertion")
    print("=" * 50)
    try:
        # Insert sample data
        insert_sample_data()
        # Verify the data
        verify_data()
        print("🎉 Sample data setup completed successfully!")
        print("Your chatbot is now ready to test with real MongoDB data.")
    except Exception as e:
        print(f"❌ Error setting up sample data: {e}")
        print("Please check your .env file and MongoDB connection.")
        # Exit with a non-zero status. SystemExit is raised directly because
        # the exit() helper is injected by the site module for interactive
        # use and is not guaranteed to exist in every runtime.
        raise SystemExit(1)