nivakaran committed on
Commit
44f1933
·
verified ·
1 Parent(s): d436b5a

Create mongodb/insert_sample_data.py

Browse files
Files changed (1) hide show
  1. mongodb/insert_sample_data.py +425 -0
mongodb/insert_sample_data.py ADDED
@@ -0,0 +1,425 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ MongoDB Sample Data Insertion Script for Sparrow Logistics
3
+ Run this script to populate your MongoDB database with sample data for testing.
4
+
5
+ Usage: python insert_sample_data.py
6
+ """
7
+
8
+ import os
9
+ import logging
10
+ from datetime import datetime, timedelta
11
+ from pymongo import MongoClient
12
+ from pymongo.errors import ConnectionFailure, PyMongoError
13
+ from dotenv import load_dotenv
14
+ import random
15
+
16
# Load environment variables (the connection settings read via os.getenv
# below are expected to come from a local .env file).
load_dotenv()

# Configure logging: show INFO-level records and above.
logging.basicConfig(level=logging.INFO)
# Module-level logger used by every function in this script.
logger = logging.getLogger(__name__)
22
+
23
def get_mongodb_connection():
    """Open a MongoDB connection configured through environment variables.

    The connection string is taken from the first non-empty variable among
    ``MONGODB_URL``, ``MONGO_URL`` and ``DATABASE_URL``. The database name is
    taken from ``MONGODB_DATABASE`` and defaults to ``sparrow_logistics``.

    Returns:
        A ``(client, db)`` tuple. The caller owns the client and is
        responsible for closing it.

    Raises:
        ValueError: If none of the URL environment variables is set.
        Exception: Any error raised while connecting or pinging the server is
            logged and re-raised unchanged.
    """
    client = None
    try:
        mongodb_url = (
            os.getenv('MONGODB_URL')
            or os.getenv('MONGO_URL')
            or os.getenv('DATABASE_URL')
        )

        if not mongodb_url:
            raise ValueError("No MongoDB URL found in environment variables. Please set MONGODB_URL in your .env file.")

        client = MongoClient(mongodb_url, serverSelectionTimeoutMS=5000)

        # Test the connection; MongoClient connects lazily, so a ping is the
        # first point at which an unreachable server is actually detected.
        client.admin.command('ping')

        # Database name comes from MONGODB_DATABASE (not from the URL).
        db_name = os.getenv('MONGODB_DATABASE', 'sparrow_logistics')
        db = client[db_name]

        logger.info(f"Successfully connected to MongoDB database: {db_name}")
        return client, db

    except Exception as e:
        # Do not leak the client (and its background monitor) when the
        # connection test fails after the client object was created.
        if client is not None:
            client.close()
        logger.error(f"Failed to connect to MongoDB: {e}")
        raise
46
+
47
def create_sample_users():
    """Build the sample user documents.

    Returns:
        A list of four user dicts. Each has ``user_id``/``_id``, ``name``,
        ``full_name``, ``email``, ``phone``, ``status``, ``created_at``,
        ``join_date`` and a ``preferences`` sub-document.
    """
    # Read the clock once so that created_at and join_date of a user are
    # exactly equal and all users share the same reference instant.
    now = datetime.now()

    # (user_id, name, full_name, email, phone, account age in days,
    #  delivery preference, notification channel)
    profiles = [
        ("USER001", "John Smith", "John Smith", "john.smith@email.com",
         "+1-555-0101", 365, "Standard", "Email"),
        ("USER002", "Sarah Johnson", "Sarah Johnson", "sarah.johnson@email.com",
         "+1-555-0102", 180, "Express", "SMS"),
        ("USER003", "Mike Wilson", "Michael Wilson", "mike.wilson@email.com",
         "+1-555-0103", 90, "Priority", "Email"),
        ("USER004", "Emma Davis", "Emma Davis", "emma.davis@email.com",
         "+1-555-0104", 30, "Standard", "Email"),
    ]

    users = []
    for (user_id, name, full_name, email, phone,
         age_days, delivery_preference, notifications) in profiles:
        joined = now - timedelta(days=age_days)
        users.append({
            "user_id": user_id,
            # The user id doubles as the MongoDB primary key.
            "_id": user_id,
            "name": name,
            "full_name": full_name,
            "email": email,
            "phone": phone,
            "status": "active",
            "created_at": joined,
            "join_date": joined,
            "preferences": {
                "delivery_preference": delivery_preference,
                "notifications": notifications,
            },
        })
    return users
112
+
113
def create_sample_packages():
    """Build eight sample package documents with randomised shipping details.

    Status, origin, destination, dates, weight and dimensions are drawn from
    the ``random`` module; tracking numbers, user ids and customer names are
    fixed and cycle through the four sample users.

    Returns:
        A list of package dicts, one per sample tracking number.
    """
    status_pool = ["delivered", "in_transit", "pending", "shipped", "out_for_delivery", "processing"]
    origin_pool = ["New York, NY", "Los Angeles, CA", "Chicago, IL", "Houston, TX", "Phoenix, AZ"]
    destination_pool = ["Miami, FL", "Seattle, WA", "Boston, MA", "Atlanta, GA", "Denver, CO", "Las Vegas, NV"]
    customer_names = ["John Smith", "Sarah Johnson", "Mike Wilson", "Emma Davis"]
    sample_numbers = ["TRK001", "TRK002", "TRK003", "ABC123", "XYZ999", "DEF456", "GHI789", "JKL012"]

    result = []
    for position, number in enumerate(sample_numbers):
        owner_slot = position % 4
        owner_id = f"USER{owner_slot + 1:03d}"
        owner_name = customer_names[owner_slot]

        state = random.choice(status_pool)
        start_city = random.choice(origin_pool)
        end_city = random.choice(destination_pool)
        shipped_at = datetime.now() - timedelta(days=random.randint(1, 10))

        # Only packages that have physically moved get a scan history; each
        # later stage adds one more event on top of the pickup/sorting pair.
        events = []
        if state in ("delivered", "in_transit", "out_for_delivery"):
            events.append({
                "date": shipped_at,
                "location": start_city,
                "description": "Package picked up",
                "status": "picked_up"
            })
            events.append({
                "date": shipped_at + timedelta(hours=6),
                "location": "Sorting Facility",
                "description": "Arrived at sorting facility",
                "status": "in_facility"
            })
            if state != "in_transit":
                events.append({
                    "date": shipped_at + timedelta(days=1),
                    "location": "Local Distribution Center",
                    "description": "Out for delivery",
                    "status": "out_for_delivery"
                })
            if state == "delivered":
                events.append({
                    "date": shipped_at + timedelta(days=1, hours=4),
                    "location": end_city,
                    "description": "Package delivered",
                    "status": "delivered"
                })

        # Without any scan the package is still considered to be at its origin.
        if events:
            located_at = events[-1]["location"]
        else:
            located_at = start_city

        # The remaining random draws are made in a fixed sequence so that a
        # seeded generator always yields the same documents.
        eta = (datetime.now() + timedelta(days=random.randint(1, 5))).strftime("%Y-%m-%d")
        touched = (datetime.now() - timedelta(hours=random.randint(1, 24))).strftime("%Y-%m-%d %H:%M:%S")
        if state == "delivered":
            transit_days = random.randint(1, 7)
        else:
            transit_days = None
        pounds = random.randint(1, 50)
        length = random.randint(6, 24)
        width = random.randint(6, 24)
        height = random.randint(6, 24)

        result.append({
            "tracking_number": number,
            "tracking_id": number,
            "reference_number": number,
            "user_id": owner_id,
            "customer_name": owner_name,
            "recipient_name": owner_name,
            "status": state,
            "origin": start_city,
            "destination": end_city,
            "current_location": located_at,
            "estimated_delivery": eta,
            "last_updated": touched,
            "created_at": shipped_at,
            "delivery_time_days": transit_days,
            "tracking_events": events,
            "description": f"Package from {start_city} to {end_city}",
            "weight": f"{pounds} lbs",
            "dimensions": f"{length}x{width}x{height} inches"
        })

    return result
187
+
188
def create_sample_delivery_routes():
    """Build the five sample delivery-route documents.

    Returns:
        A list of route dicts with ``origin``, ``destination``,
        ``route_name``, ``estimated_days``, ``service_type``,
        ``distance_miles`` and ``active`` (always ``True``).
    """
    field_names = (
        "origin", "destination", "route_name",
        "estimated_days", "service_type", "distance_miles",
    )
    route_rows = (
        ("New York", "Miami", "NYC-MIA Express", 2, "Express", 1280),
        ("Los Angeles", "Seattle", "LAX-SEA Standard", 3, "Standard", 1135),
        ("Chicago", "Boston", "CHI-BOS Priority", 2, "Priority", 983),
        ("Houston", "Atlanta", "HOU-ATL Standard", 3, "Standard", 789),
        ("Phoenix", "Denver", "PHX-DEN Express", 1, "Express", 602),
    )

    documents = []
    for row in route_rows:
        route = dict(zip(field_names, row))
        # Every sample route is currently in service.
        route["active"] = True
        documents.append(route)
    return documents
238
+
239
def create_sample_tracking_history():
    """Build five archived, already-delivered tracking records.

    Tracking numbers run from ``OLD001`` to ``OLD005``. The two dates and the
    final location of each record are drawn from the ``random`` module.

    Returns:
        A list of five history dicts.
    """
    day_format = "%Y-%m-%d"
    drop_off_cities = ["Miami, FL", "Seattle, WA", "Boston, MA"]

    def archived_record(sequence):
        # Draw order (update age, city, delivery age) is fixed so that a
        # seeded generator always yields the same records.
        update_age = random.randint(30, 365)
        updated_on = (datetime.now() - timedelta(days=update_age)).strftime(day_format)
        city = random.choice(drop_off_cities)
        delivery_age = random.randint(30, 365)
        delivered_on = (datetime.now() - timedelta(days=delivery_age)).strftime(day_format)
        return {
            "tracking_number": f"OLD{sequence:03d}",
            "status": "delivered",
            "last_updated": updated_on,
            "final_location": city,
            "delivery_date": delivered_on,
            "archived": True
        }

    return [archived_record(sequence) for sequence in range(1, 6)]
253
+
254
def create_sample_service_alerts():
    """Build the three sample service-alert documents.

    Returns:
        A list of alert dicts: two ``active`` alerts and one ``resolved``
        alert, each with a ``created_at`` timestamp relative to the current
        time.
    """
    def make_alert(title, description, status, severity, locations,
                   delay_days, resolution, priority, age_days):
        return {
            "title": title,
            "description": description,
            "status": status,
            "severity": severity,
            "affected_locations": locations,
            "estimated_delay_days": delay_days,
            # NOTE(review): resolution dates are fixed literals while
            # created_at moves with the clock - confirm this is intended.
            "estimated_resolution": resolution,
            "priority": priority,
            "created_at": datetime.now() - timedelta(days=age_days)
        }

    weather_delay = make_alert(
        "Weather Delay - Northeast Region",
        "Heavy snow affecting deliveries in New York, Boston, and surrounding areas. Expect 1-2 day delays.",
        "active",
        "High",
        ["New York", "Boston", "Albany", "Hartford"],
        2,
        "2024-01-15",
        3,
        1,
    )
    road_construction = make_alert(
        "Road Construction - I-95 Corridor",
        "Ongoing road construction between Miami and Jacksonville causing minor delays.",
        "active",
        "Medium",
        ["Miami", "Jacksonville", "Fort Lauderdale"],
        1,
        "2024-02-01",
        2,
        7,
    )
    holiday_schedule = make_alert(
        "Holiday Schedule - Thanksgiving Week",
        "Modified delivery schedule during Thanksgiving week. Some delays expected.",
        "resolved",
        "Low",
        ["Nationwide"],
        1,
        "2023-11-27",
        1,
        60,
    )
    return [weather_delay, road_construction, holiday_schedule]
292
+
293
def insert_sample_data():
    """Replace the contents of the sample collections and build their indexes.

    For each of ``users``, ``packages``, ``delivery_routes``,
    ``tracking_history`` and ``service_alerts`` this deletes every existing
    document, inserts the freshly generated sample documents, then creates
    the lookup indexes and prints a per-collection document count.

    Raises:
        Exception: Any connection or database error is logged and re-raised.
            The MongoDB client is closed in every case.
    """
    client = None
    try:
        client, db = get_mongodb_connection()

        # Collections to populate
        collections_data = {
            'users': create_sample_users(),
            'packages': create_sample_packages(),
            'delivery_routes': create_sample_delivery_routes(),
            'tracking_history': create_sample_tracking_history(),
            'service_alerts': create_sample_service_alerts()
        }

        # Insert data into each collection
        for collection_name, data in collections_data.items():
            collection = db[collection_name]

            # Clear existing data (optional - remove these lines to keep
            # existing data). Note the user documents carry fixed _id values,
            # so re-inserting them without clearing would raise duplicate-key
            # errors.
            result = collection.delete_many({})
            logger.info(f"Cleared {result.deleted_count} existing documents from {collection_name}")

            # Insert new data
            if data:
                result = collection.insert_many(data)
                logger.info(f"Inserted {len(result.inserted_ids)} documents into {collection_name}")
            else:
                logger.info(f"No data to insert into {collection_name}")

        # Create useful indexes for better performance
        logger.info("Creating indexes for better performance...")

        # Indexes for packages collection
        db.packages.create_index("tracking_number")
        db.packages.create_index("user_id")
        db.packages.create_index("status")
        db.packages.create_index([("origin", 1), ("destination", 1)])

        # Indexes for users collection
        db.users.create_index("user_id")
        db.users.create_index("email")
        db.users.create_index("phone")

        # Indexes for delivery_routes collection
        db.delivery_routes.create_index([("origin", 1), ("destination", 1)])

        # Indexes for service_alerts collection
        db.service_alerts.create_index("status")
        db.service_alerts.create_index("affected_locations")

        logger.info("Successfully created all indexes")

        # Print summary
        print("\n" + "="*60)
        print("SAMPLE DATA INSERTION COMPLETE!")
        print("="*60)
        for collection_name in collections_data:
            count = db[collection_name].count_documents({})
            print(f"{collection_name.upper()}: {count} documents")
        print("="*60)
        print("\nYour MongoDB database is now ready for testing!")
        print("You can now run your chatbot and test with sample tracking numbers like:")
        print("- TRK001, TRK002, ABC123, XYZ999")
        print("- User IDs: USER001, USER002, USER003, USER004")
        print("- Or search by email: john.smith@email.com")
        print("\n")

    except Exception as e:
        logger.error(f"Error inserting sample data: {e}")
        raise

    finally:
        # Release the connection whether or not the inserts succeeded; the
        # client is still None if the connection itself could not be opened.
        if client is not None:
            client.close()
365
+
366
def verify_data():
    """Run a few sample lookups and report whether the sample data is present.

    Checks package ``TRK001``, the user with email ``john.smith@email.com``,
    a delivery route whose origin matches "New York", and one active service
    alert. A line is printed for every check, and the closing summary says
    whether all of them found a document.

    Raises:
        Exception: Any connection or database error is logged and re-raised.
            The MongoDB client is closed in every case.
    """
    client = None
    try:
        client, db = get_mongodb_connection()

        print("\n" + "="*60)
        print("DATA VERIFICATION")
        print("="*60)

        # Test some sample queries that your tools will use
        print("Testing sample queries:")

        # Count lookups that come back empty so the summary cannot claim
        # success when nothing was found.
        missing = 0

        # Test tracking
        package = db.packages.find_one({"tracking_number": "TRK001"})
        if package:
            print(f"✅ Found package TRK001: {package['status']} - {package['destination']}")
        else:
            missing += 1
            print("❌ Package TRK001 not found")

        # Test user lookup
        user = db.users.find_one({"email": "john.smith@email.com"})
        if user:
            print(f"✅ Found user: {user['name']} ({user['email']})")
        else:
            missing += 1
            print("❌ User john.smith@email.com not found")

        # Test route lookup (case-insensitive match on the origin)
        route = db.delivery_routes.find_one({"origin": {"$regex": "New York", "$options": "i"}})
        if route:
            print(f"✅ Found route: {route['route_name']} - {route['estimated_days']} days")
        else:
            missing += 1
            print("❌ No route found with origin New York")

        # Test service alerts
        alert = db.service_alerts.find_one({"status": "active"})
        if alert:
            print(f"✅ Found active alert: {alert['title']}")
        else:
            missing += 1
            print("❌ No active service alert found")

        print("="*60)
        if missing:
            print(f"❌ {missing} verification check(s) failed")
        else:
            print("✅ All verification tests passed!")

    except Exception as e:
        logger.error(f"Error during verification: {e}")
        raise

    finally:
        # Release the connection whether or not the checks succeeded; the
        # client is still None if the connection itself could not be opened.
        if client is not None:
            client.close()
407
+
408
# Script entry point: load the sample data, verify it, and exit with status 1
# if either step raises.
if __name__ == "__main__":
    print("Sparrow Logistics MongoDB Sample Data Insertion")
    print("=" * 50)

    try:
        # Insert sample data
        insert_sample_data()

        # Verify the data
        verify_data()

        print("🎉 Sample data setup completed successfully!")
        print("Your chatbot is now ready to test with real MongoDB data.")

    except Exception as e:
        print(f"❌ Error setting up sample data: {e}")
        print("Please check your .env file and MongoDB connection.")
        # The bare exit() helper is injected by the site module for
        # interactive use and is not guaranteed to exist in every run mode;
        # raising SystemExit gives the same exit status without it.
        raise SystemExit(1)