# populate_erp_db.py import sqlite3 import os import pathlib import datetime import random from data.utils.erp_db_init import init_sqlite_db def populate_erp_db(db_path='./data/erp_db.sqlite'): """Populate SQLite database with sample ERP data.""" try: # Ensure database exists if not os.path.exists(db_path): init_sqlite_db(db_path) # Connect to SQLite database conn = sqlite3.connect(db_path) cursor = conn.cursor() # Sample data for customers customers = [ ('John Doe', 'john.doe@example.com', '555-123-4567', '123 Main St, Anytown, USA'), ('Jane Smith', 'jane.smith@example.com', '555-234-5678', '456 Oak Ave, Somewhere, USA'), ('Robert Johnson', 'robert.j@example.com', '555-345-6789', '789 Pine Rd, Nowhere, USA'), ('Emily Davis', 'emily.davis@example.com', '555-456-7890', '101 Maple Dr, Anywhere, USA'), ('Michael Wilson', 'michael.w@example.com', '555-567-8901', '202 Cedar Ln, Everywhere, USA'), ('Sarah Brown', 'sarah.b@example.com', '555-678-9012', '303 Birch Blvd, Somewhere, USA'), ('David Miller', 'david.m@example.com', '555-789-0123', '404 Elm St, Anytown, USA'), ('Jennifer Taylor', 'jennifer.t@example.com', '555-890-1234', '505 Walnut Ave, Nowhere, USA'), ('Christopher Anderson', 'chris.a@example.com', '555-901-2345', '606 Spruce Rd, Anywhere, USA'), ('Lisa Thomas', 'lisa.t@example.com', '555-012-3456', '707 Fir Dr, Everywhere, USA'), ('Daniel Jackson', 'daniel.j@example.com', '555-123-7890', '808 Pine St, Somewhere, USA'), ('Michelle White', 'michelle.w@example.com', '555-234-8901', '909 Oak Rd, Anytown, USA') ] # Sample data for products products = [ ('Laptop Pro', 'High-performance laptop for professionals', 'Electronics', 1299.99, 50, 'LP-001'), ('Smartphone X', 'Latest smartphone with advanced features', 'Electronics', 899.99, 100, 'SP-001'), ('Office Chair', 'Ergonomic office chair', 'Furniture', 199.99, 30, 'OC-001'), ('Desk Lamp', 'LED desk lamp with adjustable brightness', 'Home', 49.99, 75, 'DL-001'), ('Coffee Maker', 'Programmable coffee maker', 'Appliances', 89.99, 40, 'CM-001'), ('Wireless Headphones', 'Noise-cancelling wireless headphones', 'Electronics', 149.99, 60, 'WH-001'), ('Tablet Mini', 'Compact tablet for on-the-go use', 'Electronics', 399.99, 45, 'TM-001'), ('External Hard Drive', '2TB external hard drive', 'Electronics', 129.99, 55, 'EH-001'), ('Wireless Mouse', 'Ergonomic wireless mouse', 'Electronics', 29.99, 80, 'WM-001'), ('Bluetooth Speaker', 'Portable Bluetooth speaker', 'Electronics', 79.99, 65, 'BS-001'), ('Monitor 27"', '27-inch 4K monitor', 'Electronics', 349.99, 35, 'MN-001'), ('Keyboard', 'Mechanical gaming keyboard', 'Electronics', 119.99, 70, 'KB-001'), ('Desk', 'Adjustable standing desk', 'Furniture', 299.99, 25, 'DK-001'), ('Bookshelf', 'Modern 5-tier bookshelf', 'Furniture', 149.99, 20, 'BS-002') ] # Insert customers cursor.executemany(''' INSERT INTO erp_customers (name, email, phone, address) VALUES (?, ?, ?, ?) ''', customers) # Insert products cursor.executemany(''' INSERT INTO erp_products (product_name, description, category, price, stock_quantity, sku) VALUES (?, ?, ?, ?, ?, ?) ''', products) # Get customer IDs for reference cursor.execute('SELECT customer_id FROM erp_customers') customer_ids = [row[0] for row in cursor.fetchall()] # Get product IDs for reference cursor.execute('SELECT product_id, price FROM erp_products') product_data = cursor.fetchall() product_ids = [row[0] for row in product_data] product_prices = {row[0]: row[1] for row in product_data} # Sample data for orders orders = [] order_items = [] invoices = [] order_history = [] # Generate 15 orders for i in range(1, 16): # Randomly select a customer customer_id = random.choice(customer_ids) # Get customer address for shipping cursor.execute('SELECT address FROM erp_customers WHERE customer_id = ?', (customer_id,)) address = cursor.fetchone()[0] # Generate order date (within the last 60 days) days_ago = random.randint(1, 60) order_date = (datetime.datetime.now() - datetime.timedelta(days=days_ago)).strftime('%Y-%m-%d') # Generate estimated delivery (3-10 days after order) est_delivery = (datetime.datetime.now() - datetime.timedelta(days=days_ago) + datetime.timedelta(days=random.randint(3, 10))).strftime('%Y-%m-%d') # Determine if order has been delivered delivered = random.random() > 0.3 # 70% chance of being delivered actual_delivery = None if delivered: # Delivery occurred 0-2 days after estimated delivery delivery_offset = random.randint(-1, 2) # Can be early or late actual_delivery = (datetime.datetime.now() - datetime.timedelta(days=days_ago) + datetime.timedelta(days=random.randint(3, 10) + delivery_offset)).strftime('%Y-%m-%d') # Determine order status if days_ago <= 1: status = 'Processing' elif days_ago <= 3: status = 'Shipped' elif delivered: status = 'Delivered' else: status = random.choice(['Processing', 'Shipped', 'In Transit']) # Determine payment status payment_status = random.choice(['Paid', 'Pending', 'Paid', 'Paid']) # 75% chance of being paid # Generate shipping and destination countries shipping_country = random.choice([ 'USA', 'Canada', 'UK', 'Germany', 'France', 'Australia', 'China', 'Japan', 'India', 'Brazil', 'Mexico', 'South Africa', 'Italy', 'Spain', 'Russia', 'South Korea', 'Singapore', 'UAE', 'Netherlands', 'Sweden' ]) destination_country = shipping_country # Usually the same # Previous order (for some customers) previous_order_id = None if i > 5 and random.random() > 0.7: # 30% chance of having a previous order previous_order_id = random.randint(1, i-1) # Add to orders list orders.append(( customer_id, order_date, 0, # Total amount will be updated later status, previous_order_id, est_delivery, actual_delivery, payment_status, address, shipping_country, destination_country )) # Insert orders cursor.executemany(''' INSERT INTO erp_orders ( customer_id, order_date, total_amount, status, previous_order_id, estimated_delivery, actual_delivery, payment_status, shipping_address, shipping_country, destination_country ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) ''', orders) # Get order IDs cursor.execute('SELECT order_id FROM erp_orders') order_ids = [row[0] for row in cursor.fetchall()] # Generate order items for each order for order_id in order_ids: # Each order has 1-5 items num_items = random.randint(1, 5) order_total = 0 # Select random products for this order selected_products = random.sample(product_ids, num_items) for product_id in selected_products: quantity = random.randint(1, 3) unit_price = product_prices[product_id] subtotal = quantity * unit_price order_total += subtotal # Add to order items list order_items.append((order_id, product_id, quantity, unit_price, subtotal)) # Update order total cursor.execute('UPDATE erp_orders SET total_amount = ? WHERE order_id = ?', (order_total, order_id)) # Generate invoice for paid orders cursor.execute('SELECT payment_status FROM erp_orders WHERE order_id = ?', (order_id,)) payment_status = cursor.fetchone()[0] if payment_status == 'Paid': invoice_date = (datetime.datetime.now() - datetime.timedelta(days=random.randint(1, 30))).strftime('%Y-%m-%d') due_date = (datetime.datetime.now() - datetime.timedelta(days=random.randint(1, 15))).strftime('%Y-%m-%d') invoice_number = f'INV-{order_id}-{random.randint(1000, 9999)}' invoices.append((order_id, invoice_date, order_total, 'Net 30', due_date, 1, invoice_number)) # Generate order history entries # Initial status order_history.append(( order_id, (datetime.datetime.now() - datetime.timedelta(days=random.randint(50, 60))).strftime('%Y-%m-%d %H:%M:%S'), 'Order Created', 'New order placed', 'System' )) # Processing status order_history.append(( order_id, (datetime.datetime.now() - datetime.timedelta(days=random.randint(40, 49))).strftime('%Y-%m-%d %H:%M:%S'), 'Processing', 'Order is being processed', 'System' )) # Additional statuses based on current order status cursor.execute('SELECT status FROM erp_orders WHERE order_id = ?', (order_id,)) current_status = cursor.fetchone()[0] if current_status in ['Shipped', 'In Transit', 'Delivered']: order_history.append(( order_id, (datetime.datetime.now() - datetime.timedelta(days=random.randint(30, 39))).strftime('%Y-%m-%d %H:%M:%S'), 'Shipped', 'Order has been shipped', 'Shipping Dept' )) if current_status in ['In Transit', 'Delivered']: order_history.append(( order_id, (datetime.datetime.now() - datetime.timedelta(days=random.randint(20, 29))).strftime('%Y-%m-%d %H:%M:%S'), 'In Transit', 'Order is in transit', 'Shipping Carrier' )) if current_status == 'Delivered': order_history.append(( order_id, (datetime.datetime.now() - datetime.timedelta(days=random.randint(1, 19))).strftime('%Y-%m-%d %H:%M:%S'), 'Delivered', 'Order has been delivered', 'Shipping Carrier' )) # Insert order items cursor.executemany(''' INSERT INTO erp_order_items (order_id, product_id, quantity, unit_price, subtotal) VALUES (?, ?, ?, ?, ?) ''', order_items) # Insert invoices cursor.executemany(''' INSERT INTO erp_invoices (order_id, invoice_date, amount, payment_terms, due_date, is_paid, invoice_number) VALUES (?, ?, ?, ?, ?, ?, ?) ''', invoices) # Insert order history cursor.executemany(''' INSERT INTO erp_order_history (order_id, timestamp, status_change, notes, updated_by) VALUES (?, ?, ?, ?, ?) ''', order_history) # Sample data for global disruptions disruption_types = [ 'Natural Disaster', 'Political Unrest', 'Labor Strike', 'Transportation Issue', 'Customs Delay', 'Weather Event', 'Port Congestion', 'Regulatory Change', 'Supply Shortage', 'Infrastructure Failure', 'Security Threat', 'Health Crisis' ] countries = [ 'USA', 'Canada', 'UK', 'Germany', 'France', 'Australia', 'China', 'Japan', 'India', 'Brazil', 'Mexico', 'South Africa', 'Italy', 'Spain', 'Russia', 'South Korea', 'Singapore', 'UAE', 'Netherlands', 'Sweden' ] global_disruptions = [] # Generate 15 global disruptions for i in range(1, 16): # Select random countries for source and destination source_country = random.choice(countries) # Ensure destination is different from source destination_options = [c for c in countries if c != source_country] destination_country = random.choice(destination_options) # Select random disruption type disruption_type = random.choice(disruption_types) # Generate severity (1-5) severity = random.randint(1, 5) # Generate start date (within the last 90 days) days_ago = random.randint(1, 90) start_date = (datetime.datetime.now() - datetime.timedelta(days=days_ago)).strftime('%Y-%m-%d') # Generate expected end date (1-30 days after start) expected_days_duration = random.randint(1, 30) expected_end_date = (datetime.datetime.now() - datetime.timedelta(days=days_ago) + datetime.timedelta(days=expected_days_duration)).strftime('%Y-%m-%d') # Determine if disruption has ended is_ended = random.random() > 0.6 # 40% chance of being ended actual_end_date = None if is_ended: # Actual end occurred 0-5 days after/before expected end end_offset = random.randint(-5, 5) actual_end_date = (datetime.datetime.now() - datetime.timedelta(days=days_ago) + datetime.timedelta(days=expected_days_duration + end_offset)).strftime('%Y-%m-%d') # Determine if disruption is active is_active = not is_ended # Generate impact hours based on severity impact_hours = severity * random.randint(10, 48) # Generate description descriptions = { 'Natural Disaster': [ f"Severe flooding in {source_country} affecting shipments to {destination_country}", f"Earthquake in {source_country} disrupting supply chain to {destination_country}", f"Hurricane impacting shipping routes between {source_country} and {destination_country}" ], 'Political Unrest': [ f"Protests in {source_country} affecting exports to {destination_country}", f"Trade dispute between {source_country} and {destination_country}", f"Political tensions causing delays in shipments from {source_country} to {destination_country}" ], 'Labor Strike': [ f"Port workers strike in {source_country} affecting shipments to {destination_country}", f"Transportation union strike impacting deliveries between {source_country} and {destination_country}", f"Warehouse workers strike in {source_country} delaying orders to {destination_country}" ], 'Transportation Issue': [ f"Major highway closure between {source_country} and {destination_country}", f"Shipping container shortage affecting routes from {source_country} to {destination_country}", f"Fuel shortage in {source_country} impacting deliveries to {destination_country}" ], 'Customs Delay': [ f"New customs regulations in {destination_country} causing delays from {source_country}", f"Increased inspection rates at {destination_country} border for goods from {source_country}", f"Documentation issues for shipments from {source_country} to {destination_country}" ], 'Weather Event': [ f"Severe snowstorm in {source_country} delaying shipments to {destination_country}", f"Fog at major ports in {source_country} affecting vessels bound for {destination_country}", f"Extreme heat causing transportation issues between {source_country} and {destination_country}" ], 'Port Congestion': [ f"Backlog at {source_country} ports affecting shipments to {destination_country}", f"Vessel scheduling issues at {source_country} ports for routes to {destination_country}", f"Limited berthing capacity at {destination_country} ports for vessels from {source_country}" ], 'Regulatory Change': [ f"New import regulations in {destination_country} affecting goods from {source_country}", f"Export restrictions in {source_country} for shipments to {destination_country}", f"Changed documentation requirements between {source_country} and {destination_country}" ], 'Supply Shortage': [ f"Raw material shortage in {source_country} affecting production for {destination_country}", f"Component shortage impacting products shipped from {source_country} to {destination_country}", f"Limited availability of goods in {source_country} for export to {destination_country}" ], 'Infrastructure Failure': [ f"Bridge collapse on major route between {source_country} and {destination_country}", f"Power outage in {source_country} affecting production for {destination_country}", f"IT system failure impacting customs processing between {source_country} and {destination_country}" ], 'Security Threat': [ f"Increased piracy risk on shipping routes from {source_country} to {destination_country}", f"Security concerns at {source_country} borders affecting shipments to {destination_country}", f"Cybersecurity incident affecting logistics between {source_country} and {destination_country}" ], 'Health Crisis': [ f"Disease outbreak in {source_country} affecting workforce for exports to {destination_country}", f"Quarantine requirements delaying shipments from {source_country} to {destination_country}", f"Health screening causing delays at {destination_country} border for goods from {source_country}" ] } description = random.choice(descriptions.get(disruption_type, [f"{disruption_type} affecting shipments from {source_country} to {destination_country}"])) # Add to global disruptions list global_disruptions.append(( source_country, destination_country, disruption_type, severity, start_date, expected_end_date, actual_end_date, is_active, description, impact_hours )) # Insert global disruptions cursor.executemany(''' INSERT INTO live_global_disruptions ( source_country, destination_country, disruption_type, severity, start_date, expected_end_date, actual_end_date, is_active, description, impact_hours ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) ''', global_disruptions) # Commit changes and close connection conn.commit() conn.close() print(f"Successfully populated ERP database with sample data.") # Print summary of inserted data print(f"Inserted {len(customers)} customers") print(f"Inserted {len(products)} products") print(f"Inserted {len(orders)} orders") print(f"Inserted {len(order_items)} order items") print(f"Inserted {len(invoices)} invoices") print(f"Inserted {len(order_history)} order history entries") print(f"Inserted {len(global_disruptions)} global disruptions") return True except Exception as e: print(f"Error populating SQLite database: {str(e)}") return False if __name__ == "__main__": populate_erp_db()