""" Database initialization script for the Expense Auditor. Creates the SQLite database with expense policies and sample historical data. """ import sqlite3 import os from datetime import datetime, timedelta import random def init_database(db_path="database/expense_policies.db"): """Initialize the database with schema and sample data.""" # Create database directory if it doesn't exist os.makedirs(os.path.dirname(db_path), exist_ok=True) # Connect to database (creates if doesn't exist) conn = sqlite3.connect(db_path) cursor = conn.cursor() print("šŸ“Š Initializing Expense Auditor Database...") print("=" * 60) # ==================== CREATE TABLES ==================== # Expense policies table cursor.execute(""" CREATE TABLE IF NOT EXISTS expense_policies ( id INTEGER PRIMARY KEY AUTOINCREMENT, category TEXT UNIQUE NOT NULL, max_amount REAL NOT NULL, requires_approval INTEGER DEFAULT 0, notes TEXT, created_at TEXT DEFAULT CURRENT_TIMESTAMP ) """) # Historical expenses table (for fraud detection) cursor.execute(""" CREATE TABLE IF NOT EXISTS historical_expenses ( id INTEGER PRIMARY KEY AUTOINCREMENT, merchant TEXT NOT NULL, amount REAL NOT NULL, date TEXT NOT NULL, category TEXT NOT NULL, employee_id TEXT, approved INTEGER DEFAULT 1, fraud_risk INTEGER DEFAULT 0, created_at TEXT DEFAULT CURRENT_TIMESTAMP ) """) print("āœ… Tables created successfully") # ==================== INSERT EXPENSE POLICIES ==================== policies = [ ('meals', 200.0, 0, 'Meals and refreshments during business hours'), ('hotel', 800.0, 1, 'Hotel accommodation - always requires manager approval'), ('transport', 300.0, 0, 'Transportation and fuel expenses'), ('office_supplies', 500.0, 0, 'Office supplies and equipment'), ('other', 1000.0, 1, 'Other expenses - requires manager approval') ] cursor.executemany(""" INSERT OR REPLACE INTO expense_policies (category, max_amount, requires_approval, notes) VALUES (?, ?, ?, ?) """, policies) print(f"āœ… Inserted {len(policies)} expense policies") # ==================== INSERT SAMPLE HISTORICAL DATA ==================== # Sample merchants merchants = [ 'Al Baik Restaurant', 'Hyatt Regency Riyadh', 'Uber Rides', 'Jarir Bookstore', 'Starbucks', 'Najm Fuel Station', 'Marriott Hotel', 'Sadad Office Supplies', 'Pizza Hut', 'Holiday Inn' ] categories_map = { 'Al Baik Restaurant': 'meals', 'Hyatt Regency Riyadh': 'hotel', 'Uber Rides': 'transport', 'Jarir Bookstore': 'office_supplies', 'Starbucks': 'meals', 'Najm Fuel Station': 'transport', 'Marriott Hotel': 'hotel', 'Sadad Office Supplies': 'office_supplies', 'Pizza Hut': 'meals', 'Holiday Inn': 'hotel' } # Generate 50 historical expenses historical_data = [] base_date = datetime.now() - timedelta(days=180) # 6 months ago for i in range(50): merchant = random.choice(merchants) category = categories_map.get(merchant, 'other') # Generate realistic amounts based on category if category == 'meals': amount = round(random.uniform(30, 180), 2) elif category == 'hotel': amount = round(random.uniform(300, 750), 2) elif category == 'transport': amount = round(random.uniform(20, 250), 2) elif category == 'office_supplies': amount = round(random.uniform(50, 400), 2) else: amount = round(random.uniform(100, 900), 2) # Random date within last 6 months days_offset = random.randint(0, 180) expense_date = (base_date + timedelta(days=days_offset)).strftime('%Y-%m-%d') # Random employee ID employee_id = f"EMP{random.randint(1000, 9999)}" # Most are approved, some flagged approved = 1 if random.random() > 0.1 else 0 fraud_risk = random.randint(0, 35) if approved else random.randint(40, 85) historical_data.append(( merchant, amount, expense_date, category, employee_id, approved, fraud_risk )) cursor.executemany(""" INSERT INTO historical_expenses (merchant, amount, date, category, employee_id, approved, fraud_risk) VALUES (?, ?, ?, ?, ?, ?, ?) """, historical_data) print(f"āœ… Inserted {len(historical_data)} historical expense records") # ==================== ADD SOME DUPLICATE PATTERNS ==================== # Add a few duplicates for testing fraud detection duplicate_data = [ ('Al Baik Restaurant', 85.50, (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d'), 'meals', 'EMP2001', 1, 25), ('Al Baik Restaurant', 85.50, (datetime.now() - timedelta(days=45)).strftime('%Y-%m-%d'), 'meals', 'EMP2001', 1, 30), ('Al Baik Restaurant', 85.50, (datetime.now() - timedelta(days=60)).strftime('%Y-%m-%d'), 'meals', 'EMP2001', 0, 65), ] cursor.executemany(""" INSERT INTO historical_expenses (merchant, amount, date, category, employee_id, approved, fraud_risk) VALUES (?, ?, ?, ?, ?, ?, ?) """, duplicate_data) print(f"āœ… Added {len(duplicate_data)} duplicate patterns for fraud testing") # ==================== COMMIT AND CLOSE ==================== conn.commit() conn.close() print("=" * 60) print(f"āœ… Database initialized successfully at: {db_path}") print("\nšŸ“‹ Summary:") print(f" - {len(policies)} expense policies") print(f" - {len(historical_data) + len(duplicate_data)} historical expenses") print(f" - Ready for fraud detection and policy validation") return True def verify_database(db_path="database/expense_policies.db"): """Verify database contents.""" if not os.path.exists(db_path): print(f"āŒ Database not found at: {db_path}") return False conn = sqlite3.connect(db_path) cursor = conn.cursor() print("\nšŸ” Database Verification:") print("=" * 60) # Check policies cursor.execute("SELECT COUNT(*) FROM expense_policies") policy_count = cursor.fetchone()[0] print(f"āœ… Expense Policies: {policy_count} records") # Check historical data cursor.execute("SELECT COUNT(*) FROM historical_expenses") history_count = cursor.fetchone()[0] print(f"āœ… Historical Expenses: {history_count} records") # Show sample policies print("\nšŸ“‹ Sample Policies:") cursor.execute("SELECT category, max_amount, requires_approval FROM expense_policies LIMIT 5") for row in cursor.fetchall(): approval = "Yes" if row[2] else "No" print(f" - {row[0]}: {row[1]:.2f} SAR (Approval required: {approval})") conn.close() return True if __name__ == "__main__": # Initialize database success = init_database() if success: # Verify it worked verify_database() print("\n" + "=" * 60) print("šŸŽ‰ Database setup complete!") print(" You can now run the Expense Auditor application") print("=" * 60)