Spaces:
Configuration error
Configuration error
| """ | |
| Database initialization script for the Expense Auditor. | |
| Creates the SQLite database with expense policies and sample historical data. | |
| """ | |
| import sqlite3 | |
| import os | |
| from datetime import datetime, timedelta | |
| import random | |
| def init_database(db_path="database/expense_policies.db"): | |
| """Initialize the database with schema and sample data.""" | |
| # Create database directory if it doesn't exist | |
| os.makedirs(os.path.dirname(db_path), exist_ok=True) | |
| # Connect to database (creates if doesn't exist) | |
| conn = sqlite3.connect(db_path) | |
| cursor = conn.cursor() | |
| print("π Initializing Expense Auditor Database...") | |
| print("=" * 60) | |
| # ==================== CREATE TABLES ==================== | |
| # Expense policies table | |
| cursor.execute(""" | |
| CREATE TABLE IF NOT EXISTS expense_policies ( | |
| id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| category TEXT UNIQUE NOT NULL, | |
| max_amount REAL NOT NULL, | |
| requires_approval INTEGER DEFAULT 0, | |
| notes TEXT, | |
| created_at TEXT DEFAULT CURRENT_TIMESTAMP | |
| ) | |
| """) | |
| # Historical expenses table (for fraud detection) | |
| cursor.execute(""" | |
| CREATE TABLE IF NOT EXISTS historical_expenses ( | |
| id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| merchant TEXT NOT NULL, | |
| amount REAL NOT NULL, | |
| date TEXT NOT NULL, | |
| category TEXT NOT NULL, | |
| employee_id TEXT, | |
| approved INTEGER DEFAULT 1, | |
| fraud_risk INTEGER DEFAULT 0, | |
| created_at TEXT DEFAULT CURRENT_TIMESTAMP | |
| ) | |
| """) | |
| print("β Tables created successfully") | |
| # ==================== INSERT EXPENSE POLICIES ==================== | |
| policies = [ | |
| ('meals', 200.0, 0, 'Meals and refreshments during business hours'), | |
| ('hotel', 800.0, 1, 'Hotel accommodation - always requires manager approval'), | |
| ('transport', 300.0, 0, 'Transportation and fuel expenses'), | |
| ('office_supplies', 500.0, 0, 'Office supplies and equipment'), | |
| ('other', 1000.0, 1, 'Other expenses - requires manager approval') | |
| ] | |
| cursor.executemany(""" | |
| INSERT OR REPLACE INTO expense_policies (category, max_amount, requires_approval, notes) | |
| VALUES (?, ?, ?, ?) | |
| """, policies) | |
| print(f"β Inserted {len(policies)} expense policies") | |
| # ==================== INSERT SAMPLE HISTORICAL DATA ==================== | |
| # Sample merchants | |
| merchants = [ | |
| 'Al Baik Restaurant', | |
| 'Hyatt Regency Riyadh', | |
| 'Uber Rides', | |
| 'Jarir Bookstore', | |
| 'Starbucks', | |
| 'Najm Fuel Station', | |
| 'Marriott Hotel', | |
| 'Sadad Office Supplies', | |
| 'Pizza Hut', | |
| 'Holiday Inn' | |
| ] | |
| categories_map = { | |
| 'Al Baik Restaurant': 'meals', | |
| 'Hyatt Regency Riyadh': 'hotel', | |
| 'Uber Rides': 'transport', | |
| 'Jarir Bookstore': 'office_supplies', | |
| 'Starbucks': 'meals', | |
| 'Najm Fuel Station': 'transport', | |
| 'Marriott Hotel': 'hotel', | |
| 'Sadad Office Supplies': 'office_supplies', | |
| 'Pizza Hut': 'meals', | |
| 'Holiday Inn': 'hotel' | |
| } | |
| # Generate 50 historical expenses | |
| historical_data = [] | |
| base_date = datetime.now() - timedelta(days=180) # 6 months ago | |
| for i in range(50): | |
| merchant = random.choice(merchants) | |
| category = categories_map.get(merchant, 'other') | |
| # Generate realistic amounts based on category | |
| if category == 'meals': | |
| amount = round(random.uniform(30, 180), 2) | |
| elif category == 'hotel': | |
| amount = round(random.uniform(300, 750), 2) | |
| elif category == 'transport': | |
| amount = round(random.uniform(20, 250), 2) | |
| elif category == 'office_supplies': | |
| amount = round(random.uniform(50, 400), 2) | |
| else: | |
| amount = round(random.uniform(100, 900), 2) | |
| # Random date within last 6 months | |
| days_offset = random.randint(0, 180) | |
| expense_date = (base_date + timedelta(days=days_offset)).strftime('%Y-%m-%d') | |
| # Random employee ID | |
| employee_id = f"EMP{random.randint(1000, 9999)}" | |
| # Most are approved, some flagged | |
| approved = 1 if random.random() > 0.1 else 0 | |
| fraud_risk = random.randint(0, 35) if approved else random.randint(40, 85) | |
| historical_data.append(( | |
| merchant, | |
| amount, | |
| expense_date, | |
| category, | |
| employee_id, | |
| approved, | |
| fraud_risk | |
| )) | |
| cursor.executemany(""" | |
| INSERT INTO historical_expenses | |
| (merchant, amount, date, category, employee_id, approved, fraud_risk) | |
| VALUES (?, ?, ?, ?, ?, ?, ?) | |
| """, historical_data) | |
| print(f"β Inserted {len(historical_data)} historical expense records") | |
| # ==================== ADD SOME DUPLICATE PATTERNS ==================== | |
| # Add a few duplicates for testing fraud detection | |
| duplicate_data = [ | |
| ('Al Baik Restaurant', 85.50, (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d'), 'meals', 'EMP2001', 1, 25), | |
| ('Al Baik Restaurant', 85.50, (datetime.now() - timedelta(days=45)).strftime('%Y-%m-%d'), 'meals', 'EMP2001', 1, 30), | |
| ('Al Baik Restaurant', 85.50, (datetime.now() - timedelta(days=60)).strftime('%Y-%m-%d'), 'meals', 'EMP2001', 0, 65), | |
| ] | |
| cursor.executemany(""" | |
| INSERT INTO historical_expenses | |
| (merchant, amount, date, category, employee_id, approved, fraud_risk) | |
| VALUES (?, ?, ?, ?, ?, ?, ?) | |
| """, duplicate_data) | |
| print(f"β Added {len(duplicate_data)} duplicate patterns for fraud testing") | |
| # ==================== COMMIT AND CLOSE ==================== | |
| conn.commit() | |
| conn.close() | |
| print("=" * 60) | |
| print(f"β Database initialized successfully at: {db_path}") | |
| print("\nπ Summary:") | |
| print(f" - {len(policies)} expense policies") | |
| print(f" - {len(historical_data) + len(duplicate_data)} historical expenses") | |
| print(f" - Ready for fraud detection and policy validation") | |
| return True | |
| def verify_database(db_path="database/expense_policies.db"): | |
| """Verify database contents.""" | |
| if not os.path.exists(db_path): | |
| print(f"β Database not found at: {db_path}") | |
| return False | |
| conn = sqlite3.connect(db_path) | |
| cursor = conn.cursor() | |
| print("\nπ Database Verification:") | |
| print("=" * 60) | |
| # Check policies | |
| cursor.execute("SELECT COUNT(*) FROM expense_policies") | |
| policy_count = cursor.fetchone()[0] | |
| print(f"β Expense Policies: {policy_count} records") | |
| # Check historical data | |
| cursor.execute("SELECT COUNT(*) FROM historical_expenses") | |
| history_count = cursor.fetchone()[0] | |
| print(f"β Historical Expenses: {history_count} records") | |
| # Show sample policies | |
| print("\nπ Sample Policies:") | |
| cursor.execute("SELECT category, max_amount, requires_approval FROM expense_policies LIMIT 5") | |
| for row in cursor.fetchall(): | |
| approval = "Yes" if row[2] else "No" | |
| print(f" - {row[0]}: {row[1]:.2f} SAR (Approval required: {approval})") | |
| conn.close() | |
| return True | |
| if __name__ == "__main__": | |
| # Initialize database | |
| success = init_database() | |
| if success: | |
| # Verify it worked | |
| verify_database() | |
| print("\n" + "=" * 60) | |
| print("π Database setup complete!") | |
| print(" You can now run the Expense Auditor application") | |
| print("=" * 60) |