ai-expense-auditor / setup_database.py
dhoshan's picture
Add core application files and configuration
b6ecb5b verified
"""
Database initialization script for the Expense Auditor.
Creates the SQLite database with expense policies and sample historical data.
"""
import sqlite3
import os
from datetime import datetime, timedelta
import random
def init_database(db_path="database/expense_policies.db"):
"""Initialize the database with schema and sample data."""
# Create database directory if it doesn't exist
os.makedirs(os.path.dirname(db_path), exist_ok=True)
# Connect to database (creates if doesn't exist)
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
print("πŸ“Š Initializing Expense Auditor Database...")
print("=" * 60)
# ==================== CREATE TABLES ====================
# Expense policies table
cursor.execute("""
CREATE TABLE IF NOT EXISTS expense_policies (
id INTEGER PRIMARY KEY AUTOINCREMENT,
category TEXT UNIQUE NOT NULL,
max_amount REAL NOT NULL,
requires_approval INTEGER DEFAULT 0,
notes TEXT,
created_at TEXT DEFAULT CURRENT_TIMESTAMP
)
""")
# Historical expenses table (for fraud detection)
cursor.execute("""
CREATE TABLE IF NOT EXISTS historical_expenses (
id INTEGER PRIMARY KEY AUTOINCREMENT,
merchant TEXT NOT NULL,
amount REAL NOT NULL,
date TEXT NOT NULL,
category TEXT NOT NULL,
employee_id TEXT,
approved INTEGER DEFAULT 1,
fraud_risk INTEGER DEFAULT 0,
created_at TEXT DEFAULT CURRENT_TIMESTAMP
)
""")
print("βœ… Tables created successfully")
# ==================== INSERT EXPENSE POLICIES ====================
policies = [
('meals', 200.0, 0, 'Meals and refreshments during business hours'),
('hotel', 800.0, 1, 'Hotel accommodation - always requires manager approval'),
('transport', 300.0, 0, 'Transportation and fuel expenses'),
('office_supplies', 500.0, 0, 'Office supplies and equipment'),
('other', 1000.0, 1, 'Other expenses - requires manager approval')
]
cursor.executemany("""
INSERT OR REPLACE INTO expense_policies (category, max_amount, requires_approval, notes)
VALUES (?, ?, ?, ?)
""", policies)
print(f"βœ… Inserted {len(policies)} expense policies")
# ==================== INSERT SAMPLE HISTORICAL DATA ====================
# Sample merchants
merchants = [
'Al Baik Restaurant',
'Hyatt Regency Riyadh',
'Uber Rides',
'Jarir Bookstore',
'Starbucks',
'Najm Fuel Station',
'Marriott Hotel',
'Sadad Office Supplies',
'Pizza Hut',
'Holiday Inn'
]
categories_map = {
'Al Baik Restaurant': 'meals',
'Hyatt Regency Riyadh': 'hotel',
'Uber Rides': 'transport',
'Jarir Bookstore': 'office_supplies',
'Starbucks': 'meals',
'Najm Fuel Station': 'transport',
'Marriott Hotel': 'hotel',
'Sadad Office Supplies': 'office_supplies',
'Pizza Hut': 'meals',
'Holiday Inn': 'hotel'
}
# Generate 50 historical expenses
historical_data = []
base_date = datetime.now() - timedelta(days=180) # 6 months ago
for i in range(50):
merchant = random.choice(merchants)
category = categories_map.get(merchant, 'other')
# Generate realistic amounts based on category
if category == 'meals':
amount = round(random.uniform(30, 180), 2)
elif category == 'hotel':
amount = round(random.uniform(300, 750), 2)
elif category == 'transport':
amount = round(random.uniform(20, 250), 2)
elif category == 'office_supplies':
amount = round(random.uniform(50, 400), 2)
else:
amount = round(random.uniform(100, 900), 2)
# Random date within last 6 months
days_offset = random.randint(0, 180)
expense_date = (base_date + timedelta(days=days_offset)).strftime('%Y-%m-%d')
# Random employee ID
employee_id = f"EMP{random.randint(1000, 9999)}"
# Most are approved, some flagged
approved = 1 if random.random() > 0.1 else 0
fraud_risk = random.randint(0, 35) if approved else random.randint(40, 85)
historical_data.append((
merchant,
amount,
expense_date,
category,
employee_id,
approved,
fraud_risk
))
cursor.executemany("""
INSERT INTO historical_expenses
(merchant, amount, date, category, employee_id, approved, fraud_risk)
VALUES (?, ?, ?, ?, ?, ?, ?)
""", historical_data)
print(f"βœ… Inserted {len(historical_data)} historical expense records")
# ==================== ADD SOME DUPLICATE PATTERNS ====================
# Add a few duplicates for testing fraud detection
duplicate_data = [
('Al Baik Restaurant', 85.50, (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d'), 'meals', 'EMP2001', 1, 25),
('Al Baik Restaurant', 85.50, (datetime.now() - timedelta(days=45)).strftime('%Y-%m-%d'), 'meals', 'EMP2001', 1, 30),
('Al Baik Restaurant', 85.50, (datetime.now() - timedelta(days=60)).strftime('%Y-%m-%d'), 'meals', 'EMP2001', 0, 65),
]
cursor.executemany("""
INSERT INTO historical_expenses
(merchant, amount, date, category, employee_id, approved, fraud_risk)
VALUES (?, ?, ?, ?, ?, ?, ?)
""", duplicate_data)
print(f"βœ… Added {len(duplicate_data)} duplicate patterns for fraud testing")
# ==================== COMMIT AND CLOSE ====================
conn.commit()
conn.close()
print("=" * 60)
print(f"βœ… Database initialized successfully at: {db_path}")
print("\nπŸ“‹ Summary:")
print(f" - {len(policies)} expense policies")
print(f" - {len(historical_data) + len(duplicate_data)} historical expenses")
print(f" - Ready for fraud detection and policy validation")
return True
def verify_database(db_path="database/expense_policies.db"):
"""Verify database contents."""
if not os.path.exists(db_path):
print(f"❌ Database not found at: {db_path}")
return False
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
print("\nπŸ” Database Verification:")
print("=" * 60)
# Check policies
cursor.execute("SELECT COUNT(*) FROM expense_policies")
policy_count = cursor.fetchone()[0]
print(f"βœ… Expense Policies: {policy_count} records")
# Check historical data
cursor.execute("SELECT COUNT(*) FROM historical_expenses")
history_count = cursor.fetchone()[0]
print(f"βœ… Historical Expenses: {history_count} records")
# Show sample policies
print("\nπŸ“‹ Sample Policies:")
cursor.execute("SELECT category, max_amount, requires_approval FROM expense_policies LIMIT 5")
for row in cursor.fetchall():
approval = "Yes" if row[2] else "No"
print(f" - {row[0]}: {row[1]:.2f} SAR (Approval required: {approval})")
conn.close()
return True
if __name__ == "__main__":
# Initialize database
success = init_database()
if success:
# Verify it worked
verify_database()
print("\n" + "=" * 60)
print("πŸŽ‰ Database setup complete!")
print(" You can now run the Expense Auditor application")
print("=" * 60)