Spaces:
Configuration error
Configuration error
File size: 7,710 Bytes
b6ecb5b | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 | """
Database initialization script for the Expense Auditor.
Creates the SQLite database with expense policies and sample historical data.
"""
import sqlite3
import os
from datetime import datetime, timedelta
import random
def init_database(db_path="database/expense_policies.db"):
"""Initialize the database with schema and sample data."""
# Create database directory if it doesn't exist
os.makedirs(os.path.dirname(db_path), exist_ok=True)
# Connect to database (creates if doesn't exist)
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
print("π Initializing Expense Auditor Database...")
print("=" * 60)
# ==================== CREATE TABLES ====================
# Expense policies table
cursor.execute("""
CREATE TABLE IF NOT EXISTS expense_policies (
id INTEGER PRIMARY KEY AUTOINCREMENT,
category TEXT UNIQUE NOT NULL,
max_amount REAL NOT NULL,
requires_approval INTEGER DEFAULT 0,
notes TEXT,
created_at TEXT DEFAULT CURRENT_TIMESTAMP
)
""")
# Historical expenses table (for fraud detection)
cursor.execute("""
CREATE TABLE IF NOT EXISTS historical_expenses (
id INTEGER PRIMARY KEY AUTOINCREMENT,
merchant TEXT NOT NULL,
amount REAL NOT NULL,
date TEXT NOT NULL,
category TEXT NOT NULL,
employee_id TEXT,
approved INTEGER DEFAULT 1,
fraud_risk INTEGER DEFAULT 0,
created_at TEXT DEFAULT CURRENT_TIMESTAMP
)
""")
print("β
Tables created successfully")
# ==================== INSERT EXPENSE POLICIES ====================
policies = [
('meals', 200.0, 0, 'Meals and refreshments during business hours'),
('hotel', 800.0, 1, 'Hotel accommodation - always requires manager approval'),
('transport', 300.0, 0, 'Transportation and fuel expenses'),
('office_supplies', 500.0, 0, 'Office supplies and equipment'),
('other', 1000.0, 1, 'Other expenses - requires manager approval')
]
cursor.executemany("""
INSERT OR REPLACE INTO expense_policies (category, max_amount, requires_approval, notes)
VALUES (?, ?, ?, ?)
""", policies)
print(f"β
Inserted {len(policies)} expense policies")
# ==================== INSERT SAMPLE HISTORICAL DATA ====================
# Sample merchants
merchants = [
'Al Baik Restaurant',
'Hyatt Regency Riyadh',
'Uber Rides',
'Jarir Bookstore',
'Starbucks',
'Najm Fuel Station',
'Marriott Hotel',
'Sadad Office Supplies',
'Pizza Hut',
'Holiday Inn'
]
categories_map = {
'Al Baik Restaurant': 'meals',
'Hyatt Regency Riyadh': 'hotel',
'Uber Rides': 'transport',
'Jarir Bookstore': 'office_supplies',
'Starbucks': 'meals',
'Najm Fuel Station': 'transport',
'Marriott Hotel': 'hotel',
'Sadad Office Supplies': 'office_supplies',
'Pizza Hut': 'meals',
'Holiday Inn': 'hotel'
}
# Generate 50 historical expenses
historical_data = []
base_date = datetime.now() - timedelta(days=180) # 6 months ago
for i in range(50):
merchant = random.choice(merchants)
category = categories_map.get(merchant, 'other')
# Generate realistic amounts based on category
if category == 'meals':
amount = round(random.uniform(30, 180), 2)
elif category == 'hotel':
amount = round(random.uniform(300, 750), 2)
elif category == 'transport':
amount = round(random.uniform(20, 250), 2)
elif category == 'office_supplies':
amount = round(random.uniform(50, 400), 2)
else:
amount = round(random.uniform(100, 900), 2)
# Random date within last 6 months
days_offset = random.randint(0, 180)
expense_date = (base_date + timedelta(days=days_offset)).strftime('%Y-%m-%d')
# Random employee ID
employee_id = f"EMP{random.randint(1000, 9999)}"
# Most are approved, some flagged
approved = 1 if random.random() > 0.1 else 0
fraud_risk = random.randint(0, 35) if approved else random.randint(40, 85)
historical_data.append((
merchant,
amount,
expense_date,
category,
employee_id,
approved,
fraud_risk
))
cursor.executemany("""
INSERT INTO historical_expenses
(merchant, amount, date, category, employee_id, approved, fraud_risk)
VALUES (?, ?, ?, ?, ?, ?, ?)
""", historical_data)
print(f"β
Inserted {len(historical_data)} historical expense records")
# ==================== ADD SOME DUPLICATE PATTERNS ====================
# Add a few duplicates for testing fraud detection
duplicate_data = [
('Al Baik Restaurant', 85.50, (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d'), 'meals', 'EMP2001', 1, 25),
('Al Baik Restaurant', 85.50, (datetime.now() - timedelta(days=45)).strftime('%Y-%m-%d'), 'meals', 'EMP2001', 1, 30),
('Al Baik Restaurant', 85.50, (datetime.now() - timedelta(days=60)).strftime('%Y-%m-%d'), 'meals', 'EMP2001', 0, 65),
]
cursor.executemany("""
INSERT INTO historical_expenses
(merchant, amount, date, category, employee_id, approved, fraud_risk)
VALUES (?, ?, ?, ?, ?, ?, ?)
""", duplicate_data)
print(f"β
Added {len(duplicate_data)} duplicate patterns for fraud testing")
# ==================== COMMIT AND CLOSE ====================
conn.commit()
conn.close()
print("=" * 60)
print(f"β
Database initialized successfully at: {db_path}")
print("\nπ Summary:")
print(f" - {len(policies)} expense policies")
print(f" - {len(historical_data) + len(duplicate_data)} historical expenses")
print(f" - Ready for fraud detection and policy validation")
return True
def verify_database(db_path="database/expense_policies.db"):
"""Verify database contents."""
if not os.path.exists(db_path):
print(f"β Database not found at: {db_path}")
return False
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
print("\nπ Database Verification:")
print("=" * 60)
# Check policies
cursor.execute("SELECT COUNT(*) FROM expense_policies")
policy_count = cursor.fetchone()[0]
print(f"β
Expense Policies: {policy_count} records")
# Check historical data
cursor.execute("SELECT COUNT(*) FROM historical_expenses")
history_count = cursor.fetchone()[0]
print(f"β
Historical Expenses: {history_count} records")
# Show sample policies
print("\nπ Sample Policies:")
cursor.execute("SELECT category, max_amount, requires_approval FROM expense_policies LIMIT 5")
for row in cursor.fetchall():
approval = "Yes" if row[2] else "No"
print(f" - {row[0]}: {row[1]:.2f} SAR (Approval required: {approval})")
conn.close()
return True
if __name__ == "__main__":
# Initialize database
success = init_database()
if success:
# Verify it worked
verify_database()
print("\n" + "=" * 60)
print("π Database setup complete!")
print(" You can now run the Expense Auditor application")
print("=" * 60) |