File size: 7,710 Bytes
b6ecb5b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
"""
Database initialization script for the Expense Auditor.
Creates the SQLite database with expense policies and sample historical data.
"""

import sqlite3
import os
from datetime import datetime, timedelta
import random


def init_database(db_path="database/expense_policies.db"):
    """Initialize the database with schema and sample data."""
    
    # Create database directory if it doesn't exist
    os.makedirs(os.path.dirname(db_path), exist_ok=True)
    
    # Connect to database (creates if doesn't exist)
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    
    print("πŸ“Š Initializing Expense Auditor Database...")
    print("=" * 60)
    
    # ==================== CREATE TABLES ====================
    
    # Expense policies table
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS expense_policies (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            category TEXT UNIQUE NOT NULL,
            max_amount REAL NOT NULL,
            requires_approval INTEGER DEFAULT 0,
            notes TEXT,
            created_at TEXT DEFAULT CURRENT_TIMESTAMP
        )
    """)
    
    # Historical expenses table (for fraud detection)
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS historical_expenses (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            merchant TEXT NOT NULL,
            amount REAL NOT NULL,
            date TEXT NOT NULL,
            category TEXT NOT NULL,
            employee_id TEXT,
            approved INTEGER DEFAULT 1,
            fraud_risk INTEGER DEFAULT 0,
            created_at TEXT DEFAULT CURRENT_TIMESTAMP
        )
    """)
    
    print("βœ… Tables created successfully")
    
    # ==================== INSERT EXPENSE POLICIES ====================
    
    policies = [
        ('meals', 200.0, 0, 'Meals and refreshments during business hours'),
        ('hotel', 800.0, 1, 'Hotel accommodation - always requires manager approval'),
        ('transport', 300.0, 0, 'Transportation and fuel expenses'),
        ('office_supplies', 500.0, 0, 'Office supplies and equipment'),
        ('other', 1000.0, 1, 'Other expenses - requires manager approval')
    ]
    
    cursor.executemany("""
        INSERT OR REPLACE INTO expense_policies (category, max_amount, requires_approval, notes)
        VALUES (?, ?, ?, ?)
    """, policies)
    
    print(f"βœ… Inserted {len(policies)} expense policies")
    
    # ==================== INSERT SAMPLE HISTORICAL DATA ====================
    
    # Sample merchants
    merchants = [
        'Al Baik Restaurant',
        'Hyatt Regency Riyadh',
        'Uber Rides',
        'Jarir Bookstore',
        'Starbucks',
        'Najm Fuel Station',
        'Marriott Hotel',
        'Sadad Office Supplies',
        'Pizza Hut',
        'Holiday Inn'
    ]
    
    categories_map = {
        'Al Baik Restaurant': 'meals',
        'Hyatt Regency Riyadh': 'hotel',
        'Uber Rides': 'transport',
        'Jarir Bookstore': 'office_supplies',
        'Starbucks': 'meals',
        'Najm Fuel Station': 'transport',
        'Marriott Hotel': 'hotel',
        'Sadad Office Supplies': 'office_supplies',
        'Pizza Hut': 'meals',
        'Holiday Inn': 'hotel'
    }
    
    # Generate 50 historical expenses
    historical_data = []
    base_date = datetime.now() - timedelta(days=180)  # 6 months ago
    
    for i in range(50):
        merchant = random.choice(merchants)
        category = categories_map.get(merchant, 'other')
        
        # Generate realistic amounts based on category
        if category == 'meals':
            amount = round(random.uniform(30, 180), 2)
        elif category == 'hotel':
            amount = round(random.uniform(300, 750), 2)
        elif category == 'transport':
            amount = round(random.uniform(20, 250), 2)
        elif category == 'office_supplies':
            amount = round(random.uniform(50, 400), 2)
        else:
            amount = round(random.uniform(100, 900), 2)
        
        # Random date within last 6 months
        days_offset = random.randint(0, 180)
        expense_date = (base_date + timedelta(days=days_offset)).strftime('%Y-%m-%d')
        
        # Random employee ID
        employee_id = f"EMP{random.randint(1000, 9999)}"
        
        # Most are approved, some flagged
        approved = 1 if random.random() > 0.1 else 0
        fraud_risk = random.randint(0, 35) if approved else random.randint(40, 85)
        
        historical_data.append((
            merchant,
            amount,
            expense_date,
            category,
            employee_id,
            approved,
            fraud_risk
        ))
    
    cursor.executemany("""
        INSERT INTO historical_expenses 
        (merchant, amount, date, category, employee_id, approved, fraud_risk)
        VALUES (?, ?, ?, ?, ?, ?, ?)
    """, historical_data)
    
    print(f"βœ… Inserted {len(historical_data)} historical expense records")
    
    # ==================== ADD SOME DUPLICATE PATTERNS ====================
    
    # Add a few duplicates for testing fraud detection
    duplicate_data = [
        ('Al Baik Restaurant', 85.50, (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d'), 'meals', 'EMP2001', 1, 25),
        ('Al Baik Restaurant', 85.50, (datetime.now() - timedelta(days=45)).strftime('%Y-%m-%d'), 'meals', 'EMP2001', 1, 30),
        ('Al Baik Restaurant', 85.50, (datetime.now() - timedelta(days=60)).strftime('%Y-%m-%d'), 'meals', 'EMP2001', 0, 65),
    ]
    
    cursor.executemany("""
        INSERT INTO historical_expenses 
        (merchant, amount, date, category, employee_id, approved, fraud_risk)
        VALUES (?, ?, ?, ?, ?, ?, ?)
    """, duplicate_data)
    
    print(f"βœ… Added {len(duplicate_data)} duplicate patterns for fraud testing")
    
    # ==================== COMMIT AND CLOSE ====================
    
    conn.commit()
    conn.close()
    
    print("=" * 60)
    print(f"βœ… Database initialized successfully at: {db_path}")
    print("\nπŸ“‹ Summary:")
    print(f"   - {len(policies)} expense policies")
    print(f"   - {len(historical_data) + len(duplicate_data)} historical expenses")
    print(f"   - Ready for fraud detection and policy validation")
    
    return True


def verify_database(db_path="database/expense_policies.db"):
    """Verify database contents."""
    
    if not os.path.exists(db_path):
        print(f"❌ Database not found at: {db_path}")
        return False
    
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    
    print("\nπŸ” Database Verification:")
    print("=" * 60)
    
    # Check policies
    cursor.execute("SELECT COUNT(*) FROM expense_policies")
    policy_count = cursor.fetchone()[0]
    print(f"βœ… Expense Policies: {policy_count} records")
    
    # Check historical data
    cursor.execute("SELECT COUNT(*) FROM historical_expenses")
    history_count = cursor.fetchone()[0]
    print(f"βœ… Historical Expenses: {history_count} records")
    
    # Show sample policies
    print("\nπŸ“‹ Sample Policies:")
    cursor.execute("SELECT category, max_amount, requires_approval FROM expense_policies LIMIT 5")
    for row in cursor.fetchall():
        approval = "Yes" if row[2] else "No"
        print(f"   - {row[0]}: {row[1]:.2f} SAR (Approval required: {approval})")
    
    conn.close()
    return True


if __name__ == "__main__":
    # Initialize database
    success = init_database()
    
    if success:
        # Verify it worked
        verify_database()
        
        print("\n" + "=" * 60)
        print("πŸŽ‰ Database setup complete!")
        print("   You can now run the Expense Auditor application")
        print("=" * 60)