""" Demo data generator for insider threat detection system Generates realistic behavioral data with normal and anomalous patterns """ import random import sys import os from datetime import datetime, timedelta, timezone # Add parent directory to path sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from database import SessionLocal, engine, Base import models # Create tables Base.metadata.create_all(bind=engine) def generate_employees(db, count=20): """Generate sample employees""" departments = ['Engineering', 'Sales', 'HR', 'Finance', 'Operations', 'IT Security'] roles = ['Developer', 'Manager', 'Analyst', 'Administrator', 'Director'] locations = ['New York', 'San Francisco', 'London', 'Tokyo', 'Mumbai'] employees = [] for i in range(count): employee = models.Employee( employee_id=f"EMP{1000 + i}", name=f"Employee {i+1}", email=f"employee{i+1}@company.com", department=random.choice(departments), role=random.choice(roles), baseline_location=random.choice(locations) ) db.add(employee) employees.append(employee) db.commit() print(f"āœ“ Created {count} employees") return employees def generate_normal_events(db, employee, days=30): """Generate normal behavioral events for an employee""" events = [] start_date = datetime.now(timezone.utc) - timedelta(days=days) # Normal login pattern: 8-10 AM on weekdays for day in range(days): current_date = start_date + timedelta(days=day) # Skip weekends for most employees if current_date.weekday() >= 5 and random.random() > 0.1: continue # Morning login login_hour = random.randint(8, 10) login_time = current_date.replace(hour=login_hour, minute=random.randint(0, 59)) events.append(models.BehavioralEvent( employee_id=employee.id, event_type='login', timestamp=login_time, location=employee.baseline_location, ip_address=f"192.168.1.{random.randint(10, 250)}", success=True )) # Normal file access (5-15 files per day) for _ in range(random.randint(5, 15)): file_time = login_time + timedelta(hours=random.randint(0, 8)) events.append(models.BehavioralEvent( employee_id=employee.id, event_type='file_access', timestamp=file_time, file_path=f"/home/user/documents/file{random.randint(1, 100)}.txt", action=random.choice(['read', 'write']), success=True )) # Normal network activity (standard ports) for _ in range(random.randint(10, 20)): net_time = login_time + timedelta(hours=random.randint(0, 8)) events.append(models.BehavioralEvent( employee_id=employee.id, event_type='network', timestamp=net_time, port=random.choice([80, 443, 22, 3306]), success=True )) # Occasional privilege escalation (normal for some roles) if random.random() < 0.3: sudo_time = login_time + timedelta(hours=random.randint(1, 6)) events.append(models.BehavioralEvent( employee_id=employee.id, event_type='privilege_escalation', timestamp=sudo_time, action='sudo', success=True )) for event in events: db.add(event) return len(events) def generate_anomalous_events(db, employee, anomaly_type='unusual_login'): """Generate anomalous events for testing""" events = [] base_time = datetime.now(timezone.utc) - timedelta(days=random.randint(1, 7)) if anomaly_type == 'unusual_login': # Login at 3 AM night_login = base_time.replace(hour=3, minute=random.randint(0, 59)) events.append(models.BehavioralEvent( employee_id=employee.id, event_type='login', timestamp=night_login, location=employee.baseline_location, ip_address=f"192.168.1.{random.randint(10, 250)}", success=True )) elif anomaly_type == 'unusual_location': # Login from different location unusual_locations = ['Beijing', 'Moscow', 'Unknown Location'] events.append(models.BehavioralEvent( employee_id=employee.id, event_type='login', timestamp=base_time, location=random.choice(unusual_locations), ip_address=f"10.0.0.{random.randint(1, 255)}", success=True )) elif anomaly_type == 'unusual_port': # Access unusual ports unusual_ports = [4444, 8888, 9999, 31337, 6667] for port in random.sample(unusual_ports, 3): events.append(models.BehavioralEvent( employee_id=employee.id, event_type='network', timestamp=base_time + timedelta(minutes=random.randint(0, 60)), port=port, success=True )) elif anomaly_type == 'sensitive_files': # Access sensitive files sensitive_paths = [ '/etc/shadow', '/root/.ssh/id_rsa', '/var/log/auth.log', '/home/admin/passwords.txt', '/etc/secrets/api_keys.conf' ] for path in random.sample(sensitive_paths, 3): events.append(models.BehavioralEvent( employee_id=employee.id, event_type='file_access', timestamp=base_time + timedelta(minutes=random.randint(0, 60)), file_path=path, action='read', success=True )) elif anomaly_type == 'privilege_escalation': # Excessive privilege escalation for _ in range(15): events.append(models.BehavioralEvent( employee_id=employee.id, event_type='privilege_escalation', timestamp=base_time + timedelta(minutes=random.randint(0, 120)), action='sudo', success=True )) elif anomaly_type == 'firewall_change': # Firewall modifications for _ in range(5): events.append(models.BehavioralEvent( employee_id=employee.id, event_type='firewall', timestamp=base_time + timedelta(minutes=random.randint(0, 60)), action='modify_rule', success=True )) elif anomaly_type == 'failed_logins': # Multiple failed login attempts for _ in range(10): events.append(models.BehavioralEvent( employee_id=employee.id, event_type='login', timestamp=base_time + timedelta(minutes=random.randint(0, 30)), location=employee.baseline_location, ip_address=f"192.168.1.{random.randint(10, 250)}", success=False )) for event in events: db.add(event) return len(events) def main(): """Main data generation function""" db = SessionLocal() try: print("šŸš€ Starting data generation...") # Clear existing data print("Clearing existing data...") db.query(models.MitigationStrategy).delete() db.query(models.MitreMapping).delete() db.query(models.Anomaly).delete() db.query(models.BehavioralFingerprint).delete() db.query(models.BehavioralEvent).delete() db.query(models.Employee).delete() db.commit() # Generate employees employees = generate_employees(db, count=20) # Generate normal events for all employees print("Generating normal behavioral events...") total_events = 0 for employee in employees: count = generate_normal_events(db, employee, days=30) total_events += count db.commit() print(f"āœ“ Created {total_events} normal events") # Generate anomalous events for some employees print("Generating anomalous events...") anomaly_types = [ 'unusual_login', 'unusual_location', 'unusual_port', 'sensitive_files', 'privilege_escalation', 'firewall_change', 'failed_logins' ] anomalous_employees = random.sample(employees, 7) anomaly_count = 0 for employee, anomaly_type in zip(anomalous_employees, anomaly_types): count = generate_anomalous_events(db, employee, anomaly_type) anomaly_count += count db.commit() print(f"āœ“ Created {anomaly_count} anomalous events for {len(anomalous_employees)} employees") print("\nāœ… Data generation complete!") print(f" - Employees: {len(employees)}") print(f" - Normal events: {total_events}") print(f" - Anomalous events: {anomaly_count}") print(f"\nšŸ’” Next steps:") print(f" 1. Train the ML model: POST http://localhost:8000/api/ml/train") print(f" 2. View dashboard: http://localhost:5173") except Exception as e: print(f"āŒ Error: {e}") db.rollback() finally: db.close() if __name__ == "__main__": main()