"""Enrich the AI hiring audit dataset with synthetic demographics and bias.

Reads ``ai_hiring_audit_dataset.csv``, adds randomly drawn ``Gender`` and
``Race`` columns, flips a fraction of positive ``AI_Decision`` values for
female Software Engineer rows, and writes the result to
``hiring_data_enriched.csv``.
"""

import numpy as np
import pandas as pd

INPUT_PATH = 'ai_hiring_audit_dataset.csv'
OUTPUT_PATH = 'hiring_data_enriched.csv'

# Single seed used for both the demographic draws and the bias sampling so
# that repeated runs produce the same enriched file.
SEED = 42

# Demographic categories and the probability of drawing each one.
GENDERS = ['Male', 'Female', 'Non-Binary']
GENDER_PROBS = [0.48, 0.48, 0.04]
RACES = ['White', 'Black', 'Asian', 'Hispanic', 'Other']
RACE_PROBS = [0.6, 0.15, 0.15, 0.07, 0.03]

# Group that receives the synthetic bias, and how strongly.
BIASED_GENDER = 'Female'
BIASED_JOB_CATEGORY = 'Software Engineer'
BIAS_FLIP_FRACTION = 0.3


def assign_demographics(df: pd.DataFrame, seed: int = SEED) -> pd.DataFrame:
    """Return a copy of *df* with random ``Gender`` and ``Race`` columns.

    Values are drawn independently of every other column, first all genders
    and then all races, from a generator seeded with *seed*. A local
    ``RandomState`` yields the same sequence as seeding NumPy's global
    generator with the same value, without touching global state.

    Args:
        df: Input frame; it is not modified.
        seed: Seed for the random draws.

    Returns:
        A new frame with ``Gender`` and ``Race`` added (or overwritten).
    """
    rng = np.random.RandomState(seed)
    enriched = df.copy()
    row_count = len(enriched)
    # Draw order matters for reproducibility: genders first, then races.
    enriched['Gender'] = rng.choice(GENDERS, size=row_count, p=GENDER_PROBS)
    enriched['Race'] = rng.choice(RACES, size=row_count, p=RACE_PROBS)
    return enriched


def inject_bias(
    df: pd.DataFrame,
    frac: float = BIAS_FLIP_FRACTION,
    seed: int = SEED,
) -> pd.DataFrame:
    """Return a copy of *df* with synthetic bias against one group.

    Among rows where ``Gender`` is ``'Female'``, ``Job_Category`` is
    ``'Software Engineer'`` and ``AI_Decision`` is 1, a random *frac* of the
    rows have ``AI_Decision`` set to 0. The flips are applied
    unconditionally; no check is made for bias already present in the data.

    Args:
        df: Frame containing ``Gender``, ``Job_Category`` and
            ``AI_Decision`` columns; it is not modified.
        frac: Fraction of eligible rows to flip.
        seed: Seed passed to ``DataFrame.sample`` as ``random_state``.

    Returns:
        A new frame with the selected decisions flipped.

    Raises:
        KeyError: If any of the required columns is missing.
    """
    required = ('Gender', 'Job_Category', 'AI_Decision')
    missing = [column for column in required if column not in df.columns]
    if missing:
        raise KeyError(f"Missing required column(s): {', '.join(missing)}")

    biased = df.copy()
    eligible = (
        (biased['Gender'] == BIASED_GENDER)
        & (biased['Job_Category'] == BIASED_JOB_CATEGORY)
        & (biased['AI_Decision'] == 1)
    )
    if not eligible.any():
        # Nothing to flip; avoid sampling from an empty frame.
        return biased

    # NOTE: rows are selected by index label, so labels are assumed to be
    # unique (true for the default index produced by ``pd.read_csv``).
    to_flip = biased[eligible].sample(frac=frac, random_state=seed).index
    biased.loc[to_flip, 'AI_Decision'] = 0
    return biased


def main() -> None:
    """Load the original dataset, enrich it, and save the result as CSV."""
    df = pd.read_csv(INPUT_PATH)
    df = assign_demographics(df)
    df = inject_bias(df)
    df.to_csv(OUTPUT_PATH, index=False)
    print(f"Enriched dataset saved as {OUTPUT_PATH}")


if __name__ == "__main__":
    main()