File size: 1,034 Bytes
c9f05a2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 | import pandas as pd
import numpy as np
# Demographic categories and the probability of drawing each one.
# Each probability list lines up position-by-position with its category list
# and sums to 1.
GENDERS = ['Male', 'Female', 'Non-Binary']
GENDER_PROBS = [0.48, 0.48, 0.04]
RACES = ['White', 'Black', 'Asian', 'Hispanic', 'Other']
RACE_PROBS = [0.6, 0.15, 0.15, 0.07, 0.03]

# Columns the input data must already contain.
REQUIRED_COLUMNS = ('Job_Category', 'AI_Decision')


def enrich_hiring_data(df, seed=42, flip_frac=0.3):
    """Return a copy of *df* with random demographics and injected bias.

    Two columns are added (or overwritten if already present):
    ``Gender`` and ``Race``, each drawn independently per row from the
    module-level category lists with their fixed probabilities.

    Bias is then injected unconditionally: among rows where ``Gender`` is
    ``'Female'``, ``Job_Category`` is ``'Software Engineer'`` and
    ``AI_Decision`` equals 1, a ``flip_frac`` fraction is sampled and its
    ``AI_Decision`` is set to 0. No check is made for bias already present
    in the data.

    Args:
        df: DataFrame containing ``Job_Category`` and ``AI_Decision`` columns.
        seed: Seed for both the demographic draw and the row sampling, so
            the result is reproducible.
        flip_frac: Fraction of eligible rows whose decision is flipped.

    Returns:
        A new DataFrame; *df* itself is left unmodified.

    Raises:
        KeyError: If a required column is missing from *df*.
    """
    # Fail up front with a clear message instead of half-way through.
    missing = [col for col in REQUIRED_COLUMNS if col not in df.columns]
    if missing:
        raise KeyError(f"input data is missing required column(s): {missing}")

    enriched = df.copy()

    # A local RandomState yields the same draws as np.random.seed(seed)
    # followed by np.random.choice, without reseeding the global NumPy RNG.
    rng = np.random.RandomState(seed)
    enriched['Gender'] = rng.choice(GENDERS, size=len(enriched), p=GENDER_PROBS)
    enriched['Race'] = rng.choice(RACES, size=len(enriched), p=RACE_PROBS)

    # Rows eligible for the synthetic bias: positive decisions for the
    # targeted group.
    targeted = (
        (enriched['Gender'] == 'Female')
        & (enriched['Job_Category'] == 'Software Engineer')
    )
    eligible = enriched[targeted & (enriched['AI_Decision'] == 1)]

    # Randomly flip some 1s to 0s for AI_Decision in this group.
    to_flip = eligible.sample(frac=flip_frac, random_state=seed).index
    enriched.loc[to_flip, 'AI_Decision'] = 0
    return enriched


def main(input_path='ai_hiring_audit_dataset.csv',
         output_path='hiring_data_enriched.csv'):
    """Load the original dataset, enrich it, and save the result as CSV."""
    df = pd.read_csv(input_path)
    enriched = enrich_hiring_data(df)
    enriched.to_csv(output_path, index=False)
    print(f"Enriched dataset saved as {output_path}")


if __name__ == "__main__":
    main()
|