"""Feature-encoding helpers for the officer-race, officer-gender and penalty-category models.

Numeric inputs are divided by a per-feature norm read from ``config``,
categorical inputs are one-hot encoded against option lists read from
``config``, and the pieces are concatenated into a single-row ``float32``
array.
"""
import functools
import logging

import joblib
import numpy as np

import config

logger = logging.getLogger(__name__)

# Rank abbreviation -> ordinal integer. Several abbreviations share a value.
rank_to_ordinal = {
    'POM': 2, 'POF': 2, 'PO': 2, 'PSA': 2,
    'SGT': 4, 'SSA': 4, 'SDS': 4,
    'DT3': 3, 'DT2': 3, 'DT1': 3, 'DTS': 3,
    'LT': 5, 'LSA': 5, 'LCD': 5,
    'CPT': 6,
    'SRG': 9,
    'SCS': 10, 'DCS': 10,
    'DI': 7,
    'INS': 8,
    'DC': 10,
    'AC': 11,
    'COD': 13, 'CCA': 13, 'CCT': 13, 'CD': 13, 'CMS': 13, 'COH': 13,
    'COI': 13, 'COP': 13, 'COS': 13, 'COT': 13, 'CPB': 13,
    'DET': 3,
    'SGT DS': 4,
    'LT SA': 5, 'LT CD': 5,
    'SGT SA': 4,
    'INSP': 8,
    'LT.': 5,
    'CHIEF': 13,
    'DT': 3,
}

# Location of the pickled scaler applied to PenaltyCat inputs for some models.
_PENALTY_CAT_SCALER_PATH = "models/PenaltyCat/scaler_model.pkl"

# Model names whose PenaltyCat input is additionally passed through the scaler.
_PENALTY_CAT_SCALED_MODELS = ("Neural Network.pth", "Logistic Regression Balanced")


def _norm_key(model_name):
    """Return the norm-table key selected by whether 'Undersampling' occurs in *model_name*."""
    return 'undersampling' if 'Undersampling' in model_name else 'no_undersampling'


def _scaled(l2_norms, key, value):
    """Return ``value / l2_norms[key]`` wrapped in a one-element array."""
    return np.array((value / l2_norms[key],))


def _as_row(arrays):
    """Concatenate 1-D feature arrays into one ``float32`` array of shape ``(1, n)``."""
    return np.concatenate(arrays, dtype=np.float32).reshape(1, -1)


@functools.lru_cache(maxsize=1)
def _load_penalty_cat_scaler():
    """Load the PenaltyCat scaler once and reuse it on later calls.

    The file is a local model artifact read with ``joblib.load``; like any
    pickle it must come from a trusted source.
    """
    return joblib.load(_PENALTY_CAT_SCALER_PATH)


def transform_incident_rank(l2_norms, incident_rank):
    """Map *incident_rank* to its ordinal and divide by ``l2_norms["incident_rank"]``.

    Returns a one-element array. Raises ``KeyError`` if the rank is not in
    ``rank_to_ordinal``.
    """
    return _scaled(l2_norms, "incident_rank", rank_to_ordinal[incident_rank])


def transform_current_rank(l2_norms, current_rank):
    """Map *current_rank* to its ordinal and divide by ``l2_norms["current_rank"]``.

    Returns a one-element array. Raises ``KeyError`` if the rank is not in
    ``rank_to_ordinal``.
    """
    return _scaled(l2_norms, "current_rank", rank_to_ordinal[current_rank])


def transform_previous_complaints(l2_norms, previous_complaints):
    """Convert *previous_complaints* with ``int()`` and divide by its norm.

    Returns a one-element array.
    """
    return _scaled(l2_norms, "previous_complaints", int(previous_complaints))


def transform_complaint_duration_days(l2_norms, complaint_duration_days):
    """Convert *complaint_duration_days* with ``int()`` and divide by its norm.

    Returns a one-element array.
    """
    return _scaled(l2_norms, "complaint_duration_days", int(complaint_duration_days))


def transform_days_on_force(l2_norms, days_on_force):
    """Convert *days_on_force* with ``int()`` and divide by its norm.

    Returns a one-element array.
    """
    return _scaled(l2_norms, "days_on_force", int(days_on_force))


def transform_to_ohe(column_name, value, options=None):
    """One-hot encode *value* against a list of options.

    Args:
        column_name: Feature name; used to look up
            ``config.features_and_options[column_name]`` when *options* is
            not supplied, and in the error message.
        value: The category to encode.
        options: Optional explicit sequence of categories.

    Returns:
        A 1-D array of ``len(options)`` zeros with a single 1 at the
        position of *value*.

    Raises:
        ValueError: If *value* is not one of the options.
    """
    if options is None:
        options = config.features_and_options[column_name]
    try:
        hot = options.index(value)
    except ValueError:
        # Same exception type as before, but say which feature was rejected.
        raise ValueError(
            f"{value!r} is not a valid option for {column_name!r}"
        ) from None
    one_hot = np.zeros(len(options))
    one_hot[hot] = 1
    return one_hot


def process_officer_race(model_name, current_rank, incident_rank, previous_complaints,
                         complaint_duration_days, days_on_force, officer_gender, fado_type,
                         allegation, ccrb_disposition, penalty_rec, penalty_cat,
                         location_type, contact_outcome, impacted_gender, impacted_race,
                         incident_precinct):
    """Build the single-row input for an officer-race model.

    Norms come from ``config.officer_race_l2_norm``, keyed by whether
    'Undersampling' occurs in *model_name*.

    Returns:
        A ``float32`` array of shape ``(1, n_features)``. The column order is
        the order of the tuple passed to ``_as_row`` below.
    """
    l2_norms = config.officer_race_l2_norm[_norm_key(model_name)]

    current_rank = transform_current_rank(l2_norms, current_rank)
    incident_rank = transform_incident_rank(l2_norms, incident_rank)
    previous_complaints = transform_previous_complaints(l2_norms, previous_complaints)
    complaint_duration_days = transform_complaint_duration_days(
        l2_norms, complaint_duration_days)
    days_on_force = transform_days_on_force(l2_norms, days_on_force)

    officer_gender = transform_to_ohe('OfficerGender', officer_gender)
    fado_type = transform_to_ohe('FADOType', fado_type)
    allegation = transform_to_ohe('Allegation', allegation)
    ccrb_disposition = transform_to_ohe('CCRBDisposition', ccrb_disposition)
    penalty_rec = transform_to_ohe('PenaltyRec', penalty_rec)
    penalty_cat = transform_to_ohe('PenaltyCat', penalty_cat)
    location_type = transform_to_ohe('LocationType', location_type)
    contact_outcome = transform_to_ohe('ContactOutcome', contact_outcome)
    impacted_gender = transform_to_ohe("ImpactedGender", impacted_gender)
    impacted_race = transform_to_ohe("ImpactedRace", impacted_race)
    incident_precinct = transform_to_ohe("IncidentPrecinct", incident_precinct)

    # Note: days_on_force precedes previous_complaints in the feature vector.
    return _as_row((
        current_rank, incident_rank, days_on_force, previous_complaints,
        complaint_duration_days, officer_gender, fado_type, allegation,
        ccrb_disposition, penalty_rec, penalty_cat, location_type,
        contact_outcome, impacted_gender, impacted_race, incident_precinct,
    ))


def process_officer_gender(model_name, current_rank, incident_rank, previous_complaints,
                           complaint_duration_days, officer_race, days_on_force, fado_type,
                           allegation, ccrb_disposition, penalty_rec, penalty_cat,
                           location_type, contact_outcome, impacted_gender, impacted_race,
                           incident_precinct):
    """Build the single-row input for an officer-gender model.

    Norms come from ``config.officer_gender_l2_norm``, keyed by whether
    'Undersampling' occurs in *model_name*. ``officer_race`` is encoded
    against ``config.features_and_options_target_gender["OfficerRace"]``.

    Returns:
        A ``float32`` array of shape ``(1, n_features)``. The column order is
        the order of the tuple passed to ``_as_row`` below.
    """
    l2_norms = config.officer_gender_l2_norm[_norm_key(model_name)]

    current_rank = transform_current_rank(l2_norms, current_rank)
    incident_rank = transform_incident_rank(l2_norms, incident_rank)
    previous_complaints = transform_previous_complaints(l2_norms, previous_complaints)
    complaint_duration_days = transform_complaint_duration_days(
        l2_norms, complaint_duration_days)
    officer_race = transform_to_ohe(
        'OfficerRace', officer_race,
        config.features_and_options_target_gender["OfficerRace"])
    days_on_force = transform_days_on_force(l2_norms, days_on_force)

    fado_type = transform_to_ohe('FADOType', fado_type)
    allegation = transform_to_ohe('Allegation', allegation)
    ccrb_disposition = transform_to_ohe('CCRBDisposition', ccrb_disposition)
    penalty_rec = transform_to_ohe('PenaltyRec', penalty_rec)
    penalty_cat = transform_to_ohe('PenaltyCat', penalty_cat)
    location_type = transform_to_ohe('LocationType', location_type)
    contact_outcome = transform_to_ohe('ContactOutcome', contact_outcome)
    impacted_gender = transform_to_ohe("ImpactedGender", impacted_gender)
    impacted_race = transform_to_ohe("ImpactedRace", impacted_race)
    incident_precinct = transform_to_ohe("IncidentPrecinct", incident_precinct)

    # Note: days_on_force precedes previous_complaints in the feature vector.
    return _as_row((
        current_rank, incident_rank, days_on_force, previous_complaints,
        complaint_duration_days, officer_race, fado_type, allegation,
        ccrb_disposition, penalty_rec, penalty_cat, location_type,
        contact_outcome, impacted_gender, impacted_race, incident_precinct,
    ))


def process_penalty_cat(model_name, current_rank, incident_rank, previous_complaints,
                        complaint_duration_days, officer_gender, officer_race,
                        days_on_force, fado_type, allegation, location_type,
                        contact_outcome, impacted_gender, impacted_race,
                        incident_precinct):
    """Build the single-row input for a penalty-category model.

    Norms come from ``config.penalty_cat_l2_norm``, keyed by whether
    'Undersampling' occurs in *model_name*. For the model names listed in
    ``_PENALTY_CAT_SCALED_MODELS`` the row is additionally passed through the
    scaler stored at ``_PENALTY_CAT_SCALER_PATH``.

    Returns:
        An array of shape ``(1, n_features)``; ``float32`` when no scaler is
        applied, otherwise whatever the scaler's ``transform`` returns.
    """
    # NOTE(review): OfficerRace options are taken from the *target_gender*
    # table here as well -- confirm this is the table the model was trained with.
    officer_race = transform_to_ohe(
        'OfficerRace', officer_race,
        config.features_and_options_target_gender["OfficerRace"])
    fado_type = transform_to_ohe('FADOType', fado_type)
    allegation = transform_to_ohe('Allegation', allegation)
    location_type = transform_to_ohe('LocationType', location_type)
    contact_outcome = transform_to_ohe('ContactOutcome', contact_outcome)
    impacted_gender = transform_to_ohe("ImpactedGender", impacted_gender)
    impacted_race = transform_to_ohe("ImpactedRace", impacted_race)
    incident_precinct = transform_to_ohe("IncidentPrecinct", incident_precinct)

    l2_norms = config.penalty_cat_l2_norm[_norm_key(model_name)]

    current_rank = transform_current_rank(l2_norms, current_rank)
    incident_rank = transform_incident_rank(l2_norms, incident_rank)
    previous_complaints = transform_previous_complaints(l2_norms, previous_complaints)
    complaint_duration_days = transform_complaint_duration_days(
        l2_norms, complaint_duration_days)
    officer_gender = transform_to_ohe('OfficerGender', officer_gender)
    days_on_force = transform_days_on_force(l2_norms, days_on_force)

    # Note: days_on_force precedes previous_complaints in the feature vector.
    input_array = _as_row((
        current_rank, incident_rank, days_on_force, previous_complaints,
        complaint_duration_days, officer_gender, officer_race, fado_type,
        allegation, location_type, contact_outcome, impacted_gender,
        impacted_race, incident_precinct,
    ))

    if model_name in _PENALTY_CAT_SCALED_MODELS:
        scaler = _load_penalty_cat_scaler()
        # Formerly an unconditional print(); the attribute may be absent, so
        # read it defensively and only emit it at debug level.
        logger.debug("PenaltyCat scaler feature names: %s",
                     getattr(scaler, "feature_names_in_", None))
        input_array = scaler.transform(input_array)

    return input_array