Spaces:

riyadhrazzaq
/

applied-ml-project

Sleeping

File size: 8,931 Bytes

bd9ee57
4d502fc
 
cfc11ee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4e8bbcb
740d621
cfc11ee
 
4e8bbcb
740d621
4e8bbcb
 
 
cfc11ee
4e8bbcb
740d621
cfc11ee
 
4e8bbcb
cfc11ee
4e8bbcb
740d621
cfc11ee
 
4e8bbcb
cfc11ee
4e8bbcb
740d621
cfc11ee
 
740d621
 
 
 
 
 
cfc11ee
 
 
4d502fc
4e8bbcb
a31c8a2
 
 
 
4e8bbcb
 
 
 
 
 
 
 
a31c8a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
740d621
 
 
 
 
 
 
 
 
 
 
 
 
 
cfc11ee
 
 
 
 
 
 
 
 
 
 
740d621
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cfc11ee
 
f267af6
 
 
 
 
bd9ee57
 
 
 
 
 
 
 
 
 
f267af6
 
 
 
 
 
 
 
bd9ee57
f267af6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bd9ee57
 
 
 
f267af6

import joblib
import numpy as np

import config

# Lookup table from rank abbreviation strings to integer ordinals.
# transform_incident_rank() and transform_current_rank() index into it and
# then divide the ordinal by an L2 norm, so every rank string they receive
# must appear here as an exact key (lookups are case- and
# whitespace-sensitive).
# Several abbreviations intentionally share one ordinal; the values in use are
# 2-11 and 13 (no key maps to 1 or 12).
# NOTE(review): presumably a larger ordinal means a more senior rank, and the
# values match the encoding used at training time -- confirm against the
# training pipeline before editing.
rank_to_ordinal = {
    'POM': 2,
    'POF': 2,
    'PO': 2,
    'PSA': 2,
    'SGT': 4,
    'SSA': 4,
    'SDS': 4,
    'DT3': 3,
    'DT2': 3,
    'DT1': 3,
    'DTS': 3,
    'LT': 5,
    'LSA': 5,
    'LCD': 5,
    'CPT': 6,
    'SRG': 9,
    'SCS': 10,
    'DCS': 10,
    'DI': 7,
    'INS': 8,
    'DC': 10,
    'AC': 11,
    'COD': 13,
    'CCA': 13,
    'CCT': 13,
    'CD': 13,
    'CMS': 13,
    'COH': 13,
    'COI': 13,
    'COP': 13,
    'COS': 13,
    'COT': 13,
    'CPB': 13,
    'DET': 3,
    'SGT DS': 4,
    'LT SA': 5,
    'LT CD': 5,
    'SGT SA': 4,
    'INSP': 8,
    'LT.': 5,
    'CHIEF': 13,
    'DT': 3
}


def transform_incident_rank(l2_norms, incident_rank):
    """Encode the incident-time rank as a scaled, one-element array.

    Args:
        l2_norms: Mapping that provides the divisor under the key
            ``"incident_rank"``.
        incident_rank: Rank abbreviation; must be a key of ``rank_to_ordinal``.

    Returns:
        A 1-D NumPy array of length 1 holding ``ordinal / norm``.

    Raises:
        KeyError: If the rank is not in ``rank_to_ordinal`` or the norm entry
            is missing from ``l2_norms``.
    """
    ordinal = rank_to_ordinal[incident_rank]
    scaled = ordinal / l2_norms["incident_rank"]
    return np.array((scaled,))


def transform_current_rank(l2_norms, current_rank):
    """Encode the current rank as a scaled, one-element array.

    Args:
        l2_norms: Mapping that provides the divisor under the key
            ``"current_rank"``.
        current_rank: Rank abbreviation; must be a key of ``rank_to_ordinal``.

    Returns:
        A 1-D NumPy array of length 1 holding ``ordinal / norm``.

    Raises:
        KeyError: If the rank is not in ``rank_to_ordinal`` or the norm entry
            is missing from ``l2_norms``.
    """
    ordinal = rank_to_ordinal[current_rank]
    scaled = ordinal / l2_norms["current_rank"]
    return np.array((scaled,))


def transform_previous_complaints(l2_norms, previous_complaints):
    """Scale the previous-complaints count into a one-element array.

    Args:
        l2_norms: Mapping that provides the divisor under the key
            ``"previous_complaints"``.
        previous_complaints: Anything ``int()`` accepts (e.g. an int or a
            numeric string).

    Returns:
        A 1-D NumPy array of length 1 holding ``int(value) / norm``.

    Raises:
        ValueError: If the value cannot be converted by ``int()``.
    """
    count = int(previous_complaints)
    return np.array((count / l2_norms["previous_complaints"],))


def transform_complaint_duration_days(l2_norms, complaint_duration_days):
    """Scale the complaint duration (in days) into a one-element array.

    Args:
        l2_norms: Mapping that provides the divisor under the key
            ``"complaint_duration_days"``.
        complaint_duration_days: Anything ``int()`` accepts (e.g. an int or a
            numeric string).

    Returns:
        A 1-D NumPy array of length 1 holding ``int(value) / norm``.

    Raises:
        ValueError: If the value cannot be converted by ``int()``.
    """
    days = int(complaint_duration_days)
    return np.array((days / l2_norms["complaint_duration_days"],))


def transform_days_on_force(l2_norms, days_on_force):
    """Scale the officer's days on the force into a one-element array.

    Args:
        l2_norms: Mapping that provides the divisor under the key
            ``"days_on_force"``.
        days_on_force: Anything ``int()`` accepts (e.g. an int or a numeric
            string).

    Returns:
        A 1-D NumPy array of length 1 holding ``int(value) / norm``.

    Raises:
        ValueError: If the value cannot be converted by ``int()``.
    """
    days = int(days_on_force)
    return np.array((days / l2_norms["days_on_force"],))


def transform_to_ohe(column_name, value, options=None):
    """One-hot encode ``value`` against the list of options for a column.

    Args:
        column_name: Feature name. Used to look up the option list in
            ``config.features_and_options`` when ``options`` is not given,
            and to label the error raised for an unknown value.
        value: The selected option; must be an element of the option list.
        options: Optional explicit sequence of options (must support
            ``.index()`` and ``len()``). Overrides the config lookup.

    Returns:
        A 1-D float NumPy array of length ``len(options)`` that is all zeros
        except for a 1 at the position of ``value``.

    Raises:
        ValueError: If ``value`` is not one of the options. The message names
            both the column and the offending value.
        KeyError: If ``options`` is None and ``column_name`` is not present in
            ``config.features_and_options``.
    """
    if options is None:
        options = config.features_and_options[column_name]

    try:
        hot = options.index(value)
    except ValueError as err:
        # list.index only reports "x is not in list"; re-raise the same
        # exception type with enough context to identify the bad feature.
        raise ValueError(
            f"{value!r} is not a valid option for column {column_name!r}"
        ) from err

    one_hot = np.zeros(len(options))
    one_hot[hot] = 1
    return one_hot


def process_officer_race(model_name, current_rank, incident_rank, previous_complaints, complaint_duration_days,
                         days_on_force, officer_gender, fado_type, allegation, ccrb_disposition,
                         penalty_rec, penalty_cat, location_type, contact_outcome,
                         impacted_gender, impacted_race,
                         incident_precinct):
    """Build the single-row feature matrix for the officer-race models.

    Numeric inputs are divided by the L2 norms stored in
    ``config.officer_race_l2_norm`` (the ``'undersampling'`` entry when
    ``model_name`` contains ``'Undersampling'``, otherwise
    ``'no_undersampling'``). Categorical inputs are one-hot encoded with the
    option lists in ``config.features_and_options``.

    Returns:
        A float32 NumPy array of shape ``(1, n_features)``. Column order is:
        current rank, incident rank, days on force, previous complaints,
        complaint duration, then the one-hot groups in the order listed in
        ``categorical`` below.
    """
    sampling_key = 'undersampling' if 'Undersampling' in model_name else 'no_undersampling'
    norms = config.officer_race_l2_norm[sampling_key]

    # Scaled numeric features.
    rank_now = transform_current_rank(norms, current_rank)
    rank_then = transform_incident_rank(norms, incident_rank)
    complaints = transform_previous_complaints(norms, previous_complaints)
    duration = transform_complaint_duration_days(norms, complaint_duration_days)
    tenure = transform_days_on_force(norms, days_on_force)

    # One-hot groups, encoded in the order they appear in the output row.
    categorical = (
        ('OfficerGender', officer_gender),
        ('FADOType', fado_type),
        ('Allegation', allegation),
        ('CCRBDisposition', ccrb_disposition),
        ('PenaltyRec', penalty_rec),
        ('PenaltyCat', penalty_cat),
        ('LocationType', location_type),
        ('ContactOutcome', contact_outcome),
        ("ImpactedGender", impacted_gender),
        ("ImpactedRace", impacted_race),
        ("IncidentPrecinct", incident_precinct),
    )
    encoded = [transform_to_ohe(column, choice) for column, choice in categorical]

    # Note the numeric order: days-on-force precedes the complaint features.
    pieces = [rank_now, rank_then, tenure, complaints, duration, *encoded]
    return np.concatenate(pieces, dtype=np.float32).reshape(1, -1)


def process_officer_gender(model_name, current_rank, incident_rank, previous_complaints, complaint_duration_days,
                           officer_race, days_on_force, fado_type, allegation, ccrb_disposition,
                           penalty_rec, penalty_cat, location_type, contact_outcome,
                           impacted_gender, impacted_race,
                           incident_precinct):
    """Build the single-row feature matrix for the officer-gender models.

    Numeric inputs are divided by the L2 norms stored in
    ``config.officer_gender_l2_norm`` (the ``'undersampling'`` entry when
    ``model_name`` contains ``'Undersampling'``, otherwise
    ``'no_undersampling'``). Officer race is one-hot encoded against
    ``config.features_and_options_target_gender["OfficerRace"]``; every other
    categorical input uses ``config.features_and_options``.

    Returns:
        A float32 NumPy array of shape ``(1, n_features)``. Column order is:
        current rank, incident rank, days on force, previous complaints,
        complaint duration, officer race, then the one-hot groups in the
        order listed in ``categorical`` below.
    """
    sampling_key = 'undersampling' if 'Undersampling' in model_name else 'no_undersampling'
    norms = config.officer_gender_l2_norm[sampling_key]

    rank_now = transform_current_rank(norms, current_rank)
    rank_then = transform_incident_rank(norms, incident_rank)
    complaints = transform_previous_complaints(norms, previous_complaints)
    duration = transform_complaint_duration_days(norms, complaint_duration_days)
    # Officer race has its own option list for this target.
    race_vec = transform_to_ohe('OfficerRace', officer_race,
                                config.features_and_options_target_gender["OfficerRace"])
    tenure = transform_days_on_force(norms, days_on_force)

    # Remaining one-hot groups, in output order.
    categorical = (
        ('FADOType', fado_type),
        ('Allegation', allegation),
        ('CCRBDisposition', ccrb_disposition),
        ('PenaltyRec', penalty_rec),
        ('PenaltyCat', penalty_cat),
        ('LocationType', location_type),
        ('ContactOutcome', contact_outcome),
        ("ImpactedGender", impacted_gender),
        ("ImpactedRace", impacted_race),
        ("IncidentPrecinct", incident_precinct),
    )
    encoded = [transform_to_ohe(column, choice) for column, choice in categorical]

    # Note the numeric order: days-on-force precedes the complaint features.
    pieces = [rank_now, rank_then, tenure, complaints, duration, race_vec, *encoded]
    return np.concatenate(pieces, dtype=np.float32).reshape(1, -1)

def process_penalty_cat(model_name, current_rank, incident_rank, previous_complaints, complaint_duration_days,
                           officer_gender, officer_race, days_on_force, fado_type, allegation, location_type, contact_outcome,
                           impacted_gender, impacted_race,
                           incident_precinct):
    """Build the single-row feature matrix for the penalty-category models.

    Numeric inputs are divided by the L2 norms stored in
    ``config.penalty_cat_l2_norm`` (the ``'undersampling'`` entry when
    ``model_name`` contains ``'Undersampling'``, otherwise
    ``'no_undersampling'``). Officer race is one-hot encoded against
    ``config.features_and_options_target_gender["OfficerRace"]``; every other
    categorical input uses ``config.features_and_options``.

    When ``model_name`` is exactly ``"Neural Network.pth"`` or
    ``"Logistic Regression Balanced"``, the row is additionally passed through
    the scaler stored at ``models/PenaltyCat/scaler_model.pkl``.

    Returns:
        An array of shape ``(1, n_features)``. It is float32 when no scaler is
        applied; otherwise it is whatever ``scaler.transform`` returns.
        Column order is: current rank, incident rank, days on force, previous
        complaints, complaint duration, officer gender, officer race, FADO
        type, allegation, location type, contact outcome, impacted gender,
        impacted race, incident precinct.
    """
    officer_race = transform_to_ohe('OfficerRace', officer_race,
                                    config.features_and_options_target_gender["OfficerRace"])
    fado_type = transform_to_ohe('FADOType', fado_type)
    allegation = transform_to_ohe('Allegation', allegation)
    location_type = transform_to_ohe('LocationType', location_type)
    contact_outcome = transform_to_ohe('ContactOutcome', contact_outcome)
    impacted_gender = transform_to_ohe("ImpactedGender", impacted_gender)
    impacted_race = transform_to_ohe("ImpactedRace", impacted_race)
    incident_precinct = transform_to_ohe("IncidentPrecinct", incident_precinct)

    l2_norms = config.penalty_cat_l2_norm['undersampling' if 'Undersampling' in model_name else 'no_undersampling']

    current_rank = transform_current_rank(l2_norms, current_rank)
    incident_rank = transform_incident_rank(l2_norms, incident_rank)
    previous_complaints = transform_previous_complaints(l2_norms, previous_complaints)
    complaint_duration_days = transform_complaint_duration_days(l2_norms, complaint_duration_days)
    officer_gender = transform_to_ohe('OfficerGender', officer_gender)
    days_on_force = transform_days_on_force(l2_norms, days_on_force)

    # Note the numeric order: days-on-force precedes the complaint features.
    arrays = (current_rank,
              incident_rank,
              days_on_force,
              previous_complaints,
              complaint_duration_days,
              officer_gender,
              officer_race,
              fado_type,
              allegation,
              location_type,
              contact_outcome,
              impacted_gender,
              impacted_race,
              incident_precinct)

    input_array = np.concatenate(arrays, dtype=np.float32)
    input_array = input_array.reshape(1, -1)
    if model_name in ["Neural Network.pth", "Logistic Regression Balanced"]:
        # NOTE: joblib.load unpickles the file, which can execute arbitrary
        # code; this path must only ever point at a trusted, locally shipped
        # artifact.
        scaler = joblib.load("models/PenaltyCat/scaler_model.pkl")
        # The former debug print of scaler.feature_names_in_ was removed: it
        # wrote to stdout on every call, and that attribute only exists when
        # the scaler was fitted on data with string feature names.
        input_array = scaler.transform(input_array)
    return input_array