Spaces:
Sleeping
Sleeping
Commit ·
bd9ee57
1
Parent(s): e1b5f4e
add alvaro's code
Browse files
- config.py +5 -6
- inference.py +6 -2
- model.py +5 -1
- models/PenaltyCat/Logistic Regression Balanced.pkl +3 -0
- models/PenaltyCat/scaler_model.pkl +3 -0
- preprocessor.py +16 -8
config.py
CHANGED
|
@@ -105,13 +105,12 @@ penalty_cat_l2_norm = {
|
|
| 105 |
"current_rank": 653.0803932135767,
|
| 106 |
"incident_rank": 526.2489904978441
|
| 107 |
},
|
| 108 |
-
# todo these are dummy values, get real ones from Alvaro
|
| 109 |
"no_undersampling": {
|
| 110 |
-
"days_on_force":
|
| 111 |
-
"complaint_duration_days":
|
| 112 |
-
"previous_complaints":
|
| 113 |
-
"current_rank":
|
| 114 |
-
"incident_rank":
|
| 115 |
}
|
| 116 |
}
|
| 117 |
|
|
|
|
| 105 |
"current_rank": 653.0803932135767,
|
| 106 |
"incident_rank": 526.2489904978441
|
| 107 |
},
|
|
|
|
| 108 |
"no_undersampling": {
|
| 109 |
+
"days_on_force": 1.,
|
| 110 |
+
"complaint_duration_days": 1.,
|
| 111 |
+
"previous_complaints": 1.,
|
| 112 |
+
"current_rank": 1.,
|
| 113 |
+
"incident_rank": 1.
|
| 114 |
}
|
| 115 |
}
|
| 116 |
|
inference.py
CHANGED
|
@@ -28,7 +28,11 @@ def predict_penalty_cat(model_name, X):
|
|
| 28 |
model = load_models(model_name, 'PenaltyCat')
|
| 29 |
# 1 No Penalty, 0 Penalty
|
| 30 |
prediction = model.predict(X)
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
|
| 34 |
def infer_officer_race(model_name, current_rank, incident_rank, previous_complaints, complaint_duration_days,
|
|
@@ -64,6 +68,6 @@ def infer_penalty_cat(model_name, current_rank, incident_rank, previous_complain
|
|
| 64 |
officer_gender, officer_race, days_on_force, fado_type, allegation, location_type, contact_outcome, impacted_gender, impacted_race,
|
| 65 |
incident_precinct)
|
| 66 |
prediction = predict_penalty_cat(model_name, input_array)
|
| 67 |
-
prediction = f"
|
| 68 |
return prediction
|
| 69 |
|
|
|
|
| 28 |
model = load_models(model_name, 'PenaltyCat')
|
| 29 |
# 1 No Penalty, 0 Penalty
|
| 30 |
prediction = model.predict(X)
|
| 31 |
+
|
| 32 |
+
classes = ["No discipline Taken", "Discipline Taken"]
|
| 33 |
+
if "balanced" in model_name:
|
| 34 |
+
classes = ["Discipline Taken", "No discipline Taken"]
|
| 35 |
+
return classes[int(prediction[0])]
|
| 36 |
|
| 37 |
|
| 38 |
def infer_officer_race(model_name, current_rank, incident_rank, previous_complaints, complaint_duration_days,
|
|
|
|
| 68 |
officer_gender, officer_race, days_on_force, fado_type, allegation, location_type, contact_outcome, impacted_gender, impacted_race,
|
| 69 |
incident_precinct)
|
| 70 |
prediction = predict_penalty_cat(model_name, input_array)
|
| 71 |
+
prediction = f"{prediction} against the officer"
|
| 72 |
return prediction
|
| 73 |
|
model.py
CHANGED
|
@@ -35,5 +35,9 @@ def available_models(target_column):
|
|
| 35 |
if target_column == "OfficerRace":
|
| 36 |
return get_folder_names("models/OfficerRace")
|
| 37 |
|
| 38 |
-
elif target_column == "OfficerGender"
|
| 39 |
return [os.path.splitext(name)[0] for name in os.listdir(f"models/{target_column}") if name.endswith(".pkl")]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
if target_column == "OfficerRace":
|
| 36 |
return get_folder_names("models/OfficerRace")
|
| 37 |
|
| 38 |
+
elif target_column == "OfficerGender":
|
| 39 |
return [os.path.splitext(name)[0] for name in os.listdir(f"models/{target_column}") if name.endswith(".pkl")]
|
| 40 |
+
|
| 41 |
+
elif target_column == "PenaltyCat":
|
| 42 |
+
return [os.path.splitext(name)[0] for name in os.listdir(f"models/{target_column}") if name != ("scaler_model"
|
| 43 |
+
".pkl")]
|
models/PenaltyCat/Logistic Regression Balanced.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:724e162762800f47a9669edf534ffd36c925c2a0b9f339bccb77e11b3c54b972
|
| 3 |
+
size 1727
|
models/PenaltyCat/scaler_model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:13bc7d0afe5d9de7719389aa42a9c38c27ff153e276a9acc047fd689e4fc01fd
|
| 3 |
+
size 6417
|
preprocessor.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import numpy as np
|
| 2 |
|
| 3 |
import config
|
|
@@ -178,6 +179,16 @@ def process_penalty_cat(model_name, current_rank, incident_rank, previous_compla
|
|
| 178 |
officer_gender, officer_race, days_on_force, fado_type, allegation, location_type, contact_outcome,
|
| 179 |
impacted_gender, impacted_race,
|
| 180 |
incident_precinct):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
l2_norms = config.penalty_cat_l2_norm['undersampling' if 'Undersampling' in model_name else 'no_undersampling']
|
| 182 |
|
| 183 |
current_rank = transform_current_rank(l2_norms, current_rank)
|
|
@@ -185,15 +196,8 @@ def process_penalty_cat(model_name, current_rank, incident_rank, previous_compla
|
|
| 185 |
previous_complaints = transform_previous_complaints(l2_norms, previous_complaints)
|
| 186 |
complaint_duration_days = transform_complaint_duration_days(l2_norms, complaint_duration_days)
|
| 187 |
officer_gender = transform_to_ohe('OfficerGender', officer_gender)
|
| 188 |
-
officer_race = transform_to_ohe('OfficerRace', officer_race, config.features_and_options_target_gender["OfficerRace"])
|
| 189 |
days_on_force = transform_days_on_force(l2_norms, days_on_force)
|
| 190 |
-
|
| 191 |
-
allegation = transform_to_ohe('Allegation', allegation)
|
| 192 |
-
location_type = transform_to_ohe('LocationType', location_type)
|
| 193 |
-
contact_outcome = transform_to_ohe('ContactOutcome', contact_outcome)
|
| 194 |
-
impacted_gender = transform_to_ohe("ImpactedGender", impacted_gender)
|
| 195 |
-
impacted_race = transform_to_ohe("ImpactedRace", impacted_race)
|
| 196 |
-
incident_precinct = transform_to_ohe("IncidentPrecinct", incident_precinct)
|
| 197 |
|
| 198 |
arrays = (current_rank,
|
| 199 |
incident_rank,
|
|
@@ -212,4 +216,8 @@ def process_penalty_cat(model_name, current_rank, incident_rank, previous_compla
|
|
| 212 |
|
| 213 |
input_array = np.concatenate(arrays, dtype=np.float32)
|
| 214 |
input_array = input_array.reshape(1, -1)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
return input_array
|
|
|
|
| 1 |
+
import joblib
|
| 2 |
import numpy as np
|
| 3 |
|
| 4 |
import config
|
|
|
|
| 179 |
officer_gender, officer_race, days_on_force, fado_type, allegation, location_type, contact_outcome,
|
| 180 |
impacted_gender, impacted_race,
|
| 181 |
incident_precinct):
|
| 182 |
+
officer_race = transform_to_ohe('OfficerRace', officer_race,
|
| 183 |
+
config.features_and_options_target_gender["OfficerRace"])
|
| 184 |
+
fado_type = transform_to_ohe('FADOType', fado_type)
|
| 185 |
+
allegation = transform_to_ohe('Allegation', allegation)
|
| 186 |
+
location_type = transform_to_ohe('LocationType', location_type)
|
| 187 |
+
contact_outcome = transform_to_ohe('ContactOutcome', contact_outcome)
|
| 188 |
+
impacted_gender = transform_to_ohe("ImpactedGender", impacted_gender)
|
| 189 |
+
impacted_race = transform_to_ohe("ImpactedRace", impacted_race)
|
| 190 |
+
incident_precinct = transform_to_ohe("IncidentPrecinct", incident_precinct)
|
| 191 |
+
|
| 192 |
l2_norms = config.penalty_cat_l2_norm['undersampling' if 'Undersampling' in model_name else 'no_undersampling']
|
| 193 |
|
| 194 |
current_rank = transform_current_rank(l2_norms, current_rank)
|
|
|
|
| 196 |
previous_complaints = transform_previous_complaints(l2_norms, previous_complaints)
|
| 197 |
complaint_duration_days = transform_complaint_duration_days(l2_norms, complaint_duration_days)
|
| 198 |
officer_gender = transform_to_ohe('OfficerGender', officer_gender)
|
|
|
|
| 199 |
days_on_force = transform_days_on_force(l2_norms, days_on_force)
|
| 200 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
|
| 202 |
arrays = (current_rank,
|
| 203 |
incident_rank,
|
|
|
|
| 216 |
|
| 217 |
input_array = np.concatenate(arrays, dtype=np.float32)
|
| 218 |
input_array = input_array.reshape(1, -1)
|
| 219 |
+
if model_name in ["Neural Network.pth", "Logistic Regression Balanced"]:
|
| 220 |
+
scaler = joblib.load(f"models/PenaltyCat/scaler_model.pkl")
|
| 221 |
+
print(scaler.feature_names_in_)
|
| 222 |
+
input_array = scaler.transform(input_array)
|
| 223 |
return input_array
|