Spaces:

riyadhrazzaq
/

applied-ml-project

Sleeping

App Files Files Community

riyadhrazzaq committed on Jan 12, 2024

Commit

bd9ee57

1 Parent(s): e1b5f4e

add Alvaro's code

Browse files

Files changed (6) hide show

config.py +5 -6
inference.py +6 -2
model.py +5 -1
models/PenaltyCat/Logistic Regression Balanced.pkl +3 -0
models/PenaltyCat/scaler_model.pkl +3 -0
preprocessor.py +16 -8

config.py CHANGED Viewed

@@ -105,13 +105,12 @@ penalty_cat_l2_norm = {
         "current_rank": 653.0803932135767,
         "incident_rank": 526.2489904978441
     },
-    # todo these are dummy values, get real ones from Alvaro
     "no_undersampling": {
-        "days_on_force": 709634.1443779323,
-        "complaint_duration_days": 67272.94727451741,
-        "previous_complaints": 2251.0399818750443,
-        "current_rank": 653.0803932135767,
-        "incident_rank": 526.2489904978441
     }
 }

         "current_rank": 653.0803932135767,
         "incident_rank": 526.2489904978441
     },
     "no_undersampling": {
+        "days_on_force": 1.,
+        "complaint_duration_days": 1.,
+        "previous_complaints": 1.,
+        "current_rank": 1.,
+        "incident_rank": 1.
     }
 }

inference.py CHANGED Viewed

@@ -28,7 +28,11 @@ def predict_penalty_cat(model_name, X):
     model = load_models(model_name, 'PenaltyCat')
     # 1 No Penalty, 0 Penalty
     prediction = model.predict(X)
-    return ["No discipline Taken", "Discipline Taken"][int(prediction[0])]
 def infer_officer_race(model_name, current_rank, incident_rank, previous_complaints, complaint_duration_days,
@@ -64,6 +68,6 @@ def infer_penalty_cat(model_name, current_rank, incident_rank, previous_complain
                                        officer_gender, officer_race, days_on_force, fado_type, allegation, location_type, contact_outcome, impacted_gender, impacted_race,
                                        incident_precinct)
     prediction = predict_penalty_cat(model_name, input_array)
-    prediction = f"**{prediction}** against the officer"
     return prediction

     model = load_models(model_name, 'PenaltyCat')
     # 1 No Penalty, 0 Penalty
     prediction = model.predict(X)
+    classes = ["No discipline Taken", "Discipline Taken"]
+    if "balanced" in model_name:
+        classes = ["Discipline Taken", "No discipline Taken"]
+    return classes[int(prediction[0])]
 def infer_officer_race(model_name, current_rank, incident_rank, previous_complaints, complaint_duration_days,
                                        officer_gender, officer_race, days_on_force, fado_type, allegation, location_type, contact_outcome, impacted_gender, impacted_race,
                                        incident_precinct)
     prediction = predict_penalty_cat(model_name, input_array)
+    prediction = f"{prediction} against the officer"
     return prediction

model.py CHANGED Viewed

@@ -35,5 +35,9 @@ def available_models(target_column):
     if target_column == "OfficerRace":
         return get_folder_names("models/OfficerRace")
-    elif target_column == "OfficerGender" or target_column == "PenaltyCat":
         return [os.path.splitext(name)[0] for name in os.listdir(f"models/{target_column}") if name.endswith(".pkl")]

     if target_column == "OfficerRace":
         return get_folder_names("models/OfficerRace")
+    elif target_column == "OfficerGender":
         return [os.path.splitext(name)[0] for name in os.listdir(f"models/{target_column}") if name.endswith(".pkl")]
+    elif target_column == "PenaltyCat":
+        return [os.path.splitext(name)[0] for name in os.listdir(f"models/{target_column}") if name != ("scaler_model"
+                                                                                                        ".pkl")]

models/PenaltyCat/Logistic Regression Balanced.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:724e162762800f47a9669edf534ffd36c925c2a0b9f339bccb77e11b3c54b972
+size 1727

models/PenaltyCat/scaler_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:13bc7d0afe5d9de7719389aa42a9c38c27ff153e276a9acc047fd689e4fc01fd
+size 6417

preprocessor.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import numpy as np
 import config
@@ -178,6 +179,16 @@ def process_penalty_cat(model_name, current_rank, incident_rank, previous_compla
                            officer_gender, officer_race, days_on_force, fado_type, allegation, location_type, contact_outcome,
                            impacted_gender, impacted_race,
                            incident_precinct):
     l2_norms = config.penalty_cat_l2_norm['undersampling' if 'Undersampling' in model_name else 'no_undersampling']
     current_rank = transform_current_rank(l2_norms, current_rank)
@@ -185,15 +196,8 @@ def process_penalty_cat(model_name, current_rank, incident_rank, previous_compla
     previous_complaints = transform_previous_complaints(l2_norms, previous_complaints)
     complaint_duration_days = transform_complaint_duration_days(l2_norms, complaint_duration_days)
     officer_gender = transform_to_ohe('OfficerGender', officer_gender)
-    officer_race = transform_to_ohe('OfficerRace', officer_race, config.features_and_options_target_gender["OfficerRace"])
     days_on_force = transform_days_on_force(l2_norms, days_on_force)
-    fado_type = transform_to_ohe('FADOType', fado_type)
-    allegation = transform_to_ohe('Allegation', allegation)
-    location_type = transform_to_ohe('LocationType', location_type)
-    contact_outcome = transform_to_ohe('ContactOutcome', contact_outcome)
-    impacted_gender = transform_to_ohe("ImpactedGender", impacted_gender)
-    impacted_race = transform_to_ohe("ImpactedRace", impacted_race)
-    incident_precinct = transform_to_ohe("IncidentPrecinct", incident_precinct)
     arrays = (current_rank,
               incident_rank,
@@ -212,4 +216,8 @@ def process_penalty_cat(model_name, current_rank, incident_rank, previous_compla
     input_array = np.concatenate(arrays, dtype=np.float32)
     input_array = input_array.reshape(1, -1)
     return input_array

+import joblib
 import numpy as np
 import config
                            officer_gender, officer_race, days_on_force, fado_type, allegation, location_type, contact_outcome,
                            impacted_gender, impacted_race,
                            incident_precinct):
+    officer_race = transform_to_ohe('OfficerRace', officer_race,
+                                    config.features_and_options_target_gender["OfficerRace"])
+    fado_type = transform_to_ohe('FADOType', fado_type)
+    allegation = transform_to_ohe('Allegation', allegation)
+    location_type = transform_to_ohe('LocationType', location_type)
+    contact_outcome = transform_to_ohe('ContactOutcome', contact_outcome)
+    impacted_gender = transform_to_ohe("ImpactedGender", impacted_gender)
+    impacted_race = transform_to_ohe("ImpactedRace", impacted_race)
+    incident_precinct = transform_to_ohe("IncidentPrecinct", incident_precinct)
     l2_norms = config.penalty_cat_l2_norm['undersampling' if 'Undersampling' in model_name else 'no_undersampling']
     current_rank = transform_current_rank(l2_norms, current_rank)
     previous_complaints = transform_previous_complaints(l2_norms, previous_complaints)
     complaint_duration_days = transform_complaint_duration_days(l2_norms, complaint_duration_days)
     officer_gender = transform_to_ohe('OfficerGender', officer_gender)
     days_on_force = transform_days_on_force(l2_norms, days_on_force)
     arrays = (current_rank,
               incident_rank,
     input_array = np.concatenate(arrays, dtype=np.float32)
     input_array = input_array.reshape(1, -1)
+    if model_name in ["Neural Network.pth", "Logistic Regression Balanced"]:
+        scaler = joblib.load(f"models/PenaltyCat/scaler_model.pkl")
+        print(scaler.feature_names_in_)
+        input_array = scaler.transform(input_array)
     return input_array