riyadhrazzaq committed
Commit f267af6 · 1 Parent(s): 740d621

supports penalty category (PenaltyCat) prediction
app.py CHANGED
@@ -2,9 +2,10 @@ import gradio as gr
 
 from block_officer_race import officerRaceDemo
 from block_officer_gender import officerGenderDemo
+from block_penalty_cat import penaltyCatDemo
 
-demo = gr.TabbedInterface([officerRaceDemo, officerGenderDemo],
-                          ["Predict OfficerRace", "Predict Officer Gender"],
+demo = gr.TabbedInterface([officerRaceDemo, officerGenderDemo, penaltyCatDemo],
+                          ["Predict OfficerRace", "Predict Officer Gender", "Predict Penalty Category"],
                           title="NYPD Complaints Predictor",
                           theme=gr.themes.Monochrome()
                           )
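For context, this hunk only registers the new tab; a minimal sketch of how the tabbed app is assumed to be served (the launch call lives elsewhere in app.py and is not part of this diff):

# sketch only: assumed entry point at the bottom of app.py
if __name__ == "__main__":
    demo.launch()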
block_penalty_cat.py ADDED
@@ -0,0 +1,135 @@
+import gradio as gr
+
+import config
+import model
+from inference import infer_penalty_cat
+
+
+def infer(model_name,
+          current_rank,
+          incident_rank,
+          previous_complaints,
+          complaint_duration_days,
+          officer_gender,
+          officer_race,
+          days_on_force,
+          fado_type,
+          allegation,
+          location_type,
+          contact_outcome,
+          impacted_gender,
+          impacted_race,
+          incident_precinct):
+    # throw error if model name is not selected
+    if not model_name:
+        raise gr.Error("Please select a model")
+
+    return infer_penalty_cat(model_name,
+                             current_rank,
+                             incident_rank,
+                             previous_complaints,
+                             complaint_duration_days,
+                             officer_gender,
+                             officer_race,
+                             days_on_force,
+                             fado_type,
+                             allegation,
+                             location_type,
+                             contact_outcome,
+                             impacted_gender,
+                             impacted_race,
+                             incident_precinct)
+
+
+with gr.Blocks() as penaltyCatDemo:
+    with gr.Row():
+        with gr.Column():
+            current_rank_dropdown = gr.Dropdown(choices=config.features_and_options["CurrentRank"],
+                                                multiselect=False,
+                                                label=config.current_rank_label,
+                                                value=config.features_and_options["CurrentRank"][0])
+            incident_rank_dropdown = gr.Dropdown(choices=config.features_and_options["IncidentRank"],
+                                                 multiselect=False,
+                                                 label=config.incident_rank_label,
+                                                 value=config.features_and_options["IncidentRank"][0])
+            previous_complaints_slider = gr.Slider(minimum=0,
+                                                   maximum=100,
+                                                   step=1,
+                                                   label=config.previous_complaints_label,
+                                                   value=3)
+            complaint_duration_days_slider = gr.Slider(minimum=0,
+                                                       maximum=100,
+                                                       step=1,
+                                                       label=config.complaint_duration_days_label,
+                                                       value=3)
+            officer_gender_dropdown = gr.Dropdown(choices=config.features_and_options["OfficerGender"],
+                                                  multiselect=False,
+                                                  interactive=True,
+                                                  label=config.officer_gender_label,
+                                                  value=config.features_and_options["OfficerGender"][0])
+            days_on_force_slider = gr.Number(label=config.days_on_force_label,
+                                             value=700)
+
+            officer_race_dropdown = gr.Dropdown(choices=config.features_and_options_target_gender["OfficerRace"],
+                                                multiselect=False,
+                                                label=config.officer_race_label,
+                                                value=config.features_and_options_target_gender["OfficerRace"][0])
+
+        with gr.Column():
+
+            fado_type_dropdown = gr.Dropdown(choices=config.features_and_options["FADOType"],
+                                             multiselect=False,
+                                             label=config.fado_type_label,
+                                             value=config.features_and_options["FADOType"][0])
+            allegation_dropdown = gr.Dropdown(choices=config.features_and_options["Allegation"],
+                                              multiselect=False,
+                                              label=config.allegation_label,
+                                              value=config.features_and_options["Allegation"][0])
+
+            location_type_dropdown = gr.Dropdown(choices=config.features_and_options["LocationType"],
+                                                 multiselect=False,
+                                                 label=config.location_type_label,
+                                                 value=config.features_and_options["LocationType"][0])
+            contact_outcome_dropdown = gr.Dropdown(choices=config.features_and_options["ContactOutcome"],
+                                                   multiselect=False,
+                                                   label=config.contact_outcome_label,
+                                                   value=config.features_and_options["ContactOutcome"][0])
+
+            impacted_gender_dropdown = gr.Dropdown(choices=config.features_and_options["ImpactedGender"],
+                                                   multiselect=False,
+                                                   label=config.impacted_gender_label,
+                                                   value=config.features_and_options["ImpactedGender"][0])
+            impacted_race_dropdown = gr.Dropdown(choices=config.features_and_options["ImpactedRace"],
+                                                 multiselect=False,
+                                                 label=config.impacted_race_label,
+                                                 value=config.features_and_options["ImpactedRace"][0])
+            incident_precinct_dropdown = gr.Dropdown(choices=config.features_and_options["IncidentPrecinct"],
+                                                     multiselect=False,
+                                                     label=config.incident_precinct_label,
+                                                     value=config.features_and_options["IncidentPrecinct"][0])
+
+    with gr.Row():
+        with gr.Column():
+            model_dropdown = gr.Dropdown(choices=model.available_models("PenaltyCat"),
+                                         multiselect=False,
+                                         label=config.model_label)
+
+            out = gr.Textbox(label="Prediction")
+
+            input_components = [model_dropdown,
+                                current_rank_dropdown,
+                                incident_rank_dropdown,
+                                previous_complaints_slider,
+                                complaint_duration_days_slider,
+                                officer_gender_dropdown,
+                                officer_race_dropdown,
+                                days_on_force_slider,
+                                fado_type_dropdown,
+                                allegation_dropdown,
+                                location_type_dropdown,
+                                contact_outcome_dropdown,
+                                impacted_gender_dropdown,
+                                impacted_race_dropdown,
+                                incident_precinct_dropdown]
+            btn = gr.Button("Predict")
+            btn.click(fn=infer, inputs=input_components, outputs=out)
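Note that btn.click passes the component values to infer() positionally, so input_components has to stay in the same order as the parameters of infer() and infer_penalty_cat. A small sketch, meant to run inside block_penalty_cat.py itself, that makes the pairing explicit:

# sketch only: the i-th entry of input_components feeds the i-th parameter of infer()
import inspect

param_names = list(inspect.signature(infer).parameters)  # ['model_name', 'current_rank', ...]
for component, param in zip(input_components, param_names):
    print(param, "<-", type(component).__name__)  # e.g. model_name <- Dropdown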
config.py CHANGED
@@ -1,14 +1,13 @@
 # values
 target_columns = ["OfficerRace", "Officer Gender", "Penalty"]
 
+# IMPORTANT NOTE: The order of the options are important for the model to work
 features_and_options = {
     "CurrentRank": ['POM', 'POF', 'PO', 'PSA', 'SGT', 'SSA', 'SDS', 'DT3', 'DT2', 'DT1', 'DTS', 'LT', 'LSA', 'LCD',
                     'CPT', 'SRG', 'SCS', 'DCS', 'DI', 'INS', 'DC', 'AC', 'COD', 'CCA', 'CCT', 'CD', 'CMS', 'COH', 'COI',
                     'COP', 'COS', 'COT', 'CPB', 'DET', 'SGT DS', 'LT SA', 'LT CD', 'SGT SA', 'INSP', 'LT.', 'CHIEF',
                     'DT'],
 
-    # IMPORTANT NOTE: The order of the options are important for the model to work
-    # TODO: IncidentRank and CurrentRank aren't the same in the data, so the options should NOT be the same
     "IncidentRank": ['POM', 'POF', 'PO', 'PSA', 'SGT', 'SSA', 'SDS', 'DT3', 'DT2', 'DT1', 'DTS', 'LT', 'LSA', 'LCD',
                      'CPT', 'SRG', 'SCS', 'DCS', 'DI', 'INS', 'DC', 'AC', 'COD', 'CCA', 'CCT', 'CD', 'CMS', 'COH',
                      'COI', 'COP', 'COS', 'COT', 'CPB', 'DET', 'SGT DS', 'LT SA', 'LT CD', 'SGT SA', 'INSP', 'LT.',
@@ -91,6 +90,16 @@ officer_gender_l2_norm = {
     }
 }
 
+penalty_cat_l2_norm = {
+    "undersampling": {
+        "days_on_force": 14658,
+        "complaint_duration_days": 2817,
+        "previous_complaints": 112,
+        "current_rank": 13,
+        "incident_rank": 13
+    }
+}
+
 # labels
 model_label = "Choose a model"
 target_label = "What do you want to predict?"
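The new penalty_cat_l2_norm entry mirrors the existing officer_gender_l2_norm block: each value is the L2 norm used to scale the matching numeric feature at inference time. A minimal sketch of the assumed usage (the actual transform_* helpers live in preprocessor.py):

# sketch only: numeric inputs are assumed to be divided by their stored L2 norm
import config

norms = config.penalty_cat_l2_norm["undersampling"]
scaled_days_on_force = 700 / norms["days_on_force"]            # 700 / 14658 ≈ 0.0478
scaled_previous_complaints = 3 / norms["previous_complaints"]  # 3 / 112 ≈ 0.0268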
inference.py CHANGED
@@ -2,7 +2,7 @@ import numpy as np
 
 import config
 from model import load_models
-from preprocessor import process_officer_race, process_officer_gender
+from preprocessor import process_officer_race, process_officer_gender, process_penalty_cat
 
 
 def predict_officer_race(model_name, X):
@@ -24,6 +24,13 @@ def predict_officer_gender(model_name, X):
     return ["Male", "Female"][int(prediction[0])]
 
 
+def predict_penalty_cat(model_name, X):
+    model = load_models(model_name, 'PenaltyCat')
+    # 1 No Penalty, 0 Penalty
+    prediction = model.predict(X)
+    return ["No discipline Taken", "Discipline Taken"][int(prediction[0])]
+
+
 def infer_officer_race(model_name, current_rank, incident_rank, previous_complaints, complaint_duration_days,
                        days_on_force, officer_gender, fado_type, allegation, ccrb_disposition,
                        penalty_rec, penalty_cat, location_type, contact_outcome, impacted_gender, impacted_race,
@@ -49,3 +56,14 @@ def infer_officer_gender(model_name, current_rank, incident_rank, previous_compl
     prediction = f"The officer is predicted to be {prediction}"
     return prediction
 
+
+def infer_penalty_cat(model_name, current_rank, incident_rank, previous_complaints, complaint_duration_days,
+                      officer_gender, officer_race, days_on_force, fado_type, allegation, location_type, contact_outcome, impacted_gender, impacted_race,
+                      incident_precinct):
+    input_array = process_penalty_cat(model_name, current_rank, incident_rank, previous_complaints, complaint_duration_days,
+                                      officer_gender, officer_race, days_on_force, fado_type, allegation, location_type, contact_outcome, impacted_gender, impacted_race,
+                                      incident_precinct)
+    prediction = predict_penalty_cat(model_name, input_array)
+    prediction = f"**{prediction}** against the officer"
+    return prediction
+
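A minimal sketch of calling the new entry point directly. The model name matches one of the pickles added in this commit; the remaining option strings are hypothetical placeholders and, in practice, must be values from config.features_and_options:

# sketch only: direct call to infer_penalty_cat with placeholder option values
from inference import infer_penalty_cat

result = infer_penalty_cat("Logistic Regression Undersampling",
                           current_rank="POM", incident_rank="POM",
                           previous_complaints=3, complaint_duration_days=30,
                           officer_gender="Male", officer_race="White",
                           days_on_force=700, fado_type="Abuse of Authority",
                           allegation="Word", location_type="Street",
                           contact_outcome="Arrest", impacted_gender="Male",
                           impacted_race="Black", incident_precinct="75")
print(result)  # e.g. "**Discipline Taken** against the officer"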
model.py CHANGED
@@ -18,6 +18,10 @@ def load_models(model_name, target_column):
         model_path = target_column_root / f"{model_name}.pkl"
         return joblib.load(model_path)
 
+    if target_column == "PenaltyCat":
+        model_path = target_column_root / f"{model_name}.pkl"
+        return joblib.load(model_path)
+
 
 def get_folder_names(directory):
     return [name for name in os.listdir(directory) if os.path.isdir(os.path.join(directory, name))]
@@ -25,9 +29,9 @@ def get_folder_names(directory):
 
 def available_models(target_column):
     """Return available models for a given target column"""
-    # all_models = [path.stem for path in Path(f"models/{target_column}").glob("*.pkl")]
+
     if target_column == "OfficerRace":
         return get_folder_names("models/OfficerRace")
 
-    elif target_column == "OfficerGender":
-        return [os.path.splitext(name)[0] for name in os.listdir("models/OfficerGender") if name.endswith(".pkl")]
+    elif target_column == "OfficerGender" or target_column == "PenaltyCat":
+        return [os.path.splitext(name)[0] for name in os.listdir(f"models/{target_column}") if name.endswith(".pkl")]
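With this change, PenaltyCat models are discovered and loaded the same way as OfficerGender ones: flat .pkl files rather than per-model folders. A minimal sketch of the expected behaviour, assuming target_column_root resolves to models/<target_column>:

# sketch only: listing and loading the penalty-category models added in this commit
import model

print(model.available_models("PenaltyCat"))
# expected (order may vary): ['AdaBoost Undersampling', 'Decision Tree Undersampling',
#   'KNN Undersampling', 'Logistic Regression Undersampling', 'Random Forest Undersampling']

clf = model.load_models("Decision Tree Undersampling", "PenaltyCat")
# assumed to unpickle models/PenaltyCat/Decision Tree Undersampling.pkl via joblib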
models/PenaltyCat/AdaBoost Undersampling.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64d1fc138ccd50658dc07acfdd21ce277ae35e8d7cce6f2bee7d4e9d51a9b120
+size 143605131
models/PenaltyCat/Decision Tree Undersampling.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a8d0f1d37c0bba6ed7f760a7de98b47f5abcab64b6775a39058ae51cfe48708d
+size 647675
models/PenaltyCat/KNN Undersampling.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:458cb9942ef62e0fa2cae46e1d7310544f4bd2929a3a9ef717b0753946cbbf31
+size 27295716
models/PenaltyCat/Logistic Regression Undersampling.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79e0fe12881da3fb2f0edb1d1575e996a94ef846de610597c96b3ffbb9fd47ab
+size 4844
models/PenaltyCat/Random Forest Undersampling.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c1118146f515d9dd2680413370cdcced631d92ed3e82f41fe6c1a987885d9b8a
+size 96083022
preprocessor.py CHANGED
@@ -173,3 +173,43 @@ def process_officer_gender(model_name, current_rank, incident_rank, previous_com
     input_array = np.concatenate(arrays, dtype=np.float32)
     input_array = input_array.reshape(1, -1)
     return input_array
+
+def process_penalty_cat(model_name, current_rank, incident_rank, previous_complaints, complaint_duration_days,
+                        officer_gender, officer_race, days_on_force, fado_type, allegation, location_type, contact_outcome,
+                        impacted_gender, impacted_race,
+                        incident_precinct):
+    l2_norms = config.penalty_cat_l2_norm['undersampling' if 'Undersampling' in model_name else 'no_undersampling']
+
+    current_rank = transform_current_rank(l2_norms, current_rank)
+    incident_rank = transform_incident_rank(l2_norms, incident_rank)
+    previous_complaints = transform_previous_complaints(l2_norms, previous_complaints)
+    complaint_duration_days = transform_complaint_duration_days(l2_norms, complaint_duration_days)
+    officer_gender = transform_to_ohe('OfficerGender', officer_gender)
+    officer_race = transform_to_ohe('OfficerRace', officer_race, config.features_and_options_target_gender["OfficerRace"])
+    days_on_force = transform_days_on_force(l2_norms, days_on_force)
+    fado_type = transform_to_ohe('FADOType', fado_type)
+    allegation = transform_to_ohe('Allegation', allegation)
+    location_type = transform_to_ohe('LocationType', location_type)
+    contact_outcome = transform_to_ohe('ContactOutcome', contact_outcome)
+    impacted_gender = transform_to_ohe("ImpactedGender", impacted_gender)
+    impacted_race = transform_to_ohe("ImpactedRace", impacted_race)
+    incident_precinct = transform_to_ohe("IncidentPrecinct", incident_precinct)
+
+    arrays = (current_rank,
+              incident_rank,
+              days_on_force,
+              previous_complaints,
+              complaint_duration_days,
+              officer_gender,
+              officer_race,
+              fado_type,
+              allegation,
+              location_type,
+              contact_outcome,
+              impacted_gender,
+              impacted_race,
+              incident_precinct)
+
+    input_array = np.concatenate(arrays, dtype=np.float32)
+    input_array = input_array.reshape(1, -1)
+    return input_array
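process_penalty_cat reuses the transform_* helpers defined earlier in preprocessor.py, which are not shown in this diff. As a reading aid, a minimal sketch of the shape transform_to_ohe is assumed to have (one-hot encoding over the option list in config):

# sketch only, assuming transform_to_ohe one-hot encodes the selected option against
# the column's option list (config.features_and_options[column] unless overridden)
import numpy as np
import config

def transform_to_ohe_sketch(column, value, options=None):
    options = options if options is not None else config.features_and_options[column]
    ohe = np.zeros(len(options), dtype=np.float32)
    ohe[options.index(value)] = 1.0  # selected option gets a 1, everything else stays 0
    return ohe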