riyadhrazzaq committed
Commit f267af6 · 1 Parent(s): 740d621

supports penalty category (PenaltyCat) prediction
app.py CHANGED
@@ -2,9 +2,10 @@ import gradio as gr
 
 from block_officer_race import officerRaceDemo
 from block_officer_gender import officerGenderDemo
+from block_penalty_cat import penaltyCatDemo
 
-demo = gr.TabbedInterface([officerRaceDemo, officerGenderDemo],
-                          ["Predict OfficerRace", "Predict Officer Gender"],
+demo = gr.TabbedInterface([officerRaceDemo, officerGenderDemo, penaltyCatDemo],
+                          ["Predict OfficerRace", "Predict Officer Gender", "Predict Penalty Category"],
                           title="NYPD Complaints Predictor",
                           theme=gr.themes.Monochrome()
                           )
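For context, this hunk only registers the new tab; a minimal sketch of how the tabbed app is assumed to be served (the launch call lives elsewhere in app.py and is not part of this diff):

# sketch only: assumed entry point at the bottom of app.py
if __name__ == "__main__":
    demo.launch()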
block_penalty_cat.py ADDED
@@ -0,0 +1,135 @@
+import gradio as gr
+
+import config
+import model
+from inference import infer_penalty_cat
+
+
+def infer(model_name,
+          current_rank,
+          incident_rank,
+          previous_complaints,
+          complaint_duration_days,
+          officer_gender,
+          officer_race,
+          days_on_force,
+          fado_type,
+          allegation,
+          location_type,
+          contact_outcome,
+          impacted_gender,
+          impacted_race,
+          incident_precinct):
+    # throw error if model name is not selected
+    if not model_name:
+        raise gr.Error("Please select a model")
+
+    return infer_penalty_cat(model_name,
+                             current_rank,
+                             incident_rank,
+                             previous_complaints,
+                             complaint_duration_days,
+                             officer_gender,
+                             officer_race,
+                             days_on_force,
+                             fado_type,
+                             allegation,
+                             location_type,
+                             contact_outcome,
+                             impacted_gender,
+                             impacted_race,
+                             incident_precinct)
+
+
+with gr.Blocks() as penaltyCatDemo:
+    with gr.Row():
+        with gr.Column():
+            current_rank_dropdown = gr.Dropdown(choices=config.features_and_options["CurrentRank"],
+                                                multiselect=False,
+                                                label=config.current_rank_label,
+                                                value=config.features_and_options["CurrentRank"][0])
+            incident_rank_dropdown = gr.Dropdown(choices=config.features_and_options["IncidentRank"],
+                                                 multiselect=False,
+                                                 label=config.incident_rank_label,
+                                                 value=config.features_and_options["IncidentRank"][0])
+            previous_complaints_slider = gr.Slider(minimum=0,
+                                                   maximum=100,
+                                                   step=1,
+                                                   label=config.previous_complaints_label,
+                                                   value=3)
+            complaint_duration_days_slider = gr.Slider(minimum=0,
+                                                       maximum=100,
+                                                       step=1,
+                                                       label=config.complaint_duration_days_label,
+                                                       value=3)
+            officer_gender_dropdown = gr.Dropdown(choices=config.features_and_options["OfficerGender"],
+                                                  multiselect=False,
+                                                  interactive=True,
+                                                  label=config.officer_gender_label,
+                                                  value=config.features_and_options["OfficerGender"][0])
+            days_on_force_slider = gr.Number(label=config.days_on_force_label,
+                                             value=700)
+
+            officer_race_dropdown = gr.Dropdown(choices=config.features_and_options_target_gender["OfficerRace"],
+                                                multiselect=False,
+                                                label=config.officer_race_label,
+                                                value=config.features_and_options_target_gender["OfficerRace"][0])
+
+        with gr.Column():
+
+            fado_type_dropdown = gr.Dropdown(choices=config.features_and_options["FADOType"],
+                                             multiselect=False,
+                                             label=config.fado_type_label,
+                                             value=config.features_and_options["FADOType"][0])
+            allegation_dropdown = gr.Dropdown(choices=config.features_and_options["Allegation"],
+                                              multiselect=False,
+                                              label=config.allegation_label,
+                                              value=config.features_and_options["Allegation"][0])
+
+            location_type_dropdown = gr.Dropdown(choices=config.features_and_options["LocationType"],
+                                                 multiselect=False,
+                                                 label=config.location_type_label,
+                                                 value=config.features_and_options["LocationType"][0])
+            contact_outcome_dropdown = gr.Dropdown(choices=config.features_and_options["ContactOutcome"],
+                                                   multiselect=False,
+                                                   label=config.contact_outcome_label,
+                                                   value=config.features_and_options["ContactOutcome"][0])
+
+            impacted_gender_dropdown = gr.Dropdown(choices=config.features_and_options["ImpactedGender"],
+                                                   multiselect=False,
+                                                   label=config.impacted_gender_label,
+                                                   value=config.features_and_options["ImpactedGender"][0])
+            impacted_race_dropdown = gr.Dropdown(choices=config.features_and_options["ImpactedRace"],
+                                                 multiselect=False,
+                                                 label=config.impacted_race_label,
+                                                 value=config.features_and_options["ImpactedRace"][0])
+            incident_precinct_dropdown = gr.Dropdown(choices=config.features_and_options["IncidentPrecinct"],
+                                                     multiselect=False,
+                                                     label=config.incident_precinct_label,
+                                                     value=config.features_and_options["IncidentPrecinct"][0])
+
+    with gr.Row():
+        with gr.Column():
+            model_dropdown = gr.Dropdown(choices=model.available_models("PenaltyCat"),
+                                         multiselect=False,
+                                         label=config.model_label)
+
+            out = gr.Textbox(label="Prediction")
+
+            input_components = [model_dropdown,
+                                current_rank_dropdown,
+                                incident_rank_dropdown,
+                                previous_complaints_slider,
+                                complaint_duration_days_slider,
+                                officer_gender_dropdown,
+                                officer_race_dropdown,
+                                days_on_force_slider,
+                                fado_type_dropdown,
+                                allegation_dropdown,
+                                location_type_dropdown,
+                                contact_outcome_dropdown,
+                                impacted_gender_dropdown,
+                                impacted_race_dropdown,
+                                incident_precinct_dropdown]
+            btn = gr.Button("Predict")
+            btn.click(fn=infer, inputs=input_components, outputs=out)
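Note that btn.click passes the component values to infer() positionally, so input_components has to stay in the same order as the parameters of infer() and infer_penalty_cat. A small sketch, meant to run inside block_penalty_cat.py itself, that makes the pairing explicit:

# sketch only: the i-th entry of input_components feeds the i-th parameter of infer()
import inspect

param_names = list(inspect.signature(infer).parameters)  # ['model_name', 'current_rank', ...]
for component, param in zip(input_components, param_names):
    print(param, "<-", type(component).__name__)  # e.g. model_name <- Dropdown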
config.py CHANGED
@@ -1,14 +1,13 @@
 # values
 target_columns = ["OfficerRace", "Officer Gender", "Penalty"]
 
+# IMPORTANT NOTE: The order of the options are important for the model to work
 features_and_options = {
     "CurrentRank": ['POM', 'POF', 'PO', 'PSA', 'SGT', 'SSA', 'SDS', 'DT3', 'DT2', 'DT1', 'DTS', 'LT', 'LSA', 'LCD',
                     'CPT', 'SRG', 'SCS', 'DCS', 'DI', 'INS', 'DC', 'AC', 'COD', 'CCA', 'CCT', 'CD', 'CMS', 'COH', 'COI',
                     'COP', 'COS', 'COT', 'CPB', 'DET', 'SGT DS', 'LT SA', 'LT CD', 'SGT SA', 'INSP', 'LT.', 'CHIEF',
                     'DT'],
 
-    # IMPORTANT NOTE: The order of the options are important for the model to work
-    # TODO: IncidentRank and CurrentRank aren't the same in the data, so the options should NOT be the same
     "IncidentRank": ['POM', 'POF', 'PO', 'PSA', 'SGT', 'SSA', 'SDS', 'DT3', 'DT2', 'DT1', 'DTS', 'LT', 'LSA', 'LCD',
                      'CPT', 'SRG', 'SCS', 'DCS', 'DI', 'INS', 'DC', 'AC', 'COD', 'CCA', 'CCT', 'CD', 'CMS', 'COH',
                      'COI', 'COP', 'COS', 'COT', 'CPB', 'DET', 'SGT DS', 'LT SA', 'LT CD', 'SGT SA', 'INSP', 'LT.',
@@ -91,6 +90,16 @@ officer_gender_l2_norm = {
     }
 }
 
+penalty_cat_l2_norm = {
+    "undersampling": {
+        "days_on_force": 14658,
+        "complaint_duration_days": 2817,
+        "previous_complaints": 112,
+        "current_rank": 13,
+        "incident_rank": 13
+    }
+}
+
 # labels
 model_label = "Choose a model"
 target_label = "What do you want to predict?"
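The new penalty_cat_l2_norm entry mirrors the existing officer_gender_l2_norm block: each value is the L2 norm used to scale the matching numeric feature at inference time. A minimal sketch of the assumed usage (the actual transform_* helpers live in preprocessor.py):

# sketch only: numeric inputs are assumed to be divided by their stored L2 norm
import config

norms = config.penalty_cat_l2_norm["undersampling"]
scaled_days_on_force = 700 / norms["days_on_force"]            # 700 / 14658 ≈ 0.0478
scaled_previous_complaints = 3 / norms["previous_complaints"]  # 3 / 112 ≈ 0.0268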
inference.py CHANGED
@@ -2,7 +2,7 @@ import numpy as np
 
 import config
 from model import load_models
-from preprocessor import process_officer_race, process_officer_gender
+from preprocessor import process_officer_race, process_officer_gender, process_penalty_cat
 
 
 def predict_officer_race(model_name, X):
@@ -24,6 +24,13 @@ def predict_officer_gender(model_name, X):
     return ["Male", "Female"][int(prediction[0])]
 
 
+def predict_penalty_cat(model_name, X):
+    model = load_models(model_name, 'PenaltyCat')
+    # 1 No Penalty, 0 Penalty
+    prediction = model.predict(X)
+    return ["No discipline Taken", "Discipline Taken"][int(prediction[0])]
+
+
 def infer_officer_race(model_name, current_rank, incident_rank, previous_complaints, complaint_duration_days,
                        days_on_force, officer_gender, fado_type, allegation, ccrb_disposition,
                        penalty_rec, penalty_cat, location_type, contact_outcome, impacted_gender, impacted_race,
@@ -49,3 +56,14 @@ def infer_officer_gender(model_name, current_rank, incident_rank, previous_compl
     prediction = f"The officer is predicted to be {prediction}"
     return prediction
 
+
+def infer_penalty_cat(model_name, current_rank, incident_rank, previous_complaints, complaint_duration_days,
+                      officer_gender, officer_race, days_on_force, fado_type, allegation, location_type, contact_outcome, impacted_gender, impacted_race,
+                      incident_precinct):
+    input_array = process_penalty_cat(model_name, current_rank, incident_rank, previous_complaints, complaint_duration_days,
+                                      officer_gender, officer_race, days_on_force, fado_type, allegation, location_type, contact_outcome, impacted_gender, impacted_race,
+                                      incident_precinct)
+    prediction = predict_penalty_cat(model_name, input_array)
+    prediction = f"**{prediction}** against the officer"
+    return prediction
+
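A minimal sketch of calling the new entry point directly. The model name matches one of the pickles added in this commit; the remaining option strings are hypothetical placeholders and, in practice, must be values from config.features_and_options:

# sketch only: direct call to infer_penalty_cat with placeholder option values
from inference import infer_penalty_cat

result = infer_penalty_cat("Logistic Regression Undersampling",
                           current_rank="POM", incident_rank="POM",
                           previous_complaints=3, complaint_duration_days=30,
                           officer_gender="Male", officer_race="White",
                           days_on_force=700, fado_type="Abuse of Authority",
                           allegation="Word", location_type="Street",
                           contact_outcome="Arrest", impacted_gender="Male",
                           impacted_race="Black", incident_precinct="75")
print(result)  # e.g. "**Discipline Taken** against the officer"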
model.py CHANGED
@@ -18,6 +18,10 @@ def load_models(model_name, target_column):
         model_path = target_column_root / f"{model_name}.pkl"
         return joblib.load(model_path)
 
+    if target_column == "PenaltyCat":
+        model_path = target_column_root / f"{model_name}.pkl"
+        return joblib.load(model_path)
+
 
 def get_folder_names(directory):
     return [name for name in os.listdir(directory) if os.path.isdir(os.path.join(directory, name))]
@@ -25,9 +29,9 @@ def get_folder_names(directory):
 
 def available_models(target_column):
     """Return available models for a given target column"""
-    # all_models = [path.stem for path in Path(f"models/{target_column}").glob("*.pkl")]
+
     if target_column == "OfficerRace":
         return get_folder_names("models/OfficerRace")
 
-    elif target_column == "OfficerGender":
-        return [os.path.splitext(name)[0] for name in os.listdir("models/OfficerGender") if name.endswith(".pkl")]
+    elif target_column == "OfficerGender" or target_column == "PenaltyCat":
+        return [os.path.splitext(name)[0] for name in os.listdir(f"models/{target_column}") if name.endswith(".pkl")]
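With this change, PenaltyCat models are discovered and loaded the same way as OfficerGender ones: flat .pkl files rather than per-model folders. A minimal sketch of the expected behaviour, assuming target_column_root resolves to models/<target_column>:

# sketch only: listing and loading the penalty-category models added in this commit
import model

print(model.available_models("PenaltyCat"))
# expected (order may vary): ['AdaBoost Undersampling', 'Decision Tree Undersampling',
#   'KNN Undersampling', 'Logistic Regression Undersampling', 'Random Forest Undersampling']

clf = model.load_models("Decision Tree Undersampling", "PenaltyCat")
# assumed to unpickle models/PenaltyCat/Decision Tree Undersampling.pkl via joblib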
models/PenaltyCat/AdaBoost Undersampling.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64d1fc138ccd50658dc07acfdd21ce277ae35e8d7cce6f2bee7d4e9d51a9b120
+size 143605131
models/PenaltyCat/Decision Tree Undersampling.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a8d0f1d37c0bba6ed7f760a7de98b47f5abcab64b6775a39058ae51cfe48708d
+size 647675
models/PenaltyCat/KNN Undersampling.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:458cb9942ef62e0fa2cae46e1d7310544f4bd2929a3a9ef717b0753946cbbf31
+size 27295716
models/PenaltyCat/Logistic Regression Undersampling.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79e0fe12881da3fb2f0edb1d1575e996a94ef846de610597c96b3ffbb9fd47ab
+size 4844
models/PenaltyCat/Random Forest Undersampling.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c1118146f515d9dd2680413370cdcced631d92ed3e82f41fe6c1a987885d9b8a
+size 96083022
preprocessor.py CHANGED
@@ -173,3 +173,43 @@ def process_officer_gender(model_name, current_rank, incident_rank, previous_com
     input_array = np.concatenate(arrays, dtype=np.float32)
     input_array = input_array.reshape(1, -1)
     return input_array
+
+def process_penalty_cat(model_name, current_rank, incident_rank, previous_complaints, complaint_duration_days,
+                        officer_gender, officer_race, days_on_force, fado_type, allegation, location_type, contact_outcome,
+                        impacted_gender, impacted_race,
+                        incident_precinct):
+    l2_norms = config.penalty_cat_l2_norm['undersampling' if 'Undersampling' in model_name else 'no_undersampling']
+
+    current_rank = transform_current_rank(l2_norms, current_rank)
+    incident_rank = transform_incident_rank(l2_norms, incident_rank)
+    previous_complaints = transform_previous_complaints(l2_norms, previous_complaints)
+    complaint_duration_days = transform_complaint_duration_days(l2_norms, complaint_duration_days)
+    officer_gender = transform_to_ohe('OfficerGender', officer_gender)
+    officer_race = transform_to_ohe('OfficerRace', officer_race, config.features_and_options_target_gender["OfficerRace"])
+    days_on_force = transform_days_on_force(l2_norms, days_on_force)
+    fado_type = transform_to_ohe('FADOType', fado_type)
+    allegation = transform_to_ohe('Allegation', allegation)
+    location_type = transform_to_ohe('LocationType', location_type)
+    contact_outcome = transform_to_ohe('ContactOutcome', contact_outcome)
+    impacted_gender = transform_to_ohe("ImpactedGender", impacted_gender)
+    impacted_race = transform_to_ohe("ImpactedRace", impacted_race)
+    incident_precinct = transform_to_ohe("IncidentPrecinct", incident_precinct)
+
+    arrays = (current_rank,
+              incident_rank,
+              days_on_force,
+              previous_complaints,
+              complaint_duration_days,
+              officer_gender,
+              officer_race,
+              fado_type,
+              allegation,
+              location_type,
+              contact_outcome,
+              impacted_gender,
+              impacted_race,
+              incident_precinct)
+
+    input_array = np.concatenate(arrays, dtype=np.float32)
+    input_array = input_array.reshape(1, -1)
+    return input_array
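process_penalty_cat reuses the transform_* helpers defined earlier in preprocessor.py, which are not shown in this diff. As a reading aid, a minimal sketch of the shape transform_to_ohe is assumed to have (one-hot encoding over the option list in config):

# sketch only, assuming transform_to_ohe one-hot encodes the selected option against
# the column's option list (config.features_and_options[column] unless overridden)
import numpy as np
import config

def transform_to_ohe_sketch(column, value, options=None):
    options = options if options is not None else config.features_and_options[column]
    ohe = np.zeros(len(options), dtype=np.float32)
    ohe[options.index(value)] = 1.0  # selected option gets a 1, everything else stays 0
    return ohe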