File size: 4,658 Bytes
217da35
 
 
 
 
 
 
953bc5b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217da35
4e27464
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217da35
953bc5b
217da35
 
 
 
 
 
 
 
 
953bc5b
217da35
 
 
 
 
 
 
 
5146748
217da35
 
 
f520cdb
 
 
 
 
 
f53d6a1
5146748
8957a74
5146748
953bc5b
f53d6a1
217da35
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import streamlit as st
import pandas as pd
from sodapy import Socrata
import hopsworks
import joblib
import xgboost as xgb

def unencode_weekday(fri, mon, sat, sun, thu, tue, wed):
    """Turn a set of per-weekday indicator flags back into a weekday name.

    The flags are examined in parameter order (Friday, Monday, Saturday,
    Sunday, Thursday, Tuesday, Wednesday) and the name belonging to the
    first flag equal to 1.0 is returned.

    Returns:
        The weekday name, or "Invalid Weekday" if no flag equals 1.0.
    """
    flagged_names = (
        (fri, "Friday"),
        (mon, "Monday"),
        (sat, "Saturday"),
        (sun, "Sunday"),
        (thu, "Thursday"),
        (tue, "Tuesday"),
        (wed, "Wednesday"),
    )
    # First match wins, mirroring the parameter order.
    for flag, name in flagged_names:
        if flag == 1.0:
            return name
    return "Invalid Weekday"

def unencode_report_type_code(ii, iss, vi, vs):
    """Turn per-code indicator flags back into a report type code string.

    The flags are examined in parameter order (II, IS, VI, VS) and the code
    belonging to the first flag equal to 1.0 is returned.

    Returns:
        "II", "IS", "VI" or "VS", or "Invalid Report Type Code" if no flag
        equals 1.0.
    """
    candidates = ((ii, "II"), (iss, "IS"), (vi, "VI"), (vs, "VS"))
    # next() stops at the first flag that is set; the default covers "none set".
    return next(
        (code for flag, code in candidates if flag == 1.0),
        "Invalid Report Type Code",
    )

def unencode_police_district(bay, cen, ing, mis, nor, out, par, ric, sou, tar, ten):
    """Turn per-district indicator flags back into a police district name.

    The flags are examined in parameter order and the district belonging to
    the first flag equal to 1.0 is returned.

    Returns:
        The district name, or "Invalid Police District" if no flag equals 1.0.
    """
    indicators = (bay, cen, ing, mis, nor, out, par, ric, sou, tar, ten)
    districts = (
        "Bayview",
        "Central",
        "Ingleside",
        "Mission",
        "Northern",
        "OutOfSF",
        "Park",
        "Richmond",
        "Southern",
        "Taraval",
        "Tenderloin",
    )
    # The two tuples are positionally aligned; the first set indicator wins.
    for indicator, district in zip(indicators, districts):
        if indicator == 1.0:
            return district
    return "Invalid Police District"


# Streamlit page script: fetch SF incident reports from the city's open-data
# portal, preprocess them, predict the incident category of the 100 most
# recent reports with a model from the Hopsworks model registry, and show
# actual vs. predicted categories in a table.
import os  # only needed in this section, for reading credentials and joining paths

st.set_page_config(layout="wide")
st.title('Latest SF Incident Category Prediction')

# --- Fetch raw incident reports ----------------------------------------------
# Credentials are read from the environment instead of being committed to the
# source file. Configure SOCRATA_APP_TOKEN / SOCRATA_USERNAME /
# SOCRATA_PASSWORD in the deployment (e.g. as secrets). Unset variables
# resolve to None; per sodapy's documentation the client then works without
# authentication but is subject to stricter throttling.
# NOTE(review): the token and password that used to be hard-coded here were
# exposed in version control and should be rotated.
client = Socrata(
    "data.sfgov.org",
    os.environ.get("SOCRATA_APP_TOKEN"),
    username=os.environ.get("SOCRATA_USERNAME"),
    password=os.environ.get("SOCRATA_PASSWORD"),
)
try:
    results = client.get("wg3w-h783", limit=800000)
finally:
    # Release the underlying HTTP session even if the request fails.
    client.close()
results_df = pd.DataFrame.from_records(results)

# --- Preprocess and keep the most recent incidents ---------------------------
from preprocessor_pipeline import preprocessing_incident
results_df_preprocessed = preprocessing_incident(results_df)
results_df_preprocessed["incident_datetime"] = pd.to_datetime(
    results_df_preprocessed["incident_datetime"]
)
# Newest first, top 100 rows. The explicit copy makes the later column
# assignments operate on an independent frame rather than on a slice.
results_df_preprocessed = (
    results_df_preprocessed
    .sort_values(by='incident_datetime', ascending=False)
    .head(100)
    .copy()
)

# --- Load the trained model --------------------------------------------------
project = hopsworks.login()
fs = project.get_feature_store()  # not used below; kept so module-level names are unchanged
mr = project.get_model_registry()
# NOTE(review): "incident_modal" may be a typo for "incident_model", but it
# must match the name the model was registered under — confirm before changing.
model = mr.get_model("incident_modal", version=1)
model_dir = model.download()
model = joblib.load(os.path.join(model_dir, "incident_model.pkl"))

# --- Predict -----------------------------------------------------------------
# The model is fed every preprocessed column except the timestamp and the
# true label.
batch_data = results_df_preprocessed.drop(columns=['incident_datetime', 'incident_category'])
y_pred = model.predict(batch_data)

# --- Collapse indicator columns back into readable labels --------------------
# Each list is ordered to match the positional parameters of its decoder.
weekday_columns = [
    "incident_day_of_week_Friday",
    "incident_day_of_week_Monday",
    "incident_day_of_week_Saturday",
    "incident_day_of_week_Sunday",
    "incident_day_of_week_Thursday",
    "incident_day_of_week_Tuesday",
    "incident_day_of_week_Wednesday",
]
report_type_columns = [
    "report_type_code_II",
    "report_type_code_IS",
    "report_type_code_VI",
    "report_type_code_VS",
]
police_district_columns = [
    "police_district_Bayview",
    "police_district_Central",
    "police_district_Ingleside",
    "police_district_Mission",
    "police_district_Northern",
    "police_district_OutOfSF",
    "police_district_Park",
    "police_district_Richmond",
    "police_district_Southern",
    "police_district_Taraval",
    "police_district_Tenderloin",
]
for label_column, indicator_columns, decoder in (
    ("incident_day_of_week", weekday_columns, unencode_weekday),
    ("report_type_code", report_type_columns, unencode_report_type_code),
    ("police_district", police_district_columns, unencode_police_district),
):
    # Bind the decoder as a default argument so each iteration uses its own.
    results_df_preprocessed[label_column] = results_df_preprocessed[indicator_columns].apply(
        lambda row, decode=decoder: decode(*row), axis=1
    )
    results_df_preprocessed.drop(columns=indicator_columns, inplace=True)

# --- Attach predictions and render -------------------------------------------
# Predictions come back in row order, so after renumbering the rows 0..n-1
# they can be assigned positionally.
results_df_preprocessed.reset_index(drop=True, inplace=True)
results_df_preprocessed["incident_category_pred"] = y_pred

df = results_df_preprocessed[['incident_datetime', 'latitude', 'longitude', 'incident_day_of_week', 'report_type_code', 'police_district', 'incident_category', 'incident_category_pred']]

st.write(df)
# Clicking any Streamlit widget re-executes the script, which refreshes the data.
st.button("Re-run")