|
|
import joblib |
|
|
import pandas as pd |
|
|
import numpy as np |
|
|
from flask import Flask, request, jsonify |
|
|
from flask_cors import CORS |
|
|
from datetime import datetime |
|
|
|
|
|
|
|
|
# Bounding box of the gridded area; latlon_to_grid() measures offsets from
# the *_MIN corner and divides the box into equally sized cells.
LAT_MIN = 12.70
LAT_MAX = 13.30
LON_MIN = 77.30
LON_MAX = 78.00

# Number of cells along the latitude and longitude axes.
LAT_GRIDS = 50
LON_GRIDS = 50

# A model1 probability strictly above this value is reported as "UNSAFE".
THRESHOLD = 0.6
|
|
|
|
|
# Column names, in the order they are selected from the single-row frame
# that /predict passes to both models.
FEATURES = [
    "grid_x",
    "grid_y",
    "day_of_week",
    "is_weekend",
    "month",
    "crime_lag_1",
    "crime_lag_7",
    "crime_lag_30",
]
|
|
|
|
|
|
|
|
# The Flask application object; every route below is registered on it.
app = Flask(__name__)

# Enable flask_cors on the app with its default settings.
CORS(app)
|
|
|
|
|
|
|
|
# Load the serialized artifacts once at import time, in this order:
#   model1 - classifier whose positive-class probability drives SAFE/UNSAFE
#   model2 - classifier whose per-class probabilities rank crime types
#   le     - encoder used to turn class indices back into crime-type labels
# joblib.load unpickles its input, so these files must come from a trusted
# source.
model1, model2, le = (
    joblib.load(artifact_path)
    for artifact_path in ("model1.pkl", "model2.pkl", "label_encoder.pkl")
)
|
|
|
|
|
|
|
|
# Per-cell feature history used by get_lags(). FIR_DATE is converted to
# datetime so that sorting by it is chronological.
full = pd.read_csv("crime_grid_daily_features.csv").assign(
    FIR_DATE=lambda frame: pd.to_datetime(frame["FIR_DATE"])
)
|
|
|
|
|
|
|
|
def latlon_to_grid(lat, lon):
    """Convert a latitude/longitude pair into integer grid-cell indices.

    The box spanning LAT_MIN..LAT_MAX and LON_MIN..LON_MAX is split into
    LAT_GRIDS by LON_GRIDS equally sized cells; each index is the offset
    from the minimum corner divided by the cell size.

    Args:
        lat: Latitude, in the same units as LAT_MIN / LAT_MAX.
        lon: Longitude, in the same units as LON_MIN / LON_MAX.

    Returns:
        A ``(lat_index, lon_index)`` tuple of ints; callers unpack it as
        ``(grid_x, grid_y)``.

    Note:
        ``int()`` truncates toward zero and no bounds check is performed, so
        a coordinate outside the box can yield an index outside
        ``0..GRIDS-1``, and one just below a minimum maps to index 0.
    """
    lat_cell_size = (LAT_MAX - LAT_MIN) / LAT_GRIDS
    lon_cell_size = (LON_MAX - LON_MIN) / LON_GRIDS

    lat_index = int((lat - LAT_MIN) / lat_cell_size)
    lon_index = int((lon - LON_MIN) / lon_cell_size)
    return lat_index, lon_index
|
|
|
|
|
def get_lags(grid_x, grid_y):
    """Return recent average crime counts for one grid cell.

    Rows of the module-level ``full`` frame matching the cell are ordered by
    ``FIR_DATE`` and the mean of ``crime_count`` is taken over the last 1, 7
    and 30 rows. The windows count rows, not calendar days, and are always
    taken from the end of the stored history.

    Args:
        grid_x: Value to match against the ``grid_x`` column.
        grid_y: Value to match against the ``grid_y`` column.

    Returns:
        A 3-tuple ``(lag_1, lag_7, lag_30)``; ``(0, 0, 0)`` when the cell has
        no rows at all.
    """
    in_cell = (full["grid_x"] == grid_x) & (full["grid_y"] == grid_y)
    cell_history = full[in_cell].sort_values("FIR_DATE")

    # A cell with no history contributes zero for every lag.
    if cell_history.empty:
        return 0, 0, 0

    counts = cell_history["crime_count"]
    return tuple(counts.tail(window).mean() for window in (1, 7, 30))
|
|
|
|
|
|
|
|
|
|
|
@app.route("/health", methods=["GET"])
def health():
    """Liveness check: always answers with the JSON body {"status": "ok"}."""
    payload = {"status": "ok"}
    return jsonify(payload)
|
|
|
|
|
@app.route("/historical", methods=["GET"])
def historical():
    """Return a random sample of up to 5000 complete historical records.

    Rows containing any missing value are dropped before sampling, and the
    sample size is capped by the number of rows that remain.

    Returns:
        A JSON array of record objects, or a 503 response with an ``error``
        message when the ``raw_points`` data is not available.
    """
    # NOTE(review): `raw_points` is not defined anywhere in the visible part
    # of this module. Until it is loaded at import time, answer with an
    # explicit 503 instead of failing with a NameError.
    source = globals().get("raw_points")
    if source is None:
        return jsonify({"error": "historical data is not loaded"}), 503

    complete_rows = source.dropna()
    # Size the sample from the cleaned frame: sampling more rows than remain
    # after dropna() raises ValueError.
    sample_size = min(5000, len(complete_rows))
    points = complete_rows.sample(sample_size)
    return jsonify(points.to_dict(orient="records"))
|
|
|
|
|
def _parse_predict_payload(payload):
    """Validate a /predict request body and extract its fields.

    Args:
        payload: The decoded JSON body, or None when the request carried no
            parseable JSON.

    Returns:
        A ``(lat, lon, date)`` tuple: two finite floats and a ``datetime``.

    Raises:
        ValueError: If the body is not a JSON object, a required field is
            missing, a coordinate is not a finite number, or the date is not
            a ``YYYY-MM-DD`` string. The message is safe to show the client.
    """
    if not isinstance(payload, dict):
        raise ValueError("request body must be a JSON object")

    required = ("latitude", "longitude", "date")
    missing = [key for key in required if key not in payload]
    if missing:
        raise ValueError("missing required field(s): " + ", ".join(missing))

    try:
        lat = float(payload["latitude"])
        lon = float(payload["longitude"])
    except (TypeError, ValueError):
        raise ValueError("latitude and longitude must be numbers") from None

    # NaN / infinity would make the int() conversion in latlon_to_grid raise.
    if not (np.isfinite(lat) and np.isfinite(lon)):
        raise ValueError("latitude and longitude must be finite numbers")

    try:
        date = datetime.strptime(payload["date"], "%Y-%m-%d")
    except (TypeError, ValueError):
        raise ValueError("date must be a string in YYYY-MM-DD format") from None

    return lat, lon, date


@app.route("/predict", methods=["POST"])
def predict():
    """Score crime risk for a location on a given date.

    Expects a JSON object with ``latitude``, ``longitude`` and ``date``
    (``YYYY-MM-DD``). The location is mapped to a grid cell, the cell's lag
    features are looked up, and model1 yields the probability of the
    positive class. When that probability is strictly above THRESHOLD, the
    three highest-probability classes from model2 are added.

    Returns:
        JSON with ``crime_probability`` and ``risk`` ("SAFE" / "UNSAFE"),
        plus ``top_crimes`` (``type`` and ``confidence`` entries) for unsafe
        results. Invalid input yields a 400 response with an ``error``
        message.
    """
    try:
        # silent=True returns None for a missing or malformed body, so every
        # bad-input case is reported the same way.
        lat, lon, date = _parse_predict_payload(request.get_json(silent=True))
    except ValueError as exc:
        return jsonify({"error": str(exc)}), 400

    # NOTE(review): coordinates outside the LAT/LON bounding box are not
    # rejected here; latlon_to_grid performs no bounds check either.
    grid_x, grid_y = latlon_to_grid(lat, lon)
    crime_lag_1, crime_lag_7, crime_lag_30 = get_lags(grid_x, grid_y)

    weekday = date.weekday()
    row = pd.DataFrame([{
        "grid_x": grid_x,
        "grid_y": grid_y,
        "day_of_week": weekday,
        # weekday() is 5 for Saturday and 6 for Sunday.
        "is_weekend": int(weekday >= 5),
        "month": date.month,
        "crime_lag_1": crime_lag_1,
        "crime_lag_7": crime_lag_7,
        "crime_lag_30": crime_lag_30,
    }])
    features = row[FEATURES]

    # Column 1 of predict_proba is the probability of the second class.
    prob = model1.predict_proba(features)[0][1]
    is_unsafe = prob > THRESHOLD
    response = {
        "crime_probability": float(prob),
        "risk": "UNSAFE" if is_unsafe else "SAFE",
    }

    if is_unsafe:
        probs = model2.predict_proba(features)[0]
        # Indices of the three largest probabilities, highest first.
        top = np.argsort(probs)[-3:][::-1]

        # NOTE(review): this treats column index i of model2's output as the
        # encoded label i; confirm model2.classes_ is exactly 0..n-1.
        response["top_crimes"] = [
            {
                "type": le.inverse_transform([i])[0],
                "confidence": float(probs[i]),
            }
            for i in top
        ]

    return jsonify(response)
|
|
|
|
|
@app.route("/hotspots", methods=["GET"])
def hotspots():
    """Return every coordinate pair that occurs at least five times.

    Reads ``dataset_cleaned.csv`` on each call, counts rows per exact
    ``(Latitude, Longitude)`` pair and keeps the pairs whose count is 5 or
    more.

    Returns:
        A JSON array of objects with ``Latitude``, ``Longitude`` and
        ``count`` keys.
    """
    incidents = pd.read_csv("dataset_cleaned.csv")

    per_location = (
        incidents.groupby(['Latitude', 'Longitude'])
        .size()
        .reset_index(name='count')
    )

    frequent = per_location.loc[per_location['count'] >= 5]
    records = frequent.to_dict(orient="records")
    return jsonify(records)
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Run Flask's built-in server when the file is executed directly,
    # listening on all interfaces at port 5000.
    app.run(host="0.0.0.0", port=5000)