"""Flask API serving crime-risk predictions and historical crime points.

Routes:
    GET  /health      liveness probe
    GET  /historical  random sample of historical crime coordinates
    POST /predict     risk prediction for a latitude/longitude/date
    GET  /hotspots    coordinates with repeated recorded occurrences
"""
from datetime import datetime
from functools import lru_cache

import joblib
import numpy as np
import pandas as pd
from flask import Flask, jsonify, request
from flask_cors import CORS

# ---------------- CONFIG ----------------
# Bounding box of the covered area, split into LAT_GRIDS x LON_GRIDS cells.
LAT_MIN = 12.70
LAT_MAX = 13.30
LON_MIN = 77.30
LON_MAX = 78.00

LAT_GRIDS = 50
LON_GRIDS = 50

# model1 probabilities strictly above this value are reported as "UNSAFE".
THRESHOLD = 0.6

# Column order passed to both models.
FEATURES = [
    "grid_x",
    "grid_y",
    "day_of_week",
    "is_weekend",
    "month",
    "crime_lag_1",
    "crime_lag_7",
    "crime_lag_30",
]

CLEANED_DATASET_PATH = "dataset_cleaned.csv"
HISTORICAL_SAMPLE_SIZE = 5000  # maximum number of points returned by /historical
HOTSPOT_MIN_COUNT = 5          # minimum occurrences for a location to be a hotspot
TOP_CRIME_COUNT = 3            # number of crime types reported for unsafe cells

# ---------------- APP ----------------
app = Flask(__name__)
CORS(app)

# ---------------- LOAD MODELS ----------------
# NOTE: joblib.load unpickles arbitrary objects; these files must come from a
# trusted source.
model1 = joblib.load("model1.pkl")
model2 = joblib.load("model2.pkl")
le = joblib.load("label_encoder.pkl")

# ---------------- LOAD DATA ----------------
full = pd.read_csv("crime_grid_daily_features.csv")
full["FIR_DATE"] = pd.to_datetime(full["FIR_DATE"])


# ---------------- HELPERS ----------------
@lru_cache(maxsize=1)
def _load_cleaned_dataset():
    """Read the cleaned dataset from disk once and reuse it across requests.

    The load is deferred to first use so a missing file does not prevent the
    application from starting. The returned frame is shared between callers
    and must not be mutated.
    """
    return pd.read_csv(CLEANED_DATASET_PATH)


def _bad_request(message):
    """Build a JSON error response with HTTP status 400."""
    return jsonify({"error": message}), 400


def latlon_to_grid(lat, lon):
    """Map a latitude/longitude pair to ``(grid_x, grid_y)`` cell indices.

    ``grid_x`` is derived from latitude and ``grid_y`` from longitude. The
    indices are clamped to the valid range so that a coordinate lying exactly
    on the upper boundary lands in the last cell instead of one past it.
    """
    lat_step = (LAT_MAX - LAT_MIN) / LAT_GRIDS
    lon_step = (LON_MAX - LON_MIN) / LON_GRIDS
    grid_x = int((lat - LAT_MIN) / lat_step)
    grid_y = int((lon - LON_MIN) / lon_step)
    return (
        min(max(grid_x, 0), LAT_GRIDS - 1),
        min(max(grid_y, 0), LON_GRIDS - 1),
    )


def get_lags(grid_x, grid_y):
    """Return the three lag features for one grid cell.

    The values are the mean ``crime_count`` over the most recent 1, 7 and 30
    rows (ordered by ``FIR_DATE``) recorded for the cell in ``full``. A cell
    with no rows yields ``(0, 0, 0)``.
    """
    in_cell = (full["grid_x"] == grid_x) & (full["grid_y"] == grid_y)
    counts = full.loc[in_cell].sort_values("FIR_DATE")["crime_count"]

    if counts.empty:
        return 0, 0, 0

    return (
        counts.tail(1).mean(),
        counts.tail(7).mean(),
        counts.tail(30).mean(),
    )


# ---------------- ROUTES ----------------
@app.route("/health", methods=["GET"])
def health():
    """Liveness probe."""
    return jsonify({"status": "ok"})


@app.route("/historical", methods=["GET"])
def historical():
    """Return a random sample of at most 5000 historical crime coordinates."""
    # NOTE(review): the original referenced an undefined ``raw_points`` and
    # raised NameError on every call. The coordinate columns of the cleaned
    # dataset are assumed to be the intended source - confirm.
    points = _load_cleaned_dataset()[["Latitude", "Longitude"]].dropna()
    # Size the sample from the frame *after* dropping incomplete rows;
    # otherwise sample() raises when the frame shrinks below the request.
    sample = points.sample(min(HISTORICAL_SAMPLE_SIZE, len(points)))
    return jsonify(sample.to_dict(orient="records"))


@app.route("/predict", methods=["POST"])
def predict():
    """Predict crime risk for a location and date.

    Expects a JSON object with ``latitude``, ``longitude`` and ``date``
    (``YYYY-MM-DD``). Responds with ``crime_probability`` and ``risk``
    (``"UNSAFE"``/``"SAFE"``); unsafe results also carry ``top_crimes``, the
    three most probable crime types according to ``model2``.

    Malformed bodies and coordinates outside the configured bounding box are
    answered with HTTP 400 and an ``error`` message.
    """
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return _bad_request("Request body must be a JSON object")

    try:
        lat = float(payload["latitude"])
        lon = float(payload["longitude"])
        date = datetime.strptime(payload["date"], "%Y-%m-%d")
    except KeyError as exc:
        return _bad_request(f"Missing required field: {exc.args[0]}")
    except (TypeError, ValueError):
        return _bad_request(
            "latitude and longitude must be numbers and date must be YYYY-MM-DD"
        )

    # The chained comparison is False for NaN, so NaN is rejected here too.
    if not (LAT_MIN <= lat <= LAT_MAX and LON_MIN <= lon <= LON_MAX):
        return _bad_request("Coordinates are outside the covered area")

    grid_x, grid_y = latlon_to_grid(lat, lon)
    crime_lag_1, crime_lag_7, crime_lag_30 = get_lags(grid_x, grid_y)
    weekday = date.weekday()

    row = pd.DataFrame([{
        "grid_x": grid_x,
        "grid_y": grid_y,
        "day_of_week": weekday,
        "is_weekend": int(weekday >= 5),
        "month": date.month,
        "crime_lag_1": crime_lag_1,
        "crime_lag_7": crime_lag_7,
        "crime_lag_30": crime_lag_30,
    }])
    features = row[FEATURES]

    prob = model1.predict_proba(features)[0][1]
    is_unsafe = prob > THRESHOLD

    response = {
        "crime_probability": float(prob),
        "risk": "UNSAFE" if is_unsafe else "SAFE",
    }

    if is_unsafe:
        probs = model2.predict_proba(features)[0]
        # Indices of the highest probabilities, most likely first.
        top = np.argsort(probs)[-TOP_CRIME_COUNT:][::-1]
        # NOTE(review): assumes model2's probability columns line up with the
        # label encoder's integer codes - confirm against the training code.
        labels = le.inverse_transform(top)
        response["top_crimes"] = [
            {"type": label, "confidence": float(probs[idx])}
            for label, idx in zip(labels, top)
        ]

    return jsonify(response)


@app.route("/hotspots", methods=["GET"])
def hotspots():
    """Return every coordinate pair with at least 5 recorded occurrences."""
    dataset = _load_cleaned_dataset()

    # Count occurrences per exact (Latitude, Longitude) pair.
    location_counts = (
        dataset.groupby(["Latitude", "Longitude"])
        .size()
        .reset_index(name="count")
    )

    frequent = location_counts[location_counts["count"] >= HOTSPOT_MIN_COUNT]
    return jsonify(frequent.to_dict(orient="records"))


# ---------------- RUN ----------------
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5000)