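"""OpenSight API: Flask backend serving crime hotspots, statistics, risk
prediction, and location search over a cleaned Bengaluru crime dataset,
with optional pre-trained models loaded at startup."""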

import os
import pickle
import traceback
import warnings
from datetime import datetime

import joblib
import pandas as pd
from flask import Flask, jsonify, request
from flask_cors import CORS

warnings.filterwarnings('ignore')

app = Flask(__name__)
CORS(app, resources={r"/api/*": {"origins": "*"}})

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
MODEL_DIR = os.path.join(BASE_DIR, 'model')
DATASET_PATH = os.path.join(MODEL_DIR, 'dataset_cleaned.csv')
print(f"\n📂 BASE_DIR: {BASE_DIR}") |
|
|
print(f"📊 DATASET_PATH: {DATASET_PATH}") |
|
|
print(f"📂 MODEL_DIR: {MODEL_DIR}") |
|
|
print(f"✓ Dataset exists: {os.path.exists(DATASET_PATH)}") |
|
|
print(f"✓ Model dir exists: {os.path.exists(MODEL_DIR)}\n") |
|
|
|
|
|

df = None
model1 = None
model2 = None
le = None


def load_dataset():
    """Load the crime dataset"""
    global df
    try:
        if not os.path.exists(DATASET_PATH):
            print(f"❌ Dataset not found at: {DATASET_PATH}")
            df = pd.DataFrame()
            return False

        df = pd.read_csv(DATASET_PATH)
        print(f"✅ Dataset loaded: {len(df)} records")
        print(f"   Columns: {list(df.columns)}")
        return True
    except Exception as e:
        print(f"❌ Error loading dataset: {e}")
        traceback.print_exc()
        df = pd.DataFrame()
        return False


def _load_artifact(path, name):
    """Load a serialized artifact, trying joblib first with a pickle fallback."""
    if not os.path.exists(path):
        print(f"⚠️ {name} not found")
        return None
    try:
        artifact = joblib.load(path)
        print(f"✅ {name} loaded successfully")
        return artifact
    except Exception as e:
        print(f"⚠️ Joblib failed for {name}: {e}")
        try:
            with open(path, 'rb') as f:
                artifact = pickle.load(f)
            print(f"✅ {name} loaded with pickle")
            return artifact
        except Exception as e2:
            print(f"❌ Failed to load {name}: {e2}")
            return None


def load_models():
    """Load trained models with fallback options"""
    global model1, model2, le

    print("\n📦 Attempting to load models...")
    print(f"   Looking in: {MODEL_DIR}\n")

    model1_path = os.path.join(MODEL_DIR, 'model1.pkl')
    model2_path = os.path.join(MODEL_DIR, 'model2.pkl')
    le_path = os.path.join(MODEL_DIR, 'label_encoder.pkl')

    print(f"Model1 path: {model1_path} (exists: {os.path.exists(model1_path)})")
    print(f"Model2 path: {model2_path} (exists: {os.path.exists(model2_path)})")
    print(f"LE path: {le_path} (exists: {os.path.exists(le_path)})\n")

    model1 = _load_artifact(model1_path, 'Model1')
    model2 = _load_artifact(model2_path, 'Model2')
    le = _load_artifact(le_path, 'LabelEncoder')

    if model1 is None and model2 is None:
        print("\n⚠️ Using MOCK predictions (models not available)")
    else:
        print("\n✅ Models ready for predictions")
print("\n" + "="*60) |
|
|
print("🚀 OPENSIGHT API INITIALIZATION") |
|
|
print("="*60) |
|
|
load_dataset() |
|
|
load_models() |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|


# Geographic bounds for Bengaluru.
LAT_MIN, LAT_MAX = 12.70, 13.30
LON_MIN, LON_MAX = 77.30, 78.00


def is_in_bengaluru(lat, lon):
    """Check if coordinates are within Bengaluru bounds"""
    return LAT_MIN <= lat <= LAT_MAX and LON_MIN <= lon <= LON_MAX


def calculate_risk_level(crime_count):
    """Convert crime count to risk level"""
    if crime_count >= 20:
        return "high"
    elif crime_count >= 10:
        return "medium"
    else:
        return "low"


@app.route('/health', methods=['GET'])
def health():
    """Health check endpoint"""
    return jsonify({
        'status': 'ok',
        'message': 'OpenSight API is running',
        'dataset_loaded': df is not None and not df.empty,
        'model1_loaded': model1 is not None,
        'model2_loaded': model2 is not None,
        'models_available': model1 is not None or model2 is not None
    })


@app.route('/api/hotspots', methods=['GET'])
def get_hotspots():
    """Get crime hotspots from dataset with grid aggregation"""
    try:
        if df is None or df.empty:
            return jsonify({'error': 'Dataset not loaded'}), 500

        city = request.args.get('city', 'bangalore').lower()
        # Threshold is applied to the normalized (0-1) cell counts below;
        # the default of 1 keeps only the busiest cell(s).
        threshold = float(request.args.get('threshold', 1))

        print(f"📍 Getting hotspots for {city} with threshold {threshold}")

        # Keep only records inside the Bengaluru bounding box.
        filtered_df = df[
            (df['Latitude'] >= LAT_MIN) &
            (df['Latitude'] <= LAT_MAX) &
            (df['Longitude'] >= LON_MIN) &
            (df['Longitude'] <= LON_MAX)
        ].copy()

        if filtered_df.empty:
            print("⚠️ No data in Bengaluru bounds")
            return jsonify({'success': True, 'hotspots': [], 'total': 0})

        print(f"✓ Found {len(filtered_df)} crimes in Bengaluru")

        # Snap each point to a 0.05° grid (roughly 5.5 km cells at this latitude).
        filtered_df['grid_lat'] = (filtered_df['Latitude'] * 20).round() / 20
        filtered_df['grid_lon'] = (filtered_df['Longitude'] * 20).round() / 20
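
        # For instance, a latitude of 12.9352 snaps to its cell value as:
        #   12.9352 * 20 = 258.704 -> round() -> 259 -> / 20 = 12.95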

        # Aggregate per cell: mean coordinates plus incident count. Counting
        # on 'Latitude' (always present after the bounds filter) avoids
        # depending on any particular crime-type column name.
        hotspots_agg = filtered_df.groupby(['grid_lat', 'grid_lon']).agg(
            latitude=('Latitude', 'mean'),
            longitude=('Longitude', 'mean'),
            crimeCount=('Latitude', 'count')
        ).reset_index()

        # Normalize counts to 0-1 so the threshold is scale-independent.
        max_crimes = hotspots_agg['crimeCount'].max()
        if max_crimes > 0:
            hotspots_agg['normalized_count'] = hotspots_agg['crimeCount'] / max_crimes
        else:
            hotspots_agg['normalized_count'] = 0

        hotspots_agg = hotspots_agg[hotspots_agg['normalized_count'] >= threshold]

        hotspots_agg['riskLevel'] = hotspots_agg['crimeCount'].apply(calculate_risk_level)

        hotspots = []
        for idx, row in hotspots_agg.iterrows():
            hotspots.append({
                'id': f"hotspot-{idx}",
                'latitude': float(row['latitude']),
                'longitude': float(row['longitude']),
                'riskLevel': row['riskLevel'],
                'crimeCount': int(row['crimeCount'])
            })

        # Busiest cells first.
        hotspots.sort(key=lambda x: x['crimeCount'], reverse=True)

        print(f"✅ Returning {len(hotspots)} hotspots")
        return jsonify({
            'success': True,
            'hotspots': hotspots,
            'total': len(hotspots)
        })

    except Exception as e:
        print(f"❌ Error in get_hotspots: {e}")
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500


@app.route('/api/statistics', methods=['GET'])
def get_statistics():
    """Get crime statistics"""
    try:
        if df is None or df.empty:
            print("❌ Dataset is empty!")
            return jsonify({'error': 'Dataset not loaded'}), 500

        print("📊 Calculating statistics...")
        print(f"   Dataset shape: {df.shape}")
        print(f"   Columns: {list(df.columns)}")

        city = request.args.get('city', 'bangalore').lower()

        if 'Latitude' not in df.columns or 'Longitude' not in df.columns:
            print("❌ Missing Latitude/Longitude columns")
            return jsonify({'error': 'Dataset missing required columns'}), 500

        filtered_df = df[
            (df['Latitude'] >= LAT_MIN) &
            (df['Latitude'] <= LAT_MAX) &
            (df['Longitude'] >= LON_MIN) &
            (df['Longitude'] <= LON_MAX)
        ]

        print(f"   Filtered: {len(filtered_df)} records in bounds")

        total_crimes = len(filtered_df)

        # Count high-risk cells (20+ incidents) on the same 0.05° grid used
        # by the hotspots endpoint.
        filtered_df_copy = filtered_df.copy()
        filtered_df_copy['grid_lat'] = (filtered_df_copy['Latitude'] * 20).round() / 20
        filtered_df_copy['grid_lon'] = (filtered_df_copy['Longitude'] * 20).round() / 20
        hotspots_count = filtered_df_copy.groupby(['grid_lat', 'grid_lon']).size()
        high_risk_count = (hotspots_count >= 20).sum()

        print(f"   Hotspots: {high_risk_count}, Total crimes: {total_crimes}")

        # Daily counts for the most recent 30 dates; 'predicted' is a simple
        # +5% projection, not a model output.
        time_series_data = []
        if 'Date' in filtered_df.columns:
            try:
                date_counts = filtered_df.groupby('Date').size().tail(30)
                time_series_data = [
                    {
                        'date': str(date),
                        'crimes': int(count),
                        'predicted': int(count * 1.05)
                    }
                    for date, count in date_counts.items()
                ]
                print(f"   Time series: {len(time_series_data)} days")
            except Exception as e:
                print(f"   ⚠️ Time series error: {e}")

        result = {
            'hotspotsCount': int(high_risk_count),
            'totalCrimes': total_crimes,
            # Hardcoded placeholder metrics.
            'averageRiskLevel': 0.65,
            'predictionAccuracy': 0.82,
            'timeSeriesData': time_series_data
        }

        print("✅ Statistics calculated successfully")
        return jsonify(result)

    except Exception as e:
        print(f"❌ Error in get_statistics: {e}")
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500


@app.route('/api/predict', methods=['POST'])
def predict_crime():
    """Predict crime for a specific location and date.

    Note: this is a dataset-driven heuristic; the loaded model1/model2 are
    not consulted here.
    """
    try:
        if df is None or df.empty:
            return jsonify({'error': 'Dataset not loaded'}), 500

        data = request.get_json(silent=True) or {}
        date_str = data.get('date')
        latitude = data.get('latitude')
        longitude = data.get('longitude')
        location_name = data.get('location')

        print(f"🔮 Prediction request: location={location_name}, date={date_str}")

        # Resolve a known place name to coordinates when none were supplied.
        if location_name and latitude is None:
            location_coords = {
                'koramangala': (12.9352, 77.6245),
                'whitefield': (12.9698, 77.7499),
                'challaghatta': (12.89742, 77.46124),
                'indiranagar': (12.9716, 77.6412),
                'jayanagar': (12.9250, 77.5937),
                'marathahalli': (12.9698, 77.7051),
                'electronic city': (12.8386, 77.6869),
                'mg road': (12.9352, 77.6245),
                'koramangala 4th block': (12.9352, 77.6245),
                'bangalore': (12.9716, 77.5946)
            }
            location_lower = location_name.lower().strip()
            coords = location_coords.get(location_lower)

            if coords:
                latitude, longitude = coords
                print(f"✓ Resolved '{location_name}' to ({latitude}, {longitude})")
            else:
                return jsonify({'error': f'Location "{location_name}" not recognized'}), 400

        if latitude is None or longitude is None:
            return jsonify({'error': 'Latitude and longitude required'}), 400

        latitude = float(latitude)
        longitude = float(longitude)

        if not is_in_bengaluru(latitude, longitude):
            return jsonify({'error': f'Location ({latitude}, {longitude}) is outside Bengaluru'}), 400

        # Validate the date format; the heuristic below does not use the date itself.
        try:
            datetime.strptime(date_str, '%Y-%m-%d')
        except (TypeError, ValueError):
            return jsonify({'error': 'Invalid date format (use YYYY-MM-DD)'}), 400

        # Count historical incidents in a ±0.05° box around the point.
        nearby_crimes = df[
            (df['Latitude'].between(latitude - 0.05, latitude + 0.05)) &
            (df['Longitude'].between(longitude - 0.05, longitude + 0.05))
        ]

        crime_count = len(nearby_crimes)
        risk_level = calculate_risk_level(crime_count)

        # Break nearby incidents down by type, using whichever crime-type
        # column this dataset version provides.
        crime_types = {}
        if crime_count > 0:
            for col in ('CrimeGroup_Name', 'CrimeType', 'CrimeHead_Name'):
                if col in nearby_crimes.columns:
                    crime_types = nearby_crimes[col].value_counts().head(10).to_dict()
                    print(f"   Found {len(crime_types)} crime types from {col}")
                    break

        # Heuristic confidence: 60% plus the share of all recorded crimes that
        # fall near this point, capped at 95 ...
        raw_confidence = min(95, 60 + (crime_count / max(len(df), 1) * 100))
        # ... then rescaled from the [60, 87] working range onto [25, 90] and clamped.
        confidence = 25 + ((raw_confidence - 60) / (87 - 60)) * (90 - 25)
        confidence = max(25, min(90, confidence))
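
        # Illustrative arithmetic with hypothetical numbers: 500 nearby
        # incidents out of 10,000 total records gives
        #   raw_confidence = min(95, 60 + 500 / 10000 * 100) = 65
        #   confidence     = 25 + (65 - 60) / 27 * 65 ≈ 37.0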

        print(f"✅ Prediction: Risk={risk_level}, Nearby={crime_count}, Confidence={confidence:.1f}%")

        return jsonify({
            'success': True,
            'location': {
                'latitude': latitude,
                'longitude': longitude,
                'name': location_name or f"{latitude:.4f}, {longitude:.4f}"
            },
            'date': date_str,
            'prediction': {
                'riskLevel': risk_level,
                'confidence': round(confidence, 1),
                # Rough estimate derived from the historical nearby count.
                'expectedCrimes': max(1, int(crime_count / 30)) if crime_count > 0 else 0,
                'trend': 'stable'
            },
            'crimeTypes': crime_types,
            'nearbyIncidents': int(crime_count)
        })

    except Exception as e:
        print(f"❌ Error in predict_crime: {e}")
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500


@app.route('/api/search-location', methods=['GET'])
def search_location():
    """Search for location in dataset"""
    try:
        if df is None or df.empty:
            return jsonify({'results': []})

        query = request.args.get('q', '').lower().strip()

        if len(query) < 2 or 'Location' not in df.columns:
            return jsonify({'results': []})

        # Search only the first 200 unique location names to keep lookups cheap.
        locations = df.drop_duplicates(subset=['Location']).head(200)

        matching = locations[locations['Location'].str.lower().str.contains(query, na=False)]

        results = [
            {
                'name': row['Location'],
                'latitude': float(row['Latitude']),
                'longitude': float(row['Longitude'])
            }
            for _, row in matching.head(10).iterrows()
        ]

        print(f"🔍 Location search for '{query}': found {len(results)} results")
        return jsonify({'results': results})

    except Exception as e:
        print(f"⚠️ Error in search_location: {e}")
        traceback.print_exc()
        return jsonify({'results': []})


@app.errorhandler(404)
def not_found(e):
    return jsonify({'error': 'Endpoint not found'}), 404


@app.errorhandler(500)
def server_error(e):
    return jsonify({'error': 'Internal server error'}), 500


if __name__ == '__main__':
    print("\n" + "=" * 60)
    print("✅ Starting OpenSight API Server")
    print("=" * 60 + "\n")
    app.run(host="0.0.0.0", port=int(os.environ.get("PORT", 5000)), debug=False)