Spaces:
Running
Running
Early Morning Improvements
Browse files- services/feature_engineering.py +33 -10
services/feature_engineering.py
CHANGED
|
@@ -204,30 +204,53 @@ class SafetyFeatureEngineer:
|
|
| 204 |
|
| 205 |
Uses percentile normalization to preserve meaningful gradients
|
| 206 |
even when the absolute density range is narrow.
|
|
|
|
|
|
|
|
|
|
| 207 |
"""
|
| 208 |
if self.kde_model is None:
|
| 209 |
return np.zeros(len(midpoints))
|
| 210 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
# score_samples returns log-density; exponentiate for raw likelihood
|
| 212 |
-
|
| 213 |
-
|
|
|
|
| 214 |
|
| 215 |
# Apply regional crime multiplier from district/state data
|
| 216 |
-
|
| 217 |
|
| 218 |
# Percentile normalization for robust [0, 1] mapping
|
| 219 |
-
if len(
|
| 220 |
-
p5, p95 = np.percentile(
|
| 221 |
if p95 > p5:
|
| 222 |
-
|
| 223 |
else:
|
| 224 |
-
d_max =
|
| 225 |
-
|
| 226 |
else:
|
| 227 |
-
d_max =
|
| 228 |
if d_max > 0:
|
| 229 |
-
|
| 230 |
|
|
|
|
| 231 |
return density
|
| 232 |
|
| 233 |
# ── POI Density ─────────────────────────────────────────────────────────
|
|
|
|
| 204 |
|
| 205 |
Uses percentile normalization to preserve meaningful gradients
|
| 206 |
even when the absolute density range is narrow.
|
| 207 |
+
|
| 208 |
+
Points outside India's geographic bounds receive a low-neutral
|
| 209 |
+
default since the KDE model is trained exclusively on Indian data.
|
| 210 |
"""
|
| 211 |
if self.kde_model is None:
|
| 212 |
return np.zeros(len(midpoints))
|
| 213 |
|
| 214 |
+
# India bounding-box guard — the KDE has no training data outside
|
| 215 |
+
# these bounds, so international points get a flat neutral value.
|
| 216 |
+
INDIA_LAT_MIN, INDIA_LAT_MAX = 6.0, 37.0
|
| 217 |
+
INDIA_LON_MIN, INDIA_LON_MAX = 68.0, 98.0
|
| 218 |
+
|
| 219 |
+
in_india = (
|
| 220 |
+
(midpoints[:, 0] >= INDIA_LAT_MIN)
|
| 221 |
+
& (midpoints[:, 0] <= INDIA_LAT_MAX)
|
| 222 |
+
& (midpoints[:, 1] >= INDIA_LON_MIN)
|
| 223 |
+
& (midpoints[:, 1] <= INDIA_LON_MAX)
|
| 224 |
+
)
|
| 225 |
+
|
| 226 |
+
density = np.full(len(midpoints), 0.1) # neutral for international
|
| 227 |
+
|
| 228 |
+
india_mask = np.where(in_india)[0]
|
| 229 |
+
if len(india_mask) == 0:
|
| 230 |
+
return density
|
| 231 |
+
|
| 232 |
# score_samples returns log-density; exponentiate for raw likelihood
|
| 233 |
+
india_points = midpoints[india_mask]
|
| 234 |
+
log_density = self.kde_model.score_samples(india_points)
|
| 235 |
+
india_density = np.exp(log_density)
|
| 236 |
|
| 237 |
# Apply regional crime multiplier from district/state data
|
| 238 |
+
india_density = india_density * self.regional_crime_multiplier
|
| 239 |
|
| 240 |
# Percentile normalization for robust [0, 1] mapping
|
| 241 |
+
if len(india_density) > 10:
|
| 242 |
+
p5, p95 = np.percentile(india_density, [5, 95])
|
| 243 |
if p95 > p5:
|
| 244 |
+
india_density = np.clip((india_density - p5) / (p95 - p5), 0.0, 1.0)
|
| 245 |
else:
|
| 246 |
+
d_max = india_density.max()
|
| 247 |
+
india_density = india_density / d_max if d_max > 0 else india_density
|
| 248 |
else:
|
| 249 |
+
d_max = india_density.max()
|
| 250 |
if d_max > 0:
|
| 251 |
+
india_density = india_density / d_max
|
| 252 |
|
| 253 |
+
density[india_mask] = india_density
|
| 254 |
return density
|
| 255 |
|
| 256 |
# ── POI Density ─────────────────────────────────────────────────────────
|