Spaces:
Running
Running
Update src/train_model.py
Browse files- src/train_model.py +9 -7
src/train_model.py
CHANGED
|
@@ -298,7 +298,7 @@ def generate_landslide_data(n: int = 4000):
|
|
| 298 |
# ── Clean catalog spine ───────────────────────────────────────────────
|
| 299 |
catalog = catalog.dropna(subset=["latitude", "longitude"])
|
| 300 |
catalog["event_date"] = pd.to_datetime(
|
| 301 |
-
catalog["event_date"],
|
| 302 |
)
|
| 303 |
catalog = catalog.dropna(subset=["event_date"])
|
| 304 |
print(f"[Landslide] After date clean: {len(catalog)} rows")
|
|
@@ -349,7 +349,7 @@ def generate_landslide_data(n: int = 4000):
|
|
| 349 |
else:
|
| 350 |
elev["aspect_index"] = 0.5
|
| 351 |
|
| 352 |
-
base = nearest_merge(base, elev, ["aspect_index"])
|
| 353 |
|
| 354 |
# ── Rainfall ──────────────────────────────────────────────────────────
|
| 355 |
print("[Landslide] Merging rainfall...")
|
|
@@ -363,10 +363,11 @@ def generate_landslide_data(n: int = 4000):
|
|
| 363 |
base = nearest_merge(base, rain_agg, ["rainfall_intensity_mmh"])
|
| 364 |
|
| 365 |
# ── soil_type_index proxy ─────────────────────────────────────────────
|
|
|
|
| 366 |
veg_norm = np.clip(base["vegetation_cover_pct"] / 100.0, 0, 1)
|
| 367 |
rain_norm = np.clip(base["rainfall_intensity_mmh"] / 200.0, 0, 1)
|
| 368 |
base["soil_type_index"] = np.clip( # ← FIX 1: now saved
|
| 369 |
-
1.0 - (0.
|
| 370 |
)
|
| 371 |
|
| 372 |
# ── Risk label ────────────────────────────────────────────────────────
|
|
@@ -395,9 +396,10 @@ def generate_landslide_data(n: int = 4000):
|
|
| 395 |
|
| 396 |
base["risk_score"] = np.clip(
|
| 397 |
0.35 * base["size_score"] +
|
| 398 |
-
0.
|
| 399 |
-
0.
|
| 400 |
-
0.
|
|
|
|
| 401 |
np.random.normal(0, 0.02, len(base)),
|
| 402 |
0.0, 1.0
|
| 403 |
)
|
|
@@ -408,7 +410,7 @@ def generate_landslide_data(n: int = 4000):
|
|
| 408 |
|
| 409 |
# ── Final feature matrix ──────────────────────────────────────────────
|
| 410 |
features = [
|
| 411 |
-
"rainfall_intensity_mmh", "soil_type_index",
|
| 412 |
"vegetation_cover_pct", "seismic_activity_index",
|
| 413 |
"distance_to_fault_km", "aspect_index", "historical_landslide_freq",
|
| 414 |
]
|
|
|
|
| 298 |
# ── Clean catalog spine ───────────────────────────────────────────────
|
| 299 |
catalog = catalog.dropna(subset=["latitude", "longitude"])
|
| 300 |
catalog["event_date"] = pd.to_datetime(
|
| 301 |
+
catalog["event_date"], errors="coerce"
|
| 302 |
)
|
| 303 |
catalog = catalog.dropna(subset=["event_date"])
|
| 304 |
print(f"[Landslide] After date clean: {len(catalog)} rows")
|
|
|
|
| 349 |
else:
|
| 350 |
elev["aspect_index"] = 0.5
|
| 351 |
|
| 352 |
+
base = nearest_merge(base, elev, ["slope_degrees","aspect_index"])
|
| 353 |
|
| 354 |
# ── Rainfall ──────────────────────────────────────────────────────────
|
| 355 |
print("[Landslide] Merging rainfall...")
|
|
|
|
| 363 |
base = nearest_merge(base, rain_agg, ["rainfall_intensity_mmh"])
|
| 364 |
|
| 365 |
# ── soil_type_index proxy ─────────────────────────────────────────────
|
| 366 |
+
slope_norm = np.clip(base["slope_degrees"] / 90.0, 0, 1)
|
| 367 |
veg_norm = np.clip(base["vegetation_cover_pct"] / 100.0, 0, 1)
|
| 368 |
rain_norm = np.clip(base["rainfall_intensity_mmh"] / 200.0, 0, 1)
|
| 369 |
base["soil_type_index"] = np.clip( # ← FIX 1: now saved
|
| 370 |
+
1.0 - (0.4 * slope_norm + 0.3 * (1 - veg_norm) + 0.3 * rain_norm), 0, 1
|
| 371 |
)
|
| 372 |
|
| 373 |
# ── Risk label ────────────────────────────────────────────────────────
|
|
|
|
| 396 |
|
| 397 |
base["risk_score"] = np.clip(
|
| 398 |
0.35 * base["size_score"] +
|
| 399 |
+
0.25 * base["historical_landslide_freq"] +
|
| 400 |
+
0.20 * fatality_norm +
|
| 401 |
+
0.15 * slope_norm +
|
| 402 |
+
0.05 * (1 - veg_norm) +
|
| 403 |
np.random.normal(0, 0.02, len(base)),
|
| 404 |
0.0, 1.0
|
| 405 |
)
|
|
|
|
| 410 |
|
| 411 |
# ── Final feature matrix ──────────────────────────────────────────────
|
| 412 |
features = [
|
| 413 |
+
"slope_degrees","rainfall_intensity_mmh", "soil_type_index",
|
| 414 |
"vegetation_cover_pct", "seismic_activity_index",
|
| 415 |
"distance_to_fault_km", "aspect_index", "historical_landslide_freq",
|
| 416 |
]
|