clarindasusan committed on
Commit
b41b06c
·
verified ·
1 Parent(s): c3ebbf7

Update src/train_model.py

Browse files
Files changed (1) hide show
  1. src/train_model.py +9 -7
src/train_model.py CHANGED
@@ -298,7 +298,7 @@ def generate_landslide_data(n: int = 4000):
298
  # ── Clean catalog spine ───────────────────────────────────────────────
299
  catalog = catalog.dropna(subset=["latitude", "longitude"])
300
  catalog["event_date"] = pd.to_datetime(
301
- catalog["event_date"], format="%m-%d-%Y", errors="coerce"
302
  )
303
  catalog = catalog.dropna(subset=["event_date"])
304
  print(f"[Landslide] After date clean: {len(catalog)} rows")
@@ -349,7 +349,7 @@ def generate_landslide_data(n: int = 4000):
349
  else:
350
  elev["aspect_index"] = 0.5
351
 
352
- base = nearest_merge(base, elev, ["aspect_index"])
353
 
354
  # ── Rainfall ──────────────────────────────────────────────────────────
355
  print("[Landslide] Merging rainfall...")
@@ -363,10 +363,11 @@ def generate_landslide_data(n: int = 4000):
363
  base = nearest_merge(base, rain_agg, ["rainfall_intensity_mmh"])
364
 
365
  # ── soil_type_index proxy ─────────────────────────────────────────────
 
366
  veg_norm = np.clip(base["vegetation_cover_pct"] / 100.0, 0, 1)
367
  rain_norm = np.clip(base["rainfall_intensity_mmh"] / 200.0, 0, 1)
368
  base["soil_type_index"] = np.clip( # ← FIX 1: now saved
369
- 1.0 - (0.5 * (1 - veg_norm) + 0.5 * rain_norm), 0, 1
370
  )
371
 
372
  # ── Risk label ────────────────────────────────────────────────────────
@@ -395,9 +396,10 @@ def generate_landslide_data(n: int = 4000):
395
 
396
  base["risk_score"] = np.clip(
397
  0.35 * base["size_score"] +
398
- 0.30 * base["historical_landslide_freq"] +
399
- 0.25 * fatality_norm +
400
- 0.10 * (1 - veg_norm) +
 
401
  np.random.normal(0, 0.02, len(base)),
402
  0.0, 1.0
403
  )
@@ -408,7 +410,7 @@ def generate_landslide_data(n: int = 4000):
408
 
409
  # ── Final feature matrix ──────────────────────────────────────────────
410
  features = [
411
- "rainfall_intensity_mmh", "soil_type_index",
412
  "vegetation_cover_pct", "seismic_activity_index",
413
  "distance_to_fault_km", "aspect_index", "historical_landslide_freq",
414
  ]
 
298
  # ── Clean catalog spine ───────────────────────────────────────────────
299
  catalog = catalog.dropna(subset=["latitude", "longitude"])
300
  catalog["event_date"] = pd.to_datetime(
301
+ catalog["event_date"], errors="coerce"
302
  )
303
  catalog = catalog.dropna(subset=["event_date"])
304
  print(f"[Landslide] After date clean: {len(catalog)} rows")
 
349
  else:
350
  elev["aspect_index"] = 0.5
351
 
352
+ base = nearest_merge(base, elev, ["slope_degrees","aspect_index"])
353
 
354
  # ── Rainfall ──────────────────────────────────────────────────────────
355
  print("[Landslide] Merging rainfall...")
 
363
  base = nearest_merge(base, rain_agg, ["rainfall_intensity_mmh"])
364
 
365
  # ── soil_type_index proxy ─────────────────────────────────────────────
366
+ slope_norm = np.clip(base["slope_degrees"] / 90.0, 0, 1)
367
  veg_norm = np.clip(base["vegetation_cover_pct"] / 100.0, 0, 1)
368
  rain_norm = np.clip(base["rainfall_intensity_mmh"] / 200.0, 0, 1)
369
  base["soil_type_index"] = np.clip( # ← FIX 1: now saved
370
+ 1.0 - (0.4 * slope_norm + 0.3 * (1 - veg_norm) + 0.3 * rain_norm), 0, 1
371
  )
372
 
373
  # ── Risk label ────────────────────────────────────────────────────────
 
396
 
397
  base["risk_score"] = np.clip(
398
  0.35 * base["size_score"] +
399
+ 0.25 * base["historical_landslide_freq"] +
400
+ 0.20 * fatality_norm +
401
+ 0.15 * slope_norm +
402
+ 0.05 * (1 - veg_norm) +
403
  np.random.normal(0, 0.02, len(base)),
404
  0.0, 1.0
405
  )
 
410
 
411
  # ── Final feature matrix ──────────────────────────────────────────────
412
  features = [
413
+ "slope_degrees","rainfall_intensity_mmh", "soil_type_index",
414
  "vegetation_cover_pct", "seismic_activity_index",
415
  "distance_to_fault_km", "aspect_index", "historical_landslide_freq",
416
  ]