""" Intervention Simulation ======================= Standalone script that simulates stress challenges and robot interventions on a field, predicts before/after yields using the combined SHAP + rule-based classification approach (same as intervention_predictions_pipeline.py), and outputs a results CSV + 3-panel PNG map. Usage: python apps/intervention_simulation.py """ import matplotlib matplotlib.use("Agg") import matplotlib.pyplot as plt import matplotlib.patches as mpatches import numpy as np import pandas as pd import pickle import warnings import gc import hashlib import json from pathlib import Path from sklearn.model_selection import train_test_split from sklearn.metrics import mean_squared_error, r2_score from sklearn.preprocessing import StandardScaler from sklearn.decomposition import PCA from tabpfn import TabPFNRegressor import netCDF4 as nclib warnings.filterwarnings("ignore") # ── Paths ───────────────────────────────────────────────────────────────────── DATA_ROOT = Path("./yieldsat_data") RESULTS_DIR = Path("./results") NC_PATH = DATA_ROOT / "Preprocessed/Germany/merge_s2-soil-dem-weather-coords.nc" CHALLENGES_CSV = RESULTS_DIR / "sim_input_challenges.csv" INTERVENTIONS_CSV = RESULTS_DIR / "sim_input_interventions.csv" SIM_DATA_DIR = Path("data/raw/simulator/real_field") INTERVENTION_ZONES_CSV = SIM_DATA_DIR / "de_0100_full_field_source.csv" SHAP_CACHE_PATH = RESULTS_DIR / "shap_pca_cache_DE_0100.pkl" SIM_CACHE_DIR = RESULTS_DIR / "_sim_cache" OUTPUT_CSV = RESULTS_DIR / "farm_intervention_simulation.csv" OUTPUT_PNG = RESULTS_DIR / "farm_intervention_simulation_map.png" # ── Model Parameters ────────────────────────────────────────────────────────── TABPFN_MAX_TRAIN = 2000 LOW_YIELD_PERCENTILE = 25 ZSCORE_THRESHOLD = 1.2 RECOVERY_FRACTION = 0.6 EPS = 1e-8 # ── Combination Parameters (from pipeline) ─────────────────────────────────── AGREE_BOOST = 0.15 DISAGREE_PENALTY = 0.20 SHAP_RESCUE_DISCOUNT = 0.8 RULE_ONLY_DISCOUNT = 0.7 SHAP_MIN_MAGNITUDE = 
0.05 SHAP_MARGIN_THRESHOLD = 0.05 # ── Mappings ────────────────────────────────────────────────────────────────── SOIL_DEPTHS = ["0-5", "5-15", "15-30", "30-60", "60-100", "100-200"] SOIL_PROPS = ["clay", "sand", "silt", "soc", "phh2o", "cec", "nitrogen", "cfvo"] PCA_GROUPS = { "soil_texture": { "cols": [f"{p}_{d}" for p in ["clay", "sand", "silt"] for d in SOIL_DEPTHS], "n_components": 2, }, "soil_chemistry": { "cols": [ f"{p}_{d}" for p in ["soc", "cec", "nitrogen", "phh2o"] for d in SOIL_DEPTHS ], "n_components": 3, }, "coarse_fragments": { "cols": [f"cfvo_{d}" for d in SOIL_DEPTHS], "n_components": 1, }, "temperature": { "cols": ["temp_mean_mean", "temp_max_mean", "temp_min_mean"], "n_components": 1, }, } ACTION_MAP = { "drought": "irrigate", "waterlogging": "install_drainage", "nutrient_deficiency": "apply_fertilizer_N", "compaction": "subsoil", "poor_drainage": "install_drainage", "healthy_low_yield_anomaly": "inspect_manually", "unclassified": "inspect_manually", } STRESS_COLORS = { "drought": "#2196F3", "waterlogging": "#795548", "nutrient_deficiency": "#4CAF50", "compaction": "#FF9800", "poor_drainage": "#00BCD4", "healthy_low_yield_anomaly": "#E91E63", "unclassified": "#9E9E9E", } ACTION_RESPONSIVE_FEATURES = { "apply_fertilizer_N": ["ndvi_last", "ndre_last", "cire_last", "s2rep_last"], "irrigate": ["ndvi_last", "ndmi_last"], "install_drainage": ["ndvi_last", "ndmi_last"], "subsoil": ["ndvi_last", "ndre_last", "cire_last"], } COMPATIBLE_STRESSES = { ("poor_drainage", "waterlogging"), ("waterlogging", "poor_drainage"), ("drought", "compaction"), ("compaction", "drought"), ("nutrient_deficiency", "compaction"), ("compaction", "nutrient_deficiency"), } RULE_PRIORITY_STRESSES = {"drought", "nutrient_deficiency"} SHAP_PRIORITY_STRESSES = {"waterlogging", "compaction", "poor_drainage"} # z-score feature mapping: raw column name -> classifier key # Pipeline uses z_NDMI (not z_NDWI) ZSCORE_FEATURES = { "ndvi_last": "z_NDVI", "ndre_last": "z_NDRE", "ndmi_last": 
"z_NDMI", "psri_last": "z_PSRI", "cire_last": "z_CIre", "s2rep_last": "z_S2REP", "clay_0-5": "z_clay_0-5", "sand_0-5": "z_sand_0-5", "silt_0-5": "z_silt_0-5", "soc_0-5": "z_soc_0-5", "phh2o_0-5": "z_phh2o_0-5", "cec_0-5": "z_cec_0-5", "nitrogen_0-5": "z_nitrogen_0-5", "cfvo_0-5": "z_cfvo_0-5", "dem": "z_dem", "slope": "z_slope", "twi": "z_twi", } # ── Data Loading ────────────────────────────────────────────────────────────── def load_field_data(nc_path, field_id, nc_id): """Load field from NetCDF. Returns chunk dict + band_names list.""" ds = nclib.Dataset(str(nc_path), "r") band_names = [str(b) for b in ds["band"][:]] fsn_all = ds["field_shared_name"][:] field_indices = np.where(fsn_all == nc_id)[0] if len(field_indices) == 0: ds.close() raise ValueError(f"Field {field_id} (NC id={nc_id}) not found in dataset") i0, i1 = field_indices[0], field_indices[-1] + 1 chunk = { "field_str": field_id, "field_nc": nc_id, "sample": np.ma.filled(ds["sample"][i0:i1, :, :], np.nan).astype(np.float32), "times": ds["times"][i0:i1, :], "target": np.ma.filled(ds["target"][i0:i1], np.nan).astype(np.float32), "row": ds["row"][i0:i1], "col": ds["col"][i0:i1], } ds.close() return chunk, band_names # ── Feature Engineering ─────────────────────────────────────────────────────── def find_valid_timesteps(sample, band_idx, min_coverage=0.5): """Return timestep indices with >50% valid pixels.""" vals = sample[:, :, band_idx] valid_per_ts = np.sum(~np.isnan(vals), axis=0) coverage = valid_per_ts / len(vals) return np.where(coverage > min_coverage)[0] def extract_features(chunk, band_names): """Extract 63 raw features (soil x 6 depths, topo, weather, VIs incl. PSRI) then PCA-compress to 18 model features. 
Adds X_raw, X, feature_cols, pca_info, psri_last to chunk.""" s = chunk["sample"] B04_IDX = band_names.index("B04") B03_IDX = band_names.index("B03") B05_IDX = band_names.index("B05") B06_IDX = band_names.index("B06") B07_IDX = band_names.index("B07") B08_IDX = band_names.index("B08") B11_IDX = band_names.index("B11") valid_ts = find_valid_timesteps(s, B04_IDX) chunk["valid_ts"] = valid_ts if len(valid_ts) == 0: raise ValueError(f"No valid timesteps for {chunk['field_str']}") last_t = valid_ts[-1] nir = s[:, last_t, B08_IDX] red = s[:, last_t, B04_IDX] grn = s[:, last_t, B03_IDX] re1 = s[:, last_t, B05_IDX] re2 = s[:, last_t, B06_IDX] re3 = s[:, last_t, B07_IDX] swir = s[:, last_t, B11_IDX] # Static bands static_bands = {} for prop in SOIL_PROPS: for depth in SOIL_DEPTHS: bname = f"{prop}_{depth}" if bname in band_names: static_bands[bname] = band_names.index(bname) for bname in ["dem", "slope", "aspect", "curvature", "twi"]: if bname in band_names: static_bands[bname] = band_names.index(bname) weather_bands = {} for bname in ["temp_mean", "temp_max", "temp_min", "total_prec"]: if bname in band_names: weather_bands[bname] = band_names.index(bname) # Build raw features feat_dict = {} for bname, bidx in static_bands.items(): feat_dict[bname] = s[:, 0, bidx] for bname, bidx in weather_bands.items(): feat_dict[f"{bname}_mean"] = np.nanmean(s[:, valid_ts, bidx], axis=1) feat_dict["ndvi_last"] = (nir - red) / (nir + red + EPS) feat_dict["ndre_last"] = (nir - re1) / (nir + re1 + EPS) feat_dict["ndmi_last"] = (nir - swir) / (nir + swir + EPS) feat_dict["cire_last"] = nir / (re1 + EPS) - 1 feat_dict["s2rep_last"] = 705 + 35 * ((red + re3) / 2 - re1) / (re2 - re1 + EPS) X_raw = pd.DataFrame(feat_dict).fillna(pd.DataFrame(feat_dict).median()) chunk["X_raw"] = X_raw # PSRI for classifier (not a model feature) chunk["psri_last"] = (red - grn) / (re2 + EPS) # PCA compression X_pca = X_raw.copy() pca_info = {} for group_name, group_cfg in PCA_GROUPS.items(): group_cols = 
group_cfg["cols"] n_comp = group_cfg["n_components"] available = [c for c in group_cols if c in X_pca.columns] if len(available) < 2: continue scaler = StandardScaler() scaled = scaler.fit_transform(X_pca[available].values) pca = PCA(n_components=n_comp) pcs = pca.fit_transform(scaled) pca_info[group_name] = { "pca": pca, "scaler": scaler, "original_cols": available, "explained_var": pca.explained_variance_ratio_, } X_pca = X_pca.drop(columns=available) for i in range(n_comp): X_pca[f"{group_name}_pc{i + 1}"] = pcs[:, i] X = X_pca.fillna(X_pca.median()) chunk["X"] = X chunk["feature_cols"] = list(X.columns) chunk["pca_info"] = pca_info chunk["band_names"] = band_names return chunk # ── Model Training ──────────────────────────────────────────────────────────── def _sim_cache_path(field_id="DE_0100"): """Hash key over data file mtime + training/PCA config so cache invalidates when any of those change.""" cfg = { "field_id": field_id, "nc_mtime": NC_PATH.stat().st_mtime if NC_PATH.exists() else 0, "tabpfn_max_train": TABPFN_MAX_TRAIN, "pca_groups": { k: (v["cols"], v["n_components"]) for k, v in PCA_GROUPS.items() }, "soil_depths": SOIL_DEPTHS, "soil_props": SOIL_PROPS, } h = hashlib.md5( json.dumps(cfg, sort_keys=True, default=str).encode() ).hexdigest()[:12] SIM_CACHE_DIR.mkdir(exist_ok=True, parents=True) return SIM_CACHE_DIR / f"sim_cache_{field_id}_{h}.pkl" def _load_baseline_from_csv(chunk): """Load baseline yield_pred from de_0100_full_field_source.csv, aligned by (row, col) to the chunk's pixel order. 
Returns None if the CSV is missing.""" if not INTERVENTION_ZONES_CSV.exists(): return None src = pd.read_csv(INTERVENTION_ZONES_CSV) if "yield_pred" not in src.columns: return None lookup = { (int(r), int(c)): float(p) for r, c, p in zip(src["row"], src["col"], src["yield_pred"]) } chunk_rows = chunk["row"] chunk_cols = chunk["col"] try: return np.array( [lookup[(int(r), int(c))] for r, c in zip(chunk_rows, chunk_cols)], dtype=np.float64, ) except KeyError: return None def train_tabpfn(chunk, field_id="DE_0100"): """Train TabPFN (or load from disk cache). 70/30 split, TabPFNRegressor(n_estimators=4). Baseline y_pred_all comes from the precomputed source CSV, so no full-field model.predict is needed. Returns (model, y_pred_all, col_means).""" cache_path = _sim_cache_path(field_id) # Try cache hit first if cache_path.exists(): try: with open(cache_path, "rb") as f: cache = pickle.load(f) model = cache["model"] col_means = cache["col_means"] print(f" Cache HIT: {cache_path.name}") print( f" TabPFN (cached): RMSE={cache.get('rmse', float('nan')):.3f}, " f"R2={cache.get('r2', float('nan')):.3f}" ) except Exception as e: print(f" Cache load failed ({e}); rebuilding") cache = None model = None else: print(f" Cache MISS: {cache_path.name}") cache = None model = None if model is None: X = chunk["X"].values.copy() y = chunk["target"] X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.3, random_state=42 ) col_means = np.nanmean(X_train, axis=0) col_means = np.where(np.isnan(col_means), 0, col_means) for j in range(X.shape[1]): X_train[np.isnan(X_train[:, j]), j] = col_means[j] X_test[np.isnan(X_test[:, j]), j] = col_means[j] model = TabPFNRegressor( n_estimators=4, device="cpu", ignore_pretraining_limits=True ) X_tr, y_tr = X_train, y_train if len(X_tr) > TABPFN_MAX_TRAIN: idx = np.random.choice(len(X_tr), TABPFN_MAX_TRAIN, replace=False) X_tr, y_tr = X_train[idx], y_train[idx] model.fit(X_tr, y_tr) y_pred_test = model.predict(X_test) rmse = 
float(np.sqrt(mean_squared_error(y_test, y_pred_test))) r2 = float(r2_score(y_test, y_pred_test)) print(f" TabPFN: RMSE={rmse:.3f}, R2={r2:.3f}") with open(cache_path, "wb") as f: pickle.dump( {"model": model, "col_means": col_means, "rmse": rmse, "r2": r2}, f ) print(f" Saved cache: {cache_path.name}") del X_train, X_test gc.collect() # Baseline: load from precomputed source CSV (no full-field predict). y_pred_all = _load_baseline_from_csv(chunk) if y_pred_all is None: print(" Source CSV unavailable — falling back to full-field model.predict") X_full = chunk["X"].values.copy() for j in range(X_full.shape[1]): X_full[np.isnan(X_full[:, j]), j] = col_means[j] y_pred_all = model.predict(X_full) return model, y_pred_all, col_means # ── Challenge Pixel Injection ───────────────────────────────────────────────── def inject_challenge_pixels(chunk, challenge_info, X_raw_orig, y_pred_all): """Copy raw features from worst low-yield templates, apply stress-specific adjustments, recompute PCA. Returns (X_raw_challenge, X_challenge, X_challenge_arr).""" y_all = chunk["target"] rows = chunk["row"] cols = chunk["col"] pca_info = chunk["pca_info"] feature_cols = chunk["feature_cols"] low_mask = y_all <= np.nanpercentile(y_all, LOW_YIELD_PERCENTILE) low_indices = np.where(low_mask)[0] low_yields = y_all[low_mask] worst_order = np.argsort(low_yields) template_indices = low_indices[worst_order[: len(challenge_info)]] print(f" Template pixels (real low-yield):") for i, ti in enumerate(template_indices): print( f" Template {i + 1}: [{rows[ti]},{cols[ti]}], yield={y_all[ti]:.1f} t/ha, " f"pred={y_pred_all[ti]:.1f} t/ha" ) X_raw_challenge = X_raw_orig.astype(np.float64).copy() for ch_i, (info, template_idx) in enumerate(zip(challenge_info, template_indices)): px_idx = info["pixel_idx"] stress = info["stress"] # Copy ALL raw features from template for col_name in X_raw_challenge.columns: X_raw_challenge.loc[px_idx, col_name] = X_raw_challenge.loc[ template_idx, col_name ] # 
Stress-specific adjustments if stress == "drought": X_raw_challenge.loc[px_idx, "ndmi_last"] *= 0.5 for d in SOIL_DEPTHS: col = f"sand_{d}" if col in X_raw_challenge.columns: X_raw_challenge.loc[px_idx, col] = X_raw_orig[col].quantile(0.95) elif stress == "compaction": for d in SOIL_DEPTHS: for col_prefix in ["cfvo", "clay"]: col = f"{col_prefix}_{d}" if col in X_raw_challenge.columns: X_raw_challenge.loc[px_idx, col] = X_raw_orig[col].quantile( 0.95 ) X_raw_challenge.loc[px_idx, "slope"] = X_raw_orig["slope"].quantile(0.05) elif stress == "nutrient_deficiency": for d in SOIL_DEPTHS: for prop in ["nitrogen", "soc", "cec"]: col = f"{prop}_{d}" if col in X_raw_challenge.columns: X_raw_challenge.loc[px_idx, col] = X_raw_orig[col].quantile( 0.05 ) print( f" [{info['row']},{info['col']}] {stress}: " f"features from template [{rows[template_idx]},{cols[template_idx]}]" ) # Recompute PCA features from modified raw features X_pca_challenge = X_raw_challenge.copy() for group_name in PCA_GROUPS: if group_name not in pca_info: continue gi = pca_info[group_name] available = gi["original_cols"] n_comp = len(gi["explained_var"]) scaled = gi["scaler"].transform(X_pca_challenge[available].values) pcs = gi["pca"].transform(scaled) X_pca_challenge = X_pca_challenge.drop(columns=available) for j in range(n_comp): X_pca_challenge[f"{group_name}_pc{j + 1}"] = pcs[:, j] X_challenge = X_pca_challenge.fillna(X_pca_challenge.median()) X_challenge = X_challenge[feature_cols] X_challenge_arr = X_challenge.values.copy() col_means_ch = np.nanmean(X_challenge_arr, axis=0) col_means_ch = np.where(np.isnan(col_means_ch), 0, col_means_ch) for j in range(X_challenge_arr.shape[1]): X_challenge_arr[np.isnan(X_challenge_arr[:, j]), j] = col_means_ch[j] return X_raw_challenge, X_challenge, X_challenge_arr # ── Stress Classification ──────────────────────────────────────────────────── def classify_stress(z_row, t=ZSCORE_THRESHOLD): """Rule-based z-score classifier using z_NDMI (pipeline convention). 
5 stress types: drought, waterlogging, nutrient_deficiency, compaction, poor_drainage.""" def signal(key, direction): v = z_row.get(key, None) if v is None or pd.isna(v): return (False, 0.0, False) if direction == "+": return (v > t, v, True) elif direction == "-": return (v < -t, v, True) else: return (abs(v) > t, v, True) def score( dynamic, static, min_dyn_frac=0.5, min_dyn_available=1, static_weight=0.5 ): dyn_available = [s for s in dynamic if s[2]] if len(dyn_available) < min_dyn_available: return 0.0, False dyn_fired = [s for s in dyn_available if s[0]] if len(dyn_fired) / len(dyn_available) < min_dyn_frac: return 0.0, False stat_available = [s for s in static if s[2]] stat_fired = [s for s in stat_available if s[0]] dyn_strength = sum(max(0, abs(s[1]) - t) for s in dyn_fired) stat_strength = sum(max(0, abs(s[1]) - t) for s in stat_fired) * static_weight max_possible = (len(dyn_available) + len(stat_available) * static_weight) * 2.0 if max_possible <= 0: return 0.0, False conf = min(1.0, (dyn_strength + stat_strength) / max_possible) return conf, True stresses = [] # DROUGHT — uses z_NDMI (pipeline convention) dyn = [signal("z_NDMI", "-"), signal("z_NDVI", "-")] stat = [signal("z_sand_0-5", "+")] conf, fired = score(dyn, stat, min_dyn_frac=0.5) if fired: stresses.append(("drought", conf)) # WATERLOGGING dyn = [signal("z_PSRI", "+"), signal("z_NDMI", "-")] stat = [ signal("z_twi", "+"), signal("z_slope", "-"), signal("z_clay_0-5", "+"), signal("z_dem", "-"), ] conf, fired = score(dyn, stat, min_dyn_frac=0.5) if fired: stresses.append(("waterlogging", conf)) # NUTRIENT DEFICIENCY dyn = [ signal("z_NDRE", "-"), signal("z_CIre", "-"), signal("z_S2REP", "-"), signal("z_NDVI", "-"), ] stat = [ signal("z_nitrogen_0-5", "-"), signal("z_soc_0-5", "-"), signal("z_cec_0-5", "-"), signal("z_phh2o_0-5", "abs"), ] conf, fired = score(dyn, stat, min_dyn_frac=0.5) if fired: stresses.append(("nutrient_deficiency", conf)) # COMPACTION dyn = [signal("z_PSRI", "+"), 
signal("z_NDRE", "-")] stat = [ signal("z_cfvo_0-5", "+"), signal("z_clay_0-5", "+"), signal("z_slope", "-"), ] conf, fired = score(dyn, stat, min_dyn_frac=0.5) if fired: stresses.append(("compaction", conf)) # POOR DRAINAGE dyn = [signal("z_PSRI", "+"), signal("z_NDVI", "-")] stat = [ signal("z_sand_0-5", "-"), signal("z_silt_0-5", "+"), signal("z_clay_0-5", "+"), ] conf, fired = score(dyn, stat, min_dyn_frac=0.5) if fired: stresses.append(("poor_drainage", conf)) if stresses: stresses.sort(key=lambda x: -x[1]) while len(stresses) < 2: stresses.append(("none", 0.0)) return stresses[:2], "classified" dyn_keys = ["z_NDVI", "z_NDRE", "z_NDMI", "z_PSRI", "z_CIre", "z_S2REP"] available_dyn = [ k for k in dyn_keys if z_row.get(k) is not None and not pd.isna(z_row.get(k)) ] if len(available_dyn) < 2: return [("unclassified", 0.0), ("none", 0.0)], "insufficient_data" ndvi = z_row.get("z_NDVI", np.nan) ndre = z_row.get("z_NDRE", np.nan) psri = z_row.get("z_PSRI", np.nan) cire = z_row.get("z_CIre", np.nan) healthy_markers = 0 if not pd.isna(ndvi) and ndvi > 0.5: healthy_markers += 1 if not pd.isna(ndre) and ndre > 0.5: healthy_markers += 1 if not pd.isna(psri) and psri < -0.5: healthy_markers += 1 if not pd.isna(cire) and cire > 0.5: healthy_markers += 1 if healthy_markers >= 2: return [("healthy_low_yield_anomaly", 0.5), ("none", 0.0)], "healthy_anomaly" return [("unclassified", 0.0), ("none", 0.0)], "no_signal" def run_stress_classification(chunk, X_raw_to_use): """Build diagnostic DataFrame for all low-yield pixels (or challenge pixels), compute z-scores using field-level median/MAD, run classify_stress(). 
Returns rule_df.""" X_raw = X_raw_to_use fid = chunk["field_str"] n_pix = len(chunk["target"]) psri_last = chunk["psri_last"] diag_records = [] for i in range(n_pix): rec = { "field_id": fid, "row": int(chunk["row"][i]), "col": int(chunk["col"][i]), "yield_tha": float(chunk["target"][i]), "NDVI": float(X_raw.iloc[i]["ndvi_last"]), "NDRE": float(X_raw.iloc[i]["ndre_last"]), "NDMI": float(X_raw.iloc[i]["ndmi_last"]), "CIre": float(X_raw.iloc[i]["cire_last"]), "S2REP": float(X_raw.iloc[i]["s2rep_last"]), "PSRI": float(psri_last[i]), } for feat in [ "clay_0-5", "sand_0-5", "silt_0-5", "soc_0-5", "phh2o_0-5", "cec_0-5", "nitrogen_0-5", "cfvo_0-5", "dem", "slope", "aspect", "curvature", "twi", ]: rec[feat] = float(X_raw.iloc[i].get(feat, np.nan)) diag_records.append(rec) df = pd.DataFrame(diag_records) # Low yield identification field_p25 = ( df.groupby("field_id")["yield_tha"] .quantile(LOW_YIELD_PERCENTILE / 100) .rename("yield_p25") ) df = df.merge(field_p25, on="field_id", how="left") df["is_low_yield"] = df["yield_tha"] <= df["yield_p25"] # Z-score computation diag_features = [ "clay_0-5", "sand_0-5", "silt_0-5", "soc_0-5", "phh2o_0-5", "cec_0-5", "nitrogen_0-5", "cfvo_0-5", "dem", "slope", "aspect", "curvature", "twi", "NDVI", "NDRE", "NDMI", "PSRI", "CIre", "S2REP", ] diag_features = [f for f in diag_features if f in df.columns] field_medians = df.groupby("field_id")[diag_features].median() def compute_mad(x): return (x - x.median()).abs().median() field_mads = df.groupby("field_id")[diag_features].apply(compute_mad) mad_floors = {} for col in diag_features: q75, q25 = df[col].quantile([0.75, 0.25]) mad_floors[col] = max((q75 - q25) * 0.01, 1e-6) low_df = df[df["is_low_yield"]].copy() for col in diag_features: med = low_df["field_id"].map(field_medians[col]) mad = low_df["field_id"].map(field_mads[col]) floor = mad_floors[col] valid_raw = low_df[col].notna() & med.notna() valid_variance = mad > floor available = valid_raw & valid_variance mad_safe = 
mad.clip(lower=floor) z = (low_df[col] - med) / (1.4826 * mad_safe) z = z.clip(-10, 10).where(available, other=np.nan) low_df[f"z_{col}"] = z z_cols = [f"z_{c}" for c in diag_features] # Classify stress_records = [] for idx, row in low_df.iterrows(): z_dict = {c: row[c] for c in z_cols} top2, status = classify_stress(z_dict) primary, secondary = top2[0], top2[1] co_occurring = ( status == "classified" and secondary[1] > 0 and secondary[1] >= 0.7 * primary[1] ) stress_records.append( { "idx": idx, "field_id": row["field_id"], "row": int(row["row"]), "col": int(row["col"]), "yield_tha": row["yield_tha"], "rule_stress": primary[0], "rule_confidence": round(primary[1], 3), "rule_secondary": secondary[0], "rule_secondary_confidence": round(secondary[1], 3), "co_occurring": co_occurring, "rule_status": status, } ) return pd.DataFrame(stress_records) # ── SHAP Cache + Combination (from pipeline) ───────────────────────────────── def load_shap_cache(cache_path): """Load SHAP pkl, normalize confidence. Returns (shap_df, bool). 
Graceful fallback to None if not found.""" cache_path = Path(cache_path) if not cache_path.exists(): return None, False with open(cache_path, "rb") as f: shap_cache = pickle.load(f) shap_df = pd.DataFrame(shap_cache["stress_records"]) p95 = shap_df["shap_magnitude"].quantile(0.95) norm_denom = p95 if p95 > 0 else shap_df["shap_magnitude"].max() shap_df["shap_confidence"] = (shap_df["shap_magnitude"] / norm_denom).clip(0, 1) shap_df = shap_df.rename(columns={"primary_stress": "shap_stress"}) return shap_df, True def _stress_types_agree(rule_s, shap_s): """Check agreement via COMPATIBLE_STRESSES.""" return rule_s == shap_s or (rule_s, shap_s) in COMPATIBLE_STRESSES def _combine_pixel(row): """Full SHAP+rule combination logic with margin uncertainty, domain priority, secondary support.""" rule_s = row.get("rule_stress", "unclassified") or "unclassified" rule_c = row.get("rule_confidence", 0.0) or 0.0 rule_status = row.get("rule_status", "no_signal") or "no_signal" shap_s = row.get("shap_stress", "unclassified") or "unclassified" shap_c = row.get("shap_confidence", 0.0) or 0.0 scores = row.get("category_scores", {}) if pd.isna(rule_s): rule_s = "unclassified" if pd.isna(shap_s): shap_s = "unclassified" rule_classified = rule_status == "classified" shap_classified = shap_s != "unclassified" and shap_c >= SHAP_MIN_MAGNITUDE # SHAP margin uncertainty shap_margin = 0.0 shap_uncertain = False if isinstance(scores, dict) and len(scores) >= 2: sorted_scores = sorted(scores.values()) best_score = sorted_scores[0] second_score = sorted_scores[1] if best_score < 0: shap_margin = abs(best_score - second_score) / (abs(best_score) + 1e-9) shap_uncertain = shap_margin < SHAP_MARGIN_THRESHOLD if shap_uncertain: shap_classified = False if rule_classified and shap_classified and _stress_types_agree(rule_s, shap_s): return { "combined_stress": rule_s, "combined_confidence": min(1.0, max(rule_c, shap_c) + AGREE_BOOST), "combination_tier": "high", "combination_reason": "both_agree", 
"shap_margin": round(shap_margin, 3), } if rule_classified and shap_classified: if isinstance(scores, dict) and len(scores) >= 2: sorted_cats = sorted(scores.items(), key=lambda x: x[1]) shap_second = sorted_cats[1][0] if len(sorted_cats) >= 2 else None if shap_second and _stress_types_agree(rule_s, shap_second): return { "combined_stress": rule_s, "combined_confidence": rule_c * 0.85, "combination_tier": "medium", "combination_reason": "shap_secondary_supports_rule", "shap_margin": round(shap_margin, 3), } if rule_s in RULE_PRIORITY_STRESSES: winner = rule_s elif shap_s in SHAP_PRIORITY_STRESSES: winner = shap_s else: winner = rule_s if rule_c >= shap_c else shap_s return { "combined_stress": winner, "combined_confidence": max(0.1, max(rule_c, shap_c) - DISAGREE_PENALTY), "combination_tier": "low", "combination_reason": f"disagree_rule={rule_s}_shap={shap_s}", "shap_margin": round(shap_margin, 3), } if not rule_classified and shap_classified: return { "combined_stress": shap_s, "combined_confidence": shap_c * SHAP_RESCUE_DISCOUNT, "combination_tier": "medium", "combination_reason": "shap_rescue", "shap_margin": round(shap_margin, 3), } if rule_classified and not shap_classified: return { "combined_stress": rule_s, "combined_confidence": rule_c * RULE_ONLY_DISCOUNT, "combination_tier": "medium", "combination_reason": "rule_only", "shap_margin": round(shap_margin, 3), } return { "combined_stress": rule_s, "combined_confidence": 0.0, "combination_tier": "unresolved", "combination_reason": "neither_classified", "shap_margin": round(shap_margin, 3), } def combine_classifications(rule_df, shap_df): """Merge rule-based and SHAP classifications per pixel and apply _combine_pixel.""" merged = rule_df.merge( shap_df[ [ "field_id", "row", "col", "shap_stress", "shap_magnitude", "shap_confidence", "category_scores", ] ], on=["field_id", "row", "col"], how="outer", ) combined_cols = merged.apply(_combine_pixel, axis=1, result_type="expand") result = pd.concat([merged, 
combined_cols], axis=1) result["recommended_action"] = ( result["combined_stress"].map(ACTION_MAP).fillna("inspect_manually") ) return result def _rule_only_fallback(rule_df): """Wrap rule_df with combined_* columns when no SHAP cache is available.""" df = rule_df.copy() df["combined_stress"] = df["rule_stress"] df["combined_confidence"] = df["rule_confidence"] df["combination_tier"] = "medium" df["combination_reason"] = "rule_only" df["shap_stress"] = np.nan df["shap_magnitude"] = np.nan df["shap_confidence"] = np.nan df["recommended_action"] = ( df["combined_stress"].map(ACTION_MAP).fillna("inspect_manually") ) return df # ── Challenge Classification ───────────────────────────────────────────────── def classify_challenge_pixels(challenge_info, X_raw_challenge, chunk): """Run combined SHAP+rule classification specifically on challenge pixels. Computes z-scores directly for challenge pixel indices (bypassing the low-yield filter, since challenge pixels have high original yields but transplanted stress features). Also runs full-field classification via run_stress_classification for non-challenge low-yield pixels, then combines with SHAP cache if available.""" if not challenge_info: print(" No challenge pixels to classify.") # Still run full-field classification for intervention pixels rule_df = run_stress_classification(chunk, X_raw_challenge) shap_df, shap_available = load_shap_cache(SHAP_CACHE_PATH) if shap_available: print( f" SHAP cache loaded: {SHAP_CACHE_PATH.name} ({len(shap_df)} pixels)" ) return combine_classifications(rule_df, shap_df) else: print(f" No SHAP cache at {SHAP_CACHE_PATH} — rule-based only.") return _rule_only_fallback(rule_df) print("Running stress classification...") # 1. Full-field rule-based classification (covers low-yield intervention pixels) rule_df = run_stress_classification(chunk, X_raw_challenge) # 2. 
Direct z-score classification for challenge pixels # These pixels have high original yields but transplanted stress features, # so they won't appear in the is_low_yield subset. fid = chunk["field_str"] challenge_pixel_indices = {info["pixel_idx"] for info in challenge_info} # Compute field-level medians and MADs from modified raw features field_medians = {} field_mads = {} mad_floors = {} for raw_col in ZSCORE_FEATURES: if raw_col == "psri_last": vals = pd.Series(chunk["psri_last"]) elif raw_col not in X_raw_challenge.columns: continue else: vals = X_raw_challenge[raw_col] med = vals.median() mad = (vals - med).abs().median() q75, q25 = vals.quantile([0.75, 0.25]) floor = max((q75 - q25) * 0.01, 1e-6) field_medians[raw_col] = med field_mads[raw_col] = mad mad_floors[raw_col] = floor # PSRI is stored separately in chunk, not in X_raw psri_last = chunk["psri_last"] challenge_rule_records = [] for info in challenge_info: px_idx = info["pixel_idx"] z_dict = {} for raw_col, z_key in ZSCORE_FEATURES.items(): if raw_col == "psri_last": val = float(psri_last[px_idx]) elif raw_col not in X_raw_challenge.columns: z_dict[z_key] = np.nan continue else: val = X_raw_challenge.iloc[px_idx][raw_col] med = field_medians[raw_col] mad = field_mads[raw_col] floor = mad_floors[raw_col] if pd.isna(val) or pd.isna(med): z_dict[z_key] = np.nan continue mad_safe = max(mad, floor) z = (val - med) / (1.4826 * mad_safe) z_dict[z_key] = float(np.clip(z, -10, 10)) top2, status = classify_stress(z_dict) primary, secondary = top2[0], top2[1] co_occurring = ( status == "classified" and secondary[1] > 0 and secondary[1] >= 0.7 * primary[1] ) challenge_rule_records.append( { "field_id": fid, "row": info["row"], "col": info["col"], "yield_tha": info["original_yield"], "rule_stress": primary[0], "rule_confidence": round(primary[1], 3), "rule_secondary": secondary[0], "rule_secondary_confidence": round(secondary[1], 3), "co_occurring": co_occurring, "rule_status": status, } ) challenge_rule_df = 
pd.DataFrame(challenge_rule_records) # 3. Merge challenge-pixel rule results into full rule_df # Remove any duplicate rows for challenge pixels that might already be # in rule_df (unlikely, but safe) if len(rule_df) > 0: challenge_coords = set((info["row"], info["col"]) for info in challenge_info) rule_df = rule_df[ ~rule_df.apply( lambda r: (int(r["row"]), int(r["col"])) in challenge_coords, axis=1 ) ] rule_df = pd.concat([challenge_rule_df, rule_df], ignore_index=True) # 4. Load SHAP cache and combine shap_df, shap_available = load_shap_cache(SHAP_CACHE_PATH) if shap_available: print(f" SHAP cache loaded: {SHAP_CACHE_PATH.name} ({len(shap_df)} pixels)") stress_df = combine_classifications(rule_df, shap_df) n_high = (stress_df["combination_tier"] == "high").sum() n_rescued = (stress_df["combination_reason"] == "shap_rescue").sum() print(f" Combined: high={n_high} | rescued={n_rescued}") else: print(f" No SHAP cache at {SHAP_CACHE_PATH} — rule-based only.") stress_df = _rule_only_fallback(rule_df) # 5. 
Print classification results for challenge pixels print( f"\n {'Pixel':<12} {'Expected':<22} {'Classified':<22} " f"{'Tier':<10} {'Conf':>6} {'Reason'}" ) print(f" {'-' * 90}") for info in challenge_info: row_match = stress_df[ (stress_df["row"] == info["row"]) & (stress_df["col"] == info["col"]) ] if len(row_match) > 0: r = row_match.iloc[0] classified = r.get("combined_stress", "unclassified") tier = r.get("combination_tier", "n/a") conf = r.get("combined_confidence", 0.0) reason = r.get("combination_reason", "n/a") info["classified_stress"] = classified info["classified_conf"] = conf info["classified_action"] = ACTION_MAP.get(classified, "inspect_manually") else: info["classified_stress"] = "unclassified" info["classified_conf"] = 0.0 info["classified_action"] = "inspect_manually" tier, reason = "n/a", "no_match" print( f" [{info['row']:>2},{info['col']:>2}] {info['stress']:<22} " f"{info['classified_stress']:<22} {tier:<10} {info['classified_conf']:>5.3f} {reason}" ) return stress_df # ── Intervention Loading ────────────────────────────────────────────────────── def load_interventions(interventions_csv, rows, cols): """Read CSV, match (row,col) to pixel indices. 
Returns (selected_indices, selected_actions, selected_stresses).""" df = pd.read_csv(interventions_csv) print(f" Loaded {len(df)} intervention pixels from {Path(interventions_csv).name}") action_to_stress = {v: k for k, v in ACTION_MAP.items()} selected_indices = [] selected_actions = [] selected_stresses = [] for _, irow in df.iterrows(): mask = (rows == irow["row"]) & (cols == irow["col"]) idx = np.where(mask)[0] if len(idx) == 0: print( f" WARNING: pixel [{irow['row']},{irow['col']}] not found, skipping" ) continue selected_indices.append(idx[0]) selected_actions.append(irow["action"]) selected_stresses.append(action_to_stress.get(irow["action"], "unclassified")) selected_indices = ( np.array(selected_indices) if selected_indices else np.array([], dtype=int) ) print(f" Matched {len(selected_indices)} intervention pixels") return selected_indices, selected_actions, selected_stresses # ── Combine Interventions ──────────────────────────────────────────────────── def combine_interventions( challenge_info, selected_indices, selected_actions, selected_stresses ): """Merge treated challenges + robot interventions. 
# ── Combine Interventions ────────────────────────────────────────────────────


def combine_interventions(
    challenge_info, selected_indices, selected_actions, selected_stresses
):
    """Concatenate treated challenge pixels with the robot interventions.

    Challenge entries whose ``"treated"`` flag is falsy are listed on stdout
    and excluded from the result. Treated challenges come first in every
    returned sequence, followed by the robot interventions in their given
    order. A treated challenge contributes its ``"action"`` and its
    ``"classified_stress"`` (falling back to ``"stress"`` when the former is
    absent).

    Returns (all_indices, all_actions, all_stresses, all_is_challenge):
    an int index array and three lists aligned with it; ``all_is_challenge``
    is True for challenge pixels and False for robot interventions.
    """
    treated, skipped = [], []
    for entry in challenge_info:
        bucket = treated if entry["treated"] else skipped
        bucket.append(entry)

    if skipped:
        print(" Untreated challenge pixels (no recovery):")
        for entry in skipped:
            print(f" [{entry['row']},{entry['col']}] {entry['stress']}")

    n_treated = len(treated)
    n_robot = len(selected_indices)

    # Explicit int empties keep the concatenation from drifting to float.
    if treated:
        challenge_part = np.array([entry["pixel_idx"] for entry in treated])
    else:
        challenge_part = np.array([], dtype=int)
    if n_robot > 0:
        robot_part = selected_indices
    else:
        robot_part = np.array([], dtype=int)
    all_indices = np.concatenate([challenge_part, robot_part]).astype(int)

    all_actions = [entry["action"] for entry in treated]
    all_actions += list(selected_actions)

    all_stresses = [
        entry.get("classified_stress", entry["stress"]) for entry in treated
    ]
    all_stresses += list(selected_stresses)

    all_is_challenge = [True] * n_treated + [False] * n_robot

    print(
        f" Combined: {len(all_indices)} pixels "
        f"({n_treated} treated challenges + {n_robot} robot interventions)"
    )
    return all_indices, all_actions, all_stresses, all_is_challenge
# ── Recovery Simulation ──────────────────────────────────────────────────────


def simulate_recovery(
    X_challenge_arr, feature_cols, all_indices, all_actions, y_all, X_all_baseline
):
    """Move the action-responsive features of treated pixels toward healthy values.

    "Healthy" pixels are those whose yield is not at or below the
    ``LOW_YIELD_PERCENTILE``-th percentile of ``y_all``. For every treated
    pixel, each feature listed for its action in
    ``ACTION_RESPONSIVE_FEATURES`` is shifted by ``RECOVERY_FRACTION`` of the
    gap between its current value and the healthy-pixel median of that
    feature, taken from ``X_all_baseline``. Actions with no entry in
    ``ACTION_RESPONSIVE_FEATURES`` and feature names missing from
    ``feature_cols`` leave the pixel untouched.

    Returns X_post, a modified copy of ``X_challenge_arr``; the input array
    is not changed.
    """
    yield_cutoff = np.nanpercentile(y_all, LOW_YIELD_PERCENTILE)
    is_healthy = ~(y_all <= yield_cutoff)

    healthy_medians = {
        name: np.median(X_all_baseline[is_healthy, position])
        for position, name in enumerate(feature_cols)
    }

    # First occurrence wins, matching what list.index() would report.
    column_of = {}
    for position, name in enumerate(feature_cols):
        column_of.setdefault(name, position)

    X_post = X_challenge_arr.copy()
    for position, px_idx in enumerate(all_indices):
        action = all_actions[position]
        for feat_name in ACTION_RESPONSIVE_FEATURES.get(action, []):
            if feat_name not in column_of:
                continue
            column = column_of[feat_name]
            before = X_post[px_idx, column]
            goal = healthy_medians[feat_name]
            X_post[px_idx, column] = before + RECOVERY_FRACTION * (goal - before)
    return X_post
# ── Prediction & Comparison ──────────────────────────────────────────────────


def predict_and_compare(
    model,
    X_challenge_arr,
    X_post,
    all_indices,
    all_actions,
    all_stresses,
    all_is_challenge,
    y_all,
    rows,
    cols,
    y_pred_all,
    challenge_indices=None,
):
    """Predict before/after yields and tabulate the change per treated pixel.

    Only pixels whose features changed are passed to ``model.predict``; every
    other pixel keeps its value from ``y_pred_all``.

    * "Before" is ``y_pred_all`` with the pixels in ``challenge_indices``
      re-predicted from ``X_challenge_arr``.
    * "After" is "before" with the pixels in ``all_indices`` re-predicted
      from ``X_post``.

    ``all_actions``, ``all_stresses`` and ``all_is_challenge`` are aligned
    with ``all_indices``. ``pct_improvement`` divides by
    ``max(yield_pred_before, 0.1)`` so a near-zero or negative prediction
    cannot blow up the percentage.

    Returns (comparison_df, y_pred_before_full, y_pred_after_full). The
    frame always carries the full column set, even with zero rows, and
    ``is_challenge`` is always boolean, so callers can select columns and
    build boolean masks when nothing was treated.
    """
    columns = [
        "row",
        "col",
        "is_challenge",
        "action",
        "stress",
        "yield_actual",
        "yield_pred_before",
        "yield_pred_after",
        "yield_improvement",
        "pct_improvement",
    ]

    # "Before" = baseline predictions with the challenge stress applied.
    y_pred_before = np.array(y_pred_all, dtype=np.float64)
    if challenge_indices is not None and len(challenge_indices) > 0:
        ch_idx = np.asarray(challenge_indices, dtype=int)
        y_pred_before[ch_idx] = model.predict(X_challenge_arr[ch_idx])

    # "After" = before + intervention recovery on the treated pixels.
    y_pred_after = y_pred_before.copy()
    treated_idx = np.asarray(all_indices, dtype=int)
    if treated_idx.size > 0:
        y_pred_after[treated_idx] = model.predict(X_post[treated_idx])

    records = []
    for position, px_idx in enumerate(all_indices):
        pred_before = y_pred_before[px_idx]
        pred_after = y_pred_after[px_idx]
        gain = pred_after - pred_before
        records.append(
            {
                "row": int(rows[px_idx]),
                "col": int(cols[px_idx]),
                "is_challenge": all_is_challenge[position],
                "action": all_actions[position],
                "stress": all_stresses[position],
                "yield_actual": float(y_all[px_idx]),
                "yield_pred_before": float(pred_before),
                "yield_pred_after": float(pred_after),
                "yield_improvement": float(gain),
                "pct_improvement": float(gain / max(pred_before, 0.1) * 100),
            }
        )

    # Without an explicit schema an empty record list yields a frame with no
    # columns at all, and the caller's comp_df["is_challenge"] raises KeyError.
    comparison = pd.DataFrame(records, columns=columns)
    comparison["is_challenge"] = comparison["is_challenge"].astype(bool)
    return comparison, y_pred_before, y_pred_after


# ── Plotting ──────────────────────────────────────────────────────────────────


def pixels_to_image(rows, cols, values):
    """Scatter per-pixel values onto a dense 2D grid.

    The grid spans the bounding box of ``rows`` / ``cols``; cells that no
    pixel maps to stay NaN. If several pixels share a cell, the last one
    written wins.

    Returns (img, r_min, c_min), where ``r_min`` / ``c_min`` are the offsets
    that were subtracted to obtain image coordinates.
    """
    r_min = rows.min()
    c_min = cols.min()
    height = rows.max() - r_min + 1
    width = cols.max() - c_min + 1
    img = np.full((height, width), np.nan)
    img[rows - r_min, cols - c_min] = values
    return img, r_min, c_min
def _draw_pixel_markers(
    ax, challenge_info, selected_indices, r_arr, c_arr, r_min, c_min
):
    """Overlay challenge pixels (hollow circles) and robot interventions (crosses)."""
    for info in challenge_info:
        ax.plot(
            info["col"] - c_min,
            info["row"] - r_min,
            "ko",
            markersize=12,
            markerfacecolor="none",
            markeredgewidth=2.5,
        )
    for px_idx in selected_indices:
        ax.plot(
            c_arr[px_idx] - c_min,
            r_arr[px_idx] - r_min,
            "kx",
            markersize=8,
            markeredgewidth=2,
        )


def plot_simulation_map(
    chunk,
    y_pred_before,
    y_pred_after,
    challenge_info,
    selected_indices,
    all_indices,
    output_path,
):
    """Save a 3-panel figure: before (RdYlGn), after (RdYlGn), delta (RdBu).

    Challenge pixels are drawn as hollow circles and robot interventions as
    crosses on all three panels. The before/after panels share one colour
    scale from 0 to the largest finite value among the observed yield and
    both prediction arrays, so neither panel is clipped. The delta panel is
    symmetric around zero with a half-width of at least 0.5.

    ``all_indices`` is accepted for interface compatibility and is not used.
    The figure is written to ``output_path`` and closed, including when
    saving fails.
    """
    r_arr = chunk["row"]
    c_arr = chunk["col"]
    y_all = chunk["target"]

    r_min, c_min = r_arr.min(), c_arr.min()
    vmin = 0
    vmax = max(
        float(np.nanmax(y_all)),
        float(np.nanmax(y_pred_before)),
        float(np.nanmax(y_pred_after)),
    )

    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
    try:
        # Panels 1-2: predicted yield before and after the interventions.
        panels = (
            (y_pred_before, "Before Intervention"),
            (y_pred_after, "After Intervention"),
        )
        for ax, (values, title) in zip(axes[:2], panels):
            img, _, _ = pixels_to_image(r_arr, c_arr, values)
            im = ax.imshow(img, cmap="RdYlGn", vmin=vmin, vmax=vmax)
            ax.set_title(title, fontsize=11)
            ax.axis("off")
            _draw_pixel_markers(
                ax, challenge_info, selected_indices, r_arr, c_arr, r_min, c_min
            )
            fig.colorbar(im, ax=ax, shrink=0.7, label="t/ha")

        # Panel 3: yield improvement (delta).
        img_diff, _, _ = pixels_to_image(
            r_arr, c_arr, y_pred_after - y_pred_before
        )
        max_abs = max(
            abs(float(np.nanmin(img_diff))),
            abs(float(np.nanmax(img_diff))),
            0.5,
        )
        im3 = axes[2].imshow(img_diff, cmap="RdBu", vmin=-max_abs, vmax=max_abs)
        axes[2].set_title("Yield Change (After - Before)", fontsize=11)
        axes[2].axis("off")
        _draw_pixel_markers(
            axes[2], challenge_info, selected_indices, r_arr, c_arr, r_min, c_min
        )
        fig.colorbar(im3, ax=axes[2], shrink=0.7, label="\u0394 t/ha")

        fig.tight_layout()
        fig.savefig(output_path, dpi=150, bbox_inches="tight")
    finally:
        plt.close(fig)


# ── Orchestrator ──────────────────────────────────────────────────────────────


def _load_challenges(rows, cols, y_all):
    """Read CHALLENGES_CSV into one info dict per challenge pixel found in the field."""
    if not CHALLENGES_CSV.exists():
        print(
            f" No challenges file at {CHALLENGES_CSV} — skipping challenge injection."
        )
        return []

    challenges_df = pd.read_csv(CHALLENGES_CSV)
    print(f" Loaded {len(challenges_df)} challenge pixels")

    challenge_info = []
    for _, crow in challenges_df.iterrows():
        idx = np.where((rows == crow["row"]) & (cols == crow["col"]))[0]
        if len(idx) == 0:
            print(
                f" WARNING: pixel [{crow['row']},{crow['col']}] not found, skipping"
            )
            continue
        px_idx = idx[0]
        # A missing "treated" column means the challenge is treated.
        treated = str(crow.get("treated", "true")).lower() == "true"
        challenge_info.append(
            {
                "pixel_idx": px_idx,
                "row": int(crow["row"]),
                "col": int(crow["col"]),
                "stress": crow["challenge"],
                "action": ACTION_MAP.get(crow["challenge"], "inspect_manually"),
                "treated": treated,
                "original_yield": float(y_all[px_idx]),
            }
        )
        print(
            f" [{crow['row']},{crow['col']}] {crow['challenge']}, treated={treated}"
        )
    return challenge_info


def _print_results(comp_df, y_all):
    """Print the per-pixel table, the challenge/original means and the field mean yield."""
    is_chal = comp_df["is_challenge"].astype(bool)
    n_chal = int(is_chal.sum())
    n_orig = len(comp_df) - n_chal

    print(f"\n{'=' * 90}")
    print(f"INTERVENTION SIMULATION RESULTS ({n_chal} challenge + {n_orig} original)")
    print(f"{'=' * 90}")
    print(
        f"\n{'Type':<6} {'Row':<5} {'Col':<5} {'Stress':<22} {'Action':<22} "
        f"{'Before':>7} {'After':>7} {'Gain':>7} {'%':>6}"
    )
    print(f"{'-' * 90}")
    for _, r in comp_df.iterrows():
        label = "CHAL" if r["is_challenge"] else "orig"
        print(
            f"{label:<6} {int(r['row']):<5} {int(r['col']):<5} {r['stress']:<22} "
            f"{r['action']:<22} {r['yield_pred_before']:>7.2f} {r['yield_pred_after']:>7.2f} "
            f"{r['yield_improvement']:>+7.2f} {r['pct_improvement']:>+5.1f}%"
        )

    chal_df = comp_df[is_chal]
    orig_df = comp_df[~is_chal]
    if len(chal_df) > 0:
        print(
            f"\n{'CHALLENGE (' + str(n_chal) + ')':<54} "
            f"{chal_df['yield_pred_before'].mean():>7.2f} "
            f"{chal_df['yield_pred_after'].mean():>7.2f} "
            f"{chal_df['yield_improvement'].mean():>+7.2f} "
            f"{chal_df['pct_improvement'].mean():>+5.1f}%"
        )
    if len(orig_df) > 0:
        print(
            f"{'ORIGINAL (' + str(n_orig) + ')':<54} "
            f"{orig_df['yield_pred_before'].mean():>7.2f} "
            f"{orig_df['yield_pred_after'].mean():>7.2f} "
            f"{orig_df['yield_improvement'].mean():>+7.2f} "
            f"{orig_df['pct_improvement'].mean():>+5.1f}%"
        )

    print(f"\nField mean yield: {y_all.mean():.2f} t/ha")


def run_simulation(
    field_id="DE_0100",
    nc_id=100,
    seed=42,
):
    """Run the ten simulation steps for one field, printing progress.

    Steps: load the field from NetCDF, extract features, train the TabPFN
    model, load challenges, inject them, classify stress, load robot
    interventions, combine both, simulate recovery, then predict and save.
    Results are written to ``OUTPUT_CSV`` and ``OUTPUT_PNG``.

    With neither challenges nor interventions, an empty CSV and a baseline
    map (before == after) are written and the function returns early. When
    challenges exist but none is treated and there are no interventions, the
    run completes with an empty results table instead of failing.
    """
    np.random.seed(seed)
    RESULTS_DIR.mkdir(parents=True, exist_ok=True)

    result_columns = [
        "row",
        "col",
        "is_challenge",
        "action",
        "stress",
        "yield_actual",
        "yield_pred_before",
        "yield_pred_after",
        "yield_improvement",
        "pct_improvement",
    ]

    # 1. Load field data from NetCDF
    print(f"[1/10] Loading field {field_id} (nc_id={nc_id})...")
    chunk, band_names = load_field_data(NC_PATH, field_id, nc_id)
    print(f" {len(chunk['target'])} pixels, yield={chunk['target'].mean():.1f} t/ha")

    # 2. Extract features (raw + PCA)
    print("[2/10] Extracting features...")
    chunk = extract_features(chunk, band_names)
    print(f" {len(chunk['X_raw'].columns)} raw -> {chunk['X'].shape[1]} after PCA")

    # 3. Train TabPFN model
    print("[3/10] Training TabPFN model...")
    model, y_pred_all, col_means = train_tabpfn(chunk)
    print(
        f" All-pixel predictions: mean={y_pred_all.mean():.2f}, std={y_pred_all.std():.2f}"
    )

    y_all = chunk["target"]
    rows = chunk["row"]
    cols = chunk["col"]
    feature_cols = chunk["feature_cols"]
    X_raw_orig = chunk["X_raw"].copy()

    # Baseline feature array (for healthy-pixel stats), NaNs filled per column.
    X_all_baseline = chunk["X"].values.copy()
    for j in range(X_all_baseline.shape[1]):
        X_all_baseline[np.isnan(X_all_baseline[:, j]), j] = col_means[j]

    # 4. Load challenges CSV
    print("[4/10] Loading challenges...")
    challenge_info = _load_challenges(rows, cols, y_all)

    # 5. Inject challenge pixels
    if challenge_info:
        print("[5/10] Injecting challenge pixels...")
        X_raw_challenge, _, X_challenge_arr = inject_challenge_pixels(
            chunk, challenge_info, X_raw_orig, y_pred_all
        )
    else:
        print("[5/10] No challenges to inject — using original features.")
        X_raw_challenge = X_raw_orig.copy()
        X_challenge_arr = X_all_baseline.copy()

    # 6. Run SHAP+rule-based stress classification on challenge pixels.
    # The returned frame is not needed here; the call annotates each
    # challenge_info entry in place with its classification.
    print("[6/10] Running stress classification...")
    classify_challenge_pixels(challenge_info, X_raw_challenge, chunk)

    # 7. Load interventions CSV
    print("[7/10] Loading interventions...")
    if not INTERVENTIONS_CSV.exists():
        print(f" No interventions file at {INTERVENTIONS_CSV} — skipping.")
        selected_indices = np.array([], dtype=int)
        selected_actions = []
        selected_stresses = []
    else:
        selected_indices, selected_actions, selected_stresses = load_interventions(
            INTERVENTIONS_CSV, rows, cols
        )

    # 8. Combine treated challenges + robot interventions
    print("[8/10] Combining interventions...")
    if len(challenge_info) == 0 and len(selected_indices) == 0:
        print(" WARNING: No challenges and no interventions — nothing to simulate.")
        pd.DataFrame(columns=result_columns).to_csv(OUTPUT_CSV, index=False)
        print(f" Saved empty: {OUTPUT_CSV}")
        # Still generate a baseline map (before == after, zero delta).
        # Features are unchanged, so the baseline is y_pred_all itself.
        plot_simulation_map(
            chunk,
            y_pred_all,
            y_pred_all,
            challenge_info,
            selected_indices,
            np.array([], dtype=int),
            OUTPUT_PNG,
        )
        print(f"Saved baseline map: {OUTPUT_PNG}")
        return

    all_indices, all_actions, all_stresses, all_is_challenge = combine_interventions(
        challenge_info, selected_indices, selected_actions, selected_stresses
    )

    # 9. Simulate post-intervention recovery
    print("[9/10] Simulating recovery...")
    X_post = simulate_recovery(
        X_challenge_arr, feature_cols, all_indices, all_actions, y_all, X_all_baseline
    )
    print(f" Recovery fraction: {RECOVERY_FRACTION:.0%}")

    # 10. Predict post-intervention yields & save
    print("[10/10] Predicting and saving...")
    challenge_indices = np.array(
        [info["pixel_idx"] for info in challenge_info], dtype=int
    )
    comp_df, y_pred_before, y_pred_after = predict_and_compare(
        model,
        X_challenge_arr,
        X_post,
        all_indices,
        all_actions,
        all_stresses,
        all_is_challenge,
        y_all,
        rows,
        cols,
        y_pred_all,
        challenge_indices=challenge_indices,
    )
    if comp_df.empty:
        # Only untreated challenges and no interventions: nothing was
        # compared, and a frame built from an empty record list may carry no
        # columns. Restore the schema so the summary and CSV still work.
        comp_df = pd.DataFrame(columns=result_columns)

    _print_results(comp_df, y_all)

    # Save CSV
    comp_df.to_csv(OUTPUT_CSV, index=False)
    print(f"\nSaved: {OUTPUT_CSV}")

    # Generate map (reuse arrays from predict_and_compare — no extra predict needed).
    plot_simulation_map(
        chunk,
        y_pred_before,
        y_pred_after,
        challenge_info,
        selected_indices,
        all_indices,
        OUTPUT_PNG,
    )
    print(f"Saved: {OUTPUT_PNG}")
    print("\nDone.")


# ── Entry Point ───────────────────────────────────────────────────────────────

if __name__ == "__main__":
    run_simulation()