Spaces:

isabelku
/

autofarm

Sleeping

App Files Files Community

autofarm / apps /intervention_simulation.py

isabelku

AutoFarm Space deploy

826dd96 about 1 month ago

raw

history blame contribute delete

57.9 kB

	"""
	Intervention Simulation
	=======================
	Standalone script that simulates stress challenges and robot interventions
	on a field, predicts before/after yields using the combined SHAP + rule-based
	classification approach (same as intervention_predictions_pipeline.py), and
	outputs a results CSV + 3-panel PNG map.

	Usage:
	python apps/intervention_simulation.py
	"""

	import matplotlib

	matplotlib.use("Agg")
	import matplotlib.pyplot as plt
	import matplotlib.patches as mpatches
	import numpy as np
	import pandas as pd
	import pickle
	import warnings
	import gc
	import hashlib
	import json
	from pathlib import Path
	from sklearn.model_selection import train_test_split
	from sklearn.metrics import mean_squared_error, r2_score
	from sklearn.preprocessing import StandardScaler
	from sklearn.decomposition import PCA
	from tabpfn import TabPFNRegressor
	import netCDF4 as nclib

	warnings.filterwarnings("ignore")

	# ── Paths ─────────────────────────────────────────────────────────────────────

	# Root of the YieldSat inputs; in this file only NC_PATH is derived from it.
	DATA_ROOT = Path("./yieldsat_data")
	# Directory holding the simulation inputs, caches and outputs defined below.
	RESULTS_DIR = Path("./results")
	# Merged NetCDF dataset. Its mtime is part of the model-cache key built by
	# _sim_cache_path, so replacing the file invalidates cached models.
	NC_PATH = DATA_ROOT / "Preprocessed/Germany/merge_s2-soil-dem-weather-coords.nc"

	# Simulation input tables.
	# NOTE(review): presumably read by driver code outside the visible part of
	# this file (load_interventions takes its CSV path as an argument) - confirm.
	CHALLENGES_CSV = RESULTS_DIR / "sim_input_challenges.csv"
	INTERVENTIONS_CSV = RESULTS_DIR / "sim_input_interventions.csv"
	SIM_DATA_DIR = Path("data/raw/simulator/real_field")
	# Precomputed per-pixel table; _load_baseline_from_csv reads its
	# row / col / yield_pred columns to obtain the baseline prediction.
	INTERVENTION_ZONES_CSV = SIM_DATA_DIR / "de_0100_full_field_source.csv"
	# Pickled SHAP classification records consumed via load_shap_cache.
	SHAP_CACHE_PATH = RESULTS_DIR / "shap_pca_cache_DE_0100.pkl"
	# Directory for pickled TabPFN caches; created on demand by _sim_cache_path.
	SIM_CACHE_DIR = RESULTS_DIR / "_sim_cache"

	# Output artefacts named in the module docstring (results CSV + PNG map).
	OUTPUT_CSV = RESULTS_DIR / "farm_intervention_simulation.csv"
	OUTPUT_PNG = RESULTS_DIR / "farm_intervention_simulation_map.png"

	# ── Model Parameters ──────────────────────────────────────────────────────────

	# Upper bound on the number of rows used to fit TabPFN; train_tabpfn
	# subsamples the training split when it is larger than this.
	TABPFN_MAX_TRAIN = 2000
	# Pixels at or below this yield percentile are treated as "low yield".
	LOW_YIELD_PERCENTILE = 25
	# Default |z| threshold a signal must exceed to fire in classify_stress.
	ZSCORE_THRESHOLD = 1.2
	# NOTE(review): not referenced in the visible part of the file; presumably
	# the share of lost yield an intervention recovers - confirm.
	RECOVERY_FRACTION = 0.6
	# Added to the denominators of the vegetation-index ratios in
	# extract_features so they never divide by exactly zero.
	EPS = 1e-8

	# ── Combination Parameters (from pipeline) ───────────────────────────────────

	# Confidence bonus when the rule-based and SHAP classifications agree.
	AGREE_BOOST = 0.15
	# Confidence penalty when both classify but disagree.
	DISAGREE_PENALTY = 0.20
	# Multiplier on SHAP confidence when only SHAP classifies ("shap_rescue").
	SHAP_RESCUE_DISCOUNT = 0.8
	# Multiplier on rule confidence when only the rules classify ("rule_only").
	RULE_ONLY_DISCOUNT = 0.7
	# Minimum value of the normalised shap_confidence for the SHAP result to
	# count as classified (see _combine_pixel).
	SHAP_MIN_MAGNITUDE = 0.05
	# Relative gap between the two lowest category scores below which the SHAP
	# result is treated as uncertain and ignored (see _combine_pixel).
	SHAP_MARGIN_THRESHOLD = 0.05

	# ── Mappings ──────────────────────────────────────────────────────────────────

	# Soil depth-interval suffixes and soil property prefixes; band / column
	# names are built as f"{prop}_{depth}", e.g. "clay_0-5".
	SOIL_DEPTHS = ["0-5", "5-15", "15-30", "30-60", "60-100", "100-200"]
	SOIL_PROPS = ["clay", "sand", "silt", "soc", "phh2o", "cec", "nitrogen", "cfvo"]

	# Groups of raw columns that extract_features compresses with PCA. Each group
	# is replaced by n_components columns named f"{group}_pc{i}".
	PCA_GROUPS = {
	"soil_texture": {
	"cols": [f"{p}_{d}" for p in ["clay", "sand", "silt"] for d in SOIL_DEPTHS],
	"n_components": 2,
	},
	"soil_chemistry": {
	"cols": [
	f"{p}_{d}" for p in ["soc", "cec", "nitrogen", "phh2o"] for d in SOIL_DEPTHS
	],
	"n_components": 3,
	},
	"coarse_fragments": {
	"cols": [f"cfvo_{d}" for d in SOIL_DEPTHS],
	"n_components": 1,
	},
	"temperature": {
	"cols": ["temp_mean_mean", "temp_max_mean", "temp_min_mean"],
	"n_components": 1,
	},
	}

	# Stress label -> recommended action. Not one-to-one: two stresses map to
	# "install_drainage" and two to "inspect_manually", so inverting this dict
	# (as load_interventions does) keeps only the last stress per action.
	ACTION_MAP = {
	"drought": "irrigate",
	"waterlogging": "install_drainage",
	"nutrient_deficiency": "apply_fertilizer_N",
	"compaction": "subsoil",
	"poor_drainage": "install_drainage",
	"healthy_low_yield_anomaly": "inspect_manually",
	"unclassified": "inspect_manually",
	}

	# Stress label -> hex colour (presumably for the PNG map; the plotting code
	# is outside the visible part of the file).
	STRESS_COLORS = {
	"drought": "#2196F3",
	"waterlogging": "#795548",
	"nutrient_deficiency": "#4CAF50",
	"compaction": "#FF9800",
	"poor_drainage": "#00BCD4",
	"healthy_low_yield_anomaly": "#E91E63",
	"unclassified": "#9E9E9E",
	}

	# Action -> raw vegetation-index columns associated with that action.
	# NOTE(review): not used in the visible code; presumably the features an
	# intervention is assumed to improve - confirm against the simulation step.
	ACTION_RESPONSIVE_FEATURES = {
	"apply_fertilizer_N": ["ndvi_last", "ndre_last", "cire_last", "s2rep_last"],
	"irrigate": ["ndvi_last", "ndmi_last"],
	"install_drainage": ["ndvi_last", "ndmi_last"],
	"subsoil": ["ndvi_last", "ndre_last", "cire_last"],
	}

	# Ordered (rule_stress, shap_stress) pairs that _stress_types_agree treats as
	# agreement even though the labels differ. Both orders are listed explicitly.
	COMPATIBLE_STRESSES = {
	("poor_drainage", "waterlogging"),
	("waterlogging", "poor_drainage"),
	("drought", "compaction"),
	("compaction", "drought"),
	("nutrient_deficiency", "compaction"),
	("compaction", "nutrient_deficiency"),
	}

	# Tie-break sets used by _combine_pixel on disagreement: the rule result wins
	# for RULE_PRIORITY_STRESSES, otherwise the SHAP result wins for
	# SHAP_PRIORITY_STRESSES, otherwise the higher confidence wins.
	RULE_PRIORITY_STRESSES = {"drought", "nutrient_deficiency"}
	SHAP_PRIORITY_STRESSES = {"waterlogging", "compaction", "poor_drainage"}

	# z-score feature mapping: raw column name -> classifier key
	# Pipeline uses z_NDMI (not z_NDWI)
	# "psri_last" is not a column of X_raw; it is read from chunk["psri_last"].
	ZSCORE_FEATURES = {
	"ndvi_last": "z_NDVI",
	"ndre_last": "z_NDRE",
	"ndmi_last": "z_NDMI",
	"psri_last": "z_PSRI",
	"cire_last": "z_CIre",
	"s2rep_last": "z_S2REP",
	"clay_0-5": "z_clay_0-5",
	"sand_0-5": "z_sand_0-5",
	"silt_0-5": "z_silt_0-5",
	"soc_0-5": "z_soc_0-5",
	"phh2o_0-5": "z_phh2o_0-5",
	"cec_0-5": "z_cec_0-5",
	"nitrogen_0-5": "z_nitrogen_0-5",
	"cfvo_0-5": "z_cfvo_0-5",
	"dem": "z_dem",
	"slope": "z_slope",
	"twi": "z_twi",
	}


	# ── Data Loading ──────────────────────────────────────────────────────────────


	def load_field_data(nc_path, field_id, nc_id):
	    """Load one field's pixels from the merged NetCDF dataset.

	    Args:
	        nc_path: Path of the NetCDF file.
	        field_id: Human-readable field identifier; stored in the chunk and
	            used in the error message, not for the lookup itself.
	        nc_id: Value matched against the ``field_shared_name`` variable to
	            select the field's pixels.

	    Returns:
	        ``(chunk, band_names)``. ``chunk`` is a dict with the keys
	        ``field_str``, ``field_nc``, ``sample`` and ``target`` (float32 with
	        masked values replaced by NaN), ``times``, ``row`` and ``col``.
	        ``band_names`` is the ``band`` variable converted to a list of str.

	    Raises:
	        ValueError: If no entry of ``field_shared_name`` equals ``nc_id``.
	    """
	    # The context manager closes the dataset on every exit path, including
	    # a failing variable read, which the manual close() calls did not cover.
	    with nclib.Dataset(str(nc_path), "r") as ds:
	        band_names = [str(b) for b in ds["band"][:]]
	        fsn_all = ds["field_shared_name"][:]

	        field_indices = np.where(fsn_all == nc_id)[0]
	        if len(field_indices) == 0:
	            raise ValueError(
	                f"Field {field_id} (NC id={nc_id}) not found in dataset"
	            )

	        # NOTE(review): slicing first..last match assumes the pixels of one
	        # field are stored contiguously - confirm against the dataset layout.
	        i0, i1 = field_indices[0], field_indices[-1] + 1
	        chunk = {
	            "field_str": field_id,
	            "field_nc": nc_id,
	            "sample": np.ma.filled(ds["sample"][i0:i1, :, :], np.nan).astype(
	                np.float32
	            ),
	            "times": ds["times"][i0:i1, :],
	            "target": np.ma.filled(ds["target"][i0:i1], np.nan).astype(np.float32),
	            "row": ds["row"][i0:i1],
	            "col": ds["col"][i0:i1],
	        }
	    return chunk, band_names


	# ── Feature Engineering ───────────────────────────────────────────────────────


	def find_valid_timesteps(sample, band_idx, min_coverage=0.5):
	    """Return the timestep indices whose share of non-NaN pixels in band
	    ``band_idx`` is strictly greater than ``min_coverage``.

	    ``sample`` is indexed as ``[pixel, timestep, band]``.
	    """
	    band = sample[:, :, band_idx]
	    n_pixels = len(band)
	    valid_counts = (~np.isnan(band)).sum(axis=0)
	    well_covered = (valid_counts / n_pixels) > min_coverage
	    return np.flatnonzero(well_covered)


	def extract_features(chunk, band_names):
	    """Build the raw and PCA-compressed feature tables for one field.

	    Raw features are the static soil / topography bands (taken at timestep
	    0), the weather bands averaged over the valid timesteps, and five
	    vegetation indices computed at the last valid timestep. With every band
	    present that is 62 raw columns (63 counting PSRI, which is stored
	    separately), compressed to 18 model features by the PCA_GROUPS.

	    Args:
	        chunk: Field dict from load_field_data; mutated in place.
	        band_names: Band names in the order of the last axis of
	            ``chunk["sample"]``.

	    Returns:
	        The same ``chunk`` with ``valid_ts``, ``X_raw``, ``psri_last``, ``X``,
	        ``feature_cols``, ``pca_info`` and ``band_names`` added.

	    Raises:
	        ValueError: If a required Sentinel-2 band is missing from
	            ``band_names`` or no timestep has enough valid B04 pixels.
	    """
	    s = chunk["sample"]

	    band_idx = {
	        name: band_names.index(name)
	        for name in ("B04", "B03", "B05", "B06", "B07", "B08", "B11")
	    }

	    valid_ts = find_valid_timesteps(s, band_idx["B04"])
	    chunk["valid_ts"] = valid_ts
	    if len(valid_ts) == 0:
	        raise ValueError(f"No valid timesteps for {chunk['field_str']}")

	    last_t = valid_ts[-1]
	    nir = s[:, last_t, band_idx["B08"]]
	    red = s[:, last_t, band_idx["B04"]]
	    grn = s[:, last_t, band_idx["B03"]]
	    re1 = s[:, last_t, band_idx["B05"]]
	    re2 = s[:, last_t, band_idx["B06"]]
	    re3 = s[:, last_t, band_idx["B07"]]
	    swir = s[:, last_t, band_idx["B11"]]

	    # Static bands (soil per depth, then topography); absent bands are skipped.
	    static_names = [f"{prop}_{depth}" for prop in SOIL_PROPS for depth in SOIL_DEPTHS]
	    static_names += ["dem", "slope", "aspect", "curvature", "twi"]

	    feat_dict = {}
	    for bname in static_names:
	        if bname in band_names:
	            # Static layers are read from the first timestep only.
	            feat_dict[bname] = s[:, 0, band_names.index(bname)]
	    for bname in ["temp_mean", "temp_max", "temp_min", "total_prec"]:
	        if bname in band_names:
	            bidx = band_names.index(bname)
	            feat_dict[f"{bname}_mean"] = np.nanmean(s[:, valid_ts, bidx], axis=1)

	    feat_dict["ndvi_last"] = (nir - red) / (nir + red + EPS)
	    feat_dict["ndre_last"] = (nir - re1) / (nir + re1 + EPS)
	    feat_dict["ndmi_last"] = (nir - swir) / (nir + swir + EPS)
	    feat_dict["cire_last"] = nir / (re1 + EPS) - 1
	    feat_dict["s2rep_last"] = 705 + 35 * ((red + re3) / 2 - re1) / (re2 - re1 + EPS)

	    # Build the frame once and impute NaNs with per-column medians.
	    raw = pd.DataFrame(feat_dict)
	    X_raw = raw.fillna(raw.median())
	    chunk["X_raw"] = X_raw

	    # PSRI for classifier (not a model feature)
	    # NOTE(review): this uses the green band (B03); the commonly cited
	    # Sentinel-2 PSRI uses blue (B02). Left unchanged - confirm it is the
	    # intended pipeline convention.
	    chunk["psri_last"] = (red - grn) / (re2 + EPS)

	    # PCA compression
	    X_pca = X_raw.copy()
	    pca_info = {}

	    for group_name, group_cfg in PCA_GROUPS.items():
	        available = [c for c in group_cfg["cols"] if c in X_pca.columns]
	        if len(available) < 2:
	            continue

	        # PCA cannot produce more components than columns or rows, so clamp
	        # the configured count instead of failing when bands are missing.
	        n_comp = min(group_cfg["n_components"], len(available), len(X_pca))
	        if n_comp < 1:
	            continue

	        scaler = StandardScaler()
	        scaled = scaler.fit_transform(X_pca[available].values)
	        pca = PCA(n_components=n_comp)
	        pcs = pca.fit_transform(scaled)

	        # Fitted transformers are kept so modified raw features can be
	        # projected later with the same mapping.
	        pca_info[group_name] = {
	            "pca": pca,
	            "scaler": scaler,
	            "original_cols": available,
	            "explained_var": pca.explained_variance_ratio_,
	        }

	        X_pca = X_pca.drop(columns=available)
	        for i in range(n_comp):
	            X_pca[f"{group_name}_pc{i + 1}"] = pcs[:, i]

	    X = X_pca.fillna(X_pca.median())
	    chunk["X"] = X
	    chunk["feature_cols"] = list(X.columns)
	    chunk["pca_info"] = pca_info
	    chunk["band_names"] = band_names

	    return chunk


	# ── Model Training ────────────────────────────────────────────────────────────


	def _sim_cache_path(field_id="DE_0100"):
	    """Return the pickle path of the model cache for ``field_id``.

	    The file name embeds a 12-character MD5 prefix of the field id, the
	    NetCDF file's mtime (0 when the file is absent), the TabPFN training cap
	    and the PCA / soil configuration, so a change to any of them maps to a
	    different cache file. The cache directory is created if needed.
	    """
	    nc_mtime = NC_PATH.stat().st_mtime if NC_PATH.exists() else 0

	    pca_signature = {}
	    for name, spec in PCA_GROUPS.items():
	        pca_signature[name] = (spec["cols"], spec["n_components"])

	    fingerprint = json.dumps(
	        {
	            "field_id": field_id,
	            "nc_mtime": nc_mtime,
	            "tabpfn_max_train": TABPFN_MAX_TRAIN,
	            "pca_groups": pca_signature,
	            "soil_depths": SOIL_DEPTHS,
	            "soil_props": SOIL_PROPS,
	        },
	        sort_keys=True,
	        default=str,
	    )
	    digest = hashlib.md5(fingerprint.encode()).hexdigest()

	    SIM_CACHE_DIR.mkdir(exist_ok=True, parents=True)
	    return SIM_CACHE_DIR / f"sim_cache_{field_id}_{digest[:12]}.pkl"


	def _load_baseline_from_csv(chunk):
	    """Return the precomputed baseline ``yield_pred`` per pixel, ordered like
	    the chunk's ``row`` / ``col`` arrays.

	    Returns None when the source CSV does not exist, has no ``yield_pred``
	    column, or lacks an entry for any of the chunk's (row, col) pixels.
	    """
	    if not INTERVENTION_ZONES_CSV.exists():
	        return None

	    source = pd.read_csv(INTERVENTION_ZONES_CSV)
	    if "yield_pred" not in source.columns:
	        return None

	    # (row, col) -> prediction; a later duplicate overwrites an earlier one.
	    baseline_by_pixel = {}
	    for r, c, pred in zip(source["row"], source["col"], source["yield_pred"]):
	        baseline_by_pixel[(int(r), int(c))] = float(pred)

	    ordered = []
	    for r, c in zip(chunk["row"], chunk["col"]):
	        pixel = (int(r), int(c))
	        if pixel not in baseline_by_pixel:
	            # A single unmatched pixel invalidates the whole baseline.
	            return None
	        ordered.append(baseline_by_pixel[pixel])

	    return np.array(ordered, dtype=np.float64)


	def train_tabpfn(chunk, field_id="DE_0100"):
	    """Fit a TabPFN yield regressor for the field, or load it from disk cache.

	    On a cache miss the pixels with a finite target are split 70/30
	    (``random_state=42``), NaN features are imputed with the training column
	    means, at most TABPFN_MAX_TRAIN training rows are used, and the fitted
	    model is pickled together with its column means and test metrics.

	    The baseline prediction for every pixel is taken from the precomputed
	    source CSV when available; otherwise the model predicts the full field.

	    Args:
	        chunk: Field dict carrying ``X`` (feature DataFrame), ``target``,
	            ``row`` and ``col``.
	        field_id: Field identifier used in the cache file name.

	    Returns:
	        ``(model, y_pred_all, col_means)``.
	    """
	    cache_path = _sim_cache_path(field_id)
	    model = None
	    col_means = None

	    # Try cache hit first
	    if cache_path.exists():
	        try:
	            # NOTE: pickle.load can execute arbitrary code. This is only
	            # acceptable because the cache is written locally by this script;
	            # never point it at an untrusted file.
	            with open(cache_path, "rb") as f:
	                cache = pickle.load(f)
	            model = cache["model"]
	            col_means = cache["col_means"]
	            print(f" Cache HIT: {cache_path.name}")
	            print(
	                f" TabPFN (cached): RMSE={cache.get('rmse', float('nan')):.3f}, "
	                f"R2={cache.get('r2', float('nan')):.3f}"
	            )
	        except Exception as e:
	            # Deliberate best-effort: any unreadable cache triggers a rebuild.
	            print(f" Cache load failed ({e}); rebuilding")
	            model = None
	    else:
	        print(f" Cache MISS: {cache_path.name}")

	    if model is None:
	        X = chunk["X"].values.copy()
	        y = np.asarray(chunk["target"])

	        # Masked targets arrive as NaN; they cannot be used for fitting or
	        # scoring, so drop those pixels from the supervised set.
	        finite_target = np.isfinite(y)
	        if not finite_target.all():
	            print(
	                f" Dropping {int((~finite_target).sum())} pixels with "
	                f"non-finite target from training"
	            )
	            X, y = X[finite_target], y[finite_target]

	        X_train, X_test, y_train, y_test = train_test_split(
	            X, y, test_size=0.3, random_state=42
	        )

	        col_means = np.nanmean(X_train, axis=0)
	        col_means = np.where(np.isnan(col_means), 0, col_means)
	        for j in range(X.shape[1]):
	            X_train[np.isnan(X_train[:, j]), j] = col_means[j]
	            X_test[np.isnan(X_test[:, j]), j] = col_means[j]

	        model = TabPFNRegressor(
	            n_estimators=4, device="cpu", ignore_pretraining_limits=True
	        )

	        X_tr, y_tr = X_train, y_train
	        if len(X_tr) > TABPFN_MAX_TRAIN:
	            # Seeded like the split above so the cached model is reproducible.
	            rng = np.random.default_rng(42)
	            idx = rng.choice(len(X_tr), TABPFN_MAX_TRAIN, replace=False)
	            X_tr, y_tr = X_train[idx], y_train[idx]

	        model.fit(X_tr, y_tr)

	        y_pred_test = model.predict(X_test)
	        rmse = float(np.sqrt(mean_squared_error(y_test, y_pred_test)))
	        r2 = float(r2_score(y_test, y_pred_test))
	        print(f" TabPFN: RMSE={rmse:.3f}, R2={r2:.3f}")

	        with open(cache_path, "wb") as f:
	            pickle.dump(
	                {"model": model, "col_means": col_means, "rmse": rmse, "r2": r2}, f
	            )
	        print(f" Saved cache: {cache_path.name}")
	        del X_train, X_test
	        gc.collect()

	    # Baseline: load from precomputed source CSV (no full-field predict).
	    y_pred_all = _load_baseline_from_csv(chunk)
	    if y_pred_all is None:
	        print(" Source CSV unavailable — falling back to full-field model.predict")
	        X_full = chunk["X"].values.copy()
	        for j in range(X_full.shape[1]):
	            X_full[np.isnan(X_full[:, j]), j] = col_means[j]
	        y_pred_all = model.predict(X_full)

	    return model, y_pred_all, col_means


	# ── Challenge Pixel Injection ─────────────────────────────────────────────────


	def inject_challenge_pixels(chunk, challenge_info, X_raw_orig, y_pred_all):
	    """Overwrite challenge pixels with stressed feature values.

	    Each challenge pixel receives the raw features of one of the worst
	    real low-yield pixels (its "template"), followed by stress-specific
	    overrides. The PCA features are then recomputed with the transformers
	    stored in ``chunk["pca_info"]``.

	    Args:
	        chunk: Field dict with ``target``, ``row``, ``col``, ``pca_info`` and
	            ``feature_cols``.
	        challenge_info: List of dicts with at least ``pixel_idx``, ``stress``,
	            ``row`` and ``col``.
	        X_raw_orig: Unmodified raw feature DataFrame; it is not mutated.
	        y_pred_all: Baseline prediction per pixel; used for logging only.

	    Returns:
	        ``(X_raw_challenge, X_challenge, X_challenge_arr)``: the modified raw
	        features, the model features as a DataFrame in ``feature_cols`` order,
	        and the same as a NaN-free array.
	    """
	    y_all = chunk["target"]
	    rows = chunk["row"]
	    cols = chunk["col"]
	    pca_info = chunk["pca_info"]
	    feature_cols = chunk["feature_cols"]

	    # Templates are the lowest-yield pixels within the low-yield quantile.
	    low_mask = y_all <= np.nanpercentile(y_all, LOW_YIELD_PERCENTILE)
	    low_indices = np.where(low_mask)[0]
	    worst_order = np.argsort(y_all[low_mask])
	    template_indices = low_indices[worst_order[: len(challenge_info)]]

	    if len(template_indices) < len(challenge_info):
	        # zip() below stops at the shorter sequence; say so instead of
	        # silently leaving the remaining challenge pixels untouched.
	        print(
	            f" WARNING: only {len(template_indices)} low-yield templates for "
	            f"{len(challenge_info)} challenge pixels; the rest stay unmodified"
	        )

	    print(" Template pixels (real low-yield):")
	    for i, ti in enumerate(template_indices):
	        print(
	            f" Template {i + 1}: [{rows[ti]},{cols[ti]}], yield={y_all[ti]:.1f} t/ha, "
	            f"pred={y_pred_all[ti]:.1f} t/ha"
	        )

	    X_raw_challenge = X_raw_orig.astype(np.float64).copy()

	    def set_to_field_quantile(px_idx, col, q):
	        # Skip columns missing from this dataset, for every override alike.
	        if col in X_raw_challenge.columns:
	            X_raw_challenge.loc[px_idx, col] = X_raw_orig[col].quantile(q)

	    for info, template_idx in zip(challenge_info, template_indices):
	        px_idx = info["pixel_idx"]
	        stress = info["stress"]

	        # Copy ALL raw features from the template. Read from the pristine
	        # frame so a template that was itself overwritten earlier in this
	        # loop still contributes its real values.
	        X_raw_challenge.loc[px_idx, :] = X_raw_orig.loc[template_idx, :].to_numpy(
	            dtype=np.float64
	        )

	        # Stress-specific adjustments
	        if stress == "drought":
	            # NOTE(review): halving moves a negative NDMI towards zero, not
	            # further down - confirm this is the intended drought signal.
	            X_raw_challenge.loc[px_idx, "ndmi_last"] *= 0.5
	            for d in SOIL_DEPTHS:
	                set_to_field_quantile(px_idx, f"sand_{d}", 0.95)

	        elif stress == "compaction":
	            for d in SOIL_DEPTHS:
	                for col_prefix in ["cfvo", "clay"]:
	                    set_to_field_quantile(px_idx, f"{col_prefix}_{d}", 0.95)
	            set_to_field_quantile(px_idx, "slope", 0.05)

	        elif stress == "nutrient_deficiency":
	            for d in SOIL_DEPTHS:
	                for prop in ["nitrogen", "soc", "cec"]:
	                    set_to_field_quantile(px_idx, f"{prop}_{d}", 0.05)

	        print(
	            f" [{info['row']},{info['col']}] {stress}: "
	            f"features from template [{rows[template_idx]},{cols[template_idx]}]"
	        )

	    # Recompute PCA features from modified raw features
	    X_pca_challenge = X_raw_challenge.copy()
	    for group_name in PCA_GROUPS:
	        if group_name not in pca_info:
	            continue
	        gi = pca_info[group_name]
	        available = gi["original_cols"]
	        n_comp = len(gi["explained_var"])
	        scaled = gi["scaler"].transform(X_pca_challenge[available].values)
	        pcs = gi["pca"].transform(scaled)
	        X_pca_challenge = X_pca_challenge.drop(columns=available)
	        for j in range(n_comp):
	            X_pca_challenge[f"{group_name}_pc{j + 1}"] = pcs[:, j]

	    X_challenge = X_pca_challenge.fillna(X_pca_challenge.median())
	    X_challenge = X_challenge[feature_cols]

	    # Array copy with any remaining NaN replaced by the column mean (or 0).
	    X_challenge_arr = X_challenge.values.copy()
	    col_means_ch = np.nanmean(X_challenge_arr, axis=0)
	    col_means_ch = np.where(np.isnan(col_means_ch), 0, col_means_ch)
	    for j in range(X_challenge_arr.shape[1]):
	        X_challenge_arr[np.isnan(X_challenge_arr[:, j]), j] = col_means_ch[j]

	    return X_raw_challenge, X_challenge, X_challenge_arr


	# ── Stress Classification ────────────────────────────────────────────────────


	def classify_stress(z_row, t=ZSCORE_THRESHOLD):
	    """Classify one pixel's stress from its z-scores with fixed rules.

	    Each stress has "dynamic" (vegetation-index) and "static" (soil /
	    terrain) signals. A signal fires when its z-score passes ``t`` in the
	    stated direction ("+", "-", or "abs" for either). A stress is reported
	    when at least half of its available dynamic signals fire; its confidence
	    grows with how far the fired signals exceed ``t``, with static signals
	    weighted 0.5. Uses z_NDMI (pipeline convention).

	    Args:
	        z_row: Mapping of z-score key (e.g. ``"z_NDVI"``) to value; missing
	            or NaN entries count as unavailable.
	        t: Firing threshold.

	    Returns:
	        ``(top2, status)``: ``top2`` is a list of two ``(stress, confidence)``
	        tuples (padded with ``("none", 0.0)``), and ``status`` is one of
	        ``"classified"``, ``"insufficient_data"``, ``"healthy_anomaly"`` or
	        ``"no_signal"``.
	    """
	    # (stress, dynamic signals, static signals); order decides ties.
	    rules = [
	        (
	            "drought",
	            [("z_NDMI", "-"), ("z_NDVI", "-")],
	            [("z_sand_0-5", "+")],
	        ),
	        (
	            "waterlogging",
	            [("z_PSRI", "+"), ("z_NDMI", "-")],
	            [("z_twi", "+"), ("z_slope", "-"), ("z_clay_0-5", "+"), ("z_dem", "-")],
	        ),
	        (
	            "nutrient_deficiency",
	            [("z_NDRE", "-"), ("z_CIre", "-"), ("z_S2REP", "-"), ("z_NDVI", "-")],
	            [
	                ("z_nitrogen_0-5", "-"),
	                ("z_soc_0-5", "-"),
	                ("z_cec_0-5", "-"),
	                ("z_phh2o_0-5", "abs"),
	            ],
	        ),
	        (
	            "compaction",
	            [("z_PSRI", "+"), ("z_NDRE", "-")],
	            [("z_cfvo_0-5", "+"), ("z_clay_0-5", "+"), ("z_slope", "-")],
	        ),
	        (
	            "poor_drainage",
	            [("z_PSRI", "+"), ("z_NDVI", "-")],
	            [("z_sand_0-5", "-"), ("z_silt_0-5", "+"), ("z_clay_0-5", "+")],
	        ),
	    ]

	    def evaluate(key, direction):
	        """Return (fired, value), or None when the z-score is unavailable."""
	        value = z_row.get(key, None)
	        if value is None or pd.isna(value):
	            return None
	        if direction == "+":
	            return (value > t, value)
	        if direction == "-":
	            return (value < -t, value)
	        return (abs(value) > t, value)

	    def excess(values):
	        """Sum of how far each fired value lies beyond the threshold."""
	        return sum(max(0, abs(v) - t) for v in values)

	    def confidence(
	        dynamic, static, min_dyn_frac=0.5, min_dyn_available=1, static_weight=0.5
	    ):
	        """Return the rule's confidence, or None when the rule does not fire."""
	        dyn = [r for r in (evaluate(k, d) for k, d in dynamic) if r is not None]
	        if len(dyn) < min_dyn_available:
	            return None
	        dyn_fired = [value for fired, value in dyn if fired]
	        if len(dyn_fired) / len(dyn) < min_dyn_frac:
	            return None
	        stat = [r for r in (evaluate(k, d) for k, d in static) if r is not None]
	        stat_fired = [value for fired, value in stat if fired]
	        dyn_strength = excess(dyn_fired)
	        stat_strength = excess(stat_fired) * static_weight
	        ceiling = (len(dyn) + len(stat) * static_weight) * 2.0
	        if ceiling <= 0:
	            return None
	        return min(1.0, (dyn_strength + stat_strength) / ceiling)

	    detected = []
	    for stress, dynamic, static in rules:
	        conf = confidence(dynamic, static, min_dyn_frac=0.5)
	        if conf is not None:
	            detected.append((stress, conf))

	    if detected:
	        # Stable sort: equal confidences keep the rule order above.
	        ranked = sorted(detected, key=lambda item: -item[1])
	        if len(ranked) < 2:
	            ranked.append(("none", 0.0))
	        return ranked[:2], "classified"

	    # Nothing fired: separate "too little data" from "healthy" and "no signal".
	    n_dynamic_present = 0
	    for key in ["z_NDVI", "z_NDRE", "z_NDMI", "z_PSRI", "z_CIre", "z_S2REP"]:
	        if z_row.get(key) is not None and not pd.isna(z_row.get(key)):
	            n_dynamic_present += 1
	    if n_dynamic_present < 2:
	        return [("unclassified", 0.0), ("none", 0.0)], "insufficient_data"

	    ndvi = z_row.get("z_NDVI", np.nan)
	    ndre = z_row.get("z_NDRE", np.nan)
	    psri = z_row.get("z_PSRI", np.nan)
	    cire = z_row.get("z_CIre", np.nan)
	    healthy_flags = [
	        not pd.isna(ndvi) and ndvi > 0.5,
	        not pd.isna(ndre) and ndre > 0.5,
	        not pd.isna(psri) and psri < -0.5,
	        not pd.isna(cire) and cire > 0.5,
	    ]
	    if sum(1 for flag in healthy_flags if flag) >= 2:
	        return [("healthy_low_yield_anomaly", 0.5), ("none", 0.0)], "healthy_anomaly"

	    return [("unclassified", 0.0), ("none", 0.0)], "no_signal"


	def run_stress_classification(chunk, X_raw_to_use):
	    """Run the rule-based stress classifier on the field's low-yield pixels.

	    A diagnostic table is built for every pixel, robust z-scores
	    (median / MAD over the whole field) are computed for the pixels at or
	    below the LOW_YIELD_PERCENTILE yield quantile, and classify_stress is
	    applied to each of them.

	    Args:
	        chunk: Field dict with ``field_str``, ``target``, ``row``, ``col`` and
	            ``psri_last``.
	        X_raw_to_use: Raw feature DataFrame aligned with the chunk's pixels.
	            PSRI is always taken from ``chunk["psri_last"]``, not from this
	            frame.

	    Returns:
	        DataFrame with one row per low-yield pixel: ``idx``, ``field_id``,
	        ``row``, ``col``, ``yield_tha``, the primary / secondary rule stress
	        with confidences, ``co_occurring`` and ``rule_status``.
	    """
	    X_raw = X_raw_to_use
	    fid = chunk["field_str"]
	    n_pix = len(chunk["target"])

	    vi_sources = {
	        "NDVI": "ndvi_last",
	        "NDRE": "ndre_last",
	        "NDMI": "ndmi_last",
	        "CIre": "cire_last",
	        "S2REP": "s2rep_last",
	    }
	    static_features = [
	        "clay_0-5",
	        "sand_0-5",
	        "silt_0-5",
	        "soc_0-5",
	        "phh2o_0-5",
	        "cec_0-5",
	        "nitrogen_0-5",
	        "cfvo_0-5",
	        "dem",
	        "slope",
	        "aspect",
	        "curvature",
	        "twi",
	    ]

	    def as_float(values):
	        return np.asarray(values, dtype=np.float64)[:n_pix]

	    # Assemble the table column-wise; a per-pixel iloc loop would create a
	    # full row Series for every single field access.
	    columns = {
	        "field_id": [fid] * n_pix,
	        "row": np.asarray(chunk["row"])[:n_pix].astype(np.int64),
	        "col": np.asarray(chunk["col"])[:n_pix].astype(np.int64),
	        "yield_tha": as_float(chunk["target"]),
	    }
	    for name, source in vi_sources.items():
	        columns[name] = as_float(X_raw[source])
	    columns["PSRI"] = as_float(chunk["psri_last"])
	    for feat in static_features:
	        if feat in X_raw.columns:
	            columns[feat] = as_float(X_raw[feat])
	        else:
	            # Bands absent from the dataset become all-NaN diagnostics.
	            columns[feat] = np.full(n_pix, np.nan)

	    df = pd.DataFrame(columns)

	    # Low yield identification
	    field_p25 = (
	        df.groupby("field_id")["yield_tha"]
	        .quantile(LOW_YIELD_PERCENTILE / 100)
	        .rename("yield_p25")
	    )
	    df = df.merge(field_p25, on="field_id", how="left")
	    df["is_low_yield"] = df["yield_tha"] <= df["yield_p25"]

	    # Z-score computation
	    diag_features = static_features + ["NDVI", "NDRE", "NDMI", "PSRI", "CIre", "S2REP"]
	    diag_features = [f for f in diag_features if f in df.columns]

	    field_medians = df.groupby("field_id")[diag_features].median()

	    def compute_mad(x):
	        return (x - x.median()).abs().median()

	    field_mads = df.groupby("field_id")[diag_features].apply(compute_mad)

	    # A feature whose MAD is below 1% of its IQR is treated as having no
	    # usable variance; its z-score is left as NaN.
	    mad_floors = {}
	    for col in diag_features:
	        q75, q25 = df[col].quantile([0.75, 0.25])
	        mad_floors[col] = max((q75 - q25) * 0.01, 1e-6)

	    low_df = df[df["is_low_yield"]].copy()

	    for col in diag_features:
	        med = low_df["field_id"].map(field_medians[col])
	        mad = low_df["field_id"].map(field_mads[col])
	        floor = mad_floors[col]
	        valid_raw = low_df[col].notna() & med.notna()
	        valid_variance = mad > floor
	        available = valid_raw & valid_variance
	        mad_safe = mad.clip(lower=floor)
	        # 1.4826 * MAD estimates the standard deviation for normal data.
	        z = (low_df[col] - med) / (1.4826 * mad_safe)
	        z = z.clip(-10, 10).where(available, other=np.nan)
	        low_df[f"z_{col}"] = z

	    z_cols = [f"z_{c}" for c in diag_features]

	    # Classify
	    stress_records = []
	    for idx, row in low_df.iterrows():
	        z_dict = {c: row[c] for c in z_cols}
	        top2, status = classify_stress(z_dict)
	        primary, secondary = top2[0], top2[1]
	        co_occurring = (
	            status == "classified"
	            and secondary[1] > 0
	            and secondary[1] >= 0.7 * primary[1]
	        )
	        stress_records.append(
	            {
	                "idx": idx,
	                "field_id": row["field_id"],
	                "row": int(row["row"]),
	                "col": int(row["col"]),
	                "yield_tha": row["yield_tha"],
	                "rule_stress": primary[0],
	                "rule_confidence": round(primary[1], 3),
	                "rule_secondary": secondary[0],
	                "rule_secondary_confidence": round(secondary[1], 3),
	                "co_occurring": co_occurring,
	                "rule_status": status,
	            }
	        )

	    return pd.DataFrame(stress_records)


	# ── SHAP Cache + Combination (from pipeline) ─────────────────────────────────


	def load_shap_cache(cache_path):
	    """Load the pickled SHAP classification records and normalise confidence.

	    ``shap_confidence`` is ``shap_magnitude`` divided by its 95th percentile
	    (or by the maximum when that percentile is not positive), clipped to
	    [0, 1]. ``primary_stress`` is renamed to ``shap_stress``.

	    Args:
	        cache_path: Path (str or Path) of the pickle file. It must hold a
	            dict with a ``"stress_records"`` entry.

	    Returns:
	        ``(shap_df, True)`` on success, or ``(None, False)`` when the file
	        does not exist or contains no records.
	    """
	    cache_path = Path(cache_path)
	    if not cache_path.exists():
	        return None, False

	    # NOTE: pickle.load can execute arbitrary code; only load caches that
	    # were produced locally by the trusted pipeline.
	    with open(cache_path, "rb") as f:
	        shap_cache = pickle.load(f)

	    shap_df = pd.DataFrame(shap_cache["stress_records"])
	    if shap_df.empty:
	        # No records means there is nothing to combine with; fall back the
	        # same way as for a missing file.
	        return None, False

	    magnitude = shap_df["shap_magnitude"]
	    norm_denom = magnitude.quantile(0.95)
	    if not norm_denom > 0:
	        norm_denom = magnitude.max()

	    if norm_denom > 0:
	        shap_df["shap_confidence"] = (magnitude / norm_denom).clip(0, 1)
	    else:
	        # All magnitudes are zero (or undefined): report zero confidence
	        # instead of the NaN that 0 / 0 would produce.
	        shap_df["shap_confidence"] = 0.0

	    shap_df = shap_df.rename(columns={"primary_stress": "shap_stress"})
	    return shap_df, True


	def _stress_types_agree(rule_s, shap_s):
	    """Return True when the two labels are equal or form a pair listed in
	    COMPATIBLE_STRESSES (looked up as ``(rule_s, shap_s)``)."""
	    if rule_s == shap_s:
	        return True
	    return (rule_s, shap_s) in COMPATIBLE_STRESSES


	def _combine_pixel(row):
	    """Merge one pixel's rule-based and SHAP classifications into a verdict.

	    ``row`` is a mapping (e.g. a merged DataFrame row) that may carry
	    ``rule_stress``, ``rule_confidence``, ``rule_status``, ``shap_stress``,
	    ``shap_confidence`` and ``category_scores``; missing or NaN values fall
	    back to "unclassified" / 0.

	    Decision order:
	      1. both classify and agree           -> tier "high"
	      2. both classify, SHAP's second-lowest category agrees with the rule
	                                           -> tier "medium"
	      3. both classify and disagree        -> tier "low", winner by priority
	      4. only SHAP classifies              -> tier "medium" (shap_rescue)
	      5. only the rules classify           -> tier "medium" (rule_only)
	      6. neither                           -> tier "unresolved"

	    Returns a dict with ``combined_stress``, ``combined_confidence``,
	    ``combination_tier``, ``combination_reason`` and ``shap_margin``.
	    """

	    def label(key):
	        value = row.get(key, "unclassified") or "unclassified"
	        return "unclassified" if pd.isna(value) else value

	    rule_s = label("rule_stress")
	    shap_s = label("shap_stress")
	    rule_c = row.get("rule_confidence", 0.0) or 0.0
	    shap_c = row.get("shap_confidence", 0.0) or 0.0
	    rule_status = row.get("rule_status", "no_signal") or "no_signal"
	    scores = row.get("category_scores", {})
	    has_score_pair = isinstance(scores, dict) and len(scores) >= 2

	    rule_classified = rule_status == "classified"
	    shap_classified = shap_s != "unclassified" and shap_c >= SHAP_MIN_MAGNITUDE

	    # SHAP margin uncertainty: relative gap between the two lowest scores,
	    # evaluated only when the lowest one is negative.
	    shap_margin = 0.0
	    if has_score_pair:
	        best_score, second_score = sorted(scores.values())[:2]
	        if best_score < 0:
	            shap_margin = abs(best_score - second_score) / (abs(best_score) + 1e-9)
	            if shap_margin < SHAP_MARGIN_THRESHOLD:
	                shap_classified = False

	    def verdict(stress, confidence, tier, reason):
	        return {
	            "combined_stress": stress,
	            "combined_confidence": confidence,
	            "combination_tier": tier,
	            "combination_reason": reason,
	            "shap_margin": round(shap_margin, 3),
	        }

	    if rule_classified and shap_classified:
	        if _stress_types_agree(rule_s, shap_s):
	            boosted = min(1.0, max(rule_c, shap_c) + AGREE_BOOST)
	            return verdict(rule_s, boosted, "high", "both_agree")

	        if has_score_pair:
	            runner_up = sorted(scores.items(), key=lambda kv: kv[1])[1][0]
	            if runner_up and _stress_types_agree(rule_s, runner_up):
	                return verdict(
	                    rule_s, rule_c * 0.85, "medium", "shap_secondary_supports_rule"
	                )

	        if rule_s in RULE_PRIORITY_STRESSES:
	            winner = rule_s
	        elif shap_s in SHAP_PRIORITY_STRESSES:
	            winner = shap_s
	        elif rule_c >= shap_c:
	            winner = rule_s
	        else:
	            winner = shap_s
	        penalised = max(0.1, max(rule_c, shap_c) - DISAGREE_PENALTY)
	        return verdict(
	            winner, penalised, "low", f"disagree_rule={rule_s}_shap={shap_s}"
	        )

	    if shap_classified:
	        return verdict(shap_s, shap_c * SHAP_RESCUE_DISCOUNT, "medium", "shap_rescue")

	    if rule_classified:
	        return verdict(rule_s, rule_c * RULE_ONLY_DISCOUNT, "medium", "rule_only")

	    return verdict(rule_s, 0.0, "unresolved", "neither_classified")


	def combine_classifications(rule_df, shap_df):
	    """Outer-join the rule-based and SHAP tables on (field_id, row, col),
	    apply _combine_pixel to every row and append the resulting columns plus
	    ``recommended_action`` (ACTION_MAP lookup, default "inspect_manually")."""
	    join_keys = ["field_id", "row", "col"]
	    shap_columns = join_keys + [
	        "shap_stress",
	        "shap_magnitude",
	        "shap_confidence",
	        "category_scores",
	    ]

	    # Outer join: pixels present in only one of the tables are kept.
	    merged = pd.merge(rule_df, shap_df[shap_columns], on=join_keys, how="outer")

	    verdicts = merged.apply(_combine_pixel, axis=1, result_type="expand")
	    result = pd.concat([merged, verdicts], axis=1)

	    actions = result["combined_stress"].map(ACTION_MAP)
	    result["recommended_action"] = actions.fillna("inspect_manually")
	    return result


	def _rule_only_fallback(rule_df):
	    """Return a copy of ``rule_df`` extended with the ``combined_*``, SHAP and
	    ``recommended_action`` columns for the case that no SHAP cache exists.

	    The rule result is passed through unchanged: every row gets tier
	    "medium" and reason "rule_only", and the SHAP columns are NaN.
	    NOTE(review): unlike _combine_pixel, no discount is applied and
	    unclassified rows are not marked "unresolved" - confirm this is intended.
	    """
	    actions = rule_df["rule_stress"].map(ACTION_MAP).fillna("inspect_manually")
	    return rule_df.copy().assign(
	        combined_stress=rule_df["rule_stress"],
	        combined_confidence=rule_df["rule_confidence"],
	        combination_tier="medium",
	        combination_reason="rule_only",
	        shap_stress=np.nan,
	        shap_magnitude=np.nan,
	        shap_confidence=np.nan,
	        recommended_action=actions,
	    )


	# ── Challenge Classification ─────────────────────────────────────────────────


	def _combine_rules_with_shap_cache(rule_df, report_counts=False):
	    """Combine ``rule_df`` with the SHAP cache, or fall back to rules only."""
	    shap_df, shap_available = load_shap_cache(SHAP_CACHE_PATH)
	    if not shap_available:
	        print(f" No SHAP cache at {SHAP_CACHE_PATH} — rule-based only.")
	        return _rule_only_fallback(rule_df)

	    print(f" SHAP cache loaded: {SHAP_CACHE_PATH.name} ({len(shap_df)} pixels)")
	    stress_df = combine_classifications(rule_df, shap_df)
	    if report_counts:
	        n_high = (stress_df["combination_tier"] == "high").sum()
	        n_rescued = (stress_df["combination_reason"] == "shap_rescue").sum()
	        print(f" Combined: high={n_high} | rescued={n_rescued}")
	    return stress_df


	def classify_challenge_pixels(challenge_info, X_raw_challenge, chunk):
	    """Classify the challenge pixels together with the field's low-yield pixels.

	    Challenge pixels keep their original (high) yield, so they are not part
	    of the low-yield subset handled by run_stress_classification. Their
	    z-scores are therefore computed directly against field-level
	    median / MAD of the modified raw features and classified one by one.
	    The results are merged with the full-field rule classification and
	    combined with the SHAP cache when one is available.

	    Args:
	        challenge_info: List of dicts with ``pixel_idx``, ``row``, ``col``,
	            ``stress`` and ``original_yield``. Each dict is updated in place
	            with ``classified_stress``, ``classified_conf`` and
	            ``classified_action``.
	        X_raw_challenge: Raw feature DataFrame after challenge injection.
	        chunk: Field dict (``field_str``, ``psri_last``, plus what
	            run_stress_classification needs).

	    Returns:
	        DataFrame of combined classifications (challenge pixels first).
	    """
	    if not challenge_info:
	        print(" No challenge pixels to classify.")
	        # Still run full-field classification for intervention pixels
	        rule_df = run_stress_classification(chunk, X_raw_challenge)
	        return _combine_rules_with_shap_cache(rule_df)

	    print("Running stress classification...")

	    # 1. Full-field rule-based classification (covers low-yield intervention pixels)
	    rule_df = run_stress_classification(chunk, X_raw_challenge)

	    # 2. Direct z-score classification for challenge pixels
	    fid = chunk["field_str"]

	    # PSRI is stored separately in chunk, not in X_raw.
	    # NOTE(review): it is therefore the challenge pixel's own PSRI, not the
	    # template's - confirm this is intended for PSRI-driven stresses.
	    psri_last = chunk["psri_last"]

	    # Field-level median, MAD and MAD floor per feature, from modified features.
	    field_medians = {}
	    field_mads = {}
	    mad_floors = {}
	    for raw_col in ZSCORE_FEATURES:
	        if raw_col == "psri_last":
	            vals = pd.Series(psri_last)
	        elif raw_col not in X_raw_challenge.columns:
	            continue
	        else:
	            vals = X_raw_challenge[raw_col]
	        med = vals.median()
	        q75, q25 = vals.quantile([0.75, 0.25])
	        field_medians[raw_col] = med
	        field_mads[raw_col] = (vals - med).abs().median()
	        mad_floors[raw_col] = max((q75 - q25) * 0.01, 1e-6)

	    challenge_rule_records = []
	    for info in challenge_info:
	        px_idx = info["pixel_idx"]
	        z_dict = {}
	        for raw_col, z_key in ZSCORE_FEATURES.items():
	            if raw_col == "psri_last":
	                val = float(psri_last[px_idx])
	            elif raw_col not in X_raw_challenge.columns:
	                z_dict[z_key] = np.nan
	                continue
	            else:
	                val = X_raw_challenge.iloc[px_idx][raw_col]
	            med = field_medians[raw_col]
	            if pd.isna(val) or pd.isna(med):
	                z_dict[z_key] = np.nan
	                continue
	            # NOTE(review): unlike run_stress_classification, a MAD below the
	            # floor is clamped here rather than yielding NaN - confirm.
	            mad_safe = max(field_mads[raw_col], mad_floors[raw_col])
	            z = (val - med) / (1.4826 * mad_safe)
	            z_dict[z_key] = float(np.clip(z, -10, 10))

	        top2, status = classify_stress(z_dict)
	        primary, secondary = top2[0], top2[1]
	        co_occurring = (
	            status == "classified"
	            and secondary[1] > 0
	            and secondary[1] >= 0.7 * primary[1]
	        )
	        challenge_rule_records.append(
	            {
	                "field_id": fid,
	                "row": info["row"],
	                "col": info["col"],
	                "yield_tha": info["original_yield"],
	                "rule_stress": primary[0],
	                "rule_confidence": round(primary[1], 3),
	                "rule_secondary": secondary[0],
	                "rule_secondary_confidence": round(secondary[1], 3),
	                "co_occurring": co_occurring,
	                "rule_status": status,
	            }
	        )

	    challenge_rule_df = pd.DataFrame(challenge_rule_records)

	    # 3. Merge challenge-pixel rule results into full rule_df, dropping any
	    # full-field rows that refer to the same (row, col).
	    if len(rule_df) > 0:
	        challenge_coords = {(info["row"], info["col"]) for info in challenge_info}
	        is_challenge = rule_df.apply(
	            lambda r: (int(r["row"]), int(r["col"])) in challenge_coords, axis=1
	        )
	        rule_df = rule_df[~is_challenge]
	    rule_df = pd.concat([challenge_rule_df, rule_df], ignore_index=True)

	    # 4. Load SHAP cache and combine
	    stress_df = _combine_rules_with_shap_cache(rule_df, report_counts=True)

	    # 5. Print classification results for challenge pixels
	    print(
	        f"\n {'Pixel':<12} {'Expected':<22} {'Classified':<22} "
	        f"{'Tier':<10} {'Conf':>6} {'Reason'}"
	    )
	    print(f" {'-' * 90}")

	    for info in challenge_info:
	        row_match = stress_df[
	            (stress_df["row"] == info["row"]) & (stress_df["col"] == info["col"])
	        ]
	        if len(row_match) > 0:
	            r = row_match.iloc[0]
	            classified = r.get("combined_stress", "unclassified")
	            tier = r.get("combination_tier", "n/a")
	            conf = r.get("combined_confidence", 0.0)
	            reason = r.get("combination_reason", "n/a")
	            info["classified_stress"] = classified
	            info["classified_conf"] = conf
	            info["classified_action"] = ACTION_MAP.get(classified, "inspect_manually")
	        else:
	            info["classified_stress"] = "unclassified"
	            info["classified_conf"] = 0.0
	            info["classified_action"] = "inspect_manually"
	            tier, reason = "n/a", "no_match"

	        print(
	            f" [{info['row']:>2},{info['col']:>2}] {info['stress']:<22} "
	            f"{info['classified_stress']:<22} {tier:<10} {info['classified_conf']:>5.3f} {reason}"
	        )

	    return stress_df


	# ── Intervention Loading ──────────────────────────────────────────────────────


	def load_interventions(interventions_csv, rows, cols):
	    """Load robot interventions from a CSV and map them onto pixel indices.

	    The CSV is read with pandas and is accessed through its ``row``,
	    ``col`` and ``action`` columns. Each CSV record is matched against the
	    ``rows`` / ``cols`` coordinate arrays; the first matching position is
	    kept, and coordinates that match nothing are reported and skipped.
	    The stress label for an action is looked up in the inverse of
	    ``ACTION_MAP``; actions missing from it are labelled
	    ``"unclassified"``.

	    Returns:
	        ``(selected_indices, selected_actions, selected_stresses)`` where
	        ``selected_indices`` is a NumPy array and the other two are lists
	        aligned with it.
	    """
	    table = pd.read_csv(interventions_csv)
	    print(f" Loaded {len(table)} intervention pixels from {Path(interventions_csv).name}")

	    # Reverse lookup: action name -> stress name.
	    stress_for_action = {action: stress for stress, action in ACTION_MAP.items()}

	    matched = []  # (pixel index, action, stress) triples, in CSV order
	    for _, record in table.iterrows():
	        hits = np.flatnonzero((rows == record["row"]) & (cols == record["col"]))
	        if hits.size == 0:
	            print(
	                f" WARNING: pixel [{record['row']},{record['col']}] not found, skipping"
	            )
	            continue
	        action = record["action"]
	        matched.append(
	            (hits[0], action, stress_for_action.get(action, "unclassified"))
	        )

	    if matched:
	        selected_indices = np.array([entry[0] for entry in matched])
	    else:
	        # Keep an integer dtype so the empty result can still be used as an index.
	        selected_indices = np.array([], dtype=int)
	    selected_actions = [entry[1] for entry in matched]
	    selected_stresses = [entry[2] for entry in matched]

	    print(f" Matched {len(selected_indices)} intervention pixels")
	    return selected_indices, selected_actions, selected_stresses


	# ── Combine Interventions ────────────────────────────────────────────────────


	def combine_interventions(
	    challenge_info, selected_indices, selected_actions, selected_stresses
	):
	    """Build one aligned intervention list from challenges and robot picks.

	    Challenge entries whose ``"treated"`` flag is truthy come first,
	    followed by the robot-selected pixels. Untreated challenges are only
	    reported on stdout; they receive no recovery. For a treated challenge
	    the stress label is its ``"classified_stress"`` when present,
	    otherwise its injected ``"stress"``.

	    Returns:
	        ``(all_indices, all_actions, all_stresses, all_is_challenge)``:
	        an integer NumPy array plus three lists aligned with it.
	    """
	    treated, untreated = [], []
	    for entry in challenge_info:
	        (treated if entry["treated"] else untreated).append(entry)

	    if untreated:
	        print(" Untreated challenge pixels (no recovery):")
	        for entry in untreated:
	            print(f" [{entry['row']},{entry['col']}] {entry['stress']}")

	    no_pixels = np.array([], dtype=int)
	    treated_idx = (
	        np.array([entry["pixel_idx"] for entry in treated]) if treated else no_pixels
	    )
	    robot_idx = selected_indices if len(selected_indices) > 0 else no_pixels
	    all_indices = np.concatenate([treated_idx, robot_idx]).astype(int)

	    all_actions = [entry["action"] for entry in treated]
	    all_actions += list(selected_actions)

	    all_stresses = [
	        entry.get("classified_stress", entry["stress"]) for entry in treated
	    ]
	    all_stresses += list(selected_stresses)

	    n_treated = len(treated)
	    n_robot = len(selected_indices)
	    all_is_challenge = [True] * n_treated + [False] * n_robot

	    print(
	        f" Combined: {len(all_indices)} pixels "
	        f"({n_treated} treated challenges + {n_robot} robot interventions)"
	    )
	    return all_indices, all_actions, all_stresses, all_is_challenge


	# ── Recovery Simulation ──────────────────────────────────────────────────────


	def simulate_recovery(
	    X_challenge_arr, feature_cols, all_indices, all_actions, y_all, X_all_baseline
	):
	    """Move each treated pixel's responsive features toward healthy levels.

	    "Healthy" pixels are those whose yield in ``y_all`` is above the
	    ``LOW_YIELD_PERCENTILE`` percentile. For every feature the median over
	    the healthy rows of ``X_all_baseline`` is the recovery target. For each
	    (pixel, action) pair, the features listed for that action in
	    ``ACTION_RESPONSIVE_FEATURES`` are shifted by ``RECOVERY_FRACTION`` of
	    the gap between their current value and the target. Features that are
	    not in ``feature_cols`` are ignored.

	    Returns:
	        ``X_post``: a modified copy of ``X_challenge_arr``; the input
	        array itself is left untouched.
	    """
	    yield_cutoff = np.nanpercentile(y_all, LOW_YIELD_PERCENTILE)
	    is_healthy = ~(y_all <= yield_cutoff)

	    healthy_medians = {
	        name: np.median(X_all_baseline[is_healthy, col])
	        for col, name in enumerate(feature_cols)
	    }

	    X_post = X_challenge_arr.copy()

	    for px_idx, action in zip(all_indices, all_actions):
	        for feat_name in ACTION_RESPONSIVE_FEATURES.get(action, []):
	            try:
	                col = feature_cols.index(feat_name)
	            except ValueError:
	                # Responsive feature is not among the model inputs.
	                continue
	            before = X_post[px_idx, col]
	            X_post[px_idx, col] = before + RECOVERY_FRACTION * (
	                healthy_medians[feat_name] - before
	            )

	    return X_post


	# ── Prediction & Comparison ──────────────────────────────────────────────────


	def predict_and_compare(
	    model,
	    X_challenge_arr,
	    X_post,
	    all_indices,
	    all_actions,
	    all_stresses,
	    all_is_challenge,
	    y_all,
	    rows,
	    cols,
	    y_pred_all,
	    challenge_indices=None,
	):
	    """Predict post-intervention yields and tabulate the per-pixel change.

	    Only the changed pixels (challenges + interventions) are passed to
	    ``model.predict``; the unchanged remainder is patched in from
	    ``y_pred_all``.

	    Args:
	        model: Object exposing ``predict(X)``.
	        X_challenge_arr: Feature array with challenge stress applied.
	        X_post: Feature array after simulated recovery.
	        all_indices: Pixel indices that receive an intervention.
	        all_actions, all_stresses, all_is_challenge: Sequences aligned
	            with ``all_indices``.
	        y_all: Actual yield per pixel.
	        rows, cols: Grid coordinates per pixel.
	        y_pred_all: Baseline prediction per pixel (not modified).
	        challenge_indices: Optional pixel indices whose features differ
	            from the baseline and therefore need a fresh "before"
	            prediction.

	    Returns:
	        ``(comparison_df, y_pred_before_full, y_pred_after_full)``.
	        ``comparison_df`` always carries the full column schema, even when
	        there are no intervention pixels, so callers can index columns
	        such as ``"is_challenge"`` unconditionally.
	    """
	    result_columns = [
	        "row",
	        "col",
	        "is_challenge",
	        "action",
	        "stress",
	        "yield_actual",
	        "yield_pred_before",
	        "yield_pred_after",
	        "yield_improvement",
	        "pct_improvement",
	    ]

	    # "Before" = baseline with challenge stress applied. Patch challenge
	    # pixels (where features differ from baseline) into a copy of y_pred_all.
	    y_pred_before = y_pred_all.astype(np.float64).copy()
	    if challenge_indices is not None and len(challenge_indices) > 0:
	        ch_idx = np.asarray(challenge_indices, dtype=int)
	        y_pred_before[ch_idx] = model.predict(X_challenge_arr[ch_idx])

	    # "After" = before + intervention recovery. Patch intervention pixels.
	    y_pred_after = y_pred_before.copy()
	    if len(all_indices) > 0:
	        ai = np.asarray(all_indices, dtype=int)
	        y_pred_after[ai] = model.predict(X_post[ai])

	    comparison = []
	    for sel_i, px_idx in enumerate(all_indices):
	        pred_before = y_pred_before[px_idx]
	        pred_after = y_pred_after[px_idx]
	        gain = pred_after - pred_before
	        comparison.append(
	            {
	                "row": int(rows[px_idx]),
	                "col": int(cols[px_idx]),
	                "is_challenge": all_is_challenge[sel_i],
	                "action": all_actions[sel_i],
	                "stress": all_stresses[sel_i],
	                "yield_actual": float(y_all[px_idx]),
	                "yield_pred_before": float(pred_before),
	                "yield_pred_after": float(pred_after),
	                "yield_improvement": float(gain),
	                # Denominator is floored at 0.1 to avoid dividing by ~0.
	                "pct_improvement": float(gain / max(pred_before, 0.1) * 100),
	            }
	        )

	    # Passing the columns explicitly keeps the schema when `comparison` is
	    # empty; a bare pd.DataFrame([]) would have no columns at all and break
	    # callers that index comp_df["is_challenge"].
	    comparison_df = pd.DataFrame(comparison, columns=result_columns)
	    # An empty frame would otherwise give an object-dtype column, which is
	    # not usable as a boolean mask.
	    comparison_df["is_challenge"] = comparison_df["is_challenge"].astype(bool)

	    return comparison_df, y_pred_before, y_pred_after


	# ── Plotting ──────────────────────────────────────────────────────────────────


	def pixels_to_image(rows, cols, values):
	    """Scatter per-pixel values onto a dense 2D grid.

	    The grid spans the bounding box of ``rows`` / ``cols``; cells without
	    a pixel stay NaN.

	    Returns:
	        ``(img, r_min, c_min)``: the image plus the row/column offsets
	        that were subtracted to place the pixels.
	    """
	    row_origin = rows.min()
	    col_origin = cols.min()
	    grid_shape = (rows.max() - row_origin + 1, cols.max() - col_origin + 1)

	    canvas = np.full(grid_shape, np.nan)
	    canvas[rows - row_origin, cols - col_origin] = values
	    return canvas, row_origin, col_origin


	def _draw_pixel_markers(ax, challenge_info, selected_indices, r_arr, c_arr, r_min, c_min):
	    """Overlay challenge circles and robot-intervention crosses on one axes."""
	    for info in challenge_info:
	        ax.plot(
	            info["col"] - c_min,
	            info["row"] - r_min,
	            "ko",
	            markersize=12,
	            markerfacecolor="none",
	            markeredgewidth=2.5,
	        )
	    for px_idx in selected_indices:
	        ax.plot(
	            c_arr[px_idx] - c_min,
	            r_arr[px_idx] - r_min,
	            "kx",
	            markersize=8,
	            markeredgewidth=2,
	        )


	def plot_simulation_map(
	    chunk,
	    y_pred_before,
	    y_pred_after,
	    challenge_info,
	    selected_indices,
	    all_indices,
	    output_path,
	):
	    """Save a 3-panel figure: before (RdYlGn), after (RdYlGn), delta (RdBu).

	    Challenge pixels are drawn as open circles and robot interventions as
	    crosses on every panel.

	    Args:
	        chunk: Mapping providing the ``"row"``, ``"col"`` and ``"target"``
	            per-pixel arrays.
	        y_pred_before, y_pred_after: Predicted yield per pixel before and
	            after the interventions.
	        challenge_info: Challenge dicts with ``"row"`` and ``"col"`` keys.
	        selected_indices: Pixel indices of robot interventions.
	        all_indices: Accepted for interface compatibility; not used here.
	        output_path: Destination passed to ``fig.savefig``.
	    """
	    r_arr = chunk["row"]
	    c_arr = chunk["col"]
	    y_all = chunk["target"]

	    img_before, r_min, c_min = pixels_to_image(r_arr, c_arr, y_pred_before)
	    img_after, _, _ = pixels_to_image(r_arr, c_arr, y_pred_after)

	    # Shared colour scale for the two yield panels. Include the "before"
	    # predictions so that panel is not clipped, and ignore NaNs so a single
	    # missing value cannot turn the upper limit into NaN.
	    vmin = 0
	    vmax = max(
	        float(np.nanmax(y_all)),
	        float(np.nanmax(y_pred_before)),
	        float(np.nanmax(y_pred_after)),
	    )

	    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
	    try:
	        panels = (
	            (img_before, "Before Intervention"),
	            (img_after, "After Intervention"),
	        )
	        for ax, (img, title) in zip(axes[:2], panels):
	            im = ax.imshow(img, cmap="RdYlGn", vmin=vmin, vmax=vmax)
	            ax.set_title(title, fontsize=11)
	            ax.axis("off")
	            _draw_pixel_markers(
	                ax, challenge_info, selected_indices, r_arr, c_arr, r_min, c_min
	            )
	            plt.colorbar(im, ax=ax, shrink=0.7, label="t/ha")

	        # Panel 3: Yield improvement (delta), on a symmetric scale of at
	        # least +/-0.5 so a zero-change map still renders sensibly.
	        diff = y_pred_after - y_pred_before
	        img_diff, _, _ = pixels_to_image(r_arr, c_arr, diff)
	        max_abs = max(
	            abs(float(np.nanmin(img_diff))), abs(float(np.nanmax(img_diff))), 0.5
	        )
	        im3 = axes[2].imshow(img_diff, cmap="RdBu", vmin=-max_abs, vmax=max_abs)
	        axes[2].set_title("Yield Change (After - Before)", fontsize=11)
	        axes[2].axis("off")
	        _draw_pixel_markers(
	            axes[2], challenge_info, selected_indices, r_arr, c_arr, r_min, c_min
	        )
	        plt.colorbar(im3, ax=axes[2], shrink=0.7, label="\u0394 t/ha")

	        plt.tight_layout()
	        fig.savefig(output_path, dpi=150, bbox_inches="tight")
	    finally:
	        # Always release the figure, even if drawing or saving fails.
	        plt.close(fig)


	# ── Orchestrator ──────────────────────────────────────────────────────────────


	def run_simulation(
	    field_id="DE_0100",
	    nc_id=100,
	    seed=42,
	):
	    """Run the full intervention simulation and print progress per step.

	    Steps: load the field, extract features, train the model, read the
	    challenge CSV, inject challenges, classify stress, read the
	    intervention CSV, combine both intervention sources, simulate
	    recovery, then predict and save. Results are written to
	    ``OUTPUT_CSV`` and ``OUTPUT_PNG``.

	    Args:
	        field_id: Field identifier passed to ``load_field_data``.
	        nc_id: Numeric field id passed to ``load_field_data``.
	        seed: Seed for NumPy's global random generator.

	    Returns:
	        None.
	    """
	    # Single source of truth for the output CSV schema.
	    result_columns = [
	        "row",
	        "col",
	        "is_challenge",
	        "action",
	        "stress",
	        "yield_actual",
	        "yield_pred_before",
	        "yield_pred_after",
	        "yield_improvement",
	        "pct_improvement",
	    ]

	    np.random.seed(seed)
	    RESULTS_DIR.mkdir(exist_ok=True)

	    # 1. Load field data from NetCDF
	    print(f"[1/10] Loading field {field_id} (nc_id={nc_id})...")
	    chunk, band_names = load_field_data(NC_PATH, field_id, nc_id)
	    print(f" {len(chunk['target'])} pixels, yield={chunk['target'].mean():.1f} t/ha")

	    # 2. Extract features (raw + PCA)
	    print("[2/10] Extracting features...")
	    chunk = extract_features(chunk, band_names)
	    print(f" {len(chunk['X_raw'].columns)} raw -> {chunk['X'].shape[1]} after PCA")

	    # 3. Train TabPFN model
	    print("[3/10] Training TabPFN model...")
	    model, y_pred_all, col_means = train_tabpfn(chunk)
	    print(
	        f" All-pixel predictions: mean={y_pred_all.mean():.2f}, std={y_pred_all.std():.2f}"
	    )

	    y_all = chunk["target"]
	    rows = chunk["row"]
	    cols = chunk["col"]
	    feature_cols = chunk["feature_cols"]
	    X_raw_orig = chunk["X_raw"].copy()

	    # Baseline feature array (for healthy-pixel stats); NaNs are replaced
	    # column by column with the means returned by train_tabpfn.
	    X_all_baseline = chunk["X"].values.copy()
	    for j in range(X_all_baseline.shape[1]):
	        X_all_baseline[np.isnan(X_all_baseline[:, j]), j] = col_means[j]

	    # 4. Load challenges CSV
	    print("[4/10] Loading challenges...")
	    if not CHALLENGES_CSV.exists():
	        print(
	            f" No challenges file at {CHALLENGES_CSV} — skipping challenge injection."
	        )
	        challenge_info = []
	    else:
	        challenges_df = pd.read_csv(CHALLENGES_CSV)
	        print(f" Loaded {len(challenges_df)} challenge pixels")

	        challenge_info = []
	        for _, crow in challenges_df.iterrows():
	            mask = (rows == crow["row"]) & (cols == crow["col"])
	            idx = np.where(mask)[0]
	            if len(idx) == 0:
	                print(
	                    f" WARNING: pixel [{crow['row']},{crow['col']}] not found, skipping"
	                )
	                continue
	            px_idx = idx[0]
	            # A missing "treated" column means the challenge is treated.
	            treated = str(crow.get("treated", "true")).lower() == "true"
	            challenge_info.append(
	                {
	                    "pixel_idx": px_idx,
	                    "row": int(crow["row"]),
	                    "col": int(crow["col"]),
	                    "stress": crow["challenge"],
	                    "action": ACTION_MAP.get(crow["challenge"], "inspect_manually"),
	                    "treated": treated,
	                    "original_yield": float(y_all[px_idx]),
	                }
	            )
	            print(
	                f" [{crow['row']},{crow['col']}] {crow['challenge']}, treated={treated}"
	            )

	    # 5. Inject challenge pixels
	    if challenge_info:
	        print("[5/10] Injecting challenge pixels...")
	        X_raw_challenge, X_challenge, X_challenge_arr = inject_challenge_pixels(
	            chunk, challenge_info, X_raw_orig, y_pred_all
	        )
	    else:
	        print("[5/10] No challenges to inject — using original features.")
	        X_raw_challenge = X_raw_orig.copy()
	        X_challenge_arr = X_all_baseline.copy()

	    # 6. Run SHAP+rule-based stress classification on challenge pixels.
	    # The returned frame is not used below; the call is kept because it
	    # stores "classified_stress" on each challenge_info entry, which
	    # combine_interventions reads.
	    print("[6/10] Running stress classification...")
	    stress_df = classify_challenge_pixels(challenge_info, X_raw_challenge, chunk)

	    # 7. Load interventions CSV
	    print("[7/10] Loading interventions...")
	    if not INTERVENTIONS_CSV.exists():
	        print(f" No interventions file at {INTERVENTIONS_CSV} — skipping.")
	        selected_indices = np.array([], dtype=int)
	        selected_actions = []
	        selected_stresses = []
	    else:
	        selected_indices, selected_actions, selected_stresses = load_interventions(
	            INTERVENTIONS_CSV, rows, cols
	        )

	    # 8. Combine treated challenges + robot interventions
	    print("[8/10] Combining interventions...")
	    if len(challenge_info) == 0 and len(selected_indices) == 0:
	        print(" WARNING: No challenges and no interventions — nothing to simulate.")
	        comp_df = pd.DataFrame(columns=result_columns)
	        comp_df.to_csv(OUTPUT_CSV, index=False)
	        print(f" Saved empty: {OUTPUT_CSV}")

	        # Still generate a baseline map (before == after, zero delta).
	        # No challenges and no interventions => features unchanged => baseline = y_pred_all.
	        y_pred_baseline = y_pred_all
	        plot_simulation_map(
	            chunk,
	            y_pred_baseline,
	            y_pred_baseline,
	            challenge_info,
	            selected_indices,
	            np.array([], dtype=int),
	            OUTPUT_PNG,
	        )
	        print(f"Saved baseline map: {OUTPUT_PNG}")
	        return

	    all_indices, all_actions, all_stresses, all_is_challenge = combine_interventions(
	        challenge_info, selected_indices, selected_actions, selected_stresses
	    )

	    # 9. Simulate post-intervention recovery
	    print("[9/10] Simulating recovery...")
	    X_post = simulate_recovery(
	        X_challenge_arr, feature_cols, all_indices, all_actions, y_all, X_all_baseline
	    )
	    print(f" Recovery fraction: {RECOVERY_FRACTION:.0%}")

	    # 10. Predict post-intervention yields & save
	    print("[10/10] Predicting and saving...")
	    challenge_indices = np.array(
	        [info["pixel_idx"] for info in challenge_info], dtype=int
	    )
	    comp_df, y_pred_before, y_pred_after = predict_and_compare(
	        model,
	        X_challenge_arr,
	        X_post,
	        all_indices,
	        all_actions,
	        all_stresses,
	        all_is_challenge,
	        y_all,
	        rows,
	        cols,
	        y_pred_all,
	        challenge_indices=challenge_indices,
	    )

	    # When every challenge is untreated and there are no robot interventions,
	    # no pixel is compared and the frame may come back without columns.
	    # Normalise it so the summary and CSV below still work.
	    if comp_df.empty:
	        comp_df = pd.DataFrame(columns=result_columns).astype({"is_challenge": bool})

	    # Print results
	    n_chal = comp_df["is_challenge"].sum()
	    n_orig = len(comp_df) - n_chal
	    print(f"\n{'=' * 90}")
	    print(f"INTERVENTION SIMULATION RESULTS ({n_chal} challenge + {n_orig} original)")
	    print(f"{'=' * 90}")
	    print(
	        f"\n{'Type':<6} {'Row':<5} {'Col':<5} {'Stress':<22} {'Action':<22} "
	        f"{'Before':>7} {'After':>7} {'Gain':>7} {'%':>6}"
	    )
	    print(f"{'-' * 90}")

	    for _, r in comp_df.iterrows():
	        label = "CHAL" if r["is_challenge"] else "orig"
	        print(
	            f"{label:<6} {int(r['row']):<5} {int(r['col']):<5} {r['stress']:<22} "
	            f"{r['action']:<22} {r['yield_pred_before']:>7.2f} {r['yield_pred_after']:>7.2f} "
	            f"{r['yield_improvement']:>+7.2f} {r['pct_improvement']:>+5.1f}%"
	        )

	    chal_df = comp_df[comp_df["is_challenge"]]
	    orig_df = comp_df[~comp_df["is_challenge"]]
	    if len(chal_df) > 0:
	        print(
	            f"\n{'CHALLENGE (' + str(n_chal) + ')':<54} "
	            f"{chal_df['yield_pred_before'].mean():>7.2f} "
	            f"{chal_df['yield_pred_after'].mean():>7.2f} "
	            f"{chal_df['yield_improvement'].mean():>+7.2f} "
	            f"{chal_df['pct_improvement'].mean():>+5.1f}%"
	        )
	    if len(orig_df) > 0:
	        print(
	            f"{'ORIGINAL (' + str(n_orig) + ')':<54} "
	            f"{orig_df['yield_pred_before'].mean():>7.2f} "
	            f"{orig_df['yield_pred_after'].mean():>7.2f} "
	            f"{orig_df['yield_improvement'].mean():>+7.2f} "
	            f"{orig_df['pct_improvement'].mean():>+5.1f}%"
	        )
	    print(f"\nField mean yield: {y_all.mean():.2f} t/ha")

	    # Save CSV
	    comp_df.to_csv(OUTPUT_CSV, index=False)
	    print(f"\nSaved: {OUTPUT_CSV}")

	    # Generate map (reuse arrays from predict_and_compare — no extra predict needed).
	    plot_simulation_map(
	        chunk,
	        y_pred_before,
	        y_pred_after,
	        challenge_info,
	        selected_indices,
	        all_indices,
	        OUTPUT_PNG,
	    )
	    print(f"Saved: {OUTPUT_PNG}")
	    print("\nDone.")


	# ── Entry Point ───────────────────────────────────────────────────────────────

	# Script entry point: run the simulation with its default arguments when
	# this file is executed directly (not when it is imported).
	if __name__ == "__main__":
	    run_simulation()