import os
import functools

import numpy as np
import pandas as pd
from scipy import stats
from scipy.spatial import cKDTree
from scipy.ndimage import binary_dilation
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from skimage.metrics import structural_similarity as ssim
from tqdm import trange
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib.cm as cm

matplotlib.rcParams['font.sans-serif'] = ['Arial']
matplotlib.rcParams['font.size'] = 16


class DataLoader:
    """Load predicted/true concentration fields, metadata and ensemble data.

    Functions:
        1. load_predictions: load predicted and true fields from the npz file
        2. load_metadata: parse wind speed/direction, stability class and source
           id from the meta txt file
        3. load_conds_data: load ensemble ("conds") prediction data from pkl
        4. log2ppm: convert log-scaled concentration to ppm
        5. get_sample: fetch (pred, true, ensemble, meta) for one sample index
    """

    def __init__(self, pred_npz_path, meta_txt_path, conds_pkl_path):
        self.pred_npz_path = pred_npz_path
        self.meta_txt_path = meta_txt_path
        self.conds_pkl_path = conds_pkl_path
        self.load_predictions()
        self.meta = self.load_metadata()
        self.conds_data = self.load_conds_data()

    def load_predictions(self):
        """Load (N, 1, H, W) fields, drop the channel axis, mask buildings."""
        data = np.load(self.pred_npz_path)
        self.preds = data['preds'].squeeze(1)
        self.trues = data['trues'].squeeze(1)
        # Zero the concentration inside building footprints.
        _, non_building_mask = PrintMetrics.get_building_area()
        self.preds = self.preds * non_building_mask[None, :, :]
        self.trues = self.trues * non_building_mask[None, :, :]
        return self.preds, self.trues

    def load_metadata(self):
        """Parse e.g. "v1_5_d270_sc2_s10" -> speed 1.5, dir 270, sc 2, source 10."""
        df = pd.read_csv(self.meta_txt_path, sep=',', header=None)
        df.columns = ['npz_colname']
        pattern = r'v([0-9_]+)_d(\d+)_sc(\d+)_s(\d+)'
        df[['wind_speed', 'wind_direction', 'sc', 'source_number']] = (
            df['npz_colname'].str.extract(pattern))
        # Wind speed is encoded with '_' as the decimal point.
        df['wind_speed'] = df['wind_speed'].str.replace('_', '.').astype(float)
        df[['wind_direction', 'sc', 'source_number']] = df[
            ['wind_direction', 'sc', 'source_number']].astype(int)
        return df

    def load_conds_data(self):
        # NOTE(review): np.load(..., allow_pickle=True) on a .pkl assumes the
        # file was written by numpy (object array) -- confirm with the producer.
        conds_data = np.load(self.conds_pkl_path, allow_pickle=True)
        return conds_data

    @staticmethod
    def log2ppm(log_conc):
        """Convert log1p-scaled concentration to ppm (non-negative)."""
        log_conc = np.asarray(log_conc)
        log_conc = np.minimum(log_conc, 15.0)  # cap to keep expm1 finite
        ppm_conc = np.expm1(log_conc) * (0.7449)
        return np.maximum(ppm_conc, 0.0)

    def get_sample(self, idx, in_ppm=True):
        """Return (pred_field, true_field, ensemble_preds, meta_row) for idx."""
        psi_f = self.preds[idx]
        psi_t = self.trues[idx]
        meta = self.meta.iloc[idx]
        conds_preds = self.conds_data[idx]['conds']['preds']
        if in_ppm:
            psi_f = DataLoader.log2ppm(psi_f)
            psi_t = DataLoader.log2ppm(psi_t)
            conds_preds = DataLoader.log2ppm(conds_preds)
        return psi_f, psi_t, conds_preds, meta


class ObservationModel:
    """Bilinear-interpolation observation operator H (a linear operator).

    Functions:
        1. observation_operator_H: extract point concentrations from one field
        2. observation_operator_H_ens: apply H to every ensemble member at once
    """

    @staticmethod  # stateless: callable directly on the class
    def observation_operator_H(psi, obs_xy):
        """Extract point concentrations from a single field.

        psi    : (H, W) concentration field
        obs_xy : (n_obs, 2) as [x, y]
        return : (n_obs,) bilinearly interpolated values
        """
        # Delegate to the ensemble version with a singleton ensemble axis so
        # the interpolation arithmetic lives in exactly one place.
        return ObservationModel.observation_operator_H_ens(
            psi[None, :, :], obs_xy)[0]

    @staticmethod
    def observation_operator_H_ens(psi_ens, obs_xy):
        """
        psi_ens: (N_ens, H, W)
        obs_xy : (n_obs, 2)
        return : HX (N_ens, n_obs)
        """
        N_ens, Hh, Ww = psi_ens.shape
        # Clamp just inside the grid so floor+1 stays a valid neighbour.
        xs = np.clip(obs_xy[:, 0], 0, Ww - 1 - 1e-6)
        ys = np.clip(obs_xy[:, 1], 0, Hh - 1 - 1e-6)
        x0 = np.floor(xs).astype(np.int64)
        y0 = np.floor(ys).astype(np.int64)
        x1 = np.clip(x0 + 1, 0, Ww - 1)
        y1 = np.clip(y0 + 1, 0, Hh - 1)
        dx = xs - x0
        dy = ys - y0
        f00 = psi_ens[:, y0, x0]
        f10 = psi_ens[:, y0, x1]
        f01 = psi_ens[:, y1, x0]
        f11 = psi_ens[:, y1, x1]
        HX = (
            f00 * (1 - dx) * (1 - dy)
            + f10 * dx * (1 - dy)
            + f01 * (1 - dx) * dy
            + f11 * dx * dy
        )
        return HX


class SamplingStrategies:
    # =========================
    # (1) Sampling strategies
    # =========================

    @staticmethod
    def sample_random(field_shape, num_points, seed=42):
        """Uniform random sampling over non-building pixels."""
        rng = np.random.default_rng(seed)
        H, W = field_shape
        _, non_building_mask = PrintMetrics.get_building_area()
        valid_idx = np.where(non_building_mask.ravel())[0]
        chosen = rng.choice(valid_idx, size=num_points, replace=False)
        yy, xx = np.meshgrid(np.arange(H), np.arange(W), indexing="ij")
        coords = np.stack([xx.ravel(), yy.ravel()], axis=1)
        return coords[chosen].astype(float)

    @staticmethod
    def sample_uniform(field_shape, num_points, margin=20):
        """Regular grid sampling inside a margin, filtered to non-building cells."""
        H, W = field_shape
        # Aspect-ratio-aware grid dimensions covering >= num_points cells.
        nx = int(np.ceil(np.sqrt(num_points * W / H)))
        ny = int(np.ceil(num_points / nx))
        xs = np.linspace(margin, W - 1 - margin, nx)
        ys = np.linspace(margin, H - 1 - margin, ny)
        xx, yy = np.meshgrid(xs, ys)
        grid_xy = np.stack([xx.ravel(), yy.ravel()], axis=1)
        _, non_building_mask = PrintMetrics.get_building_area()
        xi = np.clip(grid_xy[:, 0].astype(int), 0, W - 1)
        yi = np.clip(grid_xy[:, 1].astype(int), 0, H - 1)
        valid = non_building_mask[yi, xi] == 1
        grid_xy = grid_xy[valid]
        if len(grid_xy) > num_points:
            # Thin evenly down to the requested count.
            idx = np.linspace(0, len(grid_xy) - 1, num_points).astype(int)
            grid_xy = grid_xy[idx]
        return grid_xy

    @staticmethod
    def two_stage_sampling(
        true_field,
        pred_field,
        num_points,
        ens_preds_ppm=None,
        seed=42,
        # ====== global controls ======
        min_dist=22,
        n1_ratio=0.65,            # fraction of points placed in Stage 1
        # ====== Stage-1 tunables ======
        stage1_grad_power=0.8,    # exponent on the gradient weight
        stage1_value_power=1.2,   # exponent on the value weight
        stage1_center_boost=1.2,  # >1 boosts high-value regions
    ):
        """Structure-aware two-stage sampler.

        Stage 1 places ~n1_ratio of the points on the plume's structural band
        (gradient x value weighted, with distance repulsion); Stage 2 adds a
        couple of points at/near the concentration peak.  Observed values are
        extracted from true_field with the bilinear operator H.
        """

        # Weighted random selection with a minimum-distance (repulsion) rule.
        def repulse_pick(candidate_idx, weights, k, selected_idx):
            if k <= 0 or len(candidate_idx) == 0:
                return list(selected_idx)
            candidate_idx = np.asarray(candidate_idx, dtype=np.int64)
            weights = np.maximum(np.asarray(weights, dtype=float), 0.0)
            if weights.sum() <= 0:
                weights = np.ones_like(weights)
            weights = weights / weights.sum()
            # Oversample candidates, then greedily keep those far enough apart.
            overs = min(len(candidate_idx), max(k * 15, 200))
            cand = rng.choice(candidate_idx, size=overs, replace=False, p=weights)
            selected = list(selected_idx)
            for idx in cand:
                xy = coords[idx]
                if not selected:
                    selected.append(idx)
                    continue
                sel_xy = coords[np.asarray(selected)]
                if cKDTree(sel_xy).query(xy, k=1)[0] >= min_dist:
                    selected.append(idx)
                if len(selected) >= k + len(selected_idx):
                    break
            return selected

        rng = np.random.default_rng(seed)
        H, W = pred_field.shape
        yy, xx = np.meshgrid(np.arange(H), np.arange(W), indexing="ij")
        coords = np.stack([xx.ravel(), yy.ravel()], axis=1)  # (HW, 2) as [x, y]

        v = np.maximum(pred_field, 0.0).ravel()
        vmax = float(v.max())
        _, non_building_mask = PrintMetrics.get_building_area()
        non_building_flat = non_building_mask.ravel().astype(bool)

        # Degenerate prior (essentially all zero): fall back to random points.
        if vmax <= 1e-6:
            valid_idx = np.where(non_building_flat)[0]
            idx = rng.choice(valid_idx, size=num_points, replace=False)
            obs_xy = coords[idx]
            obs_val = ObservationModel.observation_operator_H(true_field, obs_xy)
            return obs_xy, obs_val

        n_center_ratio = 1 - n1_ratio
        n_center = max(2, int(num_points * n_center_ratio))
        n1 = num_points - n_center

        # 1. Structural band in log space.
        z = np.log1p(np.maximum(pred_field, 0.0))
        z_flat = z.ravel()
        gx, gy = np.gradient(z)
        grad = np.sqrt(gx**2 + gy**2).ravel()

        # Quantiles over the non-zero region only, so the many zero pixels do
        # not collapse lo/hi.
        nz = z_flat > 1e-6
        z_nz = z_flat[nz]
        if z_nz.size < 50:
            support_mask = (grad.reshape(H, W) > np.quantile(grad, 0.90))
        else:
            lo = np.quantile(z_nz, 0.70)
            hi = np.quantile(z_nz, 0.90)
            core_mask = (z >= lo) & (z <= hi)
            # Dilate the band so sampling is not glued to the strongest edge.
            r_out = 12
            support_mask = binary_dilation(core_mask, iterations=r_out)

        support_idx = np.where(support_mask.ravel() & non_building_flat)[0]
        if len(support_idx) < num_points * 2:
            support_idx = np.where(non_building_flat)[0]

        # Stage 1: gradient-dominated weights with a moderate value term.
        weights1 = (
            (grad[support_idx] ** stage1_grad_power)
            * (z_flat[support_idx] ** stage1_value_power + 1e-6)
        )
        if stage1_center_boost > 1.0:
            weights1 *= (1 + stage1_center_boost * (z_flat[support_idx] / z_flat.max()))
        selected = repulse_pick(support_idx, weights1, n1, [])

        # Stage 2: peak region, only 2-3 points.
        peak_idx = np.argmax(z_flat * non_building_flat.astype(float))
        peak_xy = coords[peak_idx]
        # print(f"peak position: {peak_xy}, z value: {z_flat[peak_idx]:.3f}")
        selected.append(int(peak_idx))  # always include the peak itself (1 point)

        # Pick 1-2 more very close to the peak; min distance relaxed to 5.
        if n_center > 1:
            peak_radius = 10  # small radius: only the immediate peak neighbourhood
            stage2_idx = np.where(
                non_building_flat
                & (np.sqrt((coords[:, 0] - peak_xy[0])**2
                           + (coords[:, 1] - peak_xy[1])**2) <= peak_radius)
            )[0]
            stage2_idx = np.setdiff1d(stage2_idx, np.array(selected))
            if len(stage2_idx) >= 1:
                weights2 = z_flat[stage2_idx]
                weights2 = weights2 / (weights2.sum() + 1e-12)
                overs = min(len(stage2_idx), max((n_center - 1) * 10, 20))
                cands = rng.choice(stage2_idx, size=overs, replace=False, p=weights2)
                for idx in cands:
                    xy = coords[idx]
                    if cKDTree(coords[np.array(selected)]).query(xy, k=1)[0] >= 5:
                        selected.append(int(idx))
                    # NOTE(review): `selected` already holds the n1 Stage-1
                    # points, so this condition fires after the first accepted
                    # point -- Stage 2 adds at most one extra; the shortfall is
                    # refilled from the structural band below.
                    if len(selected) >= n_center + len([]):  # cap at n_center
                        break
                    if len(selected) - (num_points - n_center) >= n_center:
                        break
        selected = list(dict.fromkeys(selected))

        # Top up from the structural band if still short of num_points.
        if len(selected) < num_points:
            remain = np.setdiff1d(support_idx, np.array(selected))
            if len(remain) > 0:
                w_remain = (
                    (grad[remain] ** stage1_grad_power)
                    * (z_flat[remain] ** stage1_value_power + 1e-6)
                )
                extra = repulse_pick(remain, w_remain,
                                     num_points - len(selected), selected)
                selected = extra

        obs_xy = coords[np.array(selected[:num_points])]
        obs_val = ObservationModel.observation_operator_H(true_field, obs_xy)
        return obs_xy, obs_val

    @staticmethod
    def two_stage_pro(
        true_field,
        pred_field,
        num_points,
        ens_preds_ppm=None,
        seed=42,
        min_dist=22,
        n1_ratio=0.65,
        stage1_grad_power=0.8,
        stage1_value_power=1.2,
        stage1_center_boost=1.2,
    ):
        """Robust variant of two_stage_sampling.

        Same Stage-1/Stage-2 scheme, plus: duplicate-aware repulsion, a
        num_points-dependent dilation radius, a min-dist relaxation ladder and
        random fallbacks that guarantee exactly num_points observations.
        """

        # As in two_stage_sampling, but skips already-selected indices and
        # takes the repulsion distance as a parameter.
        def repulse_pick(candidate_idx, weights, k, selected_idx, this_min_dist):
            if k <= 0 or len(candidate_idx) == 0:
                return list(selected_idx)
            candidate_idx = np.asarray(candidate_idx, dtype=np.int64)
            weights = np.maximum(np.asarray(weights, dtype=float), 0.0)
            if weights.sum() <= 0:
                weights = np.ones_like(weights, dtype=float)
            weights = weights / weights.sum()
            overs = min(len(candidate_idx), max(k * 15, 200))
            cand = rng.choice(candidate_idx, size=overs, replace=False, p=weights)
            selected = list(selected_idx)
            for idx in cand:
                idx = int(idx)
                if idx in selected:
                    continue
                xy = coords[idx]
                if not selected:
                    selected.append(idx)
                    continue
                sel_xy = coords[np.asarray(selected)]
                if cKDTree(sel_xy).query(xy, k=1)[0] >= this_min_dist:
                    selected.append(idx)
                if len(selected) >= k + len(selected_idx):
                    break
            return selected

        rng = np.random.default_rng(seed)
        H, W = pred_field.shape
        yy, xx = np.meshgrid(np.arange(H), np.arange(W), indexing="ij")
        coords = np.stack([xx.ravel(), yy.ravel()], axis=1)  # (HW, 2), [x, y]

        v = np.maximum(pred_field, 0.0).ravel()
        vmax = float(v.max())
        _, non_building_mask = PrintMetrics.get_building_area()
        non_building_flat = non_building_mask.ravel().astype(bool)

        if vmax <= 1e-6:
            valid_idx = np.where(non_building_flat)[0]
            idx = rng.choice(valid_idx, size=min(num_points, len(valid_idx)),
                             replace=False)
            if len(idx) < num_points:
                raise ValueError(f"Not enough valid non-building points: need {num_points}, got {len(idx)}")
            obs_xy = coords[idx]
            obs_val = ObservationModel.observation_operator_H(true_field, obs_xy)
            return obs_xy, obs_val

        n_center_ratio = 1 - n1_ratio
        n_center = max(2, int(num_points * n_center_ratio))
        n1 = num_points - n_center

        z = np.log1p(np.maximum(pred_field, 0.0))
        z_flat = z.ravel()
        gx, gy = np.gradient(z)
        grad = np.sqrt(gx**2 + gy**2).ravel()

        nz = z_flat > 1e-6
        z_nz = z_flat[nz]
        if z_nz.size < 50:
            support_mask = (grad.reshape(H, W) > np.quantile(grad, 0.90))
        else:
            lo = np.quantile(z_nz, 0.70)
            hi = np.quantile(z_nz, 0.90)
            core_mask = (z >= lo) & (z <= hi)
            # r_out = int(0.60 * num_points)
            # Dilation radius grows with num_points, clamped to [12, 24].
            r_out = int(np.clip(num_points / 3 + 16 / 3, 12, 24))
            support_mask = binary_dilation(core_mask, iterations=r_out)

        support_idx = np.where(support_mask.ravel() & non_building_flat)[0]
        if len(support_idx) < num_points * 2:
            support_idx = np.where(non_building_flat)[0]

        weights1 = (
            (grad[support_idx] ** stage1_grad_power)
            * (z_flat[support_idx] ** stage1_value_power + 1e-6)
        )
        if stage1_center_boost > 1.0:
            weights1 *= (1 + stage1_center_boost
                         * (z_flat[support_idx] / (z_flat.max() + 1e-12)))
        selected = repulse_pick(support_idx, weights1, n1, [], min_dist)

        peak_idx = int(np.argmax(z_flat * non_building_flat.astype(float)))
        peak_xy = coords[peak_idx]
        selected.append(int(peak_idx))

        if n_center > 1:
            peak_radius = 10
            stage2_idx = np.where(
                non_building_flat
                & (np.sqrt((coords[:, 0] - peak_xy[0]) ** 2
                           + (coords[:, 1] - peak_xy[1]) ** 2) <= peak_radius)
            )[0]
            stage2_idx = np.setdiff1d(stage2_idx, np.array(selected))
            if len(stage2_idx) >= 1:
                weights2 = z_flat[stage2_idx]
                weights2 = weights2 / (weights2.sum() + 1e-12)
                overs = min(len(stage2_idx), max((n_center - 1) * 10, 20))
                cands = rng.choice(stage2_idx, size=overs, replace=False, p=weights2)
                for idx in cands:
                    idx = int(idx)
                    xy = coords[idx]
                    if cKDTree(coords[np.array(selected)]).query(xy, k=1)[0] >= 5:
                        selected.append(idx)
                    # NOTE(review): same early break as two_stage_sampling --
                    # at most one extra point is accepted here.
                    if len(selected) >= n_center + len([]):
                        break
                    if len(selected) - (num_points - n_center) >= n_center:
                        break

        selected = list(dict.fromkeys(selected))
        if len(selected) >= num_points:
            selected = selected[:num_points]
            obs_xy = coords[np.array(selected)]
            obs_val = ObservationModel.observation_operator_H(true_field, obs_xy)
            return obs_xy, obs_val

        # Relaxation ladder: progressively shrink the repulsion distance.
        remain = np.setdiff1d(support_idx, np.array(selected))
        if len(remain) > 0:
            for d_try in [min_dist, max(1, int(min_dist * 0.7)),
                          max(1, int(min_dist * 0.4)), 5, 3]:
                if len(selected) >= num_points:
                    break
                remain = np.setdiff1d(support_idx, np.array(selected))
                if len(remain) == 0:
                    break
                w_remain = (
                    (grad[remain] ** stage1_grad_power)
                    * (z_flat[remain] ** stage1_value_power + 1e-6)
                )
                selected = repulse_pick(
                    remain, w_remain, num_points - len(selected), selected, d_try
                )
                selected = list(dict.fromkeys(selected))

        # Random top-up: first from the structural band, then from any valid pixel.
        if len(selected) < num_points:
            remain_support = np.setdiff1d(support_idx, np.array(selected))
            if len(remain_support) > 0:
                need = num_points - len(selected)
                extra = rng.choice(
                    remain_support,
                    size=min(need, len(remain_support)),
                    replace=False
                )
                selected.extend(extra.tolist())
                selected = list(dict.fromkeys(selected))
        if len(selected) < num_points:
            all_valid = np.where(non_building_flat)[0]
            remain_all = np.setdiff1d(all_valid, np.array(selected))
            if len(remain_all) > 0:
                need = num_points - len(selected)
                extra = rng.choice(
                    remain_all,
                    size=min(need, len(remain_all)),
                    replace=False
                )
                selected.extend(extra.tolist())
                selected = list(dict.fromkeys(selected))

        selected = selected[:num_points]
        assert len(selected) == num_points, f"Expected {num_points} points, got {len(selected)}"
        obs_xy = coords[np.array(selected)]
        obs_val = ObservationModel.observation_operator_H(true_field, obs_xy)
        return obs_xy, obs_val

    @staticmethod
    def smart_two_pass(
        enkf,
        psi_f,
        conds_preds,
        true_field,
        n1,
        n2,
        n_rounds=2,
        phase1_method='two_stage',
        min_dist_p2=22,
        under_correct_alpha=1.5,
        use_localization=False,
        loc_radius_pixobs=35.0,
        loc_radius_obsobs=40.0,
        seed=42,
        verbose=True,
    ):
        """Multi-round point selection + EnKF assimilation.

        Per round:
          Phase 1 - pick n1 points from the current prior, run a pilot EnKF;
          Phase 2 - find under-corrected regions from the pilot residual, pick
                    n2 more points, run the final EnKF for this round;
          the round's analysis becomes the next round's prior (psi_f).

        Args:
            enkf            : EnKF instance
            psi_f           : initial prior field (H, W)
            conds_preds     : ensemble fields (N_ens, H, W), covariance source,
                              unchanged throughout
            true_field      : truth (H, W), used only to extract observation values
            n1, n2          : points per phase per round
            n_rounds        : number of rounds (default 2)
            phase1_method   : Phase-1 strategy ('two_stage' or any method that
                              generate supports)
            min_dist_p2     : min distance (pixels) from Phase-2 to existing points
            under_correct_alpha : exponent on the under-correction weight
            use_localization: whether to use the localized EnKF update
            loc_radius_pixobs / loc_radius_obsobs : localization radii
            seed            : random seed
            verbose         : print intermediate logs

        Returns:
            psi_a_final     : final analysis field (H, W)
            all_obs_xy      : accumulated obs coords (n_rounds*(n1+n2), 2)
            all_obs_val     : accumulated obs values
            psi_pilot       : last round's pilot (Phase-1) analysis field
            obs_xy_p1_last  : last round's Phase-1 coordinates
        """
        conds_preds = np.asarray(conds_preds)
        N_ens, H, W = conds_preds.shape
        _, non_building_mask = PrintMetrics.get_building_area()
        non_building_flat = non_building_mask.ravel().astype(bool)
        yy, xx = np.meshgrid(np.arange(H), np.arange(W), indexing="ij")
        coords = np.stack([xx.ravel(), yy.ravel()], axis=1)

        # Observations accumulate across rounds and are returned all at once.
        all_obs_xy_list = []
        all_obs_val_list = []

        # Current prior: psi_f in round 1, then the previous round's analysis.
        psi_current = psi_f
        psi_pilot = None
        obs_xy_p1_last = None

        for round_idx in range(n_rounds):
            # Distinct seed per round so rounds do not resample the same points.
            round_seed = seed + round_idx

            # -- Phase 1: sample on the current prior + pilot EnKF ------------
            if phase1_method == 'two_stage':
                obs_xy_p1, obs_val_p1 = SamplingStrategies.two_stage_sampling(
                    true_field=true_field, pred_field=psi_current,
                    num_points=n1, seed=round_seed)
            else:
                obs_xy_p1, obs_val_p1 = SamplingStrategies.generate(
                    true_field, psi_current, n1,
                    method=phase1_method, seed=round_seed)

            # Pilot EnKF uses the ensemble re-centered on the current prior.
            ens_mean_cur = np.mean(conds_preds, axis=0)
            X_f_cur = conds_preds - ens_mean_cur[None, :, :] + psi_current[None, :, :]
            if use_localization:
                psi_pilot = enkf._enkf_update_localized(
                    X_f_cur, obs_xy_p1, obs_val_p1,
                    loc_radius_pixobs, loc_radius_obsobs, round_seed)
            else:
                psi_pilot = enkf._enkf_update_standard(X_f_cur, obs_xy_p1, obs_val_p1)

            if verbose:
                print(f"[SmartEnKF] Round {round_idx+1}/{n_rounds} Phase1: "
                      f"{n1} 点, pilot R²={r2_score(true_field.ravel(), psi_pilot.ravel()):.4f}")

            # -- Phase 2: locate under-corrected regions, add points ----------
            psi_f_flat = psi_current.ravel()
            psi_pilot_flat = psi_pilot.ravel()
            correction_map = np.abs(psi_pilot_flat - psi_f_flat)

            nz_vals = psi_f_flat[non_building_flat & (psi_f_flat > 1e-4)]
            prior_thresh = np.quantile(nz_vals, 0.20) if len(nz_vals) > 20 else 1e-4
            plume_support = non_building_flat & (psi_f_flat > prior_thresh)
            cand_idx = np.where(plume_support)[0]
            if len(cand_idx) < n2 * 3:
                cand_idx = np.where(non_building_flat & (psi_f_flat > 1e-6))[0]

            prior_cand = psi_f_flat[cand_idx]
            corr_cand = correction_map[cand_idx]
            prior_norm = prior_cand / (prior_cand.max() + 1e-12)
            corr_norm = corr_cand / (corr_cand.max() + 1e-12)
            # High prior value but small pilot correction => under-corrected.
            under_score = prior_norm * (1.0 - corr_norm + 0.05)

            p1_tree = cKDTree(obs_xy_p1)
            dist_p1, _ = p1_tree.query(coords[cand_idx], k=1)
            # Prefer candidates far from the Phase-1 points.
            dist_w = np.tanh(dist_p1 / (min_dist_p2 * 2.5))

            weights_p2 = (under_score ** under_correct_alpha) * (dist_w + 0.05)
            weights_p2 = np.maximum(weights_p2, 1e-12)
            weights_p2 /= weights_p2.sum()

            rng_round = np.random.default_rng(round_seed)
            n_over = min(len(cand_idx), max(n2 * 30, 600))
            cands = rng_round.choice(cand_idx, size=n_over, replace=False,
                                     p=weights_p2)

            selected_p2 = []
            for cidx in cands:
                xy = coords[cidx]
                if p1_tree.query(xy, k=1)[0] < min_dist_p2:
                    continue
                if selected_p2:
                    if cKDTree(coords[np.array(selected_p2)]).query(xy, k=1)[0] < min_dist_p2:
                        continue
                selected_p2.append(int(cidx))
                if len(selected_p2) >= n2:
                    break

            if len(selected_p2) < n2:
                # Distance constraint could not be met everywhere: random top-up.
                remain = np.setdiff1d(cand_idx, np.array(selected_p2, dtype=int))
                extra = rng_round.choice(
                    remain, size=min(n2 - len(selected_p2), len(remain)),
                    replace=False)
                selected_p2.extend(extra.tolist())

            obs_xy_p2 = coords[np.array(selected_p2[:n2])]
            obs_val_p2 = ObservationModel.observation_operator_H(true_field, obs_xy_p2)
            if verbose:
                print(f"[SmartEnKF] Round {round_idx+1}/{n_rounds} Phase2: 补充 {n2} 个欠校正区域点")

            # -- Final: all of this round's points + current prior ------------
            round_obs_xy = np.vstack([obs_xy_p1, obs_xy_p2])
            round_obs_val = np.concatenate([obs_val_p1, obs_val_p2])
            if use_localization:
                psi_a_round = enkf._enkf_update_localized(
                    X_f_cur, round_obs_xy, round_obs_val,
                    loc_radius_pixobs, loc_radius_obsobs, round_seed)
            else:
                psi_a_round = enkf._enkf_update_standard(X_f_cur, round_obs_xy,
                                                         round_obs_val)

            if verbose:
                print(f"[SmartEnKF] Round {round_idx+1}/{n_rounds} Final: "
                      f"{n1+n2} 点, R²={r2_score(true_field.ravel(), psi_a_round.ravel()):.4f}")

            # Accumulate observations and roll the analysis into the next prior.
            all_obs_xy_list.append(round_obs_xy)
            all_obs_val_list.append(round_obs_val)
            psi_current = np.maximum(psi_a_round, 0.0)
            obs_xy_p1_last = obs_xy_p1

        all_obs_xy = np.vstack(all_obs_xy_list)
        all_obs_val = np.concatenate(all_obs_val_list)
        return (psi_current, all_obs_xy, all_obs_val,
                np.maximum(psi_pilot, 0.0), obs_xy_p1_last)

    @staticmethod
    def generate(true_field, pred_field, num_points, method="uniform", seed=42,
                 enkf=None, conds_preds=None, **sample_params):
        """Dispatch to a sampling strategy.

        Returns (obs_xy, obs_val) for all methods except 'smart_two_pass',
        which returns that strategy's full 5-tuple.
        """
        field_shape = true_field.shape
        if method == "random":
            obs_xy = SamplingStrategies.sample_random(field_shape, num_points, seed)
        elif method == "uniform":
            obs_xy = SamplingStrategies.sample_uniform(field_shape, num_points)
        elif method == "two_stage":
            obs_xy, _ = SamplingStrategies.two_stage_sampling(
                true_field, pred_field, num_points, seed=seed, **sample_params
            )
        elif method == "two_stage_pro":
            obs_xy, _ = SamplingStrategies.two_stage_pro(
                true_field, pred_field, num_points, seed=seed, **sample_params
            )
        elif method == "smart_two_pass":
            if enkf is None or conds_preds is None:
                raise ValueError(
                    "method='smart_two_pass' 需要传入 enkf 实例和 conds_preds 集合场。"
                )
            # Resolve n1 / n2 (n1_ratio allows automatic splitting).
            n1_ratio = float(sample_params.pop('n1_ratio', 0.6))
            n1_default = int(round(num_points * n1_ratio))
            n1 = int(sample_params.pop('n1', n1_default))
            if num_points > 1:
                n1 = max(1, min(n1, num_points - 1))
            else:
                n1 = 1
            n2 = int(sample_params.pop('n2', num_points - n1))
            return SamplingStrategies.smart_two_pass(
                enkf=enkf,
                psi_f=pred_field,
                conds_preds=conds_preds,
                true_field=true_field,
                n1=n1,
                n2=n2,
                seed=seed,
                **sample_params,
            )
        else:
            raise ValueError(f"Unknown observation sampling method: {method}")
        obs_val = ObservationModel.observation_operator_H(true_field, obs_xy)
        return obs_xy, obs_val


class EnKF:
    """Stochastic (perturbed-observation) ensemble Kalman filter."""

    def __init__(
        self,
        obs_std_scale=0.08,  # relative observation noise level
        damping=1.0,         # multiplier on the Kalman increment
        jitter=1e-5,         # diagonal regularizer added to R_e
    ):
        self.obs_std_scale = obs_std_scale
        self.damping = damping
        self.jitter = jitter

    def _perturbed_obs(self, n_ens, d_obs, seed):
        """Build perturbed observations and their error covariance.

        Returns:
            E   : perturbations (n_ens, n_obs), SVD-orthogonalized noise scaled
                  by an adaptive std (obs_std_scale relative error, floor 1.0)
            R_e : (n_obs, n_obs) covariance estimated from E (literature formula
                  R_e = E E^T / (N-1)) plus a jitter diagonal for stability
            Y_o : perturbed observations d + E, (n_ens, n_obs)
        """
        n_obs = d_obs.shape[0]
        obs_std = self.obs_std_scale * np.maximum(np.abs(d_obs), 1.0)
        rng = np.random.default_rng(seed)
        # SVD orthogonalization: zero sample correlation across members.
        Z = rng.standard_normal((n_ens, n_obs))
        U, _, Vt = np.linalg.svd(Z, full_matrices=False)
        Z = U @ Vt * np.sqrt(n_ens - 1)
        E = Z * obs_std[None, :]                  # (n_ens, n_obs)
        R_e = (E.T @ E) / (n_ens - 1)             # (n_obs, n_obs)
        R_e += self.jitter * np.eye(n_obs)        # numerical stabilizer
        Y_o = d_obs[None, :] + E                  # (n_ens, n_obs)
        return E, R_e, Y_o

    @staticmethod
    def _gaussian_localization(H, W, obs_xy, loc_radius_pixobs, loc_radius_obsobs):
        """Gaussian taper matrices.

        Returns rho_xy (Pixels, n_obs) for pixel-observation covariances and
        rho_oo (n_obs, n_obs) for observation-observation covariances.
        """
        yy, xx = np.meshgrid(np.arange(H), np.arange(W), indexing="ij")
        grid = np.stack([xx.ravel(), yy.ravel()], axis=1)  # (Pixels, 2)
        dx = grid[:, None, 0] - obs_xy[None, :, 0]
        dy = grid[:, None, 1] - obs_xy[None, :, 1]
        rho_xy = np.exp(-0.5 * (dx * dx + dy * dy) / (loc_radius_pixobs ** 2))
        dox = obs_xy[:, None, 0] - obs_xy[None, :, 0]
        doy = obs_xy[:, None, 1] - obs_xy[None, :, 1]
        rho_oo = np.exp(-0.5 * (dox * dox + doy * doy) / (loc_radius_obsobs ** 2))
        return rho_xy, rho_oo

    def standard_enkf(self, psi_f, conds_preds, obs_xy, d_obs):
        """
        psi_f      : best prior field from the UNet (H, W)
        conds_preds: ensemble generated by perturbing parameters (N_ens, H, W)
        obs_xy     : station coordinates (n_obs, 2)
        d_obs      : true station concentrations (n_obs,)
        """
        conds_preds = np.asarray(conds_preds)
        ens_mean = np.mean(conds_preds, axis=0)
        # Key step: superpose the ensemble fluctuations onto the UNet prior so
        # the analysis is centered on the UNet field, not on the (possibly
        # biased) ensemble mean.
        X_f = conds_preds - ens_mean[None, :, :] + psi_f[None, :, :]
        return self._enkf_update_standard(X_f, obs_xy, d_obs)

    def enkf_localization(self, psi_f, conds_preds, obs_xy, d_obs,
                          loc_radius_pixobs=40.0,   # Pixel-Obs radius (pixels)
                          loc_radius_obsobs=60.0,   # Obs-Obs radius (pixels)
                          seed=42,
                          SAVE_DIAGNOSTICS=False,
                          ):
        """Localized EnKF update centered on psi_f, with optional P_e diagnostics."""
        conds_preds = np.asarray(conds_preds)
        N_ens, H, W = conds_preds.shape

        # ========= 1) prior ensemble centered at psi_f =========
        ens_mean = np.mean(conds_preds, axis=0)
        X_f = conds_preds - ens_mean[None, :, :] + psi_f[None, :, :]
        X_f_flat = X_f.reshape(N_ens, -1)

        HX = ObservationModel.observation_operator_H_ens(X_f, obs_xy)  # must use X_f
        HX_mean = np.mean(HX, axis=0)
        X_f_bar = np.mean(X_f_flat, axis=0)
        A_prime = (X_f_flat - X_f_bar[None, :]).T  # (Pixels, N_ens)
        Y_prime = (HX - HX_mean).T                 # (n_obs, N_ens)

        # ========= 2) perturbed obs (deterministic-ish, fixed seed) =========
        E, R_e, Y_o = self._perturbed_obs(N_ens, d_obs, seed)

        # ========= 3) sample covariances =========
        Pe_HT = (A_prime @ Y_prime.T) / (N_ens - 1)    # (Pixels, n_obs)
        H_Pe_HT = (Y_prime @ Y_prime.T) / (N_ens - 1)  # (n_obs, n_obs)

        # ========= 4) localization =========
        rho_xy, rho_oo = self._gaussian_localization(
            H, W, obs_xy, loc_radius_pixobs, loc_radius_obsobs)
        Pe_HT = Pe_HT * rho_xy
        H_Pe_HT = H_Pe_HT * rho_oo

        # ========= 5) Kalman gain =========
        S = H_Pe_HT + R_e
        K_e = np.linalg.solve(S.T, Pe_HT.T).T
        innovation = (Y_o - HX).T
        X_a_flat = X_f_flat + (self.damping * (K_e @ innovation)).T
        psi_a = np.mean(X_a_flat, axis=0).reshape(H, W)
        psi_a = np.maximum(psi_a, 0.0)

        # [diagnostics] computed only when requested -- the SVD of A_prime is
        # expensive and does not affect the analysis.
        if SAVE_DIAGNOSTICS:
            diag = self._pe_spectral_diagnostics(A_prime, N_ens, H, W)
            return psi_a, diag
        return psi_a

    @staticmethod
    def _pe_spectral_diagnostics(A_prime, N_ens, H, W):
        """Spectral diagnostics of P_e = A A^T / (N-1) (prints and returns dict).

        Decomposing A_prime / sqrt(N-1) directly is cheaper than forming P_e;
        its singular values squared are P_e's eigenvalues (descending).
        """
        U_ens, sigma, Vt_ens = np.linalg.svd(A_prime / np.sqrt(N_ens - 1),
                                             full_matrices=False)
        eigenvalues = sigma ** 2  # eigenvalues of P_e, descending

        # Metric 1: effective rank r_eff = (sum λ)^2 / sum λ^2, measures how
        # evenly the spectrum is spread.  r_eff→1: nearly rank-1 (energy in a
        # single direction); r_eff→N: isotropic.
        r_eff = (eigenvalues.sum() ** 2) / (eigenvalues ** 2).sum()

        # Metric 2: leading eigenvalue λ1 = variance along the main direction.
        lambda1 = eigenvalues[0]
        # NOTE(review): named "min" but takes the second-smallest eigenvalue;
        # presumably because the smallest is ~0 from rank deficiency -- confirm.
        lambda_min = eigenvalues[-2]

        # Metric 3: directional concentration λ1/λ2 (anisotropy of P_e).
        ratio_1_2 = eigenvalues[0] / eigenvalues[1] if len(eigenvalues) > 1 else np.inf

        # Metric 4: peak location of the leading eigenvector (main perturbation
        # direction of the ensemble).
        u1 = U_ens[:, 0].reshape(H, W)
        u1_peak = np.unravel_index(np.abs(u1).argmax(), u1.shape)

        print("=" * 50)
        print(f"[P_e 谱诊断]")
        print(f" 指标1 r_eff = {r_eff:.2f} # (Σλ)²/Σλ²,建筑影响下界≈2.1")
        print(f" 指标2 λ1 = {lambda1:.2f} {lambda_min:.2f} # 主方向方差,随d单调增大")
        print(f" 指标3 λ1/λ2 = {ratio_1_2:.2f} # 各向异性,d=45°时峰值→最优配置")
        print(f" 指标4 u1峰值位置 = {u1_peak} # d变化时系统漂移,v/Q不变")
        print("=" * 50)
        return {
            'r_eff': r_eff,
            'lambda1': lambda1,
            'ratio_1_2': ratio_1_2,
            'u1_peak_row': u1_peak[0],
            'u1_peak_col': u1_peak[1],
        }

    def _enkf_update_standard(self, X_f, obs_xy, d_obs):
        """Standard EnKF update on an already-centered ensemble X_f (N_ens, H, W).

        Returns the analysis-mean field psi_a (H, W), clipped to >= 0.
        """
        N_ens, H, W = X_f.shape
        X_f_flat = X_f.reshape(N_ens, -1)
        HX = ObservationModel.observation_operator_H_ens(X_f, obs_xy)
        HX_mean = np.mean(HX, axis=0)
        X_f_bar = np.mean(X_f_flat, axis=0)
        A_prime = (X_f_flat - X_f_bar[None, :]).T  # (Pixels, N_ens)
        Y_prime = (HX - HX_mean).T                 # (n_obs, N_ens)

        # Fixed seed 42 keeps this update reproducible across calls.
        E, R_e, Y_o = self._perturbed_obs(N_ens, d_obs, 42)

        Pe_HT = (A_prime @ Y_prime.T) / (N_ens - 1)
        H_Pe_HT = (Y_prime @ Y_prime.T) / (N_ens - 1)
        # K_e = Pe H^T (H Pe H^T + R_e)^-1 via solve for numerical stability.
        K_e = np.linalg.solve((H_Pe_HT + R_e).T, Pe_HT.T).T
        innovation = (Y_o - HX).T
        X_a_flat = X_f_flat + (self.damping * (K_e @ innovation)).T
        psi_a = np.mean(X_a_flat, axis=0).reshape(H, W)
        return np.maximum(psi_a, 0.0)

    def _enkf_update_localized(self, X_f, obs_xy, d_obs,
                               loc_radius_pixobs=35.0,
                               loc_radius_obsobs=40.0,
                               seed=42):
        """Localized EnKF update on an already-centered ensemble X_f (N_ens, H, W).

        Returns the analysis-mean field psi_a (H, W), clipped to >= 0.
        """
        N_ens, H, W = X_f.shape
        X_f_flat = X_f.reshape(N_ens, -1)
        HX = ObservationModel.observation_operator_H_ens(X_f, obs_xy)
        HX_mean = np.mean(HX, axis=0)
        X_f_bar = np.mean(X_f_flat, axis=0)
        A_prime = (X_f_flat - X_f_bar[None, :]).T
        Y_prime = (HX - HX_mean).T

        E, R_e, Y_o = self._perturbed_obs(N_ens, d_obs, seed)

        Pe_HT = (A_prime @ Y_prime.T) / (N_ens - 1)
        H_Pe_HT = (Y_prime @ Y_prime.T) / (N_ens - 1)
        rho_xy, rho_oo = self._gaussian_localization(
            H, W, obs_xy, loc_radius_pixobs, loc_radius_obsobs)
        Pe_HT = Pe_HT * rho_xy
        H_Pe_HT = H_Pe_HT * rho_oo

        S = H_Pe_HT + R_e
        K_e = np.linalg.solve(S.T, Pe_HT.T).T
        innovation = (Y_o - HX).T
        X_a_flat = X_f_flat + (self.damping * (K_e @ innovation)).T
        psi_a = np.mean(X_a_flat, axis=0).reshape(H, W)
        return np.maximum(psi_a, 0.0)


class PrintMetrics:
    @staticmethod
    def pad_center_crop(arr, center_y, center_x, out_h=256, out_w=256):
        """Crop a (C, H, W) or (H, W) array to (out_h, out_w) around
        (center_y, center_x), zero-padding where the window leaves the array."""
        if arr.ndim == 3:
            C, H, W = arr.shape
            out = np.zeros((C, out_h, out_w), dtype=arr.dtype)
        else:
            H, W = arr.shape
            out = np.zeros((out_h, out_w), dtype=arr.dtype)
        y0, x0 = center_y - out_h // 2, center_x - out_w // 2
        y1, x1 = y0 + out_h, x0 + out_w
        # Source window clipped to the array; destination offset accordingly.
        sy0, sy1 = max(0, y0), min(H, y1)
        sx0, sx1 = max(0, x0), min(W, x1)
        dy0, dx0 = sy0 - y0, sx0 - x0
        dy1, dx1 = dy0 + (sy1 - sy0), dx0 + (sx1 - sx0)
        if arr.ndim == 3:
            out[:, dy0:dy1, dx0:dx1] = arr[:, sy0:sy1, sx0:sx1]
        else:
            out[dy0:dy1, dx0:dx1] = arr[sy0:sy1, sx0:sx1]
        return out

    @staticmethod
    @functools.lru_cache(maxsize=1)
    def _load_building_area():
        # Loading and cropping the building field hits the disk and never
        # changes, and get_building_area is called from nearly every routine
        # in this module -- cache the result once per process.
        npz_path = '../Gas_unet/Gas_code/dataset_m/5min_m_Data_special/min5_m_v1_0_d270_sc2_s10_04118.npz'
        data = np.load(npz_path)
        build_data = data['three_channel_data'][0]
        non_building_mask = (build_data == 0).astype(np.uint8)
        center_y, center_x = 498, 538
        build_data_256 = PrintMetrics.pad_center_crop(
            build_data, center_y, center_x, 256, 256)
        non_building_mask = PrintMetrics.pad_center_crop(
            non_building_mask, center_y, center_x, 256, 256)
        return build_data_256, non_building_mask

    @staticmethod
    def get_building_area():
        """Return (building_field_256, non_building_mask_256).

        Fresh copies are returned so callers may modify the arrays without
        corrupting the process-wide cache.
        """
        build_data_256, non_building_mask = PrintMetrics._load_building_area()
        return build_data_256.copy(), non_building_mask.copy()

    @staticmethod
    def weighted_r2(y_true, y_pred, gamma=1.0, eps=1e-12):
        """Weighted R2 score emphasizing high-value regions.

        Weights are y_true**gamma (floored at eps), normalized to sum to 1.
        Returns NaN when the weighted variance of y_true is degenerate.
        """
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)
        w = np.maximum(y_true, eps) ** gamma
        w = w / np.sum(w)
        y_bar = np.sum(w * y_true)
        num = np.sum(w * (y_true - y_pred) ** 2)
        den = np.sum(w * (y_true - y_bar) ** 2)
        if den < eps:
            return np.nan
        return 1.0 - num / den

    @staticmethod
    def _pair_metrics(y_true, y_pred):
        """Return (r2, mse, mae, nmse, nmae) for two flat arrays."""
        nmse = np.mean((y_true - y_pred) ** 2) / (
            np.mean(y_true) * np.mean(y_pred) + 1e-12)
        nmae = np.mean(np.abs(y_true - y_pred)) / (np.mean(y_true) + 1e-12)
        return (r2_score(y_true, y_pred),
                mean_squared_error(y_true, y_pred),
                mean_absolute_error(y_true, y_pred),
                nmse,
                nmae)

    @staticmethod
    def print_metrics(i, wind_speed, wind_direction, sc, source_number,
                      true_field, pred_field, analysis, obs_xy,
                      metrics_save_flag=False, metrics_print_flag=True):
        """
        Metrics:
        1) Field-wise (all pixels)
        2) Plume-aware (true > eps)
        3) At observations
        Each reported before (pred) and after (analysis) assimilation.
        """
        # Clip negatives and mask buildings before scoring.
        _, non_building_mask = PrintMetrics.get_building_area()
        true_field = np.where(true_field > 0, true_field, 0) * non_building_mask
        pred_field = np.where(pred_field > 0, pred_field, 0) * non_building_mask
        analysis = np.where(analysis > 0, analysis, 0) * non_building_mask

        true_flat = true_field.ravel()
        pred_flat = pred_field.ravel()
        ana_flat = analysis.ravel()

        # ===============================
        # (1) Field-wise (all pixels)
        # ===============================
        (r2_before, mse_before, mae_before,
         nmse_before, nmae_before) = PrintMetrics._pair_metrics(true_flat, pred_flat)
        (r2_after, mse_after, mae_after,
         nmse_after, nmae_after) = PrintMetrics._pair_metrics(true_flat, ana_flat)

        # ===============================
        # (2) Plume-aware (true > eps)
        # ===============================
        plume_mask = true_flat > 1e-6
        true_p = true_flat[plume_mask]
        pred_p = pred_flat[plume_mask]
        ana_p = ana_flat[plume_mask]
        (r2_plume_before, mse_plume_before, mae_plume_before,
         nmse_plume_before, nmae_plume_before) = PrintMetrics._pair_metrics(true_p, pred_p)
        (r2_plume_after, mse_plume_after, mae_plume_after,
         nmse_plume_after, nmae_plume_after) = PrintMetrics._pair_metrics(true_p, ana_p)
        # ---- Weighted R2 (plume-aware) ----
        wr2_plume_before = PrintMetrics.weighted_r2(true_p, pred_p, gamma=1.0)
        wr2_plume_after = PrintMetrics.weighted_r2(true_p, ana_p, gamma=1.0)

        # ===============================
        # (3) At observations
        # ===============================
        true_at_obs = ObservationModel.observation_operator_H(true_field, obs_xy)
        pred_at_obs = ObservationModel.observation_operator_H(pred_field, obs_xy)
        ana_at_obs = ObservationModel.observation_operator_H(analysis, obs_xy)
        (r2_obs_before, mse_obs_before, mae_obs_before,
         nmse_obs_before, nmae_obs_before) = PrintMetrics._pair_metrics(true_at_obs, pred_at_obs)
        (r2_obs_after, mse_obs_after, mae_obs_after,
         nmse_obs_after, nmae_obs_after) = PrintMetrics._pair_metrics(true_at_obs, ana_at_obs)

        if metrics_print_flag:
            print("=== Assimilation Metrics ===")
            print("[Field-wise]")
            print(f"R2 : {r2_before:.4f}->{r2_after:.4f}")
            print(f"MSE : {mse_before:.4f}->{mse_after:.4f}")
            print(f"MAE : {mae_before:.4f}->{mae_after:.4f}")
            print("[Plume-aware]")
            print(f"R2 : {r2_plume_before:.4f}->{r2_plume_after:.4f}")
            print(f"MSE : {mse_plume_before:.4f}->{mse_plume_after:.4f}")
            print(f"MAE : {mae_plume_before:.4f}->{mae_plume_after:.4f}")
            print(f"W-R2 : {wr2_plume_before:.4f}->{wr2_plume_after:.4f}")
            print("[At observations]")
            print(f"R2 : {r2_obs_before:.4f}->{r2_obs_after:.4f}")
            print(f"MSE : {mse_obs_before:.4f}->{mse_obs_after:.4f}")
            print(f"MAE : {mae_obs_before:.4f}->{mae_obs_after:.4f}")

        if metrics_save_flag:
            return {
                'idx': i,
                'wind_speed': wind_speed,
                'wind_direction': wind_direction,
                'stability_class': sc,
                'source_number': source_number,
                "r2_before": r2_before, "r2_after": r2_after,
                "r2_plume_before": r2_plume_before, "r2_plume_after": r2_plume_after,
                "w_r2_plume_before": wr2_plume_before, "w_r2_plume_after": wr2_plume_after,
                "r2_obs_before": r2_obs_before, "r2_obs_after": r2_obs_after,
                "mse_before": mse_before, "mse_after": mse_after,
                "mse_plume_before": mse_plume_before, "mse_plume_after": mse_plume_after,
                "mse_obs_before": mse_obs_before, "mse_obs_after": mse_obs_after,
                "mae_before": mae_before, "mae_after": mae_after,
                "mae_plume_before": mae_plume_before, "mae_plume_after": mae_plume_after,
                "mae_obs_before": mae_obs_before, "mae_obs_after": mae_obs_after,
                "nmse_before": nmse_before, "nmse_after": nmse_after,
                "nmse_plume_before": nmse_plume_before, "nmse_plume_after": nmse_plume_after,
                "nmae_before": nmae_before, "nmae_after": nmae_after,
                "nmae_plume_before": nmae_plume_before, "nmae_plume_after": nmae_plume_after,
                "nmse_obs_before": nmse_obs_before, "nmse_obs_after": nmse_obs_after,
                "nmae_obs_before": nmae_obs_before, "nmae_obs_after": nmae_obs_after,
            }


class Visualization:
    def plot_assimilation_with_building(
        true_field, pred_field, analysis, obs_xy,
        vmax=10,
        title_suffix=""
    ):
        """
        - building mask
        - non-building-area concentration
        - before / after assimilation comparison
        """
        # ---------- physical crop + building mask ----------
        build_data_256, non_building_mask = PrintMetrics.get_building_area()
        true_field = np.where(true_field > 0, true_field, 0) * non_building_mask
        pred_field = np.where(pred_field > 0, pred_field, 0) * non_building_mask
        analysis = np.where(analysis > 0, analysis, 0) * non_building_mask
        # 
---------- 画图 ---------- fig, axs = plt.subplots(1, 3, figsize=(14, 4), dpi=300) cmap = "inferno" levels = np.linspace(0, vmax, 21) im0 = axs[0].contourf(true_field, levels=levels, cmap=cmap, vmin=0, vmax=vmax, extend='max') axs[0].set_title('True Field' + title_suffix) plt.colorbar(im0, ax=axs[0]) im1 = axs[1].contourf(pred_field, levels=levels, cmap=cmap, vmin=0, vmax=vmax, extend='max') axs[1].set_title(r'Prior Prediction Field $\psi^{f}$') plt.colorbar(im1, ax=axs[1]) im2 = axs[2].contourf(analysis, levels=levels, cmap=cmap, vmin=0, vmax=vmax, extend='max') axs[2].set_title(r'Analysis $\psi^{a}$') plt.colorbar(im2, ax=axs[2]) axs[0].scatter(obs_xy[:, 0], obs_xy[:, 1], c='red', s=15, edgecolors='k') axs[1].scatter(obs_xy[:, 0], obs_xy[:, 1], c='red', s=15, edgecolors='k') axs[2].scatter(obs_xy[:, 0], obs_xy[:, 1], c='red', s=15, edgecolors='k') # ---------- 指标 ---------- axs[1].text( 80, 15, f"$R^2$={r2_score(true_field.ravel(), pred_field.ravel()):.4f}\n" f"$MSE$={mean_squared_error(true_field.ravel(), pred_field.ravel()):.4f}\n" f"$MAE@Obs$={mean_absolute_error(ObservationModel.observation_operator_H(true_field, obs_xy), ObservationModel.observation_operator_H(pred_field, obs_xy)):.3f}", color='white' ) axs[2].text( 80, 15, f"$R^2$={r2_score(true_field.ravel(), analysis.ravel()):.4f}\n" f"$MSE$={mean_squared_error(true_field.ravel(), analysis.ravel()):.4f}\n" f"$MAE@Obs$={mean_absolute_error(ObservationModel.observation_operator_H(true_field, obs_xy), ObservationModel.observation_operator_H(analysis, obs_xy)):.3f}", color='white' ) plt.tight_layout() plt.show() def plot_assimilation_4panel( true_field, pred_field, analysis, obs_xy, obs_val, vmin=0, vmax=10, title_suffix="" ): # ---------- 物理裁剪 + 建筑 mask ---------- _, non_building_mask = PrintMetrics.get_building_area() true_field = np.where(true_field > 0, true_field, 0) * non_building_mask pred_field = np.where(pred_field > 0, pred_field, 0) * non_building_mask analysis = np.where(analysis > 0, analysis, 0) * 
non_building_mask # ---------- Figure ---------- fig, axs = plt.subplots(1, 4, figsize=(18, 4), dpi=300) cmap = "inferno" levels = np.linspace(0, vmax, 21) # ---------- (a) True field ---------- im0 = axs[0].contourf(true_field, levels=levels, cmap=cmap, vmin=vmin, vmax=vmax, extend='both') axs[0].set_title("True Field" + title_suffix) plt.colorbar(im0, ax=axs[0]) # ---------- (b) Prior prediction ---------- im1 = axs[1].contourf(pred_field, levels=levels, cmap=cmap, vmin=vmin, vmax=vmax, extend='both') axs[1].set_title(r"Prior Prediction $\psi^{f}$") plt.colorbar(im1, ax=axs[1]) # ---------- (c) Observations (points only) ---------- sc = axs[2].scatter( obs_xy[:, 0], obs_xy[:, 1], c=obs_val, cmap=cmap, vmin=vmin, vmax=vmax, s=30, edgecolors="k", linewidths=0.4, alpha=0.9 ) axs[2].set_title("Observations $d_i$") axs[2].set_xlim(0, true_field.shape[1]) axs[2].set_ylim(true_field.shape[0], 0) axs[2].set_aspect("equal") axs[2].invert_yaxis() plt.colorbar(sc, ax=axs[2], extend='both') # ---------- (d) Analysis field ---------- im3 = axs[3].contourf(analysis, levels=levels, cmap=cmap, vmin=vmin, vmax=vmax, extend='both') axs[3].set_title(r"Analysis $\psi^{a}$") plt.colorbar(im3, ax=axs[3]) # ---------- Metrics ----------pred_at_obs axs[1].text( 0.02, 0.95, f"$R^2$={r2_score(true_field.ravel(), pred_field.ravel()):.4f}\n" f"$MSE$={mean_squared_error(true_field.ravel(), pred_field.ravel()):.4f}", transform=axs[1].transAxes, va="top", color="white" ) axs[3].text( 0.02, 0.95, f"$R^2$={r2_score(true_field.ravel(), analysis.ravel()):.4f}\n" f"$MSE$={mean_squared_error(true_field.ravel(), analysis.ravel()):.4f}", transform=axs[3].transAxes, va="top", color="white" ) plt.tight_layout() plt.show() def plot_pe_spectrum(all_diags, save_flag=False): C_BLUE = "#488ABA" C_ORANGE = "#e5954e" ds = [diag['d'] for diag in all_diags] r_eff_values = [diag['r_eff'] for diag in all_diags] ratio_12_values = [diag['ratio_1_2'] for diag in all_diags] fig, ax = plt.subplots(figsize=(6, 3), 
dpi=300) l1, = ax.plot(ds, r_eff_values, marker='o', color=C_BLUE, linewidth=1.5, alpha=0.4, markersize=6, label=r'$r_{\rm eff}$') ax.set_xlabel(r'Wind direction', labelpad=3) ax.set_ylabel(r'Effective rank $r_{\rm eff}$', color=C_BLUE, labelpad=4) ax.tick_params(axis='y', colors=C_BLUE) ax.spines['left'].set_color(C_BLUE) ax.set_xticks(ds) ax.set_xticklabels([f'{d}°' for d in ds]) ax.set_ylim(1.5, 3) # 右轴:λ1/λ2 ax2 = ax.twinx() l2, = ax2.plot(ds, ratio_12_values, marker='s', color=C_ORANGE, linewidth=1.5, alpha=0.4, markersize=6, label=r'$\lambda_1/\lambda_2$') ax2.set_ylabel(r'Anisotropy $\lambda_1/\lambda_2$', color=C_ORANGE, labelpad=4) ax2.tick_params(axis='y', colors=C_ORANGE) ax2.spines['right'].set_color(C_ORANGE) ax2.set_ylim(1.5, 3.5) opt_idx = int(np.argmax(ratio_12_values)) ax2.axvline(ds[opt_idx], color='grey', linewidth=0.8, linestyle='--', alpha=0.6) ax2.text(ds[opt_idx] + 0.8, 1.58, r'$d^{**}$', color='grey') # 统一图例 fig.legend(handles=[l1, l2], loc='upper left', bbox_to_anchor=(0.15, 0.95), ncol=1, frameon=False) plt.tight_layout() if save_flag: plt.savefig('./figures/test1/reff_ratio.png', dpi=300, bbox_inches='tight', transparent=True) # plt.savefig('./figures/test1/reff_ratio.svg', dpi=300, bbox_inches='tight', format='svg') plt.show() def assimilation_scatter(psi_t_log, psi_f_log, psi_a_log, obs_xy): def log10_formatter(x, pos): return r'$10^{%d}$' % x obs_true = np.log10(ObservationModel.observation_operator_H(psi_t_log, obs_xy)+1e-3) obs_prior = np.log10(ObservationModel.observation_operator_H(psi_f_log, obs_xy)+1e-3) obs_analysis = np.log10(ObservationModel.observation_operator_H(psi_a_log, obs_xy)+1e-3) fig, ax = plt.subplots(figsize=(5, 4.5), dpi=300) vmin, vamx = -4, 2 lim = [vmin, vamx] ax.plot(lim, lim, 'k--', lw=1, label='1:1 line', zorder=1) for obs_pred, label, color in zip( [obs_prior, obs_analysis], ['Prior', 'Analysis'], ['steelblue', 'tomato'] ): # 散点 ax.scatter(obs_true, obs_pred, s=25, alpha=0.6, color=color, zorder=3) slope, 
intercept, r, _, _ = stats.linregress(obs_true, obs_pred) rmse = np.sqrt(np.mean((obs_pred - obs_true) ** 2)) x_fit = np.linspace(lim[0], lim[1], 100) ax.plot(x_fit, slope * x_fit + intercept, '-', color=color, lw=1.5, label=f'{label}r:{r:.2f}', zorder=2) ax.set_xlabel('log(True)') ax.set_ylabel('log(Predicted)') ax.legend(loc='lower right', frameon=False) ax.set_xlim(-4, 2) ax.set_ylim(-4, 2) ax.xaxis.set_major_formatter(ticker.FuncFormatter(log10_formatter)) ax.yaxis.set_major_formatter(ticker.FuncFormatter(log10_formatter)) plt.tight_layout() plt.show() def none_assimilation_scatter(psi_t_log, psi_f_log, psi_a_log, obs_xy): def sample_independent_points(field, obs_xy, num_points=100, seed=42): H, W = field.shape yy, xx = np.meshgrid(np.arange(H), np.arange(W), indexing='ij') all_xy = np.stack([xx.ravel(), yy.ravel()], axis=1) obs_set = set(map(tuple, np.round(obs_xy).astype(int))) obs_mask = np.array([tuple(p) not in obs_set for p in all_xy]) _, non_building_mask = PrintMetrics.get_building_area() building_mask = non_building_mask[yy.ravel(), xx.ravel()] == 1 num_mask = field.ravel() > 1e-4 candidate_xy = all_xy[obs_mask & building_mask & num_mask] rng = np.random.default_rng(seed) idx = rng.choice(len(candidate_xy), num_points, replace=False) return candidate_xy[idx] test_xy = sample_independent_points(psi_t_log, obs_xy, 200) obs_true = np.log10(ObservationModel.observation_operator_H(psi_t_log, test_xy) + 1e-6) obs_prior = np.log10(ObservationModel.observation_operator_H(psi_f_log, test_xy) + 1e-6) obs_analysis = np.log10(ObservationModel.observation_operator_H(psi_a_log, test_xy) + 1e-6) def log10_formatter(x, pos): return r'$10^{%d}$' % x fig, ax = plt.subplots(figsize=(5, 4.5), dpi=300) vmin, vmax = -4, 2 lim = [vmin, vmax] # 1:1 line ax.plot(lim, lim, 'k--', lw=1, label='1:1 line', zorder=1) for obs_pred, label, color in zip( [obs_prior, obs_analysis], ['Prior', 'Analysis'], ['steelblue', 'tomato'] ): # scatter ax.scatter(obs_true, obs_pred, s=30, 
alpha=0.65, color=color, zorder=3) slope, intercept, r, _, _ = stats.linregress(obs_true, obs_pred) rmse = np.sqrt(np.mean((obs_pred - obs_true) ** 2)) x_fit = np.linspace(lim[0], lim[1], 100) ax.plot(x_fit, slope * x_fit + intercept, '-', color=color, lw=1.5, label=f'{label}r:{r:.2f}', zorder=2) ax.set_xlim(vmin, vmax) ax.set_ylim(vmin, vmax) ax.set_xlabel('log(True)') ax.set_ylabel('log(Predicted)') ax.xaxis.set_major_formatter(ticker.FuncFormatter(log10_formatter)) ax.yaxis.set_major_formatter(ticker.FuncFormatter(log10_formatter)) ax.legend(loc='lower right', frameon=False) plt.tight_layout() plt.show() def methods_comparison(source_idx=25, num_points = 10, methods = ['random', 'uniform', 'two_stage']): for method in methods: print(f"\n=== 观测点采样方法: {method} ===") data = np.load(f'./dataset/assim_conds/fields_n{num_points}_{method}_obs1.npz', allow_pickle=True) all_fields = data['all_fields'] sample_data = all_fields[source_idx] psi_t_log = sample_data['trues_log'] psi_f_log = sample_data['preds_log'] psi_a_log = sample_data['analysis_log'] psi_t_ppm = sample_data['trues_ppm'] psi_f_ppm = sample_data['preds_ppm'] psi_a_ppm = sample_data['analysis_ppm'] obs_xy = sample_data['obs_xy'] obs_value_log = sample_data['obs_value_log'] obs_value_ppm = sample_data['obs_value_ppm'] Visualization.plot_assimilation_with_building( true_field=psi_t_log, pred_field=psi_f_log, analysis=psi_a_log, obs_xy=obs_xy, vmax=10, title_suffix=f" (idx={source_idx})" ) Visualization.plot_assimilation_4panel( true_field=psi_t_log, pred_field=psi_f_log, analysis=psi_a_log, obs_xy=obs_xy, obs_val=obs_value_log, vmax=10, title_suffix=f" (idx={source_idx})" ) Visualization.plot_assimilation_4panel( true_field=psi_t_ppm, pred_field=psi_f_ppm, analysis=psi_a_ppm, obs_xy=obs_xy, obs_val=obs_value_ppm, vmax=200, title_suffix=f" in PPM SPACE (idx={source_idx})" ) def plot_n_hist_comparison(obs_tag=1, space_mode="log", methods=["random", "uniform", "two_stage"], n_list=[10, 20, 30, 40, 50], 
base_dir="./dataset/assim_conds", plot_mode="after", target_method="two_stage"): scope_labels = ["overall", "plume", "obs"] metric_keys = { "r2": ["r2", "r2_plume", "r2_obs"], "nmse": ["nmse", "nmse_plume", "nmse_obs"], "nmae": ["nmae", "nmae_plume", "nmae_obs"], } def load_method_df(space, method, num_points): candidate_methods = [method] if method != "two_stage_pro": candidate_methods.append("two_stage_pro") for m in candidate_methods: fp = os.path.join( base_dir, f"assimi_{space}_n{num_points}_{m}_obs{obs_tag}.csv" ) if os.path.exists(fp): if m != method: print(f"[Info] {method} not exsist, back to {fp}") return pd.read_csv(fp) print(f"[Warning] file not found: {candidate_methods}") return None def get_metric_value(df, metric_name): before_col = f"{metric_name}_before" after_col = f"{metric_name}_after" if before_col not in df.columns or after_col not in df.columns: return np.nan before_mean = df[before_col].mean() after_mean = df[after_col].mean() if plot_mode == "delta": return after_mean - before_mean return after_mean data = { "r2": np.full((len(n_list), 3), np.nan), "nmse": np.full((len(n_list), 3), np.nan), "nmae": np.full((len(n_list), 3), np.nan), } for i_n, n in enumerate(n_list): df = load_method_df(space_mode, target_method, n) if df is None: continue for metric_type in ["r2", "nmse", "nmae"]: vals = [] for mk in metric_keys[metric_type]: vals.append(get_metric_value(df, mk)) data[metric_type][i_n, :] = vals # print(f"space_mode = {space_mode}, plot_mode = {plot_mode}, method = {target_method}") # for metric_type in ["r2", "mse", "mae"]: # print(f"\n{metric_type.upper()}:") # print(pd.DataFrame(data[metric_type], index=n_list, columns=scope_labels)) fig, ax1 = plt.subplots(figsize=(12, 6), dpi=300) ax2 = ax1.twinx() x_base = np.arange(len(n_list)) scope_offsets = { "overall": -0.24, "plume": 0.00, "obs": 0.24, } bar_w = 0.20 colors = cm.get_cmap("Blues") scope_colors = { "overall": colors(0.45), "plume": colors(0.65), "obs": colors(0.85), "edge": 
colors(0.85), } scope_linestyles = { "overall": "-", "plume": "--", "obs": ":", } scope_markers_mse = { "overall": "o", "plume": "o", "obs": "o", } scope_markers_mae = { "overall": "s", "plume": "s", "obs": "s", } # ------------------------- # 左轴:R2 柱状图 # ------------------------- text_r2 = r"$\mathit{R}^2$" for j, scope in enumerate(scope_labels): x = x_base + scope_offsets[scope] y = data["r2"][:, j] ax1.bar( x, y, width=bar_w, color=scope_colors[scope], alpha=0.75, label=f"{text_r2}-{scope}", edgecolor=scope_colors["edge"], zorder=2 ) # 给每根柱子加数值 for xi, yi in zip(x, y): if np.isfinite(yi): ax1.text( xi, yi + 0.001, f"{yi:.2f}", ha="center", va="bottom" ) ax1.set_ylabel(r"$\mathit{R}^2$") ax1.set_xticks(x_base) ax1.set_xticklabels([f"n={n}" for n in n_list]) ax1.grid(axis="y", linestyle="--", alpha=0.25, zorder=0) # ax1.set_ylim(0.6, 1.1) if plot_mode == "delta": ax1.axhline(0, color="k", linewidth=1) # 在每个 n 下标出 overall / plume / obs y1_min, y1_max = ax1.get_ylim() y_text = y1_min - 0.06 * (y1_max - y1_min) # for i in range(len(n_list)): # ax1.text(x_base[i] + scope_offsets["overall"], y_text, "overall", # ha="center", va="top") # ax1.text(x_base[i] + scope_offsets["plume"], y_text, "plume", # ha="center", va="top") # ax1.text(x_base[i] + scope_offsets["obs"], y_text, "obs", # ha="center", va="top") for j, scope in enumerate(scope_labels): x = x_base + scope_offsets[scope] ax2.plot( x, data["nmse"][:, j], color=scope_colors[scope], linestyle="--", marker="o", linewidth=1.8, markersize=5, label=f"NMSE-{scope}", markeredgecolor=scope_colors["edge"], zorder=3 ) ax2.plot( x, data["nmae"][:, j], color=scope_colors[scope], linestyle=":", marker="s", linewidth=1.8, markersize=5, markeredgecolor=scope_colors["edge"], label=f"NMAE-{scope}", zorder=3 ) ax2.set_ylabel("NMSE / NMAE") # ax2.set_ylim(0, 0.5) if plot_mode == "delta": ax2.axhline(0, color="gray", linewidth=1, alpha=0.6) h1, l1 = ax1.get_legend_handles_labels() h2, l2 = ax2.get_legend_handles_labels() 
ax1.legend( h1 + h2, l1 + l2, frameon=False, loc="center left", bbox_to_anchor=(1.15, 0.5) ) plt.tight_layout() plt.show() def cal_all_test_resluts(config, conds_pkl_path=None, data_path=None, use_localization=True, save_fields_flag=True, save_metrics_flag=False): ''' 使用说明: sample_method_lists = ["random", "uniform", "two_stage"] for method in sample_method_lists: config_test = { "num_points": 20, "sample_method": method, "obs_std_scale": 0.01, "damping": 1, "two_stage_params": { "min_dist": 28, "n1_ratio": 0.6, "stage1_support_frac": 0.2, "stage1_grad_power": 0.8, "stage1_value_power": 1.2, "stage1_center_boost": 1.2, }, } cal_all_test_resluts(config_test) ''' # ==== 加载数据 ==== num_points = config['num_points'] sample_method = config['sample_method'] # "random" or "uniform" # Kalman 参数 obs_std_scale=config['obs_std_scale'] damping=config['damping'] sample_method = config["sample_method"] sample_params = {} params_key = f"{sample_method}_params" if params_key in config and isinstance(config[params_key], dict): sample_params = config[params_key] if conds_pkl_path is not None: loader = DataLoader( pred_npz_path='./dataset/pre_data/all_test_pred2.npz', meta_txt_path='./dataset/pre_data/combined_test_special.txt', conds_pkl_path=conds_pkl_path ) else: loader = DataLoader( pred_npz_path='./dataset/pre_data/all_test_pred2.npz', meta_txt_path='./dataset/pre_data/combined_test_special.txt', conds_pkl_path='./dataset/pre_data/pred_condition/test_results/conditioned_results_v0_5_d45_n40.pkl' ) trues, preds = loader.trues, loader.preds # print(f"Total test samples: {len(preds)}") all_metrics_log = [] all_metrics_ppm = [] all_fields = [] enkf = EnKF(obs_std_scale=obs_std_scale, damping=damping) for i in trange(len(preds), desc="Running assimilation"): psi_f_ppm, psi_t_ppm, conds_preds, meta = loader.get_sample(idx=i, in_ppm=True) psi_f_log = np.log1p(np.maximum(psi_f_ppm, 0)) psi_t_log = np.log1p(np.maximum(psi_t_ppm, 0)) conds_log = np.log1p(np.maximum(conds_preds, 0)) if 
sample_method == "smart_two_pass": n1_ratio = float(sample_params.get('n1_ratio', 0.6)) n1_default = int(round(num_points * n1_ratio)) n1 = int(sample_params.get('n1', n1_default)) if num_points > 1: n1 = max(1, min(n1, num_points - 1)) else: n1 = 1 n2 = num_points - n1 psi_a_log, obs_xy, all_obs_val_log, _, _ = SamplingStrategies.smart_two_pass( enkf=enkf, psi_f=psi_f_log, conds_preds=conds_log, true_field=psi_t_log, n1=n1, n2=n2, phase1_method=sample_params.get('phase1_method', 'two_stage'), min_dist_p2=sample_params.get('min_dist_p2', 22), under_correct_alpha=sample_params.get('under_correct_alpha', 1.5), use_localization=sample_params.get('use_localization', use_localization), loc_radius_pixobs=sample_params.get('loc_radius_pixobs', 35.0), loc_radius_obsobs=sample_params.get('loc_radius_obsobs', 40.0), seed=42, verbose=sample_params.get('verbose', False), ) obs_value_log = np.asarray(all_obs_val_log) obs_value_ppm = DataLoader.log2ppm(obs_value_log) else: obs_xy, obs_value_ppm = SamplingStrategies.generate(psi_t_ppm, psi_f_ppm, num_points=num_points, seed=42, method=sample_method, ens_preds_ppm=conds_preds, **sample_params ) d_obs_log = np.log1p(np.maximum(obs_value_ppm, 0)) # avoid log(0) if use_localization: psi_a_log = enkf.enkf_localization(psi_f_log, conds_log, obs_xy, d_obs_log, loc_radius_pixobs=35.0, loc_radius_obsobs=30.0) else: psi_a_log = enkf.standard_enkf(psi_f_log, conds_log, obs_xy, d_obs_log) # 计算innovation,判断是否需要同化 obs_prior_at_obs = np.log1p(np.maximum( ObservationModel.observation_operator_H(psi_f_ppm, obs_xy), 0 )) obs_innovation = np.mean(np.abs(obs_prior_at_obs - d_obs_log)) threshold = config.get('innovation_threshold', 0.05) if obs_innovation < threshold: psi_a_log = psi_f_log else: psi_a_log = enkf.enkf_localization( psi_f_log, conds_log, obs_xy, d_obs_log, loc_radius_pixobs=35.0, loc_radius_obsobs=30.0 ) obs_value_log = np.log1p(np.maximum(obs_value_ppm, 0)) # avoid log(0) psi_a_ppm = DataLoader.log2ppm(psi_a_log) # 计算指标 metrics_log = 
PrintMetrics.print_metrics( i=i, wind_speed=meta['wind_speed'], wind_direction=meta['wind_direction'], sc=meta['sc'], source_number=meta['source_number'], true_field=psi_t_log, pred_field=psi_f_log, analysis=psi_a_log, obs_xy=obs_xy, metrics_save_flag=True, metrics_print_flag=False ) all_metrics_log.append(metrics_log) metrics_ppm = PrintMetrics.print_metrics( i=i, wind_speed=meta['wind_speed'], wind_direction=meta['wind_direction'], sc=meta['sc'], source_number=meta['source_number'], true_field=psi_t_ppm, pred_field=psi_f_ppm, analysis=psi_a_ppm, obs_xy=obs_xy, metrics_save_flag=True, metrics_print_flag=False ) all_metrics_ppm.append(metrics_ppm) all_fields.append({ "idx": i, "trues_log": psi_t_log, "preds_log": psi_f_log, "analysis_log": psi_a_log, "trues_ppm": psi_t_ppm, "preds_ppm": psi_f_ppm, "analysis_ppm": psi_a_ppm, "obs_xy": obs_xy, "obs_value_log": obs_value_log, "obs_value_ppm": obs_value_ppm, }) data_paths = f'./dataset/assim_conds/{data_path}' if not os.path.exists(data_paths): os.makedirs(data_paths) if save_fields_flag: np.savez_compressed(f'./dataset/assim_conds/{data_path}/fields_n{num_points}_{sample_method}_obs{int(obs_std_scale*100)}_damping{damping}.npz', all_fields=all_fields) all_metrics_df_log = pd.DataFrame(all_metrics_log) all_metrics_df_ppm = pd.DataFrame(all_metrics_ppm) if save_metrics_flag: all_metrics_df_ppm.to_csv(f'./dataset/assim_conds/{data_path}/assimi_ppm_n{num_points}_{sample_method}_obs{int(obs_std_scale*100)}_damping{damping}.csv', index=False) all_metrics_df_log.to_csv(f'./dataset/assim_conds/{data_path}/assimi_log_n{num_points}_{sample_method}_obs{int(obs_std_scale*100)}_damping{damping}.csv', index=False) print("\n=== 平均指标提升 ===") metrics_list = ['r2', 'w_r2_plume', 'r2_plume','mse', 'mae'] for metric in metrics_list: before_mean = all_metrics_df_log[f"{metric}_before"].mean() after_mean = all_metrics_df_log[f"{metric}_after"].mean() delta = after_mean - before_mean print(f'{metric.upper()}: before={before_mean:.4f}, 
after={after_mean:.4f}, delta={delta:.4f}') before_mean_ppm = all_metrics_df_ppm[f"{metric}_before"].mean() after_mean_ppm = all_metrics_df_ppm[f"{metric}_after"].mean() delta_ppm = after_mean_ppm - before_mean_ppm print(f'PPM {metric.upper()}: before={before_mean_ppm:.4f}, after={after_mean_ppm:.4f}, delta={delta_ppm:.4f}')