| import pandas as pd
|
| import numpy as np
|
| import os
|
| from scipy import stats
|
| from scipy.spatial import cKDTree
|
| from scipy.ndimage import binary_dilation
|
| from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
|
| from skimage.metrics import structural_similarity as ssim
|
| from tqdm import trange
|
|
|
| import matplotlib.pyplot as plt
|
| import matplotlib.ticker as ticker
|
| import matplotlib.cm as cm
|
| import matplotlib
|
| matplotlib.rcParams['font.sans-serif'] = ['Arial']
|
| matplotlib.rcParams['font.size'] = 16
|
|
|
|
|
|
|
class DataLoader:
    """Load prediction/ground-truth fields, run metadata and ensemble data.

    Responsibilities:
    1. load_predictions: read predicted and true concentration fields from the npz file
    2. load_metadata:    parse run metadata (wind speed/direction, stability class,
                         source id) out of the meta txt file
    3. load_conds_data:  read conditional ensemble predictions from the pkl file
    4. log2ppm:          convert log-scale concentration to ppm concentration
    5. get_sample:       fetch one sample (pred, true, ensemble, meta) by index
    """

    def __init__(self, pred_npz_path, meta_txt_path, conds_pkl_path):
        self.pred_npz_path = pred_npz_path
        self.meta_txt_path = meta_txt_path
        self.conds_pkl_path = conds_pkl_path
        self.load_predictions()
        self.meta = self.load_metadata()
        self.conds_data = self.load_conds_data()

    def load_predictions(self):
        """Read preds/trues, drop the channel axis, zero out building pixels."""
        archive = np.load(self.pred_npz_path)
        _, outdoor_mask = PrintMetrics.get_building_area()
        # Mask broadcast over the sample axis keeps only non-building pixels.
        self.preds = archive['preds'].squeeze(1) * outdoor_mask[None, :, :]
        self.trues = archive['trues'].squeeze(1) * outdoor_mask[None, :, :]
        return self.preds, self.trues

    def load_metadata(self):
        """Parse the meta txt into a DataFrame with wind/stability/source columns."""
        df = pd.read_csv(self.meta_txt_path, sep=',', header=None)
        df.columns = ['npz_colname']
        # Names look like "v1_0_d270_sc2_s10": speed (underscore = decimal point),
        # direction, stability class, source number.
        pattern = r'v([0-9_]+)_d(\d+)_sc(\d+)_s(\d+)'
        extracted = df['npz_colname'].str.extract(pattern)
        df[['wind_speed', 'wind_direction', 'sc', 'source_number']] = extracted
        df['wind_speed'] = df['wind_speed'].str.replace('_', '.').astype(float)
        for column in ('wind_direction', 'sc', 'source_number'):
            df[column] = df[column].astype(int)
        return df

    def load_conds_data(self):
        """Load the pickled conditional-ensemble predictions."""
        return np.load(self.conds_pkl_path, allow_pickle=True)

    @staticmethod
    def log2ppm(log_conc):
        """Convert log1p-scale concentration to ppm.

        Inputs are capped at 15 before expm1 to avoid overflow; negative
        outputs are clamped to zero.
        """
        capped = np.minimum(np.asarray(log_conc), 15.0)
        ppm = np.expm1(capped) * (0.7449)
        return np.maximum(ppm, 0.0)

    def get_sample(self, idx, in_ppm=True):
        """Return (pred_field, true_field, ensemble_preds, meta_row) for sample idx."""
        pred_field = self.preds[idx]
        true_field = self.trues[idx]
        meta_row = self.meta.iloc[idx]
        ensemble = self.conds_data[idx]['conds']['preds']
        if in_ppm:
            pred_field = DataLoader.log2ppm(pred_field)
            true_field = DataLoader.log2ppm(true_field)
            ensemble = DataLoader.log2ppm(ensemble)
        return pred_field, true_field, ensemble, meta_row
|
|
|
class ObservationModel:
    """Linear observation operators (bilinear point sampling of fields).

    1. observation_operator_H:     sample one field at fractional (x, y) sites
    2. observation_operator_H_ens: sample every ensemble member at the same sites
    """

    @staticmethod
    def observation_operator_H(psi, obs_xy):
        """Bilinearly interpolate ``psi`` (H, W) at ``obs_xy`` (n_obs, 2).

        Coordinates are (x, y) = (column, row); returns an (n_obs,) vector.
        """
        n_rows, n_cols = psi.shape
        # Clamp just inside the grid so floor()+1 is always a valid index.
        px = np.clip(obs_xy[:, 0], 0, n_cols - 1 - 1e-6)
        py = np.clip(obs_xy[:, 1], 0, n_rows - 1 - 1e-6)
        col0 = np.floor(px).astype(int)
        row0 = np.floor(py).astype(int)
        col1 = np.clip(col0 + 1, 0, n_cols - 1)
        row1 = np.clip(row0 + 1, 0, n_rows - 1)
        fx = px - col0
        fy = py - row0
        corner00 = psi[row0, col0]
        corner10 = psi[row0, col1]
        corner01 = psi[row1, col0]
        corner11 = psi[row1, col1]
        return (
            corner00 * (1 - fx) * (1 - fy) +
            corner10 * fx * (1 - fy) +
            corner01 * (1 - fx) * fy +
            corner11 * fx * fy
        )

    @staticmethod
    def observation_operator_H_ens(psi_ens, obs_xy):
        """Apply the bilinear observation operator to a whole ensemble.

        psi_ens: (N_ens, H, W)
        obs_xy : (n_obs, 2)
        return : HX (N_ens, n_obs)
        """
        n_members, n_rows, n_cols = psi_ens.shape
        px = np.clip(obs_xy[:, 0], 0, n_cols - 1 - 1e-6)
        py = np.clip(obs_xy[:, 1], 0, n_rows - 1 - 1e-6)
        col0 = np.floor(px).astype(np.int64)
        row0 = np.floor(py).astype(np.int64)
        col1 = np.clip(col0 + 1, 0, n_cols - 1)
        row1 = np.clip(row0 + 1, 0, n_rows - 1)
        fx = px - col0
        fy = py - row0
        # Advanced indexing broadcasts the per-site corners across members.
        corner00 = psi_ens[:, row0, col0]
        corner10 = psi_ens[:, row0, col1]
        corner01 = psi_ens[:, row1, col0]
        corner11 = psi_ens[:, row1, col1]
        sampled = (
            corner00 * (1 - fx) * (1 - fy) +
            corner10 * fx * (1 - fy) +
            corner01 * (1 - fx) * fy +
            corner11 * fx * fy
        )
        return sampled
|
|
|
class SamplingStrategies:
    """Strategies for placing virtual observation sites on a concentration field.

    All strategies return (obs_xy, obs_val): site coordinates (n, 2) in (x, y)
    pixel units and the true-field values bilinearly sampled at those sites.
    Building pixels (from PrintMetrics.get_building_area) are always excluded.
    """

    @staticmethod
    def sample_random(field_shape, num_points, seed=42):
        """Uniformly sample ``num_points`` distinct non-building pixels."""
        rng = np.random.default_rng(seed)
        H, W = field_shape
        _, non_building_mask = PrintMetrics.get_building_area()
        valid_idx = np.where(non_building_mask.ravel())[0]
        chosen = rng.choice(valid_idx, size=num_points, replace=False)
        yy, xx = np.meshgrid(np.arange(H), np.arange(W), indexing="ij")
        coords = np.stack([xx.ravel(), yy.ravel()], axis=1)
        return coords[chosen].astype(float)

    @staticmethod
    def sample_uniform(field_shape, num_points, margin=20):
        """Place points on a near-regular grid (inset by ``margin`` pixels),
        drop those on buildings, then thin evenly down to ``num_points``.

        NOTE(review): after removing building points fewer than ``num_points``
        sites may remain — callers get whatever is left; confirm acceptable.
        """
        H, W = field_shape
        # Grid aspect ratio follows the field so spacing is roughly isotropic.
        nx = int(np.ceil(np.sqrt(num_points * W / H)))
        ny = int(np.ceil(num_points / nx))
        xs = np.linspace(margin, W - 1 - margin, nx)
        ys = np.linspace(margin, H - 1 - margin, ny)
        xx, yy = np.meshgrid(xs, ys)
        grid_xy = np.stack([xx.ravel(), yy.ravel()], axis=1)
        _, non_building_mask = PrintMetrics.get_building_area()
        xi = np.clip(grid_xy[:, 0].astype(int), 0, W - 1)
        yi = np.clip(grid_xy[:, 1].astype(int), 0, H - 1)
        valid = non_building_mask[yi, xi] == 1
        grid_xy = grid_xy[valid]
        if len(grid_xy) > num_points:
            idx = np.linspace(0, len(grid_xy) - 1, num_points).astype(int)
            grid_xy = grid_xy[idx]
        return grid_xy

    @staticmethod
    def two_stage_sampling(
            true_field,
            pred_field,
            num_points,
            ens_preds_ppm=None,
            seed=42,
            min_dist=22,
            n1_ratio=0.65,
            stage1_grad_power=0.8,
            stage1_value_power=1.2,
            stage1_center_boost=1.2,
    ):
        """Two-stage plume-aware site selection.

        Stage 1: weighted (gradient^a * value^b) repulsive sampling on a
        dilated band around the plume's high-value core.
        Stage 2: the field peak plus a cluster of points within a small
        radius of it. Falls back to random non-building sites when the
        predicted field is essentially zero.

        ``ens_preds_ppm`` is accepted for interface symmetry but unused here.
        """

        def repulse_pick(candidate_idx, weights, k, selected_idx):
            # Weighted over-sampling followed by a greedy min-distance
            # (Poisson-disk-like) filter against already selected points.
            if k <= 0 or len(candidate_idx) == 0:
                return list(selected_idx)
            candidate_idx = np.asarray(candidate_idx, dtype=np.int64)
            weights = np.maximum(np.asarray(weights, dtype=float), 0.0)
            if weights.sum() <= 0:
                weights = np.ones_like(weights)
            weights = weights / weights.sum()
            overs = min(len(candidate_idx), max(k * 15, 200))
            cand = rng.choice(candidate_idx, size=overs, replace=False, p=weights)
            selected = list(selected_idx)
            for idx in cand:
                xy = coords[idx]
                if not selected:
                    selected.append(idx)
                    continue
                sel_xy = coords[np.asarray(selected)]
                if cKDTree(sel_xy).query(xy, k=1)[0] >= min_dist:
                    selected.append(idx)
                if len(selected) >= k + len(selected_idx):
                    break
            return selected

        rng = np.random.default_rng(seed)
        H, W = pred_field.shape
        yy, xx = np.meshgrid(np.arange(H), np.arange(W), indexing="ij")
        coords = np.stack([xx.ravel(), yy.ravel()], axis=1)
        v = np.maximum(pred_field, 0.0).ravel()
        vmax = float(v.max())
        _, non_building_mask = PrintMetrics.get_building_area()
        non_building_flat = non_building_mask.ravel().astype(bool)

        # Degenerate prediction (all ~zero): fall back to pure random sites.
        if vmax <= 1e-6:
            valid_idx = np.where(non_building_flat)[0]
            idx = rng.choice(valid_idx, size=num_points, replace=False)
            obs_xy = coords[idx]
            obs_val = ObservationModel.observation_operator_H(true_field, obs_xy)
            return obs_xy, obs_val

        # Split the budget: n1 spread points + n_center points near the peak.
        n_center_ratio = 1 - n1_ratio
        n_center = max(2, int(num_points * n_center_ratio))
        n1 = num_points - n_center

        # Work in log space so weights are not dominated by the peak value.
        z = np.log1p(np.maximum(pred_field, 0.0))
        z_flat = z.ravel()
        gx, gy = np.gradient(z)
        grad = np.sqrt(gx**2 + gy**2).ravel()
        nz = z_flat > 1e-6
        z_nz = z_flat[nz]
        if z_nz.size < 50:
            # Very sparse plume: sample where the gradient is largest.
            support_mask = (grad.reshape(H, W) > np.quantile(grad, 0.90))
        else:
            # 70-90% quantile band of the nonzero values, dilated outward.
            lo = np.quantile(z_nz, 0.70)
            hi = np.quantile(z_nz, 0.90)
            core_mask = (z >= lo) & (z <= hi)
            r_out = 12
            support_mask = binary_dilation(core_mask, iterations=r_out)

        support_idx = np.where(support_mask.ravel() & non_building_flat)[0]
        if len(support_idx) < num_points * 2:
            support_idx = np.where(non_building_flat)[0]

        # Stage 1: gradient- and value-weighted repulsive picks.
        weights1 = (
            (grad[support_idx] ** stage1_grad_power) *
            (z_flat[support_idx] ** stage1_value_power + 1e-6)
        )
        if stage1_center_boost > 1.0:
            weights1 *= (1 + stage1_center_boost * (z_flat[support_idx] / z_flat.max()))
        selected = repulse_pick(support_idx, weights1, n1, [])

        # Stage 2: always include the (non-building) field peak ...
        peak_idx = np.argmax(z_flat * non_building_flat.astype(float))
        peak_xy = coords[peak_idx]
        selected.append(int(peak_idx))

        # ... plus value-weighted extras within a small radius of the peak.
        if n_center > 1:
            peak_radius = 10
            stage2_idx = np.where(
                non_building_flat &
                (np.sqrt((coords[:, 0] - peak_xy[0])**2 +
                         (coords[:, 1] - peak_xy[1])**2) <= peak_radius)
            )[0]
            stage2_idx = np.setdiff1d(stage2_idx, np.array(selected))

            if len(stage2_idx) >= 1:
                weights2 = z_flat[stage2_idx]
                weights2 = weights2 / (weights2.sum() + 1e-12)
                overs = min(len(stage2_idx), max((n_center - 1) * 10, 20))
                cands = rng.choice(stage2_idx, size=overs, replace=False, p=weights2)
                for idx in cands:
                    xy = coords[idx]
                    # Looser spacing (5 px) inside the peak cluster.
                    if cKDTree(coords[np.array(selected)]).query(xy, k=1)[0] >= 5:
                        selected.append(int(idx))
                    # NOTE(review): len([]) is 0 — condition is simply
                    # len(selected) >= n_center; kept as written.
                    if len(selected) >= n_center + len([]):
                        break
                    if len(selected) - (num_points - n_center) >= n_center:
                        break
        selected = list(dict.fromkeys(selected))

        # Top up from the support band if the repulsion left us short.
        if len(selected) < num_points:
            remain = np.setdiff1d(support_idx, np.array(selected))
            if len(remain) > 0:
                w_remain = (
                    (grad[remain] ** stage1_grad_power) *
                    (z_flat[remain] ** stage1_value_power + 1e-6)
                )
                extra = repulse_pick(remain, w_remain,
                                     num_points - len(selected), selected)
                selected = extra

        obs_xy = coords[np.array(selected[:num_points])]
        obs_val = ObservationModel.observation_operator_H(true_field, obs_xy)

        return obs_xy, obs_val

    @staticmethod
    def two_stage_pro(
            true_field,
            pred_field,
            num_points,
            ens_preds_ppm=None,
            seed=42,
            min_dist=22,
            n1_ratio=0.65,
            stage1_grad_power=0.8,
            stage1_value_power=1.2,
            stage1_center_boost=1.2,
    ):
        """Hardened variant of :meth:`two_stage_sampling`.

        Differences: duplicate-aware repulsion, a budget-scaled dilation
        radius, progressively relaxed min-distance retries, random top-ups
        (support band first, then any non-building pixel), and a final
        assertion that exactly ``num_points`` sites were produced.
        """
        # Redundant local imports (already imported at module level) — kept as-is.
        import numpy as np
        from scipy.spatial import cKDTree
        from scipy.ndimage import binary_dilation

        def repulse_pick(candidate_idx, weights, k, selected_idx, this_min_dist):
            # Same greedy repulsion as the basic version, but skips indices
            # already selected and takes the spacing as a parameter.
            if k <= 0 or len(candidate_idx) == 0:
                return list(selected_idx)

            candidate_idx = np.asarray(candidate_idx, dtype=np.int64)
            weights = np.maximum(np.asarray(weights, dtype=float), 0.0)

            if weights.sum() <= 0:
                weights = np.ones_like(weights, dtype=float)

            weights = weights / weights.sum()

            overs = min(len(candidate_idx), max(k * 15, 200))
            cand = rng.choice(candidate_idx, size=overs, replace=False, p=weights)

            selected = list(selected_idx)
            for idx in cand:
                idx = int(idx)
                if idx in selected:
                    continue

                xy = coords[idx]
                if not selected:
                    selected.append(idx)
                    continue

                sel_xy = coords[np.asarray(selected)]
                if cKDTree(sel_xy).query(xy, k=1)[0] >= this_min_dist:
                    selected.append(idx)

                if len(selected) >= k + len(selected_idx):
                    break

            return selected

        rng = np.random.default_rng(seed)
        H, W = pred_field.shape
        yy, xx = np.meshgrid(np.arange(H), np.arange(W), indexing="ij")
        coords = np.stack([xx.ravel(), yy.ravel()], axis=1)

        v = np.maximum(pred_field, 0.0).ravel()
        vmax = float(v.max())

        _, non_building_mask = PrintMetrics.get_building_area()
        non_building_flat = non_building_mask.ravel().astype(bool)

        # Degenerate prediction: random fallback, but fail loudly if the
        # domain cannot supply enough non-building sites.
        if vmax <= 1e-6:
            valid_idx = np.where(non_building_flat)[0]
            idx = rng.choice(valid_idx, size=min(num_points, len(valid_idx)), replace=False)

            if len(idx) < num_points:
                raise ValueError(f"Not enough valid non-building points: need {num_points}, got {len(idx)}")

            obs_xy = coords[idx]
            obs_val = ObservationModel.observation_operator_H(true_field, obs_xy)
            return obs_xy, obs_val

        n_center_ratio = 1 - n1_ratio
        n_center = max(2, int(num_points * n_center_ratio))
        n1 = num_points - n_center

        z = np.log1p(np.maximum(pred_field, 0.0))
        z_flat = z.ravel()
        gx, gy = np.gradient(z)
        grad = np.sqrt(gx**2 + gy**2).ravel()
        nz = z_flat > 1e-6
        z_nz = z_flat[nz]
        if z_nz.size < 50:
            support_mask = (grad.reshape(H, W) > np.quantile(grad, 0.90))
        else:
            lo = np.quantile(z_nz, 0.70)
            hi = np.quantile(z_nz, 0.90)
            core_mask = (z >= lo) & (z <= hi)

            # Dilation radius grows with the point budget, clipped to [12, 24].
            r_out = int(np.clip(num_points / 3 + 16 / 3, 12, 24))
            support_mask = binary_dilation(core_mask, iterations=r_out)

        support_idx = np.where(support_mask.ravel() & non_building_flat)[0]
        if len(support_idx) < num_points * 2:
            support_idx = np.where(non_building_flat)[0]

        weights1 = (
            (grad[support_idx] ** stage1_grad_power) *
            (z_flat[support_idx] ** stage1_value_power + 1e-6)
        )

        if stage1_center_boost > 1.0:
            weights1 *= (1 + stage1_center_boost * (z_flat[support_idx] / (z_flat.max() + 1e-12)))
        selected = repulse_pick(support_idx, weights1, n1, [], min_dist)

        peak_idx = int(np.argmax(z_flat * non_building_flat.astype(float)))
        peak_xy = coords[peak_idx]
        selected.append(int(peak_idx))

        if n_center > 1:
            peak_radius = 10
            stage2_idx = np.where(
                non_building_flat &
                (np.sqrt((coords[:, 0] - peak_xy[0]) ** 2 +
                         (coords[:, 1] - peak_xy[1]) ** 2) <= peak_radius)
            )[0]
            stage2_idx = np.setdiff1d(stage2_idx, np.array(selected))

            if len(stage2_idx) >= 1:
                weights2 = z_flat[stage2_idx]
                weights2 = weights2 / (weights2.sum() + 1e-12)

                overs = min(len(stage2_idx), max((n_center - 1) * 10, 20))
                cands = rng.choice(stage2_idx, size=overs, replace=False, p=weights2)

                for idx in cands:
                    idx = int(idx)
                    xy = coords[idx]
                    if cKDTree(coords[np.array(selected)]).query(xy, k=1)[0] >= 5:
                        selected.append(idx)

                    # NOTE(review): len([]) is 0 — kept as written.
                    if len(selected) >= n_center + len([]):
                        break
                    if len(selected) - (num_points - n_center) >= n_center:
                        break

        selected = list(dict.fromkeys(selected))

        # Early exit when stages 1+2 already met the budget.
        if len(selected) >= num_points:
            selected = selected[:num_points]
            obs_xy = coords[np.array(selected)]
            obs_val = ObservationModel.observation_operator_H(true_field, obs_xy)
            return obs_xy, obs_val

        # Retry with progressively relaxed spacing until the budget is met.
        remain = np.setdiff1d(support_idx, np.array(selected))
        if len(remain) > 0:
            for d_try in [
                min_dist,
                max(1, int(min_dist * 0.7)),
                max(1, int(min_dist * 0.4)),
                5,
                3
            ]:
                if len(selected) >= num_points:
                    break

                remain = np.setdiff1d(support_idx, np.array(selected))
                if len(remain) == 0:
                    break

                w_remain = (
                    (grad[remain] ** stage1_grad_power) *
                    (z_flat[remain] ** stage1_value_power + 1e-6)
                )

                selected = repulse_pick(
                    remain,
                    w_remain,
                    num_points - len(selected),
                    selected,
                    d_try
                )

        selected = list(dict.fromkeys(selected))

        # Random top-up from the support band, ignoring spacing.
        if len(selected) < num_points:
            remain_support = np.setdiff1d(support_idx, np.array(selected))

            if len(remain_support) > 0:
                need = num_points - len(selected)
                extra = rng.choice(
                    remain_support,
                    size=min(need, len(remain_support)),
                    replace=False
                )
                selected.extend(extra.tolist())

        selected = list(dict.fromkeys(selected))

        # Last resort: any non-building pixel.
        if len(selected) < num_points:
            all_valid = np.where(non_building_flat)[0]
            remain_all = np.setdiff1d(all_valid, np.array(selected))

            if len(remain_all) > 0:
                need = num_points - len(selected)
                extra = rng.choice(
                    remain_all,
                    size=min(need, len(remain_all)),
                    replace=False
                )
                selected.extend(extra.tolist())

        selected = list(dict.fromkeys(selected))
        selected = selected[:num_points]
        assert len(selected) == num_points, f"Expected {num_points} points, got {len(selected)}"

        obs_xy = coords[np.array(selected)]
        obs_val = ObservationModel.observation_operator_H(true_field, obs_xy)

        return obs_xy, obs_val

    @staticmethod
    def smart_two_pass(
            enkf,
            psi_f,
            conds_preds,
            true_field,
            n1,
            n2,
            n_rounds=2,
            phase1_method='two_stage',
            min_dist_p2=22,
            under_correct_alpha=1.5,
            use_localization=False,
            loc_radius_pixobs=35.0,
            loc_radius_obsobs=40.0,
            seed=42,
            verbose=True,
    ):
        """Multi-round iterative site selection + EnKF assimilation.

        Per round:
            Phase 1 — pick n1 sites from the current prior field and run a
                      pilot EnKF;
            Phase 2 — locate under-corrected regions from the pilot residual,
                      add n2 more sites, and run the final EnKF;
            the round's analysis becomes the next round's prior (psi_f).

        Parameters
        ----------
        enkf : EnKF instance
        psi_f : initial prior field (H, W)
        conds_preds : ensemble fields (N_ens, H, W) supplying the covariance;
                      unchanged across rounds
        true_field : ground-truth field (H, W), used only to extract
                     observation values
        n1 : number of Phase-1 sites per round
        n2 : number of Phase-2 sites per round
        n_rounds : number of rounds (1 reproduces the original two-phase pass)
        phase1_method : Phase-1 sampling strategy ('two_stage' or any method
                        supported by :meth:`generate`)
        min_dist_p2 : minimum pixel distance of Phase-2 sites to existing sites
        under_correct_alpha : exponent on the under-correction score
        use_localization : whether to use the localized EnKF update
        loc_radius_pixobs / loc_radius_obsobs : localization radii
        seed : random seed
        verbose : print per-round progress

        Returns
        -------
        psi_a_final : final analysis field (H, W)
        all_obs_xy : accumulated site coordinates (n_rounds*(n1+n2), 2)
        all_obs_val : accumulated observation values
        psi_pilot : last round's pilot (Phase-1) analysis field
        obs_xy_p1_last : last round's Phase-1 site coordinates
        """
        conds_preds = np.asarray(conds_preds)
        N_ens, H, W = conds_preds.shape
        rng = np.random.default_rng(seed)

        _, non_building_mask = PrintMetrics.get_building_area()
        non_building_flat = non_building_mask.ravel().astype(bool)
        yy, xx = np.meshgrid(np.arange(H), np.arange(W), indexing="ij")
        coords = np.stack([xx.ravel(), yy.ravel()], axis=1)

        # NOTE(review): X_f_base is computed here but never used — X_f_cur is
        # rebuilt inside the loop each round; confirm before removing.
        ens_mean = np.mean(conds_preds, axis=0)
        X_f_base = conds_preds - ens_mean[None, :, :] + psi_f[None, :, :]

        all_obs_xy_list = []
        all_obs_val_list = []

        psi_current = psi_f
        psi_pilot = None
        obs_xy_p1_last = None

        for round_idx in range(n_rounds):
            round_seed = seed + round_idx

            # ---- Phase 1: sample sites from the current prior field. ----
            if phase1_method == 'two_stage':
                obs_xy_p1, obs_val_p1 = SamplingStrategies.two_stage_sampling(
                    true_field=true_field, pred_field=psi_current,
                    num_points=n1, seed=round_seed)
            else:
                obs_xy_p1, obs_val_p1 = SamplingStrategies.generate(
                    true_field, psi_current, n1, method=phase1_method, seed=round_seed)

            # Re-center the (fixed-spread) ensemble on the current prior.
            ens_mean_cur = np.mean(conds_preds, axis=0)
            X_f_cur = conds_preds - ens_mean_cur[None, :, :] + psi_current[None, :, :]

            if use_localization:
                psi_pilot = enkf._enkf_update_localized(
                    X_f_cur, obs_xy_p1, obs_val_p1,
                    loc_radius_pixobs, loc_radius_obsobs, round_seed)
            else:
                psi_pilot = enkf._enkf_update_standard(X_f_cur, obs_xy_p1, obs_val_p1)

            if verbose:
                from sklearn.metrics import r2_score as _r2
                print(f"[SmartEnKF] Round {round_idx+1}/{n_rounds} Phase1: "
                      f"{n1} 点, pilot R²={_r2(true_field.ravel(), psi_pilot.ravel()):.4f}")

            # ---- Phase 2: find under-corrected plume regions. ----
            psi_f_flat = psi_current.ravel()
            psi_pilot_flat = psi_pilot.ravel()
            correction_map = np.abs(psi_pilot_flat - psi_f_flat)

            # Candidate pixels: non-building plume support above a low quantile.
            nz_vals = psi_f_flat[non_building_flat & (psi_f_flat > 1e-4)]
            prior_thresh = np.quantile(nz_vals, 0.20) if len(nz_vals) > 20 else 1e-4
            plume_support = non_building_flat & (psi_f_flat > prior_thresh)
            cand_idx = np.where(plume_support)[0]
            if len(cand_idx) < n2 * 3:
                cand_idx = np.where(non_building_flat & (psi_f_flat > 1e-6))[0]

            # High prior value but small pilot correction => under-corrected.
            prior_cand = psi_f_flat[cand_idx]
            corr_cand = correction_map[cand_idx]
            prior_norm = prior_cand / (prior_cand.max() + 1e-12)
            corr_norm = corr_cand / (corr_cand.max() + 1e-12)
            under_score = prior_norm * (1.0 - corr_norm + 0.05)

            # Favor candidates far from the Phase-1 sites.
            p1_tree = cKDTree(obs_xy_p1)
            dist_p1, _ = p1_tree.query(coords[cand_idx], k=1)
            dist_w = np.tanh(dist_p1 / (min_dist_p2 * 2.5))

            weights_p2 = (under_score ** under_correct_alpha) * (dist_w + 0.05)
            weights_p2 = np.maximum(weights_p2, 1e-12)
            weights_p2 /= weights_p2.sum()

            rng_round = np.random.default_rng(round_seed)
            n_over = min(len(cand_idx), max(n2 * 30, 600))
            cands = rng_round.choice(cand_idx, size=n_over, replace=False, p=weights_p2)

            # Greedy min-distance filter against Phase-1 and Phase-2 sites.
            selected_p2 = []
            for cidx in cands:
                xy = coords[cidx]
                if p1_tree.query(xy, k=1)[0] < min_dist_p2:
                    continue
                if selected_p2:
                    if cKDTree(coords[np.array(selected_p2)]).query(xy, k=1)[0] < min_dist_p2:
                        continue
                selected_p2.append(int(cidx))
                if len(selected_p2) >= n2:
                    break

            # Random top-up if the spacing constraint left us short.
            if len(selected_p2) < n2:
                remain = np.setdiff1d(cand_idx, np.array(selected_p2, dtype=int))
                extra = rng_round.choice(remain,
                                         size=min(n2 - len(selected_p2), len(remain)),
                                         replace=False)
                selected_p2.extend(extra.tolist())

            obs_xy_p2 = coords[np.array(selected_p2[:n2])]
            obs_val_p2 = ObservationModel.observation_operator_H(true_field, obs_xy_p2)

            if verbose:
                print(f"[SmartEnKF] Round {round_idx+1}/{n_rounds} Phase2: 补充 {n2} 个欠校正区域点")

            # ---- Final update of the round with all n1+n2 sites. ----
            round_obs_xy = np.vstack([obs_xy_p1, obs_xy_p2])
            round_obs_val = np.concatenate([obs_val_p1, obs_val_p2])

            if use_localization:
                psi_a_round = enkf._enkf_update_localized(
                    X_f_cur, round_obs_xy, round_obs_val,
                    loc_radius_pixobs, loc_radius_obsobs, round_seed)
            else:
                psi_a_round = enkf._enkf_update_standard(X_f_cur, round_obs_xy, round_obs_val)

            if verbose:
                from sklearn.metrics import r2_score as _r2
                print(f"[SmartEnKF] Round {round_idx+1}/{n_rounds} Final: "
                      f"{n1+n2} 点, R²={_r2(true_field.ravel(), psi_a_round.ravel()):.4f}")

            all_obs_xy_list.append(round_obs_xy)
            all_obs_val_list.append(round_obs_val)
            psi_current = np.maximum(psi_a_round, 0.0)
            obs_xy_p1_last = obs_xy_p1

        all_obs_xy = np.vstack(all_obs_xy_list)
        all_obs_val = np.concatenate(all_obs_val_list)

        return (psi_current, all_obs_xy, all_obs_val,
                np.maximum(psi_pilot, 0.0), obs_xy_p1_last)

    @staticmethod
    def generate(true_field, pred_field, num_points, method="uniform", seed=42,
                 enkf=None, conds_preds=None, **sample_params):
        """Dispatch to a sampling strategy by name.

        Returns (obs_xy, obs_val) for all methods; 'smart_two_pass' returns
        the richer 5-tuple of :meth:`smart_two_pass` instead.
        """
        field_shape = true_field.shape
        if method == "random":
            obs_xy = SamplingStrategies.sample_random(field_shape, num_points, seed)
        elif method == "uniform":
            obs_xy = SamplingStrategies.sample_uniform(field_shape, num_points)
        elif method == "two_stage":
            obs_xy, _ = SamplingStrategies.two_stage_sampling(
                true_field,
                pred_field,
                num_points,
                seed=seed,
                **sample_params
            )
        elif method == "two_stage_pro":
            obs_xy, _ = SamplingStrategies.two_stage_pro(
                true_field,
                pred_field,
                num_points,
                seed=seed,
                **sample_params
            )
        elif method == "smart_two_pass":
            if enkf is None or conds_preds is None:
                raise ValueError(
                    "method='smart_two_pass' 需要传入 enkf 实例和 conds_preds 集合场。"
                )

            # Split the budget into Phase-1 / Phase-2 counts; explicit n1/n2
            # in sample_params override the ratio-derived default.
            n1_ratio = float(sample_params.pop('n1_ratio', 0.6))
            n1_default = int(round(num_points * n1_ratio))
            n1 = int(sample_params.pop('n1', n1_default))
            if num_points > 1:
                n1 = max(1, min(n1, num_points - 1))
            else:
                n1 = 1
            n2 = int(sample_params.pop('n2', num_points - n1))
            return SamplingStrategies.smart_two_pass(
                enkf=enkf,
                psi_f=pred_field,
                conds_preds=conds_preds,
                true_field=true_field,
                n1=n1,
                n2=n2,
                seed=seed,
                **sample_params,
            )
        else:
            raise ValueError(f"Unknown observation sampling method: {method}")

        obs_val = ObservationModel.observation_operator_H(true_field, obs_xy)
        return obs_xy, obs_val
|
|
|
|
|
class EnKF:
    """Stochastic (perturbed-observation) Ensemble Kalman Filter for 2-D fields.

    The ensemble is re-centered on the deterministic prior ``psi_f`` before
    every update, so ``conds_preds`` only supplies the spread (covariance)
    while the mean comes from the network prediction.

    Parameters
    ----------
    obs_std_scale : relative observation-error std, as a fraction of |obs|
        (the absolute value is floored at 1.0).
    damping : multiplier applied to the Kalman increment (1.0 = full update).
    jitter : diagonal regularization added to the sampled R matrix.
    """

    def __init__(
            self,
            obs_std_scale=0.08,
            damping=1.0,
            jitter=1e-5,
    ):
        self.obs_std_scale = obs_std_scale
        self.damping = damping
        self.jitter = jitter

    # ------------------------------------------------------------------ #
    # Public API                                                         #
    # ------------------------------------------------------------------ #

    def standard_enkf(self, psi_f, conds_preds, obs_xy, d_obs):
        """Global (non-localized) EnKF analysis.

        Parameters
        ----------
        psi_f : (H, W) best prior field (U-Net prediction).
        conds_preds : (N_ens, H, W) ensemble from perturbed parameters.
        obs_xy : (n_obs, 2) station coordinates (x, y) in pixel units.
        d_obs : (n_obs,) observed concentrations at the stations.

        Returns
        -------
        (H, W) analysis mean, clipped at 0.
        """
        conds_preds = np.asarray(conds_preds)
        # Shift the ensemble so its mean equals the deterministic prior.
        ens_mean = np.mean(conds_preds, axis=0)
        X_f = conds_preds - ens_mean[None, :, :] + psi_f[None, :, :]
        # Identical math to the previous inline version, deduplicated into
        # the shared helper (fixed seed 42 preserved for reproducibility).
        return self._enkf_update_standard(X_f, obs_xy, d_obs)

    def enkf_localization(self, psi_f, conds_preds, obs_xy, d_obs,
                          loc_radius_pixobs=40.0,
                          loc_radius_obsobs=60.0,
                          seed=42,
                          SAVE_DIAGNOSTICS=False,
                          ):
        """Gaussian-localized EnKF analysis.

        Same contract as :meth:`standard_enkf` plus localization radii
        (pixel-to-obs and obs-to-obs, in pixels). With ``SAVE_DIAGNOSTICS``
        True it also prints a covariance-spectrum diagnostic and returns
        ``(psi_a, diag)``; otherwise it returns ``psi_a`` alone.
        """
        conds_preds = np.asarray(conds_preds)
        N_ens, H, W = conds_preds.shape
        ens_mean = np.mean(conds_preds, axis=0)
        X_f = conds_preds - ens_mean[None, :, :] + psi_f[None, :, :]

        psi_a = self._enkf_update_localized(
            X_f, obs_xy, d_obs, loc_radius_pixobs, loc_radius_obsobs, seed)

        if not SAVE_DIAGNOSTICS:
            return psi_a

        # Spectrum diagnostics of P_e. Previously this SVD of the
        # (H*W, N_ens) anomaly matrix ran on every call even when the
        # result was discarded; it is now computed only on demand.
        X_f_flat = X_f.reshape(N_ens, -1)
        A_prime = (X_f_flat - np.mean(X_f_flat, axis=0)[None, :]).T
        U_ens, sigma, Vt_ens = np.linalg.svd(A_prime / np.sqrt(N_ens - 1),
                                             full_matrices=False)
        eigenvalues = sigma ** 2

        # Effective rank (participation ratio) of the ensemble covariance.
        r_eff = (eigenvalues.sum() ** 2) / (eigenvalues ** 2).sum()
        lambda1 = eigenvalues[0]
        # NOTE(review): this picks the second-to-last eigenvalue, not the
        # true minimum — kept as in the original; confirm intent.
        lambda_min = eigenvalues[-2]
        ratio_1_2 = eigenvalues[0] / eigenvalues[1] if len(eigenvalues) > 1 else np.inf
        # Location of the dominant mode's extremum in the field.
        u1 = U_ens[:, 0].reshape(H, W)
        u1_peak = np.unravel_index(np.abs(u1).argmax(), u1.shape)

        print("=" * 50)
        print(f"[P_e 谱诊断]")
        print(f" 指标1 r_eff = {r_eff:.2f} # (Σλ)²/Σλ²,建筑影响下界≈2.1")
        print(f" 指标2 λ1 = {lambda1:.2f} {lambda_min:.2f} # 主方向方差,随d单调增大")
        print(f" 指标3 λ1/λ2 = {ratio_1_2:.2f} # 各向异性,d=45°时峰值→最优配置")
        print(f" 指标4 u1峰值位置 = {u1_peak} # d变化时系统漂移,v/Q不变")
        print("=" * 50)
        diag = {
            'r_eff': r_eff,
            'lambda1': lambda1,
            'ratio_1_2': ratio_1_2,
            'u1_peak_row': u1_peak[0],
            'u1_peak_col': u1_peak[1],
        }
        return psi_a, diag

    # ------------------------------------------------------------------ #
    # Internal helpers (shared update machinery)                         #
    # ------------------------------------------------------------------ #

    def _enkf_update_standard(self, X_f, obs_xy, d_obs):
        """Standard EnKF update on an already re-centered ensemble
        X_f (N_ens, H, W); returns the analysis mean (H, W), >= 0."""
        # Fixed seed 42 matches the historical behavior of this path.
        return self._enkf_update(X_f, obs_xy, d_obs, seed=42)

    def _enkf_update_localized(self, X_f, obs_xy, d_obs,
                               loc_radius_pixobs=35.0,
                               loc_radius_obsobs=40.0,
                               seed=42):
        """Localized EnKF update on an already re-centered ensemble
        X_f (N_ens, H, W); returns the analysis mean (H, W), >= 0."""
        N_ens, H, W = X_f.shape
        rho_xy, rho_oo = self._localization_weights(
            H, W, np.asarray(obs_xy), loc_radius_pixobs, loc_radius_obsobs)
        return self._enkf_update(X_f, obs_xy, d_obs, seed=seed,
                                 rho_xy=rho_xy, rho_oo=rho_oo)

    def _perturbed_obs(self, d_obs, N_ens, n_obs, seed):
        """Draw perturbed observations and the matching sampled R matrix.

        The raw Gaussian draws are orthonormalized via SVD before rescaling,
        which controls the sample second moments of the perturbations; R_e
        is their sample covariance plus a diagonal jitter.

        Returns (R_e (n_obs, n_obs), Y_o (N_ens, n_obs)).
        """
        obs_std = self.obs_std_scale * np.maximum(np.abs(d_obs), 1.0)
        rng = np.random.default_rng(seed)
        Z = rng.standard_normal((N_ens, n_obs))
        U, _, Vt = np.linalg.svd(Z, full_matrices=False)
        Z = U @ Vt * np.sqrt(N_ens - 1)
        E = Z * obs_std[None, :]

        E_T = E.T
        R_e = (E_T @ E_T.T) / (N_ens - 1)
        R_e += self.jitter * np.eye(n_obs)
        Y_o = d_obs[None, :] + E
        return R_e, Y_o

    def _enkf_update(self, X_f, obs_xy, d_obs, seed, rho_xy=None, rho_oo=None):
        """Core stochastic EnKF update shared by the standard and localized
        paths; ``rho_xy``/``rho_oo`` are optional Schur-product localization
        weights for the cross- and obs-space covariances."""
        N_ens, H, W = X_f.shape
        n_obs = obs_xy.shape[0]
        X_f_flat = X_f.reshape(N_ens, -1)

        HX = ObservationModel.observation_operator_H_ens(X_f, obs_xy)
        HX_mean = np.mean(HX, axis=0)
        X_f_bar = np.mean(X_f_flat, axis=0)
        A_prime = (X_f_flat - X_f_bar[None, :]).T   # state anomalies (H*W, N_ens)
        Y_prime = (HX - HX_mean).T                  # obs-space anomalies (n_obs, N_ens)

        R_e, Y_o = self._perturbed_obs(d_obs, N_ens, n_obs, seed)

        Pe_HT = (A_prime @ Y_prime.T) / (N_ens - 1)     # P H^T     (H*W, n_obs)
        H_Pe_HT = (Y_prime @ Y_prime.T) / (N_ens - 1)   # H P H^T   (n_obs, n_obs)
        if rho_xy is not None:
            Pe_HT = Pe_HT * rho_xy
        if rho_oo is not None:
            H_Pe_HT = H_Pe_HT * rho_oo

        # Kalman gain via a linear solve; never form the explicit inverse.
        K_e = np.linalg.solve((H_Pe_HT + R_e).T, Pe_HT.T).T
        innovation = (Y_o - HX).T
        X_a_flat = X_f_flat + (self.damping * (K_e @ innovation)).T
        psi_a = np.mean(X_a_flat, axis=0).reshape(H, W)
        return np.maximum(psi_a, 0.0)

    @staticmethod
    def _localization_weights(H, W, obs_xy, loc_radius_pixobs, loc_radius_obsobs):
        """Gaussian localization weights: pixel-to-obs (H*W, n_obs) and
        obs-to-obs (n_obs, n_obs) correlation tapers."""
        yy, xx = np.meshgrid(np.arange(H), np.arange(W), indexing="ij")
        grid = np.stack([xx.ravel(), yy.ravel()], axis=1)
        dx = grid[:, None, 0] - obs_xy[None, :, 0]
        dy = grid[:, None, 1] - obs_xy[None, :, 1]
        rho_xy = np.exp(-0.5 * (dx*dx + dy*dy) / (loc_radius_pixobs**2))

        dox = obs_xy[:, None, 0] - obs_xy[None, :, 0]
        doy = obs_xy[:, None, 1] - obs_xy[None, :, 1]
        rho_oo = np.exp(-0.5 * (dox*dox + doy*doy) / (loc_radius_obsobs**2))
        return rho_xy, rho_oo
|
|
|
|
|
class PrintMetrics:
    """Metric helpers for comparing prior/analysis fields against truth.

    All methods are stateless ``@staticmethod``s. ``get_building_area``
    now caches its result at class level, because the original re-read
    the same reference npz from disk on every call even though this
    helper is invoked by nearly every metric and plotting routine.
    """

    # Lazily populated cache: (build_data_256, non_building_mask).
    _building_cache = None

    @staticmethod
    def pad_center_crop(arr, center_y, center_x, out_h=256, out_w=256):
        """Crop *arr* ((H, W) or (C, H, W)) to ``out_h x out_w`` centered
        at (center_y, center_x), zero-filling wherever the window falls
        outside the array bounds.
        """
        if arr.ndim == 3:
            C, H, W = arr.shape
            out = np.zeros((C, out_h, out_w), dtype=arr.dtype)
        else:
            H, W = arr.shape
            out = np.zeros((out_h, out_w), dtype=arr.dtype)
        # Desired window in source coordinates (may extend out of bounds).
        y0, x0 = center_y - out_h // 2, center_x - out_w // 2
        y1, x1 = y0 + out_h, x0 + out_w
        # Clip the window to the valid source region.
        sy0, sy1 = max(0, y0), min(H, y1)
        sx0, sx1 = max(0, x0), min(W, x1)
        # Destination offsets so the copied region stays centered.
        dy0, dx0 = sy0 - y0, sx0 - x0
        dy1, dx1 = dy0 + (sy1 - sy0), dx0 + (sx1 - sx0)
        if arr.ndim == 3:
            out[:, dy0:dy1, dx0:dx1] = arr[:, sy0:sy1, sx0:sx1]
        else:
            out[dy0:dy1, dx0:dx1] = arr[sy0:sy1, sx0:sx1]
        return out

    @staticmethod
    def get_building_area():
        """Return ``(build_data_256, non_building_mask)`` for the domain.

        The reference npz is read once and the 256x256 crops are cached at
        class level; callers must not mutate the returned arrays in place.
        """
        if PrintMetrics._building_cache is None:
            npz_path = '../Gas_unet/Gas_code/dataset_m/5min_m_Data_special/min5_m_v1_0_d270_sc2_s10_04118.npz'
            data = np.load(npz_path)
            build_data = data['three_channel_data'][0]
            # 1 = free space, 0 = building footprint.
            non_building_mask = (build_data == 0).astype(np.uint8)
            center_y, center_x = 498, 538
            build_data_256 = PrintMetrics.pad_center_crop(
                build_data, center_y, center_x, 256, 256)
            non_building_mask = PrintMetrics.pad_center_crop(
                non_building_mask, center_y, center_x, 256, 256)
            PrintMetrics._building_cache = (build_data_256, non_building_mask)
        return PrintMetrics._building_cache

    @staticmethod
    def weighted_r2(y_true, y_pred, gamma=1.0, eps=1e-12):
        """
        Weighted R2 score emphasizing high-value regions.

        Weights are proportional to ``max(y_true, eps) ** gamma``.
        Returns NaN when the weighted variance of y_true is degenerate.
        """
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)

        w = np.maximum(y_true, eps) ** gamma
        w = w / np.sum(w)

        # Weighted mean of the truth.
        y_bar = np.sum(w * y_true)

        num = np.sum(w * (y_true - y_pred) ** 2)
        den = np.sum(w * (y_true - y_bar) ** 2)

        if den < eps:
            return np.nan

        return 1.0 - num / den

    @staticmethod
    def print_metrics(i, wind_speed, wind_direction, sc, source_number,
                      true_field, pred_field, analysis, obs_xy,
                      metrics_save_flag=False, metrics_print_flag=True):
        """
        Compute before(prior)/after(analysis) scores on three scopes:
        1) Field-wise (all pixels)
        2) Plume-aware (true > eps)
        3) At observations

        Returns a flat metrics dict when ``metrics_save_flag`` is True,
        otherwise None.
        """

        def nmse_metrics(y_true, y_pred):
            # MSE normalized by mean(true) * mean(pred).
            nmse = np.mean((y_true.flatten() - y_pred.flatten())**2) / (np.mean(y_true) * np.mean(y_pred) + 1e-12)
            return nmse

        def nmae_metrics(y_true, y_pred):
            # MAE normalized by mean(true).
            nmae = np.mean(np.abs(y_true.flatten() - y_pred.flatten())) / (np.mean(y_true) + 1e-12)
            return nmae

        # Clip negatives and blank out building pixels before scoring.
        _, non_building_mask = PrintMetrics.get_building_area()
        true_field = np.where(true_field > 0, true_field, 0) * non_building_mask
        pred_field = np.where(pred_field > 0, pred_field, 0) * non_building_mask
        analysis = np.where(analysis > 0, analysis, 0) * non_building_mask
        true_flat = true_field.ravel()
        pred_flat = pred_field.ravel()
        ana_flat = analysis.ravel()

        # --- 1) Field-wise (all pixels) ---
        r2_before = r2_score(true_flat, pred_flat)
        r2_after = r2_score(true_flat, ana_flat)
        mse_before = mean_squared_error(true_flat, pred_flat)
        mse_after = mean_squared_error(true_flat, ana_flat)
        mae_before = mean_absolute_error(true_flat, pred_flat)
        mae_after = mean_absolute_error(true_flat, ana_flat)
        nmse_before = nmse_metrics(true_flat, pred_flat)
        nmse_after = nmse_metrics(true_flat, ana_flat)
        nmae_before = nmae_metrics(true_flat, pred_flat)
        nmae_after = nmae_metrics(true_flat, ana_flat)

        # --- 2) Plume-aware: restrict to pixels where the truth has signal ---
        plume_mask = true_flat > 1e-6
        true_p = true_flat[plume_mask]
        pred_p = pred_flat[plume_mask]
        ana_p = ana_flat[plume_mask]
        r2_plume_before = r2_score(true_p, pred_p)
        r2_plume_after = r2_score(true_p, ana_p)
        mse_plume_before = mean_squared_error(true_p, pred_p)
        mse_plume_after = mean_squared_error(true_p, ana_p)
        mae_plume_before = mean_absolute_error(true_p, pred_p)
        mae_plume_after = mean_absolute_error(true_p, ana_p)
        nmse_plume_before = nmse_metrics(true_p, pred_p)
        nmse_plume_after = nmse_metrics(true_p, ana_p)
        nmae_plume_before = nmae_metrics(true_p, pred_p)
        nmae_plume_after = nmae_metrics(true_p, ana_p)

        # High-concentration-weighted R2 on the plume pixels.
        wr2_plume_before = PrintMetrics.weighted_r2(true_p, pred_p, gamma=1.0)
        wr2_plume_after = PrintMetrics.weighted_r2(true_p, ana_p, gamma=1.0)

        # --- 3) At the observation locations only ---
        true_at_obs = ObservationModel.observation_operator_H(true_field, obs_xy)
        pred_at_obs = ObservationModel.observation_operator_H(pred_field, obs_xy)
        ana_at_obs = ObservationModel.observation_operator_H(analysis, obs_xy)
        r2_obs_before = r2_score(true_at_obs, pred_at_obs)
        r2_obs_after = r2_score(true_at_obs, ana_at_obs)
        mse_obs_before = mean_squared_error(true_at_obs, pred_at_obs)
        mse_obs_after = mean_squared_error(true_at_obs, ana_at_obs)
        mae_obs_before = mean_absolute_error(true_at_obs, pred_at_obs)
        mae_obs_after = mean_absolute_error(true_at_obs, ana_at_obs)
        nmse_obs_before = nmse_metrics(true_at_obs, pred_at_obs)
        nmse_obs_after = nmse_metrics(true_at_obs, ana_at_obs)
        nmae_obs_before = nmae_metrics(true_at_obs, pred_at_obs)
        nmae_obs_after = nmae_metrics(true_at_obs, ana_at_obs)

        if metrics_print_flag:
            print("=== Assimilation Metrics ===")
            print("[Field-wise]")
            print(f"R2 : {r2_before:.4f}->{r2_after:.4f}")
            print(f"MSE : {mse_before:.4f}->{mse_after:.4f}")
            print(f"MAE : {mae_before:.4f}->{mae_after:.4f}")
            print("[Plume-aware]")
            print(f"R2 : {r2_plume_before:.4f}->{r2_plume_after:.4f}")
            print(f"MSE : {mse_plume_before:.4f}->{mse_plume_after:.4f}")
            print(f"MAE : {mae_plume_before:.4f}->{mae_plume_after:.4f}")
            print(f"W-R2 : {wr2_plume_before:.4f}->{wr2_plume_after:.4f}")
            print("[At observations]")
            print(f"R2 : {r2_obs_before:.4f}->{r2_obs_after:.4f}")
            print(f"MSE : {mse_obs_before:.4f}->{mse_obs_after:.4f}")
            print(f"MAE : {mae_obs_before:.4f}->{mae_obs_after:.4f}")

        if metrics_save_flag:
            return {
                'idx': i,
                'wind_speed': wind_speed,
                'wind_direction': wind_direction,
                'stability_class': sc,
                'source_number': source_number,
                "r2_before": r2_before,
                "r2_after": r2_after,
                "r2_plume_before": r2_plume_before,
                "r2_plume_after": r2_plume_after,
                "w_r2_plume_before": wr2_plume_before,
                "w_r2_plume_after": wr2_plume_after,
                "r2_obs_before": r2_obs_before,
                "r2_obs_after": r2_obs_after,
                "mse_before": mse_before,
                "mse_after": mse_after,
                "mse_plume_before": mse_plume_before,
                "mse_plume_after": mse_plume_after,
                "mse_obs_before": mse_obs_before,
                "mse_obs_after": mse_obs_after,
                "mae_before": mae_before,
                "mae_after": mae_after,
                "mae_plume_before": mae_plume_before,
                "mae_plume_after": mae_plume_after,
                "mae_obs_before": mae_obs_before,
                "mae_obs_after": mae_obs_after,
                "nmse_before": nmse_before,
                "nmse_after": nmse_after,
                "nmse_plume_before": nmse_plume_before,
                "nmse_plume_after": nmse_plume_after,
                "nmae_before": nmae_before,
                "nmae_after": nmae_after,
                "nmae_plume_before": nmae_plume_before,
                "nmae_plume_after": nmae_plume_after,
                "nmse_obs_before": nmse_obs_before,
                "nmse_obs_after": nmse_obs_after,
                "nmae_obs_before": nmae_obs_before,
                "nmae_obs_after": nmae_obs_after,
            }
|
|
|
| class Visualization:
|
def plot_assimilation_with_building(
        true_field,
        pred_field,
        analysis,
        obs_xy,
        vmax=10,
        title_suffix=""
):
    """Plot true field, prior prediction and analysis side by side.

    - masks out building pixels before plotting
    - overlays observation locations on every panel
    - annotates prior/analysis panels with R2, MSE and MAE@Obs

    NOTE(review): the original embedded multi-line expressions inside
    f-string replacement fields, which is only valid syntax on
    Python >= 3.12 (PEP 701); the metric values are hoisted into locals
    so the function parses on older interpreters too.
    """
    _, non_building_mask = PrintMetrics.get_building_area()
    # Clip negatives to zero and blank out building pixels.
    true_field = np.where(true_field > 0, true_field, 0) * non_building_mask
    pred_field = np.where(pred_field > 0, pred_field, 0) * non_building_mask
    analysis = np.where(analysis > 0, analysis, 0) * non_building_mask

    fig, axs = plt.subplots(1, 3, figsize=(14, 4), dpi=300)
    cmap = "inferno"
    levels = np.linspace(0, vmax, 21)

    im0 = axs[0].contourf(true_field, levels=levels, cmap=cmap, vmin=0, vmax=vmax,
                          extend='max')
    axs[0].set_title('True Field' + title_suffix)
    plt.colorbar(im0, ax=axs[0])

    im1 = axs[1].contourf(pred_field, levels=levels, cmap=cmap, vmin=0, vmax=vmax,
                          extend='max')
    axs[1].set_title(r'Prior Prediction Field $\psi^{f}$')
    plt.colorbar(im1, ax=axs[1])

    im2 = axs[2].contourf(analysis, levels=levels, cmap=cmap, vmin=0, vmax=vmax,
                          extend='max')
    axs[2].set_title(r'Analysis $\psi^{a}$')
    plt.colorbar(im2, ax=axs[2])
    # Observation sites on every panel.
    for ax in axs:
        ax.scatter(obs_xy[:, 0], obs_xy[:, 1], c='red', s=15, edgecolors='k')

    # Metrics hoisted out of the f-strings (see docstring note).
    true_at_obs = ObservationModel.observation_operator_H(true_field, obs_xy)
    mae_obs_prior = mean_absolute_error(
        true_at_obs, ObservationModel.observation_operator_H(pred_field, obs_xy))
    mae_obs_analysis = mean_absolute_error(
        true_at_obs, ObservationModel.observation_operator_H(analysis, obs_xy))

    axs[1].text(
        80, 15,
        f"$R^2$={r2_score(true_field.ravel(), pred_field.ravel()):.4f}\n"
        f"$MSE$={mean_squared_error(true_field.ravel(), pred_field.ravel()):.4f}\n"
        f"$MAE@Obs$={mae_obs_prior:.3f}",
        color='white'
    )
    axs[2].text(
        80, 15,
        f"$R^2$={r2_score(true_field.ravel(), analysis.ravel()):.4f}\n"
        f"$MSE$={mean_squared_error(true_field.ravel(), analysis.ravel()):.4f}\n"
        f"$MAE@Obs$={mae_obs_analysis:.3f}",
        color='white'
    )
    plt.tight_layout()
    plt.show()
|
|
|
def plot_assimilation_4panel(
        true_field,
        pred_field,
        analysis,
        obs_xy,
        obs_val,
        vmin=0,
        vmax=10,
        title_suffix=""
):
    """Four-panel figure: truth, prior, raw observations, and analysis.

    All fields are clipped to >= 0 and masked to non-building pixels before
    contouring; the third panel shows the observed values as a scatter on
    the same colour scale as the contour panels.
    """
    _, non_building_mask = PrintMetrics.get_building_area()
    # Clip negatives and blank out building pixels.
    true_field = np.where(true_field > 0, true_field, 0) * non_building_mask
    pred_field = np.where(pred_field > 0, pred_field, 0) * non_building_mask
    analysis = np.where(analysis > 0, analysis, 0) * non_building_mask

    fig, axs = plt.subplots(1, 4, figsize=(18, 4), dpi=300)
    cmap = "inferno"
    levels = np.linspace(0, vmax, 21)

    im0 = axs[0].contourf(true_field, levels=levels, cmap=cmap, vmin=vmin, vmax=vmax,
                          extend='both')
    axs[0].set_title("True Field" + title_suffix)
    plt.colorbar(im0, ax=axs[0])

    im1 = axs[1].contourf(pred_field, levels=levels, cmap=cmap, vmin=vmin, vmax=vmax,
                          extend='both')
    axs[1].set_title(r"Prior Prediction $\psi^{f}$")
    plt.colorbar(im1, ax=axs[1])

    # Observation panel: one marker per sensor, coloured by measured value.
    sc = axs[2].scatter(
        obs_xy[:, 0],
        obs_xy[:, 1],
        c=obs_val,
        cmap=cmap,
        vmin=vmin,
        vmax=vmax,
        s=30,
        edgecolors="k",
        linewidths=0.4,
        alpha=0.9
    )
    axs[2].set_title("Observations $d_i$")
    axs[2].set_xlim(0, true_field.shape[1])
    # NOTE(review): set_ylim(H, 0) already flips the y axis, and the
    # invert_yaxis() call just below flips it back — the two cancel, so
    # this panel ends up in the same (y-up) orientation as the contourf
    # panels. Confirm which orientation was actually intended.
    axs[2].set_ylim(true_field.shape[0], 0)
    axs[2].set_aspect("equal")
    axs[2].invert_yaxis()
    plt.colorbar(sc, ax=axs[2], extend='both')

    im3 = axs[3].contourf(analysis, levels=levels, cmap=cmap, vmin=vmin, vmax=vmax,
                          extend='both')
    axs[3].set_title(r"Analysis $\psi^{a}$")
    plt.colorbar(im3, ax=axs[3])

    # Annotate prior and analysis panels with field-wise scores
    # (axes-fraction coordinates, top-left corner).
    axs[1].text(
        0.02, 0.95,
        f"$R^2$={r2_score(true_field.ravel(), pred_field.ravel()):.4f}\n"
        f"$MSE$={mean_squared_error(true_field.ravel(), pred_field.ravel()):.4f}",
        transform=axs[1].transAxes,
        va="top",
        color="white"
    )

    axs[3].text(
        0.02, 0.95,
        f"$R^2$={r2_score(true_field.ravel(), analysis.ravel()):.4f}\n"
        f"$MSE$={mean_squared_error(true_field.ravel(), analysis.ravel()):.4f}",
        transform=axs[3].transAxes,
        va="top",
        color="white"
    )
    plt.tight_layout()
    plt.show()
|
|
|
def plot_pe_spectrum(all_diags, save_flag=False):
    """Plot effective rank and eigenvalue anisotropy versus wind direction.

    Draws r_eff on the left axis and lambda_1/lambda_2 on a twin right
    axis, marks the direction with maximal anisotropy with a dashed
    vertical line, and optionally saves the figure to disk.
    """
    blue = "#488ABA"
    orange = "#e5954e"

    directions = [entry['d'] for entry in all_diags]
    eff_ranks = [entry['r_eff'] for entry in all_diags]
    anisotropies = [entry['ratio_1_2'] for entry in all_diags]

    fig, ax_left = plt.subplots(figsize=(6, 3), dpi=300)
    line_eff, = ax_left.plot(directions, eff_ranks,
                             marker='o', color=blue, linewidth=1.5,
                             alpha=0.4,
                             markersize=6, label=r'$r_{\rm eff}$')
    ax_left.set_xlabel(r'Wind direction', labelpad=3)
    ax_left.set_ylabel(r'Effective rank $r_{\rm eff}$',
                       color=blue, labelpad=4)
    ax_left.tick_params(axis='y', colors=blue)
    ax_left.spines['left'].set_color(blue)
    ax_left.set_xticks(directions)
    ax_left.set_xticklabels([f'{d}°' for d in directions])
    ax_left.set_ylim(1.5, 3)

    # Second metric on a twin axis sharing the x ticks.
    ax_right = ax_left.twinx()
    line_ratio, = ax_right.plot(directions, anisotropies,
                                marker='s', color=orange, linewidth=1.5,
                                alpha=0.4,
                                markersize=6, label=r'$\lambda_1/\lambda_2$')
    ax_right.set_ylabel(r'Anisotropy $\lambda_1/\lambda_2$',
                        color=orange, labelpad=4)
    ax_right.tick_params(axis='y', colors=orange)
    ax_right.spines['right'].set_color(orange)
    ax_right.set_ylim(1.5, 3.5)

    # Mark the direction with the strongest anisotropy.
    best = int(np.argmax(anisotropies))
    ax_right.axvline(directions[best], color='grey', linewidth=0.8, linestyle='--', alpha=0.6)
    ax_right.text(directions[best] + 0.8, 1.58, r'$d^{**}$', color='grey')

    fig.legend(handles=[line_eff, line_ratio],
               loc='upper left',
               bbox_to_anchor=(0.15, 0.95),
               ncol=1, frameon=False)

    plt.tight_layout()
    if save_flag:
        plt.savefig('./figures/test1/reff_ratio.png', dpi=300, bbox_inches='tight',
                    transparent=True)

    plt.show()
|
|
|
def assimilation_scatter(psi_t_log, psi_f_log, psi_a_log, obs_xy):
    """Scatter prior/analysis vs truth at the observation points (log10 axes).

    Fixes over the original: the axis-limit variable was misspelled
    (``vamx``) and duplicated as hard-coded ``-4, 2`` in ``set_xlim`` /
    ``set_ylim``; an ``rmse`` value was computed but never used.
    """
    def log10_formatter(x, pos):
        # Render tick value x as 10^x.
        return r'$10^{%d}$' % x

    # Sample the three fields at the observation coordinates; the small
    # offset keeps log10 finite for zero concentrations.
    obs_true = np.log10(ObservationModel.observation_operator_H(psi_t_log, obs_xy) + 1e-3)
    obs_prior = np.log10(ObservationModel.observation_operator_H(psi_f_log, obs_xy) + 1e-3)
    obs_analysis = np.log10(ObservationModel.observation_operator_H(psi_a_log, obs_xy) + 1e-3)

    fig, ax = plt.subplots(figsize=(5, 4.5), dpi=300)
    vmin, vmax = -4, 2
    lim = [vmin, vmax]
    ax.plot(lim, lim, 'k--', lw=1, label='1:1 line', zorder=1)

    for obs_pred, label, color in zip(
            [obs_prior, obs_analysis],
            ['Prior', 'Analysis'],
            ['steelblue', 'tomato']
    ):
        ax.scatter(obs_true, obs_pred, s=25, alpha=0.6, color=color, zorder=3)

        # Least-squares fit of predicted-vs-true in log space.
        slope, intercept, r, _, _ = stats.linregress(obs_true, obs_pred)
        x_fit = np.linspace(lim[0], lim[1], 100)
        ax.plot(x_fit, slope * x_fit + intercept, '-', color=color, lw=1.5,
                label=f'{label}r:{r:.2f}', zorder=2)

    ax.set_xlabel('log(True)')
    ax.set_ylabel('log(Predicted)')
    ax.legend(loc='lower right', frameon=False)
    ax.set_xlim(vmin, vmax)
    ax.set_ylim(vmin, vmax)
    ax.xaxis.set_major_formatter(ticker.FuncFormatter(log10_formatter))
    ax.yaxis.set_major_formatter(ticker.FuncFormatter(log10_formatter))

    plt.tight_layout()
    plt.show()
|
|
|
def none_assimilation_scatter(psi_t_log, psi_f_log, psi_a_log, obs_xy):
    """Scatter diagnostic at points *independent* of the assimilated obs.

    Samples up to 200 non-observation, non-building, signal-bearing pixels
    and compares prior/analysis to truth there (log10 axes).

    Fixes over the original: sampling no longer crashes when fewer than
    ``num_points`` candidate pixels exist (``rng.choice`` with
    ``replace=False`` raises in that case), and an unused ``rmse`` local
    was removed.
    """
    def sample_independent_points(field, obs_xy, num_points=100, seed=42):
        # Candidate pixels: not an observation site, outside buildings,
        # and carrying non-negligible concentration.
        H, W = field.shape
        yy, xx = np.meshgrid(np.arange(H), np.arange(W), indexing='ij')
        all_xy = np.stack([xx.ravel(), yy.ravel()], axis=1)
        obs_set = set(map(tuple, np.round(obs_xy).astype(int)))
        obs_mask = np.array([tuple(p) not in obs_set for p in all_xy])
        _, non_building_mask = PrintMetrics.get_building_area()
        outside_building = non_building_mask[yy.ravel(), xx.ravel()] == 1
        has_signal = field.ravel() > 1e-4
        candidate_xy = all_xy[obs_mask & outside_building & has_signal]
        rng = np.random.default_rng(seed)
        # Guard: never request more points than candidates available.
        num_points = min(num_points, len(candidate_xy))
        idx = rng.choice(len(candidate_xy), num_points, replace=False)
        return candidate_xy[idx]

    test_xy = sample_independent_points(psi_t_log, obs_xy, 200)
    # log10 with a small floor so zero concentrations stay finite.
    obs_true = np.log10(ObservationModel.observation_operator_H(psi_t_log, test_xy) + 1e-6)
    obs_prior = np.log10(ObservationModel.observation_operator_H(psi_f_log, test_xy) + 1e-6)
    obs_analysis = np.log10(ObservationModel.observation_operator_H(psi_a_log, test_xy) + 1e-6)

    def log10_formatter(x, pos):
        # Render tick value x as 10^x.
        return r'$10^{%d}$' % x

    fig, ax = plt.subplots(figsize=(5, 4.5), dpi=300)

    vmin, vmax = -4, 2
    lim = [vmin, vmax]

    ax.plot(lim, lim, 'k--', lw=1, label='1:1 line', zorder=1)

    for obs_pred, label, color in zip(
            [obs_prior, obs_analysis],
            ['Prior', 'Analysis'],
            ['steelblue', 'tomato']
    ):
        ax.scatter(obs_true, obs_pred,
                   s=30,
                   alpha=0.65,
                   color=color,
                   zorder=3)
        # Least-squares fit of predicted-vs-true in log space.
        slope, intercept, r, _, _ = stats.linregress(obs_true, obs_pred)
        x_fit = np.linspace(lim[0], lim[1], 100)
        ax.plot(x_fit, slope * x_fit + intercept, '-', color=color, lw=1.5,
                label=f'{label}r:{r:.2f}', zorder=2)

    ax.set_xlim(vmin, vmax)
    ax.set_ylim(vmin, vmax)
    ax.set_xlabel('log(True)')
    ax.set_ylabel('log(Predicted)')
    ax.xaxis.set_major_formatter(ticker.FuncFormatter(log10_formatter))
    ax.yaxis.set_major_formatter(ticker.FuncFormatter(log10_formatter))
    ax.legend(loc='lower right', frameon=False)
    plt.tight_layout()
    plt.show()
|
|
|
def methods_comparison(source_idx=25,
                       num_points = 10,
                       methods = ['random', 'uniform', 'two_stage']):
    """Visualize one saved assimilation sample for each sampling method.

    For every entry in *methods* the archive
    ``fields_n{num_points}_{method}_obs1.npz`` is loaded, the sample at
    *source_idx* is extracted, and three figures are rendered: a 3-panel
    comparison in log space and 4-panel comparisons in log and ppm space.

    NOTE(review): expects archives written by the field-saving path of the
    assimilation driver (keys 'trues_log', 'preds_log', ...) — confirm the
    producer if the key layout changes.
    """
    for method in methods:
        print(f"\n=== 观测点采样方法: {method} ===")
        # allow_pickle is required: all_fields is an object array of dicts.
        data = np.load(f'./dataset/assim_conds/fields_n{num_points}_{method}_obs1.npz', allow_pickle=True)

        all_fields = data['all_fields']
        sample_data = all_fields[source_idx]
        # Fields in log1p space and in ppm space, plus the observations.
        psi_t_log = sample_data['trues_log']
        psi_f_log = sample_data['preds_log']
        psi_a_log = sample_data['analysis_log']
        psi_t_ppm = sample_data['trues_ppm']
        psi_f_ppm = sample_data['preds_ppm']
        psi_a_ppm = sample_data['analysis_ppm']
        obs_xy = sample_data['obs_xy']
        obs_value_log = sample_data['obs_value_log']
        obs_value_ppm = sample_data['obs_value_ppm']
        Visualization.plot_assimilation_with_building(
            true_field=psi_t_log,
            pred_field=psi_f_log,
            analysis=psi_a_log,
            obs_xy=obs_xy,
            vmax=10,
            title_suffix=f" (idx={source_idx})"
        )
        Visualization.plot_assimilation_4panel(
            true_field=psi_t_log,
            pred_field=psi_f_log,
            analysis=psi_a_log,
            obs_xy=obs_xy,
            obs_val=obs_value_log,
            vmax=10,
            title_suffix=f" (idx={source_idx})"
        )
        Visualization.plot_assimilation_4panel(
            true_field=psi_t_ppm,
            pred_field=psi_f_ppm,
            analysis=psi_a_ppm,
            obs_xy=obs_xy,
            obs_val=obs_value_ppm,
            vmax=200,
            title_suffix=f" in PPM SPACE (idx={source_idx})"
        )
|
|
|
def plot_n_hist_comparison(obs_tag=1, space_mode="log",
                           methods=["random", "uniform", "two_stage"],
                           n_list=[10, 20, 30, 40, 50],
                           base_dir="./dataset/assim_conds",
                           plot_mode="after",
                           target_method="two_stage"):
    """Bar/line summary of assimilation metrics versus observation count.

    For each n in *n_list* the metrics CSV of *target_method* is loaded
    and averaged over samples; R2 is drawn as grouped bars (left axis)
    and NMSE/NMAE as lines on a twin axis, split into the
    overall / plume / obs scopes. ``plot_mode="after"`` plots the
    post-assimilation mean, ``"delta"`` plots the improvement
    (after - before).

    Fixes over the original: ``cm.get_cmap`` (removed from matplotlib.cm
    in matplotlib 3.9) replaced with ``plt.get_cmap``; several never-used
    locals (line styles, marker maps, y_text) dropped; typo fixed in the
    fallback info message.

    NOTE(review): *methods* is accepted for interface compatibility but
    only *target_method* is actually plotted.
    """
    scope_labels = ["overall", "plume", "obs"]
    metric_keys = {
        "r2": ["r2", "r2_plume", "r2_obs"],
        "nmse": ["nmse", "nmse_plume", "nmse_obs"],
        "nmae": ["nmae", "nmae_plume", "nmae_obs"],
    }

    def load_method_df(space, method, num_points):
        # Try the requested method first, then fall back to two_stage_pro.
        candidate_methods = [method]
        if method != "two_stage_pro":
            candidate_methods.append("two_stage_pro")
        for m in candidate_methods:
            fp = os.path.join(
                base_dir,
                f"assimi_{space}_n{num_points}_{m}_obs{obs_tag}.csv"
            )
            if os.path.exists(fp):
                if m != method:
                    print(f"[Info] {method} does not exist, falling back to {fp}")
                return pd.read_csv(fp)

        print(f"[Warning] file not found: {candidate_methods}")
        return None

    def get_metric_value(df, metric_name):
        # Mean over samples; "delta" mode reports the improvement.
        before_col = f"{metric_name}_before"
        after_col = f"{metric_name}_after"
        if before_col not in df.columns or after_col not in df.columns:
            return np.nan
        before_mean = df[before_col].mean()
        after_mean = df[after_col].mean()
        if plot_mode == "delta":
            return after_mean - before_mean
        return after_mean

    # One (n, scope) matrix per metric family, NaN where data is missing.
    data = {
        "r2": np.full((len(n_list), 3), np.nan),
        "nmse": np.full((len(n_list), 3), np.nan),
        "nmae": np.full((len(n_list), 3), np.nan),
    }

    for i_n, n in enumerate(n_list):
        df = load_method_df(space_mode, target_method, n)
        if df is None:
            continue
        for metric_type in ["r2", "nmse", "nmae"]:
            data[metric_type][i_n, :] = [
                get_metric_value(df, mk) for mk in metric_keys[metric_type]
            ]

    fig, ax1 = plt.subplots(figsize=(12, 6), dpi=300)
    ax2 = ax1.twinx()
    x_base = np.arange(len(n_list))
    scope_offsets = {
        "overall": -0.24,
        "plume": 0.00,
        "obs": 0.24,
    }
    bar_w = 0.20

    # cm.get_cmap was removed in matplotlib 3.9; plt.get_cmap is the
    # drop-in replacement available in all recent versions.
    colors = plt.get_cmap("Blues")
    scope_colors = {
        "overall": colors(0.45),
        "plume": colors(0.65),
        "obs": colors(0.85),
        "edge": colors(0.85),
    }

    # R2 bars on the left axis, one group of three scopes per n.
    text_r2 = r"$\mathit{R}^2$"
    for j, scope in enumerate(scope_labels):
        x = x_base + scope_offsets[scope]
        y = data["r2"][:, j]

        ax1.bar(
            x, y,
            width=bar_w,
            color=scope_colors[scope],
            alpha=0.75,
            label=f"{text_r2}-{scope}",
            edgecolor=scope_colors["edge"],
            zorder=2
        )

        # Value labels above each finite bar.
        for xi, yi in zip(x, y):
            if np.isfinite(yi):
                ax1.text(
                    xi, yi + 0.001, f"{yi:.2f}",
                    ha="center", va="bottom"
                )

    ax1.set_ylabel(r"$\mathit{R}^2$")
    ax1.set_xticks(x_base)
    ax1.set_xticklabels([f"n={n}" for n in n_list])
    ax1.grid(axis="y", linestyle="--", alpha=0.25, zorder=0)

    if plot_mode == "delta":
        ax1.axhline(0, color="k", linewidth=1)

    # NMSE (dashed/circle) and NMAE (dotted/square) lines on the twin axis.
    for j, scope in enumerate(scope_labels):
        x = x_base + scope_offsets[scope]

        ax2.plot(
            x,
            data["nmse"][:, j],
            color=scope_colors[scope],
            linestyle="--",
            marker="o",
            linewidth=1.8,
            markersize=5,
            label=f"NMSE-{scope}",
            markeredgecolor=scope_colors["edge"],
            zorder=3
        )

        ax2.plot(
            x,
            data["nmae"][:, j],
            color=scope_colors[scope],
            linestyle=":",
            marker="s",
            linewidth=1.8,
            markersize=5,
            markeredgecolor=scope_colors["edge"],
            label=f"NMAE-{scope}",
            zorder=3
        )

    ax2.set_ylabel("NMSE / NMAE")

    if plot_mode == "delta":
        ax2.axhline(0, color="gray", linewidth=1, alpha=0.6)

    # Combined legend for both axes, placed outside the plot area.
    h1, l1 = ax1.get_legend_handles_labels()
    h2, l2 = ax2.get_legend_handles_labels()

    ax1.legend(
        h1 + h2,
        l1 + l2,
        frameon=False,
        loc="center left",
        bbox_to_anchor=(1.15, 0.5)
    )

    plt.tight_layout()
    plt.show()
|
|
|
def cal_all_test_resluts(config, conds_pkl_path=None,
                         data_path=None,
                         use_localization=True,
                         save_fields_flag=True,
                         save_metrics_flag=False):
    '''
    Run EnKF assimilation over the whole test set and aggregate metrics.

    Usage:
        sample_method_lists = ["random", "uniform", "two_stage"]
        for method in sample_method_lists:
            config_test = {
                "num_points": 20,
                "sample_method": method,
                "obs_std_scale": 0.01,
                "damping": 1,
                "two_stage_params": {
                    "min_dist": 28,
                    "n1_ratio": 0.6,
                    "stage1_support_frac": 0.2,
                    "stage1_grad_power": 0.8,
                    "stage1_value_power": 1.2,
                    "stage1_center_boost": 1.2,
                },
            }
            cal_all_test_resluts(config_test)
    '''
    # Required configuration keys.
    num_points = config['num_points']
    sample_method = config['sample_method']

    obs_std_scale=config['obs_std_scale']
    damping=config['damping']
    sample_method = config["sample_method"]
    # Optional per-method sampling parameters live under "<method>_params".
    sample_params = {}
    params_key = f"{sample_method}_params"
    if params_key in config and isinstance(config[params_key], dict):
        sample_params = config[params_key]

    # Same prediction/meta inputs either way; only the conditional-ensemble
    # pickle differs when the caller supplies one.
    if conds_pkl_path is not None:
        loader = DataLoader(
            pred_npz_path='./dataset/pre_data/all_test_pred2.npz',
            meta_txt_path='./dataset/pre_data/combined_test_special.txt',
            conds_pkl_path=conds_pkl_path
        )
    else:
        loader = DataLoader(
            pred_npz_path='./dataset/pre_data/all_test_pred2.npz',
            meta_txt_path='./dataset/pre_data/combined_test_special.txt',
            conds_pkl_path='./dataset/pre_data/pred_condition/test_results/conditioned_results_v0_5_d45_n40.pkl'
        )
    trues, preds = loader.trues, loader.preds

    all_metrics_log = []
    all_metrics_ppm = []
    all_fields = []
    enkf = EnKF(obs_std_scale=obs_std_scale, damping=damping)
    for i in trange(len(preds), desc="Running assimilation"):
        psi_f_ppm, psi_t_ppm, conds_preds, meta = loader.get_sample(idx=i, in_ppm=True)
        # Assimilate in log1p space to compress the dynamic range.
        psi_f_log = np.log1p(np.maximum(psi_f_ppm, 0))
        psi_t_log = np.log1p(np.maximum(psi_t_ppm, 0))
        conds_log = np.log1p(np.maximum(conds_preds, 0))

        if sample_method == "smart_two_pass":
            # Split the observation budget between the two passes,
            # keeping at least one point in each pass.
            n1_ratio = float(sample_params.get('n1_ratio', 0.6))
            n1_default = int(round(num_points * n1_ratio))
            n1 = int(sample_params.get('n1', n1_default))
            if num_points > 1:
                n1 = max(1, min(n1, num_points - 1))
            else:
                n1 = 1
            n2 = num_points - n1

            psi_a_log, obs_xy, all_obs_val_log, _, _ = SamplingStrategies.smart_two_pass(
                enkf=enkf,
                psi_f=psi_f_log,
                conds_preds=conds_log,
                true_field=psi_t_log,
                n1=n1,
                n2=n2,
                phase1_method=sample_params.get('phase1_method', 'two_stage'),
                min_dist_p2=sample_params.get('min_dist_p2', 22),
                under_correct_alpha=sample_params.get('under_correct_alpha', 1.5),
                use_localization=sample_params.get('use_localization', use_localization),
                loc_radius_pixobs=sample_params.get('loc_radius_pixobs', 35.0),
                loc_radius_obsobs=sample_params.get('loc_radius_obsobs', 40.0),
                seed=42,
                verbose=sample_params.get('verbose', False),
            )

            obs_value_log = np.asarray(all_obs_val_log)
            obs_value_ppm = DataLoader.log2ppm(obs_value_log)
        else:
            # Single-pass sampling in ppm space, then EnKF in log space.
            obs_xy, obs_value_ppm = SamplingStrategies.generate(psi_t_ppm, psi_f_ppm, num_points=num_points,
                                                                seed=42, method=sample_method,
                                                                ens_preds_ppm=conds_preds,
                                                                **sample_params
                                                                )
            d_obs_log = np.log1p(np.maximum(obs_value_ppm, 0))
            if use_localization:
                psi_a_log = enkf.enkf_localization(psi_f_log, conds_log, obs_xy, d_obs_log,
                                                   loc_radius_pixobs=35.0,
                                                   loc_radius_obsobs=30.0)
            else:
                psi_a_log = enkf.standard_enkf(psi_f_log, conds_log, obs_xy, d_obs_log)

            # NOTE(review): the innovation gate below unconditionally
            # recomputes psi_a_log, discarding the result of the branch
            # right above — the earlier assimilation call is dead work and
            # the use_localization=False path is effectively ignored.
            # Confirm whether the gate was meant to wrap those calls.
            obs_prior_at_obs = np.log1p(np.maximum(
                ObservationModel.observation_operator_H(psi_f_ppm, obs_xy), 0
            ))
            obs_innovation = np.mean(np.abs(obs_prior_at_obs - d_obs_log))
            threshold = config.get('innovation_threshold', 0.05)

            # Skip assimilation when the prior already matches the obs.
            if obs_innovation < threshold:
                psi_a_log = psi_f_log
            else:
                psi_a_log = enkf.enkf_localization(
                    psi_f_log, conds_log, obs_xy, d_obs_log,
                    loc_radius_pixobs=35.0,
                    loc_radius_obsobs=30.0
                )
            obs_value_log = np.log1p(np.maximum(obs_value_ppm, 0))

        # Map the analysis back to ppm space for ppm-domain metrics.
        psi_a_ppm = DataLoader.log2ppm(psi_a_log)

        # Metrics in log space ...
        metrics_log = PrintMetrics.print_metrics(
            i=i,
            wind_speed=meta['wind_speed'],
            wind_direction=meta['wind_direction'],
            sc=meta['sc'],
            source_number=meta['source_number'],
            true_field=psi_t_log,
            pred_field=psi_f_log,
            analysis=psi_a_log,
            obs_xy=obs_xy,
            metrics_save_flag=True,
            metrics_print_flag=False
        )
        all_metrics_log.append(metrics_log)
        # ... and the same metrics in ppm space.
        metrics_ppm = PrintMetrics.print_metrics(
            i=i,
            wind_speed=meta['wind_speed'],
            wind_direction=meta['wind_direction'],
            sc=meta['sc'],
            source_number=meta['source_number'],
            true_field=psi_t_ppm,
            pred_field=psi_f_ppm,
            analysis=psi_a_ppm,
            obs_xy=obs_xy,
            metrics_save_flag=True,
            metrics_print_flag=False
        )
        all_metrics_ppm.append(metrics_ppm)
        # Keep all fields of this sample for later visualization.
        all_fields.append({
            "idx": i,
            "trues_log": psi_t_log,
            "preds_log": psi_f_log,
            "analysis_log": psi_a_log,
            "trues_ppm": psi_t_ppm,
            "preds_ppm": psi_f_ppm,
            "analysis_ppm": psi_a_ppm,
            "obs_xy": obs_xy,
            "obs_value_log": obs_value_log,
            "obs_value_ppm": obs_value_ppm,
        })
    data_paths = f'./dataset/assim_conds/{data_path}'
    if not os.path.exists(data_paths):
        os.makedirs(data_paths)
    if save_fields_flag:
        np.savez_compressed(f'./dataset/assim_conds/{data_path}/fields_n{num_points}_{sample_method}_obs{int(obs_std_scale*100)}_damping{damping}.npz',
                            all_fields=all_fields)
    all_metrics_df_log = pd.DataFrame(all_metrics_log)
    all_metrics_df_ppm = pd.DataFrame(all_metrics_ppm)
    if save_metrics_flag:
        all_metrics_df_ppm.to_csv(f'./dataset/assim_conds/{data_path}/assimi_ppm_n{num_points}_{sample_method}_obs{int(obs_std_scale*100)}_damping{damping}.csv', index=False)
        all_metrics_df_log.to_csv(f'./dataset/assim_conds/{data_path}/assimi_log_n{num_points}_{sample_method}_obs{int(obs_std_scale*100)}_damping{damping}.csv', index=False)
    # Print the mean before/after improvement, log space then ppm space.
    print("\n=== 平均指标提升 ===")
    metrics_list = ['r2', 'w_r2_plume', 'r2_plume','mse', 'mae']
    for metric in metrics_list:
        before_mean = all_metrics_df_log[f"{metric}_before"].mean()
        after_mean = all_metrics_df_log[f"{metric}_after"].mean()
        delta = after_mean - before_mean
        print(f'{metric.upper()}: before={before_mean:.4f}, after={after_mean:.4f}, delta={delta:.4f}')
        before_mean_ppm = all_metrics_df_ppm[f"{metric}_before"].mean()
        after_mean_ppm = all_metrics_df_ppm[f"{metric}_after"].mean()
        delta_ppm = after_mean_ppm - before_mean_ppm
        print(f'PPM {metric.upper()}: before={before_mean_ppm:.4f}, after={after_mean_ppm:.4f}, delta={delta_ppm:.4f}')