|
|
import numpy as np |
|
|
import pandas as pd |
|
|
from generate_reports import get_report_reliability |
|
|
|
|
|
DAMAGE_STATES = ['none', 'minor', 'severe', 'collapse'] |
|
|
|
|
|
def compute_likelihood(reported_state, true_state, reliability): |
|
|
if reported_state == true_state: |
|
|
return reliability |
|
|
|
|
|
reported_idx = DAMAGE_STATES.index(reported_state) |
|
|
true_idx = DAMAGE_STATES.index(true_state) |
|
|
distance = abs(reported_idx - true_idx) |
|
|
|
|
|
base_error_prob = (1 - reliability) / 3.0 |
|
|
|
|
|
if distance == 1: |
|
|
return base_error_prob * 2.0 |
|
|
elif distance == 2: |
|
|
return base_error_prob * 1.0 |
|
|
else: |
|
|
return base_error_prob * 0.5 |
|
|
|
|
|
def bayesian_update(prior, reported_state, reliability): |
|
|
likelihood = np.array([ |
|
|
compute_likelihood(reported_state, state, reliability) |
|
|
for state in DAMAGE_STATES |
|
|
]) |
|
|
|
|
|
numerator = likelihood * prior |
|
|
denominator = np.sum(numerator) |
|
|
|
|
|
if denominator < 1e-10: |
|
|
return prior |
|
|
|
|
|
posterior = numerator / denominator |
|
|
return posterior |
|
|
|
|
|
def entropy(probs): |
|
|
probs = np.array(probs) |
|
|
probs = probs[probs > 0] |
|
|
return -np.sum(probs * np.log2(probs)) |
|
|
|
|
|
def bootstrap_beliefs(reports, prior, n_bootstrap=100): |
|
|
if len(reports) == 0: |
|
|
return { |
|
|
'mean': prior, |
|
|
'std_dev': np.zeros_like(prior) |
|
|
} |
|
|
|
|
|
bootstrap_posteriors = [] |
|
|
|
|
|
for _ in range(n_bootstrap): |
|
|
resampled_reports = [reports[i] for i in np.random.choice( |
|
|
len(reports), size=len(reports), replace=True |
|
|
)] |
|
|
|
|
|
belief = prior.copy() |
|
|
for report in resampled_reports: |
|
|
reliability = get_report_reliability(report['source']) |
|
|
belief = bayesian_update(belief, report['reported_state'], reliability) |
|
|
|
|
|
bootstrap_posteriors.append(belief) |
|
|
|
|
|
bootstrap_posteriors = np.array(bootstrap_posteriors) |
|
|
|
|
|
return { |
|
|
'mean': np.mean(bootstrap_posteriors, axis=0), |
|
|
'std_dev': np.std(bootstrap_posteriors, axis=0) |
|
|
} |
|
|
|
|
|
def process_building(building, all_reports, prior): |
|
|
building_id = building['building_id'] |
|
|
building_reports = all_reports[all_reports['building_id'] == building_id] |
|
|
building_reports = building_reports.sort_values('time_minutes') |
|
|
|
|
|
current_belief = prior.copy() |
|
|
report_list = [] |
|
|
|
|
|
for _, report in building_reports.iterrows(): |
|
|
reliability = get_report_reliability(report['source']) |
|
|
current_belief = bayesian_update( |
|
|
current_belief, |
|
|
report['reported_state'], |
|
|
reliability |
|
|
) |
|
|
report_list.append(report.to_dict()) |
|
|
|
|
|
bootstrap_result = bootstrap_beliefs(report_list, prior, n_bootstrap=50) |
|
|
|
|
|
return { |
|
|
'building_id': building_id, |
|
|
'p_none': current_belief[0], |
|
|
'p_minor': current_belief[1], |
|
|
'p_severe': current_belief[2], |
|
|
'p_collapse': current_belief[3], |
|
|
'entropy': entropy(current_belief), |
|
|
'num_reports': len(building_reports) |
|
|
} |
|
|
|
|
|
def run_inference(buildings_df, reports_df, n_samples=5000): |
|
|
results = [] |
|
|
|
|
|
for _, building in buildings_df.iterrows(): |
|
|
prior = np.array([ |
|
|
building['p_none'], |
|
|
building['p_minor'], |
|
|
building['p_severe'], |
|
|
building['p_collapse'] |
|
|
]) |
|
|
|
|
|
result = process_building(building, reports_df, prior) |
|
|
|
|
|
|
|
|
posterior = np.array([ |
|
|
result['p_none'], |
|
|
result['p_minor'], |
|
|
result['p_severe'], |
|
|
result['p_collapse'] |
|
|
]) |
|
|
posterior = posterior / posterior.sum() |
|
|
|
|
|
samples = np.random.choice([0,1,2,3], size=n_samples, p=posterior) |
|
|
collapse_samples = (samples == 3).astype(float) |
|
|
result['p_collapse_std'] = collapse_samples.std() |
|
|
|
|
|
results.append(result) |
|
|
|
|
|
return pd.DataFrame(results) |
|
|
|
|
|
|
|
|
def compute_decision_metrics(buildings_df, beliefs_df, n_teams): |
|
|
merged = buildings_df[['building_id', 'true_damage', 'occupancy', 'p_none', 'p_minor', 'p_severe', 'p_collapse']].merge( |
|
|
beliefs_df[['building_id', 'p_none', 'p_minor', 'p_severe', 'p_collapse']], |
|
|
on='building_id', |
|
|
suffixes=('_prior', '_posterior') |
|
|
) |
|
|
|
|
|
merged['at_risk_true'] = merged.apply(lambda row: { |
|
|
'collapse': 0.9 * row['occupancy'], |
|
|
'severe': 0.4 * row['occupancy'], |
|
|
'minor': 0.05 * row['occupancy'], |
|
|
'none': 0 |
|
|
}[row['true_damage']], axis=1) |
|
|
|
|
|
merged['expected_at_risk'] = ( |
|
|
merged['p_collapse_posterior'] * 0.9 * merged['occupancy'] + |
|
|
merged['p_severe_posterior'] * 0.4 * merged['occupancy'] + |
|
|
merged['p_minor_posterior'] * 0.05 * merged['occupancy'] |
|
|
) |
|
|
|
|
|
bayesian_top = merged.nlargest(n_teams, 'expected_at_risk') |
|
|
bayesian_saved = bayesian_top['at_risk_true'].sum() |
|
|
|
|
|
naive_top = merged.nlargest(n_teams, 'p_collapse_prior') |
|
|
naive_saved = naive_top['at_risk_true'].sum() |
|
|
|
|
|
return { |
|
|
'bayesian_lives_saved': int(bayesian_saved), |
|
|
'naive_lives_saved': int(naive_saved), |
|
|
'improvement': int(bayesian_saved - naive_saved), |
|
|
'improvement_pct': (bayesian_saved - naive_saved) / naive_saved * 100 if naive_saved > 0 else 0 |
|
|
} |