import numpy as np import pandas as pd DAMAGE_STATES = ['none', 'minor', 'severe', 'collapse'] REPORT_SOURCES = { 'automated_sensor': {'reliability': 0.85, 'weight': 0.25}, 'phone_call': {'reliability': 0.60, 'weight': 0.50}, 'social_media': {'reliability': 0.40, 'weight': 0.20}, 'inspector': {'reliability': 0.95, 'weight': 0.05} } def generate_noisy_report(true_state, reliability): if np.random.random() < reliability: return true_state other_states = [s for s in DAMAGE_STATES if s != true_state] true_idx = DAMAGE_STATES.index(true_state) weights = [] for state in other_states: state_idx = DAMAGE_STATES.index(state) distance = abs(state_idx - true_idx) weight = 1.0 / (distance + 1) weights.append(weight) weights = np.array(weights) / sum(weights) return np.random.choice(other_states, p=weights) def generate_building_reports(building, lambda_rates, max_time_hours=3, seed=None): if seed is not None: np.random.seed(seed) true_damage = building['true_damage'] lambda_rate = lambda_rates[true_damage] reports = [] current_time = 0 max_time_minutes = max_time_hours * 60 while current_time < max_time_minutes: lambda_per_minute = lambda_rate / 60.0 time_to_next = np.random.exponential(1.0 / lambda_per_minute) current_time += time_to_next if current_time >= max_time_minutes: break source_types = list(REPORT_SOURCES.keys()) source_weights = [REPORT_SOURCES[s]['weight'] for s in source_types] source = np.random.choice(source_types, p=source_weights) reliability = REPORT_SOURCES[source]['reliability'] reported_state = generate_noisy_report(true_damage, reliability) reports.append({ 'time_minutes': round(current_time, 2), 'source': source, 'reported_state': reported_state, 'building_id': building['building_id'], 'building_type': building['building_type'], 'true_damage': building['true_damage'] }) return reports def generate_all_reports(buildings_df, lambda_rates=None, max_time_hours=3, seed=42): if lambda_rates is None: lambda_rates = { 'collapse': 8.0, 'severe': 3.0, 'minor': 0.8, 'none': 0.2 } np.random.seed(seed) all_reports = [] for idx, building in buildings_df.iterrows(): building_seed = seed + idx if seed is not None else None reports = generate_building_reports( building.to_dict(), lambda_rates, max_time_hours, building_seed ) all_reports.extend(reports) reports_df = pd.DataFrame(all_reports) if len(reports_df) > 0: reports_df = reports_df.sort_values('time_minutes').reset_index(drop=True) return reports_df def get_report_reliability(source): return REPORT_SOURCES[source]['reliability']