Spaces:
Sleeping
Sleeping
| import os | |
| import sys | |
| import pandas as pd | |
| import numpy as np | |
| from datetime import datetime, timedelta | |
| import random | |
# Karnataka 2026 public holidays (partial list covering the training window),
# stored as plain dates so the per-hour holiday check is a set lookup.
_HOLIDAY_DATES_2026 = frozenset(
    holiday.date()
    for holiday in (
        datetime(2026, 4, 3),   # Good Friday
        datetime(2026, 4, 14),  # Ambedkar Jayanthi
        datetime(2026, 4, 20),  # Basava Jayanthi
        datetime(2026, 5, 1),   # May Day
        datetime(2026, 5, 28),  # Bakrid
    )
)

# Column order of the generated CSV. Pinning it here guarantees a header row
# even when no records are produced (e.g. days=0 or num_meters=0).
_RECORD_COLUMNS = [
    "timestamp", "meter_id", "consumer_type", "reported_kw",
    "v_r", "v_y", "v_b", "i_r", "i_y", "i_b",
    "power_factor", "is_holiday", "is_weekend", "peer_dev_pct",
    "is_anomaly", "tamper_type", "temperature_c", "solar_ghi",
]


def _build_meter_profiles(num_meters, py_rng):
    """Return one static profile dict per meter (type, tamper type, FP type)."""
    profiles = []
    for index in range(num_meters):
        is_commercial = index % 10 >= 7  # indices 7-9 of every ten -> 30% commercial
        is_thief = py_rng.random() < 0.05  # 5% thieves

        tamper_type = "Normal"
        if is_thief:
            if is_commercial:
                tamper_type = py_rng.choice(["Phase Bypass", "ESD Attack", "Magnetic Interference"])
            else:
                tamper_type = py_rng.choice(["Subsidized Abuse", "PF Mismatch", "Phase Bypass"])

        # Benign "false positive" behaviours are only assigned to honest meters;
        # the short-circuit means no random draw is consumed for thieves.
        is_fp = not is_thief and py_rng.random() < 0.05
        fp_type = "None"
        if is_fp:
            fp_type = py_rng.choice(["Solar Mask", "Vacation Filter"])

        profiles.append({
            "meter_id": f"MTR-{index:04d}",
            "type": "Commercial" if is_commercial else "Domestic",
            "tamper_type": tamper_type,
            "fp_type": fp_type,
            "baseline_kw": 2.5 if is_commercial else 0.8,
        })
    return profiles


def _simulate_weather(hour_of_day, is_night, np_rng):
    """Return (temperature_c, solar_ghi) for one hour of the day."""
    # Daytime temperature follows a sinusoid peaking at 14:00; at night it is
    # replaced by a flat 22 C level with its own noise term.
    base_temp = 25.0 + 8.0 * np.sin(np.pi * (hour_of_day - 8) / 12)
    if is_night:
        base_temp = 22.0 + np_rng.normal(0, 1.0)
    temp_c = round(max(20.0, base_temp + np_rng.normal(0, 2.0)), 1)

    # Solar radiation is a half-sine between 06:00 and 18:00 (peak at 12:00),
    # clamped so noise can never push it below zero.
    solar_ghi = 0.0
    if 6 <= hour_of_day <= 18:
        solar_ghi = round(800.0 * np.sin(np.pi * (hour_of_day - 6) / 12) + np_rng.normal(0, 50.0), 1)
    solar_ghi = max(0.0, solar_ghi)
    return temp_c, solar_ghi


def generate_training_data(days=30, num_meters=900, output_path="training_data.csv", seed=None):
    """Generate synthetic hourly smart-meter readings and write them to a CSV.

    One row is produced per meter per hour, starting at 2026-04-01 00:00.
    Roughly 5% of meters are given a tamper behaviour and roughly 5% of the
    remaining meters a benign "false positive" behaviour (solar export or
    vacation), both chosen at random.

    Args:
        days: Number of days to simulate (24 rows per meter per day).
        num_meters: Number of meters; 30% are commercial, the rest domestic.
        output_path: Destination CSV path (overwritten if it exists).
        seed: Optional integer seed. When given, all randomness comes from
            private generators seeded with it, so the same arguments always
            produce the same file. When None (default), the global ``random``
            and ``numpy.random`` streams are used, as before.

    Returns:
        None. The data is written to ``output_path``; a header row is written
        even if no records were generated.
    """
    print(f"Generating {days} days of high-fidelity historical data for {num_meters} meters...")

    # The global modules and the seeded generator objects expose the same
    # random()/choice() and normal()/uniform() methods, so they are
    # interchangeable below.
    py_rng = random if seed is None else random.Random(seed)
    np_rng = np.random if seed is None else np.random.RandomState(seed)

    start_date = datetime(2026, 4, 1)  # Start before May 2026 to have historical baseline
    meters = _build_meter_profiles(num_meters, py_rng)

    records = []
    for hour_offset in range(days * 24):
        current_time = start_date + timedelta(hours=hour_offset)
        hour_of_day = current_time.hour
        is_night = 23 <= hour_of_day or hour_of_day <= 6
        is_day = 10 <= hour_of_day <= 16
        is_holiday = current_time.date() in _HOLIDAY_DATES_2026
        is_weekend = 1 if current_time.weekday() >= 5 else 0
        timestamp = current_time.strftime("%Y-%m-%d %H:%M:%S")

        # Reference "cluster average" used for the peer-deviation feature.
        # NOTE(review): the domestic reference is <= 0 for hours 15-21, so the
        # peer deviation of domestic meters is forced to 0 in that window.
        # Kept unchanged because other code may rely on this formula.
        cluster_ref_by_type = {
            "Domestic": 0.8 + 1.2 * np.sin(np.pi * hour_of_day / 12),
            "Commercial": 3.5 + 2.0 * np.sin(np.pi * (hour_of_day - 6) / 12),
        }

        temp_c, solar_ghi = _simulate_weather(hour_of_day, is_night, np_rng)

        # Expected load per consumer type depends only on the hour, so compute
        # it once per hour instead of once per meter.
        domestic_kw = (
            0.5
            + 1.0 * np.exp(-0.5 * ((hour_of_day - 8) / 2) ** 2)
            + 2.5 * np.exp(-0.5 * ((hour_of_day - 20) / 2) ** 2)
        )
        commercial_kw = 2.0 + 8.5 * np.exp(-0.5 * ((hour_of_day - 14) / 4) ** 2)
        if is_holiday:
            # Businesses mostly close on holidays; households use a bit more.
            commercial_kw *= 0.3
            domestic_kw *= 1.2
        base_kw_by_type = {"Domestic": domestic_kw, "Commercial": commercial_kw}

        for meter in meters:
            c_type = meter["type"]
            tamper = meter["tamper_type"]
            fp = meter["fp_type"]
            base_kw = base_kw_by_type[c_type]
            cluster_ref = cluster_ref_by_type[c_type]

            # True consumption: expected load with 10% noise, floored at 0.1 kW.
            actual_kw = max(0.1, base_kw + np_rng.normal(0, base_kw * 0.1))
            reported_kw = actual_kw
            pf = np_rng.uniform(0.92, 0.99)

            v_avg = np_rng.normal(230, 2.0)
            v_r = v_avg + np_rng.normal(0, 0.5)
            v_y = v_avg + np_rng.normal(0, 0.5)
            v_b = v_avg + np_rng.normal(0, 0.5)

            # Per-phase current derived from the true load (always > 0 because
            # of the 0.1 kW floor above), before any tampering is applied.
            total_current = (actual_kw * 1000) / (v_avg * pf * 3)
            i_r = total_current + np_rng.normal(0, 0.1)
            i_y = total_current + np_rng.normal(0, 0.1)
            i_b = total_current + np_rng.normal(0, 0.1)

            # Thief meters tamper in about 80% of hours; honest meters never
            # consume a random draw here thanks to the short-circuit.
            is_tampering_now = (tamper != "Normal") and (py_rng.random() < 0.8)
            if is_tampering_now:
                if tamper == "Phase Bypass":
                    reported_kw *= 0.4
                    i_b = 0.0  # Physical bypass signature
                elif tamper == "PF Mismatch":
                    pf = np_rng.uniform(0.4, 0.6)
                elif tamper == "ESD Attack":
                    v_r = v_y = v_b = 0.0
                    reported_kw = 0.0
                elif tamper == "Magnetic Interference":
                    reported_kw *= 0.7
                    pf = np_rng.uniform(0.65, 0.75)
                elif tamper == "Subsidized Abuse":
                    if is_night:
                        reported_kw += 12.0

            # Benign patterns that resemble theft (false positives).
            if fp == "Solar Mask" and is_day:
                reported_kw = max(0.05, reported_kw - 3.0)
            elif fp == "Vacation Filter":
                reported_kw = 0.06 + np_rng.normal(0, 0.01)

            # Engineered feature: relative deviation from the cluster reference.
            peer_dev = (reported_kw - cluster_ref) / cluster_ref if cluster_ref > 0 else 0

            records.append({
                "timestamp": timestamp,
                "meter_id": meter["meter_id"],
                "consumer_type": c_type,
                "reported_kw": round(reported_kw, 3),
                "v_r": round(v_r, 1), "v_y": round(v_y, 1), "v_b": round(v_b, 1),
                "i_r": round(i_r, 2), "i_y": round(i_y, 2), "i_b": round(i_b, 2),
                "power_factor": round(pf, 2),
                "is_holiday": 1 if is_holiday else 0,
                "is_weekend": is_weekend,
                "peer_dev_pct": round(peer_dev * 100, 2),
                # NOTE(review): the label is per meter, not per reading; thief
                # meters are marked 1 even in hours with no tampering applied.
                "is_anomaly": 1 if tamper != "Normal" else 0,
                "tamper_type": tamper,
                "temperature_c": temp_c,
                "solar_ghi": solar_ghi,
            })

    df = pd.DataFrame(records, columns=_RECORD_COLUMNS)
    df.to_csv(output_path, index=False)
    print(f"Saved {len(df)} records with enhanced indicators to {output_path}")
if __name__ == "__main__":
    # Script entry point: generate the dataset using the function's defaults.
    generate_training_data()