"""Analyze DCASE feature distributions to design better rules.""" import joblib import numpy as np from audio_analyzer import AudioFeatures FEATURE_NAMES = [ "duration_s", "rms_db", "rms_variance", "zero_crossing_rate", "spectral_centroid_hz", "spectral_bandwidth_hz", "spectral_rolloff_hz", "dominant_frequency_hz", "harmonic_ratio", "onset_rate_per_sec", "has_regular_pattern", "pattern_interval_ms", "peak_db", "anomaly_score", ] MACHINE_TO_APPLIANCE = { "fan": "Electric fan", "pump": "Dishwasher", "slider": "Power drill", "ToyCar": "Car engine", "ToyTrain": "Tumble dryer", "gearbox": "Electric motor (generic)", "bearing": "Electric motor (generic)", "valve": "Refrigerator/Freezer", } cache = joblib.load("features_cache_r1.5_s42.joblib") X, y, machines = cache["X"], cache["y"], cache["machines"] # Per-machine feature analysis for machine in sorted(set(machines)): idx = [i for i in range(len(machines)) if machines[i] == machine] anom_idx = [i for i in idx if y[i] == 1] norm_idx = [i for i in idx if y[i] == 0] if not anom_idx or not norm_idx: continue print(f"\n{'='*60}") print(f" {machine.upper()} (n={len(idx)}, anom={len(anom_idx)}, norm={len(norm_idx)})") print(f" Appliance: {MACHINE_TO_APPLIANCE.get(machine, '???')}") print(f"{'='*60}") # Key features to compare key_feats = ["rms_db", "rms_variance", "zero_crossing_rate", "spectral_centroid_hz", "harmonic_ratio", "onset_rate_per_sec", "has_regular_pattern", "pattern_interval_ms", "dominant_frequency_hz", "anomaly_score"] for feat in key_feats: j = FEATURE_NAMES.index(feat) anom_vals = X[anom_idx, j] norm_vals = X[norm_idx, j] if feat == "has_regular_pattern": anom_pct = np.mean(anom_vals) norm_pct = np.mean(norm_vals) print(f" {feat:<28} anom: {anom_pct*100:.0f}% have pattern norm: {norm_pct*100}% have pattern") else: a_med = np.median(anom_vals) n_med = np.median(norm_vals) a_p25, a_p75 = np.percentile(anom_vals, 25), np.percentile(anom_vals, 75) n_p25, n_p75 = np.percentile(norm_vals, 25), np.percentile(norm_vals, 75) print(f" {feat:<28} anom: {a_med:8.2f} [{a_p25:.2f}-{a_p75:.2f}] " f"norm: {n_med:8.2f} [{n_p25:.2f}-{n_p75:.2f}] " f"delta: {a_med-n_med:+.2f}")