Spaces:
Runtime error
Runtime error
| """Analyze DCASE feature distributions to design better rules.""" | |
| import joblib | |
| import numpy as np | |
| from audio_analyzer import AudioFeatures | |
# Column order of the cached feature matrix: the analysis below looks up a
# feature's column with FEATURE_NAMES.index(name), so the order matters.
FEATURE_NAMES = [
    "duration_s",
    "rms_db",
    "rms_variance",
    "zero_crossing_rate",
    "spectral_centroid_hz",
    "spectral_bandwidth_hz",
    "spectral_rolloff_hz",
    "dominant_frequency_hz",
    "harmonic_ratio",
    "onset_rate_per_sec",
    "has_regular_pattern",
    "pattern_interval_ms",
    "peak_db",
    "anomaly_score",
]

# Maps each machine type found in the cache to the household appliance label
# shown in the report header; unmapped machines are reported as '???'.
MACHINE_TO_APPLIANCE = {
    "fan": "Electric fan",
    "pump": "Dishwasher",
    "slider": "Power drill",
    "ToyCar": "Car engine",
    "ToyTrain": "Tumble dryer",
    "gearbox": "Electric motor (generic)",
    "bearing": "Electric motor (generic)",
    "valve": "Refrigerator/Freezer",
}
# Features compared between anomalous and normal clips for every machine.
KEY_FEATURES = [
    "rms_db",
    "rms_variance",
    "zero_crossing_rate",
    "spectral_centroid_hz",
    "harmonic_ratio",
    "onset_rate_per_sec",
    "has_regular_pattern",
    "pattern_interval_ms",
    "dominant_frequency_hz",
    "anomaly_score",
]
# The column of each feature does not depend on the machine, so resolve the
# (name, column) pairs once instead of on every loop iteration.
KEY_FEATURE_COLUMNS = [(feat, FEATURE_NAMES.index(feat)) for feat in KEY_FEATURES]

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only point this at a cache file from a trusted source.
cache = joblib.load("features_cache_r1.5_s42.joblib")
# X is indexed as X[rows, column] below, with columns following FEATURE_NAMES;
# y holds the per-row label (1 is treated as anomalous, 0 as normal) and
# machines the per-row machine name.
X, y, machines = cache["X"], cache["y"], cache["machines"]

# Per-machine feature analysis
for machine in sorted(set(machines)):
    idx = [i for i, name in enumerate(machines) if name == machine]
    anom_idx = [i for i in idx if y[i] == 1]
    norm_idx = [i for i in idx if y[i] == 0]
    # Comparing the two classes needs at least one sample of each.
    if not anom_idx or not norm_idx:
        continue

    print(f"\n{'='*60}")
    print(f" {machine.upper()} (n={len(idx)}, anom={len(anom_idx)}, norm={len(norm_idx)})")
    print(f" Appliance: {MACHINE_TO_APPLIANCE.get(machine, '???')}")
    print(f"{'='*60}")

    # Key features to compare
    for feat, j in KEY_FEATURE_COLUMNS:
        anom_vals = X[anom_idx, j]
        norm_vals = X[norm_idx, j]
        if feat == "has_regular_pattern":
            # The mean of this flag is reported as the share of clips that
            # have a pattern, so a median/IQR summary is not used here.
            anom_pct = np.mean(anom_vals)
            norm_pct = np.mean(norm_vals)
            # Both percentages use the same :.0f format (the normal-class one
            # was previously printed with full float precision).
            print(f" {feat:<28} anom: {anom_pct*100:.0f}% have pattern norm: {norm_pct*100:.0f}% have pattern")
        else:
            a_med = np.median(anom_vals)
            n_med = np.median(norm_vals)
            # One percentile call per class yields both quartiles.
            a_p25, a_p75 = np.percentile(anom_vals, [25, 75])
            n_p25, n_p75 = np.percentile(norm_vals, [25, 75])
            print(f" {feat:<28} anom: {a_med:8.2f} [{a_p25:.2f}-{a_p75:.2f}] "
                  f"norm: {n_med:8.2f} [{n_p25:.2f}-{n_p75:.2f}] "
                  f"delta: {a_med-n_med:+.2f}")