sound-broken / analyze_features.py
mitvho09's picture
Upload Space app
edb671a verified
Raw
History Blame Contribute Delete
2.48 kB
"""Analyze DCASE feature distributions to design better rules."""
import joblib
import numpy as np
from audio_analyzer import AudioFeatures
# Names of the feature columns. The position of a name in this list is used
# as the column index into the cached feature matrix ``X``
# (see ``FEATURE_NAMES.index(feat)`` in the analysis loop), so order matters.
FEATURE_NAMES = [
    "duration_s",
    "rms_db",
    "rms_variance",
    "zero_crossing_rate",
    "spectral_centroid_hz",
    "spectral_bandwidth_hz",
    "spectral_rolloff_hz",
    "dominant_frequency_hz",
    "harmonic_ratio",
    "onset_rate_per_sec",
    "has_regular_pattern",
    "pattern_interval_ms",
    "peak_db",
    "anomaly_score",
]
# Maps a machine-type name (as found in the cached ``machines`` labels) to the
# appliance label printed next to it in the report. Machine types missing
# from this table are printed as '???' by the analysis loop.
MACHINE_TO_APPLIANCE = {
    "fan": "Electric fan",
    "pump": "Dishwasher",
    "slider": "Power drill",
    "ToyCar": "Car engine",
    "ToyTrain": "Tumble dryer",
    "gearbox": "Electric motor (generic)",
    "bearing": "Electric motor (generic)",
    "valve": "Refrigerator/Freezer",
}
# Load the precomputed feature cache from the working directory.
# NOTE: joblib.load unpickles the file, so only point this at a cache file
# you produced yourself or otherwise trust.
cache = joblib.load("features_cache_r1.5_s42.joblib")

# X is indexed below as X[row_indices, column], y is compared against 0/1,
# and machines holds one machine-type name per sample.
# NOTE(review): exact container types/dtypes are not visible here -- confirm
# against the script that writes the cache.
X = cache["X"]
y = cache["y"]
machines = cache["machines"]
# Per-machine feature analysis.
#
# For every machine type that has at least one anomalous (y == 1) and one
# normal (y == 0) sample, print a header followed by one line per key feature
# comparing the anomalous and normal distributions.

# Key features to compare, in print order. Loop-invariant, so defined once.
key_feats = [
    "rms_db", "rms_variance", "zero_crossing_rate", "spectral_centroid_hz",
    "harmonic_ratio", "onset_rate_per_sec", "has_regular_pattern",
    "pattern_interval_ms", "dominant_frequency_hz", "anomaly_score",
]
# Resolve each feature name to its column in X once, not once per machine.
key_cols = [(feat, FEATURE_NAMES.index(feat)) for feat in key_feats]


def _median_and_iqr(vals):
    """Return ``(median, 25th percentile, 75th percentile)`` of *vals*."""
    p25, p75 = np.percentile(vals, [25, 75])
    return np.median(vals), p25, p75


# Group sample indices by machine in a single pass instead of rescanning the
# whole `machines` sequence for every machine type.
indices_by_machine = {}
for i, name in enumerate(machines):
    indices_by_machine.setdefault(name, []).append(i)

for machine in sorted(indices_by_machine):
    idx = indices_by_machine[machine]
    anom_idx = [i for i in idx if y[i] == 1]
    norm_idx = [i for i in idx if y[i] == 0]
    # A comparison needs both classes; skip machines that lack either one.
    if not anom_idx or not norm_idx:
        continue

    print(f"\n{'='*60}")
    print(f" {machine.upper()} (n={len(idx)}, anom={len(anom_idx)}, norm={len(norm_idx)})")
    print(f" Appliance: {MACHINE_TO_APPLIANCE.get(machine, '???')}")
    print(f"{'='*60}")

    for feat, j in key_cols:
        anom_vals = X[anom_idx, j]
        norm_vals = X[norm_idx, j]

        if feat == "has_regular_pattern":
            # The mean of this column is reported as the share of samples
            # that have a regular pattern.
            anom_pct = np.mean(anom_vals)
            norm_pct = np.mean(norm_vals)
            # Both percentages use the same `.0f` format; the normal side
            # previously had no format spec and printed raw float digits.
            print(f" {feat:<28} anom: {anom_pct*100:.0f}% have pattern norm: {norm_pct*100:.0f}% have pattern")
        else:
            a_med, a_p25, a_p75 = _median_and_iqr(anom_vals)
            n_med, n_p25, n_p75 = _median_and_iqr(norm_vals)
            print(f" {feat:<28} anom: {a_med:8.2f} [{a_p25:.2f}-{a_p75:.2f}] "
                  f"norm: {n_med:8.2f} [{n_p25:.2f}-{n_p75:.2f}] "
                  f"delta: {a_med-n_med:+.2f}")