audio-classifier / utils.py
ahmedtarekabd's picture
Add Models & files.
4c8f740
import os
import json
import joblib
import numpy as np
import mlflow
import mlflow.sklearn
from mlflow.tracking import MlflowClient
from config import AUDIO_CACHE, FEATURES_CACHE, MODELS_DIR
#* Audio Caching
def cache_audio(data: np.ndarray, filename: str = "default", force_update=False):
path = AUDIO_CACHE / f"{filename}.npy"
if force_update or not path.exists():
np.save(path, data)
def load_cached_audio(filename: str = "default"):
path = AUDIO_CACHE / f"{filename}.npy"
return np.load(path) if path.exists() else None
#* Feature Caching
def cache_features(X, y, feature_name: str = "features", label_name: str = "labels", force_update=False):
X_path = FEATURES_CACHE / f"{feature_name}.npy"
y_path = FEATURES_CACHE / f"{label_name}.npy"
if force_update or not X_path.exists() or not y_path.exists():
np.save(X_path, X)
np.save(y_path, y)
def load_cached_features(feature_name: str = "features", label_name: str = "labels"):
X_path = FEATURES_CACHE / f"{feature_name}.npy"
y_path = FEATURES_CACHE / f"{label_name}.npy"
if X_path.exists() and y_path.exists():
return np.load(X_path), np.load(y_path)
return None, None
#* Model Caching
def cache_model(model, best_params: dict, model_name: str = None, save_option='default', force_update=False):
model_class = model.__class__.__name__
model_folder = MODELS_DIR / (model_name or model_class)
model_folder.mkdir(exist_ok=True)
model_path = model_folder / ("model.pkl" if save_option == "joblib" else "model.cbm")
params_path = model_folder / "best_params.json"
# Save model
if force_update or not model_path.exists():
if save_option == "joblib":
joblib.dump(model, model_path)
else:
model.save_model(model_path)
# Save best params
if force_update or not params_path.exists():
with open(params_path, "w") as f:
json.dump(best_params, f, indent=2)
def load_model(model_class, model_name: str = None, save_option='default'):
model_class_name = model_class.__name__
model_folder = MODELS_DIR / (model_name or model_class_name)
model_path = model_folder / ("model.pkl" if save_option == "joblib" else "model.cbm")
params_path = model_folder / "best_params.json"
if not model_path.exists() or not params_path.exists():
return None, None
with open(params_path, "r") as f:
best_params = json.load(f)
if save_option == "joblib":
model = joblib.load(model_path)
else:
model = model_class()
model.load_model(model_path)
return model, best_params
# === Utility: MLflow Helpers ===
def list_top_mlflow_runs(metric="f1-score", top_n=5):
client = MlflowClient()
runs = mlflow.search_runs(experiment_ids=["0"], order_by=[f"metrics.weighted avg.{metric} DESC"])
return runs[["run_id", "params.model_type", f"metrics.weighted avg.{metric}"]].head(top_n)
def load_mlflow_model(run_id):
client = MlflowClient()
run = client.get_run(run_id)
model = mlflow.sklearn.load_model(f"runs:/{run_id}/model")
params = run.data.params
metrics = run.data.metrics
return model, params, metrics