Spaces:
Sleeping
Sleeping
| import os | |
| import json | |
| import joblib | |
| import numpy as np | |
| import mlflow | |
| import mlflow.sklearn | |
| from mlflow.tracking import MlflowClient | |
| from config import AUDIO_CACHE, FEATURES_CACHE, MODELS_DIR | |
| #* Audio Caching | |
def cache_audio(data: np.ndarray, filename: str = "default", force_update=False):
    """Save an audio array to ``AUDIO_CACHE / f"{filename}.npy"``.

    The file is written only when it does not exist yet or when
    ``force_update`` is true; otherwise the existing file is left untouched.

    Args:
        data: Array to persist with ``np.save``.
        filename: Cache entry name, without the ``.npy`` extension.
        force_update: Overwrite an existing cache file when true.
    """
    path = AUDIO_CACHE / f"{filename}.npy"
    if force_update or not path.exists():
        # np.save does not create missing directories, so make sure the
        # target directory exists before writing (first run, or a filename
        # that contains a sub-folder).
        path.parent.mkdir(parents=True, exist_ok=True)
        np.save(path, data)
def load_cached_audio(filename: str = "default"):
    """Load a cached audio array by name.

    Args:
        filename: Cache entry name, without the ``.npy`` extension.

    Returns:
        The array read from ``AUDIO_CACHE / f"{filename}.npy"``, or ``None``
        when that file does not exist.
    """
    cached_file = AUDIO_CACHE / f"{filename}.npy"
    if not cached_file.exists():
        return None
    return np.load(cached_file)
| #* Feature Caching | |
def cache_features(X, y, feature_name: str = "features", label_name: str = "labels", force_update=False):
    """Save a feature/label pair as two ``.npy`` files under ``FEATURES_CACHE``.

    Both files are (re)written together whenever ``force_update`` is true or
    at least one of them is missing, so the pair on disk always comes from the
    same call.

    Args:
        X: Features to persist with ``np.save``.
        y: Labels to persist with ``np.save``.
        feature_name: File stem for the features file.
        label_name: File stem for the labels file.
        force_update: Overwrite existing cache files when true.
    """
    X_path = FEATURES_CACHE / f"{feature_name}.npy"
    y_path = FEATURES_CACHE / f"{label_name}.npy"
    if force_update or not X_path.exists() or not y_path.exists():
        # np.save does not create missing directories; the two stems may also
        # point into different sub-folders, so prepare both parents.
        X_path.parent.mkdir(parents=True, exist_ok=True)
        y_path.parent.mkdir(parents=True, exist_ok=True)
        np.save(X_path, X)
        np.save(y_path, y)
def load_cached_features(feature_name: str = "features", label_name: str = "labels"):
    """Load a cached feature/label pair.

    Args:
        feature_name: File stem of the features file under ``FEATURES_CACHE``.
        label_name: File stem of the labels file under ``FEATURES_CACHE``.

    Returns:
        ``(features, labels)`` as loaded by ``np.load`` when both files exist,
        otherwise ``(None, None)``.
    """
    features_file = FEATURES_CACHE / f"{feature_name}.npy"
    labels_file = FEATURES_CACHE / f"{label_name}.npy"

    # A lone file is treated the same as no cache at all.
    if not (features_file.exists() and labels_file.exists()):
        return None, None

    return np.load(features_file), np.load(labels_file)
| #* Model Caching | |
def cache_model(model, best_params: dict, model_name: str = None, save_option='default', force_update=False):
    """Persist a trained model and its best hyper-parameters under ``MODELS_DIR``.

    The artefacts are stored in ``MODELS_DIR / (model_name or <model class name>)``:

    * ``model.pkl`` written with ``joblib.dump`` when ``save_option == "joblib"``,
      otherwise ``model.cbm`` written through the model's own ``save_model``.
    * ``best_params.json`` holding ``best_params``.

    Each file is written only if it is missing or ``force_update`` is true.

    Args:
        model: Fitted model. Must expose ``save_model(path)`` unless
            ``save_option`` is ``"joblib"``.
        best_params: Hyper-parameters to store as JSON. NumPy scalars and
            arrays are converted to plain Python values.
        model_name: Folder name; defaults to the model's class name.
        save_option: ``"joblib"`` for pickle-based storage, anything else for
            the model's native ``save_model``.
        force_update: Overwrite existing files when true.

    Raises:
        TypeError: If ``best_params`` contains a value that cannot be encoded
            as JSON. No params file is created or modified in that case.
    """

    def _to_jsonable(value):
        # The json module cannot encode NumPy scalars/arrays on its own.
        if isinstance(value, np.generic):
            return value.item()
        if isinstance(value, np.ndarray):
            return value.tolist()
        raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")

    model_folder = MODELS_DIR / (model_name or model.__class__.__name__)
    # parents=True so the very first save works even if MODELS_DIR is missing.
    model_folder.mkdir(parents=True, exist_ok=True)

    use_joblib = save_option == "joblib"
    model_path = model_folder / ("model.pkl" if use_joblib else "model.cbm")
    params_path = model_folder / "best_params.json"

    # Save model
    if force_update or not model_path.exists():
        if use_joblib:
            joblib.dump(model, model_path)
        else:
            # Hand over a plain string path: not every save_model
            # implementation accepts pathlib.Path objects.
            model.save_model(os.fspath(model_path))

    # Save best params
    if force_update or not params_path.exists():
        # Encode first, write second: an encoding error must not leave a
        # truncated best_params.json behind, because an existing (broken) file
        # would block later caching and make loading fail.
        payload = json.dumps(best_params, indent=2, default=_to_jsonable)
        with open(params_path, "w") as f:
            f.write(payload)
def load_model(model_class, model_name: str = None, save_option='default'):
    """Load a cached model and its stored hyper-parameters from ``MODELS_DIR``.

    Looks in ``MODELS_DIR / (model_name or model_class.__name__)`` for
    ``best_params.json`` plus ``model.pkl`` (``save_option == "joblib"``) or
    ``model.cbm`` (any other ``save_option``).

    Args:
        model_class: Class of the model. Its name is the default folder name,
            and for non-joblib storage it is instantiated without arguments
            and asked to ``load_model`` the file.
        model_name: Folder name; defaults to ``model_class.__name__``.
        save_option: ``"joblib"`` for pickle-based storage, anything else for
            the model's native ``load_model``.

    Returns:
        ``(model, best_params)``, or ``(None, None)`` when either the model
        file or the params file is missing.
    """
    model_folder = MODELS_DIR / (model_name or model_class.__name__)
    use_joblib = save_option == "joblib"
    model_path = model_folder / ("model.pkl" if use_joblib else "model.cbm")
    params_path = model_folder / "best_params.json"

    # Both artefacts are required; a partial cache counts as a miss.
    if not (model_path.exists() and params_path.exists()):
        return None, None

    with open(params_path, "r") as f:
        best_params = json.load(f)

    if use_joblib:
        # NOTE: joblib.load unpickles the file, which can execute arbitrary
        # code - only load model files from a trusted cache directory.
        model = joblib.load(model_path)
    else:
        model = model_class()
        # Hand over a plain string path: not every load_model implementation
        # accepts pathlib.Path objects.
        model.load_model(os.fspath(model_path))

    return model, best_params
| # === Utility: MLflow Helpers === | |
def list_top_mlflow_runs(metric="f1-score", top_n=5, experiment_ids=("0",)):
    """Return the best MLflow runs ranked by a ``weighted avg`` metric.

    Args:
        metric: Metric suffix; the metric looked up is
            ``"weighted avg.<metric>"``.
        top_n: Maximum number of rows to return.
        experiment_ids: Experiments to search. Defaults to experiment ``"0"``.

    Returns:
        A DataFrame with the columns ``run_id``, ``params.model_type`` and
        ``metrics.weighted avg.<metric>``, sorted by the metric in descending
        order and limited to ``top_n`` rows. Columns that no run provides are
        filled with NaN; the frame is empty when no runs are found.
    """
    metric_key = f"weighted avg.{metric}"
    metric_column = f"metrics.{metric_key}"

    runs = mlflow.search_runs(
        experiment_ids=list(experiment_ids),
        # The key contains a space (and possibly a dash), so it is wrapped in
        # backticks as MLflow's search syntax prescribes for such keys.
        order_by=[f"metrics.`{metric_key}` DESC"],
    )

    # reindex instead of runs[[...]]: the result only has columns for values
    # that were actually logged, and plain selection would raise KeyError
    # when there are no runs or none of them logged this metric/param.
    wanted_columns = ["run_id", "params.model_type", metric_column]
    return runs.reindex(columns=wanted_columns).head(top_n)
def load_mlflow_model(run_id):
    """Fetch a logged sklearn model together with its run metadata.

    Args:
        run_id: ID of the MLflow run whose ``model`` artifact should be loaded.

    Returns:
        ``(model, params, metrics)`` where ``model`` comes from
        ``runs:/<run_id>/model`` and ``params`` / ``metrics`` are the run's
        ``data.params`` and ``data.metrics``.
    """
    # Look the run up first so an unknown run_id fails before any model loading.
    run_data = MlflowClient().get_run(run_id).data
    loaded_model = mlflow.sklearn.load_model(f"runs:/{run_id}/model")
    return loaded_model, run_data.params, run_data.metrics