Spaces:
Sleeping
Sleeping
Commit ·
fb9037e
1
Parent(s): 7cbdfa5
Update 2026-03-26 18:00:31
Browse files
- app.py +9 -9
- mlops/algorithms.py +43 -0
- pipelines/pipeline_defs.py +35 -13
- templates/pipeline.html +63 -6
app.py
CHANGED
|
@@ -236,15 +236,15 @@ def api_pipeline_execute(pipeline_id):
|
|
| 236 |
except ValueError as e:
|
| 237 |
return jsonify({"error": str(e)}), 400
|
| 238 |
|
| 239 |
-
#
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
|
| 249 |
exec_id = execute_dag(dag, context)
|
| 250 |
return jsonify({"exec_id": exec_id, "status": "queued", "engine": "builtin"})
|
|
|
|
| 236 |
except ValueError as e:
|
| 237 |
return jsonify({"error": str(e)}), 400
|
| 238 |
|
| 239 |
+
# Apache Airflow is the primary engine; built-in DAG engine is the fallback.
|
| 240 |
+
try:
|
| 241 |
+
from mlops.airflow_runner import trigger_pipeline
|
| 242 |
+
exec_id = trigger_pipeline(pipeline_id, context=context, dag=dag)
|
| 243 |
+
return jsonify({"exec_id": exec_id, "status": "queued", "engine": "airflow"})
|
| 244 |
+
except ImportError:
|
| 245 |
+
app.logger.warning("Airflow not installed — using built-in DAG engine")
|
| 246 |
+
except Exception as af_err:
|
| 247 |
+
app.logger.warning(f"Airflow trigger failed, using built-in engine: {af_err}")
|
| 248 |
|
| 249 |
exec_id = execute_dag(dag, context)
|
| 250 |
return jsonify({"exec_id": exec_id, "status": "queued", "engine": "builtin"})
|
mlops/algorithms.py
CHANGED
|
@@ -369,6 +369,49 @@ ALGORITHMS = {
|
|
| 369 |
}
|
| 370 |
|
| 371 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 372 |
def get_algorithm(task: str, category: str, name: str) -> dict:
|
| 373 |
"""Retrieve algorithm config by task / category / name."""
|
| 374 |
try:
|
|
|
|
| 369 |
}
|
| 370 |
|
| 371 |
|
| 372 |
+
# ── Hyperparameter search grids (keyed by model class name) ───────────────────
|
| 373 |
+
HPO_GRIDS: dict[str, dict] = {
|
| 374 |
+
# Linear Models
|
| 375 |
+
"LogisticRegression": {"C": [0.001, 0.01, 0.1, 1, 10, 100], "solver": ["lbfgs", "saga"], "max_iter": [500, 1000]},
|
| 376 |
+
"RidgeClassifier": {"alpha": [0.001, 0.01, 0.1, 1.0, 10.0, 100.0]},
|
| 377 |
+
"SGDClassifier": {"loss": ["hinge", "log_loss", "modified_huber"], "alpha": [0.0001, 0.001, 0.01]},
|
| 378 |
+
"Ridge": {"alpha": [0.001, 0.01, 0.1, 1.0, 10.0, 100.0]},
|
| 379 |
+
"Lasso": {"alpha": [0.001, 0.01, 0.1, 1.0, 10.0]},
|
| 380 |
+
"ElasticNet": {"alpha": [0.001, 0.01, 0.1, 1.0], "l1_ratio": [0.1, 0.3, 0.5, 0.7, 0.9]},
|
| 381 |
+
"HuberRegressor": {"epsilon": [1.1, 1.35, 1.5, 2.0], "alpha": [0.0001, 0.001, 0.01, 0.1]},
|
| 382 |
+
# Tree-Based
|
| 383 |
+
"DecisionTreeClassifier":{"max_depth": [3, 5, 7, 10, None], "min_samples_split": [2, 5, 10], "min_samples_leaf": [1, 2, 4], "criterion": ["gini", "entropy"]},
|
| 384 |
+
"DecisionTreeRegressor": {"max_depth": [3, 5, 7, 10, None], "min_samples_split": [2, 5, 10], "min_samples_leaf": [1, 2, 4]},
|
| 385 |
+
"RandomForestClassifier":{"n_estimators": [50, 100, 200, 300], "max_depth": [None, 5, 10, 20], "min_samples_split": [2, 5, 10], "max_features": ["sqrt", "log2"]},
|
| 386 |
+
"RandomForestRegressor": {"n_estimators": [50, 100, 200, 300], "max_depth": [None, 5, 10, 20], "min_samples_split": [2, 5, 10], "max_features": ["sqrt", "log2", None]},
|
| 387 |
+
"ExtraTreesClassifier": {"n_estimators": [50, 100, 200], "max_depth": [None, 5, 10, 20], "min_samples_split": [2, 5, 10]},
|
| 388 |
+
"ExtraTreesRegressor": {"n_estimators": [50, 100, 200], "max_depth": [None, 5, 10, 20], "min_samples_split": [2, 5, 10]},
|
| 389 |
+
# Boosting
|
| 390 |
+
"GradientBoostingClassifier": {"n_estimators": [50, 100, 200], "learning_rate": [0.01, 0.05, 0.1, 0.2], "max_depth": [3, 4, 5, 6], "subsample": [0.7, 0.8, 0.9, 1.0]},
|
| 391 |
+
"GradientBoostingRegressor": {"n_estimators": [50, 100, 200], "learning_rate": [0.01, 0.05, 0.1, 0.2], "max_depth": [3, 4, 5, 6], "subsample": [0.7, 0.8, 0.9, 1.0]},
|
| 392 |
+
"AdaBoostClassifier": {"n_estimators": [50, 100, 200], "learning_rate": [0.01, 0.1, 0.5, 1.0]},
|
| 393 |
+
"AdaBoostRegressor": {"n_estimators": [50, 100, 200], "learning_rate": [0.01, 0.1, 0.5, 1.0], "loss": ["linear", "square", "exponential"]},
|
| 394 |
+
"XGBClassifier": {"n_estimators": [50, 100, 200], "learning_rate": [0.01, 0.05, 0.1, 0.2], "max_depth": [3, 4, 5, 6, 7], "subsample": [0.7, 0.8, 0.9], "colsample_bytree": [0.7, 0.8, 0.9]},
|
| 395 |
+
"XGBRegressor": {"n_estimators": [50, 100, 200], "learning_rate": [0.01, 0.05, 0.1, 0.2], "max_depth": [3, 4, 5, 6, 7], "subsample": [0.7, 0.8, 0.9], "colsample_bytree": [0.7, 0.8, 0.9]},
|
| 396 |
+
"LGBMClassifier": {"n_estimators": [50, 100, 200], "learning_rate": [0.01, 0.05, 0.1, 0.2], "max_depth": [-1, 5, 10, 20], "num_leaves": [15, 31, 63, 127], "subsample": [0.7, 0.8, 0.9, 1.0]},
|
| 397 |
+
"LGBMRegressor": {"n_estimators": [50, 100, 200], "learning_rate": [0.01, 0.05, 0.1, 0.2], "max_depth": [-1, 5, 10, 20], "num_leaves": [15, 31, 63, 127], "subsample": [0.7, 0.8, 0.9, 1.0]},
|
| 398 |
+
# SVM
|
| 399 |
+
"SVC": {"C": [0.1, 1, 10, 100], "gamma": ["scale", "auto", 0.001, 0.01, 0.1]},
|
| 400 |
+
"SVR": {"C": [0.1, 1, 10, 100], "gamma": ["scale", "auto"], "epsilon": [0.01, 0.1, 0.5, 1.0]},
|
| 401 |
+
# KNN
|
| 402 |
+
"KNeighborsClassifier": {"n_neighbors": [3, 5, 7, 9, 11, 15], "weights": ["uniform", "distance"], "metric": ["euclidean", "manhattan"]},
|
| 403 |
+
"KNeighborsRegressor": {"n_neighbors": [3, 5, 7, 9, 11, 15], "weights": ["uniform", "distance"], "metric": ["euclidean", "manhattan"]},
|
| 404 |
+
# MLP
|
| 405 |
+
"MLPClassifier": {"hidden_layer_sizes": [(64,), (128,), (64, 32), (128, 64), (256, 128)], "learning_rate_init": [0.001, 0.005, 0.01], "alpha": [0.0001, 0.001, 0.01], "activation": ["relu", "tanh"]},
|
| 406 |
+
"MLPRegressor": {"hidden_layer_sizes": [(64,), (128,), (64, 32), (128, 64), (256, 128)], "learning_rate_init": [0.001, 0.005, 0.01], "alpha": [0.0001, 0.001, 0.01], "activation": ["relu", "tanh"]},
|
| 407 |
+
}
|
| 408 |
+
|
| 409 |
+
|
| 410 |
+
def get_hpo_grid(cls) -> dict:
|
| 411 |
+
"""Return the hyperparameter search grid for a model class, or {} if none defined."""
|
| 412 |
+
return HPO_GRIDS.get(cls.__name__, {})
|
| 413 |
+
|
| 414 |
+
|
| 415 |
def get_algorithm(task: str, category: str, name: str) -> dict:
|
| 416 |
"""Retrieve algorithm config by task / category / name."""
|
| 417 |
try:
|
pipelines/pipeline_defs.py
CHANGED
|
@@ -34,15 +34,17 @@ def _feature_engineering(ctx, results):
|
|
| 34 |
|
| 35 |
def _train_model(ctx, results):
|
| 36 |
from mlops.datasets import load_dataset
|
| 37 |
-
from mlops.algorithms import get_algorithm
|
| 38 |
from sklearn.preprocessing import StandardScaler
|
| 39 |
import mlflow, mlflow.sklearn
|
| 40 |
|
| 41 |
-
log
|
| 42 |
-
ds
|
| 43 |
-
cat
|
| 44 |
-
alg
|
| 45 |
-
task
|
|
|
|
|
|
|
| 46 |
|
| 47 |
if log: log(f"Dataset: {ds} · Algorithm: {alg} ({cat})")
|
| 48 |
X_train, X_test, y_train, y_test, _ = load_dataset(ds)
|
|
@@ -50,13 +52,33 @@ def _train_model(ctx, results):
|
|
| 50 |
X_tr = scaler.fit_transform(X_train)
|
| 51 |
X_te = scaler.transform(X_test)
|
| 52 |
|
| 53 |
-
cfg
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
def _evaluate_model(ctx, results):
|
| 62 |
log = ctx.get("_log")
|
|
|
|
| 34 |
|
| 35 |
def _train_model(ctx, results):
|
| 36 |
from mlops.datasets import load_dataset
|
| 37 |
+
from mlops.algorithms import get_algorithm, get_hpo_grid
|
| 38 |
from sklearn.preprocessing import StandardScaler
|
| 39 |
import mlflow, mlflow.sklearn
|
| 40 |
|
| 41 |
+
log = ctx.get("_log")
|
| 42 |
+
ds = ctx.get("dataset", "Iris Flowers")
|
| 43 |
+
cat = ctx.get("category", "Tree-Based")
|
| 44 |
+
alg = ctx.get("algorithm", "Random Forest")
|
| 45 |
+
task = ctx.get("task_type", "classification")
|
| 46 |
+
hpo_enabled = ctx.get("hpo_enabled", False)
|
| 47 |
+
hpo_trials = max(5, int(ctx.get("hpo_trials", 20)))
|
| 48 |
|
| 49 |
if log: log(f"Dataset: {ds} · Algorithm: {alg} ({cat})")
|
| 50 |
X_train, X_test, y_train, y_test, _ = load_dataset(ds)
|
|
|
|
| 52 |
X_tr = scaler.fit_transform(X_train)
|
| 53 |
X_te = scaler.transform(X_test)
|
| 54 |
|
| 55 |
+
cfg = get_algorithm(task, cat, alg)
|
| 56 |
+
grid = get_hpo_grid(cfg["class"]) if hpo_enabled else {}
|
| 57 |
+
|
| 58 |
+
if hpo_enabled and grid:
|
| 59 |
+
from sklearn.model_selection import RandomizedSearchCV
|
| 60 |
+
if log: log(f"Hyperparameter search · {hpo_trials} trials · 3-fold CV…")
|
| 61 |
+
search = RandomizedSearchCV(
|
| 62 |
+
cfg["class"](**cfg["params"]), grid,
|
| 63 |
+
n_iter=hpo_trials, cv=3, n_jobs=-1,
|
| 64 |
+
random_state=42, refit=True,
|
| 65 |
+
)
|
| 66 |
+
search.fit(X_tr, y_train)
|
| 67 |
+
model = search.best_estimator_
|
| 68 |
+
best = {k: v for k, v in search.best_params_.items()}
|
| 69 |
+
if log: log(f"Best params: {best}")
|
| 70 |
+
score = model.score(X_te, y_test)
|
| 71 |
+
if log: log(f"HPO complete · score = {score:.4f} (baseline without HPO may differ)")
|
| 72 |
+
return f"HPO score={score:.4f} · {best}"
|
| 73 |
+
else:
|
| 74 |
+
if hpo_enabled and not grid:
|
| 75 |
+
if log: log("No HPO grid defined for this algorithm — training with defaults")
|
| 76 |
+
model = cfg["class"](**cfg["params"])
|
| 77 |
+
if log: log(f"Fitting {alg} on {len(X_train)} training samples…")
|
| 78 |
+
model.fit(X_tr, y_train)
|
| 79 |
+
score = model.score(X_te, y_test)
|
| 80 |
+
if log: log(f"Evaluation complete · score = {score:.4f}")
|
| 81 |
+
return f"Model trained · score={score:.4f}"
|
| 82 |
|
| 83 |
def _evaluate_model(ctx, results):
|
| 84 |
log = ctx.get("_log")
|
templates/pipeline.html
CHANGED
|
@@ -175,6 +175,28 @@
|
|
| 175 |
.cfg-row-k { color: var(--text-muted); white-space: nowrap; padding-right: 8px; }
|
| 176 |
.cfg-row-v { color: var(--text-primary); font-weight: 500; text-align: right; word-break: break-word; max-width: 62%; font-size: .77rem; }
|
| 177 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
/* ββ Terminal βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 179 |
.ps-term {
|
| 180 |
flex-shrink: 0; height: 34px; overflow: hidden;
|
|
@@ -302,10 +324,12 @@ let _seenLogs = 0;
|
|
| 302 |
|
| 303 |
// Pipeline context — updated via config panel; used when running
|
| 304 |
let pCtx = {
|
| 305 |
-
dataset:
|
| 306 |
-
category:
|
| 307 |
-
algorithm:
|
| 308 |
-
task_type:
|
|
|
|
|
|
|
| 309 |
};
|
| 310 |
|
| 311 |
// Layout
|
|
@@ -493,6 +517,27 @@ async function openConfig(taskId) {
|
|
| 493 |
<div class="cfg-sec">
|
| 494 |
<label class="cfg-lbl" for="cfg-alg">Algorithm</label>
|
| 495 |
<select class="cfg-select" id="cfg-alg" onchange="pCtx.algorithm=this.value"><option>Loading…</option></select>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 496 |
</div>`;
|
| 497 |
}
|
| 498 |
|
|
@@ -582,6 +627,12 @@ function onDatasetChange(name) {
|
|
| 582 |
}
|
| 583 |
}
|
| 584 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 585 |
async function onTtChange(tt) {
|
| 586 |
pCtx.task_type = tt; pCtx.category=''; pCtx.algorithm='';
|
| 587 |
await _loadAlgos(tt);
|
|
@@ -599,8 +650,14 @@ async function runPipeline() {
|
|
| 599 |
|
| 600 |
const ctx = {};
|
| 601 |
if (cur === 'training_pipeline') {
|
| 602 |
-
Object.assign(ctx, {
|
| 603 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 604 |
}
|
| 605 |
|
| 606 |
try {
|
|
|
|
| 175 |
.cfg-row-k { color: var(--text-muted); white-space: nowrap; padding-right: 8px; }
|
| 176 |
.cfg-row-v { color: var(--text-primary); font-weight: 500; text-align: right; word-break: break-word; max-width: 62%; font-size: .77rem; }
|
| 177 |
|
| 178 |
+
/* ββ HPO toggle ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 179 |
+
.hpo-toggle { position:relative; display:inline-block; width:34px; height:19px; flex-shrink:0; }
|
| 180 |
+
.hpo-toggle input { opacity:0; width:0; height:0; }
|
| 181 |
+
.hpo-knob {
|
| 182 |
+
position:absolute; cursor:pointer; inset:0;
|
| 183 |
+
background:var(--bg-tertiary); border:1px solid var(--border-color);
|
| 184 |
+
border-radius:20px; transition:.18s;
|
| 185 |
+
}
|
| 186 |
+
.hpo-knob::before {
|
| 187 |
+
content:''; position:absolute;
|
| 188 |
+
width:13px; height:13px; left:2px; top:2px;
|
| 189 |
+
background:var(--text-muted); border-radius:50%; transition:.18s;
|
| 190 |
+
}
|
| 191 |
+
.hpo-toggle input:checked + .hpo-knob { background:rgba(139,92,246,.2); border-color:var(--accent); }
|
| 192 |
+
.hpo-toggle input:checked + .hpo-knob::before { transform:translateX(15px); background:var(--accent); }
|
| 193 |
+
.cfg-num {
|
| 194 |
+
width:64px; background:var(--bg-tertiary); border:1px solid var(--border-color);
|
| 195 |
+
color:var(--text-primary); border-radius:6px; padding:3px 7px;
|
| 196 |
+
font-size:.8rem; text-align:right; outline:none;
|
| 197 |
+
}
|
| 198 |
+
.cfg-num:focus { border-color:var(--accent); }
|
| 199 |
+
|
| 200 |
/* ββ Terminal βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 201 |
.ps-term {
|
| 202 |
flex-shrink: 0; height: 34px; overflow: hidden;
|
|
|
|
| 324 |
|
| 325 |
// Pipeline context — updated via config panel; used when running
|
| 326 |
let pCtx = {
|
| 327 |
+
dataset: Object.keys(DATASETS)[0] || 'Iris Flowers',
|
| 328 |
+
category: 'Tree-Based',
|
| 329 |
+
algorithm: 'Random Forest',
|
| 330 |
+
task_type: 'classification',
|
| 331 |
+
hpo_enabled: false,
|
| 332 |
+
hpo_trials: 20,
|
| 333 |
};
|
| 334 |
|
| 335 |
// Layout
|
|
|
|
| 517 |
<div class="cfg-sec">
|
| 518 |
<label class="cfg-lbl" for="cfg-alg">Algorithm</label>
|
| 519 |
<select class="cfg-select" id="cfg-alg" onchange="pCtx.algorithm=this.value"><option>Loading…</option></select>
|
| 520 |
+
</div>
|
| 521 |
+
<div class="cfg-sec">
|
| 522 |
+
<span class="cfg-lbl">Hyperparameter Search</span>
|
| 523 |
+
<div class="cfg-row">
|
| 524 |
+
<span class="cfg-row-k">Enable HPO</span>
|
| 525 |
+
<label class="hpo-toggle">
|
| 526 |
+
<input type="checkbox" id="cfg-hpo" onchange="onHpoToggle(this.checked)"${pCtx.hpo_enabled?' checked':''}>
|
| 527 |
+
<span class="hpo-knob"></span>
|
| 528 |
+
</label>
|
| 529 |
+
</div>
|
| 530 |
+
<div id="cfg-hpo-opts"${pCtx.hpo_enabled?'':' style="display:none"'}>
|
| 531 |
+
<div class="cfg-row">
|
| 532 |
+
<span class="cfg-row-k">Search trials</span>
|
| 533 |
+
<input type="number" class="cfg-num" id="cfg-trials" min="5" max="100" value="${pCtx.hpo_trials}"
|
| 534 |
+
onchange="pCtx.hpo_trials=Math.max(5,Math.min(100,+this.value))">
|
| 535 |
+
</div>
|
| 536 |
+
<div class="cfg-row">
|
| 537 |
+
<span class="cfg-row-k">CV folds</span>
|
| 538 |
+
<span class="cfg-row-v" style="color:var(--text-muted)">3</span>
|
| 539 |
+
</div>
|
| 540 |
+
</div>
|
| 541 |
</div>`;
|
| 542 |
}
|
| 543 |
|
|
|
|
| 627 |
}
|
| 628 |
}
|
| 629 |
|
| 630 |
+
function onHpoToggle(enabled) {
|
| 631 |
+
pCtx.hpo_enabled = enabled;
|
| 632 |
+
const el = document.getElementById('cfg-hpo-opts');
|
| 633 |
+
if (el) el.style.display = enabled ? '' : 'none';
|
| 634 |
+
}
|
| 635 |
+
|
| 636 |
async function onTtChange(tt) {
|
| 637 |
pCtx.task_type = tt; pCtx.category=''; pCtx.algorithm='';
|
| 638 |
await _loadAlgos(tt);
|
|
|
|
| 650 |
|
| 651 |
const ctx = {};
|
| 652 |
if (cur === 'training_pipeline') {
|
| 653 |
+
Object.assign(ctx, {
|
| 654 |
+
dataset: pCtx.dataset,
|
| 655 |
+
category: pCtx.category,
|
| 656 |
+
algorithm: pCtx.algorithm,
|
| 657 |
+
task_type: pCtx.task_type,
|
| 658 |
+
hpo_enabled: pCtx.hpo_enabled || false,
|
| 659 |
+
hpo_trials: pCtx.hpo_trials || 20,
|
| 660 |
+
});
|
| 661 |
}
|
| 662 |
|
| 663 |
try {
|