mnoorchenar committed on
Commit
fb9037e
·
1 Parent(s): 7cbdfa5

Update 2026-03-26 18:00:31

Browse files
app.py CHANGED
@@ -236,15 +236,15 @@ def api_pipeline_execute(pipeline_id):
236
  except ValueError as e:
237
  return jsonify({"error": str(e)}), 400
238
 
239
- # Built-in engine is the default — zero scheduler latency, runs immediately.
240
- # Set USE_AIRFLOW=true in the environment to hand off to Apache Airflow instead.
241
- if os.environ.get("USE_AIRFLOW", "").lower() in ("1", "true"):
242
- try:
243
- from mlops.airflow_runner import trigger_pipeline
244
- exec_id = trigger_pipeline(pipeline_id, context=context, dag=dag)
245
- return jsonify({"exec_id": exec_id, "status": "queued", "engine": "airflow"})
246
- except Exception as af_err:
247
- app.logger.warning(f"Airflow trigger failed, falling back to built-in engine: {af_err}")
248
 
249
  exec_id = execute_dag(dag, context)
250
  return jsonify({"exec_id": exec_id, "status": "queued", "engine": "builtin"})
 
236
  except ValueError as e:
237
  return jsonify({"error": str(e)}), 400
238
 
239
+ # Apache Airflow is the primary engine; built-in DAG engine is the fallback.
240
+ try:
241
+ from mlops.airflow_runner import trigger_pipeline
242
+ exec_id = trigger_pipeline(pipeline_id, context=context, dag=dag)
243
+ return jsonify({"exec_id": exec_id, "status": "queued", "engine": "airflow"})
244
+ except ImportError:
245
+ app.logger.warning("Airflow not installed β€” using built-in DAG engine")
246
+ except Exception as af_err:
247
+ app.logger.warning(f"Airflow trigger failed, using built-in engine: {af_err}")
248
 
249
  exec_id = execute_dag(dag, context)
250
  return jsonify({"exec_id": exec_id, "status": "queued", "engine": "builtin"})
mlops/algorithms.py CHANGED
@@ -369,6 +369,49 @@ ALGORITHMS = {
369
  }
370
 
371
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
372
  def get_algorithm(task: str, category: str, name: str) -> dict:
373
  """Retrieve algorithm config by task / category / name."""
374
  try:
 
369
  }
370
 
371
 
372
+ # ── Hyperparameter search grids (keyed by model class name) ───────────────────
373
+ HPO_GRIDS: dict[str, dict] = {
374
+ # Linear Models
375
+ "LogisticRegression": {"C": [0.001, 0.01, 0.1, 1, 10, 100], "solver": ["lbfgs", "saga"], "max_iter": [500, 1000]},
376
+ "RidgeClassifier": {"alpha": [0.001, 0.01, 0.1, 1.0, 10.0, 100.0]},
377
+ "SGDClassifier": {"loss": ["hinge", "log_loss", "modified_huber"], "alpha": [0.0001, 0.001, 0.01]},
378
+ "Ridge": {"alpha": [0.001, 0.01, 0.1, 1.0, 10.0, 100.0]},
379
+ "Lasso": {"alpha": [0.001, 0.01, 0.1, 1.0, 10.0]},
380
+ "ElasticNet": {"alpha": [0.001, 0.01, 0.1, 1.0], "l1_ratio": [0.1, 0.3, 0.5, 0.7, 0.9]},
381
+ "HuberRegressor": {"epsilon": [1.1, 1.35, 1.5, 2.0], "alpha": [0.0001, 0.001, 0.01, 0.1]},
382
+ # Tree-Based
383
+ "DecisionTreeClassifier":{"max_depth": [3, 5, 7, 10, None], "min_samples_split": [2, 5, 10], "min_samples_leaf": [1, 2, 4], "criterion": ["gini", "entropy"]},
384
+ "DecisionTreeRegressor": {"max_depth": [3, 5, 7, 10, None], "min_samples_split": [2, 5, 10], "min_samples_leaf": [1, 2, 4]},
385
+ "RandomForestClassifier":{"n_estimators": [50, 100, 200, 300], "max_depth": [None, 5, 10, 20], "min_samples_split": [2, 5, 10], "max_features": ["sqrt", "log2"]},
386
+ "RandomForestRegressor": {"n_estimators": [50, 100, 200, 300], "max_depth": [None, 5, 10, 20], "min_samples_split": [2, 5, 10], "max_features": ["sqrt", "log2", None]},
387
+ "ExtraTreesClassifier": {"n_estimators": [50, 100, 200], "max_depth": [None, 5, 10, 20], "min_samples_split": [2, 5, 10]},
388
+ "ExtraTreesRegressor": {"n_estimators": [50, 100, 200], "max_depth": [None, 5, 10, 20], "min_samples_split": [2, 5, 10]},
389
+ # Boosting
390
+ "GradientBoostingClassifier": {"n_estimators": [50, 100, 200], "learning_rate": [0.01, 0.05, 0.1, 0.2], "max_depth": [3, 4, 5, 6], "subsample": [0.7, 0.8, 0.9, 1.0]},
391
+ "GradientBoostingRegressor": {"n_estimators": [50, 100, 200], "learning_rate": [0.01, 0.05, 0.1, 0.2], "max_depth": [3, 4, 5, 6], "subsample": [0.7, 0.8, 0.9, 1.0]},
392
+ "AdaBoostClassifier": {"n_estimators": [50, 100, 200], "learning_rate": [0.01, 0.1, 0.5, 1.0]},
393
+ "AdaBoostRegressor": {"n_estimators": [50, 100, 200], "learning_rate": [0.01, 0.1, 0.5, 1.0], "loss": ["linear", "square", "exponential"]},
394
+ "XGBClassifier": {"n_estimators": [50, 100, 200], "learning_rate": [0.01, 0.05, 0.1, 0.2], "max_depth": [3, 4, 5, 6, 7], "subsample": [0.7, 0.8, 0.9], "colsample_bytree": [0.7, 0.8, 0.9]},
395
+ "XGBRegressor": {"n_estimators": [50, 100, 200], "learning_rate": [0.01, 0.05, 0.1, 0.2], "max_depth": [3, 4, 5, 6, 7], "subsample": [0.7, 0.8, 0.9], "colsample_bytree": [0.7, 0.8, 0.9]},
396
+ "LGBMClassifier": {"n_estimators": [50, 100, 200], "learning_rate": [0.01, 0.05, 0.1, 0.2], "max_depth": [-1, 5, 10, 20], "num_leaves": [15, 31, 63, 127], "subsample": [0.7, 0.8, 0.9, 1.0]},
397
+ "LGBMRegressor": {"n_estimators": [50, 100, 200], "learning_rate": [0.01, 0.05, 0.1, 0.2], "max_depth": [-1, 5, 10, 20], "num_leaves": [15, 31, 63, 127], "subsample": [0.7, 0.8, 0.9, 1.0]},
398
+ # SVM
399
+ "SVC": {"C": [0.1, 1, 10, 100], "gamma": ["scale", "auto", 0.001, 0.01, 0.1]},
400
+ "SVR": {"C": [0.1, 1, 10, 100], "gamma": ["scale", "auto"], "epsilon": [0.01, 0.1, 0.5, 1.0]},
401
+ # KNN
402
+ "KNeighborsClassifier": {"n_neighbors": [3, 5, 7, 9, 11, 15], "weights": ["uniform", "distance"], "metric": ["euclidean", "manhattan"]},
403
+ "KNeighborsRegressor": {"n_neighbors": [3, 5, 7, 9, 11, 15], "weights": ["uniform", "distance"], "metric": ["euclidean", "manhattan"]},
404
+ # MLP
405
+ "MLPClassifier": {"hidden_layer_sizes": [(64,), (128,), (64, 32), (128, 64), (256, 128)], "learning_rate_init": [0.001, 0.005, 0.01], "alpha": [0.0001, 0.001, 0.01], "activation": ["relu", "tanh"]},
406
+ "MLPRegressor": {"hidden_layer_sizes": [(64,), (128,), (64, 32), (128, 64), (256, 128)], "learning_rate_init": [0.001, 0.005, 0.01], "alpha": [0.0001, 0.001, 0.01], "activation": ["relu", "tanh"]},
407
+ }
408
+
409
+
410
+ def get_hpo_grid(cls) -> dict:
411
+ """Return the hyperparameter search grid for a model class, or {} if none defined."""
412
+ return HPO_GRIDS.get(cls.__name__, {})
413
+
414
+
415
  def get_algorithm(task: str, category: str, name: str) -> dict:
416
  """Retrieve algorithm config by task / category / name."""
417
  try:
pipelines/pipeline_defs.py CHANGED
@@ -34,15 +34,17 @@ def _feature_engineering(ctx, results):
34
 
35
  def _train_model(ctx, results):
36
  from mlops.datasets import load_dataset
37
- from mlops.algorithms import get_algorithm
38
  from sklearn.preprocessing import StandardScaler
39
  import mlflow, mlflow.sklearn
40
 
41
- log = ctx.get("_log")
42
- ds = ctx.get("dataset", "Iris Flowers")
43
- cat = ctx.get("category", "Tree-Based")
44
- alg = ctx.get("algorithm", "Random Forest")
45
- task = ctx.get("task_type", "classification")
 
 
46
 
47
  if log: log(f"Dataset: {ds} Β· Algorithm: {alg} ({cat})")
48
  X_train, X_test, y_train, y_test, _ = load_dataset(ds)
@@ -50,13 +52,33 @@ def _train_model(ctx, results):
50
  X_tr = scaler.fit_transform(X_train)
51
  X_te = scaler.transform(X_test)
52
 
53
- cfg = get_algorithm(task, cat, alg)
54
- model = cfg["class"](**cfg["params"])
55
- if log: log(f"Fitting {alg} on {len(X_train)} training samples…")
56
- model.fit(X_tr, y_train)
57
- score = model.score(X_te, y_test)
58
- if log: log(f"Evaluation complete Β· score = {score:.4f}")
59
- return f"Model trained Β· score={score:.4f}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
  def _evaluate_model(ctx, results):
62
  log = ctx.get("_log")
 
34
 
35
  def _train_model(ctx, results):
36
  from mlops.datasets import load_dataset
37
+ from mlops.algorithms import get_algorithm, get_hpo_grid
38
  from sklearn.preprocessing import StandardScaler
39
  import mlflow, mlflow.sklearn
40
 
41
+ log = ctx.get("_log")
42
+ ds = ctx.get("dataset", "Iris Flowers")
43
+ cat = ctx.get("category", "Tree-Based")
44
+ alg = ctx.get("algorithm", "Random Forest")
45
+ task = ctx.get("task_type", "classification")
46
+ hpo_enabled = ctx.get("hpo_enabled", False)
47
+ hpo_trials = max(5, int(ctx.get("hpo_trials", 20)))
48
 
49
  if log: log(f"Dataset: {ds} Β· Algorithm: {alg} ({cat})")
50
  X_train, X_test, y_train, y_test, _ = load_dataset(ds)
 
52
  X_tr = scaler.fit_transform(X_train)
53
  X_te = scaler.transform(X_test)
54
 
55
+ cfg = get_algorithm(task, cat, alg)
56
+ grid = get_hpo_grid(cfg["class"]) if hpo_enabled else {}
57
+
58
+ if hpo_enabled and grid:
59
+ from sklearn.model_selection import RandomizedSearchCV
60
+ if log: log(f"Hyperparameter search Β· {hpo_trials} trials Β· 3-fold CV…")
61
+ search = RandomizedSearchCV(
62
+ cfg["class"](**cfg["params"]), grid,
63
+ n_iter=hpo_trials, cv=3, n_jobs=-1,
64
+ random_state=42, refit=True,
65
+ )
66
+ search.fit(X_tr, y_train)
67
+ model = search.best_estimator_
68
+ best = {k: v for k, v in search.best_params_.items()}
69
+ if log: log(f"Best params: {best}")
70
+ score = model.score(X_te, y_test)
71
+ if log: log(f"HPO complete Β· score = {score:.4f} (baseline without HPO may differ)")
72
+ return f"HPO score={score:.4f} Β· {best}"
73
+ else:
74
+ if hpo_enabled and not grid:
75
+ if log: log("No HPO grid defined for this algorithm β€” training with defaults")
76
+ model = cfg["class"](**cfg["params"])
77
+ if log: log(f"Fitting {alg} on {len(X_train)} training samples…")
78
+ model.fit(X_tr, y_train)
79
+ score = model.score(X_te, y_test)
80
+ if log: log(f"Evaluation complete Β· score = {score:.4f}")
81
+ return f"Model trained Β· score={score:.4f}"
82
 
83
  def _evaluate_model(ctx, results):
84
  log = ctx.get("_log")
templates/pipeline.html CHANGED
@@ -175,6 +175,28 @@
175
  .cfg-row-k { color: var(--text-muted); white-space: nowrap; padding-right: 8px; }
176
  .cfg-row-v { color: var(--text-primary); font-weight: 500; text-align: right; word-break: break-word; max-width: 62%; font-size: .77rem; }
177
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  /* ── Terminal ─────────────────────────────────────────────────────────────── */
179
  .ps-term {
180
  flex-shrink: 0; height: 34px; overflow: hidden;
@@ -302,10 +324,12 @@ let _seenLogs = 0;
302
 
303
  // Pipeline context — updated via config panel; used when running
304
  let pCtx = {
305
- dataset: Object.keys(DATASETS)[0] || 'Iris Flowers',
306
- category: 'Tree-Based',
307
- algorithm: 'Random Forest',
308
- task_type: 'classification',
 
 
309
  };
310
 
311
  // Layout
@@ -493,6 +517,27 @@ async function openConfig(taskId) {
493
  <div class="cfg-sec">
494
  <label class="cfg-lbl" for="cfg-alg">Algorithm</label>
495
  <select class="cfg-select" id="cfg-alg" onchange="pCtx.algorithm=this.value"><option>Loading…</option></select>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
496
  </div>`;
497
  }
498
 
@@ -582,6 +627,12 @@ function onDatasetChange(name) {
582
  }
583
  }
584
 
 
 
 
 
 
 
585
  async function onTtChange(tt) {
586
  pCtx.task_type = tt; pCtx.category=''; pCtx.algorithm='';
587
  await _loadAlgos(tt);
@@ -599,8 +650,14 @@ async function runPipeline() {
599
 
600
  const ctx = {};
601
  if (cur === 'training_pipeline') {
602
- Object.assign(ctx, { dataset:pCtx.dataset, category:pCtx.category,
603
- algorithm:pCtx.algorithm, task_type:pCtx.task_type });
 
 
 
 
 
 
604
  }
605
 
606
  try {
 
175
  .cfg-row-k { color: var(--text-muted); white-space: nowrap; padding-right: 8px; }
176
  .cfg-row-v { color: var(--text-primary); font-weight: 500; text-align: right; word-break: break-word; max-width: 62%; font-size: .77rem; }
177
 
178
+ /* ── HPO toggle ──────────────────────────────────────────────────────────── */
179
+ .hpo-toggle { position:relative; display:inline-block; width:34px; height:19px; flex-shrink:0; }
180
+ .hpo-toggle input { opacity:0; width:0; height:0; }
181
+ .hpo-knob {
182
+ position:absolute; cursor:pointer; inset:0;
183
+ background:var(--bg-tertiary); border:1px solid var(--border-color);
184
+ border-radius:20px; transition:.18s;
185
+ }
186
+ .hpo-knob::before {
187
+ content:''; position:absolute;
188
+ width:13px; height:13px; left:2px; top:2px;
189
+ background:var(--text-muted); border-radius:50%; transition:.18s;
190
+ }
191
+ .hpo-toggle input:checked + .hpo-knob { background:rgba(139,92,246,.2); border-color:var(--accent); }
192
+ .hpo-toggle input:checked + .hpo-knob::before { transform:translateX(15px); background:var(--accent); }
193
+ .cfg-num {
194
+ width:64px; background:var(--bg-tertiary); border:1px solid var(--border-color);
195
+ color:var(--text-primary); border-radius:6px; padding:3px 7px;
196
+ font-size:.8rem; text-align:right; outline:none;
197
+ }
198
+ .cfg-num:focus { border-color:var(--accent); }
199
+
200
  /* ── Terminal ─────────────────────────────────────────────────────────────── */
201
  .ps-term {
202
  flex-shrink: 0; height: 34px; overflow: hidden;
 
324
 
325
  // Pipeline context — updated via config panel; used when running
326
  let pCtx = {
327
+ dataset: Object.keys(DATASETS)[0] || 'Iris Flowers',
328
+ category: 'Tree-Based',
329
+ algorithm: 'Random Forest',
330
+ task_type: 'classification',
331
+ hpo_enabled: false,
332
+ hpo_trials: 20,
333
  };
334
 
335
  // Layout
 
517
  <div class="cfg-sec">
518
  <label class="cfg-lbl" for="cfg-alg">Algorithm</label>
519
  <select class="cfg-select" id="cfg-alg" onchange="pCtx.algorithm=this.value"><option>Loading…</option></select>
520
+ </div>
521
+ <div class="cfg-sec">
522
+ <span class="cfg-lbl">Hyperparameter Search</span>
523
+ <div class="cfg-row">
524
+ <span class="cfg-row-k">Enable HPO</span>
525
+ <label class="hpo-toggle">
526
+ <input type="checkbox" id="cfg-hpo" onchange="onHpoToggle(this.checked)"${pCtx.hpo_enabled?' checked':''}>
527
+ <span class="hpo-knob"></span>
528
+ </label>
529
+ </div>
530
+ <div id="cfg-hpo-opts"${pCtx.hpo_enabled?'':' style="display:none"'}>
531
+ <div class="cfg-row">
532
+ <span class="cfg-row-k">Search trials</span>
533
+ <input type="number" class="cfg-num" id="cfg-trials" min="5" max="100" value="${pCtx.hpo_trials}"
534
+ onchange="pCtx.hpo_trials=Math.max(5,Math.min(100,+this.value))">
535
+ </div>
536
+ <div class="cfg-row">
537
+ <span class="cfg-row-k">CV folds</span>
538
+ <span class="cfg-row-v" style="color:var(--text-muted)">3</span>
539
+ </div>
540
+ </div>
541
  </div>`;
542
  }
543
 
 
627
  }
628
  }
629
 
630
+ function onHpoToggle(enabled) {
631
+ pCtx.hpo_enabled = enabled;
632
+ const el = document.getElementById('cfg-hpo-opts');
633
+ if (el) el.style.display = enabled ? '' : 'none';
634
+ }
635
+
636
  async function onTtChange(tt) {
637
  pCtx.task_type = tt; pCtx.category=''; pCtx.algorithm='';
638
  await _loadAlgos(tt);
 
650
 
651
  const ctx = {};
652
  if (cur === 'training_pipeline') {
653
+ Object.assign(ctx, {
654
+ dataset: pCtx.dataset,
655
+ category: pCtx.category,
656
+ algorithm: pCtx.algorithm,
657
+ task_type: pCtx.task_type,
658
+ hpo_enabled: pCtx.hpo_enabled || false,
659
+ hpo_trials: pCtx.hpo_trials || 20,
660
+ });
661
  }
662
 
663
  try {