Spaces:

mnoorchenar
/

AutoMLOps

Sleeping

App Files Files Community

mnoorchenar committed on Mar 26

Commit

fb9037e

1 Parent(s): 7cbdfa5

Update 2026-03-26 18:00:31

Browse files

Files changed (4) hide show

app.py +9 -9
mlops/algorithms.py +43 -0
pipelines/pipeline_defs.py +35 -13
templates/pipeline.html +63 -6

app.py CHANGED Viewed

@@ -236,15 +236,15 @@ def api_pipeline_execute(pipeline_id):
     except ValueError as e:
         return jsonify({"error": str(e)}), 400
-    # Built-in engine is the default — zero scheduler latency, runs immediately.
-    # Set USE_AIRFLOW=true in the environment to hand off to Apache Airflow instead.
-    if os.environ.get("USE_AIRFLOW", "").lower() in ("1", "true"):
-        try:
-            from mlops.airflow_runner import trigger_pipeline
-            exec_id = trigger_pipeline(pipeline_id, context=context, dag=dag)
-            return jsonify({"exec_id": exec_id, "status": "queued", "engine": "airflow"})
-        except Exception as af_err:
-            app.logger.warning(f"Airflow trigger failed, falling back to built-in engine: {af_err}")
     exec_id = execute_dag(dag, context)
     return jsonify({"exec_id": exec_id, "status": "queued", "engine": "builtin"})

     except ValueError as e:
         return jsonify({"error": str(e)}), 400
+    # Apache Airflow is the primary engine; built-in DAG engine is the fallback.
+    try:
+        from mlops.airflow_runner import trigger_pipeline
+        exec_id = trigger_pipeline(pipeline_id, context=context, dag=dag)
+        return jsonify({"exec_id": exec_id, "status": "queued", "engine": "airflow"})
+    except ImportError:
+        app.logger.warning("Airflow not installed — using built-in DAG engine")
+    except Exception as af_err:
+        app.logger.warning(f"Airflow trigger failed, using built-in engine: {af_err}")
     exec_id = execute_dag(dag, context)
     return jsonify({"exec_id": exec_id, "status": "queued", "engine": "builtin"})

mlops/algorithms.py CHANGED Viewed

@@ -369,6 +369,49 @@ ALGORITHMS = {
 }
 def get_algorithm(task: str, category: str, name: str) -> dict:
     """Retrieve algorithm config by task / category / name."""
     try:

 }
+# ── Hyperparameter search grids (keyed by model class name) ───────────────────
+HPO_GRIDS: dict[str, dict] = {  # model class __name__ -> {parameter name: [candidate values]}; read via get_hpo_grid()
+    # Linear Models
+    "LogisticRegression":    {"C": [0.001, 0.01, 0.1, 1, 10, 100], "solver": ["lbfgs", "saga"], "max_iter": [500, 1000]},
+    "RidgeClassifier":       {"alpha": [0.001, 0.01, 0.1, 1.0, 10.0, 100.0]},
+    "SGDClassifier":         {"loss": ["hinge", "log_loss", "modified_huber"], "alpha": [0.0001, 0.001, 0.01]},  # NOTE(review): "log_loss" presumably needs scikit-learn >= 1.1 — confirm pinned version
+    "Ridge":                 {"alpha": [0.001, 0.01, 0.1, 1.0, 10.0, 100.0]},
+    "Lasso":                 {"alpha": [0.001, 0.01, 0.1, 1.0, 10.0]},
+    "ElasticNet":            {"alpha": [0.001, 0.01, 0.1, 1.0], "l1_ratio": [0.1, 0.3, 0.5, 0.7, 0.9]},
+    "HuberRegressor":        {"epsilon": [1.1, 1.35, 1.5, 2.0], "alpha": [0.0001, 0.001, 0.01, 0.1]},
+    # Tree-Based
+    "DecisionTreeClassifier":{"max_depth": [3, 5, 7, 10, None], "min_samples_split": [2, 5, 10], "min_samples_leaf": [1, 2, 4], "criterion": ["gini", "entropy"]},
+    "DecisionTreeRegressor": {"max_depth": [3, 5, 7, 10, None], "min_samples_split": [2, 5, 10], "min_samples_leaf": [1, 2, 4]},
+    "RandomForestClassifier":{"n_estimators": [50, 100, 200, 300], "max_depth": [None, 5, 10, 20], "min_samples_split": [2, 5, 10], "max_features": ["sqrt", "log2"]},
+    "RandomForestRegressor": {"n_estimators": [50, 100, 200, 300], "max_depth": [None, 5, 10, 20], "min_samples_split": [2, 5, 10], "max_features": ["sqrt", "log2", None]},
+    "ExtraTreesClassifier":  {"n_estimators": [50, 100, 200], "max_depth": [None, 5, 10, 20], "min_samples_split": [2, 5, 10]},
+    "ExtraTreesRegressor":   {"n_estimators": [50, 100, 200], "max_depth": [None, 5, 10, 20], "min_samples_split": [2, 5, 10]},
+    # Boosting
+    "GradientBoostingClassifier": {"n_estimators": [50, 100, 200], "learning_rate": [0.01, 0.05, 0.1, 0.2], "max_depth": [3, 4, 5, 6], "subsample": [0.7, 0.8, 0.9, 1.0]},
+    "GradientBoostingRegressor":  {"n_estimators": [50, 100, 200], "learning_rate": [0.01, 0.05, 0.1, 0.2], "max_depth": [3, 4, 5, 6], "subsample": [0.7, 0.8, 0.9, 1.0]},
+    "AdaBoostClassifier":  {"n_estimators": [50, 100, 200], "learning_rate": [0.01, 0.1, 0.5, 1.0]},
+    "AdaBoostRegressor":   {"n_estimators": [50, 100, 200], "learning_rate": [0.01, 0.1, 0.5, 1.0], "loss": ["linear", "square", "exponential"]},
+    "XGBClassifier":  {"n_estimators": [50, 100, 200], "learning_rate": [0.01, 0.05, 0.1, 0.2], "max_depth": [3, 4, 5, 6, 7], "subsample": [0.7, 0.8, 0.9], "colsample_bytree": [0.7, 0.8, 0.9]},
+    "XGBRegressor":   {"n_estimators": [50, 100, 200], "learning_rate": [0.01, 0.05, 0.1, 0.2], "max_depth": [3, 4, 5, 6, 7], "subsample": [0.7, 0.8, 0.9], "colsample_bytree": [0.7, 0.8, 0.9]},
+    "LGBMClassifier": {"n_estimators": [50, 100, 200], "learning_rate": [0.01, 0.05, 0.1, 0.2], "max_depth": [-1, 5, 10, 20], "num_leaves": [15, 31, 63, 127], "subsample": [0.7, 0.8, 0.9, 1.0]},  # NOTE(review): LightGBM presumably ignores subsample unless subsample_freq > 0 — confirm base params
+    "LGBMRegressor":  {"n_estimators": [50, 100, 200], "learning_rate": [0.01, 0.05, 0.1, 0.2], "max_depth": [-1, 5, 10, 20], "num_leaves": [15, 31, 63, 127], "subsample": [0.7, 0.8, 0.9, 1.0]},  # NOTE(review): same subsample/subsample_freq caveat as LGBMClassifier — confirm
+    # SVM
+    "SVC": {"C": [0.1, 1, 10, 100], "gamma": ["scale", "auto", 0.001, 0.01, 0.1]},
+    "SVR": {"C": [0.1, 1, 10, 100], "gamma": ["scale", "auto"], "epsilon": [0.01, 0.1, 0.5, 1.0]},
+    # KNN
+    "KNeighborsClassifier": {"n_neighbors": [3, 5, 7, 9, 11, 15], "weights": ["uniform", "distance"], "metric": ["euclidean", "manhattan"]},
+    "KNeighborsRegressor":  {"n_neighbors": [3, 5, 7, 9, 11, 15], "weights": ["uniform", "distance"], "metric": ["euclidean", "manhattan"]},
+    # MLP
+    "MLPClassifier": {"hidden_layer_sizes": [(64,), (128,), (64, 32), (128, 64), (256, 128)], "learning_rate_init": [0.001, 0.005, 0.01], "alpha": [0.0001, 0.001, 0.01], "activation": ["relu", "tanh"]},
+    "MLPRegressor":  {"hidden_layer_sizes": [(64,), (128,), (64, 32), (128, 64), (256, 128)], "learning_rate_init": [0.001, 0.005, 0.01], "alpha": [0.0001, 0.001, 0.01], "activation": ["relu", "tanh"]},
+}
+def get_hpo_grid(cls) -> dict:
+    """Return the hyperparameter search grid for a model class, or {} if none defined."""
+    return HPO_GRIDS.get(cls.__name__, {})  # lookup key is cls.__name__; a hit returns the dict stored in HPO_GRIDS itself, not a copy
 def get_algorithm(task: str, category: str, name: str) -> dict:
     """Retrieve algorithm config by task / category / name."""
     try:

pipelines/pipeline_defs.py CHANGED Viewed

@@ -34,15 +34,17 @@ def _feature_engineering(ctx, results):
 def _train_model(ctx, results):
     from mlops.datasets import load_dataset
-    from mlops.algorithms import get_algorithm
     from sklearn.preprocessing import StandardScaler
     import mlflow, mlflow.sklearn
-    log  = ctx.get("_log")
-    ds   = ctx.get("dataset", "Iris Flowers")
-    cat  = ctx.get("category", "Tree-Based")
-    alg  = ctx.get("algorithm", "Random Forest")
-    task = ctx.get("task_type", "classification")
     if log: log(f"Dataset: {ds}  ·  Algorithm: {alg} ({cat})")
     X_train, X_test, y_train, y_test, _ = load_dataset(ds)
@@ -50,13 +52,33 @@ def _train_model(ctx, results):
     X_tr = scaler.fit_transform(X_train)
     X_te = scaler.transform(X_test)
-    cfg   = get_algorithm(task, cat, alg)
-    model = cfg["class"](**cfg["params"])
-    if log: log(f"Fitting {alg} on {len(X_train)} training samples…")
-    model.fit(X_tr, y_train)
-    score = model.score(X_te, y_test)
-    if log: log(f"Evaluation complete · score = {score:.4f}")
-    return f"Model trained · score={score:.4f}"
 def _evaluate_model(ctx, results):
     log = ctx.get("_log")

 def _train_model(ctx, results):
     from mlops.datasets import load_dataset
+    from mlops.algorithms import get_algorithm, get_hpo_grid
     from sklearn.preprocessing import StandardScaler
     import mlflow, mlflow.sklearn
+    log         = ctx.get("_log")
+    ds          = ctx.get("dataset",     "Iris Flowers")
+    cat         = ctx.get("category",    "Tree-Based")
+    alg         = ctx.get("algorithm",   "Random Forest")
+    task        = ctx.get("task_type",   "classification")
+    hpo_enabled = ctx.get("hpo_enabled", False)
+    hpo_trials  = max(5, int(ctx.get("hpo_trials", 20)))
     if log: log(f"Dataset: {ds}  ·  Algorithm: {alg} ({cat})")
     X_train, X_test, y_train, y_test, _ = load_dataset(ds)
     X_tr = scaler.fit_transform(X_train)
     X_te = scaler.transform(X_test)
+    cfg  = get_algorithm(task, cat, alg)
+    grid = get_hpo_grid(cfg["class"]) if hpo_enabled else {}
+    if hpo_enabled and grid:
+        from sklearn.model_selection import RandomizedSearchCV
+        if log: log(f"Hyperparameter search · {hpo_trials} trials · 3-fold CV…")
+        search = RandomizedSearchCV(
+            cfg["class"](**cfg["params"]), grid,
+            n_iter=hpo_trials, cv=3, n_jobs=-1,
+            random_state=42, refit=True,
+        )
+        search.fit(X_tr, y_train)
+        model = search.best_estimator_
+        best  = {k: v for k, v in search.best_params_.items()}
+        if log: log(f"Best params: {best}")
+        score = model.score(X_te, y_test)
+        if log: log(f"HPO complete · score = {score:.4f} (baseline without HPO may differ)")
+        return f"HPO score={score:.4f} · {best}"
+    else:
+        if hpo_enabled and not grid:
+            if log: log("No HPO grid defined for this algorithm — training with defaults")
+        model = cfg["class"](**cfg["params"])
+        if log: log(f"Fitting {alg} on {len(X_train)} training samples…")
+        model.fit(X_tr, y_train)
+        score = model.score(X_te, y_test)
+        if log: log(f"Evaluation complete · score = {score:.4f}")
+        return f"Model trained · score={score:.4f}"
 def _evaluate_model(ctx, results):
     log = ctx.get("_log")

templates/pipeline.html CHANGED Viewed

@@ -175,6 +175,28 @@
 .cfg-row-k { color: var(--text-muted); white-space: nowrap; padding-right: 8px; }
 .cfg-row-v { color: var(--text-primary); font-weight: 500; text-align: right; word-break: break-word; max-width: 62%; font-size: .77rem; }
 /* ── Terminal ─────────────────────────────────────────────────────────────── */
 .ps-term {
   flex-shrink: 0; height: 34px; overflow: hidden;
@@ -302,10 +324,12 @@ let _seenLogs = 0;
 // Pipeline context — updated via config panel; used when running
 let pCtx = {
-  dataset:   Object.keys(DATASETS)[0] || 'Iris Flowers',
-  category:  'Tree-Based',
-  algorithm: 'Random Forest',
-  task_type: 'classification',
 };
 // Layout
@@ -493,6 +517,27 @@ async function openConfig(taskId) {
       <div class="cfg-sec">
         <label class="cfg-lbl" for="cfg-alg">Algorithm</label>
         <select class="cfg-select" id="cfg-alg" onchange="pCtx.algorithm=this.value"><option>Loading…</option></select>
       </div>`;
   }
@@ -582,6 +627,12 @@ function onDatasetChange(name) {
   }
 }
 async function onTtChange(tt) {
   pCtx.task_type = tt; pCtx.category=''; pCtx.algorithm='';
   await _loadAlgos(tt);
@@ -599,8 +650,14 @@ async function runPipeline() {
   const ctx = {};
   if (cur === 'training_pipeline') {
-    Object.assign(ctx, { dataset:pCtx.dataset, category:pCtx.category,
-                          algorithm:pCtx.algorithm, task_type:pCtx.task_type });
   }
   try {

 .cfg-row-k { color: var(--text-muted); white-space: nowrap; padding-right: 8px; }
 .cfg-row-v { color: var(--text-primary); font-weight: 500; text-align: right; word-break: break-word; max-width: 62%; font-size: .77rem; }
+/* ── HPO toggle ──────────────────────────────────────────────────────────── */
+.hpo-toggle { position:relative; display:inline-block; width:34px; height:19px; flex-shrink:0; }
+.hpo-toggle input { opacity:0; width:0; height:0; }
+.hpo-knob {
+  position:absolute; cursor:pointer; inset:0;
+  background:var(--bg-tertiary); border:1px solid var(--border-color);
+  border-radius:20px; transition:.18s;
+}
+.hpo-knob::before {
+  content:''; position:absolute;
+  width:13px; height:13px; left:2px; top:2px;
+  background:var(--text-muted); border-radius:50%; transition:.18s;
+}
+.hpo-toggle input:checked + .hpo-knob { background:rgba(139,92,246,.2); border-color:var(--accent); }
+.hpo-toggle input:checked + .hpo-knob::before { transform:translateX(15px); background:var(--accent); }
+.cfg-num {
+  width:64px; background:var(--bg-tertiary); border:1px solid var(--border-color);
+  color:var(--text-primary); border-radius:6px; padding:3px 7px;
+  font-size:.8rem; text-align:right; outline:none;
+}
+.cfg-num:focus { border-color:var(--accent); }
 /* ── Terminal ─────────────────────────────────────────────────────────────── */
 .ps-term {
   flex-shrink: 0; height: 34px; overflow: hidden;
 // Pipeline context — updated via config panel; used when running
 let pCtx = {
+  dataset:     Object.keys(DATASETS)[0] || 'Iris Flowers',
+  category:    'Tree-Based',
+  algorithm:   'Random Forest',
+  task_type:   'classification',
+  hpo_enabled: false,
+  hpo_trials:  20,
 };
 // Layout
       <div class="cfg-sec">
         <label class="cfg-lbl" for="cfg-alg">Algorithm</label>
         <select class="cfg-select" id="cfg-alg" onchange="pCtx.algorithm=this.value"><option>Loading…</option></select>
+      </div>
+      <div class="cfg-sec">
+        <span class="cfg-lbl">Hyperparameter Search</span>
+        <div class="cfg-row">
+          <span class="cfg-row-k">Enable HPO</span>
+          <label class="hpo-toggle">
+            <input type="checkbox" id="cfg-hpo" onchange="onHpoToggle(this.checked)"${pCtx.hpo_enabled?' checked':''}>
+            <span class="hpo-knob"></span>
+          </label>
+        </div>
+        <div id="cfg-hpo-opts"${pCtx.hpo_enabled?'':' style="display:none"'}>
+          <div class="cfg-row">
+            <span class="cfg-row-k">Search trials</span>
+            <input type="number" class="cfg-num" id="cfg-trials" min="5" max="100" value="${pCtx.hpo_trials}"
+              onchange="pCtx.hpo_trials=Math.max(5,Math.min(100,+this.value))">
+          </div>
+          <div class="cfg-row">
+            <span class="cfg-row-k">CV folds</span>
+            <span class="cfg-row-v" style="color:var(--text-muted)">3</span>
+          </div>
+        </div>
       </div>`;
   }
   }
 }
+function onHpoToggle(enabled) {
+  pCtx.hpo_enabled = enabled;
+  const el = document.getElementById('cfg-hpo-opts');
+  if (el) el.style.display = enabled ? '' : 'none';
+}
 async function onTtChange(tt) {
   pCtx.task_type = tt; pCtx.category=''; pCtx.algorithm='';
   await _loadAlgos(tt);
   const ctx = {};
   if (cur === 'training_pipeline') {
+    Object.assign(ctx, {
+      dataset:     pCtx.dataset,
+      category:    pCtx.category,
+      algorithm:   pCtx.algorithm,
+      task_type:   pCtx.task_type,
+      hpo_enabled: pCtx.hpo_enabled || false,
+      hpo_trials:  pCtx.hpo_trials  || 20,
+    });
   }
   try {