Spaces:
Sleeping
Sleeping
| # app.py — PulseFlow MLOps Demo Space | |
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| import json | |
| from datetime import datetime | |
| # Simulate the PulseFlow pipeline stages | |
def run_etl(uploaded_file):
    """Simulate the ETL stage: load data, drop rows with nulls, build a report.

    Args:
        uploaded_file: ``None`` to use 100 rows of generated sample data.
            Otherwise either a filesystem path (``str`` or ``os.PathLike``)
            or an upload object that exposes the path as ``.name``.

    Returns:
        A ``(preview, report)`` tuple: ``preview`` is a DataFrame holding the
        first 10 rows left after ``dropna()``; ``report`` is a JSON string
        with the keys ``source``, ``rows_loaded``, ``columns``,
        ``nulls_removed``, ``dtypes`` and ``timestamp``.
    """
    import os
    from datetime import timezone

    if uploaded_file is None:
        df = pd.DataFrame({
            "feature_1": np.random.randn(100),
            "feature_2": np.random.randn(100),
            "feature_3": np.random.rand(100) * 100,
            "target": np.random.randint(0, 2, 100),
        })
        source = "generated sample data (100 rows)"
    else:
        # Path-like values are used directly: ``Path.name`` is only the
        # basename, so ``.name`` is reserved for upload wrapper objects.
        if isinstance(uploaded_file, (str, os.PathLike)):
            csv_path = uploaded_file
        else:
            csv_path = uploaded_file.name
        df = pd.read_csv(csv_path)
        # Row count in the label is taken before null rows are dropped.
        source = f"uploaded file ({len(df)} rows)"

    # Count null cells (not rows) on either side of the cleaning step.
    nulls_before = int(df.isnull().sum().sum())
    df = df.dropna()
    nulls_after = int(df.isnull().sum().sum())

    # Naive UTC value + "Z" keeps the original timestamp format without
    # relying on the deprecated datetime.utcnow().
    utc_now = datetime.now(timezone.utc).replace(tzinfo=None)

    report = {
        "source": source,
        "rows_loaded": len(df),  # rows remaining after dropna()
        "columns": list(df.columns),
        "nulls_removed": nulls_before - nulls_after,
        "dtypes": {col: str(dtype) for col, dtype in df.dtypes.items()},
        "timestamp": utc_now.isoformat() + "Z",
    }
    return df.head(10), json.dumps(report, indent=2)
def run_training(n_estimators, max_depth, test_size):
    """Train a RandomForestClassifier on synthetic data and report metrics.

    The dataset is 500 samples x 5 standard-normal features; the label is 1
    when the sum of the first two features is positive. Data generation,
    the train/test split and the model all use a fixed seed of 42.

    Args:
        n_estimators: Number of trees; converted with ``int()``.
        max_depth: Maximum tree depth; converted with ``int()``.
        test_size: Hold-out size as a percentage (e.g. ``20`` for 20 %).

    Returns:
        A JSON string with the hyperparameters, accuracy and F1 score on the
        hold-out set, sample counts, a status field and a UTC timestamp.
    """
    from datetime import timezone

    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import accuracy_score, f1_score
    from sklearn.model_selection import train_test_split

    # A private RandomState yields the same values as np.random.seed(42)
    # followed by np.random.randn, but leaves the process-wide RNG alone so
    # other code drawing from np.random stays random.
    rng = np.random.RandomState(42)
    X = rng.randn(500, 5)
    y = (X[:, 0] + X[:, 1] > 0).astype(int)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size / 100, random_state=42
    )

    model = RandomForestClassifier(
        n_estimators=int(n_estimators),
        max_depth=int(max_depth),
        random_state=42,
    )
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    # Naive UTC value + "Z" keeps the original timestamp format without
    # relying on the deprecated datetime.utcnow().
    utc_now = datetime.now(timezone.utc).replace(tzinfo=None)

    metrics = {
        "experiment": "enterprise_mlops_training",
        "model": "RandomForestClassifier",
        "n_estimators": int(n_estimators),
        "max_depth": int(max_depth),
        "test_size_pct": test_size,
        # float() so the values are plain Python floats for json.dumps.
        "accuracy": round(float(accuracy_score(y_test, preds)), 4),
        "f1_score": round(float(f1_score(y_test, preds)), 4),
        "train_samples": len(X_train),
        "test_samples": len(X_test),
        "status": "completed",
        "mlflow_uri": "See GitHub repo to connect your MLflow instance",
        "timestamp": utc_now.isoformat() + "Z",
    }
    return json.dumps(metrics, indent=2)
def run_inference(f1, f2, f3, f4, f5):
    """Simulate a ``/predict`` response for five numeric features.

    The score is a logistic function of ``f1 + f2``; the remaining features
    are echoed in the response but do not influence the result. The reported
    latency is a random value between 2 and 8, not a measurement.

    Args:
        f1, f2, f3, f4, f5: Feature values. ``None`` (e.g. a cleared numeric
            input) is treated as ``0.0``.

    Returns:
        A JSON string with the echoed input, the 0/1 prediction, the
        confidence of the predicted class, and response metadata.
    """
    from datetime import timezone

    # Coalesce missing inputs so sum() below cannot fail on None.
    features = [0.0 if value is None else value for value in (f1, f2, f3, f4, f5)]

    # Numerically stable logistic: never exponentiates a large positive
    # number, so extreme inputs cannot overflow.
    z = float(sum(features[:2]))
    if z >= 0:
        score = 1.0 / (1.0 + float(np.exp(-z)))
    else:
        exp_z = float(np.exp(z))
        score = exp_z / (1.0 + exp_z)

    prediction = int(score > 0.5)
    # Confidence always refers to the class that was predicted.
    confidence = score if prediction == 1 else 1 - score

    # Naive UTC value + "Z" keeps the original timestamp format without
    # relying on the deprecated datetime.utcnow().
    utc_now = datetime.now(timezone.utc).replace(tzinfo=None)

    result = {
        "endpoint": "/predict",
        "input": {"features": features},
        "prediction": prediction,
        "confidence": round(float(confidence), 4),
        "model": "RandomForestClassifier v0.1.0",
        "latency_ms": round(float(np.random.uniform(2, 8)), 2),
        "status": "200 OK",
        "timestamp": utc_now.isoformat() + "Z",
    }
    return json.dumps(result, indent=2)
| # UI | |
# Build the three-tab demo UI. Each tab wires one button to one of the
# pipeline-stage functions defined above in this file.
with gr.Blocks(title="PulseFlow MLOps", theme=gr.themes.Soft()) as demo:
    # Page header. The links carry visible text so they render as
    # clickable labels rather than empty anchors.
    gr.Markdown("""
    # PulseFlow MLOps Pipeline
    **Production-grade open source MLOps** — ETL → Training → FastAPI Inference
    [PyPI](https://pypi.org/project/pulseflow-mlops/)
    [GitHub](https://github.com/anilatambharii/PulseFlow)
    [Hugging Face](https://huggingface.co/AmbhariiLabs)
    Built by [Anil Prasad](https://www.linkedin.com/in/anilsprasad) — Head of Engineering & Product, Duke Energy | Founder, Ambharii Labs
    """)

    with gr.Tabs():
        # Stage 1: optional CSV upload -> run_etl -> preview table + report.
        with gr.Tab("Stage 1 — ETL"):
            gr.Markdown("Upload a CSV or use generated sample data to simulate the ingestion and preprocessing pipeline.")
            with gr.Row():
                file_input = gr.File(label="Upload CSV (optional)", file_types=[".csv"])
                etl_btn = gr.Button("Run ETL Pipeline", variant="primary")
            data_preview = gr.Dataframe(label="Processed Data Preview (first 10 rows)")
            etl_report = gr.Code(label="ETL Report (JSON)", language="json")
            etl_btn.click(run_etl, inputs=[file_input], outputs=[data_preview, etl_report])

        # Stage 2: hyperparameter sliders -> run_training -> metrics JSON.
        with gr.Tab("Stage 2 — Training"):
            gr.Markdown("Configure hyperparameters and run the training pipeline. Metrics mirror what MLflow captures in production.")
            with gr.Row():
                n_est = gr.Slider(10, 200, value=100, step=10, label="n_estimators")
                max_d = gr.Slider(2, 20, value=5, step=1, label="max_depth")
                test_s = gr.Slider(10, 40, value=20, step=5, label="Test size %")
            train_btn = gr.Button("Run Training", variant="primary")
            train_output = gr.Code(label="MLflow Experiment Results (JSON)", language="json")
            train_btn.click(run_training, inputs=[n_est, max_d, test_s], outputs=[train_output])

        # Stage 3: five numeric inputs -> run_inference -> response JSON.
        with gr.Tab("Stage 3 — Inference"):
            gr.Markdown("Simulate the FastAPI `/predict` endpoint. In production this runs via `uvicorn deployment.app.main:app`.")
            with gr.Row():
                f1 = gr.Number(value=0.5, label="Feature 1")
                f2 = gr.Number(value=-0.3, label="Feature 2")
                f3 = gr.Number(value=1.2, label="Feature 3")
                f4 = gr.Number(value=0.0, label="Feature 4")
                f5 = gr.Number(value=0.8, label="Feature 5")
            infer_btn = gr.Button("Run Inference", variant="primary")
            infer_output = gr.Code(label="API Response (JSON)", language="json")
            infer_btn.click(run_inference, inputs=[f1, f2, f3, f4, f5], outputs=[infer_output])

    # Page footer with install and documentation pointers.
    gr.Markdown("""
    ---
    **Install locally:** `pip install pulseflow-mlops` |
    **Full stack:** `docker-compose up --build` |
    **Docs:** [GitHub README](https://github.com/anilatambharii/PulseFlow)
    """)

# Start the server only when executed as a script, so importing this module
# (for reuse or inspection of `demo`) does not launch it as a side effect.
if __name__ == "__main__":
    demo.launch()