Spaces:

AmbhariiLabs
/

pulseflow-mlops

Sleeping

App Files Files Community

pulseflow-mlops / app.py

meetanilp

Add PulseFlow Gradio demo app (#2)

a9096f2 about 2 months ago

raw

history blame contribute delete

6.25 kB

	# app.py — PulseFlow MLOps Demo Space
	import gradio as gr
	import pandas as pd
	import numpy as np
	import json
	from datetime import datetime

	# Simulate the PulseFlow pipeline stages
	def run_etl(uploaded_file):
	    """Simulate the ETL stage: load data, drop rows with nulls, build a report.

	    Args:
	        uploaded_file: ``None`` to use 100 rows of generated sample data.
	            Otherwise the uploaded CSV, given either as a filesystem path
	            or as an object that exposes the path in a ``name`` attribute
	            (which form arrives depends on how the upload widget is
	            configured).

	    Returns:
	        A ``(preview, report)`` tuple: the first 10 cleaned rows as a
	        DataFrame, and a JSON string describing the run.
	    """
	    # Local import keeps this function self-contained; the module only
	    # imports ``datetime`` itself.
	    from datetime import timezone

	    if uploaded_file is None:
	        df = pd.DataFrame({
	            "feature_1": np.random.randn(100),
	            "feature_2": np.random.randn(100),
	            "feature_3": np.random.rand(100) * 100,
	            "target": np.random.randint(0, 2, 100),
	        })
	        source = "generated sample data (100 rows)"
	    else:
	        # Accept both a plain path and a wrapper carrying the path in
	        # ``.name``; a bare ``uploaded_file.name`` breaks on path strings.
	        csv_path = getattr(uploaded_file, "name", uploaded_file)
	        df = pd.read_csv(csv_path)
	        # Row count is taken before cleaning, so it reflects the raw file.
	        source = f"uploaded file ({len(df)} rows)"

	    # dropna() removes every row containing a null, so all nulls present
	    # before the call are removed; no need to recount afterwards.
	    nulls_removed = int(df.isnull().sum().sum())
	    df = df.dropna()

	    # datetime.utcnow() is deprecated; take an aware UTC time and strip the
	    # tzinfo so the emitted string keeps its "<naive ISO>Z" shape.
	    timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

	    report = {
	        "source": source,
	        "rows_loaded": len(df),
	        "columns": list(df.columns),
	        "nulls_removed": nulls_removed,
	        "dtypes": {c: str(t) for c, t in df.dtypes.items()},
	        "timestamp": timestamp,
	    }
	    return df.head(10), json.dumps(report, indent=2)

	def run_training(n_estimators, max_depth, test_size):
	    """Train a random forest on synthetic data and return its metrics as JSON.

	    The data set is 500 rows of 5 standard-normal features; the label is 1
	    when the sum of the first two features is positive.

	    Args:
	        n_estimators: Number of trees; coerced with ``int()``.
	        max_depth: Maximum tree depth; coerced with ``int()``.
	        test_size: Hold-out share expressed as a percentage (e.g. ``20``);
	            divided by 100 before being passed to ``train_test_split``.

	    Returns:
	        A JSON string containing the hyperparameters, accuracy, F1 score,
	        train/test sample counts and a UTC timestamp.
	    """
	    from datetime import timezone

	    from sklearn.ensemble import RandomForestClassifier
	    from sklearn.metrics import accuracy_score, f1_score
	    from sklearn.model_selection import train_test_split

	    # Use a private seeded generator instead of np.random.seed(42): it
	    # yields the same draws but does not reseed the process-wide RNG that
	    # other functions rely on for their random output.
	    rng = np.random.RandomState(42)
	    X = rng.randn(500, 5)
	    y = (X[:, 0] + X[:, 1] > 0).astype(int)

	    X_train, X_test, y_train, y_test = train_test_split(
	        X, y, test_size=test_size / 100, random_state=42
	    )

	    model = RandomForestClassifier(
	        n_estimators=int(n_estimators),
	        max_depth=int(max_depth),
	        random_state=42,
	    )
	    model.fit(X_train, y_train)
	    preds = model.predict(X_test)

	    # datetime.utcnow() is deprecated; take an aware UTC time and strip the
	    # tzinfo so the emitted string keeps its "<naive ISO>Z" shape.
	    timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

	    metrics = {
	        "experiment": "enterprise_mlops_training",
	        "model": "RandomForestClassifier",
	        "n_estimators": int(n_estimators),
	        "max_depth": int(max_depth),
	        "test_size_pct": test_size,
	        "accuracy": round(accuracy_score(y_test, preds), 4),
	        "f1_score": round(f1_score(y_test, preds), 4),
	        "train_samples": len(X_train),
	        "test_samples": len(X_test),
	        "status": "completed",
	        "mlflow_uri": "See GitHub repo to connect your MLflow instance",
	        "timestamp": timestamp,
	    }
	    return json.dumps(metrics, indent=2)

	def run_inference(f1, f2, f3, f4, f5):
	    """Simulate a ``/predict`` response for five numeric features.

	    The score is the logistic sigmoid of ``f1 + f2``; the remaining features
	    are echoed back in the response but do not influence the prediction.

	    Args:
	        f1, f2, f3, f4, f5: Feature values. ``None`` (e.g. an empty numeric
	            field) is treated as ``0.0`` instead of raising ``TypeError``.

	    Returns:
	        A JSON string with the echoed input, the 0/1 prediction, the
	        confidence of the predicted class, a simulated latency and a UTC
	        timestamp.
	    """
	    from datetime import timezone

	    # Substitute 0.0 for missing values so sum() below cannot hit None.
	    features = [0.0 if value is None else value for value in (f1, f2, f3, f4, f5)]

	    score = 1 / (1 + np.exp(-sum(features[:2])))
	    prediction = int(score > 0.5)
	    # Report the probability of whichever class was actually predicted.
	    confidence = score if prediction == 1 else 1 - score

	    # datetime.utcnow() is deprecated; take an aware UTC time and strip the
	    # tzinfo so the emitted string keeps its "<naive ISO>Z" shape.
	    timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

	    result = {
	        "endpoint": "/predict",
	        "input": {"features": features},
	        "prediction": prediction,
	        "confidence": round(float(confidence), 4),
	        "model": "RandomForestClassifier v0.1.0",
	        # Latency is simulated: a uniform draw between 2 and 8 ms.
	        "latency_ms": round(np.random.uniform(2, 8), 2),
	        "status": "200 OK",
	        "timestamp": timestamp,
	    }
	    return json.dumps(result, indent=2)

	# UI: builds a Gradio Blocks app named `demo` with one tab per simulated
	# pipeline stage, then launches it.
	# NOTE(review): nesting indentation is not visible in this view of the file,
	# so which components sit inside each gr.Row() cannot be confirmed here.
	with gr.Blocks(title="PulseFlow MLOps", theme=gr.themes.Soft()) as demo:
	# Header: project title, tagline, badge links and attribution.
	gr.Markdown("""
	# PulseFlow MLOps Pipeline
	Production-grade open source MLOps — ETL → Training → FastAPI Inference

	[![PyPI](https://img.shields.io/pypi/v/pulseflow-mlops)](https://pypi.org/project/pulseflow-mlops/)
	[![GitHub](https://img.shields.io/badge/GitHub-PulseFlow-black)](https://github.com/anilatambharii/PulseFlow)
	[![AmbhariiLabs](https://img.shields.io/badge/Org-AmbhariiLabs-blue)](https://huggingface.co/AmbhariiLabs)

	Built by [Anil Prasad](https://www.linkedin.com/in/anilsprasad) — Head of Engineering & Product, Duke Energy \| Founder, Ambharii Labs
	""")

	with gr.Tabs():
	# Stage 1: the button passes the optional CSV upload to run_etl and shows
	# its two return values in the dataframe preview and the JSON report.
	with gr.Tab("Stage 1 — ETL"):
	gr.Markdown("Upload a CSV or use generated sample data to simulate the ingestion and preprocessing pipeline.")
	with gr.Row():
	file_input = gr.File(label="Upload CSV (optional)", file_types=[".csv"])
	etl_btn = gr.Button("Run ETL Pipeline", variant="primary")
	data_preview = gr.Dataframe(label="Processed Data Preview (first 10 rows)")
	etl_report = gr.Code(label="ETL Report (JSON)", language="json")
	etl_btn.click(run_etl, inputs=[file_input], outputs=[data_preview, etl_report])

	# Stage 2: three sliders supply n_estimators, max_depth and the test-set
	# percentage to run_training; its JSON string is shown in the code panel.
	with gr.Tab("Stage 2 — Training"):
	gr.Markdown("Configure hyperparameters and run the training pipeline. Metrics mirror what MLflow captures in production.")
	with gr.Row():
	n_est = gr.Slider(10, 200, value=100, step=10, label="n_estimators")
	max_d = gr.Slider(2, 20, value=5, step=1, label="max_depth")
	test_s = gr.Slider(10, 40, value=20, step=5, label="Test size %")
	train_btn = gr.Button("Run Training", variant="primary")
	train_output = gr.Code(label="MLflow Experiment Results (JSON)", language="json")
	train_btn.click(run_training, inputs=[n_est, max_d, test_s], outputs=[train_output])

	# Stage 3: five numeric inputs are passed, in order, to run_inference; its
	# JSON string is shown in the code panel.
	with gr.Tab("Stage 3 — Inference"):
	gr.Markdown("Simulate the FastAPI `/predict` endpoint. In production this runs via `uvicorn deployment.app.main:app`.")
	with gr.Row():
	f1 = gr.Number(value=0.5, label="Feature 1")
	f2 = gr.Number(value=-0.3, label="Feature 2")
	f3 = gr.Number(value=1.2, label="Feature 3")
	f4 = gr.Number(value=0.0, label="Feature 4")
	f5 = gr.Number(value=0.8, label="Feature 5")
	infer_btn = gr.Button("Run Inference", variant="primary")
	infer_output = gr.Code(label="API Response (JSON)", language="json")
	infer_btn.click(run_inference, inputs=[f1, f2, f3, f4, f5], outputs=[infer_output])

	# Footer: install and run hints plus a link to the project README.
	gr.Markdown("""
	---
	Install locally: `pip install pulseflow-mlops` \|
	Full stack: `docker-compose up --build` \|
	Docs: [GitHub README](https://github.com/anilatambharii/PulseFlow)
	""")

	# Start the app; this runs unconditionally whenever the module executes.
	demo.launch()