# app.py — PulseFlow MLOps Demo Space
import gradio as gr
import pandas as pd
import numpy as np
import json
from datetime import datetime

# Simulate the PulseFlow pipeline stages
def run_etl(uploaded_file):
    """Simulate the ETL stage: load data, drop rows with nulls, and report.

    Args:
        uploaded_file: ``None`` to generate a random 100-row sample frame.
            Otherwise either a filesystem path (``str`` or ``os.PathLike``)
            or an object that exposes the path as ``.name`` (a tempfile-style
            upload wrapper).

    Returns:
        A ``(preview, report_json)`` tuple: the first 10 cleaned rows as a
        DataFrame, and a JSON string describing the run. In the report,
        ``source`` carries the row count before cleaning, ``rows_loaded`` the
        row count after cleaning, and ``nulls_removed`` the number of null
        cells that were present before cleaning.
    """
    # Local imports: only ``datetime`` itself is imported at file level.
    import os
    from datetime import timezone

    if uploaded_file is None:
        df = pd.DataFrame({
            "feature_1": np.random.randn(100),
            "feature_2": np.random.randn(100),
            "feature_3": np.random.rand(100) * 100,
            "target":    np.random.randint(0, 2, 100),
        })
        source = "generated sample data (100 rows)"
    else:
        # The upload may arrive as a plain path or as a wrapper object with
        # the path in ``.name``. Path-likes are checked first because
        # ``pathlib.Path.name`` is only the final path component.
        if isinstance(uploaded_file, (str, os.PathLike)):
            csv_path = uploaded_file
        else:
            csv_path = uploaded_file.name
        df = pd.read_csv(csv_path)
        source = f"uploaded file ({len(df)} rows)"

    # dropna() removes every row containing a null, so all null cells counted
    # here are gone afterwards; there is no need to recount.
    nulls_removed = int(df.isnull().sum().sum())
    df = df.dropna()

    # Timezone-aware "now" (utcnow() is deprecated); tzinfo is stripped so the
    # string keeps the original "<naive ISO timestamp>Z" shape.
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)

    report = {
        "source":        source,
        "rows_loaded":   len(df),
        "columns":       list(df.columns),
        "nulls_removed": nulls_removed,
        "dtypes":        {c: str(t) for c, t in df.dtypes.items()},
        "timestamp":     now_utc.isoformat() + "Z",
    }
    return df.head(10), json.dumps(report, indent=2)

def run_training(n_estimators, max_depth, test_size):
    """Train a random forest on fixed synthetic data and report its metrics.

    The dataset is 500 rows of 5 standard-normal features; the label is 1
    when the first two features sum to a positive value. Data generation,
    the train/test split and the model all use seed 42, so identical inputs
    give identical metrics.

    Args:
        n_estimators: Number of trees; coerced to ``int``.
        max_depth: Maximum tree depth; coerced to ``int``.
        test_size: Hold-out share as a percentage (e.g. ``20`` for 20 %).

    Returns:
        A JSON string with the hyperparameters, accuracy, F1 score, sample
        counts and a UTC timestamp. ``mlflow_uri`` is a placeholder message;
        nothing is logged to MLflow here.
    """
    # Local import: only ``datetime`` itself is imported at file level.
    from datetime import timezone

    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import accuracy_score, f1_score

    tree_count = int(n_estimators)
    depth_limit = int(max_depth)

    # A private RandomState(42) produces the same stream that
    # np.random.seed(42) would, but leaves the process-wide generator alone,
    # so other callers of np.random.* stay genuinely random after a run.
    rng = np.random.RandomState(42)
    X = rng.randn(500, 5)
    y = (X[:, 0] + X[:, 1] > 0).astype(int)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size / 100, random_state=42
    )

    model = RandomForestClassifier(
        n_estimators=tree_count,
        max_depth=depth_limit,
        random_state=42,
    )
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    # Timezone-aware "now" (utcnow() is deprecated); tzinfo is stripped so the
    # string keeps the original "<naive ISO timestamp>Z" shape.
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)

    metrics = {
        "experiment":    "enterprise_mlops_training",
        "model":         "RandomForestClassifier",
        "n_estimators":  tree_count,
        "max_depth":     depth_limit,
        "test_size_pct": test_size,
        "accuracy":      round(float(accuracy_score(y_test, preds)), 4),
        "f1_score":      round(float(f1_score(y_test, preds)), 4),
        "train_samples": len(X_train),
        "test_samples":  len(X_test),
        "status":        "completed",
        "mlflow_uri":    "See GitHub repo to connect your MLflow instance",
        "timestamp":     now_utc.isoformat() + "Z",
    }
    return json.dumps(metrics, indent=2)

def run_inference(f1, f2, f3, f4, f5):
    """Build a simulated ``/predict`` response for five numeric features.

    No trained model is loaded. The score is the logistic function of
    ``f1 + f2``; ``f3``..``f5`` are echoed back in the response but do not
    affect the result. ``latency_ms`` is a random value between 2 and 8.

    Args:
        f1, f2, f3, f4, f5: Numeric feature values.

    Returns:
        A JSON string with the echoed input, the 0/1 prediction, the
        confidence in the predicted class (rounded to 4 places), and
        response metadata including a UTC timestamp.
    """
    # Local import: only ``datetime`` itself is imported at file level.
    from datetime import timezone

    features = [f1, f2, f3, f4, f5]
    logit = sum(features[:2])

    # Numerically stable sigmoid: exp() only ever sees a non-positive
    # argument, so strongly negative logits no longer overflow (the naive
    # 1 / (1 + exp(-x)) form emits a RuntimeWarning there).
    decay = np.exp(-abs(logit))
    if logit >= 0:
        score = 1 / (1 + decay)
    else:
        score = decay / (1 + decay)

    prediction = int(score > 0.5)

    # Timezone-aware "now" (utcnow() is deprecated); tzinfo is stripped so the
    # string keeps the original "<naive ISO timestamp>Z" shape.
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)

    result = {
        "endpoint":    "/predict",
        "input":       {"features": features},
        "prediction":  prediction,
        "confidence":  round(float(score if prediction == 1 else 1 - score), 4),
        "model":       "RandomForestClassifier v0.1.0",
        "latency_ms":  round(float(np.random.uniform(2, 8)), 2),
        "status":      "200 OK",
        "timestamp":   now_utc.isoformat() + "Z",
    }
    return json.dumps(result, indent=2)

# UI: a header, three tabs (one per simulated pipeline stage), and a footer.
# Each tab wires a button click to the matching run_* callback defined above.
# Components are laid out in the order they are created inside the nested
# context managers, so statement order here is significant.
with gr.Blocks(title="PulseFlow MLOps", theme=gr.themes.Soft()) as demo:
    # Header: project title, badges and author credit.
    gr.Markdown("""
# PulseFlow MLOps Pipeline
**Production-grade open source MLOps** — ETL → Training → FastAPI Inference

[![PyPI](https://img.shields.io/pypi/v/pulseflow-mlops)](https://pypi.org/project/pulseflow-mlops/)
[![GitHub](https://img.shields.io/badge/GitHub-PulseFlow-black)](https://github.com/anilatambharii/PulseFlow)
[![AmbhariiLabs](https://img.shields.io/badge/Org-AmbhariiLabs-blue)](https://huggingface.co/AmbhariiLabs)

Built by [Anil Prasad](https://www.linkedin.com/in/anilsprasad) — Head of Engineering & Product, Duke Energy | Founder, Ambharii Labs
    """)

    with gr.Tabs():
        # Stage 1: optional CSV upload -> run_etl -> data preview + JSON report.
        # With no file selected, run_etl receives None and generates sample data.
        with gr.Tab("Stage 1 — ETL"):
            gr.Markdown("Upload a CSV or use generated sample data to simulate the ingestion and preprocessing pipeline.")
            with gr.Row():
                file_input = gr.File(label="Upload CSV (optional)", file_types=[".csv"])
                etl_btn    = gr.Button("Run ETL Pipeline", variant="primary")
            data_preview  = gr.Dataframe(label="Processed Data Preview (first 10 rows)")
            etl_report    = gr.Code(label="ETL Report (JSON)", language="json")
            etl_btn.click(run_etl, inputs=[file_input], outputs=[data_preview, etl_report])

        # Stage 2: three hyperparameter sliders -> run_training -> JSON metrics.
        # "Test size %" is a percentage; run_training divides it by 100.
        with gr.Tab("Stage 2 — Training"):
            gr.Markdown("Configure hyperparameters and run the training pipeline. Metrics mirror what MLflow captures in production.")
            with gr.Row():
                n_est      = gr.Slider(10, 200, value=100, step=10, label="n_estimators")
                max_d      = gr.Slider(2,   20, value=5,   step=1,  label="max_depth")
                test_s     = gr.Slider(10,  40, value=20,  step=5,  label="Test size %")
            train_btn      = gr.Button("Run Training", variant="primary")
            train_output   = gr.Code(label="MLflow Experiment Results (JSON)", language="json")
            train_btn.click(run_training, inputs=[n_est, max_d, test_s], outputs=[train_output])

        # Stage 3: five numeric inputs -> run_inference -> simulated JSON response.
        # The inputs are passed positionally as f1..f5, in the order listed.
        with gr.Tab("Stage 3 — Inference"):
            gr.Markdown("Simulate the FastAPI `/predict` endpoint. In production this runs via `uvicorn deployment.app.main:app`.")
            with gr.Row():
                f1 = gr.Number(value=0.5,  label="Feature 1")
                f2 = gr.Number(value=-0.3, label="Feature 2")
                f3 = gr.Number(value=1.2,  label="Feature 3")
                f4 = gr.Number(value=0.0,  label="Feature 4")
                f5 = gr.Number(value=0.8,  label="Feature 5")
            infer_btn    = gr.Button("Run Inference", variant="primary")
            infer_output = gr.Code(label="API Response (JSON)", language="json")
            infer_btn.click(run_inference, inputs=[f1, f2, f3, f4, f5], outputs=[infer_output])

    # Footer: install / run hints and a link to the docs.
    gr.Markdown("""
---
**Install locally:** `pip install pulseflow-mlops` | 
**Full stack:** `docker-compose up --build` | 
**Docs:** [GitHub README](https://github.com/anilatambharii/PulseFlow)
    """)

# Start the app. There is no `if __name__ == "__main__"` guard, so this also
# runs whenever the module is imported.
demo.launch()