LeonardoMdSA's picture
main.py and README.md final touches
b2715a4
# app/api/routes.py
from fastapi import APIRouter, BackgroundTasks, UploadFile, File, Request, HTTPException
from fastapi.responses import JSONResponse
from fastapi.templating import Jinja2Templates
from app.inference.predictor import Predictor
from app.monitoring.data_loader import load_production_data
from app.monitoring.governance import run_governance_checks
import pandas as pd
import numpy as np
import json
import os
# Module-level objects shared by the route handlers in this file.
router = APIRouter()
templates = Jinja2Templates(directory="app/templates")
predictor = Predictor()

# CSV file that every /predict call appends its scored rows to.
PROD_LOG = "data/production/predictions_log.csv"

# ------------------------------------------------------------------
# Bootstrap the production log at import time: make sure its folder
# exists and, when the file is absent, write a header-only CSV.
# ------------------------------------------------------------------
os.makedirs(os.path.dirname(PROD_LOG), exist_ok=True)
if not os.path.exists(PROD_LOG):
    # Header layout: model features first, then the bookkeeping columns.
    log_header = [
        *predictor.features,
        "target",  # ground-truth label
        "model_prediction",  # predicted class
        "model_probability",
        "model_risk_level",
        "model_version",
        "timestamp",
    ]
    pd.DataFrame(columns=log_header).to_csv(PROD_LOG, index=False)
# ------------------------------------------------------------------
def _risk_level(proba):
    """Map a predicted probability to the "High" / "Medium" / "Low" bucket."""
    if proba >= 0.75:
        return "High"
    if proba >= 0.5:
        return "Medium"
    return "Low"


@router.post("/predict")
async def predict_file(background_tasks: BackgroundTasks, file: UploadFile = File(...)):
    """Score an uploaded CSV and append the scored rows to the production log.

    Args:
        background_tasks: Injected by FastAPI. Not used by this handler; kept
            so the signature stays unchanged.
        file: Uploaded CSV. It must contain every column listed in
            ``predictor.features``; a ``target`` column is optional.

    Returns:
        JSONResponse with ``n_rows`` and ``results``, one entry per input row
        holding the row index, the probability rounded to 4 decimals, the
        "Default" / "No Default" label and the risk level.

    Raises:
        HTTPException: 400 when the upload cannot be parsed as CSV or when
            required feature columns are missing.
    """
    try:
        df = pd.read_csv(file.file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        # A malformed upload is a client error, not a server failure.
        raise HTTPException(
            status_code=400,
            detail=f"Could not parse uploaded file as CSV: {exc}",
        ) from exc

    # ---- STRICT MODE: schema enforcement ----
    feature_cols = list(predictor.features)
    missing = set(feature_cols) - set(df.columns)
    if missing:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid schema. Missing required columns: {sorted(missing)}",
        )

    # ---- Model inference ----
    preds, probas = predictor.predict(df)
    risk_levels = [_risk_level(p) for p in probas]

    results = [
        {
            "row": i,
            "probability": round(float(proba), 4),
            "prediction": "Default" if pred == 1 else "No Default",
            "risk_level": risk,
        }
        for i, (pred, proba, risk) in enumerate(zip(preds, probas, risk_levels))
    ]

    # ---- Append predictions to production log ----
    # Rows are appended without a header, so the columns must follow the
    # layout the log file was created with: features first, then the
    # bookkeeping columns in this exact order. Selecting only the feature
    # columns drops extra/stale columns from the upload and fixes the order.
    df_log = df.loc[:, feature_cols].copy()
    # Keep the true label when the upload provides it.
    df_log["target"] = df["target"] if "target" in df.columns else np.nan
    df_log["model_prediction"] = preds
    df_log["model_probability"] = probas
    df_log["model_risk_level"] = risk_levels
    df_log["model_version"] = predictor.model_version
    df_log["timestamp"] = pd.Timestamp.now(tz="UTC")

    # If the log was removed or is empty at request time, recreate it with a
    # header instead of producing a header-less CSV.
    log_dir = os.path.dirname(PROD_LOG)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)
    needs_header = not os.path.exists(PROD_LOG) or os.path.getsize(PROD_LOG) == 0
    df_log.to_csv(PROD_LOG, mode="a", header=needs_header, index=False)

    return JSONResponse({
        "n_rows": len(results),
        "results": results,
    })
@router.get("/health")
def health():
    """Liveness probe: always reports an ``ok`` status."""
    return dict(status="ok")
@router.get("/")
def dashboard(request: Request):
    """Render the ``dashboard.html`` template for the incoming request."""
    context = {"request": request}
    return templates.TemplateResponse("dashboard.html", context)