Spaces:
Running
Running
| # app.py | |
| import os, json, joblib | |
| from typing import List, Dict, Any, Optional | |
| import pandas as pd | |
| from fastapi import FastAPI, HTTPException, Body, Query | |
| from fastapi.responses import RedirectResponse | |
| from pydantic import BaseModel, Field | |
# ====== Artifacts ======
# Directory holding the serialized artifacts; the default is now 'model'.
ARTIFACT_DIR = os.getenv("ARTIFACT_DIR", "model")
PREPROCESS_PATH = os.path.join(ARTIFACT_DIR, "preprocess.joblib")
MODEL_PATH = os.path.join(ARTIFACT_DIR, "xgb_model.joblib")
META_PATH = os.path.join(ARTIFACT_DIR, "metadata.json")

# NOTE(security): joblib.load unpickles its input, so ARTIFACT_DIR must only
# ever point at trusted files.
try:
    preprocess = joblib.load(PREPROCESS_PATH)
    model = joblib.load(MODEL_PATH)
    with open(META_PATH, "r", encoding="utf-8") as f:
        META: Dict[str, Any] = json.load(f)
except Exception as e:
    # Fail at import time when any artifact cannot be loaded. The original
    # exception is chained explicitly so the traceback shows the root cause.
    raise RuntimeError(f"Falha ao carregar artefatos: {e}") from e

# Values read from metadata.json, with fallbacks when a key is absent.
BEST_T = float(META.get("best_threshold", 0.5))
VERSION = str(META.get("version", "1.0"))
CAT_COLS: List[str] = list(META.get("cat_cols", []))
NUM_COLS: List[str] = list(META.get("num_cols", []))
# Column order expected by the preprocess step: categorical, then numeric.
RAW_FEATURES: List[str] = CAT_COLS + NUM_COLS
# Example application (the same one used in your test).
EXAMPLE_PAYLOAD: Dict[str, Any] = dict(
    checking_status="A12",
    credit_duration_months=24,
    credit_history="A32",
    loan_purpose="A43",
    credit_amount=3500,
    savings_account="A61",
    employment_duration="A73",
    installment_rate=3,
    personal_status_sex="A93",
    guarantors="A101",
    residence_since=2,
    property_type="A121",
    age_years=35,
    other_installment_plans="A143",
    housing_type="A152",
    existing_credits=1,
    job_type="A173",
    dependents=1,
    telephone="A192",
    foreign_worker="A202",
)
# =============================================================================
# FastAPI metadata (highly detailed Swagger docs)
# =============================================================================
# Markdown text passed to FastAPI as the API description. It is user-facing,
# so it is kept verbatim (in Portuguese).
DESCRIPTION = """
**IVerify — API de Decisão de Crédito (fraude/inadimplência)**
- **Modelo**: XGBoost (classificador binário)
- **Pré-processamento**: pipeline (OneHotEncoder com `handle_unknown="ignore"`, scaler/num, etc.)
- **Saída**:
- `prob_approved` (probabilidade de aprovação)
- `approved` (0/1) de acordo com `threshold` (padrão: `best_threshold` dos metadados)
- **Threshold**:
- Otimizado via política de custo definida no treino (`metadata.json` → `cost_matrix`).
- Pode ser sobrescrito por query `?threshold=0.42` nos endpoints de predição.
**Endpoints principais**
- `GET /health` — status e metadados do modelo
- `GET /schema` — colunas esperadas (raw), rótulos e exemplo
- `GET /sample` — payload pronto para teste
- `POST /predict` — predição única (prob + decisão)
- `POST /predict_batch` — predição em lote
**Observabilidade**
- Respostas incluem `meta_version`, e (quando aplicável) campos extras ignorados.
> **Atenção**: garanta que o payload contenha **todas** as colunas `RAW_FEATURES` esperadas pelo preprocess.
"""

# Tag metadata handed to FastAPI through `openapi_tags`.
TAGS = [
    {"name": tag_name, "description": tag_text}
    for tag_name, tag_text in (
        ("Health", "Status, versão e metadados do modelo."),
        ("Schema", "Colunas esperadas, exemplo de payload e labels."),
        ("Inference", "Predição única e em lote com threshold configurável."),
    )
]

# The application object; its version comes from the loaded metadata.
app = FastAPI(
    title="IVerify — Credit Decision API",
    description=DESCRIPTION,
    version=VERSION,
    openapi_tags=TAGS,
    contact=dict(name="IVerify", url="https://github.com/ViniciusKanh"),
    license_info=dict(name="MIT"),
)
| # ============================================================================= | |
| # Schemas (Pydantic) | |
| # ============================================================================= | |
class Application(BaseModel):
    """Raw credit application, i.e. the features **before** one-hot encoding.

    Field names must match the column names used at training time.
    """

    # Pydantic's default (`extra="ignore"`) silently drops unknown keys, so
    # they never reach `model_dump()` and the `extra_fields_ignored` report
    # built by the helpers could never be populated. Keeping extras on the
    # instance lets them be reported back to the caller; `_to_frame` still
    # drops them before they reach the model.
    model_config = {"extra": "allow"}

    checking_status: str = Field(..., examples=["A12"])
    credit_duration_months: int = Field(..., ge=1, examples=[24])
    credit_history: str = Field(..., examples=["A32"])
    loan_purpose: str = Field(..., examples=["A43"])
    credit_amount: float = Field(..., gt=0, examples=[3500])
    savings_account: str = Field(..., examples=["A61"])
    employment_duration: str = Field(..., examples=["A73"])
    installment_rate: int = Field(..., ge=1, le=4, examples=[3])
    personal_status_sex: str = Field(..., examples=["A93"])
    guarantors: str = Field(..., examples=["A101"])
    residence_since: int = Field(..., ge=1, examples=[2])
    property_type: str = Field(..., examples=["A121"])
    age_years: int = Field(..., ge=18, examples=[35])
    other_installment_plans: str = Field(..., examples=["A143"])
    housing_type: str = Field(..., examples=["A152"])
    existing_credits: int = Field(..., ge=0, examples=[1])
    job_type: str = Field(..., examples=["A173"])
    dependents: int = Field(..., ge=0, examples=[1])
    telephone: str = Field(..., examples=["A192"])
    foreign_worker: str = Field(..., examples=["A202"])
class PredictResponse(BaseModel):
    """Shape of the result produced for one scored application."""

    # Probability of the positive ("approved") class.
    prob_approved: float
    # 1 when prob_approved >= threshold, otherwise 0.
    approved: int
    # Threshold that was actually applied to this prediction.
    threshold: float
    # Model version taken from the metadata file.
    meta_version: Optional[str] = Field(default=None)
    # Names of payload keys that were not raw features and were dropped.
    extra_fields_ignored: Optional[List[str]] = Field(default=None)
class BatchRequest(BaseModel):
    """Body of a batch prediction: several applications, one shared threshold."""

    items: List[Application]
    # When left as None the global threshold from the metadata is used.
    threshold: Optional[float] = Field(
        default=None,
        description="Sobrescreve o threshold global (opcional).",
    )
class BatchResponseItem(PredictResponse):
    """One entry of a batch result; adds no fields to ``PredictResponse``."""
class BatchResponse(BaseModel):
    """Envelope returned by the batch prediction: a list of per-item results."""

    results: List[BatchResponseItem]
| # ============================================================================= | |
| # Helpers | |
| # ============================================================================= | |
def _to_frame(payload: Dict[str, Any]) -> pd.DataFrame:
    """Build the single-row DataFrame handed to the preprocessing step.

    The returned frame has exactly the ``RAW_FEATURES`` columns, in that
    order. Payload keys that are not raw features are dropped, and their
    names are recorded under ``attrs["extra_cols"]`` of the returned frame.

    Raises:
        HTTPException: status 400 listing every raw feature that is absent
            from the payload.
    """
    frame = pd.DataFrame([payload])
    present = frame.columns

    absent = [name for name in RAW_FEATURES if name not in present]
    if absent:
        detail = {"error": "missing_required_features", "missing": absent}
        raise HTTPException(status_code=400, detail=detail)

    # Remember the unexpected keys before the selection below discards them.
    unexpected = [name for name in present if name not in RAW_FEATURES]

    # Reorder to the training column order and drop everything else.
    frame = frame[RAW_FEATURES]
    if unexpected:
        frame.attrs["extra_cols"] = unexpected
    return frame
def _predict_one(d: Dict[str, Any], thr: float) -> Dict[str, Any]:
    """Score one raw application and apply the decision threshold.

    Args:
        d: Raw feature mapping for a single application.
        thr: Decision threshold; the application is approved when the
            predicted probability is greater than or equal to it.

    Returns:
        A dict with ``prob_approved``, ``approved`` (0/1), ``threshold`` and
        ``meta_version``, plus ``extra_fields_ignored`` when the payload
        carried keys that are not raw features.

    Raises:
        HTTPException: status 400 (from ``_to_frame``) when features are
            missing; status 500 when preprocessing or inference fails.
    """
    # Validation happens outside the try block so its HTTP 400 is not
    # rewrapped as an inference failure.
    X = _to_frame(d)
    try:
        Xp = preprocess.transform(X)
        # Column 1 of predict_proba is the positive class (approved).
        proba = float(model.predict_proba(Xp)[:, 1][0])
    except Exception as e:
        # Chain the original error so server-side tracebacks keep the cause.
        raise HTTPException(
            status_code=500,
            detail={"error": "inference_failed", "msg": str(e)},
        ) from e

    resp: Dict[str, Any] = {
        "prob_approved": proba,
        "approved": int(proba >= thr),
        "threshold": float(thr),
        "meta_version": VERSION,
    }
    extras = X.attrs.get("extra_cols")
    if extras:
        resp["extra_fields_ignored"] = extras
    return resp
| # ============================================================================= | |
| # Rotas | |
| # ============================================================================= | |
# Registered on the app (the function was previously never attached to a
# route) and hidden from the OpenAPI schema, since it only redirects.
@app.get("/", include_in_schema=False)
def root():
    """Redirect the root URL to the interactive docs at ``/docs``."""
    return RedirectResponse(url="/docs")
# Registered as the `GET /health` endpoint advertised in the API description.
@app.get("/health", tags=["Health"])
def health():
    """Report service status together with the loaded model's metadata."""
    return {
        "status": "ok",
        # Artifact loading raises at import time, so reaching this handler
        # means the model was loaded.
        "model_loaded": True,
        "version": VERSION,
        "threshold": BEST_T,
        "features_raw_expected": RAW_FEATURES,
        "cost_matrix": META.get("cost_matrix"),
    }
# Registered as the `GET /schema` endpoint advertised in the API description.
@app.get("/schema", tags=["Schema"])
def schema():
    """Describe the expected raw columns, the class labels and an example."""
    return {
        "features_raw": RAW_FEATURES,
        "labels": {0: "Negado", 1: "Aprovado"},
        "positive_class": 1,
        "example_payload": EXAMPLE_PAYLOAD,
    }
# Registered as the `GET /sample` endpoint advertised in the API description.
@app.get("/sample", tags=["Schema"])
def sample():
    """Return a ready-to-use example payload for testing the prediction."""
    return {"payload": EXAMPLE_PAYLOAD}
# Registered as the `POST /predict` endpoint advertised in the API
# description. `response_model_exclude_none` keeps the JSON identical to the
# dict built by `_predict_one` (optional fields are omitted, not null).
@app.post(
    "/predict",
    tags=["Inference"],
    response_model=PredictResponse,
    response_model_exclude_none=True,
)
def predict(
    payload: Application = Body(
        ...,
        # `example=` is deprecated; `examples=[...]` matches the style the
        # model fields already use.
        examples=[EXAMPLE_PAYLOAD],
        description="Payload cru (antes do OneHot), com as mesmas colunas usadas no treino.",
    ),
    threshold: Optional[float] = Query(
        None, description="Opcional. Sobrescreve o threshold global."
    ),
):
    """Score a single application.

    Args:
        payload: Raw application (before one-hot encoding).
        threshold: Optional override for the global decision threshold.

    Returns:
        The prediction dict produced by ``_predict_one``.
    """
    thr = BEST_T if threshold is None else float(threshold)
    return _predict_one(payload.model_dump(), thr)
# Registered as the `POST /predict_batch` endpoint advertised in the API
# description. `response_model_exclude_none` keeps each item identical to the
# dict built by `_predict_one` (optional fields are omitted, not null).
@app.post(
    "/predict_batch",
    tags=["Inference"],
    response_model=BatchResponse,
    response_model_exclude_none=True,
)
def predict_batch(request: BatchRequest):
    """Score every application in the request with one shared threshold.

    The request's ``threshold`` is used when given; otherwise the global
    threshold from the metadata applies. An empty ``items`` list yields an
    empty ``results`` list.
    """
    thr = BEST_T if request.threshold is None else float(request.threshold)
    results = [_predict_one(item.model_dump(), thr) for item in request.items]
    return {"results": results}