celpri
feat: add unit and functional tests with coverage
db7ff99
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from contextlib import asynccontextmanager
import pandas as pd
import os
import time
import json
from datetime import datetime
from huggingface_hub import hf_hub_download
from src.model.model import load_model
from fastapi.responses import RedirectResponse
class ClientID(BaseModel):
sk_id_curr: int
from pathlib import Path
import os
def get_features_by_id(sk_id_curr: int) -> pd.DataFrame:
# CAS TEST : features injectées par pytest
if hasattr(app.state, "features") and app.state.features is not None:
df = app.state.features
else:
# CAS LOCAL
local_path = Path(__file__).resolve().parents[2] / "Data" / "features_clients.csv"
if local_path.exists():
df = pd.read_csv(local_path)
else:
# CAS HF : repo MODELE
path = hf_hub_download(
repo_id="PCelia/credit-scoring-model",
filename="features_clients.csv",
token=os.environ.get("HF_TOKEN")
)
df = pd.read_csv(path)
row = df[df["SK_ID_CURR"] == sk_id_curr]
if row.empty:
raise KeyError("Client not found")
return row.drop(columns=["SK_ID_CURR"])
def load_features() -> pd.DataFrame:
# CAS LOCAL
local_path = Path(__file__).resolve().parents[2] / "Data" / "features_clients.csv"
if local_path.exists():
return pd.read_csv(local_path)
# CAS HF
path = hf_hub_download(
repo_id="PCelia/credit-scoring-model",
filename="features_clients.csv",
token=os.environ.get("HF_TOKEN")
)
return pd.read_csv(path)
@asynccontextmanager
async def lifespan(app: FastAPI):
app.state.model = load_model()
# chargement unique des features
app.state.features = load_features()
yield
app = FastAPI(lifespan=lifespan)
@app.get("/")
def root():
return RedirectResponse(url="/docs")
@app.post("/predict_by_id")
def predict_by_id(payload: ClientID):
start_total = time.perf_counter()
try:
start_features = time.perf_counter()
X = get_features_by_id(payload.sk_id_curr)
features_time = time.perf_counter() - start_features
except KeyError:
raise HTTPException(status_code=404, detail="Client not found")
start_infer = time.perf_counter()
score = float(app.state.model.predict_proba(X)[:, 1][0])
infer_time = time.perf_counter() - start_infer
total_time = time.perf_counter() - start_total
print(
f"features={features_time:.3f}s | "
f"infer={infer_time:.3f}s | "
f"total={total_time:.3f}s"
)
log_entry = {
"timestamp": datetime.utcnow().isoformat(),
"endpoint": "/predict_by_id",
"sk_id_curr": payload.sk_id_curr,
"score": score,
"features_time": features_time,
"inference_time": infer_time,
"total_time": total_time
}
with open("api_logs.jsonl", "a") as f:
f.write(json.dumps(log_entry) + "\n")
return {
"score": score,
"features_time": features_time,
"inference_time": infer_time,
"total_time": total_time
}