diff --git a/hf_space/.github/workflows/deploy-assets.yml b/hf_space/.github/workflows/deploy-assets.yml index 6efc3ad10f80bc8fe87b046c4f21614c0f8ee267..3ad805e5cdb53a2792cf9409b879f3afb9f5e349 100644 --- a/hf_space/.github/workflows/deploy-assets.yml +++ b/hf_space/.github/workflows/deploy-assets.yml @@ -22,8 +22,6 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 - with: - lfs: true - name: Set up Python uses: actions/setup-python@v5 diff --git a/hf_space/.github/workflows/deploy.yml b/hf_space/.github/workflows/deploy.yml index 80403c1aeebcf3669659873e0b9395a6dcdee10f..630f18d0da797b514aeb5304700241ff600f4e03 100644 --- a/hf_space/.github/workflows/deploy.yml +++ b/hf_space/.github/workflows/deploy.yml @@ -12,8 +12,6 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 - with: - lfs: true - name: Set up Python uses: actions/setup-python@v5 diff --git a/hf_space/.gitignore b/hf_space/.gitignore index 5cb707c9cabc91d1be6248d4f6db7de93de7d944..d3e9e89a8cab321beececfddeb20f9054e6ba9ac 100644 --- a/hf_space/.gitignore +++ b/hf_space/.gitignore @@ -5,10 +5,7 @@ __pycache__/ logs/ reports/ data/* -!data/*_final_model.pkl -!data/data_final.parquet artifacts/* -!artifacts/preprocessor.joblib .DS_Store .vscode/ .idea/ diff --git a/hf_space/Dockerfile b/hf_space/Dockerfile index 4b44cc223ea0269f483dc95963aa11f17597cd22..88a6c834c28222ee21db51311f271e092eb71de7 100644 --- a/hf_space/Dockerfile +++ b/hf_space/Dockerfile @@ -11,8 +11,6 @@ RUN pip install --no-cache-dir -r requirements.txt COPY app/ app/ COPY app_entry.py app.py gradio_app.py ./ COPY src/ src/ -COPY data/ data/ -COPY artifacts/ artifacts/ EXPOSE 7860 diff --git a/hf_space/README.md b/hf_space/README.md index 545dfb2fc2b0e63894fa2b227e1b534b0afac5e3..6647385cbc2af7d874d3794bf0c5e442ccb8c3eb 100644 --- a/hf_space/README.md +++ b/hf_space/README.md @@ -381,7 +381,7 @@ python monitoring/drift_report.py \ --logs logs/predictions.jsonl \ --reference data/data_final.parquet \ --output-dir reports \ - 
--min-prod-samples 200 \ + --min-prod-samples 50 \ --fdr-alpha 0.05 \ --prod-since "2024-01-01T00:00:00Z" \ --prod-until "2024-01-31T23:59:59Z" @@ -391,7 +391,7 @@ Le rapport HTML est généré dans `reports/drift_report.html` (avec des plots d `reports/plots/`). Sur Hugging Face, le disque est éphemère : télécharger les logs avant d'analyser. -Le drift est calcule uniquement si `n_prod >= --min-prod-samples` (defaut 200). +Le drift est calcule uniquement si `n_prod >= --min-prod-samples` (defaut 50). Sinon, un badge "Sample insuffisant" est affiche et les alertes sont desactivees. Robustesse integree: @@ -418,20 +418,16 @@ Captures (snapshot local du reporting + stockage): ## Profiling & Optimisation (Etape 4) -Profiling et benchmark d'inference (cProfile + latence) : +Profiling et benchmark d'inference (cProfile + latence): -- Desormais via le notebook modélisation (section TODO 5). -- L'ancien script est archive dans `dev_archive/profiling/profile_inference.py`. - -Sorties: - -- `docs/performance/benchmark_results.json` -- `docs/performance/profile_summary.txt` -- Rapport detaille: `docs/performance/performance_report.md` +- Notebook: `notebooks/P6_MANET_Stephane_notebook_modélisation.ipynb` (section TODO 5). +- Resultats: `docs/performance/benchmark_results.json`, `docs/performance/profile_summary.txt`, `docs/performance/performance_report.md`. Dashboard local Streamlit (monitoring + drift): ```shell +streamlit run monitoring/streamlit_app.py +# ou python -m streamlit run monitoring/streamlit_app.py ``` @@ -452,21 +448,3 @@ python -m streamlit run monitoring/streamlit_app.py - **CI/CD** : tests avec couverture (`pytest-cov`), build Docker et deploy vers Hugging Face Spaces. ![Screenshot MLFlow](https://raw.githubusercontent.com/stephmnt/credit-scoring-mlops/main/screen-mlflow.png) - -### Manques prioritaires - -* Mission 2 Étape 4 non couverte: pas de profiling/optimisation post‑déploiement ni rapport de gains, à livrer avec une version optimisée. 
- -### Preuves / doc à compléter - -* Lien explicite vers le dépôt public + stratégie de versions/branches à ajouter dans README.md. -* Preuve de model registry/serving MLflow à conserver (capture UI registry ou commande de serving) en plus de screen-mlflow.png. -* Dataset de référence non versionné (data_final.parquet est ignoré), documenter l’obtention pour exécuter drift_report.py. -* Badge GitHub Actions pointe vers OCR_Projet05 dans README.md, corriger l’URL. -* RGPD/PII: LOG_HASH_SK_ID est désactivé par défaut dans main.py, préciser l’activation en prod dans README.md. - -### Améliorations recommandées - -* Compléter les tests API: /logs (auth OK/KO), batch predict, param threshold, SK_ID_CURR manquant, outliers dans test_api.py. -* Simplifier le fallback ALLOW_MISSING_ARTIFACTS et DummyModel si les artefacts sont versionnés (nettoyer main.py et conftest.py). -* Si l’évaluateur attend une stratégie de branches, créer une branche feature et fusionner pour preuve. diff --git a/hf_space/docs/performance/performance_report.md b/hf_space/docs/performance/performance_report.md index 0314fd4a757842c5a41805c5dd9b8d8614045361..707e482b2c9bd7839876fb92d05553b781adaf56 100644 --- a/hf_space/docs/performance/performance_report.md +++ b/hf_space/docs/performance/performance_report.md @@ -6,11 +6,10 @@ Mesurer la latence d'inference, identifier les goulots d'etranglement et propose ## Setup -- Script (archivé): `dev_archive/profiling/profile_inference.py` -- Workflow courant: notebook modélisation (section TODO 5) +- Notebook: `notebooks/P6_MANET_Stephane_notebook_modélisation.ipynb` (section TODO 5) - Donnees: `data/data_final.parquet` (echantillon) - Parametres: `--sample-size 500 --batch-size 100 --runs 2` -- Modele: `HistGB_final_model.pkl` +- Modele: `data/*_final_model.pkl` (ex: `data/xgb_final_model.pkl`) Les resultats sont sauvegardes dans: @@ -21,21 +20,21 @@ Les resultats sont sauvegardes dans: | Scenario | Batch | Mean (ms) | P50 (ms) | P95 (ms) | Throughput 
(rows/s) | | --- | --- | ---:| ---:| ---:| ---:| -| optimized_preprocess | 100 | 187.37 | 169.96 | 271.41 | 533.71 | -| legacy_preprocess_alignment | 100 | 273.05 | 264.45 | 357.41 | 366.23 | +| optimized_preprocess | 100 | 35.73 | 33.77 | 43.09 | 2798.44 | +| legacy_preprocess_alignment | 100 | 47.57 | 47.19 | 51.23 | 2102.36 | -Gain observe (moyenne): ~31% de reduction de latence par batch sur le chemin optimise. +Gain observe (moyenne): ~25% de reduction de latence par batch sur le chemin optimise. ## Goulots d'etranglement (cProfile) Extrait `docs/performance/profile_summary.txt`: -- `app.main:preprocess_input` represente l'essentiel du temps cumule (~0.90s sur 1.05s). +- `app.main:preprocess_input` represente l'essentiel du temps cumule (voir `docs/performance/profile_summary.txt`). - Operations pandas dominantes: - `DataFrame.__setitem__` / `insert` - `fillna`, `to_numeric` - `get_dummies` -- `HistGradientBoostingClassifier.predict_proba` est present mais non majoritaire (~0.15s). +- `predict_proba` est present mais non majoritaire. 
## Optimisation appliquee diff --git a/hf_space/hf_space/app/main.py b/hf_space/hf_space/app/main.py index 14a2b41062eb591bba9a68c586ff4f9b1808a2ff..193b314f2d49c63b28977250c779461cc19bf932 100644 --- a/hf_space/hf_space/app/main.py +++ b/hf_space/hf_space/app/main.py @@ -1,5 +1,6 @@ from __future__ import annotations +import io import logging import os import pickle @@ -8,6 +9,7 @@ from datetime import datetime, timezone import hashlib import json from pathlib import Path +import threading import time from typing import Any import uuid @@ -16,6 +18,7 @@ from collections import deque import numpy as np import pandas as pd from fastapi import FastAPI, Header, HTTPException, Query, Response +from huggingface_hub import HfApi from pydantic import BaseModel from sklearn.preprocessing import MinMaxScaler import joblib @@ -78,6 +81,19 @@ HF_CUSTOMER_REPO_ID = os.getenv("HF_CUSTOMER_REPO_ID") HF_CUSTOMER_REPO_TYPE = os.getenv("HF_CUSTOMER_REPO_TYPE", "dataset") HF_CUSTOMER_FILENAME = os.getenv("HF_CUSTOMER_FILENAME", CUSTOMER_DATA_PATH.name) +HF_LOG_ENABLED = os.getenv("HF_LOG_ENABLED", "1") == "1" +HF_LOG_DATASET_REPO = os.getenv("HF_LOG_DATASET_REPO") +HF_LOG_PATH_PREFIX = os.getenv("HF_LOG_PATH_PREFIX", "prod_logs") + +HF_LOG_BUFFER_MAX = int(os.getenv("HF_LOG_BUFFER_MAX", "50")) +HF_LOG_FLUSH_SECONDS = int(os.getenv("HF_LOG_FLUSH_SECONDS", "60")) + +_hf_api = HfApi(token=os.getenv("HF_TOKEN")) if os.getenv("HF_TOKEN") else None +_hf_lock = threading.Lock() +_hf_buffer: list[dict[str, Any]] = [] +_hf_last_flush = 0.0 +_hf_flusher_started = False + IGNORE_FEATURES = ["is_train", "is_test", "TARGET", "SK_ID_CURR"] ENGINEERED_FEATURES = [ "DAYS_EMPLOYED_ANOM", @@ -218,6 +234,87 @@ def _hash_value(value: Any) -> str: return hashlib.sha256(str(value).encode("utf-8")).hexdigest() +def _utc_day() -> str: + return datetime.now(timezone.utc).strftime("%Y-%m-%d") + + +def _utc_stamp() -> str: + return datetime.now(timezone.utc).strftime("%H%M%S") + + +def 
_start_hf_flusher_if_needed() -> None: + global _hf_flusher_started + if _hf_flusher_started: + return + _hf_flusher_started = True + + def _loop() -> None: + while True: + time.sleep(HF_LOG_FLUSH_SECONDS) + with _hf_lock: + _flush_hf_locked(force=True) + + threading.Thread(target=_loop, daemon=True).start() + + +def _upload_parquet_part(df: pd.DataFrame) -> None: + if not (HF_LOG_ENABLED and _hf_api and HF_LOG_DATASET_REPO): + return + + part_path = ( + f"{HF_LOG_PATH_PREFIX}/date={_utc_day()}/" + f"part-{_utc_stamp()}-{uuid.uuid4().hex}.parquet" + ) + + bio = io.BytesIO() + df.to_parquet(bio, index=False) + + for attempt in range(3): + try: + bio.seek(0) + _hf_api.upload_file( + path_or_fileobj=bio, + path_in_repo=part_path, + repo_id=HF_LOG_DATASET_REPO, + repo_type="dataset", + commit_message=f"Add inference logs {_utc_day()}", + ) + return + except Exception: + if attempt == 2: + raise + time.sleep(1.5 * (attempt + 1)) + + +def _flush_hf_locked(force: bool = False) -> None: + global _hf_buffer, _hf_last_flush + if not _hf_buffer: + return + + now = time.time() + if not force: + if len(_hf_buffer) < HF_LOG_BUFFER_MAX and (now - _hf_last_flush) < HF_LOG_FLUSH_SECONDS: + return + + df = pd.DataFrame(_hf_buffer) + _hf_buffer = [] + _hf_last_flush = now + + try: + _upload_parquet_part(df) + except Exception as exc: + logger.warning("HF log upload failed: %s", exc) + + +def hf_log_rows(rows: list[dict[str, Any]]) -> None: + if not (HF_LOG_ENABLED and _hf_api and HF_LOG_DATASET_REPO): + return + _start_hf_flusher_if_needed() + with _hf_lock: + _hf_buffer.extend(rows) + _flush_hf_locked(force=False) + + def _normalize_category_value(value: object, mapping: dict[str, str]) -> object: if pd.isna(value): # type: ignore return np.nan @@ -234,7 +331,9 @@ def _ensure_hf_asset( repo_type: str, ) -> Path | None: if local_path.exists(): - return local_path + if not _is_lfs_pointer(local_path): + return local_path + logger.warning("LFS pointer detected for %s; attempting remote 
download.", local_path) if not repo_id: return None @@ -254,6 +353,16 @@ def _ensure_hf_asset( ) +def _is_lfs_pointer(path: Path) -> bool: + try: + with path.open("rb") as handle: + head = handle.read(200) + except OSError: + return False + text = head.decode("utf-8", errors="ignore") + return text.startswith("version https://git-lfs.github.com/spec/v1") + + def _normalize_inputs( df_raw: pd.DataFrame, @@ -470,11 +579,39 @@ def _log_prediction_entries( "prediction": result.get("prediction"), } ) - if error: - entry["error"] = error - entries.append(entry) + if error: + entry["error"] = error + entries.append(entry) _append_log_entries(entries) + flat_rows: list[dict[str, Any]] = [] + for entry in entries: + row = { + "timestamp_utc": entry.get("timestamp"), + "request_id": entry.get("request_id"), + "endpoint": entry.get("endpoint"), + "source": entry.get("source"), + "status_code": entry.get("status_code"), + "latency_ms": entry.get("latency_ms"), + "model_version": entry.get("model_version"), + "threshold": entry.get("threshold"), + "sk_id_curr": entry.get("sk_id_curr"), + "probability": entry.get("probability"), + "prediction": entry.get("prediction"), + "error": entry.get("error"), + } + inputs = entry.get("inputs") or {} + for key, value in inputs.items(): + row[f"input__{key}"] = value + + dq = entry.get("data_quality") or {} + for key, value in dq.items(): + row[f"dq__{key}"] = value + + flat_rows.append(row) + + hf_log_rows(flat_rows) + def build_preprocessor(data_path: Path) -> PreprocessorArtifacts: df = pd.read_parquet(data_path) @@ -853,7 +990,7 @@ def _get_customer_reference(preprocessor: PreprocessorArtifacts) -> pd.DataFrame if cached is not None: return cached data_path = CUSTOMER_DATA_PATH - if not data_path.exists(): + if not data_path.exists() or _is_lfs_pointer(data_path): downloaded = _ensure_hf_asset( data_path, HF_CUSTOMER_REPO_ID, @@ -1362,7 +1499,7 @@ def startup_event() -> None: if getattr(app.state, "model", None) is not None and 
getattr(app.state, "preprocessor", None) is not None: return model_path = MODEL_PATH - if not model_path.exists(): + if not model_path.exists() or _is_lfs_pointer(model_path): downloaded = _ensure_hf_asset( model_path, HF_MODEL_REPO_ID, @@ -1371,7 +1508,7 @@ def startup_event() -> None: ) if downloaded is not None: model_path = downloaded - if not model_path.exists(): + if not model_path.exists() or _is_lfs_pointer(model_path): if ALLOW_MISSING_ARTIFACTS: logger.warning("Model file not found: %s. Using dummy model.", model_path) app.state.model = DummyModel() @@ -1379,10 +1516,17 @@ def startup_event() -> None: raise RuntimeError(f"Model file not found: {model_path}") else: logger.info("Loading model from %s", model_path) - app.state.model = load_model(model_path) + try: + app.state.model = load_model(model_path) + except Exception as exc: + if ALLOW_MISSING_ARTIFACTS: + logger.warning("Model load failed (%s). Using dummy model.", exc) + app.state.model = DummyModel() + else: + raise data_path = DATA_PATH - if not data_path.exists(): + if not data_path.exists() or _is_lfs_pointer(data_path): downloaded = _ensure_hf_asset( data_path, HF_CUSTOMER_REPO_ID, @@ -1393,7 +1537,7 @@ def startup_event() -> None: data_path = downloaded try: artifacts_path = ARTIFACTS_PATH - if not artifacts_path.exists(): + if not artifacts_path.exists() or _is_lfs_pointer(artifacts_path): downloaded = _ensure_hf_asset( artifacts_path, HF_PREPROCESSOR_REPO_ID or None, @@ -1404,7 +1548,7 @@ def startup_event() -> None: artifacts_path = downloaded logger.info("Loading preprocessor artifacts from %s", artifacts_path) app.state.preprocessor = load_preprocessor(data_path, artifacts_path) - except RuntimeError as exc: + except Exception as exc: if ALLOW_MISSING_ARTIFACTS: logger.warning("Preprocessor artifacts missing (%s). 
Using fallback preprocessor.", exc) app.state.preprocessor = build_fallback_preprocessor() diff --git a/hf_space/hf_space/hf_space/.github/workflows/deploy-assets.yml b/hf_space/hf_space/hf_space/.github/workflows/deploy-assets.yml index 9cb50ad8c3c742b9e434eb0a83338bbb1b02bc30..6efc3ad10f80bc8fe87b046c4f21614c0f8ee267 100644 --- a/hf_space/hf_space/hf_space/.github/workflows/deploy-assets.yml +++ b/hf_space/hf_space/hf_space/.github/workflows/deploy-assets.yml @@ -59,6 +59,24 @@ jobs: model_path = candidates[0] api = HfApi() + existing = api.list_repo_files( + repo_id=repo_id, + repo_type=repo_type, + token=token, + ) + to_delete = [ + name + for name in existing + if name.endswith("_final_model.pkl") and name != model_path.name + ] + for name in to_delete: + api.delete_file( + path_in_repo=name, + repo_id=repo_id, + repo_type=repo_type, + token=token, + commit_message=f"Remove {name}", + ) for path in [model_path]: api.upload_file( path_or_fileobj=str(path), diff --git a/hf_space/hf_space/hf_space/data/xgb_final_model.pkl b/hf_space/hf_space/hf_space/data/xgb_final_model.pkl new file mode 100644 index 0000000000000000000000000000000000000000..1f59e2f2597b12e23e83db3280ae4a612f44c78d --- /dev/null +++ b/hf_space/hf_space/hf_space/data/xgb_final_model.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fe10d7c60f50f96a87bafd298f2919653aed37d90a091059017800450e6273b +size 1370510 diff --git a/hf_space/hf_space/hf_space/gradio_app.py b/hf_space/hf_space/hf_space/gradio_app.py index ce7dcf4171653c959bf823dcbcb7af02d848f5ec..2c784bc051ebc492499ed3c9f4175143bac324b5 100644 --- a/hf_space/hf_space/hf_space/gradio_app.py +++ b/hf_space/hf_space/hf_space/gradio_app.py @@ -21,20 +21,10 @@ from app.main import ( _normalize_inputs, ) -import io -import os -import threading -import time -import uuid -from datetime import datetime, timezone - -from huggingface_hub import HfApi - def _ensure_startup() -> None: if not getattr(app.state, "preprocessor", 
None): startup_event() - _start_log_flusher_if_needed() def _customer_snapshot(sk_id_curr: int) -> dict[str, Any]: @@ -297,8 +287,7 @@ with gr.Blocks(title="Credit scoring MLOps") as demo: """

Renseignez l'identifiant client, le montant du crédit et la durée.

Le modèle prédit la probabilité de défaut de paiement ainsi que la prédiction binaire associée. Le tableau SHAP affiche les 10 features les plus influentes pour cette prédiction. Le snapshot client affiche quelques informations de référence sur le client.

-

Pour accéder au data drift monitoring et aux rapports, rendez-vous sur l'application Streamlit dédiée.

-

Le dataset est disponible sur Hugging Face.

+

Pour accéder au data drift monitoring et aux rapports, rendez-vous sur l'application Streamlit dédiée. Le dataset est disponible sur Hugging Face.

""" ) @@ -328,115 +317,6 @@ with gr.Blocks(title="Credit scoring MLOps") as demo: outputs=[probability, prediction, shap_table, snapshot], ) -# ========================= -# HF Dataset logging (Parquet parts) -# ========================= - -LOG_ENABLED = os.getenv("LOG_ENABLED", "1") == "1" -LOG_DATASET_REPO = os.getenv("LOG_DATASET_REPO", "stephmnt/assets-credit-scoring-mlops") -LOG_PATH_PREFIX = os.getenv("LOG_PATH_PREFIX", "prod_logs") -HF_TOKEN = os.getenv("HF_TOKEN") # Secret HF (write) sur le Space inference - -LOG_BUFFER_MAX = int(os.getenv("LOG_BUFFER_MAX", "50")) # flush dès 50 lignes -LOG_FLUSH_SECONDS = int(os.getenv("LOG_FLUSH_SECONDS", "60")) # flush au moins toutes les 60s - -_hf_api = HfApi(token=HF_TOKEN) if HF_TOKEN else None -_log_lock = threading.Lock() -_log_buffer: list[dict] = [] -_last_flush_ts = 0.0 -_flusher_started = False - - -def _now_utc_iso() -> str: - return datetime.now(timezone.utc).isoformat() - - -def _upload_parquet_part(df: pd.DataFrame) -> None: - if _hf_api is None: - return # pas de token => pas de write - day = datetime.now(timezone.utc).strftime("%Y-%m-%d") - stamp = datetime.now(timezone.utc).strftime("%H%M%S") - part = f"{LOG_PATH_PREFIX}/date={day}/part-{stamp}-{uuid.uuid4().hex}.parquet" - - bio = io.BytesIO() - df.to_parquet(bio, index=False) - bio.seek(0) - - _hf_api.upload_file( - path_or_fileobj=bio, - path_in_repo=part, - repo_id=LOG_DATASET_REPO, - repo_type="dataset", - commit_message=f"Add inference logs {day}", - ) - - -def _flush_logs_locked(force: bool = False) -> None: - global _log_buffer, _last_flush_ts - if not _log_buffer: - return - - now = time.time() - if not force: - if len(_log_buffer) < LOG_BUFFER_MAX and (now - _last_flush_ts) < LOG_FLUSH_SECONDS: - return - - df = pd.DataFrame(_log_buffer) - _log_buffer = [] - _last_flush_ts = now - - try: - _upload_parquet_part(df) - except Exception: - # En prod tu peux logger ça en stderr / structlog etc. - # On évite de faire échouer l'inférence. 
- pass - - -def _start_log_flusher_if_needed() -> None: - global _flusher_started - if _flusher_started: - return - _flusher_started = True - - def _loop(): - while True: - time.sleep(LOG_FLUSH_SECONDS) - with _log_lock: - _flush_logs_locked(force=True) - - t = threading.Thread(target=_loop, daemon=True) - t.start() - - -def log_inference_row(row: dict) -> None: - if not LOG_ENABLED or _hf_api is None: - return - with _log_lock: - _log_buffer.append(row) - _flush_logs_locked(force=False) - - # --- Logging (Evidently-friendly) --- - row = { - "timestamp_utc": _now_utc_iso(), - "model_version": MODEL_VERSION, - "source": "gradio", - "sk_id_curr": int(sk_id_curr), - "amt_credit_requested": float(amt_credit), - "duration_months": int(duration_months), - "probability": float(probability), - "prediction": int(pred_value), - } - # Ajoute quelques features "business" utiles au drift (cat + num) - # (tu peux en ajouter plus si tu veux) - for k, v in snapshot.items(): - if k == "SK_ID_CURR": - continue - row[f"cust__{k}"] = v - - log_inference_row(row) - - if __name__ == "__main__": _ensure_startup() demo.launch() diff --git a/hf_space/hf_space/hf_space/hf_space/.gitignore b/hf_space/hf_space/hf_space/hf_space/.gitignore index 827e81828b8ec0902bc93840a2020a4ab1839ef5..5cb707c9cabc91d1be6248d4f6db7de93de7d944 100644 --- a/hf_space/hf_space/hf_space/hf_space/.gitignore +++ b/hf_space/hf_space/hf_space/hf_space/.gitignore @@ -5,7 +5,7 @@ __pycache__/ logs/ reports/ data/* -!data/HistGB_final_model.pkl +!data/*_final_model.pkl !data/data_final.parquet artifacts/* !artifacts/preprocessor.joblib diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy-assets.yml b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy-assets.yml index 70552a84815150893a83eca7a14a3c185b27fadf..9cb50ad8c3c742b9e434eb0a83338bbb1b02bc30 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy-assets.yml +++ 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy-assets.yml @@ -11,6 +11,10 @@ on: description: "HF repo type (dataset or model)" required: true default: "dataset" + push: + branches: ["main"] + paths: + - "data/*_final_model.pkl" jobs: upload-assets: @@ -34,8 +38,8 @@ jobs: - name: Upload assets to Hugging Face Hub env: HF_TOKEN: ${{ secrets.HF_TOKEN }} - HF_REPO_ID: ${{ inputs.repo_id }} - HF_REPO_TYPE: ${{ inputs.repo_type }} + HF_REPO_ID: ${{ inputs.repo_id || 'stephmnt/assets-credit-scoring-mlops' }} + HF_REPO_TYPE: ${{ inputs.repo_type || 'dataset' }} run: | python - <<'PY' import os diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py index 46a9ca60e0bd5d6c7d1c9e8307d577625d5aa45b..14a2b41062eb591bba9a68c586ff4f9b1808a2ff 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py @@ -237,22 +237,24 @@ def _ensure_hf_asset( return local_path if not repo_id: return None - try: - from huggingface_hub import hf_hub_download - except ImportError as exc: # pragma: no cover - optional dependency - raise RuntimeError("huggingface_hub is required to download remote assets.") from exc + + from huggingface_hub import hf_hub_download + + token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN") + local_path.parent.mkdir(parents=True, exist_ok=True) return Path( hf_hub_download( repo_id=repo_id, filename=filename, repo_type=repo_type, + token=token, # ✅ essentiel pour repo gated local_dir=str(local_path.parent), - local_dir_use_symlinks=False, ) ) + def _normalize_inputs( df_raw: pd.DataFrame, preprocessor: PreprocessorArtifacts, diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/gradio_app.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/gradio_app.py index 
d943c9ca7857ec8489c8c286d8491c868a008f5d..ce7dcf4171653c959bf823dcbcb7af02d848f5ec 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/gradio_app.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/gradio_app.py @@ -296,10 +296,9 @@ with gr.Blocks(title="Credit scoring MLOps") as demo: gr.HTML( """

Renseignez l'identifiant client, le montant du crédit et la durée.

-

Le modèle prédit la probabilité de défaut de paiement ainsi que la prédiction binaire associée.

-

Le tableau SHAP affiche les 10 features les plus influentes pour cette prédiction.

-

Le snapshot client affiche quelques informations de référence sur le client.

+

Le modèle prédit la probabilité de défaut de paiement ainsi que la prédiction binaire associée. Le tableau SHAP affiche les 10 features les plus influentes pour cette prédiction. Le snapshot client affiche quelques informations de référence sur le client.

Pour accéder au data drift monitoring et aux rapports, rendez-vous sur l'application Streamlit dédiée.

+

Le dataset est disponible sur Hugging Face.

""" ) diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml index 9b4bfca1817f10dd2a1bb0298b3584437cd69c1d..80403c1aeebcf3669659873e0b9395a6dcdee10f 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml @@ -49,7 +49,6 @@ jobs: --exclude 'logs' \ --exclude 'reports' \ --exclude 'screen-mlflow.png' \ - --exclude 'data/*_final_model.pkl' \ --exclude 'artifacts/preprocessor.joblib' \ --exclude 'data/*.csv' \ --exclude 'data/*.parquet' \ diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/gradio_app.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/gradio_app.py index 39d6506c169d4a516bd2f6befadae15ed9f0cb41..d943c9ca7857ec8489c8c286d8491c868a008f5d 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/gradio_app.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/gradio_app.py @@ -21,10 +21,20 @@ from app.main import ( _normalize_inputs, ) +import io +import os +import threading +import time +import uuid +from datetime import datetime, timezone + +from huggingface_hub import HfApi + def _ensure_startup() -> None: if not getattr(app.state, "preprocessor", None): startup_event() + _start_log_flusher_if_needed() def _customer_snapshot(sk_id_curr: int) -> dict[str, Any]: @@ -283,13 +293,19 @@ with gr.Blocks(title="Credit scoring MLOps") as demo: """) - gr.Markdown( - "Renseignez l'identifiant client, le montant du crédit et la durée. " + gr.HTML( + """ +

Renseignez l'identifiant client, le montant du crédit et la durée.

+

Le modèle prédit la probabilité de défaut de paiement ainsi que la prédiction binaire associée.

+

Le tableau SHAP affiche les 10 features les plus influentes pour cette prédiction.

+

Le snapshot client affiche quelques informations de référence sur le client.

+

Pour accéder au data drift monitoring et aux rapports, rendez-vous sur l'application Streamlit dédiée.

+ """ ) with gr.Row(): sk_id_curr = gr.Number(label="Identifiant client", precision=0, value=100001) - amt_credit = gr.Number(label="Montant du crédit", value=200000) + amt_credit = gr.Number(label="Montant du crédit", value=2000000) duration_months = gr.Number(label="Durée (mois)", precision=0, value=60) run_btn = gr.Button("Scorer") @@ -313,6 +329,114 @@ with gr.Blocks(title="Credit scoring MLOps") as demo: outputs=[probability, prediction, shap_table, snapshot], ) +# ========================= +# HF Dataset logging (Parquet parts) +# ========================= + +LOG_ENABLED = os.getenv("LOG_ENABLED", "1") == "1" +LOG_DATASET_REPO = os.getenv("LOG_DATASET_REPO", "stephmnt/assets-credit-scoring-mlops") +LOG_PATH_PREFIX = os.getenv("LOG_PATH_PREFIX", "prod_logs") +HF_TOKEN = os.getenv("HF_TOKEN") # Secret HF (write) sur le Space inference + +LOG_BUFFER_MAX = int(os.getenv("LOG_BUFFER_MAX", "50")) # flush dès 50 lignes +LOG_FLUSH_SECONDS = int(os.getenv("LOG_FLUSH_SECONDS", "60")) # flush au moins toutes les 60s + +_hf_api = HfApi(token=HF_TOKEN) if HF_TOKEN else None +_log_lock = threading.Lock() +_log_buffer: list[dict] = [] +_last_flush_ts = 0.0 +_flusher_started = False + + +def _now_utc_iso() -> str: + return datetime.now(timezone.utc).isoformat() + + +def _upload_parquet_part(df: pd.DataFrame) -> None: + if _hf_api is None: + return # pas de token => pas de write + day = datetime.now(timezone.utc).strftime("%Y-%m-%d") + stamp = datetime.now(timezone.utc).strftime("%H%M%S") + part = f"{LOG_PATH_PREFIX}/date={day}/part-{stamp}-{uuid.uuid4().hex}.parquet" + + bio = io.BytesIO() + df.to_parquet(bio, index=False) + bio.seek(0) + + _hf_api.upload_file( + path_or_fileobj=bio, + path_in_repo=part, + repo_id=LOG_DATASET_REPO, + repo_type="dataset", + commit_message=f"Add inference logs {day}", + ) + + +def _flush_logs_locked(force: bool = False) -> None: + global _log_buffer, _last_flush_ts + if not _log_buffer: + return + + now = time.time() + if not force: + if 
len(_log_buffer) < LOG_BUFFER_MAX and (now - _last_flush_ts) < LOG_FLUSH_SECONDS: + return + + df = pd.DataFrame(_log_buffer) + _log_buffer = [] + _last_flush_ts = now + + try: + _upload_parquet_part(df) + except Exception: + # En prod tu peux logger ça en stderr / structlog etc. + # On évite de faire échouer l'inférence. + pass + + +def _start_log_flusher_if_needed() -> None: + global _flusher_started + if _flusher_started: + return + _flusher_started = True + + def _loop(): + while True: + time.sleep(LOG_FLUSH_SECONDS) + with _log_lock: + _flush_logs_locked(force=True) + + t = threading.Thread(target=_loop, daemon=True) + t.start() + + +def log_inference_row(row: dict) -> None: + if not LOG_ENABLED or _hf_api is None: + return + with _log_lock: + _log_buffer.append(row) + _flush_logs_locked(force=False) + + # --- Logging (Evidently-friendly) --- + row = { + "timestamp_utc": _now_utc_iso(), + "model_version": MODEL_VERSION, + "source": "gradio", + "sk_id_curr": int(sk_id_curr), + "amt_credit_requested": float(amt_credit), + "duration_months": int(duration_months), + "probability": float(probability), + "prediction": int(pred_value), + } + # Ajoute quelques features "business" utiles au drift (cat + num) + # (tu peux en ajouter plus si tu veux) + for k, v in snapshot.items(): + if k == "SK_ID_CURR": + continue + row[f"cust__{k}"] = v + + log_inference_row(row) + if __name__ == "__main__": _ensure_startup() diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py index 00c40128e30d61bc382f014477acad000c6071ca..46a9ca60e0bd5d6c7d1c9e8307d577625d5aa45b 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py @@ 
-219,7 +219,7 @@ def _hash_value(value: Any) -> str: def _normalize_category_value(value: object, mapping: dict[str, str]) -> object: - if pd.isna(value): + if pd.isna(value): # type: ignore return np.nan key = str(value).strip().upper() if not key: @@ -265,12 +265,12 @@ def _normalize_inputs( unknown_masks: dict[str, pd.Series] = {} if "CODE_GENDER" in df.columns: raw = df["CODE_GENDER"] - normalized = raw.apply(lambda v: _normalize_category_value(v, CODE_GENDER_MAPPING)) + normalized = raw.apply(lambda v: _normalize_category_value(v, CODE_GENDER_MAPPING)) # type: ignore unknown_masks["CODE_GENDER"] = normalized.eq("Unknown") & raw.notna() df["CODE_GENDER"] = normalized if "FLAG_OWN_CAR" in df.columns: raw = df["FLAG_OWN_CAR"] - normalized = raw.apply(lambda v: _normalize_category_value(v, FLAG_OWN_CAR_MAPPING)) + normalized = raw.apply(lambda v: _normalize_category_value(v, FLAG_OWN_CAR_MAPPING)) # type: ignore unknown_masks["FLAG_OWN_CAR"] = normalized.eq("Unknown") & raw.notna() df["FLAG_OWN_CAR"] = normalized @@ -404,7 +404,7 @@ def _build_minimal_record( ) if "AMT_GOODS_PRICE" in record: record["AMT_GOODS_PRICE"] = float(payload.amt_credit) - return record + return record # type: ignore def _append_log_entries(entries: list[dict[str, Any]]) -> None: @@ -1576,7 +1576,7 @@ def _predict_records( latency_ms = (time.perf_counter() - start_time) * 1000.0 _log_prediction_entries( request_id=request_id, - records=log_records, + records=log_records, # type: ignore results=results, latency_ms=latency_ms, threshold=use_threshold, @@ -1598,7 +1598,7 @@ def _predict_records( latency_ms = (time.perf_counter() - start_time) * 1000.0 _log_prediction_entries( request_id=request_id, - records=log_records, + records=log_records, # type: ignore results=results, latency_ms=latency_ms, threshold=None, @@ -1613,7 +1613,7 @@ def _predict_records( detail = exc.detail if isinstance(exc.detail, dict) else {"message": str(exc.detail)} _log_prediction_entries( request_id=request_id, - 
records=log_records if "log_records" in locals() else records, + records=log_records if "log_records" in locals() else records, # type: ignore results=None, latency_ms=latency_ms, threshold=threshold, @@ -1628,7 +1628,7 @@ def _predict_records( latency_ms = (time.perf_counter() - start_time) * 1000.0 _log_prediction_entries( request_id=request_id, - records=log_records if "log_records" in locals() else records, + records=log_records if "log_records" in locals() else records, # type: ignore results=None, latency_ms=latency_ms, threshold=threshold, diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/HistGB_final_model.pkl b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/HistGB_final_model.pkl new file mode 100644 index 0000000000000000000000000000000000000000..75ad1f28102dd9fe9961b43a228dc6d7cacb45ed --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/HistGB_final_model.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c7b31d6b2aa9d622717d03b6eaf79e6e21297869ff401f2f61a2d688cc55d6f +size 411244 diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.env.example b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.env.example new file mode 100644 index 0000000000000000000000000000000000000000..2b98289470f4e6d6c848e94b6eb668679c37cab7 --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.env.example @@ -0,0 +1,46 @@ +# Core paths +MODEL_PATH=data/HistGB_final_model.pkl +DATA_PATH=data/data_final.parquet +ARTIFACTS_PATH=artifacts/preprocessor.joblib + +# Prediction behavior +PREDICTION_THRESHOLD=0.5 +CACHE_PREPROCESSOR=1 
+USE_REDUCED_INPUTS=1 +ALLOW_MISSING_ARTIFACTS=0 +MISSING_INDICATOR_MIN_RATE=0.05 + +# Feature selection (correlation) +FEATURE_SELECTION_METHOD=correlation +FEATURE_SELECTION_TOP_N=8 +FEATURE_SELECTION_MIN_CORR=0.02 +CORRELATION_THRESHOLD=0.85 +CORRELATION_SAMPLE_SIZE=50000 + +# Logging +LOG_PREDICTIONS=1 +LOG_DIR=logs +LOG_FILE=predictions.jsonl +LOG_INCLUDE_INPUTS=1 +LOG_HASH_SK_ID=0 +MODEL_VERSION=HistGB_final_model.pkl +LOGS_ACCESS_TOKEN= + +# Customer reference lookup +CUSTOMER_DATA_PATH=data/data_final.parquet +CUSTOMER_LOOKUP_ENABLED=1 +CUSTOMER_LOOKUP_CACHE=1 + +# Hugging Face assets (optional) +HF_MODEL_REPO_ID=stephmnt/assets-credit-scoring-mlops +HF_MODEL_REPO_TYPE=model +HF_MODEL_FILENAME=HistGB_final_model.pkl +HF_PREPROCESSOR_REPO_ID=stephmnt/assets-credit-scoring-mlops +HF_PREPROCESSOR_REPO_TYPE=model +HF_PREPROCESSOR_FILENAME=preprocessor.joblib +HF_CUSTOMER_REPO_ID=stephmnt/assets-credit-scoring-mlops +HF_CUSTOMER_REPO_TYPE=dataset +HF_CUSTOMER_FILENAME=data_final.parquet + +# MLflow +MLFLOW_TRACKING_URI=http://127.0.0.1:5000 diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy-assets.yml b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy-assets.yml index 8df5ea08c0cf83b468ab3c1322c26db9fd757a8f..70552a84815150893a83eca7a14a3c185b27fadf 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy-assets.yml +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy-assets.yml @@ -46,24 +46,23 @@ jobs: repo_type = os.environ["HF_REPO_TYPE"] token = os.environ["HF_TOKEN"] - files = { - "data/HistGB_final_model.pkl": "HistGB_final_model.pkl", - "artifacts/preprocessor.joblib": 
"preprocessor.joblib", - "data/data_final.parquet": "data_final.parquet", - } + candidates = sorted(Path("data").glob("*_final_model.pkl")) + if not candidates: + raise SystemExit("Missing model file: data/*_final_model.pkl") + if len(candidates) > 1: + names = ", ".join(path.name for path in candidates) + raise SystemExit(f"Multiple *_final_model.pkl files found: {names}") + model_path = candidates[0] api = HfApi() - for local_path, remote_name in files.items(): - path = Path(local_path) - if not path.exists(): - raise SystemExit(f"Missing file: {path}") + for path in [model_path]: api.upload_file( path_or_fileobj=str(path), - path_in_repo=remote_name, + path_in_repo=path.name, repo_id=repo_id, repo_type=repo_type, token=token, - commit_message=f"Update {remote_name}", + commit_message=f"Update {path.name}", ) print("Assets uploaded.") PY diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml index c39651ec5e97b3d40ed76117fd0c2e14b5b60c58..9b4bfca1817f10dd2a1bb0298b3584437cd69c1d 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml @@ -49,10 +49,11 @@ jobs: --exclude 'logs' \ --exclude 'reports' \ --exclude 'screen-mlflow.png' \ - --exclude 'data/HistGB_final_model.pkl' \ + --exclude 'data/*_final_model.pkl' \ --exclude 'artifacts/preprocessor.joblib' \ --exclude 'data/*.csv' \ --exclude 'data/*.parquet' \ + --exclude 'notebooks/mlflow.db' \ ./ hf_space/ cd hf_space git add . 
diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Dockerfile b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Dockerfile index 44a3c6cf803ba97d741cb861cef7b457312b78ab..4b44cc223ea0269f483dc95963aa11f17597cd22 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Dockerfile +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Dockerfile @@ -10,6 +10,7 @@ RUN pip install --no-cache-dir -r requirements.txt COPY app/ app/ COPY app_entry.py app.py gradio_app.py ./ +COPY src/ src/ COPY data/ data/ COPY artifacts/ artifacts/ diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md index 1d1297464999480f3ae9aefc4a9ae59f4ec78134..545dfb2fc2b0e63894fa2b227e1b534b0afac5e3 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md @@ -14,6 +14,18 @@ pinned: false [![GitHub Release Date](https://img.shields.io/github/release-date/stephmnt/credit-scoring-mlops?display_date=published_at&style=flat-square)](https://github.com/stephmnt/credit-scoring-mlops/releases) [![project_license](https://img.shields.io/github/license/stephmnt/credit-scoring-mlops.svg)](https://github.com/stephmnt/credit-scoring-mlops/blob/main/LICENSE) +## Structure rapide + +- `app/` API FastAPI + preprocessing inference +- `monitoring/` rapport drift + Streamlit +- `notebooks/` exploration + 
modelisation +- `src/` utilitaires ML (feature engineering / pipeline) +- `docs/` preuves & rapports (monitoring, perf) +- `tests/` tests unitaires/integration + +Le feature engineering est factorise dans `src/features.py` et reutilise +par le notebook et l'API pour eviter le training-serving skew. + ## Lancer MLFlow Le notebook est configure pour utiliser un serveur MLflow local (`http://127.0.0.1:5000`). @@ -75,6 +87,28 @@ pytest -q uvicorn app.main:app --reload --port 7860 ``` +### Workflow DEV (notebooks) + +Ordre recommande (dev uniquement) : + +1. `notebooks/P6_MANET_Stephane_notebook_exploration.ipynb` → genere `data/data_final.parquet` (ecrase). +2. `notebooks/P6_MANET_Stephane_notebook_compare_tuning_mlflow.ipynb` → compare+tuning, log MLflow, ecrit `reports/best_model.json`. +3. `notebooks/P6_MANET_Stephane_notebook_modélisation.ipynb` → rebuild preprocessor, entraine le modele final, exporte `data/_final_model.pkl`. +4. Lancer manuellement le workflow `deploy-assets.yml` pour pousser `data/*_final_model.pkl`. + +Note : ces notebooks restent dev-only. Le code prod reste dans `app/` et `monitoring/`. + +### Configuration (.env) + +Dupliquez `.env.example` en `.env` si vous voulez surcharger les chemins, +seuils ou sources Hugging Face. +Le seuil `MISSING_INDICATOR_MIN_RATE` limite les colonnes `is_missing_*` +aux features avec un taux de NaN >= 5% (par defaut). + +```shell +cp .env.example .env +``` + ### Environnement Poetry (livrable) Le livrable inclut `pyproject.toml`, aligne sur `requirements.txt`. 
Si besoin : @@ -85,9 +119,9 @@ poetry run pytest -q poetry run uvicorn app.main:app --reload --port 7860 ``` -Important : le modele `HistGB_final_model.pkl` doit etre regenere avec la +Important : le modele `*_final_model.pkl` doit etre regenere avec la version de scikit-learn definie dans `requirements.txt` / `pyproject.toml` -(re-execution de `P6_MANET_Stephane_notebook_modélisation.ipynb`, cellule de +(re-execution de `notebooks/P6_MANET_Stephane_notebook_modélisation.ipynb`, cellule de sauvegarde pickle). ### Exemple d'input (schema + valeurs) @@ -158,10 +192,13 @@ Variables utiles : ### Data contract (validation) - Types numeriques stricts (invalides -> 422). -- Ranges numeriques (min/max entrainement) controles. +- Ranges numeriques (min/max entrainement) controles, hors `SK_ID_CURR` (ID). - Categoriels normalises: `CODE_GENDER` -> {`F`, `M`}, `FLAG_OWN_CAR` -> {`Y`, `N`}. -- Sentinelle `DAYS_EMPLOYED=365243` remplacee par NaN. -- Logs enrichis via `data_quality` pour distinguer drift vs qualite de donnees. +- Sentinelle `DAYS_EMPLOYED=365243` remplacee par NaN + flag `DAYS_EMPLOYED_ANOM`. +- Ratios securises (division par zero) + flags `DENOM_ZERO_*`. +- Outliers clippees (p1/p99) + flags `is_outlier_*`. +- Missingness indicators `is_missing_*` pour les numeriques avec taux de NaN >= 5%. +- Logs enrichis via `data_quality` et `source` pour distinguer drift vs qualite de donnees. 
### Interface Gradio (scoring) @@ -186,13 +223,13 @@ variables suivantes sont definies : Exemple (un seul repo dataset avec 3 fichiers) : -- `HF_MODEL_REPO_ID=stephmnt/credit-scoring-mlops-assets` +- `HF_MODEL_REPO_ID=stephmnt/assets-credit-scoring-mlops` - `HF_MODEL_REPO_TYPE=dataset` -- `HF_MODEL_FILENAME=HistGB_final_model.pkl` -- `HF_PREPROCESSOR_REPO_ID=stephmnt/credit-scoring-mlops-assets` +- `HF_MODEL_FILENAME=histgb_final_model.pkl` (ou `lgbm_final_model.pkl` / `xgb_final_model.pkl`) +- `HF_PREPROCESSOR_REPO_ID=stephmnt/assets-credit-scoring-mlops` - `HF_PREPROCESSOR_REPO_TYPE=dataset` - `HF_PREPROCESSOR_FILENAME=preprocessor.joblib` -- `HF_CUSTOMER_REPO_ID=stephmnt/credit-scoring-mlops-assets` +- `HF_CUSTOMER_REPO_ID=stephmnt/assets-credit-scoring-mlops` - `HF_CUSTOMER_REPO_TYPE=dataset` - `HF_CUSTOMER_FILENAME=data_final.parquet` @@ -311,8 +348,11 @@ Variables utiles : - `LOG_HASH_SK_ID=1` pour anonymiser `SK_ID_CURR` Les logs incluent un bloc `data_quality` par requete (champs manquants, -types invalides, out-of-range, categories inconnues, sentinelle -`DAYS_EMPLOYED`). +types invalides, out-of-range, outliers, categories inconnues, sentinelle +`DAYS_EMPLOYED`) et un champ `source` (api/gradio/etc.). + +Astuce : vous pouvez passer un header `X-Client-Source` pour tagger la source +des requetes (ex: `gradio`, `test`, `batch`). Exemple local : @@ -359,6 +399,7 @@ Robustesse integree: - Categoriels: PSI avec lissage (`--psi-eps`) + categories rares regroupees (OTHER). - Numeriques: KS corrige par FDR (Benjamini-Hochberg, `--fdr-alpha`). - Sentinel `DAYS_EMPLOYED`: converti en NaN + taux suivi. +- Outliers: clipping p1/p99 + taux via `data_quality`. 
Le rapport inclut aussi la distribution des scores predits et le taux de prediction (option `--score-bins` pour ajuster le nombre de bins), ainsi qu'une section @@ -379,12 +420,8 @@ Captures (snapshot local du reporting + stockage): Profiling et benchmark d'inference (cProfile + latence) : -```shell -python profiling/profile_inference.py \ - --sample-size 2000 \ - --batch-size 128 \ - --runs 3 -``` +- Desormais via le notebook modélisation (section TODO 5). +- L'ancien script est archive dans `dev_archive/profiling/profile_inference.py`. Sorties: diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py index a7915cfb8178eacc7cd58a442e0d882418c4d10b..00c40128e30d61bc382f014477acad000c6071ca 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py @@ -20,9 +20,33 @@ from pydantic import BaseModel from sklearn.preprocessing import MinMaxScaler import joblib +from src.features import ( + add_missingness_indicators, + apply_outlier_clipping, + compute_outlier_bounds, + new_features_creation, + select_missing_indicator_columns, +) + logger = logging.getLogger("uvicorn.error") -MODEL_PATH = Path(os.getenv("MODEL_PATH", "data/HistGB_final_model.pkl")) +def _resolve_model_path() -> Path: + env_path = os.getenv("MODEL_PATH") + if env_path: + return Path(env_path) + candidates = sorted(Path("data").glob("*_final_model.pkl")) + if len(candidates) == 1: + return candidates[0] + if candidates: + logger.warning( + "Multiple *_final_model.pkl files found; set MODEL_PATH explicitly. 
Using %s", + candidates[0], + ) + return candidates[0] + return Path("data/histgb_final_model.pkl") + + +MODEL_PATH = _resolve_model_path() DATA_PATH = Path(os.getenv("DATA_PATH", "data/data_final.parquet")) ARTIFACTS_PATH = Path(os.getenv("ARTIFACTS_PATH", "artifacts/preprocessor.joblib")) DEFAULT_THRESHOLD = float(os.getenv("PREDICTION_THRESHOLD", "0.5")) @@ -56,11 +80,17 @@ HF_CUSTOMER_FILENAME = os.getenv("HF_CUSTOMER_FILENAME", CUSTOMER_DATA_PATH.name IGNORE_FEATURES = ["is_train", "is_test", "TARGET", "SK_ID_CURR"] ENGINEERED_FEATURES = [ + "DAYS_EMPLOYED_ANOM", "DAYS_EMPLOYED_PERC", "INCOME_CREDIT_PERC", "INCOME_PER_PERSON", "ANNUITY_INCOME_PERC", "PAYMENT_RATE", + "DENOM_ZERO_DAYS_EMPLOYED_PERC", + "DENOM_ZERO_INCOME_CREDIT_PERC", + "DENOM_ZERO_INCOME_PER_PERSON", + "DENOM_ZERO_ANNUITY_INCOME_PERC", + "DENOM_ZERO_PAYMENT_RATE", ] ENGINEERED_SOURCES = [ "DAYS_EMPLOYED", @@ -98,6 +128,9 @@ OUTLIER_COLUMNS = [ "AMT_REQ_CREDIT_BUREAU_YEAR", "AMT_REQ_CREDIT_BUREAU_QRT", ] +OUTLIER_LOWER_Q = 0.01 +OUTLIER_UPPER_Q = 0.99 +MISSING_INDICATOR_MIN_RATE = float(os.getenv("MISSING_INDICATOR_MIN_RATE", "0.05")) CODE_GENDER_MAPPING = { "F": "F", @@ -143,6 +176,8 @@ class PreprocessorArtifacts: numeric_medians: dict[str, float] categorical_columns: list[str] outlier_maxes: dict[str, float] + outlier_bounds: dict[str, tuple[float, float]] + missing_indicator_columns: list[str] numeric_ranges: dict[str, tuple[float, float]] features_to_scaled: list[str] scaler: MinMaxScaler @@ -243,6 +278,7 @@ def _normalize_inputs( if "DAYS_EMPLOYED" in df.columns: values = pd.to_numeric(df["DAYS_EMPLOYED"], errors="coerce") sentinel_mask = values == DAYS_EMPLOYED_SENTINEL + df["DAYS_EMPLOYED_ANOM"] = sentinel_mask.astype(int) if sentinel_mask.any(): df.loc[sentinel_mask, "DAYS_EMPLOYED"] = np.nan @@ -267,6 +303,7 @@ def _build_data_quality_records( missing_mask = df_norm[required_cols].isna() if required_cols else pd.DataFrame(index=df_norm.index) invalid_masks: dict[str, pd.Series] = {} 
out_of_range_masks: dict[str, pd.Series] = {} + outlier_masks: dict[str, pd.Series] = {} for col in numeric_required: if col not in df_raw.columns: @@ -283,6 +320,13 @@ def _build_data_quality_records( values = pd.to_numeric(df_norm[col], errors="coerce") out_of_range_masks[col] = (values < min_val) | (values > max_val) + for col, (low, high) in getattr(preprocessor, "outlier_bounds", {}).items(): + if col not in df_norm.columns: + outlier_masks[col] = pd.Series(False, index=df_norm.index) + continue + values = pd.to_numeric(df_norm[col], errors="coerce") + outlier_masks[col] = (values < low) | (values > high) + records: list[dict[str, Any]] = [] for idx in df_norm.index: missing_cols = ( @@ -292,18 +336,26 @@ def _build_data_quality_records( ) invalid_cols = [col for col, mask in invalid_masks.items() if mask.at[idx]] out_of_range_cols = [col for col, mask in out_of_range_masks.items() if mask.at[idx]] + outlier_cols = [col for col, mask in outlier_masks.items() if mask.at[idx]] unknown_cols = [col for col, mask in unknown_masks.items() if mask.at[idx]] + unknown_values = { + col: df_raw.at[idx, col] + for col in unknown_cols + if col in df_raw.columns + } nan_rate = float(missing_mask.loc[idx].mean()) if not missing_mask.empty else 0.0 - records.append( - { - "missing_required_columns": missing_cols, - "invalid_numeric_columns": invalid_cols, - "out_of_range_columns": out_of_range_cols, - "unknown_categories": unknown_cols, - "days_employed_sentinel": bool(sentinel_mask.at[idx]) if not sentinel_mask.empty else False, - "nan_rate": nan_rate, - } - ) + record = { + "missing_required_columns": missing_cols, + "invalid_numeric_columns": invalid_cols, + "out_of_range_columns": out_of_range_cols, + "outlier_columns": outlier_cols, + "unknown_categories": unknown_cols, + "days_employed_sentinel": bool(sentinel_mask.at[idx]) if not sentinel_mask.empty else False, + "nan_rate": nan_rate, + } + if unknown_values: + record["unknown_category_values"] = unknown_values + 
records.append(record) return records @@ -376,6 +428,7 @@ def _log_prediction_entries( threshold: float | None, status_code: int, preprocessor: PreprocessorArtifacts, + source: str | None = None, data_quality: list[dict[str, Any]] | None = None, error: str | None = None, ) -> None: @@ -400,6 +453,7 @@ def _log_prediction_entries( "status_code": status_code, "model_version": MODEL_VERSION, "threshold": threshold, + "source": source or "api", "inputs": inputs, } if data_quality and idx < len(data_quality): @@ -420,25 +474,16 @@ def _log_prediction_entries( _append_log_entries(entries) -def new_features_creation(df: pd.DataFrame) -> pd.DataFrame: - df_features = df.copy() - for col in ENGINEERED_SOURCES: - if col not in df_features.columns: - df_features[col] = np.nan - df_features["DAYS_EMPLOYED_PERC"] = df_features["DAYS_EMPLOYED"] / df_features["DAYS_BIRTH"] - df_features["INCOME_CREDIT_PERC"] = df_features["AMT_INCOME_TOTAL"] / df_features["AMT_CREDIT"] - df_features["INCOME_PER_PERSON"] = df_features["AMT_INCOME_TOTAL"] / df_features["CNT_FAM_MEMBERS"] - df_features["ANNUITY_INCOME_PERC"] = df_features["AMT_ANNUITY"] / df_features["AMT_INCOME_TOTAL"] - df_features["PAYMENT_RATE"] = df_features["AMT_ANNUITY"] / df_features["AMT_CREDIT"] - return df_features - - def build_preprocessor(data_path: Path) -> PreprocessorArtifacts: df = pd.read_parquet(data_path) raw_feature_columns = df.columns.tolist() input_feature_columns = [c for c in raw_feature_columns if c not in ["is_train", "is_test", "TARGET"]] - df = new_features_creation(df) + df = new_features_creation( + df, + days_employed_sentinel=DAYS_EMPLOYED_SENTINEL, + engineered_sources=ENGINEERED_SOURCES, + ) df.replace([np.inf, -np.inf], np.nan, inplace=True) missing_rate = df.isna().mean() @@ -448,6 +493,26 @@ def build_preprocessor(data_path: Path) -> PreprocessorArtifacts: df = df[columns_keep] df = df.dropna(subset=columns_must_not_missing) + if "CODE_GENDER" in df.columns: + df = df[df["CODE_GENDER"] != 
"XNA"] + + missing_indicator_columns = select_missing_indicator_columns( + df, + exclude_cols=set(IGNORE_FEATURES), + min_missing_rate=MISSING_INDICATOR_MIN_RATE, + ) + df = add_missingness_indicators(df, missing_indicator_columns) + + outlier_bounds = compute_outlier_bounds( + df, + OUTLIER_COLUMNS, + lower_q=OUTLIER_LOWER_Q, + upper_q=OUTLIER_UPPER_Q, + ) + df = apply_outlier_clipping(df, outlier_bounds) + + columns_keep = df.columns.tolist() + numeric_cols = df.select_dtypes(include=["number"]).columns numeric_medians = df[numeric_cols].median().to_dict() df[numeric_cols] = df[numeric_cols].fillna(numeric_medians) @@ -455,12 +520,7 @@ def build_preprocessor(data_path: Path) -> PreprocessorArtifacts: categorical_columns = df.select_dtypes(include=["object"]).columns.tolist() df[categorical_columns] = df[categorical_columns].fillna("Unknown") - if "CODE_GENDER" in df.columns: - df = df[df["CODE_GENDER"] != "XNA"] - - outlier_maxes = {col: df[col].max() for col in OUTLIER_COLUMNS if col in df.columns} - for col, max_val in outlier_maxes.items(): - df = df[df[col] != max_val] + outlier_maxes = {col: bounds[1] for col, bounds in outlier_bounds.items()} reduced_input_columns, selection_scores, selection_method = _compute_reduced_inputs( df, @@ -487,7 +547,11 @@ def build_preprocessor(data_path: Path) -> PreprocessorArtifacts: required_input = _fallback_reduced_inputs(input_feature_columns) else: required_input = sorted(required_raw) - numeric_required = sorted(col for col in required_input if col in numeric_medians) + numeric_required = sorted( + col + for col in required_input + if col in numeric_medians and col != "SK_ID_CURR" + ) correlated_imputation = _build_correlated_imputation( df, input_feature_columns=input_feature_columns, @@ -501,6 +565,8 @@ def build_preprocessor(data_path: Path) -> PreprocessorArtifacts: numeric_medians={k: float(v) for k, v in numeric_medians.items()}, categorical_columns=categorical_columns, outlier_maxes={k: float(v) for k, v in 
outlier_maxes.items()}, + outlier_bounds={k: (float(v[0]), float(v[1])) for k, v in outlier_bounds.items()}, + missing_indicator_columns=missing_indicator_columns, numeric_ranges=numeric_ranges, features_to_scaled=features_to_scaled, scaler=scaler, @@ -554,9 +620,28 @@ def build_fallback_preprocessor() -> PreprocessorArtifacts: ] ) - df = new_features_creation(base) + df = new_features_creation( + base, + days_employed_sentinel=DAYS_EMPLOYED_SENTINEL, + engineered_sources=ENGINEERED_SOURCES, + ) df.replace([np.inf, -np.inf], np.nan, inplace=True) + missing_indicator_columns = select_missing_indicator_columns( + df, + exclude_cols=set(IGNORE_FEATURES), + min_missing_rate=MISSING_INDICATOR_MIN_RATE, + ) + df = add_missingness_indicators(df, missing_indicator_columns) + + outlier_bounds = compute_outlier_bounds( + df, + OUTLIER_COLUMNS, + lower_q=OUTLIER_LOWER_Q, + upper_q=OUTLIER_UPPER_Q, + ) + df = apply_outlier_clipping(df, outlier_bounds) + columns_keep = df.columns.tolist() columns_must_not_missing = [col for col in columns_keep if col not in IGNORE_FEATURES] @@ -579,7 +664,9 @@ def build_fallback_preprocessor() -> PreprocessorArtifacts: required_raw.update(col for col in columns_must_not_missing if col in input_feature_columns) required_raw.add("SK_ID_CURR") required_input = _fallback_reduced_inputs(input_feature_columns) - numeric_required = sorted(col for col in required_input if col in numeric_medians) + numeric_required = sorted( + col for col in required_input if col in numeric_medians and col != "SK_ID_CURR" + ) numeric_ranges = {col: (float(df[col].min()), float(df[col].max())) for col in numeric_cols} @@ -588,7 +675,9 @@ def build_fallback_preprocessor() -> PreprocessorArtifacts: columns_must_not_missing=columns_must_not_missing, numeric_medians={k: float(v) for k, v in numeric_medians.items()}, categorical_columns=categorical_columns, - outlier_maxes={}, + outlier_maxes={k: float(v[1]) for k, v in outlier_bounds.items()}, + outlier_bounds={k: 
(float(v[0]), float(v[1])) for k, v in outlier_bounds.items()}, + missing_indicator_columns=missing_indicator_columns, numeric_ranges=numeric_ranges, features_to_scaled=features_to_scaled, scaler=scaler, @@ -633,7 +722,9 @@ def load_preprocessor(data_path: Path, artifacts_path: Path) -> PreprocessorArti updated = True if not hasattr(preprocessor, "numeric_required_columns"): preprocessor.numeric_required_columns = sorted( - col for col in preprocessor.required_input_columns if col in preprocessor.numeric_medians + col + for col in preprocessor.required_input_columns + if col in preprocessor.numeric_medians and col != "SK_ID_CURR" ) updated = True if not hasattr(preprocessor, "numeric_ranges"): @@ -646,6 +737,56 @@ def load_preprocessor(data_path: Path, artifacts_path: Path) -> PreprocessorArti raise RuntimeError(f"Data file not found to rebuild preprocessor: {data_path}") preprocessor = build_preprocessor(data_path) updated = True + needs_missing_indicators = ( + not hasattr(preprocessor, "missing_indicator_columns") + or not preprocessor.missing_indicator_columns + ) + needs_outlier_bounds = ( + not hasattr(preprocessor, "outlier_bounds") or not preprocessor.outlier_bounds + ) + prepared_df = None + if (needs_missing_indicators or needs_outlier_bounds) and data_path.exists(): + prepared_df = pd.read_parquet(data_path) + prepared_df = new_features_creation( + prepared_df, + days_employed_sentinel=DAYS_EMPLOYED_SENTINEL, + engineered_sources=ENGINEERED_SOURCES, + ) + prepared_df.replace([np.inf, -np.inf], np.nan, inplace=True) + if preprocessor.columns_keep: + prepared_df = prepared_df[preprocessor.columns_keep] + if preprocessor.columns_must_not_missing: + prepared_df = prepared_df.dropna(subset=preprocessor.columns_must_not_missing) + if "CODE_GENDER" in prepared_df.columns: + prepared_df = prepared_df[prepared_df["CODE_GENDER"] != "XNA"] + if needs_missing_indicators: + if prepared_df is not None: + preprocessor.missing_indicator_columns = 
select_missing_indicator_columns( + prepared_df, + exclude_cols=set(IGNORE_FEATURES), + min_missing_rate=MISSING_INDICATOR_MIN_RATE, + ) + else: + preprocessor.missing_indicator_columns = [] + updated = True + if needs_outlier_bounds: + if prepared_df is not None: + preprocessor.outlier_bounds = compute_outlier_bounds( + prepared_df, + OUTLIER_COLUMNS, + lower_q=OUTLIER_LOWER_Q, + upper_q=OUTLIER_UPPER_Q, + ) + else: + preprocessor.outlier_bounds = {} + for col, max_val in getattr(preprocessor, "outlier_maxes", {}).items(): + min_val = None + if hasattr(preprocessor, "numeric_ranges") and col in preprocessor.numeric_ranges: + min_val = preprocessor.numeric_ranges[col][0] + if min_val is None: + min_val = float("-inf") + preprocessor.outlier_bounds[col] = (float(min_val), float(max_val)) + updated = True if USE_REDUCED_INPUTS: reduced = _reduce_input_columns(preprocessor) if preprocessor.required_input_columns != reduced: @@ -658,7 +799,9 @@ def load_preprocessor(data_path: Path, artifacts_path: Path) -> PreprocessorArti required_updated = True updated = True desired_numeric_required = sorted( - col for col in preprocessor.required_input_columns if col in preprocessor.numeric_medians + col + for col in preprocessor.required_input_columns + if col in preprocessor.numeric_medians and col != "SK_ID_CURR" ) if getattr(preprocessor, "numeric_required_columns", None) != desired_numeric_required: preprocessor.numeric_required_columns = desired_numeric_required @@ -890,7 +1033,11 @@ def _compute_reduced_inputs_from_data( if not data_path.exists(): return _fallback_reduced_inputs(preprocessor.input_feature_columns), {}, "default" df = pd.read_parquet(data_path) - df = new_features_creation(df) + df = new_features_creation( + df, + days_employed_sentinel=DAYS_EMPLOYED_SENTINEL, + engineered_sources=ENGINEERED_SOURCES, + ) df.replace([np.inf, -np.inf], np.nan, inplace=True) if preprocessor.columns_keep: @@ -908,9 +1055,25 @@ def _compute_reduced_inputs_from_data( if 
"CODE_GENDER" in df.columns: df = df[df["CODE_GENDER"] != "XNA"] - for col, max_val in preprocessor.outlier_maxes.items(): - if col in df.columns: - df = df[df[col] != max_val] + if getattr(preprocessor, "missing_indicator_columns", None): + df = add_missingness_indicators(df, preprocessor.missing_indicator_columns) + else: + df = add_missingness_indicators( + df, + select_missing_indicator_columns( + df, + exclude_cols=set(IGNORE_FEATURES), + min_missing_rate=MISSING_INDICATOR_MIN_RATE, + ), + ) + + outlier_bounds = getattr(preprocessor, "outlier_bounds", {}) or compute_outlier_bounds( + df, + OUTLIER_COLUMNS, + lower_q=OUTLIER_LOWER_Q, + upper_q=OUTLIER_UPPER_Q, + ) + df = apply_outlier_clipping(df, outlier_bounds) return _compute_reduced_inputs(df, input_feature_columns=preprocessor.input_feature_columns) @@ -920,7 +1083,11 @@ def _compute_correlated_imputation( preprocessor: PreprocessorArtifacts, ) -> dict[str, dict[str, float | str]]: df = pd.read_parquet(data_path) - df = new_features_creation(df) + df = new_features_creation( + df, + days_employed_sentinel=DAYS_EMPLOYED_SENTINEL, + engineered_sources=ENGINEERED_SOURCES, + ) df.replace([np.inf, -np.inf], np.nan, inplace=True) df = df[preprocessor.columns_keep] @@ -936,9 +1103,25 @@ def _compute_correlated_imputation( if "CODE_GENDER" in df.columns: df = df[df["CODE_GENDER"] != "XNA"] - for col, max_val in preprocessor.outlier_maxes.items(): - if col in df.columns: - df = df[df[col] != max_val] + if getattr(preprocessor, "missing_indicator_columns", None): + df = add_missingness_indicators(df, preprocessor.missing_indicator_columns) + else: + df = add_missingness_indicators( + df, + select_missing_indicator_columns( + df, + exclude_cols=set(IGNORE_FEATURES), + min_missing_rate=MISSING_INDICATOR_MIN_RATE, + ), + ) + + outlier_bounds = getattr(preprocessor, "outlier_bounds", {}) or compute_outlier_bounds( + df, + OUTLIER_COLUMNS, + lower_q=OUTLIER_LOWER_Q, + upper_q=OUTLIER_UPPER_Q, + ) + df = 
apply_outlier_clipping(df, outlier_bounds) return _build_correlated_imputation( df, @@ -1048,11 +1231,30 @@ def preprocess_input(df_raw: pd.DataFrame, artifacts: PreprocessorArtifacts) -> if "TARGET" not in df.columns: df["TARGET"] = 0 - df = new_features_creation(df) + df = new_features_creation( + df, + days_employed_sentinel=DAYS_EMPLOYED_SENTINEL, + engineered_sources=ENGINEERED_SOURCES, + ) df.replace([np.inf, -np.inf], np.nan, inplace=True) df = df.reindex(columns=artifacts.columns_keep, fill_value=np.nan) + indicator_cols = getattr(artifacts, "missing_indicator_columns", None) or select_missing_indicator_columns( + df, + exclude_cols=set(IGNORE_FEATURES), + min_missing_rate=MISSING_INDICATOR_MIN_RATE, + ) + df = add_missingness_indicators(df, indicator_cols) + + outlier_bounds = getattr(artifacts, "outlier_bounds", {}) or compute_outlier_bounds( + df, + OUTLIER_COLUMNS, + lower_q=OUTLIER_LOWER_Q, + upper_q=OUTLIER_UPPER_Q, + ) + df = apply_outlier_clipping(df, outlier_bounds) + _apply_correlated_imputation(df, artifacts) for col, median in artifacts.numeric_medians.items(): @@ -1072,16 +1274,6 @@ def preprocess_input(df_raw: pd.DataFrame, artifacts: PreprocessorArtifacts) -> detail={"message": "CODE_GENDER cannot be 'XNA' based on training rules."}, ) - for col, max_val in artifacts.outlier_maxes.items(): - if col in df.columns and (df[col] >= max_val).any(): - raise HTTPException( - status_code=422, - detail={ - "message": "Input contains outlier values removed during training.", - "outlier_columns": [col], - }, - ) - df_hot = pd.get_dummies(df, columns=artifacts.categorical_columns) df_hot = df_hot.reindex(columns=artifacts.features_to_scaled, fill_value=0) @@ -1089,6 +1281,80 @@ def preprocess_input(df_raw: pd.DataFrame, artifacts: PreprocessorArtifacts) -> return pd.DataFrame(scaled, columns=artifacts.features_to_scaled, index=df.index) +def _prepare_pipeline_input( + df_raw: pd.DataFrame, + artifacts: PreprocessorArtifacts, + model: Any, +) -> 
pd.DataFrame: + df = df_raw.copy() + + for col in artifacts.required_input_columns: + if col not in df.columns: + df[col] = np.nan + + allow_missing = {"DAYS_EMPLOYED"} + _ensure_required_columns(df, artifacts.required_input_columns, allow_missing=allow_missing) + _validate_numeric_inputs(df, artifacts.numeric_required_columns) + _validate_numeric_ranges( + df, + {k: v for k, v in artifacts.numeric_ranges.items() if k in artifacts.numeric_required_columns}, + ) + + df["is_train"] = 0 + df["is_test"] = 1 + if "TARGET" not in df.columns: + df["TARGET"] = 0 + + df = new_features_creation( + df, + days_employed_sentinel=DAYS_EMPLOYED_SENTINEL, + engineered_sources=ENGINEERED_SOURCES, + ) + df.replace([np.inf, -np.inf], np.nan, inplace=True) + + df = df.reindex(columns=artifacts.columns_keep, fill_value=np.nan) + + indicator_cols = getattr(artifacts, "missing_indicator_columns", None) or select_missing_indicator_columns( + df, + exclude_cols=set(IGNORE_FEATURES), + min_missing_rate=MISSING_INDICATOR_MIN_RATE, + ) + df = add_missingness_indicators(df, indicator_cols) + + outlier_bounds = getattr(artifacts, "outlier_bounds", {}) or compute_outlier_bounds( + df, + OUTLIER_COLUMNS, + lower_q=OUTLIER_LOWER_Q, + upper_q=OUTLIER_UPPER_Q, + ) + df = apply_outlier_clipping(df, outlier_bounds) + + if "CODE_GENDER" in df.columns and (df["CODE_GENDER"] == "XNA").any(): + raise HTTPException( + status_code=422, + detail={"message": "CODE_GENDER cannot be 'XNA' based on training rules."}, + ) + + expected_cols = None + if hasattr(model, "named_steps"): + preprocessor = model.named_steps.get("preprocessing") + expected_cols = getattr(preprocessor, "feature_names_in_", None) + if expected_cols is None: + expected_cols = [c for c in artifacts.input_feature_columns if c not in IGNORE_FEATURES] + + return df.reindex(columns=expected_cols, fill_value=np.nan) + + +def prepare_inference_features( + df_raw: pd.DataFrame, + artifacts: PreprocessorArtifacts, + model: Any, +) -> pd.DataFrame: + 
if hasattr(model, "named_steps") and model.named_steps.get("preprocessing") is not None: + return _prepare_pipeline_input(df_raw, artifacts, model) + return preprocess_input(df_raw, artifacts) + + @app.on_event("startup") def startup_event() -> None: if getattr(app.state, "model", None) is not None and getattr(app.state, "preprocessor", None) is not None: @@ -1183,9 +1449,19 @@ def features(include_all: bool = Query(default=False)) -> dict[str, Any]: for col in preprocessor.required_input_columns if col in scores } + missing_indicator_features = [ + f"is_missing_{col}" + for col in getattr(preprocessor, "missing_indicator_columns", []) or [] + ] + outlier_indicator_features = [ + f"is_outlier_{col}" + for col in getattr(preprocessor, "outlier_bounds", {}) or {} + ] payload = { "required_input_features": preprocessor.required_input_columns, "engineered_features": ENGINEERED_FEATURES, + "missing_indicator_features_count": len(missing_indicator_features), + "outlier_indicator_features_count": len(outlier_indicator_features), "model_features_count": len(preprocessor.features_to_scaled), "feature_selection_method": preprocessor.feature_selection_method, "feature_selection_top_n": FEATURE_SELECTION_TOP_N, @@ -1198,6 +1474,8 @@ def features(include_all: bool = Query(default=False)) -> dict[str, Any]: if include_all: payload["input_features"] = preprocessor.input_feature_columns payload["optional_input_features"] = optional_features + payload["missing_indicator_features"] = missing_indicator_features + payload["outlier_indicator_features"] = outlier_indicator_features else: payload["input_features"] = preprocessor.required_input_columns payload["optional_input_features"] = [] @@ -1235,8 +1513,28 @@ def logs( return Response(content="".join(lines), media_type="application/x-ndjson") +def _align_features_to_model(features: pd.DataFrame, model: Any) -> pd.DataFrame: + expected = getattr(model, "feature_names_in_", None) + if expected is None: + return features + expected = 
list(expected) + + extra = [c for c in features.columns if c not in expected] + missing = [c for c in expected if c not in features.columns] + if extra or missing: + logger.warning( + "Feature mismatch: extra=%s missing=%s", + extra[:15], + missing[:15], + ) + return features.reindex(columns=expected, fill_value=0) -def _predict_records(records: list[dict[str, Any]], threshold: float | None) -> dict[str, Any]: +def _predict_records( + records: list[dict[str, Any]], + threshold: float | None, + *, + source: str | None = None, +) -> dict[str, Any]: model = app.state.model preprocessor: PreprocessorArtifacts = app.state.preprocessor request_id = str(uuid.uuid4()) @@ -1260,7 +1558,8 @@ def _predict_records(records: list[dict[str, Any]], threshold: float | None) -> raise HTTPException(status_code=422, detail={"message": "SK_ID_CURR is required."}) sk_ids = df_norm["SK_ID_CURR"].tolist() - features = preprocess_input(df_norm, preprocessor) + features = prepare_inference_features(df_norm, preprocessor, model) + features = _align_features_to_model(features, model) if hasattr(model, "predict_proba"): proba = model.predict_proba(features)[:, 1] @@ -1283,6 +1582,7 @@ def _predict_records(records: list[dict[str, Any]], threshold: float | None) -> threshold=use_threshold, status_code=200, preprocessor=preprocessor, + source=source, data_quality=dq_records, ) return {"predictions": results, "threshold": use_threshold} @@ -1304,6 +1604,7 @@ def _predict_records(records: list[dict[str, Any]], threshold: float | None) -> threshold=None, status_code=200, preprocessor=preprocessor, + source=source, data_quality=dq_records, ) return {"predictions": results, "threshold": None} @@ -1318,6 +1619,7 @@ def _predict_records(records: list[dict[str, Any]], threshold: float | None) -> threshold=threshold, status_code=exc.status_code, preprocessor=preprocessor, + source=source, data_quality=dq_records if "dq_records" in locals() else None, error=json.dumps(detail, ensure_ascii=True), ) @@ 
-1332,6 +1634,7 @@ def _predict_records(records: list[dict[str, Any]], threshold: float | None) -> threshold=threshold, status_code=500, preprocessor=preprocessor, + source=source, data_quality=dq_records if "dq_records" in locals() else None, error=str(exc), ) @@ -1342,16 +1645,18 @@ def _predict_records(records: list[dict[str, Any]], threshold: float | None) -> def predict( payload: PredictionRequest, threshold: float | None = Query(default=None, ge=0.0, le=1.0), + x_client_source: str | None = Header(default=None, alias="X-Client-Source"), ) -> dict[str, Any]: records = payload.data if isinstance(payload.data, list) else [payload.data] - return _predict_records(records, threshold) + return _predict_records(records, threshold, source=x_client_source) @app.post("/predict-minimal") def predict_minimal( payload: MinimalPredictionRequest, threshold: float | None = Query(default=None, ge=0.0, le=1.0), + x_client_source: str | None = Header(default=None, alias="X-Client-Source"), ) -> dict[str, Any]: preprocessor: PreprocessorArtifacts = app.state.preprocessor record = _build_minimal_record(payload, preprocessor) - return _predict_records([record], threshold) + return _predict_records([record], threshold, source=x_client_source) diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/performance/performance_report.md b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/performance/performance_report.md index c8c005e5a6e74ebb437c88cf7a9cad3a65ead65b..0314fd4a757842c5a41805c5dd9b8d8614045361 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/performance/performance_report.md +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/performance/performance_report.md @@ -6,7 
+6,8 @@ Mesurer la latence d'inference, identifier les goulots d'etranglement et propose ## Setup -- Script: `profiling/profile_inference.py` +- Script (archivé): `dev_archive/profiling/profile_inference.py` +- Workflow courant: notebook modélisation (section TODO 5) - Donnees: `data/data_final.parquet` (echantillon) - Parametres: `--sample-size 500 --batch-size 100 --runs 2` - Modele: `HistGB_final_model.pkl` diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/gradio_app.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/gradio_app.py index 75b939d19beb78d7b3560de139054a20d7200cc0..39d6506c169d4a516bd2f6befadae15ed9f0cb41 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/gradio_app.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/gradio_app.py @@ -8,13 +8,17 @@ import pandas as pd from fastapi import HTTPException from app.main import ( + DAYS_EMPLOYED_SENTINEL, + ENGINEERED_SOURCES, + MODEL_VERSION, MinimalPredictionRequest, app, + new_features_creation, + prepare_inference_features, predict_minimal, startup_event, _build_minimal_record, _normalize_inputs, - preprocess_input, ) @@ -45,7 +49,7 @@ def _shap_error_table(message: str) -> pd.DataFrame: [ { "feature": message, - "value": np.nan, + "raw_value": np.nan, "shap_value": np.nan, } ] @@ -63,38 +67,171 @@ def _extract_shap_values(shap_values: Any) -> np.ndarray: return values +def _clean_raw_value(value: Any) -> Any: + if value is None or pd.isna(value): + return None + if isinstance(value, (np.integer, np.floating)): + return value.item() + return value + + +def _strip_feature_prefix(feature_name: str) -> str: + return feature_name.split("__", 1)[1] if "__" in feature_name else feature_name + + +def 
_lookup_raw_value(feature_name: str, raw_df: pd.DataFrame, preprocessor) -> Any: + cleaned_name = _strip_feature_prefix(feature_name) + if cleaned_name in raw_df.columns: + return raw_df.at[0, cleaned_name] + for prefix in ("is_missing_", "is_outlier_"): + if cleaned_name.startswith(prefix): + base = cleaned_name[len(prefix):] + if base in raw_df.columns: + return raw_df.at[0, base] + for col in getattr(preprocessor, "categorical_columns", []): + if cleaned_name.startswith(f"{col}_") and col in raw_df.columns: + return raw_df.at[0, col] + return None + +def _align_features_to_model(X: Any, model: Any) -> Any: + expected = getattr(model, "feature_names_in_", None) + if expected is None: + return X + if isinstance(X, pd.DataFrame): + return X.reindex(columns=list(expected), fill_value=0) + return X + +def _model_family(model: Any) -> str: + name = type(model).__name__.lower() + if "xgb" in name: + return "xgb" + if "lgbm" in name or "lightgbm" in name: + return "lgbm" + if "histgradientboosting" in name: + return "histgb" + return "unknown" + +def _xgb_pred_contribs(estimator: Any, X: Any) -> np.ndarray: + import xgboost as xgb + + if isinstance(X, pd.DataFrame): + dm = xgb.DMatrix(X, feature_names=list(X.columns)) + else: + dm = xgb.DMatrix(np.asarray(X)) + + booster = estimator.get_booster() if hasattr(estimator, "get_booster") else estimator + contrib = booster.predict(dm, pred_contribs=True) + return np.asarray(contrib)[:, :-1] + + +def _lgbm_pred_contribs(estimator: Any, X: Any) -> np.ndarray: + contrib = estimator.predict(X, pred_contrib=True) + return np.asarray(contrib)[:, :-1] + + def _compute_shap_top_features(record: dict[str, Any], top_k: int = 10) -> pd.DataFrame: preprocessor = app.state.preprocessor + model = app.state.model df_raw = pd.DataFrame.from_records([record]) df_norm, _, _ = _normalize_inputs(df_raw, preprocessor) - features = preprocess_input(df_norm, preprocessor) + raw_reference = new_features_creation( + df_norm, + 
days_employed_sentinel=DAYS_EMPLOYED_SENTINEL, + engineered_sources=ENGINEERED_SOURCES, + ) + features = prepare_inference_features(df_norm, preprocessor, model) + features = _align_features_to_model(features, model) + try: import shap except ImportError: return _shap_error_table("SHAP not installed.") - explainer = getattr(app.state, "shap_explainer", None) - if explainer is None: + estimator = model + X_shap = features + if hasattr(model, "named_steps") and model.named_steps.get("preprocessing") is not None: + estimator = model.named_steps.get("estimator", model) + pipeline_preprocessor = model.named_steps["preprocessing"] + try: + X_shap = pipeline_preprocessor.transform(features) + except Exception as exc: + return _shap_error_table(f"SHAP preprocessing failed: {exc}") + try: + import scipy.sparse as sp + if sp.issparse(X_shap): + X_shap = X_shap.toarray() + except Exception: + pass try: - explainer = shap.TreeExplainer(app.state.model) + feature_names = pipeline_preprocessor.get_feature_names_out() except Exception: - explainer = shap.Explainer(app.state.model, features) - app.state.shap_explainer = explainer + feature_names = None + if feature_names is not None: + X_shap = pd.DataFrame(X_shap, columns=feature_names) + family = _model_family(estimator) + + values: np.ndarray | None = None + + # 1) Contributions natives (meilleur choix pour XGB/LGBM) try: - explanation = explainer(features) - values = _extract_shap_values(explanation.values) + if family == "xgb": + values = _xgb_pred_contribs(estimator, X_shap) + elif family == "lgbm": + values = _lgbm_pred_contribs(estimator, X_shap) except Exception: - values = _extract_shap_values(explainer.shap_values(features)) + values = None + + # 2) Fallback SHAP (utile surtout pour HistGB / inconnus) + if values is None: + cache = getattr(app.state, "shap_explainer_cache", {}) + key = f"{MODEL_VERSION}:{type(estimator).__name__}" + explainer = cache.get(key) + + if explainer is None: + try: + import shap + predict_fn = 
( + (lambda X: estimator.predict_proba(X)[:, 1]) + if hasattr(estimator, "predict_proba") + else (lambda X: estimator.predict(X)) + ) + + # Evite le background dégénéré (1 seule ligne) + if isinstance(X_shap, pd.DataFrame): + bg = pd.concat([X_shap] * 50, ignore_index=True) + else: + bg = np.repeat(np.asarray(X_shap), repeats=50, axis=0) + + explainer = shap.Explainer(predict_fn, bg) + except Exception as exc: + return _shap_error_table(f"SHAP explainer init failed: {exc}") + + cache[key] = explainer + app.state.shap_explainer_cache = cache + + try: + import shap + explanation = explainer(X_shap) + values = _extract_shap_values(explanation.values) + except Exception as exc: + return _shap_error_table(f"SHAP failed: {exc}") shap_row = values[0] - feature_values = features.iloc[0].to_numpy() + if isinstance(X_shap, pd.DataFrame): + feature_values = X_shap.iloc[0].to_numpy() + feature_names = X_shap.columns + else: + feature_values = np.asarray(X_shap)[0] + feature_names = [f"feature_{idx}" for idx in range(len(feature_values))] top_idx = np.argsort(np.abs(shap_row))[::-1][:top_k] rows = [ { - "feature": str(features.columns[idx]), - "value": float(feature_values[idx]), - "shap_value": float(shap_row[idx]), + "feature": str(feature_names[idx]), + "raw_value": _clean_raw_value( + _lookup_raw_value(str(feature_names[idx]), raw_reference, preprocessor) + ), + "shap_value": float(np.round(shap_row[idx], 6)), } for idx in top_idx ] @@ -105,8 +242,7 @@ def score_minimal( sk_id_curr: float, amt_credit: float, duration_months: float, - threshold: float, -) -> tuple[float | None, str, float | None, pd.DataFrame, dict[str, Any]]: +) -> tuple[float | None, str, pd.DataFrame, dict[str, Any]]: _ensure_startup() try: payload = MinimalPredictionRequest( @@ -115,7 +251,7 @@ def score_minimal( duration_months=int(duration_months), ) record = _build_minimal_record(payload, app.state.preprocessor) - response = predict_minimal(payload, threshold=float(threshold)) + response = 
predict_minimal(payload, threshold=None, x_client_source="gradio") result = response["predictions"][0] probability = float(result.get("probability", 0.0)) pred_value = int(result.get("prediction", 0)) @@ -128,11 +264,11 @@ def score_minimal( "DURATION_MONTHS": int(duration_months), } ) - return probability, label, float(response.get("threshold", 0.0)), shap_table, snapshot + return probability, label, shap_table, snapshot except HTTPException as exc: - return None, f"Erreur: {exc.detail}", None, _shap_error_table("No SHAP available."), {"error": exc.detail} + return None, f"Erreur: {exc.detail}", _shap_error_table("No SHAP available."), {"error": exc.detail} except Exception as exc: # pragma: no cover - UI fallback - return None, f"Erreur: {exc}", None, _shap_error_table("No SHAP available."), {"error": str(exc)} + return None, f"Erreur: {exc}", _shap_error_table("No SHAP available."), {"error": str(exc)} with gr.Blocks(title="Credit scoring MLOps") as demo: @@ -155,19 +291,17 @@ with gr.Blocks(title="Credit scoring MLOps") as demo: sk_id_curr = gr.Number(label="Identifiant client", precision=0, value=100001) amt_credit = gr.Number(label="Montant du crédit", value=200000) duration_months = gr.Number(label="Durée (mois)", precision=0, value=60) - threshold = gr.Slider(label="Seuil", minimum=0.0, maximum=1.0, value=0.5, step=0.01) run_btn = gr.Button("Scorer") with gr.Row(): probability = gr.Number(label="Probabilité de défaut") prediction = gr.Textbox(label="Prédiction") - threshold_used = gr.Number(label="Seuil utilisé") shap_table = gr.Dataframe( - headers=["feature", "value", "shap_value"], + headers=["feature", "raw_value", "shap_value"], label="Top 10 SHAP (local)", - datatype=["str", "number", "number"], + datatype=["str", "str", "number"], interactive=False, ) @@ -175,8 +309,8 @@ with gr.Blocks(title="Credit scoring MLOps") as demo: run_btn.click( score_minimal, - inputs=[sk_id_curr, amt_credit, duration_months, threshold], - outputs=[probability, prediction, 
threshold_used, shap_table, snapshot], + inputs=[sk_id_curr, amt_credit, duration_months], + outputs=[probability, prediction, shap_table, snapshot], ) diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/gradio_app.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/gradio_app.py index 60f49cd302ca77a3b831773c5e9238e7ef022915..75b939d19beb78d7b3560de139054a20d7200cc0 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/gradio_app.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/gradio_app.py @@ -3,9 +3,19 @@ from __future__ import annotations from typing import Any import gradio as gr +import numpy as np +import pandas as pd from fastapi import HTTPException -from app.main import MinimalPredictionRequest, app, predict_minimal, startup_event +from app.main import ( + MinimalPredictionRequest, + app, + predict_minimal, + startup_event, + _build_minimal_record, + _normalize_inputs, + preprocess_input, +) def _ensure_startup() -> None: @@ -30,12 +40,73 @@ def _customer_snapshot(sk_id_curr: int) -> dict[str, Any]: return snapshot +def _shap_error_table(message: str) -> pd.DataFrame: + return pd.DataFrame( + [ + { + "feature": message, + "value": np.nan, + "shap_value": np.nan, + } + ] + ) + + +def _extract_shap_values(shap_values: Any) -> np.ndarray: + if isinstance(shap_values, list): + shap_values = shap_values[1] if len(shap_values) > 1 else shap_values[0] + values = np.asarray(shap_values) + if values.ndim == 3: + values = values[:, :, 1] + if values.ndim == 1: + values = values.reshape(1, -1) + return values + + +def _compute_shap_top_features(record: dict[str, Any], top_k: int = 10) -> 
pd.DataFrame: + preprocessor = app.state.preprocessor + df_raw = pd.DataFrame.from_records([record]) + df_norm, _, _ = _normalize_inputs(df_raw, preprocessor) + features = preprocess_input(df_norm, preprocessor) + try: + import shap + except ImportError: + return _shap_error_table("SHAP not installed.") + + explainer = getattr(app.state, "shap_explainer", None) + if explainer is None: + try: + explainer = shap.TreeExplainer(app.state.model) + except Exception: + explainer = shap.Explainer(app.state.model, features) + app.state.shap_explainer = explainer + + try: + explanation = explainer(features) + values = _extract_shap_values(explanation.values) + except Exception: + values = _extract_shap_values(explainer.shap_values(features)) + + shap_row = values[0] + feature_values = features.iloc[0].to_numpy() + top_idx = np.argsort(np.abs(shap_row))[::-1][:top_k] + rows = [ + { + "feature": str(features.columns[idx]), + "value": float(feature_values[idx]), + "shap_value": float(shap_row[idx]), + } + for idx in top_idx + ] + return pd.DataFrame(rows) + + def score_minimal( sk_id_curr: float, amt_credit: float, duration_months: float, threshold: float, -) -> tuple[float | None, str, float | None, dict[str, Any]]: +) -> tuple[float | None, str, float | None, pd.DataFrame, dict[str, Any]]: _ensure_startup() try: payload = MinimalPredictionRequest( @@ -43,11 +114,13 @@ def score_minimal( amt_credit=float(amt_credit), duration_months=int(duration_months), ) + record = _build_minimal_record(payload, app.state.preprocessor) response = predict_minimal(payload, threshold=float(threshold)) result = response["predictions"][0] probability = float(result.get("probability", 0.0)) pred_value = int(result.get("prediction", 0)) label = "Default (1)" if pred_value == 1 else "No default (0)" + shap_table = _compute_shap_top_features(record, top_k=10) snapshot = _customer_snapshot(int(sk_id_curr)) snapshot.update( { @@ -55,39 +128,55 @@ def score_minimal( "DURATION_MONTHS": 
int(duration_months), } ) - return probability, label, float(response.get("threshold", 0.0)), snapshot + return probability, label, float(response.get("threshold", 0.0)), shap_table, snapshot except HTTPException as exc: - return None, f"Erreur: {exc.detail}", None, {"error": exc.detail} + return None, f"Erreur: {exc.detail}", None, _shap_error_table("No SHAP available."), {"error": exc.detail} except Exception as exc: # pragma: no cover - UI fallback - return None, f"Erreur: {exc}", None, {"error": str(exc)} - - -with gr.Blocks(title="Credit Scoring - Minimal Inputs") as demo: - gr.Markdown("# Credit Scoring - Minimal Inputs") + return None, f"Erreur: {exc}", None, _shap_error_table("No SHAP available."), {"error": str(exc)} + + +with gr.Blocks(title="Credit scoring MLOps") as demo: + gr.Markdown("# Credit scoring MLOps") + gr.HTML(""" +
+ + GitHub Release + + + GitHub Actions Workflow Status + +
+ """) gr.Markdown( - "Renseignez l'identifiant client, le montant du credit et la duree. " - "Les autres features proviennent des donnees clients reference." + "Renseignez l'identifiant client, le montant du crédit et la durée. " ) with gr.Row(): - sk_id_curr = gr.Number(label="SK_ID_CURR", precision=0, value=100001) - amt_credit = gr.Number(label="AMT_CREDIT", value=200000) - duration_months = gr.Number(label="Duree (mois)", precision=0, value=60) + sk_id_curr = gr.Number(label="Identifiant client", precision=0, value=100001) + amt_credit = gr.Number(label="Montant du crédit", value=200000) + duration_months = gr.Number(label="Durée (mois)", precision=0, value=60) threshold = gr.Slider(label="Seuil", minimum=0.0, maximum=1.0, value=0.5, step=0.01) run_btn = gr.Button("Scorer") with gr.Row(): - probability = gr.Number(label="Probabilite de defaut") - prediction = gr.Textbox(label="Decision") - threshold_used = gr.Number(label="Seuil utilise") + probability = gr.Number(label="Probabilité de défaut") + prediction = gr.Textbox(label="Prédiction") + threshold_used = gr.Number(label="Seuil utilisé") + + shap_table = gr.Dataframe( + headers=["feature", "value", "shap_value"], + label="Top 10 SHAP (local)", + datatype=["str", "number", "number"], + interactive=False, + ) - snapshot = gr.JSON(label="Snapshot client (reference)") + snapshot = gr.JSON(label="Snapshot client (référence)") run_btn.click( score_minimal, inputs=[sk_id_curr, amt_credit, duration_months, threshold], - outputs=[probability, prediction, threshold_used, snapshot], + outputs=[probability, prediction, threshold_used, shap_table, snapshot], ) diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/__init__.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/__init__.py index 
9c8bf0623c864fa8aa89796de82c940cbdaec301..e88d1d94a5372f1f6bc1ecf77195c99120826ad1 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/__init__.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/__init__.py @@ -1,3 +1 @@ -"""Expose combined ASGI app for HF Spaces default loader.""" - -from app_entry import app, demo # re-export for uvicorn app:app +"""Package marker for the FastAPI app package.""" diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Dockerfile b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Dockerfile index c740f8ef60e814f7b27eb8e09fb9f94d660d4257..44a3c6cf803ba97d741cb861cef7b457312b78ab 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Dockerfile +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Dockerfile @@ -9,8 +9,9 @@ COPY requirements.txt . 
RUN pip install --no-cache-dir -r requirements.txt COPY app/ app/ -COPY data/HistGB_final_model.pkl data/ -COPY artifacts/preprocessor.joblib artifacts/ +COPY app_entry.py app.py gradio_app.py ./ +COPY data/ data/ +COPY artifacts/ artifacts/ EXPOSE 7860 diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py index 339157f606c7cad12d77fc199dea64c30f3cbbe9..9138d69e6e15029487abd9c2a25afecc421069c1 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py @@ -1,22 +1,4 @@ -from fastapi import FastAPI -import gradio as gr - -from app.main import app as api_app -from app.main import startup_event -from gradio_app import demo - - -root_app = FastAPI() -root_app.mount("/api", api_app) -root_app = gr.mount_gradio_app(root_app, demo, path="/") - - -@root_app.on_event("startup") -def _startup() -> None: - startup_event() - - -app = root_app +from app_entry import app, demo # re-export for HF Spaces if __name__ == "__main__": diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/__init__.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/__init__.py index 152ec4f4c4d37e60088c582ab06c6fc57e578fbd..9c8bf0623c864fa8aa89796de82c940cbdaec301 100644 --- 
a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/__init__.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/__init__.py @@ -1 +1,3 @@ -# Package marker for app module. +"""Expose combined ASGI app for HF Spaces default loader.""" + +from app_entry import app, demo # re-export for uvicorn app:app diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py index 1389b3822f09aefa634c9b2f4b3d0c33f687a863..a7915cfb8178eacc7cd58a442e0d882418c4d10b 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py @@ -1113,6 +1113,16 @@ def startup_event() -> None: logger.info("Loading model from %s", model_path) app.state.model = load_model(model_path) + data_path = DATA_PATH + if not data_path.exists(): + downloaded = _ensure_hf_asset( + data_path, + HF_CUSTOMER_REPO_ID, + HF_CUSTOMER_FILENAME, + HF_CUSTOMER_REPO_TYPE, + ) + if downloaded is not None: + data_path = downloaded try: artifacts_path = ARTIFACTS_PATH if not artifacts_path.exists(): @@ -1125,7 +1135,7 @@ def startup_event() -> None: if downloaded is not None: artifacts_path = downloaded logger.info("Loading preprocessor artifacts from %s", artifacts_path) - 
app.state.preprocessor = load_preprocessor(DATA_PATH, artifacts_path) + app.state.preprocessor = load_preprocessor(data_path, artifacts_path) except RuntimeError as exc: if ALLOW_MISSING_ARTIFACTS: logger.warning("Preprocessor artifacts missing (%s). Using fallback preprocessor.", exc) diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app_entry.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app_entry.py new file mode 100644 index 0000000000000000000000000000000000000000..39502f827e699905357a999c5143cda0643ca77f --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app_entry.py @@ -0,0 +1,19 @@ +from fastapi import FastAPI +import gradio as gr + +from app.main import app as api_app +from app.main import startup_event +from gradio_app import demo + + +root_app = FastAPI() +root_app.mount("/api", api_app) +root_app = gr.mount_gradio_app(root_app, demo, path="/") + + +@root_app.on_event("startup") +def _startup() -> None: + startup_event() + + +app = root_app diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Dockerfile b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Dockerfile index 1df82d30ba2cd4161c639a221cce659fe2aa1825..c740f8ef60e814f7b27eb8e09fb9f94d660d4257 100644 --- 
a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Dockerfile +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Dockerfile @@ -14,4 +14,4 @@ COPY artifacts/preprocessor.joblib artifacts/ EXPOSE 7860 -CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"] +CMD ["uvicorn", "app_entry:app", "--host", "0.0.0.0", "--port", "7860"] diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md index 2f8b6c697c9c9a2ee59b7deee9b9f3cef799cd11..1d1297464999480f3ae9aefc4a9ae59f4ec78134 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md @@ -198,29 +198,38 @@ Exemple (un seul repo dataset avec 3 fichiers) : ### Demo live (commandes cles en main) -Lancer l'API : +Lancer l'API (sans UI) : ```shell uvicorn app.main:app --reload --port 7860 ``` +Lancer l'UI Gradio + API (chemin `/api`) : + +```shell +uvicorn app_entry:app --reload --port 7860 +``` + Verifier le service (HF) : ```shell BASE_URL="https://stephmnt-credit-scoring-mlops.hf.space" -curl -s "${BASE_URL}/health" +API_BASE="${BASE_URL}/api" +curl -s "${API_BASE}/health" ``` +Note : sur HF Spaces, l'UI Gradio est a la racine, l'API 
est sous `/api`. + Voir les features attendues (HF) : ```shell -curl -s "${BASE_URL}/features" +curl -s "${API_BASE}/features" ``` Predire un client (HF) : ```shell -curl -s -X POST "${BASE_URL}/predict?threshold=0.5" \ +curl -s -X POST "${API_BASE}/predict?threshold=0.5" \ -H "Content-Type: application/json" \ -d '{ "data": { @@ -242,7 +251,7 @@ curl -s -X POST "${BASE_URL}/predict?threshold=0.5" \ Predire plusieurs clients (batch, HF) : ```shell -curl -s -X POST "${BASE_URL}/predict?threshold=0.45" \ +curl -s -X POST "${API_BASE}/predict?threshold=0.45" \ -H "Content-Type: application/json" \ -d '{ "data": [ @@ -279,7 +288,7 @@ curl -s -X POST "${BASE_URL}/predict?threshold=0.45" \ Exemple d'erreur (champ requis manquant, HF) : ```shell -curl -s -X POST "${BASE_URL}/predict" \ +curl -s -X POST "${API_BASE}/predict" \ -H "Content-Type: application/json" \ -d '{ "data": { @@ -316,13 +325,13 @@ Recuperer les logs (HF) : Configurer `LOGS_ACCESS_TOKEN` dans les secrets du Space, puis : ```shell -curl -s -H "X-Logs-Token: $LOGS_ACCESS_TOKEN" "${BASE_URL}/logs?tail=200" +curl -s -H "X-Logs-Token: $LOGS_ACCESS_TOKEN" "${API_BASE}/logs?tail=200" ``` Alternative : ```shell -curl -s -H "Authorization: Bearer $LOGS_ACCESS_TOKEN" "${BASE_URL}/logs?tail=200" +curl -s -H "Authorization: Bearer $LOGS_ACCESS_TOKEN" "${API_BASE}/logs?tail=200" ``` Apres quelques requêtes, générer le rapport de drift : diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitattributes b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..c770b4828315fd91de0b0f37237e21ec88eaa8b0 100644 --- 
a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitattributes +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitattributes @@ -1,35 +1,4 @@ -*.7z filter=lfs diff=lfs merge=lfs -text -*.arrow filter=lfs diff=lfs merge=lfs -text -*.bin filter=lfs diff=lfs merge=lfs -text -*.bz2 filter=lfs diff=lfs merge=lfs -text -*.ckpt filter=lfs diff=lfs merge=lfs -text -*.ftz filter=lfs diff=lfs merge=lfs -text -*.gz filter=lfs diff=lfs merge=lfs -text -*.h5 filter=lfs diff=lfs merge=lfs -text -*.joblib filter=lfs diff=lfs merge=lfs -text -*.lfs.* filter=lfs diff=lfs merge=lfs -text -*.mlmodel filter=lfs diff=lfs merge=lfs -text -*.model filter=lfs diff=lfs merge=lfs -text -*.msgpack filter=lfs diff=lfs merge=lfs -text -*.npy filter=lfs diff=lfs merge=lfs -text -*.npz filter=lfs diff=lfs merge=lfs -text -*.onnx filter=lfs diff=lfs merge=lfs -text -*.ot filter=lfs diff=lfs merge=lfs -text *.parquet filter=lfs diff=lfs merge=lfs -text -*.pb filter=lfs diff=lfs merge=lfs -text -*.pickle filter=lfs diff=lfs merge=lfs -text +data/HistGB_final_model.pkl filter=lfs diff=lfs merge=lfs -text *.pkl filter=lfs diff=lfs merge=lfs -text -*.pt filter=lfs diff=lfs merge=lfs -text -*.pth filter=lfs diff=lfs merge=lfs -text -*.rar filter=lfs diff=lfs merge=lfs -text -*.safetensors filter=lfs diff=lfs merge=lfs -text -saved_model/**/* filter=lfs diff=lfs merge=lfs -text -*.tar.* filter=lfs diff=lfs merge=lfs -text -*.tar filter=lfs diff=lfs merge=lfs -text -*.tflite filter=lfs diff=lfs merge=lfs -text -*.tgz filter=lfs diff=lfs merge=lfs -text -*.wasm filter=lfs diff=lfs merge=lfs -text -*.xz filter=lfs diff=lfs merge=lfs -text -*.zip filter=lfs diff=lfs merge=lfs -text -*.zst filter=lfs diff=lfs merge=lfs -text -*tfevents* 
filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy-assets.yml b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy-assets.yml new file mode 100644 index 0000000000000000000000000000000000000000..8df5ea08c0cf83b468ab3c1322c26db9fd757a8f --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy-assets.yml @@ -0,0 +1,69 @@ +name: deploy-assets + +on: + workflow_dispatch: + inputs: + repo_id: + description: "HF repo id (e.g. stephmnt/assets-credit-scoring-mlops)" + required: true + default: "stephmnt/assets-credit-scoring-mlops" + repo_type: + description: "HF repo type (dataset or model)" + required: true + default: "dataset" + +jobs: + upload-assets: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + lfs: true + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install huggingface_hub + + - name: Upload assets to Hugging Face Hub + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + HF_REPO_ID: ${{ inputs.repo_id }} + HF_REPO_TYPE: ${{ inputs.repo_type }} + run: | + python - <<'PY' + import os + from pathlib import Path + from huggingface_hub import HfApi + + repo_id = os.environ["HF_REPO_ID"] + repo_type = os.environ["HF_REPO_TYPE"] + token = os.environ["HF_TOKEN"] + + files = { + "data/HistGB_final_model.pkl": "HistGB_final_model.pkl", + 
"artifacts/preprocessor.joblib": "preprocessor.joblib", + "data/data_final.parquet": "data_final.parquet", + } + + api = HfApi() + for local_path, remote_name in files.items(): + path = Path(local_path) + if not path.exists(): + raise SystemExit(f"Missing file: {path}") + api.upload_file( + path_or_fileobj=str(path), + path_in_repo=remote_name, + repo_id=repo_id, + repo_type=repo_type, + token=token, + commit_message=f"Update {remote_name}", + ) + print("Assets uploaded.") + PY diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml index 67eae0b87a7e66d2f853da3fe000520a3010da1f..c39651ec5e97b3d40ed76117fd0c2e14b5b60c58 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml @@ -12,6 +12,8 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 + with: + lfs: true - name: Set up Python uses: actions/setup-python@v5 @@ -47,6 +49,8 @@ jobs: --exclude 'logs' \ --exclude 'reports' \ --exclude 'screen-mlflow.png' \ + --exclude 'data/HistGB_final_model.pkl' \ + --exclude 'artifacts/preprocessor.joblib' \ --exclude 'data/*.csv' \ --exclude 'data/*.parquet' \ ./ hf_space/ diff --git 
a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore index 4c84c8d4e8033da22ee2871007658090ddc5e925..827e81828b8ec0902bc93840a2020a4ab1839ef5 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore @@ -6,6 +6,7 @@ logs/ reports/ data/* !data/HistGB_final_model.pkl +!data/data_final.parquet artifacts/* !artifacts/preprocessor.joblib .DS_Store @@ -18,7 +19,8 @@ mlruns/ *.code-workspace presentation_projet08.pptx rapport_projet06.md - +rapport_template.md +data_final.parquet ## https://github.com/github/gitignore/blob/e8554d85bf62e38d6db966a50d2064ac025fd82a/Python.gitignore # Byte-compiled / optimized / DLL files diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..339157f606c7cad12d77fc199dea64c30f3cbbe9 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +++ 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py @@ -0,0 +1,25 @@ +from fastapi import FastAPI +import gradio as gr + +from app.main import app as api_app +from app.main import startup_event +from gradio_app import demo + + +root_app = FastAPI() +root_app.mount("/api", api_app) +root_app = gr.mount_gradio_app(root_app, demo, path="/") + + +@root_app.on_event("startup") +def _startup() -> None: + startup_event() + + +app = root_app + + +if __name__ == "__main__": + import uvicorn + + uvicorn.run(app, host="0.0.0.0", port=7860) diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py index 23e584a8883137d7366f65896416c44ef788a787..1389b3822f09aefa634c9b2f4b3d0c33f687a863 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py @@ -41,6 +41,18 @@ LOG_INCLUDE_INPUTS = os.getenv("LOG_INCLUDE_INPUTS", "1") == "1" LOG_HASH_SK_ID = os.getenv("LOG_HASH_SK_ID", "0") == "1" MODEL_VERSION = os.getenv("MODEL_VERSION", MODEL_PATH.name) LOGS_ACCESS_TOKEN = os.getenv("LOGS_ACCESS_TOKEN") +CUSTOMER_DATA_PATH = Path(os.getenv("CUSTOMER_DATA_PATH", str(DATA_PATH))) +CUSTOMER_LOOKUP_ENABLED = os.getenv("CUSTOMER_LOOKUP_ENABLED", "1") == "1" +CUSTOMER_LOOKUP_CACHE = 
os.getenv("CUSTOMER_LOOKUP_CACHE", "1") == "1" +HF_MODEL_REPO_ID = os.getenv("HF_MODEL_REPO_ID") +HF_MODEL_REPO_TYPE = os.getenv("HF_MODEL_REPO_TYPE", "model") +HF_MODEL_FILENAME = os.getenv("HF_MODEL_FILENAME", MODEL_PATH.name) +HF_PREPROCESSOR_REPO_ID = os.getenv("HF_PREPROCESSOR_REPO_ID", HF_MODEL_REPO_ID or "") +HF_PREPROCESSOR_REPO_TYPE = os.getenv("HF_PREPROCESSOR_REPO_TYPE", HF_MODEL_REPO_TYPE) +HF_PREPROCESSOR_FILENAME = os.getenv("HF_PREPROCESSOR_FILENAME", ARTIFACTS_PATH.name) +HF_CUSTOMER_REPO_ID = os.getenv("HF_CUSTOMER_REPO_ID") +HF_CUSTOMER_REPO_TYPE = os.getenv("HF_CUSTOMER_REPO_TYPE", "dataset") +HF_CUSTOMER_FILENAME = os.getenv("HF_CUSTOMER_FILENAME", CUSTOMER_DATA_PATH.name) IGNORE_FEATURES = ["is_train", "is_test", "TARGET", "SK_ID_CURR"] ENGINEERED_FEATURES = [ @@ -117,6 +129,13 @@ class PredictionRequest(BaseModel): data: dict[str, Any] | list[dict[str, Any]] +class MinimalPredictionRequest(BaseModel): + sk_id_curr: int + amt_credit: float + duration_months: int | None = None + amt_annuity: float | None = None + + @dataclass class PreprocessorArtifacts: columns_keep: list[str] @@ -173,6 +192,32 @@ def _normalize_category_value(value: object, mapping: dict[str, str]) -> object: return mapping.get(key, "Unknown") +def _ensure_hf_asset( + local_path: Path, + repo_id: str | None, + filename: str, + repo_type: str, +) -> Path | None: + if local_path.exists(): + return local_path + if not repo_id: + return None + try: + from huggingface_hub import hf_hub_download + except ImportError as exc: # pragma: no cover - optional dependency + raise RuntimeError("huggingface_hub is required to download remote assets.") from exc + local_path.parent.mkdir(parents=True, exist_ok=True) + return Path( + hf_hub_download( + repo_id=repo_id, + filename=filename, + repo_type=repo_type, + local_dir=str(local_path.parent), + local_dir_use_symlinks=False, + ) + ) + + def _normalize_inputs( df_raw: pd.DataFrame, preprocessor: PreprocessorArtifacts, @@ -262,6 +307,54 @@ def 
_build_data_quality_records( return records +def _build_minimal_record( + payload: MinimalPredictionRequest, + preprocessor: PreprocessorArtifacts, +) -> dict[str, Any]: + reference = _get_customer_reference(preprocessor) + if reference is None: + raise HTTPException( + status_code=503, + detail={"message": "Customer reference data is not available."}, + ) + sk_id = int(payload.sk_id_curr) + if sk_id not in reference.index: + raise HTTPException( + status_code=404, + detail={"message": f"Client {sk_id} not found in reference data."}, + ) + record = reference.loc[sk_id].to_dict() + record["SK_ID_CURR"] = sk_id + if payload.amt_credit <= 0: + raise HTTPException( + status_code=422, + detail={"message": "AMT_CREDIT must be positive."}, + ) + record["AMT_CREDIT"] = float(payload.amt_credit) + if payload.amt_annuity is not None: + if payload.amt_annuity <= 0: + raise HTTPException( + status_code=422, + detail={"message": "AMT_ANNUITY must be positive."}, + ) + record["AMT_ANNUITY"] = float(payload.amt_annuity) + elif payload.duration_months is not None: + if payload.duration_months <= 0: + raise HTTPException( + status_code=422, + detail={"message": "duration_months must be positive."}, + ) + record["AMT_ANNUITY"] = float(payload.amt_credit) / float(payload.duration_months) + else: + raise HTTPException( + status_code=422, + detail={"message": "Provide duration_months or amt_annuity."}, + ) + if "AMT_GOODS_PRICE" in record: + record["AMT_GOODS_PRICE"] = float(payload.amt_credit) + return record + + def _append_log_entries(entries: list[dict[str, Any]]) -> None: if not LOG_PREDICTIONS: return @@ -596,6 +689,41 @@ def load_model(model_path: Path): return pickle.load(handle) +def _load_customer_reference( + data_path: Path, + preprocessor: PreprocessorArtifacts, +) -> pd.DataFrame: + columns = list(preprocessor.input_feature_columns) + if "SK_ID_CURR" not in columns: + columns.insert(0, "SK_ID_CURR") + df = pd.read_parquet(data_path, columns=columns) + df = 
df.drop_duplicates(subset=["SK_ID_CURR"], keep="last").set_index("SK_ID_CURR") + return df + + +def _get_customer_reference(preprocessor: PreprocessorArtifacts) -> pd.DataFrame | None: + if not CUSTOMER_LOOKUP_ENABLED: + return None + cached = getattr(app.state, "customer_reference", None) + if cached is not None: + return cached + data_path = CUSTOMER_DATA_PATH + if not data_path.exists(): + downloaded = _ensure_hf_asset( + data_path, + HF_CUSTOMER_REPO_ID, + HF_CUSTOMER_FILENAME, + HF_CUSTOMER_REPO_TYPE, + ) + if downloaded is None: + return None + data_path = downloaded + ref = _load_customer_reference(data_path, preprocessor) + if CUSTOMER_LOOKUP_CACHE: + app.state.customer_reference = ref + return ref + + def _infer_numeric_ranges_from_scaler(preprocessor: PreprocessorArtifacts) -> dict[str, tuple[float, float]]: ranges = {} scaler = getattr(preprocessor, "scaler", None) @@ -963,19 +1091,41 @@ def preprocess_input(df_raw: pd.DataFrame, artifacts: PreprocessorArtifacts) -> @app.on_event("startup") def startup_event() -> None: - if not MODEL_PATH.exists(): + if getattr(app.state, "model", None) is not None and getattr(app.state, "preprocessor", None) is not None: + return + model_path = MODEL_PATH + if not model_path.exists(): + downloaded = _ensure_hf_asset( + model_path, + HF_MODEL_REPO_ID, + HF_MODEL_FILENAME, + HF_MODEL_REPO_TYPE, + ) + if downloaded is not None: + model_path = downloaded + if not model_path.exists(): if ALLOW_MISSING_ARTIFACTS: - logger.warning("Model file not found: %s. Using dummy model.", MODEL_PATH) + logger.warning("Model file not found: %s. 
Using dummy model.", model_path) app.state.model = DummyModel() else: - raise RuntimeError(f"Model file not found: {MODEL_PATH}") + raise RuntimeError(f"Model file not found: {model_path}") else: - logger.info("Loading model from %s", MODEL_PATH) - app.state.model = load_model(MODEL_PATH) + logger.info("Loading model from %s", model_path) + app.state.model = load_model(model_path) try: - logger.info("Loading preprocessor artifacts from %s", ARTIFACTS_PATH) - app.state.preprocessor = load_preprocessor(DATA_PATH, ARTIFACTS_PATH) + artifacts_path = ARTIFACTS_PATH + if not artifacts_path.exists(): + downloaded = _ensure_hf_asset( + artifacts_path, + HF_PREPROCESSOR_REPO_ID or None, + HF_PREPROCESSOR_FILENAME, + HF_PREPROCESSOR_REPO_TYPE, + ) + if downloaded is not None: + artifacts_path = downloaded + logger.info("Loading preprocessor artifacts from %s", artifacts_path) + app.state.preprocessor = load_preprocessor(DATA_PATH, artifacts_path) except RuntimeError as exc: if ALLOW_MISSING_ARTIFACTS: logger.warning("Preprocessor artifacts missing (%s). 
Using fallback preprocessor.", exc) @@ -983,6 +1133,19 @@ def startup_event() -> None: else: raise + app.state.customer_reference = None + if CUSTOMER_LOOKUP_ENABLED and CUSTOMER_LOOKUP_CACHE: + try: + ref = _get_customer_reference(app.state.preprocessor) + if ref is not None: + logger.info("Loaded customer reference data (%s rows)", len(ref)) + else: + logger.warning("Customer reference data not available.") + except Exception as exc: # pragma: no cover - optional cache load + logger.warning("Failed to load customer reference data: %s", exc) + elif CUSTOMER_LOOKUP_ENABLED: + logger.info("Customer lookup enabled without cache (on-demand load).") + @app.get("/health") def health() -> dict[str, str]: @@ -1063,16 +1226,11 @@ def logs( return Response(content="".join(lines), media_type="application/x-ndjson") -@app.post("/predict") -def predict( - payload: PredictionRequest, - threshold: float | None = Query(default=None, ge=0.0, le=1.0), -) -> dict[str, Any]: +def _predict_records(records: list[dict[str, Any]], threshold: float | None) -> dict[str, Any]: model = app.state.model preprocessor: PreprocessorArtifacts = app.state.preprocessor request_id = str(uuid.uuid4()) start_time = time.perf_counter() - records = payload.data if isinstance(payload.data, list) else [payload.data] if not records: raise HTTPException(status_code=422, detail={"message": "No input records provided."}) @@ -1168,3 +1326,22 @@ def predict( error=str(exc), ) raise + + +@app.post("/predict") +def predict( + payload: PredictionRequest, + threshold: float | None = Query(default=None, ge=0.0, le=1.0), +) -> dict[str, Any]: + records = payload.data if isinstance(payload.data, list) else [payload.data] + return _predict_records(records, threshold) + + +@app.post("/predict-minimal") +def predict_minimal( + payload: MinimalPredictionRequest, + threshold: float | None = Query(default=None, ge=0.0, le=1.0), +) -> dict[str, Any]: + preprocessor: PreprocessorArtifacts = app.state.preprocessor + record = 
_build_minimal_record(payload, preprocessor) + return _predict_records([record], threshold) diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/gradio_app.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/gradio_app.py new file mode 100644 index 0000000000000000000000000000000000000000..60f49cd302ca77a3b831773c5e9238e7ef022915 --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/gradio_app.py @@ -0,0 +1,96 @@ +from __future__ import annotations + +from typing import Any + +import gradio as gr +from fastapi import HTTPException + +from app.main import MinimalPredictionRequest, app, predict_minimal, startup_event + + +def _ensure_startup() -> None: + if not getattr(app.state, "preprocessor", None): + startup_event() + + +def _customer_snapshot(sk_id_curr: int) -> dict[str, Any]: + reference = getattr(app.state, "customer_reference", None) + if reference is None or sk_id_curr not in reference.index: + return {} + row = reference.loc[sk_id_curr] + snapshot: dict[str, Any] = {"SK_ID_CURR": int(sk_id_curr)} + if "CODE_GENDER" in row: + snapshot["CODE_GENDER"] = row["CODE_GENDER"] + if "FLAG_OWN_CAR" in row: + snapshot["FLAG_OWN_CAR"] = row["FLAG_OWN_CAR"] + if "AMT_INCOME_TOTAL" in row: + snapshot["AMT_INCOME_TOTAL"] = float(row["AMT_INCOME_TOTAL"]) + if "DAYS_BIRTH" in row: + snapshot["AGE_YEARS"] = round(abs(float(row["DAYS_BIRTH"])) / 365.25, 1) + return snapshot + + +def score_minimal( + sk_id_curr: float, + amt_credit: float, + duration_months: float, + threshold: float, +) -> tuple[float | None, str, float | None, dict[str, Any]]: + _ensure_startup() 
+ try: + payload = MinimalPredictionRequest( + sk_id_curr=int(sk_id_curr), + amt_credit=float(amt_credit), + duration_months=int(duration_months), + ) + response = predict_minimal(payload, threshold=float(threshold)) + result = response["predictions"][0] + probability = float(result.get("probability", 0.0)) + pred_value = int(result.get("prediction", 0)) + label = "Default (1)" if pred_value == 1 else "No default (0)" + snapshot = _customer_snapshot(int(sk_id_curr)) + snapshot.update( + { + "AMT_CREDIT_REQUESTED": float(amt_credit), + "DURATION_MONTHS": int(duration_months), + } + ) + return probability, label, float(response.get("threshold", 0.0)), snapshot + except HTTPException as exc: + return None, f"Erreur: {exc.detail}", None, {"error": exc.detail} + except Exception as exc: # pragma: no cover - UI fallback + return None, f"Erreur: {exc}", None, {"error": str(exc)} + + +with gr.Blocks(title="Credit Scoring - Minimal Inputs") as demo: + gr.Markdown("# Credit Scoring - Minimal Inputs") + gr.Markdown( + "Renseignez l'identifiant client, le montant du credit et la duree. " + "Les autres features proviennent des donnees clients reference." 
+ ) + + with gr.Row(): + sk_id_curr = gr.Number(label="SK_ID_CURR", precision=0, value=100001) + amt_credit = gr.Number(label="AMT_CREDIT", value=200000) + duration_months = gr.Number(label="Duree (mois)", precision=0, value=60) + threshold = gr.Slider(label="Seuil", minimum=0.0, maximum=1.0, value=0.5, step=0.01) + + run_btn = gr.Button("Scorer") + + with gr.Row(): + probability = gr.Number(label="Probabilite de defaut") + prediction = gr.Textbox(label="Decision") + threshold_used = gr.Number(label="Seuil utilise") + + snapshot = gr.JSON(label="Snapshot client (reference)") + + run_btn.click( + score_minimal, + inputs=[sk_id_curr, amt_credit, duration_months, threshold], + outputs=[probability, prediction, threshold_used, snapshot], + ) + + +if __name__ == "__main__": + _ensure_startup() + demo.launch() diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md index e976f335d66341df91ab53725f336e913f16d235..2f8b6c697c9c9a2ee59b7deee9b9f3cef799cd11 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md @@ -1,5 +1,5 @@ --- -title: OCR Projet 06 +title: Credit scoring MLOps emoji: 🤖 colorFrom: indigo colorTo: green @@ -8,7 +8,7 @@ app_port: 7860 pinned: false --- -# OCR Projet 06 – Crédit +# Credit scoring MLOps [![GitHub Actions Workflow 
Status](https://img.shields.io/github/actions/workflow/status/stephmnt/credit-scoring-mlops/deploy.yml)](https://github.com/stephmnt/credit-scoring-mlops/actions/workflows/deploy.yml) [![GitHub Release Date](https://img.shields.io/github/release-date/stephmnt/credit-scoring-mlops?display_date=published_at&style=flat-square)](https://github.com/stephmnt/credit-scoring-mlops/releases) @@ -62,24 +62,33 @@ Parametres utiles (selection des features) : - `FEATURE_SELECTION_TOP_N` (defaut: `8`) - `FEATURE_SELECTION_MIN_CORR` (defaut: `0.02`) -### Environnement Poetry (recommande) +### Environnement pip (dev) -Le fichier `pyproject.toml` fixe des versions compatibles pour un stack recent -(`numpy>=2`, `pyarrow>=15`, `scikit-learn>=1.6`). L'environnement vise Python -3.11. +Le developpement local utilise pip et `requirements.txt` (versions figees), +avec Python 3.11+. ```shell -poetry env use 3.11 -poetry install +python3 -m venv .venv +source .venv/bin/activate +python -m pip install -r requirements.txt +pytest -q +uvicorn app.main:app --reload --port 7860 +``` + +### Environnement Poetry (livrable) + +Le livrable inclut `pyproject.toml`, aligne sur `requirements.txt`. Si besoin : + +```shell +poetry install --with dev poetry run pytest -q poetry run uvicorn app.main:app --reload --port 7860 ``` Important : le modele `HistGB_final_model.pkl` doit etre regenere avec la -nouvelle version de scikit-learn (re-execution de -`P6_MANET_Stephane_notebook_modélisation.ipynb`, cellule de sauvegarde pickle). - -Note : `requirements.txt` est aligne sur `pyproject.toml` (meme versions). +version de scikit-learn definie dans `requirements.txt` / `pyproject.toml` +(re-execution de `P6_MANET_Stephane_notebook_modélisation.ipynb`, cellule de +sauvegarde pickle). 
### Exemple d'input (schema + valeurs) @@ -123,9 +132,70 @@ Valeurs d'exemple : } ``` +### Prediction minimale (client existant) + +Endpoint `POST /predict-minimal` : l'utilisateur fournit un identifiant client, +un montant de credit et une duree. Les autres features sont prises depuis la +reference clients (`CUSTOMER_DATA_PATH`, par defaut `data/data_final.parquet`). +Si la reference est absente, l'API renvoie 503. + +```shell +curl -s -X POST "${BASE_URL}/predict-minimal" \ + -H "Content-Type: application/json" \ + -d '{ + "sk_id_curr": 100001, + "amt_credit": 200000, + "duration_months": 60 + }' +``` + +Variables utiles : + +- `CUSTOMER_LOOKUP_ENABLED=1` active la recherche client (defaut: 1) +- `CUSTOMER_DATA_PATH=data/data_final.parquet` +- `CUSTOMER_LOOKUP_CACHE=1` garde la reference en memoire + +### Data contract (validation) + +- Types numeriques stricts (invalides -> 422). +- Ranges numeriques (min/max entrainement) controles. +- Categoriels normalises: `CODE_GENDER` -> {`F`, `M`}, `FLAG_OWN_CAR` -> {`Y`, `N`}. +- Sentinelle `DAYS_EMPLOYED=365243` remplacee par NaN. +- Logs enrichis via `data_quality` pour distinguer drift vs qualite de donnees. + +### Interface Gradio (scoring) + +```shell +python gradio_app.py +``` + +Sur Hugging Face Spaces, `app.py` lance l'UI Gradio automatiquement. + Note : l'API valide strictement les champs requis (`/features`). Pour afficher toutes les colonnes possibles : `/features?include_all=true`. +### Hugging Face (assets lourds) + +Les fichiers binaires (modele, preprocessor, data_final) ne sont pas pushes +dans le Space. 
Ils sont telecharges a l'execution via Hugging Face Hub si les +variables suivantes sont definies : + +- `HF_MODEL_REPO_ID` + `HF_MODEL_FILENAME` + `HF_MODEL_REPO_TYPE` +- `HF_PREPROCESSOR_REPO_ID` + `HF_PREPROCESSOR_FILENAME` + `HF_PREPROCESSOR_REPO_TYPE` +- `HF_CUSTOMER_REPO_ID` + `HF_CUSTOMER_FILENAME` + `HF_CUSTOMER_REPO_TYPE` + +Exemple (un seul repo dataset avec 3 fichiers) : + +- `HF_MODEL_REPO_ID=stephmnt/credit-scoring-mlops-assets` +- `HF_MODEL_REPO_TYPE=dataset` +- `HF_MODEL_FILENAME=HistGB_final_model.pkl` +- `HF_PREPROCESSOR_REPO_ID=stephmnt/credit-scoring-mlops-assets` +- `HF_PREPROCESSOR_REPO_TYPE=dataset` +- `HF_PREPROCESSOR_FILENAME=preprocessor.joblib` +- `HF_CUSTOMER_REPO_ID=stephmnt/credit-scoring-mlops-assets` +- `HF_CUSTOMER_REPO_TYPE=dataset` +- `HF_CUSTOMER_FILENAME=data_final.parquet` + ### Demo live (commandes cles en main) Lancer l'API : @@ -231,6 +301,10 @@ Variables utiles : - `LOGS_ACCESS_TOKEN` pour proteger l'endpoint `/logs` - `LOG_HASH_SK_ID=1` pour anonymiser `SK_ID_CURR` +Les logs incluent un bloc `data_quality` par requete (champs manquants, +types invalides, out-of-range, categories inconnues, sentinelle +`DAYS_EMPLOYED`). + Exemple local : ```shell @@ -251,27 +325,70 @@ Alternative : curl -s -H "Authorization: Bearer $LOGS_ACCESS_TOKEN" "${BASE_URL}/logs?tail=200" ``` -Apres quelques requêtes, gélérer le rapport de drift : +Apres quelques requêtes, générer le rapport de drift : ```shell python monitoring/drift_report.py \ --logs logs/predictions.jsonl \ --reference data/data_final.parquet \ - --output-dir reports + --output-dir reports \ + --min-prod-samples 200 \ + --fdr-alpha 0.05 \ + --prod-since "2024-01-01T00:00:00Z" \ + --prod-until "2024-01-31T23:59:59Z" ``` Le rapport HTML est généré dans `reports/drift_report.html` (avec des plots dans `reports/plots/`). Sur Hugging Face, le disque est éphemère : télécharger les logs avant d'analyser. +Le drift est calcule uniquement si `n_prod >= --min-prod-samples` (defaut 200). 
+Sinon, un badge "Sample insuffisant" est affiche et les alertes sont desactivees. + +Robustesse integree: + +- Categoriels: PSI avec lissage (`--psi-eps`) + categories rares regroupees (OTHER). +- Numeriques: KS corrige par FDR (Benjamini-Hochberg, `--fdr-alpha`). +- Sentinel `DAYS_EMPLOYED`: converti en NaN + taux suivi. + Le rapport inclut aussi la distribution des scores predits et le taux de prediction -(option `--score-bins` pour ajuster le nombre de bins). +(option `--score-bins` pour ajuster le nombre de bins), ainsi qu'une section +Data Quality si les logs contiennent `data_quality` (types, NaN, out-of-range, +categories inconnues). + +Pour simuler des fenetres glissantes, utiliser `--prod-since` / `--prod-until` +avec les timestamps des logs. + +Runbook drift: `docs/monitoring/runbook.md`. Captures (snapshot local du reporting + stockage): - Rapport: `docs/monitoring/drift_report.html` + `docs/monitoring/plots/` - Stockage des logs: `docs/monitoring/logs_storage.png` +## Profiling & Optimisation (Etape 4) + +Profiling et benchmark d'inference (cProfile + latence) : + +```shell +python profiling/profile_inference.py \ + --sample-size 2000 \ + --batch-size 128 \ + --runs 3 +``` + +Sorties: + +- `docs/performance/benchmark_results.json` +- `docs/performance/profile_summary.txt` +- Rapport detaille: `docs/performance/performance_report.md` + +Dashboard local Streamlit (monitoring + drift): + +```shell +python -m streamlit run monitoring/streamlit_app.py +``` + ## Contenu de la release - **Preparation + pipeline** : nettoyage / preparation, encodage, imputation et pipeline d'entrainement presentes. @@ -282,8 +399,10 @@ Captures (snapshot local du reporting + stockage): - **Score metier + seuil optimal** : le `custom_score` est la metrique principale des tableaux de comparaison et de la CV, avec un `best_threshold` calcule. - **Explicabilite** : feature importance, SHAP et LIME sont inclus. 
- **Selection de features par correlation** : top‑N numeriques + un petit set categoriel, expose via `/features`. -- **Monitoring & drift** : rapport HTML avec KS/PSI + distribution des scores predits et taux de prediction - (snapshots dans `docs/monitoring/`). +- **Interface Gradio** : formulaire minimal (id client + montant + duree) base sur la reference clients. +- **Monitoring & drift** : rapport HTML avec gating par volume, PSI robuste, KS + FDR, data quality et + distribution des scores (snapshots dans `docs/monitoring/`). +- **Profiling & optimisation** : benchmark d'inference + profil cProfile (dossier `docs/performance/`). - **CI/CD** : tests avec couverture (`pytest-cov`), build Docker et deploy vers Hugging Face Spaces. ![Screenshot MLFlow](https://raw.githubusercontent.com/stephmnt/credit-scoring-mlops/main/screen-mlflow.png) @@ -304,5 +423,4 @@ Captures (snapshot local du reporting + stockage): * Compléter les tests API: /logs (auth OK/KO), batch predict, param threshold, SK_ID_CURR manquant, outliers dans test_api.py. * Simplifier le fallback ALLOW_MISSING_ARTIFACTS et DummyModel si les artefacts sont versionnés (nettoyer main.py et conftest.py). -* Unifier la gestion des dépendances (Poetry vs requirements.txt) et aligner pyproject.toml / requirements.txt. * Si l’évaluateur attend une stratégie de branches, créer une branche feature et fusionner pour preuve. 
diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/README.md b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/README.md new file mode 100644 index 0000000000000000000000000000000000000000..0c43dde29cd09da8f32c0d5001762f202398164f --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/README.md @@ -0,0 +1,13 @@ +# Monitoring Captures + +These files are snapshot artifacts for the monitoring deliverable. + +- drift_report.html: report generated by monitoring/drift_report.py (sample-size 5000). +- runbook.md: triage et actions quand une alerte drift apparait. +- plots/: feature drift plots + score distribution + prediction rate. +- predictions_sample.jsonl: sanitized example of production logs. +- logs_storage.png: snapshot of the logging storage format. + +Notes: +- Drift alerts are gated by minimum production volume (see report badge). +- Data quality metrics appear when logs include `data_quality`. 
diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/drift_report.html b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/drift_report.html new file mode 100644 index 0000000000000000000000000000000000000000..32db7cbd7fdb5f21204c6288a6453bf753832dd4 --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/drift_report.html @@ -0,0 +1,140 @@ + + + + + Drift Report + + + +

Production Monitoring Summary

+ +

Score Monitoring

+ + + +

Data Drift Summary

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
featuretypeks_statp_valuedrift_detectedpsi
EXT_SOURCE_2numeric0.59050.819238FalseNaN
EXT_SOURCE_3numeric0.90470.191111FalseNaN
AMT_ANNUITYnumeric0.51840.963407FalseNaN
EXT_SOURCE_1numeric0.58220.836199FalseNaN
CODE_GENDERcategoricalNaNNaNTrue9.6538
DAYS_EMPLOYEDnumeric0.65080.698660FalseNaN
AMT_CREDITnumeric0.59960.801040FalseNaN
AMT_GOODS_PRICEnumeric0.61150.777177FalseNaN
DAYS_BIRTHnumeric0.94740.105579FalseNaN
FLAG_OWN_CARcategoricalNaNNaNTrue4.3985
+

Feature Distributions

+

EXT_SOURCE_2

+

EXT_SOURCE_3

+

AMT_ANNUITY

+

EXT_SOURCE_1

+

CODE_GENDER

+

DAYS_EMPLOYED

+

AMT_CREDIT

+

AMT_GOODS_PRICE

+

DAYS_BIRTH

+

FLAG_OWN_CAR

+ + diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/logs_storage.png b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/logs_storage.png new file mode 100644 index 0000000000000000000000000000000000000000..dd13de2c18be344a4d5e1113aa0e94ea77dba35c Binary files /dev/null and b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/logs_storage.png differ diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/AMT_ANNUITY.png b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/AMT_ANNUITY.png new file mode 100644 index 0000000000000000000000000000000000000000..198865666fa9391eefb37cb23eadaebb7b1c42ac Binary files /dev/null and b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/AMT_ANNUITY.png differ diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/AMT_CREDIT.png 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/AMT_CREDIT.png new file mode 100644 index 0000000000000000000000000000000000000000..74fa6eee45b1cddf518280816b638f8a1e6cd0b5 Binary files /dev/null and b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/AMT_CREDIT.png differ diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/AMT_GOODS_PRICE.png b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/AMT_GOODS_PRICE.png new file mode 100644 index 0000000000000000000000000000000000000000..36171ce02758a10ee276a0165e52b099e52c8d69 Binary files /dev/null and b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/AMT_GOODS_PRICE.png differ diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/CODE_GENDER.png b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/CODE_GENDER.png new file mode 100644 index 
0000000000000000000000000000000000000000..12765fdc156020a1390c169ecafc7026e22f457c Binary files /dev/null and b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/CODE_GENDER.png differ diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/DAYS_BIRTH.png b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/DAYS_BIRTH.png new file mode 100644 index 0000000000000000000000000000000000000000..fd981cb311b945428b2e2904814528aa77f8346f Binary files /dev/null and b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/DAYS_BIRTH.png differ diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/DAYS_EMPLOYED.png b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/DAYS_EMPLOYED.png new file mode 100644 index 0000000000000000000000000000000000000000..b9cc13b1bdf2627c8a277251749f249496062f74 Binary files /dev/null and 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/DAYS_EMPLOYED.png differ diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/EXT_SOURCE_1.png b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/EXT_SOURCE_1.png new file mode 100644 index 0000000000000000000000000000000000000000..88e0511af8a08be3da6218f9da538fbf38a4f8d1 Binary files /dev/null and b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/EXT_SOURCE_1.png differ diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/EXT_SOURCE_2.png b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/EXT_SOURCE_2.png new file mode 100644 index 0000000000000000000000000000000000000000..7b24d8f3a152610a4af9c31290cee9d63c56a342 Binary files /dev/null and b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/EXT_SOURCE_2.png differ diff --git 
a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/EXT_SOURCE_3.png b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/EXT_SOURCE_3.png new file mode 100644 index 0000000000000000000000000000000000000000..7cf3f33ca47d96583e6b936419646d99ee01e575 Binary files /dev/null and b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/EXT_SOURCE_3.png differ diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/FLAG_OWN_CAR.png b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/FLAG_OWN_CAR.png new file mode 100644 index 0000000000000000000000000000000000000000..42f72e741ddde4f42e12a83f5396661fff386797 Binary files /dev/null and b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/FLAG_OWN_CAR.png differ diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/prediction_rate.png 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/prediction_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..fda452d390350116eb0755107591e34d7b99533c Binary files /dev/null and b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/prediction_rate.png differ diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/score_distribution.png b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/score_distribution.png new file mode 100644 index 0000000000000000000000000000000000000000..4418ab0358eac06f3bc914ca5e227c56870ef5ce Binary files /dev/null and b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/plots/score_distribution.png differ diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/predictions_sample.jsonl b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/predictions_sample.jsonl new file mode 100644 index 
0000000000000000000000000000000000000000..db6e613b97480af11a6fd684b1292ee69c6110ad --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/predictions_sample.jsonl @@ -0,0 +1,2 @@ +{"timestamp": "2025-01-01T00:00:00+00:00", "request_id": "00000000-0000-0000-0000-000000000001", "endpoint": "/predict", "latency_ms": 42.5, "status_code": 200, "model_version": "HistGB_final_model.pkl", "threshold": 0.5, "inputs": {"AMT_ANNUITY": 24700.5, "AMT_CREDIT": 406597.5, "AMT_GOODS_PRICE": 351000.0, "CODE_GENDER": "M", "DAYS_BIRTH": -9461, "DAYS_EMPLOYED": -637, "EXT_SOURCE_1": 0.45, "EXT_SOURCE_2": 0.61, "EXT_SOURCE_3": 0.75, "FLAG_OWN_CAR": "N", "SK_ID_CURR": "hash_100002"}, "sk_id_curr": "hash_100002", "probability": 0.3754, "prediction": 0} +{"timestamp": "2025-01-01T00:00:03+00:00", "request_id": "00000000-0000-0000-0000-000000000002", "endpoint": "/predict", "latency_ms": 51.2, "status_code": 200, "model_version": "HistGB_final_model.pkl", "threshold": 0.5, "inputs": {"AMT_ANNUITY": 19000.0, "AMT_CREDIT": 320000.0, "AMT_GOODS_PRICE": 280000.0, "CODE_GENDER": "F", "DAYS_BIRTH": -12000, "DAYS_EMPLOYED": -1200, "EXT_SOURCE_1": 0.33, "EXT_SOURCE_2": 0.52, "EXT_SOURCE_3": 0.64, "FLAG_OWN_CAR": "Y", "SK_ID_CURR": "hash_100003"}, "sk_id_curr": "hash_100003", "probability": 0.6123, "prediction": 1} diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/runbook.md b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/runbook.md new file mode 100644 index 
0000000000000000000000000000000000000000..346f3dca6ac31ad88971f2662e21604dbae6141d --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/monitoring/runbook.md @@ -0,0 +1,28 @@ +# Drift Runbook (MLOps) + +## A. Data quality (prioritaire) +- verifier categories inconnues (CODE_GENDER, FLAG_OWN_CAR) +- verifier hausse des NaN / champs manquants +- verifier out-of-range numeriques +- verifier le taux de sentinelle DAYS_EMPLOYED +- verifier un changement de pipeline (mapping, imputation, schema) + +## B. Prediction drift +- verifier la distribution des scores +- verifier le taux de classe positive +- verifier si le seuil metier a change + +## C. Performance (si labels) +- AUC / logloss / Brier +- calibration (Platt/Isotonic) +- analyse par segment (region, canal, produit si dispo) + +## Actions +- drift artificiel / bug data: corriger mapping ou schema, redeployer +- prior drift: recalibrer ou ajuster le seuil avec validation metier +- concept drift: retrain recent + validation temporelle + champion/challenger + plan de rollback + +## Triggers +- Warning: drift data sans drift score ou perf +- Critical: drift data + drift score (et/ou perf en baisse) +- Retrain: drift persistant sur plusieurs fenetres + impact score/perf diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/performance/benchmark_results.json b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/performance/benchmark_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c767033cd4bfad80487a07685e4d88b35b8ce92a --- /dev/null +++ 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/performance/benchmark_results.json @@ -0,0 +1,20 @@ +[ + { + "name": "optimized_preprocess", + "batches": 10, + "batch_size": 100, + "mean_ms": 35.73424170026556, + "p50_ms": 33.76843745354563, + "p95_ms": 43.09078284422866, + "throughput_rows_per_sec": 2798.4363244304373 + }, + { + "name": "legacy_preprocess_alignment", + "batches": 10, + "batch_size": 100, + "mean_ms": 47.56558339577168, + "p50_ms": 47.193103993777186, + "p95_ms": 51.22594404965639, + "throughput_rows_per_sec": 2102.360422407632 + } +] \ No newline at end of file diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/performance/performance_report.md b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/performance/performance_report.md new file mode 100644 index 0000000000000000000000000000000000000000..c8c005e5a6e74ebb437c88cf7a9cad3a65ead65b --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/performance/performance_report.md @@ -0,0 +1,50 @@ +# Profiling & Optimisation (Etape 4) + +## Objectif + +Mesurer la latence d'inference, identifier les goulots d'etranglement et proposer une optimisation logicielle sans regression fonctionnelle. 
+ +## Setup + +- Script: `profiling/profile_inference.py` +- Donnees: `data/data_final.parquet` (echantillon) +- Parametres: `--sample-size 500 --batch-size 100 --runs 2` +- Modele: `HistGB_final_model.pkl` + +Les resultats sont sauvegardes dans: + +- `docs/performance/benchmark_results.json` +- `docs/performance/profile_summary.txt` + +## Resultats + +| Scenario | Batch | Mean (ms) | P50 (ms) | P95 (ms) | Throughput (rows/s) | +| --- | --- | ---:| ---:| ---:| ---:| +| optimized_preprocess | 100 | 187.37 | 169.96 | 271.41 | 533.71 | +| legacy_preprocess_alignment | 100 | 273.05 | 264.45 | 357.41 | 366.23 | + +Gain observe (moyenne): ~31% de reduction de latence par batch sur le chemin optimise. + +## Goulots d'etranglement (cProfile) + +Extrait `docs/performance/profile_summary.txt`: + +- `app.main:preprocess_input` represente l'essentiel du temps cumule (~0.90s sur 1.05s). +- Operations pandas dominantes: + - `DataFrame.__setitem__` / `insert` + - `fillna`, `to_numeric` + - `get_dummies` +- `HistGradientBoostingClassifier.predict_proba` est present mais non majoritaire (~0.15s). + +## Optimisation appliquee + +- Alignement one-hot optimise: remplacement de la boucle d'ajout de colonnes par un `reindex` avec `fill_value=0`. +- Alignement des colonnes d'entree: remplacement de l'ajout colonne-par-colonne par un `reindex` sur `columns_keep`. +- Resultat: latence moyenne par batch reduite vs le chemin legacy (mesure ci-dessus). + +## Pistes futures + +- Precalculer un pipeline scikit-learn complet (OneHotEncoder + scaler) pour eviter le `get_dummies` a chaque requete. +- Export ONNX et inference via ONNX Runtime pour accelerer la predicition. +- Ajuster la taille de batch pour maximiser le throughput. +- Eventuellement degrader certains controles en mode "fast" si le contexte le permet (trade-off securite vs latence). 
diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/performance/profile_summary.txt b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/performance/profile_summary.txt new file mode 100644 index 0000000000000000000000000000000000000000..cc7b81535927fcb3ba9c7fcc0ce0f6d94922b240 --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/performance/profile_summary.txt @@ -0,0 +1,38 @@ + 157685 function calls (154232 primitive calls) in 0.071 seconds + + Ordered by: cumulative time + List reduced from 783 to 30 due to restriction <30> + + ncalls tottime percall cumtime percall filename:lineno(function) + 1 0.001 0.001 0.060 0.060 /Users/steph/Code/Python/Jupyter/OCR_projet06/app/main.py:772(preprocess_input) + 310 0.001 0.000 0.015 0.000 /Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/pandas/core/frame.py:4282(__setitem__) + 310 0.000 0.000 0.014 0.000 /Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/pandas/core/frame.py:4525(_set_item) + 310 0.000 0.000 0.011 0.000 /Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/pandas/core/frame.py:4492(_set_item_mgr) + 1 0.000 0.000 0.010 0.010 /Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py:2263(predict_proba) + 1 0.000 0.000 0.010 0.010 /Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py:1293(_raw_predict) + 
288 0.001 0.000 0.009 0.000 /Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/pandas/core/series.py:392(__init__) + 158 0.001 0.000 0.009 0.000 /Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/pandas/core/generic.py:7164(fillna) + 1 0.000 0.000 0.009 0.009 /Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/pandas/core/reshape/encoding.py:44(get_dummies) + 201 0.001 0.000 0.009 0.000 /Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/pandas/core/internals/managers.py:317(apply) + 297 0.000 0.000 0.008 0.000 /Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/pandas/core/frame.py:4481(_iset_item_mgr) + 363 0.001 0.000 0.008 0.000 /Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/pandas/core/frame.py:4073(__getitem__) + 1 0.001 0.001 0.008 0.008 /Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py:1333(_predict_iterations) + 299 0.002 0.000 0.008 0.000 /Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/pandas/core/internals/managers.py:1085(iset) + 133 0.007 0.000 0.007 0.000 /Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/sklearn/ensemble/_hist_gradient_boosting/predictor.py:49(predict) + 158 0.000 0.000 0.007 0.000 /Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/pandas/core/internals/base.py:180(fillna) + 160 0.001 0.000 0.007 0.000 /Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/pandas/core/tools/numeric.py:47(to_numeric) + 377 0.001 0.000 0.006 0.000 /Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/pandas/core/frame.py:4637(_get_item_cache) + 158 0.001 0.000 0.006 0.000 
/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/pandas/core/internals/blocks.py:1709(fillna) +34692/34379 0.004 0.000 0.006 0.000 {built-in method builtins.isinstance} + 2 0.000 0.000 0.005 0.003 /Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/sklearn/utils/validation.py:2793(validate_data) + 353 0.000 0.000 0.005 0.000 /Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/pandas/core/frame.py:3994(_ixs) + 2 0.000 0.000 0.005 0.002 /Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/sklearn/utils/validation.py:725(check_array) + 15 0.000 0.000 0.004 0.000 /Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/pandas/core/reshape/encoding.py:239(_get_dummies_1d) + 156/143 0.001 0.000 0.004 0.000 /Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/pandas/core/internals/blocks.py:1590(where) + 348 0.001 0.000 0.003 0.000 /Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/pandas/core/construction.py:517(sanitize_array) + 50 0.000 0.000 0.003 0.000 /Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/pandas/core/ops/common.py:62(new_method) + 441 0.000 0.000 0.003 0.000 /Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/pandas/core/dtypes/missing.py:101(isna) + 353 0.000 0.000 0.003 0.000 /Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/pandas/core/frame.py:4619(_box_col_values) + 441 0.000 0.000 0.003 0.000 /Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/pandas/core/dtypes/missing.py:184(_isna) + + diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml index a6ba7b9449ff19ea2a990ab74b56fa448719ea6e..67eae0b87a7e66d2f853da3fe000520a3010da1f 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml @@ -21,11 +21,13 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install poetry - poetry install --no-interaction --no-ansi + pip install -r requirements.txt - name: Run tests - run: poetry run pytest -q + run: pytest --cov=app --cov=monitoring --cov-report=term-missing -q + + - name: Build Docker image + run: docker build -t ocr-projet06:ci . - name: Deploy to Hugging Face Space if: github.ref == 'refs/heads/main' @@ -34,7 +36,7 @@ jobs: run: | git config --global user.email "actions@github.com" git config --global user.name "GitHub Actions" - git clone https://huggingface.co/spaces/stephmnt/ocr_projet06 hf_space + git clone https://huggingface.co/spaces/stephmnt/credit-scoring-mlops hf_space rsync -av \ --exclude '.git' \ --exclude '.venv' \ @@ -51,4 +53,4 @@ jobs: cd hf_space git add . 
git commit -m "Auto-deploy from GitHub Actions" || echo "No changes to commit" - git push https://stephmnt:${HF_TOKEN}@huggingface.co/spaces/stephmnt/ocr_projet06 main + git push https://stephmnt:${HF_TOKEN}@huggingface.co/spaces/stephmnt/credit-scoring-mlops main diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore index 2668d1fc0d1100159e1c4ce3b416ba23748ffbca..4c84c8d4e8033da22ee2871007658090ddc5e925 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore @@ -16,6 +16,8 @@ artifacts/* mlruns/ .DS_Store *.code-workspace +presentation_projet08.pptx +rapport_projet06.md ## https://github.com/github/gitignore/blob/e8554d85bf62e38d6db966a50d2064ac025fd82a/Python.gitignore diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py index 2f03db652b88027b75ae4aa25243a8d83d553f64..23e584a8883137d7366f65896416c44ef788a787 100644 --- 
a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py @@ -3,7 +3,7 @@ from __future__ import annotations import logging import os import pickle -from dataclasses import dataclass +from dataclasses import dataclass, field from datetime import datetime, timezone import hashlib import json @@ -11,10 +11,11 @@ from pathlib import Path import time from typing import Any import uuid +from collections import deque import numpy as np import pandas as pd -from fastapi import FastAPI, HTTPException, Query, Response +from fastapi import FastAPI, Header, HTTPException, Query, Response from pydantic import BaseModel from sklearn.preprocessing import MinMaxScaler import joblib @@ -27,6 +28,9 @@ ARTIFACTS_PATH = Path(os.getenv("ARTIFACTS_PATH", "artifacts/preprocessor.joblib DEFAULT_THRESHOLD = float(os.getenv("PREDICTION_THRESHOLD", "0.5")) CACHE_PREPROCESSOR = os.getenv("CACHE_PREPROCESSOR", "1") != "0" USE_REDUCED_INPUTS = os.getenv("USE_REDUCED_INPUTS", "1") != "0" +FEATURE_SELECTION_METHOD = os.getenv("FEATURE_SELECTION_METHOD", "correlation") +FEATURE_SELECTION_TOP_N = int(os.getenv("FEATURE_SELECTION_TOP_N", "8")) +FEATURE_SELECTION_MIN_CORR = float(os.getenv("FEATURE_SELECTION_MIN_CORR", "0.02")) CORRELATION_THRESHOLD = float(os.getenv("CORRELATION_THRESHOLD", "0.85")) CORRELATION_SAMPLE_SIZE = int(os.getenv("CORRELATION_SAMPLE_SIZE", "50000")) ALLOW_MISSING_ARTIFACTS = os.getenv("ALLOW_MISSING_ARTIFACTS", "0") == "1" @@ -36,6 +40,7 @@ LOG_FILE = os.getenv("LOG_FILE", "predictions.jsonl") LOG_INCLUDE_INPUTS = os.getenv("LOG_INCLUDE_INPUTS", "1") == "1" LOG_HASH_SK_ID = os.getenv("LOG_HASH_SK_ID", "0") == 
"1" MODEL_VERSION = os.getenv("MODEL_VERSION", MODEL_PATH.name) +LOGS_ACCESS_TOKEN = os.getenv("LOGS_ACCESS_TOKEN") IGNORE_FEATURES = ["is_train", "is_test", "TARGET", "SK_ID_CURR"] ENGINEERED_FEATURES = [ @@ -53,8 +58,9 @@ ENGINEERED_SOURCES = [ "CNT_FAM_MEMBERS", "AMT_ANNUITY", ] -# Top inputs derived from SHAP importance (modeling notebook), limited to application features. -REDUCED_INPUT_FEATURES = [ +FEATURE_SELECTION_CATEGORICAL_INPUTS = ["CODE_GENDER", "FLAG_OWN_CAR"] +# Default reduced inputs (fallback when correlation-based selection is unavailable). +DEFAULT_REDUCED_INPUT_FEATURES = [ "SK_ID_CURR", "EXT_SOURCE_2", "EXT_SOURCE_3", @@ -81,6 +87,31 @@ OUTLIER_COLUMNS = [ "AMT_REQ_CREDIT_BUREAU_QRT", ] +CODE_GENDER_MAPPING = { + "F": "F", + "FEMALE": "F", + "0": "F", + "W": "F", + "WOMAN": "F", + "M": "M", + "MALE": "M", + "1": "M", + "MAN": "M", +} +FLAG_OWN_CAR_MAPPING = { + "Y": "Y", + "YES": "Y", + "TRUE": "Y", + "1": "Y", + "T": "Y", + "N": "N", + "NO": "N", + "FALSE": "N", + "0": "N", + "F": "N", +} +DAYS_EMPLOYED_SENTINEL = 365243 + class PredictionRequest(BaseModel): data: dict[str, Any] | list[dict[str, Any]] @@ -102,6 +133,9 @@ class PreprocessorArtifacts: required_input_columns: list[str] numeric_required_columns: list[str] correlated_imputation: dict[str, dict[str, float | str]] + reduced_input_columns: list[str] = field(default_factory=list) + feature_selection_method: str = "default" + feature_selection_scores: dict[str, float] = field(default_factory=dict) app = FastAPI(title="Credit Scoring API", version="0.1.0") @@ -130,6 +164,104 @@ def _hash_value(value: Any) -> str: return hashlib.sha256(str(value).encode("utf-8")).hexdigest() +def _normalize_category_value(value: object, mapping: dict[str, str]) -> object: + if pd.isna(value): + return np.nan + key = str(value).strip().upper() + if not key: + return np.nan + return mapping.get(key, "Unknown") + + +def _normalize_inputs( + df_raw: pd.DataFrame, + preprocessor: PreprocessorArtifacts, +) -> 
tuple[pd.DataFrame, dict[str, pd.Series], pd.Series]: + df = df_raw.copy() + for col in preprocessor.required_input_columns: + if col not in df.columns: + df[col] = np.nan + + unknown_masks: dict[str, pd.Series] = {} + if "CODE_GENDER" in df.columns: + raw = df["CODE_GENDER"] + normalized = raw.apply(lambda v: _normalize_category_value(v, CODE_GENDER_MAPPING)) + unknown_masks["CODE_GENDER"] = normalized.eq("Unknown") & raw.notna() + df["CODE_GENDER"] = normalized + if "FLAG_OWN_CAR" in df.columns: + raw = df["FLAG_OWN_CAR"] + normalized = raw.apply(lambda v: _normalize_category_value(v, FLAG_OWN_CAR_MAPPING)) + unknown_masks["FLAG_OWN_CAR"] = normalized.eq("Unknown") & raw.notna() + df["FLAG_OWN_CAR"] = normalized + + sentinel_mask = pd.Series(False, index=df.index) + if "DAYS_EMPLOYED" in df.columns: + values = pd.to_numeric(df["DAYS_EMPLOYED"], errors="coerce") + sentinel_mask = values == DAYS_EMPLOYED_SENTINEL + if sentinel_mask.any(): + df.loc[sentinel_mask, "DAYS_EMPLOYED"] = np.nan + + return df, unknown_masks, sentinel_mask + + +def _build_data_quality_records( + df_raw: pd.DataFrame, + df_norm: pd.DataFrame, + unknown_masks: dict[str, pd.Series], + sentinel_mask: pd.Series, + preprocessor: PreprocessorArtifacts, +) -> list[dict[str, Any]]: + required_cols = preprocessor.required_input_columns + numeric_required = preprocessor.numeric_required_columns + numeric_ranges = { + col: bounds + for col, bounds in preprocessor.numeric_ranges.items() + if col in numeric_required + } + + missing_mask = df_norm[required_cols].isna() if required_cols else pd.DataFrame(index=df_norm.index) + invalid_masks: dict[str, pd.Series] = {} + out_of_range_masks: dict[str, pd.Series] = {} + + for col in numeric_required: + if col not in df_raw.columns: + invalid_masks[col] = pd.Series(False, index=df_norm.index) + continue + raw = df_raw[col] + coerced = pd.to_numeric(raw, errors="coerce") + invalid_masks[col] = coerced.isna() & raw.notna() + + for col, (min_val, max_val) in 
numeric_ranges.items(): + if col not in df_norm.columns: + out_of_range_masks[col] = pd.Series(False, index=df_norm.index) + continue + values = pd.to_numeric(df_norm[col], errors="coerce") + out_of_range_masks[col] = (values < min_val) | (values > max_val) + + records: list[dict[str, Any]] = [] + for idx in df_norm.index: + missing_cols = ( + [col for col in required_cols if missing_mask.at[idx, col]] + if required_cols + else [] + ) + invalid_cols = [col for col, mask in invalid_masks.items() if mask.at[idx]] + out_of_range_cols = [col for col, mask in out_of_range_masks.items() if mask.at[idx]] + unknown_cols = [col for col, mask in unknown_masks.items() if mask.at[idx]] + nan_rate = float(missing_mask.loc[idx].mean()) if not missing_mask.empty else 0.0 + records.append( + { + "missing_required_columns": missing_cols, + "invalid_numeric_columns": invalid_cols, + "out_of_range_columns": out_of_range_cols, + "unknown_categories": unknown_cols, + "days_employed_sentinel": bool(sentinel_mask.at[idx]) if not sentinel_mask.empty else False, + "nan_rate": nan_rate, + } + ) + return records + + def _append_log_entries(entries: list[dict[str, Any]]) -> None: if not LOG_PREDICTIONS: return @@ -151,6 +283,7 @@ def _log_prediction_entries( threshold: float | None, status_code: int, preprocessor: PreprocessorArtifacts, + data_quality: list[dict[str, Any]] | None = None, error: str | None = None, ) -> None: if not LOG_PREDICTIONS: @@ -176,6 +309,8 @@ def _log_prediction_entries( "threshold": threshold, "inputs": inputs, } + if data_quality and idx < len(data_quality): + entry["data_quality"] = data_quality[idx] if results and idx < len(results): result = results[idx] sk_id = result.get("sk_id_curr") @@ -234,6 +369,11 @@ def build_preprocessor(data_path: Path) -> PreprocessorArtifacts: for col, max_val in outlier_maxes.items(): df = df[df[col] != max_val] + reduced_input_columns, selection_scores, selection_method = _compute_reduced_inputs( + df, + 
input_feature_columns=input_feature_columns, + ) + numeric_ranges = {} for col in numeric_cols: if col in df.columns: @@ -249,7 +389,9 @@ def build_preprocessor(data_path: Path) -> PreprocessorArtifacts: required_raw.update(col for col in columns_must_not_missing if col in input_feature_columns) required_raw.add("SK_ID_CURR") if USE_REDUCED_INPUTS: - required_input = sorted({col for col in REDUCED_INPUT_FEATURES if col in input_feature_columns}) + required_input = reduced_input_columns + if not required_input: + required_input = _fallback_reduced_inputs(input_feature_columns) else: required_input = sorted(required_raw) numeric_required = sorted(col for col in required_input if col in numeric_medians) @@ -275,6 +417,9 @@ def build_preprocessor(data_path: Path) -> PreprocessorArtifacts: required_input_columns=required_input, numeric_required_columns=numeric_required, correlated_imputation=correlated_imputation, + reduced_input_columns=reduced_input_columns, + feature_selection_method=selection_method, + feature_selection_scores=selection_scores, ) @@ -340,7 +485,7 @@ def build_fallback_preprocessor() -> PreprocessorArtifacts: required_raw = set(ENGINEERED_SOURCES) required_raw.update(col for col in columns_must_not_missing if col in input_feature_columns) required_raw.add("SK_ID_CURR") - required_input = sorted({col for col in REDUCED_INPUT_FEATURES if col in input_feature_columns}) + required_input = _fallback_reduced_inputs(input_feature_columns) numeric_required = sorted(col for col in required_input if col in numeric_medians) numeric_ranges = {col: (float(df[col].min()), float(df[col].max())) for col in numeric_cols} @@ -360,6 +505,9 @@ def build_fallback_preprocessor() -> PreprocessorArtifacts: required_input_columns=required_input, numeric_required_columns=numeric_required, correlated_imputation={}, + reduced_input_columns=required_input, + feature_selection_method="fallback", + feature_selection_scores={}, ) @@ -368,6 +516,20 @@ def 
load_preprocessor(data_path: Path, artifacts_path: Path) -> PreprocessorArti preprocessor = joblib.load(artifacts_path) updated = False required_updated = False + if not hasattr(preprocessor, "reduced_input_columns") or not preprocessor.reduced_input_columns: + reduced_cols, selection_scores, selection_method = _compute_reduced_inputs_from_data( + data_path, preprocessor + ) + preprocessor.reduced_input_columns = reduced_cols + preprocessor.feature_selection_method = selection_method + preprocessor.feature_selection_scores = selection_scores + updated = True + if not hasattr(preprocessor, "feature_selection_method"): + preprocessor.feature_selection_method = "default" + updated = True + if not hasattr(preprocessor, "feature_selection_scores"): + preprocessor.feature_selection_scores = {} + updated = True if not hasattr(preprocessor, "required_input_columns"): if USE_REDUCED_INPUTS: required_input = _reduce_input_columns(preprocessor) @@ -445,6 +607,90 @@ def _infer_numeric_ranges_from_scaler(preprocessor: PreprocessorArtifacts) -> di return ranges +def _dedupe_preserve_order(values: list[str]) -> list[str]: + seen: set[str] = set() + output: list[str] = [] + for value in values: + if value in seen: + continue + seen.add(value) + output.append(value) + return output + + +def _fallback_reduced_inputs(input_feature_columns: list[str]) -> list[str]: + cols = [ + col + for col in DEFAULT_REDUCED_INPUT_FEATURES + if col in input_feature_columns or col == "SK_ID_CURR" + ] + if "SK_ID_CURR" not in cols: + cols.insert(0, "SK_ID_CURR") + return _dedupe_preserve_order(cols) + + +def _select_reduced_inputs_by_correlation( + df: pd.DataFrame, + *, + input_feature_columns: list[str], + top_n: int, + min_corr: float, +) -> tuple[list[str], dict[str, float]]: + if "TARGET" not in df.columns: + return [], {} + df_corr = df + if CORRELATION_SAMPLE_SIZE > 0 and len(df_corr) > CORRELATION_SAMPLE_SIZE: + df_corr = df_corr.sample(CORRELATION_SAMPLE_SIZE, random_state=42) + numeric_cols 
= [ + col + for col in df_corr.select_dtypes(include=["number"]).columns + if col in input_feature_columns + and col not in {"TARGET", "SK_ID_CURR", "is_train", "is_test"} + ] + if not numeric_cols: + return [], {} + corr = df_corr[numeric_cols + ["TARGET"]].corr()["TARGET"].drop("TARGET") + corr = corr.dropna() + if corr.empty: + return [], {} + corr = corr.reindex(corr.abs().sort_values(ascending=False).index) + if min_corr > 0: + corr = corr[corr.abs() >= min_corr] + selected_numeric = list(corr.index[:top_n]) + scores = {col: float(abs(corr.loc[col])) for col in selected_numeric} + selected = ["SK_ID_CURR"] + selected.extend(selected_numeric) + selected.extend( + col + for col in FEATURE_SELECTION_CATEGORICAL_INPUTS + if col in input_feature_columns + ) + selected = [ + col for col in selected if col in input_feature_columns or col == "SK_ID_CURR" + ] + return _dedupe_preserve_order(selected), scores + + +def _compute_reduced_inputs( + df: pd.DataFrame | None, + *, + input_feature_columns: list[str], +) -> tuple[list[str], dict[str, float], str]: + if FEATURE_SELECTION_METHOD != "correlation": + return _fallback_reduced_inputs(input_feature_columns), {}, "default" + if df is None or "TARGET" not in df.columns: + return _fallback_reduced_inputs(input_feature_columns), {}, "default" + reduced_cols, scores = _select_reduced_inputs_by_correlation( + df, + input_feature_columns=input_feature_columns, + top_n=FEATURE_SELECTION_TOP_N, + min_corr=FEATURE_SELECTION_MIN_CORR, + ) + if not reduced_cols: + return _fallback_reduced_inputs(input_feature_columns), {}, "default" + return reduced_cols, scores, "correlation" + + def _build_correlated_imputation( df: pd.DataFrame, *, @@ -496,10 +742,49 @@ def _build_correlated_imputation( def _reduce_input_columns(preprocessor: PreprocessorArtifacts) -> list[str]: - cols = [col for col in REDUCED_INPUT_FEATURES if col in preprocessor.input_feature_columns or col == "SK_ID_CURR"] + cols = getattr(preprocessor, 
"reduced_input_columns", None) or [] + if not cols: + cols = _fallback_reduced_inputs(preprocessor.input_feature_columns) + cols = [ + col + for col in cols + if col in preprocessor.input_feature_columns or col == "SK_ID_CURR" + ] if "SK_ID_CURR" not in cols: - cols.append("SK_ID_CURR") - return sorted(set(cols)) + cols.insert(0, "SK_ID_CURR") + return _dedupe_preserve_order(cols) + + +def _compute_reduced_inputs_from_data( + data_path: Path, + preprocessor: PreprocessorArtifacts, +) -> tuple[list[str], dict[str, float], str]: + if not data_path.exists(): + return _fallback_reduced_inputs(preprocessor.input_feature_columns), {}, "default" + df = pd.read_parquet(data_path) + df = new_features_creation(df) + df.replace([np.inf, -np.inf], np.nan, inplace=True) + + if preprocessor.columns_keep: + df = df[preprocessor.columns_keep] + if preprocessor.columns_must_not_missing: + df = df.dropna(subset=preprocessor.columns_must_not_missing) + + numeric_cols = df.select_dtypes(include=["number"]).columns + df[numeric_cols] = df[numeric_cols].fillna(pd.Series(preprocessor.numeric_medians)) + + for col in preprocessor.categorical_columns: + if col in df.columns: + df[col] = df[col].fillna("Unknown") + + if "CODE_GENDER" in df.columns: + df = df[df["CODE_GENDER"] != "XNA"] + + for col, max_val in preprocessor.outlier_maxes.items(): + if col in df.columns: + df = df[df[col] != max_val] + + return _compute_reduced_inputs(df, input_feature_columns=preprocessor.input_feature_columns) def _compute_correlated_imputation( @@ -535,8 +820,17 @@ def _compute_correlated_imputation( ) -def _ensure_required_columns(df: pd.DataFrame, required_cols: list[str]) -> None: - missing = [col for col in required_cols if col not in df.columns or df[col].isna().any()] +def _ensure_required_columns( + df: pd.DataFrame, + required_cols: list[str], + allow_missing: set[str] | None = None, +) -> None: + allow_missing = allow_missing or set() + missing = [ + col + for col in required_cols + if col not in 
df.columns or (col not in allow_missing and df[col].isna().any()) + ] if missing: raise HTTPException( status_code=422, @@ -552,7 +846,7 @@ def _validate_numeric_inputs(df: pd.DataFrame, numeric_cols: list[str]) -> None: invalid = [] for col in numeric_cols: coerced = pd.to_numeric(df[col], errors="coerce") - if coerced.isna().any(): + if (coerced.isna() & df[col].notna()).any(): invalid.append(col) if invalid: raise HTTPException( @@ -573,9 +867,8 @@ def _validate_numeric_ranges(df: pd.DataFrame, numeric_ranges: dict[str, tuple[f if col not in df.columns: continue values = pd.to_numeric(df[col], errors="coerce") - if values.isna().any(): - continue - if ((values < min_val) | (values > max_val)).any(): + mask = values.notna() + if mask.any() and ((values[mask] < min_val) | (values[mask] > max_val)).any(): out_of_range.append(col) if out_of_range: raise HTTPException( @@ -617,7 +910,8 @@ def preprocess_input(df_raw: pd.DataFrame, artifacts: PreprocessorArtifacts) -> if col not in df.columns: df[col] = np.nan - _ensure_required_columns(df, artifacts.required_input_columns) + allow_missing = {"DAYS_EMPLOYED"} + _ensure_required_columns(df, artifacts.required_input_columns, allow_missing=allow_missing) _validate_numeric_inputs(df, artifacts.numeric_required_columns) _validate_numeric_ranges(df, {k: v for k, v in artifacts.numeric_ranges.items() if k in artifacts.numeric_required_columns}) @@ -629,10 +923,7 @@ def preprocess_input(df_raw: pd.DataFrame, artifacts: PreprocessorArtifacts) -> df = new_features_creation(df) df.replace([np.inf, -np.inf], np.nan, inplace=True) - for col in artifacts.columns_keep: - if col not in df.columns: - df[col] = np.nan - df = df[artifacts.columns_keep] + df = df.reindex(columns=artifacts.columns_keep, fill_value=np.nan) _apply_correlated_imputation(df, artifacts) @@ -645,7 +936,7 @@ def preprocess_input(df_raw: pd.DataFrame, artifacts: PreprocessorArtifacts) -> if col in df.columns: df[col] = df[col].fillna("Unknown") - 
_ensure_required_columns(df, artifacts.required_input_columns) + _ensure_required_columns(df, artifacts.required_input_columns, allow_missing=allow_missing) if "CODE_GENDER" in df.columns and (df["CODE_GENDER"] == "XNA").any(): raise HTTPException( @@ -664,10 +955,7 @@ def preprocess_input(df_raw: pd.DataFrame, artifacts: PreprocessorArtifacts) -> ) df_hot = pd.get_dummies(df, columns=artifacts.categorical_columns) - for col in artifacts.features_to_scaled: - if col not in df_hot.columns: - df_hot[col] = 0 - df_hot = df_hot[artifacts.features_to_scaled] + df_hot = df_hot.reindex(columns=artifacts.features_to_scaled, fill_value=0) scaled = artifacts.scaler.transform(df_hot) return pd.DataFrame(scaled, columns=artifacts.features_to_scaled, index=df.index) @@ -716,10 +1004,20 @@ def features(include_all: bool = Query(default=False)) -> dict[str, Any]: preprocessor: PreprocessorArtifacts = app.state.preprocessor optional_features = [col for col in preprocessor.input_feature_columns if col not in preprocessor.required_input_columns] correlated = sorted(getattr(preprocessor, "correlated_imputation", {}) or {}) + scores = getattr(preprocessor, "feature_selection_scores", {}) or {} + selection_scores = { + col: round(scores[col], 4) + for col in preprocessor.required_input_columns + if col in scores + } payload = { "required_input_features": preprocessor.required_input_columns, "engineered_features": ENGINEERED_FEATURES, "model_features_count": len(preprocessor.features_to_scaled), + "feature_selection_method": preprocessor.feature_selection_method, + "feature_selection_top_n": FEATURE_SELECTION_TOP_N, + "feature_selection_min_corr": FEATURE_SELECTION_MIN_CORR, + "feature_selection_scores": selection_scores, "correlation_threshold": CORRELATION_THRESHOLD, "correlated_imputation_count": len(correlated), "correlated_imputation_features": correlated[:50], @@ -734,6 +1032,37 @@ def features(include_all: bool = Query(default=False)) -> dict[str, Any]: return payload 
+@app.get("/logs") +def logs( + tail: int = Query(default=200, ge=1, le=2000), + x_logs_token: str | None = Header(default=None, alias="X-Logs-Token"), + authorization: str | None = Header(default=None), +) -> Response: + if not LOGS_ACCESS_TOKEN: + raise HTTPException(status_code=503, detail={"message": "Logs access token not configured."}) + + token = x_logs_token + if token is None and authorization: + prefix = "bearer " + if authorization.lower().startswith(prefix): + token = authorization[len(prefix):].strip() or None + + if token != LOGS_ACCESS_TOKEN: + raise HTTPException(status_code=403, detail={"message": "Invalid logs access token."}) + + if not LOG_PREDICTIONS: + raise HTTPException(status_code=404, detail={"message": "Prediction logging is disabled."}) + + log_path = LOG_DIR / LOG_FILE + if not log_path.exists(): + raise HTTPException(status_code=404, detail={"message": "Log file not found."}) + + with log_path.open("r", encoding="utf-8") as handle: + lines = deque(handle, maxlen=tail) + + return Response(content="".join(lines), media_type="application/x-ndjson") + + @app.post("/predict") def predict( payload: PredictionRequest, @@ -750,11 +1079,20 @@ def predict( try: df_raw = pd.DataFrame.from_records(records) - if "SK_ID_CURR" not in df_raw.columns: + df_norm, unknown_masks, sentinel_mask = _normalize_inputs(df_raw, preprocessor) + log_records = df_norm.to_dict(orient="records") + dq_records = _build_data_quality_records( + df_raw, + df_norm, + unknown_masks, + sentinel_mask, + preprocessor, + ) + if "SK_ID_CURR" not in df_norm.columns: raise HTTPException(status_code=422, detail={"message": "SK_ID_CURR is required."}) - sk_ids = df_raw["SK_ID_CURR"].tolist() - features = preprocess_input(df_raw, preprocessor) + sk_ids = df_norm["SK_ID_CURR"].tolist() + features = preprocess_input(df_norm, preprocessor) if hasattr(model, "predict_proba"): proba = model.predict_proba(features)[:, 1] @@ -771,12 +1109,13 @@ def predict( latency_ms = (time.perf_counter() 
- start_time) * 1000.0 _log_prediction_entries( request_id=request_id, - records=records, + records=log_records, results=results, latency_ms=latency_ms, threshold=use_threshold, status_code=200, preprocessor=preprocessor, + data_quality=dq_records, ) return {"predictions": results, "threshold": use_threshold} @@ -791,12 +1130,13 @@ def predict( latency_ms = (time.perf_counter() - start_time) * 1000.0 _log_prediction_entries( request_id=request_id, - records=records, + records=log_records, results=results, latency_ms=latency_ms, threshold=None, status_code=200, preprocessor=preprocessor, + data_quality=dq_records, ) return {"predictions": results, "threshold": None} except HTTPException as exc: @@ -804,12 +1144,13 @@ def predict( detail = exc.detail if isinstance(exc.detail, dict) else {"message": str(exc.detail)} _log_prediction_entries( request_id=request_id, - records=records, + records=log_records if "log_records" in locals() else records, results=None, latency_ms=latency_ms, threshold=threshold, status_code=exc.status_code, preprocessor=preprocessor, + data_quality=dq_records if "dq_records" in locals() else None, error=json.dumps(detail, ensure_ascii=True), ) raise @@ -817,12 +1158,13 @@ def predict( latency_ms = (time.perf_counter() - start_time) * 1000.0 _log_prediction_entries( request_id=request_id, - records=records, + records=log_records if "log_records" in locals() else records, results=None, latency_ms=latency_ms, threshold=threshold, status_code=500, preprocessor=preprocessor, + data_quality=dq_records if "dq_records" in locals() else None, error=str(exc), ) raise diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md index 8b108d49fd2464494383105d3ed82c6edaf01ff5..e976f335d66341df91ab53725f336e913f16d235 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md @@ -10,9 +10,9 @@ pinned: false # OCR Projet 06 – Crédit -[![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/stephmnt/OCR_Projet06/deploy.yml)](https://github.com/stephmnt/OCR_Projet05/actions/workflows/deploy.yml) -[![GitHub Release Date](https://img.shields.io/github/release-date/stephmnt/OCR_Projet06?display_date=published_at&style=flat-square)](https://github.com/stephmnt/OCR_Projet06/releases) -[![project_license](https://img.shields.io/github/license/stephmnt/OCR_projet06.svg)](https://github.com/stephmnt/OCR_Projet06/blob/main/LICENSE) +[![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/stephmnt/credit-scoring-mlops/deploy.yml)](https://github.com/stephmnt/credit-scoring-mlops/actions/workflows/deploy.yml) +[![GitHub Release Date](https://img.shields.io/github/release-date/stephmnt/credit-scoring-mlops?display_date=published_at&style=flat-square)](https://github.com/stephmnt/credit-scoring-mlops/releases) +[![project_license](https://img.shields.io/github/license/stephmnt/credit-scoring-mlops.svg)](https://github.com/stephmnt/credit-scoring-mlops/blob/main/LICENSE) ## Lancer MLFlow @@ -41,12 +41,9 @@ mlflow models serve -m 
"models:/credit_scoring_model/Staging" -p 5001 --no-conda ## API FastAPI -L'API attend un payload JSON avec une cle `data`. La valeur peut etre un objet -unique (un client) ou une liste d'objets (plusieurs clients). La liste des -features requises (jeu reduit) est disponible via l'endpoint `/features`. Les -autres champs sont optionnels et seront completes par des valeurs par defaut. +L'API attend un payload JSON avec une cle `data`. La valeur peut etre un objet unique (un client) ou une liste d'objets (plusieurs clients). La liste des features requises (jeu reduit) est disponible via l'endpoint `/features`. Les autres champs sont optionnels et seront completes par des valeurs par defaut. -Inputs minimums (10 + `SK_ID_CURR`) : +Inputs minimums (10 + `SK_ID_CURR`) derives d'une selection par correlation (voir `/features`) : - `EXT_SOURCE_2` - `EXT_SOURCE_3` @@ -59,6 +56,12 @@ Inputs minimums (10 + `SK_ID_CURR`) : - `DAYS_BIRTH` - `FLAG_OWN_CAR` +Parametres utiles (selection des features) : + +- `FEATURE_SELECTION_METHOD` (defaut: `correlation`) +- `FEATURE_SELECTION_TOP_N` (defaut: `8`) +- `FEATURE_SELECTION_MIN_CORR` (defaut: `0.02`) + ### Environnement Poetry (recommande) Le fichier `pyproject.toml` fixe des versions compatibles pour un stack recent @@ -131,22 +134,23 @@ Lancer l'API : uvicorn app.main:app --reload --port 7860 ``` -Verifier le service : +Verifier le service (HF) : ```shell -curl -s http://127.0.0.1:7860/health +BASE_URL="https://stephmnt-credit-scoring-mlops.hf.space" +curl -s "${BASE_URL}/health" ``` -Voir les features attendues : +Voir les features attendues (HF) : ```shell -curl -s http://127.0.0.1:7860/features +curl -s "${BASE_URL}/features" ``` -Predire un client : +Predire un client (HF) : ```shell -curl -s -X POST "http://127.0.0.1:7860/predict?threshold=0.5" \ +curl -s -X POST "${BASE_URL}/predict?threshold=0.5" \ -H "Content-Type: application/json" \ -d '{ "data": { @@ -165,6 +169,109 @@ curl -s -X POST 
"http://127.0.0.1:7860/predict?threshold=0.5" \ }' ``` +Predire plusieurs clients (batch, HF) : + +```shell +curl -s -X POST "${BASE_URL}/predict?threshold=0.45" \ + -H "Content-Type: application/json" \ + -d '{ + "data": [ + { + "SK_ID_CURR": 100002, + "EXT_SOURCE_2": 0.61, + "EXT_SOURCE_3": 0.75, + "AMT_ANNUITY": 24700.5, + "EXT_SOURCE_1": 0.45, + "CODE_GENDER": "M", + "DAYS_EMPLOYED": -637, + "AMT_CREDIT": 406597.5, + "AMT_GOODS_PRICE": 351000.0, + "DAYS_BIRTH": -9461, + "FLAG_OWN_CAR": "N" + }, + { + "SK_ID_CURR": 100003, + "EXT_SOURCE_2": 0.52, + "EXT_SOURCE_3": 0.64, + "AMT_ANNUITY": 19000.0, + "EXT_SOURCE_1": 0.33, + "CODE_GENDER": "F", + "DAYS_EMPLOYED": -1200, + "AMT_CREDIT": 320000.0, + "AMT_GOODS_PRICE": 280000.0, + "DAYS_BIRTH": -12000, + "FLAG_OWN_CAR": "Y" + } + ] + }' +``` + +Exemple d'erreur (champ requis manquant, HF) : + +```shell +curl -s -X POST "${BASE_URL}/predict" \ + -H "Content-Type: application/json" \ + -d '{ + "data": { + "EXT_SOURCE_2": 0.61 + } + }' +``` + +## Monitoring & Data Drift (Etape 3) + +L'API enregistre les appels `/predict` en JSONL (inputs, outputs, latence). +Par defaut, les logs sont stockes dans `logs/predictions.jsonl`. 
+ +Variables utiles : + +- `LOG_PREDICTIONS=1` active l'ecriture des logs (defaut: 1) +- `LOG_DIR=logs` +- `LOG_FILE=predictions.jsonl` +- `LOGS_ACCESS_TOKEN` pour proteger l'endpoint `/logs` +- `LOG_HASH_SK_ID=1` pour anonymiser `SK_ID_CURR` + +Exemple local : + +```shell +LOG_PREDICTIONS=1 LOG_DIR=logs uvicorn app.main:app --reload --port 7860 +``` + +Recuperer les logs (HF) : + +Configurer `LOGS_ACCESS_TOKEN` dans les secrets du Space, puis : + +```shell +curl -s -H "X-Logs-Token: $LOGS_ACCESS_TOKEN" "${BASE_URL}/logs?tail=200" +``` + +Alternative : + +```shell +curl -s -H "Authorization: Bearer $LOGS_ACCESS_TOKEN" "${BASE_URL}/logs?tail=200" +``` + +Apres quelques requêtes, gélérer le rapport de drift : + +```shell +python monitoring/drift_report.py \ + --logs logs/predictions.jsonl \ + --reference data/data_final.parquet \ + --output-dir reports +``` + +Le rapport HTML est généré dans `reports/drift_report.html` (avec des plots dans +`reports/plots/`). Sur Hugging Face, le disque est éphemère : télécharger les logs +avant d'analyser. + +Le rapport inclut aussi la distribution des scores predits et le taux de prediction +(option `--score-bins` pour ajuster le nombre de bins). + +Captures (snapshot local du reporting + stockage): + +- Rapport: `docs/monitoring/drift_report.html` + `docs/monitoring/plots/` +- Stockage des logs: `docs/monitoring/logs_storage.png` + ## Contenu de la release - **Preparation + pipeline** : nettoyage / preparation, encodage, imputation et pipeline d'entrainement presentes. @@ -174,19 +281,28 @@ curl -s -X POST "http://127.0.0.1:7860/predict?threshold=0.5" \ - **Validation croisee + tuning** : `StratifiedKFold`, `GridSearchCV` et Hyperopt sont utilises. - **Score metier + seuil optimal** : le `custom_score` est la metrique principale des tableaux de comparaison et de la CV, avec un `best_threshold` calcule. - **Explicabilite** : feature importance, SHAP et LIME sont inclus. 
-- **MLOps (MLflow)** : tracking des params / metriques (dont `custom_score` et `best_threshold`), tags, - registry et passage en "Staging". +- **Selection de features par correlation** : top‑N numeriques + un petit set categoriel, expose via `/features`. +- **Monitoring & drift** : rapport HTML avec KS/PSI + distribution des scores predits et taux de prediction + (snapshots dans `docs/monitoring/`). +- **CI/CD** : tests avec couverture (`pytest-cov`), build Docker et deploy vers Hugging Face Spaces. + +![Screenshot MLFlow](https://raw.githubusercontent.com/stephmnt/credit-scoring-mlops/main/screen-mlflow.png) + +### Manques prioritaires -![Screenshot MLFlow](https://raw.githubusercontent.com/stephmnt/OCR_Projet06/main/screen-mlflow.png) +* Mission 2 Étape 4 non couverte: pas de profiling/optimisation post‑déploiement ni rapport de gains, à livrer avec une version optimisée. -## Réduction des features +### Preuves / doc à compléter -Réduction des features : l’API utilise un top‑10 SHAP, alors que la mission insiste sur une réduction à l’aide d’une matrice de corrélation. La corrélation est bien documentée dans le notebook d’exploration, mais la liste utilisée par l’API n’est pas explicitement issue de cette matrice. À clarifier dans la doc ou aligner la sélection sur la corrélation. +* Lien explicite vers le dépôt public + stratégie de versions/branches à ajouter dans README.md. +* Preuve de model registry/serving MLflow à conserver (capture UI registry ou commande de serving) en plus de screen-mlflow.png. +* Dataset de référence non versionné (data_final.parquet est ignoré), documenter l’obtention pour exécuter drift_report.py. +* Badge GitHub Actions pointe vers OCR_Projet05 dans README.md, corriger l’URL. +* RGPD/PII: LOG_HASH_SK_ID est désactivé par défaut dans main.py, préciser l’activation en prod dans README.md. 
-## Glossaire rapide +### Améliorations recommandées -- **custom_score** : metrique metier qui penalise plus fortement les faux negatifs que les faux positifs. -- **Seuil optimal** : probabilite qui sert a transformer un score en classe 0/1. -- **Validation croisee (CV)** : evaluation sur plusieurs sous-echantillons pour eviter un resultat "chanceux". -- **MLflow tracking** : historique des runs, parametres et metriques. -- **Registry** : espace MLflow pour versionner et promouvoir un modele (ex. "Staging"). +* Compléter les tests API: /logs (auth OK/KO), batch predict, param threshold, SK_ID_CURR manquant, outliers dans test_api.py. +* Simplifier le fallback ALLOW_MISSING_ARTIFACTS et DummyModel si les artefacts sont versionnés (nettoyer main.py et conftest.py). +* Unifier la gestion des dépendances (Poetry vs requirements.txt) et aligner pyproject.toml / requirements.txt. +* Si l’évaluateur attend une stratégie de branches, créer une branche feature et fusionner pour preuve. diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.dockerignore b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.dockerignore new file mode 100644 index 0000000000000000000000000000000000000000..0d8c02308df5c0acfe8e5e81be26853f4ac47c00 --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.dockerignore @@ -0,0 +1,6 @@ +mlruns/ +*.ipynb +data/*.csv +data/*.parquet +!data/data_final.parquet +!data/HistGB_final_model.pkl diff --git 
a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml new file mode 100644 index 0000000000000000000000000000000000000000..a6ba7b9449ff19ea2a990ab74b56fa448719ea6e --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml @@ -0,0 +1,54 @@ +name: ci-cd + +on: + push: + branches: ["main"] + pull_request: + branches: ["main"] + +jobs: + test-build: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install poetry + poetry install --no-interaction --no-ansi + + - name: Run tests + run: poetry run pytest -q + + - name: Deploy to Hugging Face Space + if: github.ref == 'refs/heads/main' + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + run: | + git config --global user.email "actions@github.com" + git config --global user.name "GitHub Actions" + git clone https://huggingface.co/spaces/stephmnt/ocr_projet06 hf_space + rsync -av \ + --exclude '.git' \ + --exclude '.venv' \ + --exclude '.pytest_cache' \ + --exclude '__pycache__' \ + --exclude 'mlruns' \ + --exclude '*.ipynb' \ + --exclude 'logs' \ + --exclude 'reports' \ + --exclude 'screen-mlflow.png' \ + --exclude 'data/*.csv' \ + --exclude 'data/*.parquet' \ + ./ hf_space/ + cd hf_space + 
git add . + git commit -m "Auto-deploy from GitHub Actions" || echo "No changes to commit" + git push https://stephmnt:${HF_TOKEN}@huggingface.co/spaces/stephmnt/ocr_projet06 main diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..2668d1fc0d1100159e1c4ce3b416ba23748ffbca --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore @@ -0,0 +1,194 @@ +ressources/ +.venv/ +__pycache__/ +*.pyc +logs/ +reports/ +data/* +!data/HistGB_final_model.pkl +artifacts/* +!artifacts/preprocessor.joblib +.DS_Store +.vscode/ +.idea/ +.env +.ipynb_checkpoints/ +mlruns/ +.DS_Store +*.code-workspace + +## https://github.com/github/gitignore/blob/e8554d85bf62e38d6db966a50d2064ac025fd82a/Python.gitignore + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +#uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. 
+# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# pixi +# pixi.lock should be committed to version control for reproducibility +# .pixi/ contains the environments and should not be committed +.pixi/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/LICENSE b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..900b80d1c18c9745819baad97f77a71e46200072 --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/LICENSE @@ -0,0 +1,8 @@ +The MIT License (MIT) +Copyright (c) 2025, Stéphane Manet + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
\ No newline at end of file diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/__init__.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..152ec4f4c4d37e60088c582ab06c6fc57e578fbd --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/__init__.py @@ -0,0 +1 @@ +# Package marker for app module. diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py new file mode 100644 index 0000000000000000000000000000000000000000..2f03db652b88027b75ae4aa25243a8d83d553f64 --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py @@ -0,0 +1,828 @@ +from __future__ import annotations + +import logging +import os +import pickle +from dataclasses import dataclass +from datetime import datetime, timezone +import hashlib +import json +from pathlib import Path +import time +from 
typing import Any +import uuid + +import numpy as np +import pandas as pd +from fastapi import FastAPI, HTTPException, Query, Response +from pydantic import BaseModel +from sklearn.preprocessing import MinMaxScaler +import joblib + +logger = logging.getLogger("uvicorn.error") + +MODEL_PATH = Path(os.getenv("MODEL_PATH", "data/HistGB_final_model.pkl")) +DATA_PATH = Path(os.getenv("DATA_PATH", "data/data_final.parquet")) +ARTIFACTS_PATH = Path(os.getenv("ARTIFACTS_PATH", "artifacts/preprocessor.joblib")) +DEFAULT_THRESHOLD = float(os.getenv("PREDICTION_THRESHOLD", "0.5")) +CACHE_PREPROCESSOR = os.getenv("CACHE_PREPROCESSOR", "1") != "0" +USE_REDUCED_INPUTS = os.getenv("USE_REDUCED_INPUTS", "1") != "0" +CORRELATION_THRESHOLD = float(os.getenv("CORRELATION_THRESHOLD", "0.85")) +CORRELATION_SAMPLE_SIZE = int(os.getenv("CORRELATION_SAMPLE_SIZE", "50000")) +ALLOW_MISSING_ARTIFACTS = os.getenv("ALLOW_MISSING_ARTIFACTS", "0") == "1" +LOG_PREDICTIONS = os.getenv("LOG_PREDICTIONS", "1") == "1" +LOG_DIR = Path(os.getenv("LOG_DIR", "logs")) +LOG_FILE = os.getenv("LOG_FILE", "predictions.jsonl") +LOG_INCLUDE_INPUTS = os.getenv("LOG_INCLUDE_INPUTS", "1") == "1" +LOG_HASH_SK_ID = os.getenv("LOG_HASH_SK_ID", "0") == "1" +MODEL_VERSION = os.getenv("MODEL_VERSION", MODEL_PATH.name) + +IGNORE_FEATURES = ["is_train", "is_test", "TARGET", "SK_ID_CURR"] +ENGINEERED_FEATURES = [ + "DAYS_EMPLOYED_PERC", + "INCOME_CREDIT_PERC", + "INCOME_PER_PERSON", + "ANNUITY_INCOME_PERC", + "PAYMENT_RATE", +] +ENGINEERED_SOURCES = [ + "DAYS_EMPLOYED", + "DAYS_BIRTH", + "AMT_INCOME_TOTAL", + "AMT_CREDIT", + "CNT_FAM_MEMBERS", + "AMT_ANNUITY", +] +# Top inputs derived from SHAP importance (modeling notebook), limited to application features. 
+REDUCED_INPUT_FEATURES = [ + "SK_ID_CURR", + "EXT_SOURCE_2", + "EXT_SOURCE_3", + "AMT_ANNUITY", + "EXT_SOURCE_1", + "CODE_GENDER", + "DAYS_EMPLOYED", + "AMT_CREDIT", + "AMT_GOODS_PRICE", + "DAYS_BIRTH", + "FLAG_OWN_CAR", +] +OUTLIER_COLUMNS = [ + "CNT_FAM_MEMBERS", + "AMT_INCOME_TOTAL", + "AMT_ANNUITY", + "DAYS_EMPLOYED", + "OBS_60_CNT_SOCIAL_CIRCLE", + "OBS_30_CNT_SOCIAL_CIRCLE", + "DEF_60_CNT_SOCIAL_CIRCLE", + "DEF_30_CNT_SOCIAL_CIRCLE", + "REGION_POPULATION_RELATIVE", + "AMT_REQ_CREDIT_BUREAU_YEAR", + "AMT_REQ_CREDIT_BUREAU_QRT", +] + + +class PredictionRequest(BaseModel): + data: dict[str, Any] | list[dict[str, Any]] + + +@dataclass +class PreprocessorArtifacts: + columns_keep: list[str] + columns_must_not_missing: list[str] + numeric_medians: dict[str, float] + categorical_columns: list[str] + outlier_maxes: dict[str, float] + numeric_ranges: dict[str, tuple[float, float]] + features_to_scaled: list[str] + scaler: MinMaxScaler + raw_feature_columns: list[str] + input_feature_columns: list[str] + required_raw_columns: list[str] + required_input_columns: list[str] + numeric_required_columns: list[str] + correlated_imputation: dict[str, dict[str, float | str]] + + +app = FastAPI(title="Credit Scoring API", version="0.1.0") + + +class DummyModel: + def predict_proba(self, X: pd.DataFrame | np.ndarray) -> np.ndarray: + count = len(X) + return np.tile([0.5, 0.5], (count, 1)) + + def predict(self, X: pd.DataFrame | np.ndarray) -> np.ndarray: + return np.zeros(len(X), dtype=int) + + +def _json_fallback(obj: Any) -> Any: + if isinstance(obj, (np.integer, np.floating)): + return obj.item() + if isinstance(obj, (np.ndarray,)): + return obj.tolist() + if isinstance(obj, (pd.Timestamp,)): + return obj.isoformat() + return str(obj) + + +def _hash_value(value: Any) -> str: + return hashlib.sha256(str(value).encode("utf-8")).hexdigest() + + +def _append_log_entries(entries: list[dict[str, Any]]) -> None: + if not LOG_PREDICTIONS: + return + try: + LOG_DIR.mkdir(parents=True, 
exist_ok=True) + log_path = LOG_DIR / LOG_FILE + with log_path.open("a", encoding="utf-8") as handle: + for entry in entries: + handle.write(json.dumps(entry, ensure_ascii=True, default=_json_fallback) + "\n") + except OSError as exc: + logger.warning("Failed to write prediction logs: %s", exc) + + +def _log_prediction_entries( + request_id: str, + records: list[dict[str, Any]], + results: list[dict[str, Any]] | None, + latency_ms: float, + threshold: float | None, + status_code: int, + preprocessor: PreprocessorArtifacts, + error: str | None = None, +) -> None: + if not LOG_PREDICTIONS: + return + if not records: + records = [{}] + timestamp = datetime.now(timezone.utc).isoformat() + required_cols = preprocessor.required_input_columns + entries: list[dict[str, Any]] = [] + for idx, record in enumerate(records): + inputs: dict[str, Any] = {} + if LOG_INCLUDE_INPUTS: + inputs = {col: record.get(col) for col in required_cols if col in record} + if LOG_HASH_SK_ID and "SK_ID_CURR" in inputs: + inputs["SK_ID_CURR"] = _hash_value(inputs["SK_ID_CURR"]) + entry: dict[str, Any] = { + "timestamp": timestamp, + "request_id": request_id, + "endpoint": "/predict", + "latency_ms": round(latency_ms, 3), + "status_code": status_code, + "model_version": MODEL_VERSION, + "threshold": threshold, + "inputs": inputs, + } + if results and idx < len(results): + result = results[idx] + sk_id = result.get("sk_id_curr") + entry.update( + { + "sk_id_curr": _hash_value(sk_id) if LOG_HASH_SK_ID and sk_id is not None else sk_id, + "probability": result.get("probability"), + "prediction": result.get("prediction"), + } + ) + if error: + entry["error"] = error + entries.append(entry) + _append_log_entries(entries) + + +def new_features_creation(df: pd.DataFrame) -> pd.DataFrame: + df_features = df.copy() + for col in ENGINEERED_SOURCES: + if col not in df_features.columns: + df_features[col] = np.nan + df_features["DAYS_EMPLOYED_PERC"] = df_features["DAYS_EMPLOYED"] / df_features["DAYS_BIRTH"] + 
df_features["INCOME_CREDIT_PERC"] = df_features["AMT_INCOME_TOTAL"] / df_features["AMT_CREDIT"] + df_features["INCOME_PER_PERSON"] = df_features["AMT_INCOME_TOTAL"] / df_features["CNT_FAM_MEMBERS"] + df_features["ANNUITY_INCOME_PERC"] = df_features["AMT_ANNUITY"] / df_features["AMT_INCOME_TOTAL"] + df_features["PAYMENT_RATE"] = df_features["AMT_ANNUITY"] / df_features["AMT_CREDIT"] + return df_features + + +def build_preprocessor(data_path: Path) -> PreprocessorArtifacts: + df = pd.read_parquet(data_path) + raw_feature_columns = df.columns.tolist() + input_feature_columns = [c for c in raw_feature_columns if c not in ["is_train", "is_test", "TARGET"]] + + df = new_features_creation(df) + df.replace([np.inf, -np.inf], np.nan, inplace=True) + + missing_rate = df.isna().mean() + columns_keep = missing_rate[missing_rate < 0.60].index.tolist() + columns_must_not_missing = missing_rate[missing_rate < 0.010].index.tolist() + + df = df[columns_keep] + df = df.dropna(subset=columns_must_not_missing) + + numeric_cols = df.select_dtypes(include=["number"]).columns + numeric_medians = df[numeric_cols].median().to_dict() + df[numeric_cols] = df[numeric_cols].fillna(numeric_medians) + + categorical_columns = df.select_dtypes(include=["object"]).columns.tolist() + df[categorical_columns] = df[categorical_columns].fillna("Unknown") + + if "CODE_GENDER" in df.columns: + df = df[df["CODE_GENDER"] != "XNA"] + + outlier_maxes = {col: df[col].max() for col in OUTLIER_COLUMNS if col in df.columns} + for col, max_val in outlier_maxes.items(): + df = df[df[col] != max_val] + + numeric_ranges = {} + for col in numeric_cols: + if col in df.columns: + numeric_ranges[col] = (float(df[col].min()), float(df[col].max())) + + df_hot = pd.get_dummies(df, columns=categorical_columns) + features_to_scaled = [col for col in df_hot.columns if col not in IGNORE_FEATURES] + + scaler = MinMaxScaler() + scaler.fit(df_hot[features_to_scaled]) + + required_raw = set(ENGINEERED_SOURCES) + 
required_raw.update(col for col in columns_must_not_missing if col in input_feature_columns) + required_raw.add("SK_ID_CURR") + if USE_REDUCED_INPUTS: + required_input = sorted({col for col in REDUCED_INPUT_FEATURES if col in input_feature_columns}) + else: + required_input = sorted(required_raw) + numeric_required = sorted(col for col in required_input if col in numeric_medians) + correlated_imputation = _build_correlated_imputation( + df, + input_feature_columns=input_feature_columns, + numeric_required=numeric_required, + threshold=CORRELATION_THRESHOLD, + ) + + return PreprocessorArtifacts( + columns_keep=columns_keep, + columns_must_not_missing=columns_must_not_missing, + numeric_medians={k: float(v) for k, v in numeric_medians.items()}, + categorical_columns=categorical_columns, + outlier_maxes={k: float(v) for k, v in outlier_maxes.items()}, + numeric_ranges=numeric_ranges, + features_to_scaled=features_to_scaled, + scaler=scaler, + raw_feature_columns=raw_feature_columns, + input_feature_columns=input_feature_columns, + required_raw_columns=sorted(required_raw), + required_input_columns=required_input, + numeric_required_columns=numeric_required, + correlated_imputation=correlated_imputation, + ) + + +def build_fallback_preprocessor() -> PreprocessorArtifacts: + base = pd.DataFrame( + [ + { + "SK_ID_CURR": 100001, + "EXT_SOURCE_1": 0.45, + "EXT_SOURCE_2": 0.61, + "EXT_SOURCE_3": 0.75, + "AMT_ANNUITY": 24700.5, + "AMT_CREDIT": 406597.5, + "AMT_GOODS_PRICE": 351000.0, + "DAYS_BIRTH": -9461, + "DAYS_EMPLOYED": -637, + "CODE_GENDER": "M", + "FLAG_OWN_CAR": "N", + "AMT_INCOME_TOTAL": 202500.0, + "CNT_FAM_MEMBERS": 1, + "CNT_CHILDREN": 0, + }, + { + "SK_ID_CURR": 100002, + "EXT_SOURCE_1": 0.35, + "EXT_SOURCE_2": 0.52, + "EXT_SOURCE_3": 0.68, + "AMT_ANNUITY": 22000.0, + "AMT_CREDIT": 350000.0, + "AMT_GOODS_PRICE": 300000.0, + "DAYS_BIRTH": -12000, + "DAYS_EMPLOYED": -1200, + "CODE_GENDER": "F", + "FLAG_OWN_CAR": "Y", + "AMT_INCOME_TOTAL": 180000.0, + 
"CNT_FAM_MEMBERS": 2, + "CNT_CHILDREN": 1, + }, + ] + ) + + df = new_features_creation(base) + df.replace([np.inf, -np.inf], np.nan, inplace=True) + + columns_keep = df.columns.tolist() + columns_must_not_missing = [col for col in columns_keep if col not in IGNORE_FEATURES] + + numeric_cols = df.select_dtypes(include=["number"]).columns + numeric_medians = df[numeric_cols].median().to_dict() + df[numeric_cols] = df[numeric_cols].fillna(numeric_medians) + + categorical_columns = df.select_dtypes(include=["object"]).columns.tolist() + df[categorical_columns] = df[categorical_columns].fillna("Unknown") + + df_hot = pd.get_dummies(df, columns=categorical_columns) + features_to_scaled = [col for col in df_hot.columns if col not in IGNORE_FEATURES] + scaler = MinMaxScaler() + scaler.fit(df_hot[features_to_scaled]) + + raw_feature_columns = df.columns.tolist() + input_feature_columns = [c for c in raw_feature_columns if c not in ["is_train", "is_test", "TARGET"]] + + required_raw = set(ENGINEERED_SOURCES) + required_raw.update(col for col in columns_must_not_missing if col in input_feature_columns) + required_raw.add("SK_ID_CURR") + required_input = sorted({col for col in REDUCED_INPUT_FEATURES if col in input_feature_columns}) + numeric_required = sorted(col for col in required_input if col in numeric_medians) + + numeric_ranges = {col: (float(df[col].min()), float(df[col].max())) for col in numeric_cols} + + return PreprocessorArtifacts( + columns_keep=columns_keep, + columns_must_not_missing=columns_must_not_missing, + numeric_medians={k: float(v) for k, v in numeric_medians.items()}, + categorical_columns=categorical_columns, + outlier_maxes={}, + numeric_ranges=numeric_ranges, + features_to_scaled=features_to_scaled, + scaler=scaler, + raw_feature_columns=raw_feature_columns, + input_feature_columns=input_feature_columns, + required_raw_columns=sorted(required_raw), + required_input_columns=required_input, + numeric_required_columns=numeric_required, + 
correlated_imputation={}, + ) + + +def load_preprocessor(data_path: Path, artifacts_path: Path) -> PreprocessorArtifacts: + if artifacts_path.exists(): + preprocessor = joblib.load(artifacts_path) + updated = False + required_updated = False + if not hasattr(preprocessor, "required_input_columns"): + if USE_REDUCED_INPUTS: + required_input = _reduce_input_columns(preprocessor) + else: + required_input = preprocessor.required_raw_columns + preprocessor.required_input_columns = required_input + required_updated = True + updated = True + if not hasattr(preprocessor, "numeric_required_columns"): + preprocessor.numeric_required_columns = sorted( + col for col in preprocessor.required_input_columns if col in preprocessor.numeric_medians + ) + updated = True + if not hasattr(preprocessor, "numeric_ranges"): + numeric_ranges = _infer_numeric_ranges_from_scaler(preprocessor) + if numeric_ranges: + preprocessor.numeric_ranges = numeric_ranges + updated = True + else: + if not data_path.exists(): + raise RuntimeError(f"Data file not found to rebuild preprocessor: {data_path}") + preprocessor = build_preprocessor(data_path) + updated = True + if USE_REDUCED_INPUTS: + reduced = _reduce_input_columns(preprocessor) + if preprocessor.required_input_columns != reduced: + preprocessor.required_input_columns = reduced + required_updated = True + updated = True + else: + if preprocessor.required_input_columns != preprocessor.required_raw_columns: + preprocessor.required_input_columns = preprocessor.required_raw_columns + required_updated = True + updated = True + desired_numeric_required = sorted( + col for col in preprocessor.required_input_columns if col in preprocessor.numeric_medians + ) + if getattr(preprocessor, "numeric_required_columns", None) != desired_numeric_required: + preprocessor.numeric_required_columns = desired_numeric_required + updated = True + if not hasattr(preprocessor, "correlated_imputation") or required_updated: + if data_path.exists(): + 
preprocessor.correlated_imputation = _compute_correlated_imputation(data_path, preprocessor) + else: + preprocessor.correlated_imputation = {} + updated = True + if updated and CACHE_PREPROCESSOR: + artifacts_path.parent.mkdir(parents=True, exist_ok=True) + joblib.dump(preprocessor, artifacts_path) + return preprocessor + + if not data_path.exists(): + raise RuntimeError(f"Data file not found to build preprocessor: {data_path}") + + preprocessor = build_preprocessor(data_path) + if CACHE_PREPROCESSOR: + artifacts_path.parent.mkdir(parents=True, exist_ok=True) + joblib.dump(preprocessor, artifacts_path) + return preprocessor + + +def load_model(model_path: Path): + with model_path.open("rb") as handle: + return pickle.load(handle) + + +def _infer_numeric_ranges_from_scaler(preprocessor: PreprocessorArtifacts) -> dict[str, tuple[float, float]]: + ranges = {} + scaler = getattr(preprocessor, "scaler", None) + if scaler is None or not hasattr(scaler, "data_min_") or not hasattr(scaler, "data_max_"): + return ranges + for idx, col in enumerate(preprocessor.features_to_scaled): + if col in preprocessor.numeric_medians: + ranges[col] = (float(scaler.data_min_[idx]), float(scaler.data_max_[idx])) + return ranges + + +def _build_correlated_imputation( + df: pd.DataFrame, + *, + input_feature_columns: list[str], + numeric_required: list[str], + threshold: float, +) -> dict[str, dict[str, float | str]]: + if not numeric_required: + return {} + numeric_cols = [ + col + for col in df.select_dtypes(include=["number"]).columns + if col in input_feature_columns and col not in {"TARGET", "is_train", "is_test", "SK_ID_CURR"} + ] + if not numeric_cols: + return {} + df_corr = df + if CORRELATION_SAMPLE_SIZE > 0 and len(df_corr) > CORRELATION_SAMPLE_SIZE: + df_corr = df_corr.sample(CORRELATION_SAMPLE_SIZE, random_state=42) + corr = df_corr[numeric_cols].corr() + correlated = {} + for col in numeric_cols: + if col in numeric_required: + continue + best_feature = None + best_corr = 0.0 
+ for req in numeric_required: + if req not in corr.columns or col not in corr.index: + continue + corr_val = corr.at[col, req] + if pd.isna(corr_val): + continue + if abs(corr_val) > abs(best_corr): # type: ignore + best_corr = float(corr_val) # type: ignore + best_feature = req + if best_feature is None or abs(best_corr) < threshold: + continue + proxy_values = df_corr[best_feature].to_numpy() + if np.nanstd(proxy_values) == 0: + continue + slope, intercept = np.polyfit(proxy_values, df_corr[col].to_numpy(), 1) + correlated[col] = { + "proxy": best_feature, + "slope": float(slope), + "intercept": float(intercept), + "corr": float(best_corr), + } + return correlated + + +def _reduce_input_columns(preprocessor: PreprocessorArtifacts) -> list[str]: + cols = [col for col in REDUCED_INPUT_FEATURES if col in preprocessor.input_feature_columns or col == "SK_ID_CURR"] + if "SK_ID_CURR" not in cols: + cols.append("SK_ID_CURR") + return sorted(set(cols)) + + +def _compute_correlated_imputation( + data_path: Path, + preprocessor: PreprocessorArtifacts, +) -> dict[str, dict[str, float | str]]: + df = pd.read_parquet(data_path) + df = new_features_creation(df) + df.replace([np.inf, -np.inf], np.nan, inplace=True) + + df = df[preprocessor.columns_keep] + df = df.dropna(subset=preprocessor.columns_must_not_missing) + + numeric_cols = df.select_dtypes(include=["number"]).columns + df[numeric_cols] = df[numeric_cols].fillna(pd.Series(preprocessor.numeric_medians)) + + for col in preprocessor.categorical_columns: + if col in df.columns: + df[col] = df[col].fillna("Unknown") + + if "CODE_GENDER" in df.columns: + df = df[df["CODE_GENDER"] != "XNA"] + + for col, max_val in preprocessor.outlier_maxes.items(): + if col in df.columns: + df = df[df[col] != max_val] + + return _build_correlated_imputation( + df, + input_feature_columns=preprocessor.input_feature_columns, + numeric_required=preprocessor.numeric_required_columns, + threshold=CORRELATION_THRESHOLD, + ) + + +def 
_ensure_required_columns(df: pd.DataFrame, required_cols: list[str]) -> None: + missing = [col for col in required_cols if col not in df.columns or df[col].isna().any()] + if missing: + raise HTTPException( + status_code=422, + detail={ + "message": "Missing required input columns.", + "missing_columns": missing[:25], + "missing_count": len(missing), + }, + ) + + +def _validate_numeric_inputs(df: pd.DataFrame, numeric_cols: list[str]) -> None: + invalid = [] + for col in numeric_cols: + coerced = pd.to_numeric(df[col], errors="coerce") + if coerced.isna().any(): + invalid.append(col) + if invalid: + raise HTTPException( + status_code=422, + detail={ + "message": "Invalid numeric values provided.", + "invalid_columns": invalid[:25], + "invalid_count": len(invalid), + }, + ) + + +def _validate_numeric_ranges(df: pd.DataFrame, numeric_ranges: dict[str, tuple[float, float]]) -> None: + if not numeric_ranges: + return + out_of_range = [] + for col, (min_val, max_val) in numeric_ranges.items(): + if col not in df.columns: + continue + values = pd.to_numeric(df[col], errors="coerce") + if values.isna().any(): + continue + if ((values < min_val) | (values > max_val)).any(): + out_of_range.append(col) + if out_of_range: + raise HTTPException( + status_code=422, + detail={ + "message": "Input contains values outside expected ranges.", + "out_of_range_columns": out_of_range[:25], + "out_of_range_count": len(out_of_range), + }, + ) + + +def _apply_correlated_imputation(df: pd.DataFrame, artifacts: PreprocessorArtifacts) -> None: + correlated = getattr(artifacts, "correlated_imputation", {}) or {} + if not correlated: + return + for col, info in correlated.items(): + if col not in df.columns or col in artifacts.required_input_columns: + continue + proxy = info.get("proxy") + if proxy is None or proxy not in df.columns: + continue + missing = df[col].isna() + if not missing.any(): + continue + proxy_values = pd.to_numeric(df[proxy], errors="coerce") + if 
proxy_values.isna().any(): + continue + df.loc[missing, col] = info["slope"] * proxy_values[missing] + info["intercept"] + if col in artifacts.numeric_ranges: + min_val, max_val = artifacts.numeric_ranges[col] + df.loc[missing, col] = df.loc[missing, col].clip(min_val, max_val) + + +def preprocess_input(df_raw: pd.DataFrame, artifacts: PreprocessorArtifacts) -> pd.DataFrame: + df = df_raw.copy() + + for col in artifacts.required_input_columns: + if col not in df.columns: + df[col] = np.nan + + _ensure_required_columns(df, artifacts.required_input_columns) + _validate_numeric_inputs(df, artifacts.numeric_required_columns) + _validate_numeric_ranges(df, {k: v for k, v in artifacts.numeric_ranges.items() if k in artifacts.numeric_required_columns}) + + df["is_train"] = 0 + df["is_test"] = 1 + if "TARGET" not in df.columns: + df["TARGET"] = 0 + + df = new_features_creation(df) + df.replace([np.inf, -np.inf], np.nan, inplace=True) + + for col in artifacts.columns_keep: + if col not in df.columns: + df[col] = np.nan + df = df[artifacts.columns_keep] + + _apply_correlated_imputation(df, artifacts) + + for col, median in artifacts.numeric_medians.items(): + if col in df.columns: + df[col] = pd.to_numeric(df[col], errors="coerce") + df[col] = df[col].fillna(median) + + for col in artifacts.categorical_columns: + if col in df.columns: + df[col] = df[col].fillna("Unknown") + + _ensure_required_columns(df, artifacts.required_input_columns) + + if "CODE_GENDER" in df.columns and (df["CODE_GENDER"] == "XNA").any(): + raise HTTPException( + status_code=422, + detail={"message": "CODE_GENDER cannot be 'XNA' based on training rules."}, + ) + + for col, max_val in artifacts.outlier_maxes.items(): + if col in df.columns and (df[col] >= max_val).any(): + raise HTTPException( + status_code=422, + detail={ + "message": "Input contains outlier values removed during training.", + "outlier_columns": [col], + }, + ) + + df_hot = pd.get_dummies(df, columns=artifacts.categorical_columns) + 
for col in artifacts.features_to_scaled: + if col not in df_hot.columns: + df_hot[col] = 0 + df_hot = df_hot[artifacts.features_to_scaled] + + scaled = artifacts.scaler.transform(df_hot) + return pd.DataFrame(scaled, columns=artifacts.features_to_scaled, index=df.index) + + +@app.on_event("startup") +def startup_event() -> None: + if not MODEL_PATH.exists(): + if ALLOW_MISSING_ARTIFACTS: + logger.warning("Model file not found: %s. Using dummy model.", MODEL_PATH) + app.state.model = DummyModel() + else: + raise RuntimeError(f"Model file not found: {MODEL_PATH}") + else: + logger.info("Loading model from %s", MODEL_PATH) + app.state.model = load_model(MODEL_PATH) + + try: + logger.info("Loading preprocessor artifacts from %s", ARTIFACTS_PATH) + app.state.preprocessor = load_preprocessor(DATA_PATH, ARTIFACTS_PATH) + except RuntimeError as exc: + if ALLOW_MISSING_ARTIFACTS: + logger.warning("Preprocessor artifacts missing (%s). Using fallback preprocessor.", exc) + app.state.preprocessor = build_fallback_preprocessor() + else: + raise + + +@app.get("/health") +def health() -> dict[str, str]: + return {"status": "ok"} + + +@app.get("/") +def root() -> dict[str, str]: + return {"message": "Credit Scoring API. 
See /docs for Swagger UI."} + + +@app.get("/favicon.ico") +def favicon() -> Response: + return Response(status_code=204) + + +@app.get("/features") +def features(include_all: bool = Query(default=False)) -> dict[str, Any]: + preprocessor: PreprocessorArtifacts = app.state.preprocessor + optional_features = [col for col in preprocessor.input_feature_columns if col not in preprocessor.required_input_columns] + correlated = sorted(getattr(preprocessor, "correlated_imputation", {}) or {}) + payload = { + "required_input_features": preprocessor.required_input_columns, + "engineered_features": ENGINEERED_FEATURES, + "model_features_count": len(preprocessor.features_to_scaled), + "correlation_threshold": CORRELATION_THRESHOLD, + "correlated_imputation_count": len(correlated), + "correlated_imputation_features": correlated[:50], + } + if include_all: + payload["input_features"] = preprocessor.input_feature_columns + payload["optional_input_features"] = optional_features + else: + payload["input_features"] = preprocessor.required_input_columns + payload["optional_input_features"] = [] + payload["optional_input_features_count"] = len(optional_features) + return payload + + +@app.post("/predict") +def predict( + payload: PredictionRequest, + threshold: float | None = Query(default=None, ge=0.0, le=1.0), +) -> dict[str, Any]: + model = app.state.model + preprocessor: PreprocessorArtifacts = app.state.preprocessor + request_id = str(uuid.uuid4()) + start_time = time.perf_counter() + records = payload.data if isinstance(payload.data, list) else [payload.data] + + if not records: + raise HTTPException(status_code=422, detail={"message": "No input records provided."}) + + try: + df_raw = pd.DataFrame.from_records(records) + if "SK_ID_CURR" not in df_raw.columns: + raise HTTPException(status_code=422, detail={"message": "SK_ID_CURR is required."}) + + sk_ids = df_raw["SK_ID_CURR"].tolist() + features = preprocess_input(df_raw, preprocessor) + + if hasattr(model, "predict_proba"): + 
proba = model.predict_proba(features)[:, 1] + use_threshold = DEFAULT_THRESHOLD if threshold is None else threshold + preds = (proba >= use_threshold).astype(int) + results = [ + { + "sk_id_curr": sk_id, + "probability": float(prob), + "prediction": int(pred), + } + for sk_id, prob, pred in zip(sk_ids, proba, preds) + ] + latency_ms = (time.perf_counter() - start_time) * 1000.0 + _log_prediction_entries( + request_id=request_id, + records=records, + results=results, + latency_ms=latency_ms, + threshold=use_threshold, + status_code=200, + preprocessor=preprocessor, + ) + return {"predictions": results, "threshold": use_threshold} + + preds = model.predict(features) + results = [ + { + "sk_id_curr": sk_id, + "prediction": int(pred), + } + for sk_id, pred in zip(sk_ids, preds) + ] + latency_ms = (time.perf_counter() - start_time) * 1000.0 + _log_prediction_entries( + request_id=request_id, + records=records, + results=results, + latency_ms=latency_ms, + threshold=None, + status_code=200, + preprocessor=preprocessor, + ) + return {"predictions": results, "threshold": None} + except HTTPException as exc: + latency_ms = (time.perf_counter() - start_time) * 1000.0 + detail = exc.detail if isinstance(exc.detail, dict) else {"message": str(exc.detail)} + _log_prediction_entries( + request_id=request_id, + records=records, + results=None, + latency_ms=latency_ms, + threshold=threshold, + status_code=exc.status_code, + preprocessor=preprocessor, + error=json.dumps(detail, ensure_ascii=True), + ) + raise + except Exception as exc: + latency_ms = (time.perf_counter() - start_time) * 1000.0 + _log_prediction_entries( + request_id=request_id, + records=records, + results=None, + latency_ms=latency_ms, + threshold=threshold, + status_code=500, + preprocessor=preprocessor, + error=str(exc), + ) + raise diff --git 
a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitattributes b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* 
filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Dockerfile b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..1df82d30ba2cd4161c639a221cce659fe2aa1825 --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Dockerfile @@ -0,0 +1,17 @@ +FROM python:3.11-slim + +WORKDIR /app + +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 + +COPY requirements.txt . 
+RUN pip install --no-cache-dir -r requirements.txt + +COPY app/ app/ +COPY data/HistGB_final_model.pkl data/ +COPY artifacts/preprocessor.joblib artifacts/ + +EXPOSE 7860 + +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"] diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8b108d49fd2464494383105d3ed82c6edaf01ff5 --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md @@ -0,0 +1,192 @@ +--- +title: OCR Projet 06 +emoji: 🤖 +colorFrom: indigo +colorTo: green +sdk: docker +app_port: 7860 +pinned: false +--- + +# OCR Projet 06 – Crédit + +[![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/stephmnt/OCR_Projet06/deploy.yml)](https://github.com/stephmnt/OCR_Projet05/actions/workflows/deploy.yml) +[![GitHub Release Date](https://img.shields.io/github/release-date/stephmnt/OCR_Projet06?display_date=published_at&style=flat-square)](https://github.com/stephmnt/OCR_Projet06/releases) +[![project_license](https://img.shields.io/github/license/stephmnt/OCR_projet06.svg)](https://github.com/stephmnt/OCR_Projet06/blob/main/LICENSE) + +## Lancer MLFlow + +Le notebook est configure pour utiliser un serveur MLflow local (`http://127.0.0.1:5000`). 
+Pour voir les runs et creer l'experiment, demarrer le serveur avec le meme backend : + +```shell +mlflow server \ + --host 127.0.0.1 \ + --port 5000 \ + --backend-store-uri "file:${PWD}/mlruns" \ + --default-artifact-root "file:${PWD}/mlruns" +``` + +Seulement l'interface (sans API), lancer : + +```shell +mlflow ui --backend-store-uri "file:${PWD}/mlruns" --port 5000 +``` + +Pour tester le serving du modele en staging : + +```shell +mlflow models serve -m "models:/credit_scoring_model/Staging" -p 5001 --no-conda +``` + +## API FastAPI + +L'API attend un payload JSON avec une cle `data`. La valeur peut etre un objet +unique (un client) ou une liste d'objets (plusieurs clients). La liste des +features requises (jeu reduit) est disponible via l'endpoint `/features`. Les +autres champs sont optionnels et seront completes par des valeurs par defaut. + +Inputs minimums (10 + `SK_ID_CURR`) : + +- `EXT_SOURCE_2` +- `EXT_SOURCE_3` +- `AMT_ANNUITY` +- `EXT_SOURCE_1` +- `CODE_GENDER` +- `DAYS_EMPLOYED` +- `AMT_CREDIT` +- `AMT_GOODS_PRICE` +- `DAYS_BIRTH` +- `FLAG_OWN_CAR` + +### Environnement Poetry (recommande) + +Le fichier `pyproject.toml` fixe des versions compatibles pour un stack recent +(`numpy>=2`, `pyarrow>=15`, `scikit-learn>=1.6`). L'environnement vise Python +3.11. + +```shell +poetry env use 3.11 +poetry install +poetry run pytest -q +poetry run uvicorn app.main:app --reload --port 7860 +``` + +Important : le modele `HistGB_final_model.pkl` doit etre regenere avec la +nouvelle version de scikit-learn (re-execution de +`P6_MANET_Stephane_notebook_modélisation.ipynb`, cellule de sauvegarde pickle). + +Note : `requirements.txt` est aligne sur `pyproject.toml` (meme versions). 
+ +### Exemple d'input (schema + valeurs) + +Schema : + +```json +{ + "data": { + "SK_ID_CURR": "int", + "EXT_SOURCE_2": "float", + "EXT_SOURCE_3": "float", + "AMT_ANNUITY": "float", + "EXT_SOURCE_1": "float", + "CODE_GENDER": "str", + "DAYS_EMPLOYED": "int", + "AMT_CREDIT": "float", + "AMT_GOODS_PRICE": "float", + "DAYS_BIRTH": "int", + "FLAG_OWN_CAR": "str" + } +} +``` + +Valeurs d'exemple : + +```json +{ + "data": { + "SK_ID_CURR": 100002, + "EXT_SOURCE_2": 0.61, + "EXT_SOURCE_3": 0.75, + "AMT_ANNUITY": 24700.5, + "EXT_SOURCE_1": 0.45, + "CODE_GENDER": "M", + "DAYS_EMPLOYED": -637, + "AMT_CREDIT": 406597.5, + "AMT_GOODS_PRICE": 351000.0, + "DAYS_BIRTH": -9461, + "FLAG_OWN_CAR": "N" + } +} +``` + +Note : l'API valide strictement les champs requis (`/features`). Pour afficher +toutes les colonnes possibles : `/features?include_all=true`. + +### Demo live (commandes cles en main) + +Lancer l'API : + +```shell +uvicorn app.main:app --reload --port 7860 +``` + +Verifier le service : + +```shell +curl -s http://127.0.0.1:7860/health +``` + +Voir les features attendues : + +```shell +curl -s http://127.0.0.1:7860/features +``` + +Predire un client : + +```shell +curl -s -X POST "http://127.0.0.1:7860/predict?threshold=0.5" \ + -H "Content-Type: application/json" \ + -d '{ + "data": { + "SK_ID_CURR": 100002, + "EXT_SOURCE_2": 0.61, + "EXT_SOURCE_3": 0.75, + "AMT_ANNUITY": 24700.5, + "EXT_SOURCE_1": 0.45, + "CODE_GENDER": "M", + "DAYS_EMPLOYED": -637, + "AMT_CREDIT": 406597.5, + "AMT_GOODS_PRICE": 351000.0, + "DAYS_BIRTH": -9461, + "FLAG_OWN_CAR": "N" + } + }' +``` + +## Contenu de la release + +- **Preparation + pipeline** : nettoyage / preparation, encodage, imputation et pipeline d'entrainement presentes. +- **Gestion du desequilibre** : un sous-echantillonnage est applique sur le jeu d'entrainement final. +- **Comparaison multi-modeles** : baseline, Naive Bayes, Logistic Regression, Decision Tree, Random Forest, + HistGradientBoosting, LGBM, XGB sont compares. 
+- **Validation croisee + tuning** : `StratifiedKFold`, `GridSearchCV` et Hyperopt sont utilises. +- **Score metier + seuil optimal** : le `custom_score` est la metrique principale des tableaux de comparaison et de la CV, avec un `best_threshold` calcule. +- **Explicabilite** : feature importance, SHAP et LIME sont inclus. +- **MLOps (MLflow)** : tracking des params / metriques (dont `custom_score` et `best_threshold`), tags, + registry et passage en "Staging". + +![Screenshot MLFlow](https://raw.githubusercontent.com/stephmnt/OCR_Projet06/main/screen-mlflow.png) + +## Réduction des features + +Réduction des features : l’API utilise un top‑10 SHAP, alors que la mission insiste sur une réduction à l’aide d’une matrice de corrélation. La corrélation est bien documentée dans le notebook d’exploration, mais la liste utilisée par l’API n’est pas explicitement issue de cette matrice. À clarifier dans la doc ou aligner la sélection sur la corrélation. + +## Glossaire rapide + +- **custom_score** : metrique metier qui penalise plus fortement les faux negatifs que les faux positifs. +- **Seuil optimal** : probabilite qui sert a transformer un score en classe 0/1. +- **Validation croisee (CV)** : evaluation sur plusieurs sous-echantillons pour eviter un resultat "chanceux". +- **MLflow tracking** : historique des runs, parametres et metriques. +- **Registry** : espace MLflow pour versionner et promouvoir un modele (ex. "Staging"). 
diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..a440809e2fa72cccdfbefe8e394c0a84f2bca5f4 --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt @@ -0,0 +1,13 @@ +# Single requirements file aligned with pyproject.toml (Poetry). 
+fastapi==0.121.0 +uvicorn==0.30.6 +pydantic==2.12.4 +numpy==2.2.6 +pandas==2.2.3 +pyarrow==22.0.0 +scikit-learn==1.8.0 +scipy==1.14.1 +joblib==1.5.2 +threadpoolctl==3.5.0 +pytest==8.3.2 +httpx==0.27.2 diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/poetry.lock b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/poetry.lock new file mode 100644 index 0000000000000000000000000000000000000000..a90d5239322405d2f6244e1e4578db10bf7b0969 --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/poetry.lock @@ -0,0 +1,1361 @@ +# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. + +[[package]] +name = "annotated-doc" +version = "0.0.4" +description = "Document parameters, class attributes, return types, and variables inline, with Annotated." 
+optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320"}, + {file = "annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4"}, +] + +[[package]] +name = "annotated-types" +version = "0.7.0" +description = "Reusable constraint types to use with typing.Annotated" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, + {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, +] + +[[package]] +name = "anyio" +version = "4.12.0" +description = "High-level concurrency and networking framework on top of asyncio or Trio" +optional = false +python-versions = ">=3.9" +groups = ["main", "dev"] +files = [ + {file = "anyio-4.12.0-py3-none-any.whl", hash = "sha256:dad2376a628f98eeca4881fc56cd06affd18f659b17a747d3ff0307ced94b1bb"}, + {file = "anyio-4.12.0.tar.gz", hash = "sha256:73c693b567b0c55130c104d0b43a9baf3aa6a31fc6110116509f27bf75e21ec0"}, +] + +[package.dependencies] +idna = ">=2.8" +typing_extensions = {version = ">=4.5", markers = "python_version < \"3.13\""} + +[package.extras] +trio = ["trio (>=0.31.0) ; python_version < \"3.10\"", "trio (>=0.32.0) ; python_version >= \"3.10\""] + +[[package]] +name = "certifi" +version = "2025.11.12" +description = "Python package for providing Mozilla's CA Bundle." 
+optional = false +python-versions = ">=3.7" +groups = ["dev"] +files = [ + {file = "certifi-2025.11.12-py3-none-any.whl", hash = "sha256:97de8790030bbd5c2d96b7ec782fc2f7820ef8dba6db909ccf95449f2d062d4b"}, + {file = "certifi-2025.11.12.tar.gz", hash = "sha256:d8ab5478f2ecd78af242878415affce761ca6bc54a22a27e026d7c25357c3316"}, +] + +[[package]] +name = "click" +version = "8.3.1" +description = "Composable command line interface toolkit" +optional = false +python-versions = ">=3.10" +groups = ["main"] +files = [ + {file = "click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6"}, + {file = "click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +groups = ["main", "dev"] +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] +markers = {main = "platform_system == \"Windows\"", dev = "sys_platform == \"win32\""} + +[[package]] +name = "contourpy" +version = "1.3.3" +description = "Python library for calculating contours of 2D quadrilateral grids" +optional = false +python-versions = ">=3.11" +groups = ["main"] +files = [ + {file = "contourpy-1.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:709a48ef9a690e1343202916450bc48b9e51c049b089c7f79a267b46cffcdaa1"}, + {file = "contourpy-1.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:23416f38bfd74d5d28ab8429cc4d63fa67d5068bd711a85edb1c3fb0c3e2f381"}, + {file = 
"contourpy-1.3.3-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:929ddf8c4c7f348e4c0a5a3a714b5c8542ffaa8c22954862a46ca1813b667ee7"}, + {file = "contourpy-1.3.3-cp311-cp311-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:9e999574eddae35f1312c2b4b717b7885d4edd6cb46700e04f7f02db454e67c1"}, + {file = "contourpy-1.3.3-cp311-cp311-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0bf67e0e3f482cb69779dd3061b534eb35ac9b17f163d851e2a547d56dba0a3a"}, + {file = "contourpy-1.3.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:51e79c1f7470158e838808d4a996fa9bac72c498e93d8ebe5119bc1e6becb0db"}, + {file = "contourpy-1.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:598c3aaece21c503615fd59c92a3598b428b2f01bfb4b8ca9c4edeecc2438620"}, + {file = "contourpy-1.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:322ab1c99b008dad206d406bb61d014cf0174df491ae9d9d0fac6a6fda4f977f"}, + {file = "contourpy-1.3.3-cp311-cp311-win32.whl", hash = "sha256:fd907ae12cd483cd83e414b12941c632a969171bf90fc937d0c9f268a31cafff"}, + {file = "contourpy-1.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:3519428f6be58431c56581f1694ba8e50626f2dd550af225f82fb5f5814d2a42"}, + {file = "contourpy-1.3.3-cp311-cp311-win_arm64.whl", hash = "sha256:15ff10bfada4bf92ec8b31c62bf7c1834c244019b4a33095a68000d7075df470"}, + {file = "contourpy-1.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b08a32ea2f8e42cf1d4be3169a98dd4be32bafe4f22b6c4cb4ba810fa9e5d2cb"}, + {file = "contourpy-1.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:556dba8fb6f5d8742f2923fe9457dbdd51e1049c4a43fd3986a0b14a1d815fc6"}, + {file = "contourpy-1.3.3-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:92d9abc807cf7d0e047b95ca5d957cf4792fcd04e920ca70d48add15c1a90ea7"}, + {file = "contourpy-1.3.3-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = 
"sha256:b2e8faa0ed68cb29af51edd8e24798bb661eac3bd9f65420c1887b6ca89987c8"}, + {file = "contourpy-1.3.3-cp312-cp312-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:626d60935cf668e70a5ce6ff184fd713e9683fb458898e4249b63be9e28286ea"}, + {file = "contourpy-1.3.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4d00e655fcef08aba35ec9610536bfe90267d7ab5ba944f7032549c55a146da1"}, + {file = "contourpy-1.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:451e71b5a7d597379ef572de31eeb909a87246974d960049a9848c3bc6c41bf7"}, + {file = "contourpy-1.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:459c1f020cd59fcfe6650180678a9993932d80d44ccde1fa1868977438f0b411"}, + {file = "contourpy-1.3.3-cp312-cp312-win32.whl", hash = "sha256:023b44101dfe49d7d53932be418477dba359649246075c996866106da069af69"}, + {file = "contourpy-1.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:8153b8bfc11e1e4d75bcb0bff1db232f9e10b274e0929de9d608027e0d34ff8b"}, + {file = "contourpy-1.3.3-cp312-cp312-win_arm64.whl", hash = "sha256:07ce5ed73ecdc4a03ffe3e1b3e3c1166db35ae7584be76f65dbbe28a7791b0cc"}, + {file = "contourpy-1.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:177fb367556747a686509d6fef71d221a4b198a3905fe824430e5ea0fda54eb5"}, + {file = "contourpy-1.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d002b6f00d73d69333dac9d0b8d5e84d9724ff9ef044fd63c5986e62b7c9e1b1"}, + {file = "contourpy-1.3.3-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:348ac1f5d4f1d66d3322420f01d42e43122f43616e0f194fc1c9f5d830c5b286"}, + {file = "contourpy-1.3.3-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:655456777ff65c2c548b7c454af9c6f33f16c8884f11083244b5819cc214f1b5"}, + {file = "contourpy-1.3.3-cp313-cp313-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:644a6853d15b2512d67881586bd03f462c7ab755db95f16f14d7e238f2852c67"}, + {file = 
"contourpy-1.3.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4debd64f124ca62069f313a9cb86656ff087786016d76927ae2cf37846b006c9"}, + {file = "contourpy-1.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a15459b0f4615b00bbd1e91f1b9e19b7e63aea7483d03d804186f278c0af2659"}, + {file = "contourpy-1.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ca0fdcd73925568ca027e0b17ab07aad764be4706d0a925b89227e447d9737b7"}, + {file = "contourpy-1.3.3-cp313-cp313-win32.whl", hash = "sha256:b20c7c9a3bf701366556e1b1984ed2d0cedf999903c51311417cf5f591d8c78d"}, + {file = "contourpy-1.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:1cadd8b8969f060ba45ed7c1b714fe69185812ab43bd6b86a9123fe8f99c3263"}, + {file = "contourpy-1.3.3-cp313-cp313-win_arm64.whl", hash = "sha256:fd914713266421b7536de2bfa8181aa8c699432b6763a0ea64195ebe28bff6a9"}, + {file = "contourpy-1.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:88df9880d507169449d434c293467418b9f6cbe82edd19284aa0409e7fdb933d"}, + {file = "contourpy-1.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:d06bb1f751ba5d417047db62bca3c8fde202b8c11fb50742ab3ab962c81e8216"}, + {file = "contourpy-1.3.3-cp313-cp313t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e4e6b05a45525357e382909a4c1600444e2a45b4795163d3b22669285591c1ae"}, + {file = "contourpy-1.3.3-cp313-cp313t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ab3074b48c4e2cf1a960e6bbeb7f04566bf36b1861d5c9d4d8ac04b82e38ba20"}, + {file = "contourpy-1.3.3-cp313-cp313t-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:6c3d53c796f8647d6deb1abe867daeb66dcc8a97e8455efa729516b997b8ed99"}, + {file = "contourpy-1.3.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:50ed930df7289ff2a8d7afeb9603f8289e5704755c7e5c3bbd929c90c817164b"}, + {file = "contourpy-1.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = 
"sha256:4feffb6537d64b84877da813a5c30f1422ea5739566abf0bd18065ac040e120a"}, + {file = "contourpy-1.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:2b7e9480ffe2b0cd2e787e4df64270e3a0440d9db8dc823312e2c940c167df7e"}, + {file = "contourpy-1.3.3-cp313-cp313t-win32.whl", hash = "sha256:283edd842a01e3dcd435b1c5116798d661378d83d36d337b8dde1d16a5fc9ba3"}, + {file = "contourpy-1.3.3-cp313-cp313t-win_amd64.whl", hash = "sha256:87acf5963fc2b34825e5b6b048f40e3635dd547f590b04d2ab317c2619ef7ae8"}, + {file = "contourpy-1.3.3-cp313-cp313t-win_arm64.whl", hash = "sha256:3c30273eb2a55024ff31ba7d052dde990d7d8e5450f4bbb6e913558b3d6c2301"}, + {file = "contourpy-1.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fde6c716d51c04b1c25d0b90364d0be954624a0ee9d60e23e850e8d48353d07a"}, + {file = "contourpy-1.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:cbedb772ed74ff5be440fa8eee9bd49f64f6e3fc09436d9c7d8f1c287b121d77"}, + {file = "contourpy-1.3.3-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:22e9b1bd7a9b1d652cd77388465dc358dafcd2e217d35552424aa4f996f524f5"}, + {file = "contourpy-1.3.3-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a22738912262aa3e254e4f3cb079a95a67132fc5a063890e224393596902f5a4"}, + {file = "contourpy-1.3.3-cp314-cp314-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:afe5a512f31ee6bd7d0dda52ec9864c984ca3d66664444f2d72e0dc4eb832e36"}, + {file = "contourpy-1.3.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f64836de09927cba6f79dcd00fdd7d5329f3fccc633468507079c829ca4db4e3"}, + {file = "contourpy-1.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:1fd43c3be4c8e5fd6e4f2baeae35ae18176cf2e5cced681cca908addf1cdd53b"}, + {file = "contourpy-1.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6afc576f7b33cf00996e5c1102dc2a8f7cc89e39c0b55df93a0b78c1bd992b36"}, + {file = "contourpy-1.3.3-cp314-cp314-win32.whl", hash = 
"sha256:66c8a43a4f7b8df8b71ee1840e4211a3c8d93b214b213f590e18a1beca458f7d"}, + {file = "contourpy-1.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:cf9022ef053f2694e31d630feaacb21ea24224be1c3ad0520b13d844274614fd"}, + {file = "contourpy-1.3.3-cp314-cp314-win_arm64.whl", hash = "sha256:95b181891b4c71de4bb404c6621e7e2390745f887f2a026b2d99e92c17892339"}, + {file = "contourpy-1.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:33c82d0138c0a062380332c861387650c82e4cf1747aaa6938b9b6516762e772"}, + {file = "contourpy-1.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:ea37e7b45949df430fe649e5de8351c423430046a2af20b1c1961cae3afcda77"}, + {file = "contourpy-1.3.3-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d304906ecc71672e9c89e87c4675dc5c2645e1f4269a5063b99b0bb29f232d13"}, + {file = "contourpy-1.3.3-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ca658cd1a680a5c9ea96dc61cdbae1e85c8f25849843aa799dfd3cb370ad4fbe"}, + {file = "contourpy-1.3.3-cp314-cp314t-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ab2fd90904c503739a75b7c8c5c01160130ba67944a7b77bbf36ef8054576e7f"}, + {file = "contourpy-1.3.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b7301b89040075c30e5768810bc96a8e8d78085b47d8be6e4c3f5a0b4ed478a0"}, + {file = "contourpy-1.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:2a2a8b627d5cc6b7c41a4beff6c5ad5eb848c88255fda4a8745f7e901b32d8e4"}, + {file = "contourpy-1.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:fd6ec6be509c787f1caf6b247f0b1ca598bef13f4ddeaa126b7658215529ba0f"}, + {file = "contourpy-1.3.3-cp314-cp314t-win32.whl", hash = "sha256:e74a9a0f5e3fff48fb5a7f2fd2b9b70a3fe014a67522f79b7cca4c0c7e43c9ae"}, + {file = "contourpy-1.3.3-cp314-cp314t-win_amd64.whl", hash = "sha256:13b68d6a62db8eafaebb8039218921399baf6e47bf85006fd8529f2a08ef33fc"}, + {file = "contourpy-1.3.3-cp314-cp314t-win_arm64.whl", hash = 
"sha256:b7448cb5a725bb1e35ce88771b86fba35ef418952474492cf7c764059933ff8b"}, + {file = "contourpy-1.3.3-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:cd5dfcaeb10f7b7f9dc8941717c6c2ade08f587be2226222c12b25f0483ed497"}, + {file = "contourpy-1.3.3-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:0c1fc238306b35f246d61a1d416a627348b5cf0648648a031e14bb8705fcdfe8"}, + {file = "contourpy-1.3.3-pp311-pypy311_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:70f9aad7de812d6541d29d2bbf8feb22ff7e1c299523db288004e3157ff4674e"}, + {file = "contourpy-1.3.3-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5ed3657edf08512fc3fe81b510e35c2012fbd3081d2e26160f27ca28affec989"}, + {file = "contourpy-1.3.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:3d1a3799d62d45c18bafd41c5fa05120b96a28079f2393af559b843d1a966a77"}, + {file = "contourpy-1.3.3.tar.gz", hash = "sha256:083e12155b210502d0bca491432bb04d56dc3432f95a979b429f2848c3dbe880"}, +] + +[package.dependencies] +numpy = ">=1.25" + +[package.extras] +bokeh = ["bokeh", "selenium"] +docs = ["furo", "sphinx (>=7.2)", "sphinx-copybutton"] +mypy = ["bokeh", "contourpy[bokeh,docs]", "docutils-stubs", "mypy (==1.17.0)", "types-Pillow"] +test = ["Pillow", "contourpy[test-no-images]", "matplotlib"] +test-no-images = ["pytest", "pytest-cov", "pytest-rerunfailures", "pytest-xdist", "wurlitzer"] + +[[package]] +name = "cycler" +version = "0.12.1" +description = "Composable style cycles" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30"}, + {file = "cycler-0.12.1.tar.gz", hash = "sha256:88bb128f02ba341da8ef447245a9e138fae777f6a23943da4540077d3601eb1c"}, +] + +[package.extras] +docs = ["ipython", "matplotlib", "numpydoc", "sphinx"] +tests = ["pytest", "pytest-cov", "pytest-xdist"] + +[[package]] +name = "fastapi" +version 
= "0.121.0" +description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "fastapi-0.121.0-py3-none-any.whl", hash = "sha256:8bdf1b15a55f4e4b0d6201033da9109ea15632cb76cf156e7b8b4019f2172106"}, + {file = "fastapi-0.121.0.tar.gz", hash = "sha256:06663356a0b1ee93e875bbf05a31fb22314f5bed455afaaad2b2dad7f26e98fa"}, +] + +[package.dependencies] +annotated-doc = ">=0.0.2" +pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.1.0 || >2.1.0,<3.0.0" +starlette = ">=0.40.0,<0.50.0" +typing-extensions = ">=4.8.0" + +[package.extras] +all = ["email-validator (>=2.0.0)", "fastapi-cli[standard] (>=0.0.8)", "httpx (>=0.23.0,<1.0.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=3.1.5)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.18)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"] +standard = ["email-validator (>=2.0.0)", "fastapi-cli[standard] (>=0.0.8)", "httpx (>=0.23.0,<1.0.0)", "jinja2 (>=3.1.5)", "python-multipart (>=0.0.18)", "uvicorn[standard] (>=0.12.0)"] +standard-no-fastapi-cloud-cli = ["email-validator (>=2.0.0)", "fastapi-cli[standard-no-fastapi-cloud-cli] (>=0.0.8)", "httpx (>=0.23.0,<1.0.0)", "jinja2 (>=3.1.5)", "python-multipart (>=0.0.18)", "uvicorn[standard] (>=0.12.0)"] + +[[package]] +name = "fonttools" +version = "4.61.1" +description = "Tools to manipulate font files" +optional = false +python-versions = ">=3.10" +groups = ["main"] +files = [ + {file = "fonttools-4.61.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7c7db70d57e5e1089a274cbb2b1fd635c9a24de809a231b154965d415d6c6d24"}, + {file = "fonttools-4.61.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5fe9fd43882620017add5eabb781ebfbc6998ee49b35bd7f8f79af1f9f99a958"}, + {file = 
"fonttools-4.61.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d8db08051fc9e7d8bc622f2112511b8107d8f27cd89e2f64ec45e9825e8288da"}, + {file = "fonttools-4.61.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a76d4cb80f41ba94a6691264be76435e5f72f2cb3cab0b092a6212855f71c2f6"}, + {file = "fonttools-4.61.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a13fc8aeb24bad755eea8f7f9d409438eb94e82cf86b08fe77a03fbc8f6a96b1"}, + {file = "fonttools-4.61.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:b846a1fcf8beadeb9ea4f44ec5bdde393e2f1569e17d700bfc49cd69bde75881"}, + {file = "fonttools-4.61.1-cp310-cp310-win32.whl", hash = "sha256:78a7d3ab09dc47ac1a363a493e6112d8cabed7ba7caad5f54dbe2f08676d1b47"}, + {file = "fonttools-4.61.1-cp310-cp310-win_amd64.whl", hash = "sha256:eff1ac3cc66c2ac7cda1e64b4e2f3ffef474b7335f92fc3833fc632d595fcee6"}, + {file = "fonttools-4.61.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c6604b735bb12fef8e0efd5578c9fb5d3d8532d5001ea13a19cddf295673ee09"}, + {file = "fonttools-4.61.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5ce02f38a754f207f2f06557523cd39a06438ba3aafc0639c477ac409fc64e37"}, + {file = "fonttools-4.61.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:77efb033d8d7ff233385f30c62c7c79271c8885d5c9657d967ede124671bbdfb"}, + {file = "fonttools-4.61.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:75c1a6dfac6abd407634420c93864a1e274ebc1c7531346d9254c0d8f6ca00f9"}, + {file = "fonttools-4.61.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0de30bfe7745c0d1ffa2b0b7048fb7123ad0d71107e10ee090fa0b16b9452e87"}, + {file = "fonttools-4.61.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:58b0ee0ab5b1fc9921eccfe11d1435added19d6494dde14e323f25ad2bc30c56"}, + {file = "fonttools-4.61.1-cp311-cp311-win32.whl", hash = 
"sha256:f79b168428351d11e10c5aeb61a74e1851ec221081299f4cf56036a95431c43a"}, + {file = "fonttools-4.61.1-cp311-cp311-win_amd64.whl", hash = "sha256:fe2efccb324948a11dd09d22136fe2ac8a97d6c1347cf0b58a911dcd529f66b7"}, + {file = "fonttools-4.61.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:f3cb4a569029b9f291f88aafc927dd53683757e640081ca8c412781ea144565e"}, + {file = "fonttools-4.61.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:41a7170d042e8c0024703ed13b71893519a1a6d6e18e933e3ec7507a2c26a4b2"}, + {file = "fonttools-4.61.1-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:10d88e55330e092940584774ee5e8a6971b01fc2f4d3466a1d6c158230880796"}, + {file = "fonttools-4.61.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:15acc09befd16a0fb8a8f62bc147e1a82817542d72184acca9ce6e0aeda9fa6d"}, + {file = "fonttools-4.61.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e6bcdf33aec38d16508ce61fd81838f24c83c90a1d1b8c68982857038673d6b8"}, + {file = "fonttools-4.61.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5fade934607a523614726119164ff621e8c30e8fa1ffffbbd358662056ba69f0"}, + {file = "fonttools-4.61.1-cp312-cp312-win32.whl", hash = "sha256:75da8f28eff26defba42c52986de97b22106cb8f26515b7c22443ebc9c2d3261"}, + {file = "fonttools-4.61.1-cp312-cp312-win_amd64.whl", hash = "sha256:497c31ce314219888c0e2fce5ad9178ca83fe5230b01a5006726cdf3ac9f24d9"}, + {file = "fonttools-4.61.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8c56c488ab471628ff3bfa80964372fc13504ece601e0d97a78ee74126b2045c"}, + {file = "fonttools-4.61.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:dc492779501fa723b04d0ab1f5be046797fee17d27700476edc7ee9ae535a61e"}, + {file = "fonttools-4.61.1-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = 
"sha256:64102ca87e84261419c3747a0d20f396eb024bdbeb04c2bfb37e2891f5fadcb5"}, + {file = "fonttools-4.61.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4c1b526c8d3f615a7b1867f38a9410849c8f4aef078535742198e942fba0e9bd"}, + {file = "fonttools-4.61.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:41ed4b5ec103bd306bb68f81dc166e77409e5209443e5773cb4ed837bcc9b0d3"}, + {file = "fonttools-4.61.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b501c862d4901792adaec7c25b1ecc749e2662543f68bb194c42ba18d6eec98d"}, + {file = "fonttools-4.61.1-cp313-cp313-win32.whl", hash = "sha256:4d7092bb38c53bbc78e9255a59158b150bcdc115a1e3b3ce0b5f267dc35dd63c"}, + {file = "fonttools-4.61.1-cp313-cp313-win_amd64.whl", hash = "sha256:21e7c8d76f62ab13c9472ccf74515ca5b9a761d1bde3265152a6dc58700d895b"}, + {file = "fonttools-4.61.1-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:fff4f534200a04b4a36e7ae3cb74493afe807b517a09e99cb4faa89a34ed6ecd"}, + {file = "fonttools-4.61.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:d9203500f7c63545b4ce3799319fe4d9feb1a1b89b28d3cb5abd11b9dd64147e"}, + {file = "fonttools-4.61.1-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:fa646ecec9528bef693415c79a86e733c70a4965dd938e9a226b0fc64c9d2e6c"}, + {file = "fonttools-4.61.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:11f35ad7805edba3aac1a3710d104592df59f4b957e30108ae0ba6c10b11dd75"}, + {file = "fonttools-4.61.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b931ae8f62db78861b0ff1ac017851764602288575d65b8e8ff1963fed419063"}, + {file = "fonttools-4.61.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b148b56f5de675ee16d45e769e69f87623a4944f7443850bf9a9376e628a89d2"}, + {file = "fonttools-4.61.1-cp314-cp314-win32.whl", hash = "sha256:9b666a475a65f4e839d3d10473fad6d47e0a9db14a2f4a224029c5bfde58ad2c"}, + 
{file = "fonttools-4.61.1-cp314-cp314-win_amd64.whl", hash = "sha256:4f5686e1fe5fce75d82d93c47a438a25bf0d1319d2843a926f741140b2b16e0c"}, + {file = "fonttools-4.61.1-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:e76ce097e3c57c4bcb67c5aa24a0ecdbd9f74ea9219997a707a4061fbe2707aa"}, + {file = "fonttools-4.61.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:9cfef3ab326780c04d6646f68d4b4742aae222e8b8ea1d627c74e38afcbc9d91"}, + {file = "fonttools-4.61.1-cp314-cp314t-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a75c301f96db737e1c5ed5fd7d77d9c34466de16095a266509e13da09751bd19"}, + {file = "fonttools-4.61.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:91669ccac46bbc1d09e9273546181919064e8df73488ea087dcac3e2968df9ba"}, + {file = "fonttools-4.61.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c33ab3ca9d3ccd581d58e989d67554e42d8d4ded94ab3ade3508455fe70e65f7"}, + {file = "fonttools-4.61.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:664c5a68ec406f6b1547946683008576ef8b38275608e1cee6c061828171c118"}, + {file = "fonttools-4.61.1-cp314-cp314t-win32.whl", hash = "sha256:aed04cabe26f30c1647ef0e8fbb207516fd40fe9472e9439695f5c6998e60ac5"}, + {file = "fonttools-4.61.1-cp314-cp314t-win_amd64.whl", hash = "sha256:2180f14c141d2f0f3da43f3a81bc8aa4684860f6b0e6f9e165a4831f24e6a23b"}, + {file = "fonttools-4.61.1-py3-none-any.whl", hash = "sha256:17d2bf5d541add43822bcf0c43d7d847b160c9bb01d15d5007d84e2217aaa371"}, + {file = "fonttools-4.61.1.tar.gz", hash = "sha256:6675329885c44657f826ef01d9e4fb33b9158e9d93c537d84ad8399539bc6f69"}, +] + +[package.extras] +all = ["brotli (>=1.0.1) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\"", "lxml (>=4.0)", "lz4 (>=1.7.4.2)", "matplotlib", "munkres ; platform_python_implementation == \"PyPy\"", "pycairo", "scipy ; 
platform_python_implementation != \"PyPy\"", "skia-pathops (>=0.5.0)", "sympy", "uharfbuzz (>=0.45.0)", "unicodedata2 (>=17.0.0) ; python_version <= \"3.14\"", "xattr ; sys_platform == \"darwin\"", "zopfli (>=0.1.4)"] +graphite = ["lz4 (>=1.7.4.2)"] +interpolatable = ["munkres ; platform_python_implementation == \"PyPy\"", "pycairo", "scipy ; platform_python_implementation != \"PyPy\""] +lxml = ["lxml (>=4.0)"] +pathops = ["skia-pathops (>=0.5.0)"] +plot = ["matplotlib"] +repacker = ["uharfbuzz (>=0.45.0)"] +symfont = ["sympy"] +type1 = ["xattr ; sys_platform == \"darwin\""] +unicode = ["unicodedata2 (>=17.0.0) ; python_version <= \"3.14\""] +woff = ["brotli (>=1.0.1) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\"", "zopfli (>=0.1.4)"] + +[[package]] +name = "h11" +version = "0.16.0" +description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +optional = false +python-versions = ">=3.8" +groups = ["main", "dev"] +files = [ + {file = "h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86"}, + {file = "h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1"}, +] + +[[package]] +name = "httpcore" +version = "1.0.9" +description = "A minimal low-level HTTP client." +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55"}, + {file = "httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8"}, +] + +[package.dependencies] +certifi = "*" +h11 = ">=0.16" + +[package.extras] +asyncio = ["anyio (>=4.0,<5.0)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] +trio = ["trio (>=0.22.0,<1.0)"] + +[[package]] +name = "httpx" +version = "0.27.2" +description = "The next generation HTTP client." 
+optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0"}, + {file = "httpx-0.27.2.tar.gz", hash = "sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2"}, +] + +[package.dependencies] +anyio = "*" +certifi = "*" +httpcore = "==1.*" +idna = "*" +sniffio = "*" + +[package.extras] +brotli = ["brotli ; platform_python_implementation == \"CPython\"", "brotlicffi ; platform_python_implementation != \"CPython\""] +cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] +zstd = ["zstandard (>=0.18.0)"] + +[[package]] +name = "idna" +version = "3.11" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.8" +groups = ["main", "dev"] +files = [ + {file = "idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea"}, + {file = "idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902"}, +] + +[package.extras] +all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"] + +[[package]] +name = "iniconfig" +version = "2.3.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.10" +groups = ["dev"] +files = [ + {file = "iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12"}, + {file = "iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730"}, +] + +[[package]] +name = "joblib" +version = "1.5.2" +description = "Lightweight pipelining with Python functions" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "joblib-1.5.2-py3-none-any.whl", hash = 
"sha256:4e1f0bdbb987e6d843c70cf43714cb276623def372df3c22fe5266b2670bc241"}, + {file = "joblib-1.5.2.tar.gz", hash = "sha256:3faa5c39054b2f03ca547da9b2f52fde67c06240c31853f306aea97f13647b55"}, +] + +[[package]] +name = "kiwisolver" +version = "1.4.9" +description = "A fast implementation of the Cassowary constraint solver" +optional = false +python-versions = ">=3.10" +groups = ["main"] +files = [ + {file = "kiwisolver-1.4.9-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b4b4d74bda2b8ebf4da5bd42af11d02d04428b2c32846e4c2c93219df8a7987b"}, + {file = "kiwisolver-1.4.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:fb3b8132019ea572f4611d770991000d7f58127560c4889729248eb5852a102f"}, + {file = "kiwisolver-1.4.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:84fd60810829c27ae375114cd379da1fa65e6918e1da405f356a775d49a62bcf"}, + {file = "kiwisolver-1.4.9-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b78efa4c6e804ecdf727e580dbb9cba85624d2e1c6b5cb059c66290063bd99a9"}, + {file = "kiwisolver-1.4.9-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d4efec7bcf21671db6a3294ff301d2fc861c31faa3c8740d1a94689234d1b415"}, + {file = "kiwisolver-1.4.9-cp310-cp310-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:90f47e70293fc3688b71271100a1a5453aa9944a81d27ff779c108372cf5567b"}, + {file = "kiwisolver-1.4.9-cp310-cp310-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8fdca1def57a2e88ef339de1737a1449d6dbf5fab184c54a1fca01d541317154"}, + {file = "kiwisolver-1.4.9-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:9cf554f21be770f5111a1690d42313e140355e687e05cf82cb23d0a721a64a48"}, + {file = "kiwisolver-1.4.9-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:fc1795ac5cd0510207482c3d1d3ed781143383b8cfd36f5c645f3897ce066220"}, + {file = "kiwisolver-1.4.9-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:ccd09f20ccdbbd341b21a67ab50a119b64a403b09288c27481575105283c1586"}, + {file 
= "kiwisolver-1.4.9-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:540c7c72324d864406a009d72f5d6856f49693db95d1fbb46cf86febef873634"}, + {file = "kiwisolver-1.4.9-cp310-cp310-win_amd64.whl", hash = "sha256:ede8c6d533bc6601a47ad4046080d36b8fc99f81e6f1c17b0ac3c2dc91ac7611"}, + {file = "kiwisolver-1.4.9-cp310-cp310-win_arm64.whl", hash = "sha256:7b4da0d01ac866a57dd61ac258c5607b4cd677f63abaec7b148354d2b2cdd536"}, + {file = "kiwisolver-1.4.9-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:eb14a5da6dc7642b0f3a18f13654847cd8b7a2550e2645a5bda677862b03ba16"}, + {file = "kiwisolver-1.4.9-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:39a219e1c81ae3b103643d2aedb90f1ef22650deb266ff12a19e7773f3e5f089"}, + {file = "kiwisolver-1.4.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2405a7d98604b87f3fc28b1716783534b1b4b8510d8142adca34ee0bc3c87543"}, + {file = "kiwisolver-1.4.9-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:dc1ae486f9abcef254b5618dfb4113dd49f94c68e3e027d03cf0143f3f772b61"}, + {file = "kiwisolver-1.4.9-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8a1f570ce4d62d718dce3f179ee78dac3b545ac16c0c04bb363b7607a949c0d1"}, + {file = "kiwisolver-1.4.9-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:cb27e7b78d716c591e88e0a09a2139c6577865d7f2e152488c2cc6257f460872"}, + {file = "kiwisolver-1.4.9-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:15163165efc2f627eb9687ea5f3a28137217d217ac4024893d753f46bce9de26"}, + {file = "kiwisolver-1.4.9-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:bdee92c56a71d2b24c33a7d4c2856bd6419d017e08caa7802d2963870e315028"}, + {file = "kiwisolver-1.4.9-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:412f287c55a6f54b0650bd9b6dce5aceddb95864a1a90c87af16979d37c89771"}, + {file = "kiwisolver-1.4.9-cp311-cp311-musllinux_1_2_s390x.whl", hash = 
"sha256:2c93f00dcba2eea70af2be5f11a830a742fe6b579a1d4e00f47760ef13be247a"}, + {file = "kiwisolver-1.4.9-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f117e1a089d9411663a3207ba874f31be9ac8eaa5b533787024dc07aeb74f464"}, + {file = "kiwisolver-1.4.9-cp311-cp311-win_amd64.whl", hash = "sha256:be6a04e6c79819c9a8c2373317d19a96048e5a3f90bec587787e86a1153883c2"}, + {file = "kiwisolver-1.4.9-cp311-cp311-win_arm64.whl", hash = "sha256:0ae37737256ba2de764ddc12aed4956460277f00c4996d51a197e72f62f5eec7"}, + {file = "kiwisolver-1.4.9-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ac5a486ac389dddcc5bef4f365b6ae3ffff2c433324fb38dd35e3fab7c957999"}, + {file = "kiwisolver-1.4.9-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f2ba92255faa7309d06fe44c3a4a97efe1c8d640c2a79a5ef728b685762a6fd2"}, + {file = "kiwisolver-1.4.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4a2899935e724dd1074cb568ce7ac0dce28b2cd6ab539c8e001a8578eb106d14"}, + {file = "kiwisolver-1.4.9-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f6008a4919fdbc0b0097089f67a1eb55d950ed7e90ce2cc3e640abadd2757a04"}, + {file = "kiwisolver-1.4.9-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:67bb8b474b4181770f926f7b7d2f8c0248cbcb78b660fdd41a47054b28d2a752"}, + {file = "kiwisolver-1.4.9-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2327a4a30d3ee07d2fbe2e7933e8a37c591663b96ce42a00bc67461a87d7df77"}, + {file = "kiwisolver-1.4.9-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:7a08b491ec91b1d5053ac177afe5290adacf1f0f6307d771ccac5de30592d198"}, + {file = "kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d8fc5c867c22b828001b6a38d2eaeb88160bf5783c6cb4a5e440efc981ce286d"}, + {file = "kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:3b3115b2581ea35bb6d1f24a4c90af37e5d9b49dcff267eeed14c3893c5b86ab"}, + {file = 
"kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:858e4c22fb075920b96a291928cb7dea5644e94c0ee4fcd5af7e865655e4ccf2"}, + {file = "kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ed0fecd28cc62c54b262e3736f8bb2512d8dcfdc2bcf08be5f47f96bf405b145"}, + {file = "kiwisolver-1.4.9-cp312-cp312-win_amd64.whl", hash = "sha256:f68208a520c3d86ea51acf688a3e3002615a7f0238002cccc17affecc86a8a54"}, + {file = "kiwisolver-1.4.9-cp312-cp312-win_arm64.whl", hash = "sha256:2c1a4f57df73965f3f14df20b80ee29e6a7930a57d2d9e8491a25f676e197c60"}, + {file = "kiwisolver-1.4.9-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a5d0432ccf1c7ab14f9949eec60c5d1f924f17c037e9f8b33352fa05799359b8"}, + {file = "kiwisolver-1.4.9-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efb3a45b35622bb6c16dbfab491a8f5a391fe0e9d45ef32f4df85658232ca0e2"}, + {file = "kiwisolver-1.4.9-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1a12cf6398e8a0a001a059747a1cbf24705e18fe413bc22de7b3d15c67cffe3f"}, + {file = "kiwisolver-1.4.9-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b67e6efbf68e077dd71d1a6b37e43e1a99d0bff1a3d51867d45ee8908b931098"}, + {file = "kiwisolver-1.4.9-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5656aa670507437af0207645273ccdfee4f14bacd7f7c67a4306d0dcaeaf6eed"}, + {file = "kiwisolver-1.4.9-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:bfc08add558155345129c7803b3671cf195e6a56e7a12f3dde7c57d9b417f525"}, + {file = "kiwisolver-1.4.9-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:40092754720b174e6ccf9e845d0d8c7d8e12c3d71e7fc35f55f3813e96376f78"}, + {file = "kiwisolver-1.4.9-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:497d05f29a1300d14e02e6441cf0f5ee81c1ff5a304b0d9fb77423974684e08b"}, + {file = "kiwisolver-1.4.9-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = 
"sha256:bdd1a81a1860476eb41ac4bc1e07b3f07259e6d55bbf739b79c8aaedcf512799"}, + {file = "kiwisolver-1.4.9-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:e6b93f13371d341afee3be9f7c5964e3fe61d5fa30f6a30eb49856935dfe4fc3"}, + {file = "kiwisolver-1.4.9-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d75aa530ccfaa593da12834b86a0724f58bff12706659baa9227c2ccaa06264c"}, + {file = "kiwisolver-1.4.9-cp313-cp313-win_amd64.whl", hash = "sha256:dd0a578400839256df88c16abddf9ba14813ec5f21362e1fe65022e00c883d4d"}, + {file = "kiwisolver-1.4.9-cp313-cp313-win_arm64.whl", hash = "sha256:d4188e73af84ca82468f09cadc5ac4db578109e52acb4518d8154698d3a87ca2"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:5a0f2724dfd4e3b3ac5a82436a8e6fd16baa7d507117e4279b660fe8ca38a3a1"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:1b11d6a633e4ed84fc0ddafd4ebfd8ea49b3f25082c04ad12b8315c11d504dc1"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:61874cdb0a36016354853593cffc38e56fc9ca5aa97d2c05d3dcf6922cd55a11"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:60c439763a969a6af93b4881db0eed8fadf93ee98e18cbc35bc8da868d0c4f0c"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:92a2f997387a1b79a75e7803aa7ded2cfbe2823852ccf1ba3bcf613b62ae3197"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a31d512c812daea6d8b3be3b2bfcbeb091dbb09177706569bcfc6240dcf8b41c"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:52a15b0f35dad39862d376df10c5230155243a2c1a436e39eb55623ccbd68185"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a30fd6fdef1430fd9e1ba7b3398b5ee4e2887783917a687d86ba69985fb08748"}, + {file = 
"kiwisolver-1.4.9-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:cc9617b46837c6468197b5945e196ee9ca43057bb7d9d1ae688101e4e1dddf64"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:0ab74e19f6a2b027ea4f845a78827969af45ce790e6cb3e1ebab71bdf9f215ff"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dba5ee5d3981160c28d5490f0d1b7ed730c22470ff7f6cc26cfcfaacb9896a07"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-win_arm64.whl", hash = "sha256:0749fd8f4218ad2e851e11cc4dc05c7cbc0cbc4267bdfdb31782e65aace4ee9c"}, + {file = "kiwisolver-1.4.9-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:9928fe1eb816d11ae170885a74d074f57af3a0d65777ca47e9aeb854a1fba386"}, + {file = "kiwisolver-1.4.9-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:d0005b053977e7b43388ddec89fa567f43d4f6d5c2c0affe57de5ebf290dc552"}, + {file = "kiwisolver-1.4.9-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:2635d352d67458b66fd0667c14cb1d4145e9560d503219034a18a87e971ce4f3"}, + {file = "kiwisolver-1.4.9-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:767c23ad1c58c9e827b649a9ab7809fd5fd9db266a9cf02b0e926ddc2c680d58"}, + {file = "kiwisolver-1.4.9-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:72d0eb9fba308b8311685c2268cf7d0a0639a6cd027d8128659f72bdd8a024b4"}, + {file = "kiwisolver-1.4.9-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f68e4f3eeca8fb22cc3d731f9715a13b652795ef657a13df1ad0c7dc0e9731df"}, + {file = "kiwisolver-1.4.9-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d84cd4061ae292d8ac367b2c3fa3aad11cb8625a95d135fe93f286f914f3f5a6"}, + {file = "kiwisolver-1.4.9-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:a60ea74330b91bd22a29638940d115df9dc00af5035a9a2a6ad9399ffb4ceca5"}, + {file = "kiwisolver-1.4.9-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = 
"sha256:ce6a3a4e106cf35c2d9c4fa17c05ce0b180db622736845d4315519397a77beaf"}, + {file = "kiwisolver-1.4.9-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:77937e5e2a38a7b48eef0585114fe7930346993a88060d0bf886086d2aa49ef5"}, + {file = "kiwisolver-1.4.9-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:24c175051354f4a28c5d6a31c93906dc653e2bf234e8a4bbfb964892078898ce"}, + {file = "kiwisolver-1.4.9-cp314-cp314-win_amd64.whl", hash = "sha256:0763515d4df10edf6d06a3c19734e2566368980d21ebec439f33f9eb936c07b7"}, + {file = "kiwisolver-1.4.9-cp314-cp314-win_arm64.whl", hash = "sha256:0e4e2bf29574a6a7b7f6cb5fa69293b9f96c928949ac4a53ba3f525dffb87f9c"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:d976bbb382b202f71c67f77b0ac11244021cfa3f7dfd9e562eefcea2df711548"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2489e4e5d7ef9a1c300a5e0196e43d9c739f066ef23270607d45aba368b91f2d"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:e2ea9f7ab7fbf18fffb1b5434ce7c69a07582f7acc7717720f1d69f3e806f90c"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b34e51affded8faee0dfdb705416153819d8ea9250bbbf7ea1b249bdeb5f1122"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d8aacd3d4b33b772542b2e01beb50187536967b514b00003bdda7589722d2a64"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7cf974dd4e35fa315563ac99d6287a1024e4dc2077b8a7d7cd3d2fb65d283134"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:85bd218b5ecfbee8c8a82e121802dcb519a86044c9c3b2e4aef02fa05c6da370"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:0856e241c2d3df4efef7c04a1e46b1936b6120c9bcf36dd216e3acd84bc4fb21"}, + {file = 
"kiwisolver-1.4.9-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:9af39d6551f97d31a4deebeac6f45b156f9755ddc59c07b402c148f5dbb6482a"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:bb4ae2b57fc1d8cbd1cf7b1d9913803681ffa903e7488012be5b76dedf49297f"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:aedff62918805fb62d43a4aa2ecd4482c380dc76cd31bd7c8878588a61bd0369"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-win_amd64.whl", hash = "sha256:1fa333e8b2ce4d9660f2cda9c0e1b6bafcfb2457a9d259faa82289e73ec24891"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-win_arm64.whl", hash = "sha256:4a48a2ce79d65d363597ef7b567ce3d14d68783d2b2263d98db3d9477805ba32"}, + {file = "kiwisolver-1.4.9-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:4d1d9e582ad4d63062d34077a9a1e9f3c34088a2ec5135b1f7190c07cf366527"}, + {file = "kiwisolver-1.4.9-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:deed0c7258ceb4c44ad5ec7d9918f9f14fd05b2be86378d86cf50e63d1e7b771"}, + {file = "kiwisolver-1.4.9-pp310-pypy310_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0a590506f303f512dff6b7f75fd2fd18e16943efee932008fe7140e5fa91d80e"}, + {file = "kiwisolver-1.4.9-pp310-pypy310_pp73-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e09c2279a4d01f099f52d5c4b3d9e208e91edcbd1a175c9662a8b16e000fece9"}, + {file = "kiwisolver-1.4.9-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c9e7cdf45d594ee04d5be1b24dd9d49f3d1590959b2271fb30b5ca2b262c00fb"}, + {file = "kiwisolver-1.4.9-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:720e05574713db64c356e86732c0f3c5252818d05f9df320f0ad8380641acea5"}, + {file = "kiwisolver-1.4.9-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:17680d737d5335b552994a2008fab4c851bcd7de33094a82067ef3a576ff02fa"}, + {file = "kiwisolver-1.4.9-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:85b5352f94e490c028926ea567fc569c52ec79ce131dadb968d3853e809518c2"}, + {file = "kiwisolver-1.4.9-pp311-pypy311_pp73-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:464415881e4801295659462c49461a24fb107c140de781d55518c4b80cb6790f"}, + {file = "kiwisolver-1.4.9-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:fb940820c63a9590d31d88b815e7a3aa5915cad3ce735ab45f0c730b39547de1"}, + {file = "kiwisolver-1.4.9.tar.gz", hash = "sha256:c3b22c26c6fd6811b0ae8363b95ca8ce4ea3c202d3d0975b2914310ceb1bcc4d"}, +] + +[[package]] +name = "matplotlib" +version = "3.9.2" +description = "Python plotting package" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "matplotlib-3.9.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:9d78bbc0cbc891ad55b4f39a48c22182e9bdaea7fc0e5dbd364f49f729ca1bbb"}, + {file = "matplotlib-3.9.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c375cc72229614632c87355366bdf2570c2dac01ac66b8ad048d2dabadf2d0d4"}, + {file = "matplotlib-3.9.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d94ff717eb2bd0b58fe66380bd8b14ac35f48a98e7c6765117fe67fb7684e64"}, + {file = "matplotlib-3.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab68d50c06938ef28681073327795c5db99bb4666214d2d5f880ed11aeaded66"}, + {file = "matplotlib-3.9.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:65aacf95b62272d568044531e41de26285d54aec8cb859031f511f84bd8b495a"}, + {file = "matplotlib-3.9.2-cp310-cp310-win_amd64.whl", hash = "sha256:3fd595f34aa8a55b7fc8bf9ebea8aa665a84c82d275190a61118d33fbc82ccae"}, + {file = "matplotlib-3.9.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:d8dd059447824eec055e829258ab092b56bb0579fc3164fa09c64f3acd478772"}, + {file = "matplotlib-3.9.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c797dac8bb9c7a3fd3382b16fe8f215b4cf0f22adccea36f1545a6d7be310b41"}, + {file = 
"matplotlib-3.9.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d719465db13267bcef19ea8954a971db03b9f48b4647e3860e4bc8e6ed86610f"}, + {file = "matplotlib-3.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8912ef7c2362f7193b5819d17dae8629b34a95c58603d781329712ada83f9447"}, + {file = "matplotlib-3.9.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:7741f26a58a240f43bee74965c4882b6c93df3e7eb3de160126d8c8f53a6ae6e"}, + {file = "matplotlib-3.9.2-cp311-cp311-win_amd64.whl", hash = "sha256:ae82a14dab96fbfad7965403c643cafe6515e386de723e498cf3eeb1e0b70cc7"}, + {file = "matplotlib-3.9.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:ac43031375a65c3196bee99f6001e7fa5bdfb00ddf43379d3c0609bdca042df9"}, + {file = "matplotlib-3.9.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:be0fc24a5e4531ae4d8e858a1a548c1fe33b176bb13eff7f9d0d38ce5112a27d"}, + {file = "matplotlib-3.9.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf81de2926c2db243c9b2cbc3917619a0fc85796c6ba4e58f541df814bbf83c7"}, + {file = "matplotlib-3.9.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6ee45bc4245533111ced13f1f2cace1e7f89d1c793390392a80c139d6cf0e6c"}, + {file = "matplotlib-3.9.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:306c8dfc73239f0e72ac50e5a9cf19cc4e8e331dd0c54f5e69ca8758550f1e1e"}, + {file = "matplotlib-3.9.2-cp312-cp312-win_amd64.whl", hash = "sha256:5413401594cfaff0052f9d8b1aafc6d305b4bd7c4331dccd18f561ff7e1d3bd3"}, + {file = "matplotlib-3.9.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:18128cc08f0d3cfff10b76baa2f296fc28c4607368a8402de61bb3f2eb33c7d9"}, + {file = "matplotlib-3.9.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4876d7d40219e8ae8bb70f9263bcbe5714415acfdf781086601211335e24f8aa"}, + {file = "matplotlib-3.9.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:6d9f07a80deab4bb0b82858a9e9ad53d1382fd122be8cde11080f4e7dfedb38b"}, + {file = "matplotlib-3.9.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7c0410f181a531ec4e93bbc27692f2c71a15c2da16766f5ba9761e7ae518413"}, + {file = "matplotlib-3.9.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:909645cce2dc28b735674ce0931a4ac94e12f5b13f6bb0b5a5e65e7cea2c192b"}, + {file = "matplotlib-3.9.2-cp313-cp313-win_amd64.whl", hash = "sha256:f32c7410c7f246838a77d6d1eff0c0f87f3cb0e7c4247aebea71a6d5a68cab49"}, + {file = "matplotlib-3.9.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:37e51dd1c2db16ede9cfd7b5cabdfc818b2c6397c83f8b10e0e797501c963a03"}, + {file = "matplotlib-3.9.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b82c5045cebcecd8496a4d694d43f9cc84aeeb49fe2133e036b207abe73f4d30"}, + {file = "matplotlib-3.9.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f053c40f94bc51bc03832a41b4f153d83f2062d88c72b5e79997072594e97e51"}, + {file = "matplotlib-3.9.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dbe196377a8248972f5cede786d4c5508ed5f5ca4a1e09b44bda889958b33f8c"}, + {file = "matplotlib-3.9.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5816b1e1fe8c192cbc013f8f3e3368ac56fbecf02fb41b8f8559303f24c5015e"}, + {file = "matplotlib-3.9.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:cef2a73d06601437be399908cf13aee74e86932a5ccc6ccdf173408ebc5f6bb2"}, + {file = "matplotlib-3.9.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e0830e188029c14e891fadd99702fd90d317df294c3298aad682739c5533721a"}, + {file = "matplotlib-3.9.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03ba9c1299c920964e8d3857ba27173b4dbb51ca4bab47ffc2c2ba0eb5e2cbc5"}, + {file = "matplotlib-3.9.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1cd93b91ab47a3616b4d3c42b52f8363b88ca021e340804c6ab2536344fad9ca"}, + {file = 
"matplotlib-3.9.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:6d1ce5ed2aefcdce11904fc5bbea7d9c21fff3d5f543841edf3dea84451a09ea"}, + {file = "matplotlib-3.9.2-cp39-cp39-win_amd64.whl", hash = "sha256:b2696efdc08648536efd4e1601b5fd491fd47f4db97a5fbfd175549a7365c1b2"}, + {file = "matplotlib-3.9.2-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:d52a3b618cb1cbb769ce2ee1dcdb333c3ab6e823944e9a2d36e37253815f9556"}, + {file = "matplotlib-3.9.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:039082812cacd6c6bec8e17a9c1e6baca230d4116d522e81e1f63a74d01d2e21"}, + {file = "matplotlib-3.9.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6758baae2ed64f2331d4fd19be38b7b4eae3ecec210049a26b6a4f3ae1c85dcc"}, + {file = "matplotlib-3.9.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:050598c2b29e0b9832cde72bcf97627bf00262adbc4a54e2b856426bb2ef0697"}, + {file = "matplotlib-3.9.2.tar.gz", hash = "sha256:96ab43906269ca64a6366934106fa01534454a69e471b7bf3d79083981aaab92"}, +] + +[package.dependencies] +contourpy = ">=1.0.1" +cycler = ">=0.10" +fonttools = ">=4.22.0" +kiwisolver = ">=1.3.1" +numpy = ">=1.23" +packaging = ">=20.0" +pillow = ">=8" +pyparsing = ">=2.3.1" +python-dateutil = ">=2.7" + +[package.extras] +dev = ["meson-python (>=0.13.1)", "numpy (>=1.25)", "pybind11 (>=2.6)", "setuptools (>=64)", "setuptools_scm (>=7)"] + +[[package]] +name = "numpy" +version = "2.2.6" +description = "Fundamental package for array computing in Python" +optional = false +python-versions = ">=3.10" +groups = ["main"] +files = [ + {file = "numpy-2.2.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b412caa66f72040e6d268491a59f2c43bf03eb6c96dd8f0307829feb7fa2b6fb"}, + {file = "numpy-2.2.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e41fd67c52b86603a91c1a505ebaef50b3314de0213461c7a6e99c9a3beff90"}, + {file = "numpy-2.2.6-cp310-cp310-macosx_14_0_arm64.whl", hash = 
"sha256:37e990a01ae6ec7fe7fa1c26c55ecb672dd98b19c3d0e1d1f326fa13cb38d163"}, + {file = "numpy-2.2.6-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:5a6429d4be8ca66d889b7cf70f536a397dc45ba6faeb5f8c5427935d9592e9cf"}, + {file = "numpy-2.2.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:efd28d4e9cd7d7a8d39074a4d44c63eda73401580c5c76acda2ce969e0a38e83"}, + {file = "numpy-2.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc7b73d02efb0e18c000e9ad8b83480dfcd5dfd11065997ed4c6747470ae8915"}, + {file = "numpy-2.2.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:74d4531beb257d2c3f4b261bfb0fc09e0f9ebb8842d82a7b4209415896adc680"}, + {file = "numpy-2.2.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8fc377d995680230e83241d8a96def29f204b5782f371c532579b4f20607a289"}, + {file = "numpy-2.2.6-cp310-cp310-win32.whl", hash = "sha256:b093dd74e50a8cba3e873868d9e93a85b78e0daf2e98c6797566ad8044e8363d"}, + {file = "numpy-2.2.6-cp310-cp310-win_amd64.whl", hash = "sha256:f0fd6321b839904e15c46e0d257fdd101dd7f530fe03fd6359c1ea63738703f3"}, + {file = "numpy-2.2.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f9f1adb22318e121c5c69a09142811a201ef17ab257a1e66ca3025065b7f53ae"}, + {file = "numpy-2.2.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c820a93b0255bc360f53eca31a0e676fd1101f673dda8da93454a12e23fc5f7a"}, + {file = "numpy-2.2.6-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3d70692235e759f260c3d837193090014aebdf026dfd167834bcba43e30c2a42"}, + {file = "numpy-2.2.6-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:481b49095335f8eed42e39e8041327c05b0f6f4780488f61286ed3c01368d491"}, + {file = "numpy-2.2.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b64d8d4d17135e00c8e346e0a738deb17e754230d7e0810ac5012750bbd85a5a"}, + {file = "numpy-2.2.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:ba10f8411898fc418a521833e014a77d3ca01c15b0c6cdcce6a0d2897e6dbbdf"}, + {file = "numpy-2.2.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:bd48227a919f1bafbdda0583705e547892342c26fb127219d60a5c36882609d1"}, + {file = "numpy-2.2.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9551a499bf125c1d4f9e250377c1ee2eddd02e01eac6644c080162c0c51778ab"}, + {file = "numpy-2.2.6-cp311-cp311-win32.whl", hash = "sha256:0678000bb9ac1475cd454c6b8c799206af8107e310843532b04d49649c717a47"}, + {file = "numpy-2.2.6-cp311-cp311-win_amd64.whl", hash = "sha256:e8213002e427c69c45a52bbd94163084025f533a55a59d6f9c5b820774ef3303"}, + {file = "numpy-2.2.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:41c5a21f4a04fa86436124d388f6ed60a9343a6f767fced1a8a71c3fbca038ff"}, + {file = "numpy-2.2.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:de749064336d37e340f640b05f24e9e3dd678c57318c7289d222a8a2f543e90c"}, + {file = "numpy-2.2.6-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:894b3a42502226a1cac872f840030665f33326fc3dac8e57c607905773cdcde3"}, + {file = "numpy-2.2.6-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:71594f7c51a18e728451bb50cc60a3ce4e6538822731b2933209a1f3614e9282"}, + {file = "numpy-2.2.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2618db89be1b4e05f7a1a847a9c1c0abd63e63a1607d892dd54668dd92faf87"}, + {file = "numpy-2.2.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd83c01228a688733f1ded5201c678f0c53ecc1006ffbc404db9f7a899ac6249"}, + {file = "numpy-2.2.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:37c0ca431f82cd5fa716eca9506aefcabc247fb27ba69c5062a6d3ade8cf8f49"}, + {file = "numpy-2.2.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fe27749d33bb772c80dcd84ae7e8df2adc920ae8297400dabec45f0dedb3f6de"}, + {file = "numpy-2.2.6-cp312-cp312-win32.whl", hash = "sha256:4eeaae00d789f66c7a25ac5f34b71a7035bb474e679f410e5e1a94deb24cf2d4"}, + {file = 
"numpy-2.2.6-cp312-cp312-win_amd64.whl", hash = "sha256:c1f9540be57940698ed329904db803cf7a402f3fc200bfe599334c9bd84a40b2"}, + {file = "numpy-2.2.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0811bb762109d9708cca4d0b13c4f67146e3c3b7cf8d34018c722adb2d957c84"}, + {file = "numpy-2.2.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:287cc3162b6f01463ccd86be154f284d0893d2b3ed7292439ea97eafa8170e0b"}, + {file = "numpy-2.2.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:f1372f041402e37e5e633e586f62aa53de2eac8d98cbfb822806ce4bbefcb74d"}, + {file = "numpy-2.2.6-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:55a4d33fa519660d69614a9fad433be87e5252f4b03850642f88993f7b2ca566"}, + {file = "numpy-2.2.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f92729c95468a2f4f15e9bb94c432a9229d0d50de67304399627a943201baa2f"}, + {file = "numpy-2.2.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bc23a79bfabc5d056d106f9befb8d50c31ced2fbc70eedb8155aec74a45798f"}, + {file = "numpy-2.2.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e3143e4451880bed956e706a3220b4e5cf6172ef05fcc397f6f36a550b1dd868"}, + {file = "numpy-2.2.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b4f13750ce79751586ae2eb824ba7e1e8dba64784086c98cdbbcc6a42112ce0d"}, + {file = "numpy-2.2.6-cp313-cp313-win32.whl", hash = "sha256:5beb72339d9d4fa36522fc63802f469b13cdbe4fdab4a288f0c441b74272ebfd"}, + {file = "numpy-2.2.6-cp313-cp313-win_amd64.whl", hash = "sha256:b0544343a702fa80c95ad5d3d608ea3599dd54d4632df855e4c8d24eb6ecfa1c"}, + {file = "numpy-2.2.6-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0bca768cd85ae743b2affdc762d617eddf3bcf8724435498a1e80132d04879e6"}, + {file = "numpy-2.2.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fc0c5673685c508a142ca65209b4e79ed6740a4ed6b2267dbba90f34b0b3cfda"}, + {file = "numpy-2.2.6-cp313-cp313t-macosx_14_0_arm64.whl", hash = 
"sha256:5bd4fc3ac8926b3819797a7c0e2631eb889b4118a9898c84f585a54d475b7e40"}, + {file = "numpy-2.2.6-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:fee4236c876c4e8369388054d02d0e9bb84821feb1a64dd59e137e6511a551f8"}, + {file = "numpy-2.2.6-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e1dda9c7e08dc141e0247a5b8f49cf05984955246a327d4c48bda16821947b2f"}, + {file = "numpy-2.2.6-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f447e6acb680fd307f40d3da4852208af94afdfab89cf850986c3ca00562f4fa"}, + {file = "numpy-2.2.6-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:389d771b1623ec92636b0786bc4ae56abafad4a4c513d36a55dce14bd9ce8571"}, + {file = "numpy-2.2.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8e9ace4a37db23421249ed236fdcdd457d671e25146786dfc96835cd951aa7c1"}, + {file = "numpy-2.2.6-cp313-cp313t-win32.whl", hash = "sha256:038613e9fb8c72b0a41f025a7e4c3f0b7a1b5d768ece4796b674c8f3fe13efff"}, + {file = "numpy-2.2.6-cp313-cp313t-win_amd64.whl", hash = "sha256:6031dd6dfecc0cf9f668681a37648373bddd6421fff6c66ec1624eed0180ee06"}, + {file = "numpy-2.2.6-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0b605b275d7bd0c640cad4e5d30fa701a8d59302e127e5f79138ad62762c3e3d"}, + {file = "numpy-2.2.6-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:7befc596a7dc9da8a337f79802ee8adb30a552a94f792b9c9d18c840055907db"}, + {file = "numpy-2.2.6-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce47521a4754c8f4593837384bd3424880629f718d87c5d44f8ed763edd63543"}, + {file = "numpy-2.2.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d042d24c90c41b54fd506da306759e06e568864df8ec17ccc17e9e884634fd00"}, + {file = "numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd"}, +] + +[[package]] +name = "packaging" +version = "25.0" +description = "Core utilities for Python packages" +optional = false +python-versions = 
">=3.8" +groups = ["main", "dev"] +files = [ + {file = "packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484"}, + {file = "packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f"}, +] + +[[package]] +name = "pandas" +version = "2.2.3" +description = "Powerful data structures for data analysis, time series, and statistics" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "pandas-2.2.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5"}, + {file = "pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:381175499d3802cde0eabbaf6324cce0c4f5d52ca6f8c377c29ad442f50f6348"}, + {file = "pandas-2.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d9c45366def9a3dd85a6454c0e7908f2b3b8e9c138f5dc38fed7ce720d8453ed"}, + {file = "pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86976a1c5b25ae3f8ccae3a5306e443569ee3c3faf444dfd0f41cda24667ad57"}, + {file = "pandas-2.2.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b8661b0238a69d7aafe156b7fa86c44b881387509653fdf857bebc5e4008ad42"}, + {file = "pandas-2.2.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:37e0aced3e8f539eccf2e099f65cdb9c8aa85109b0be6e93e2baff94264bdc6f"}, + {file = "pandas-2.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:56534ce0746a58afaf7942ba4863e0ef81c9c50d3f0ae93e9497d6a41a057645"}, + {file = "pandas-2.2.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66108071e1b935240e74525006034333f98bcdb87ea116de573a6a0dccb6c039"}, + {file = "pandas-2.2.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7c2875855b0ff77b2a64a0365e24455d9990730d6431b9e0ee18ad8acee13dbd"}, + {file = "pandas-2.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:cd8d0c3be0515c12fed0bdbae072551c8b54b7192c7b1fda0ba56059a0179698"}, + {file = "pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c124333816c3a9b03fbeef3a9f230ba9a737e9e5bb4060aa2107a86cc0a497fc"}, + {file = "pandas-2.2.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:63cc132e40a2e084cf01adf0775b15ac515ba905d7dcca47e9a251819c575ef3"}, + {file = "pandas-2.2.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:29401dbfa9ad77319367d36940cd8a0b3a11aba16063e39632d98b0e931ddf32"}, + {file = "pandas-2.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:3fc6873a41186404dad67245896a6e440baacc92f5b716ccd1bc9ed2995ab2c5"}, + {file = "pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9"}, + {file = "pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4"}, + {file = "pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3"}, + {file = "pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319"}, + {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8"}, + {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a"}, + {file = "pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13"}, + {file = "pandas-2.2.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015"}, + {file = "pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:3508d914817e153ad359d7e069d752cdd736a247c322d932eb89e6bc84217f28"}, + {file = "pandas-2.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22a9d949bfc9a502d320aa04e5d02feab689d61da4e7764b62c30b991c42c5f0"}, + {file = "pandas-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24"}, + {file = "pandas-2.2.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:800250ecdadb6d9c78eae4990da62743b857b470883fa27f652db8bdde7f6659"}, + {file = "pandas-2.2.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6374c452ff3ec675a8f46fd9ab25c4ad0ba590b71cf0656f8b6daa5202bca3fb"}, + {file = "pandas-2.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:61c5ad4043f791b61dd4752191d9f07f0ae412515d59ba8f005832a532f8736d"}, + {file = "pandas-2.2.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3b71f27954685ee685317063bf13c7709a7ba74fc996b84fc6821c59b0f06468"}, + {file = "pandas-2.2.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:38cf8125c40dae9d5acc10fa66af8ea6fdf760b2714ee482ca691fc66e6fcb18"}, + {file = "pandas-2.2.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba96630bc17c875161df3818780af30e43be9b166ce51c9a18c1feae342906c2"}, + {file = "pandas-2.2.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db71525a1538b30142094edb9adc10be3f3e176748cd7acc2240c2f2e5aa3a4"}, + {file = "pandas-2.2.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d"}, + {file = "pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a"}, + {file = "pandas-2.2.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc6b93f9b966093cb0fd62ff1a7e4c09e6d546ad7c1de191767baffc57628f39"}, + {file = "pandas-2.2.3-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:5dbca4c1acd72e8eeef4753eeca07de9b1db4f398669d5994086f788a5d7cc30"}, + {file = "pandas-2.2.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8cd6d7cc958a3910f934ea8dbdf17b2364827bb4dafc38ce6eef6bb3d65ff09c"}, + {file = "pandas-2.2.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99df71520d25fade9db7c1076ac94eb994f4d2673ef2aa2e86ee039b6746d20c"}, + {file = "pandas-2.2.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:31d0ced62d4ea3e231a9f228366919a5ea0b07440d9d4dac345376fd8e1477ea"}, + {file = "pandas-2.2.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7eee9e7cea6adf3e3d24e304ac6b8300646e2a5d1cd3a3c2abed9101b0846761"}, + {file = "pandas-2.2.3-cp39-cp39-win_amd64.whl", hash = "sha256:4850ba03528b6dd51d6c5d273c46f183f39a9baf3f0143e566b89450965b105e"}, + {file = "pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.23.2", markers = "python_version == \"3.11\""}, + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, +] +python-dateutil = ">=2.8.2" +pytz = ">=2020.1" +tzdata = ">=2022.7" + +[package.extras] +all = ["PyQt5 (>=5.15.9)", "SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)", "beautifulsoup4 (>=4.11.2)", "bottleneck (>=1.3.6)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=2022.12.0)", "fsspec (>=2022.11.0)", "gcsfs (>=2022.11.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.9.2)", "matplotlib (>=3.6.3)", "numba (>=0.56.4)", "numexpr (>=2.8.4)", "odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "pandas-gbq (>=0.19.0)", "psycopg2 (>=2.9.6)", "pyarrow (>=10.0.1)", "pymysql (>=1.0.2)", "pyreadstat (>=1.2.0)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "qtpy (>=2.3.0)", "s3fs (>=2022.11.0)", "scipy (>=1.10.0)", "tables (>=3.8.0)", "tabulate (>=0.9.0)", 
"xarray (>=2022.12.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)", "zstandard (>=0.19.0)"] +aws = ["s3fs (>=2022.11.0)"] +clipboard = ["PyQt5 (>=5.15.9)", "qtpy (>=2.3.0)"] +compression = ["zstandard (>=0.19.0)"] +computation = ["scipy (>=1.10.0)", "xarray (>=2022.12.0)"] +consortium-standard = ["dataframe-api-compat (>=0.1.7)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)"] +feather = ["pyarrow (>=10.0.1)"] +fss = ["fsspec (>=2022.11.0)"] +gcp = ["gcsfs (>=2022.11.0)", "pandas-gbq (>=0.19.0)"] +hdf5 = ["tables (>=3.8.0)"] +html = ["beautifulsoup4 (>=4.11.2)", "html5lib (>=1.1)", "lxml (>=4.9.2)"] +mysql = ["SQLAlchemy (>=2.0.0)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.9.0)"] +parquet = ["pyarrow (>=10.0.1)"] +performance = ["bottleneck (>=1.3.6)", "numba (>=0.56.4)", "numexpr (>=2.8.4)"] +plot = ["matplotlib (>=3.6.3)"] +postgresql = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "psycopg2 (>=2.9.6)"] +pyarrow = ["pyarrow (>=10.0.1)"] +spss = ["pyreadstat (>=1.2.0)"] +sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)"] +test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] +xml = ["lxml (>=4.9.2)"] + +[[package]] +name = "pillow" +version = "12.0.0" +description = "Python Imaging Library (fork)" +optional = false +python-versions = ">=3.10" +groups = ["main"] +files = [ + {file = "pillow-12.0.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:3adfb466bbc544b926d50fe8f4a4e6abd8c6bffd28a26177594e6e9b2b76572b"}, + {file = "pillow-12.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1ac11e8ea4f611c3c0147424eae514028b5e9077dd99ab91e1bd7bc33ff145e1"}, + {file = "pillow-12.0.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d49e2314c373f4c2b39446fb1a45ed333c850e09d0c59ac79b72eb3b95397363"}, + {file = 
"pillow-12.0.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c7b2a63fd6d5246349f3d3f37b14430d73ee7e8173154461785e43036ffa96ca"}, + {file = "pillow-12.0.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d64317d2587c70324b79861babb9c09f71fbb780bad212018874b2c013d8600e"}, + {file = "pillow-12.0.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d77153e14b709fd8b8af6f66a3afbb9ed6e9fc5ccf0b6b7e1ced7b036a228782"}, + {file = "pillow-12.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:32ed80ea8a90ee3e6fa08c21e2e091bba6eda8eccc83dbc34c95169507a91f10"}, + {file = "pillow-12.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c828a1ae702fc712978bda0320ba1b9893d99be0badf2647f693cc01cf0f04fa"}, + {file = "pillow-12.0.0-cp310-cp310-win32.whl", hash = "sha256:bd87e140e45399c818fac4247880b9ce719e4783d767e030a883a970be632275"}, + {file = "pillow-12.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:455247ac8a4cfb7b9bc45b7e432d10421aea9fc2e74d285ba4072688a74c2e9d"}, + {file = "pillow-12.0.0-cp310-cp310-win_arm64.whl", hash = "sha256:6ace95230bfb7cd79ef66caa064bbe2f2a1e63d93471c3a2e1f1348d9f22d6b7"}, + {file = "pillow-12.0.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:0fd00cac9c03256c8b2ff58f162ebcd2587ad3e1f2e397eab718c47e24d231cc"}, + {file = "pillow-12.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a3475b96f5908b3b16c47533daaa87380c491357d197564e0ba34ae75c0f3257"}, + {file = "pillow-12.0.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:110486b79f2d112cf6add83b28b627e369219388f64ef2f960fef9ebaf54c642"}, + {file = "pillow-12.0.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5269cc1caeedb67e6f7269a42014f381f45e2e7cd42d834ede3c703a1d915fe3"}, + {file = "pillow-12.0.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:aa5129de4e174daccbc59d0a3b6d20eaf24417d59851c07ebb37aeb02947987c"}, + {file = "pillow-12.0.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bee2a6db3a7242ea309aa7ee8e2780726fed67ff4e5b40169f2c940e7eb09227"}, + {file = "pillow-12.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:90387104ee8400a7b4598253b4c406f8958f59fcf983a6cea2b50d59f7d63d0b"}, + {file = "pillow-12.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bc91a56697869546d1b8f0a3ff35224557ae7f881050e99f615e0119bf934b4e"}, + {file = "pillow-12.0.0-cp311-cp311-win32.whl", hash = "sha256:27f95b12453d165099c84f8a8bfdfd46b9e4bda9e0e4b65f0635430027f55739"}, + {file = "pillow-12.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:b583dc9070312190192631373c6c8ed277254aa6e6084b74bdd0a6d3b221608e"}, + {file = "pillow-12.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:759de84a33be3b178a64c8ba28ad5c135900359e85fb662bc6e403ad4407791d"}, + {file = "pillow-12.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:53561a4ddc36facb432fae7a9d8afbfaf94795414f5cdc5fc52f28c1dca90371"}, + {file = "pillow-12.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:71db6b4c1653045dacc1585c1b0d184004f0d7e694c7b34ac165ca70c0838082"}, + {file = "pillow-12.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2fa5f0b6716fc88f11380b88b31fe591a06c6315e955c096c35715788b339e3f"}, + {file = "pillow-12.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:82240051c6ca513c616f7f9da06e871f61bfd7805f566275841af15015b8f98d"}, + {file = "pillow-12.0.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:55f818bd74fe2f11d4d7cbc65880a843c4075e0ac7226bc1a23261dbea531953"}, + {file = "pillow-12.0.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b87843e225e74576437fd5b6a4c2205d422754f84a06942cfaf1dc32243e45a8"}, + {file = "pillow-12.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:c607c90ba67533e1b2355b821fef6764d1dd2cbe26b8c1005ae84f7aea25ff79"}, + {file = "pillow-12.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:21f241bdd5080a15bc86d3466a9f6074a9c2c2b314100dd896ac81ee6db2f1ba"}, + {file = "pillow-12.0.0-cp312-cp312-win32.whl", hash = "sha256:dd333073e0cacdc3089525c7df7d39b211bcdf31fc2824e49d01c6b6187b07d0"}, + {file = "pillow-12.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:9fe611163f6303d1619bbcb653540a4d60f9e55e622d60a3108be0d5b441017a"}, + {file = "pillow-12.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:7dfb439562f234f7d57b1ac6bc8fe7f838a4bd49c79230e0f6a1da93e82f1fad"}, + {file = "pillow-12.0.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:0869154a2d0546545cde61d1789a6524319fc1897d9ee31218eae7a60ccc5643"}, + {file = "pillow-12.0.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:a7921c5a6d31b3d756ec980f2f47c0cfdbce0fc48c22a39347a895f41f4a6ea4"}, + {file = "pillow-12.0.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:1ee80a59f6ce048ae13cda1abf7fbd2a34ab9ee7d401c46be3ca685d1999a399"}, + {file = "pillow-12.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c50f36a62a22d350c96e49ad02d0da41dbd17ddc2e29750dbdba4323f85eb4a5"}, + {file = "pillow-12.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5193fde9a5f23c331ea26d0cf171fbf67e3f247585f50c08b3e205c7aeb4589b"}, + {file = "pillow-12.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bde737cff1a975b70652b62d626f7785e0480918dece11e8fef3c0cf057351c3"}, + {file = "pillow-12.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a6597ff2b61d121172f5844b53f21467f7082f5fb385a9a29c01414463f93b07"}, + {file = "pillow-12.0.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0b817e7035ea7f6b942c13aa03bb554fc44fea70838ea21f8eb31c638326584e"}, + {file = "pillow-12.0.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:f4f1231b7dec408e8670264ce63e9c71409d9583dd21d32c163e25213ee2a344"}, + {file = "pillow-12.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e51b71417049ad6ab14c49608b4a24d8fb3fe605e5dfabfe523b58064dc3d27"}, + {file = "pillow-12.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d120c38a42c234dc9a8c5de7ceaaf899cf33561956acb4941653f8bdc657aa79"}, + {file = "pillow-12.0.0-cp313-cp313-win32.whl", hash = "sha256:4cc6b3b2efff105c6a1656cfe59da4fdde2cda9af1c5e0b58529b24525d0a098"}, + {file = "pillow-12.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:4cf7fed4b4580601c4345ceb5d4cbf5a980d030fd5ad07c4d2ec589f95f09905"}, + {file = "pillow-12.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:9f0b04c6b8584c2c193babcccc908b38ed29524b29dd464bc8801bf10d746a3a"}, + {file = "pillow-12.0.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:7fa22993bac7b77b78cae22bad1e2a987ddf0d9015c63358032f84a53f23cdc3"}, + {file = "pillow-12.0.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f135c702ac42262573fe9714dfe99c944b4ba307af5eb507abef1667e2cbbced"}, + {file = "pillow-12.0.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c85de1136429c524e55cfa4e033b4a7940ac5c8ee4d9401cc2d1bf48154bbc7b"}, + {file = "pillow-12.0.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:38df9b4bfd3db902c9c2bd369bcacaf9d935b2fff73709429d95cc41554f7b3d"}, + {file = "pillow-12.0.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7d87ef5795da03d742bf49439f9ca4d027cde49c82c5371ba52464aee266699a"}, + {file = "pillow-12.0.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:aff9e4d82d082ff9513bdd6acd4f5bd359f5b2c870907d2b0a9c5e10d40c88fe"}, + {file = "pillow-12.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:8d8ca2b210ada074d57fcee40c30446c9562e542fc46aedc19baf758a93532ee"}, + {file = "pillow-12.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = 
"sha256:99a7f72fb6249302aa62245680754862a44179b545ded638cf1fef59befb57ef"}, + {file = "pillow-12.0.0-cp313-cp313t-win32.whl", hash = "sha256:4078242472387600b2ce8d93ade8899c12bf33fa89e55ec89fe126e9d6d5d9e9"}, + {file = "pillow-12.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2c54c1a783d6d60595d3514f0efe9b37c8808746a66920315bfd34a938d7994b"}, + {file = "pillow-12.0.0-cp313-cp313t-win_arm64.whl", hash = "sha256:26d9f7d2b604cd23aba3e9faf795787456ac25634d82cd060556998e39c6fa47"}, + {file = "pillow-12.0.0-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:beeae3f27f62308f1ddbcfb0690bf44b10732f2ef43758f169d5e9303165d3f9"}, + {file = "pillow-12.0.0-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:d4827615da15cd59784ce39d3388275ec093ae3ee8d7f0c089b76fa87af756c2"}, + {file = "pillow-12.0.0-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:3e42edad50b6909089750e65c91aa09aaf1e0a71310d383f11321b27c224ed8a"}, + {file = "pillow-12.0.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:e5d8efac84c9afcb40914ab49ba063d94f5dbdf5066db4482c66a992f47a3a3b"}, + {file = "pillow-12.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:266cd5f2b63ff316d5a1bba46268e603c9caf5606d44f38c2873c380950576ad"}, + {file = "pillow-12.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:58eea5ebe51504057dd95c5b77d21700b77615ab0243d8152793dc00eb4faf01"}, + {file = "pillow-12.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f13711b1a5ba512d647a0e4ba79280d3a9a045aaf7e0cc6fbe96b91d4cdf6b0c"}, + {file = "pillow-12.0.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6846bd2d116ff42cba6b646edf5bf61d37e5cbd256425fa089fee4ff5c07a99e"}, + {file = "pillow-12.0.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c98fa880d695de164b4135a52fd2e9cd7b7c90a9d8ac5e9e443a24a95ef9248e"}, + {file = "pillow-12.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = 
"sha256:fa3ed2a29a9e9d2d488b4da81dcb54720ac3104a20bf0bd273f1e4648aff5af9"}, + {file = "pillow-12.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d034140032870024e6b9892c692fe2968493790dd57208b2c37e3fb35f6df3ab"}, + {file = "pillow-12.0.0-cp314-cp314-win32.whl", hash = "sha256:1b1b133e6e16105f524a8dec491e0586d072948ce15c9b914e41cdadd209052b"}, + {file = "pillow-12.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:8dc232e39d409036af549c86f24aed8273a40ffa459981146829a324e0848b4b"}, + {file = "pillow-12.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:d52610d51e265a51518692045e372a4c363056130d922a7351429ac9f27e70b0"}, + {file = "pillow-12.0.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:1979f4566bb96c1e50a62d9831e2ea2d1211761e5662afc545fa766f996632f6"}, + {file = "pillow-12.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b2e4b27a6e15b04832fe9bf292b94b5ca156016bbc1ea9c2c20098a0320d6cf6"}, + {file = "pillow-12.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fb3096c30df99fd01c7bf8e544f392103d0795b9f98ba71a8054bcbf56b255f1"}, + {file = "pillow-12.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7438839e9e053ef79f7112c881cef684013855016f928b168b81ed5835f3e75e"}, + {file = "pillow-12.0.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d5c411a8eaa2299322b647cd932586b1427367fd3184ffbb8f7a219ea2041ca"}, + {file = "pillow-12.0.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d7e091d464ac59d2c7ad8e7e08105eaf9dafbc3883fd7265ffccc2baad6ac925"}, + {file = "pillow-12.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:792a2c0be4dcc18af9d4a2dfd8a11a17d5e25274a1062b0ec1c2d79c76f3e7f8"}, + {file = "pillow-12.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:afbefa430092f71a9593a99ab6a4e7538bc9eabbf7bf94f91510d3503943edc4"}, + {file = "pillow-12.0.0-cp314-cp314t-win32.whl", hash = 
"sha256:3830c769decf88f1289680a59d4f4c46c72573446352e2befec9a8512104fa52"}, + {file = "pillow-12.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:905b0365b210c73afb0ebe9101a32572152dfd1c144c7e28968a331b9217b94a"}, + {file = "pillow-12.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:99353a06902c2e43b43e8ff74ee65a7d90307d82370604746738a1e0661ccca7"}, + {file = "pillow-12.0.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:b22bd8c974942477156be55a768f7aa37c46904c175be4e158b6a86e3a6b7ca8"}, + {file = "pillow-12.0.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:805ebf596939e48dbb2e4922a1d3852cfc25c38160751ce02da93058b48d252a"}, + {file = "pillow-12.0.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cae81479f77420d217def5f54b5b9d279804d17e982e0f2fa19b1d1e14ab5197"}, + {file = "pillow-12.0.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:aeaefa96c768fc66818730b952a862235d68825c178f1b3ffd4efd7ad2edcb7c"}, + {file = "pillow-12.0.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:09f2d0abef9e4e2f349305a4f8cc784a8a6c2f58a8c4892eea13b10a943bd26e"}, + {file = "pillow-12.0.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bdee52571a343d721fb2eb3b090a82d959ff37fc631e3f70422e0c2e029f3e76"}, + {file = "pillow-12.0.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:b290fd8aa38422444d4b50d579de197557f182ef1068b75f5aa8558638b8d0a5"}, + {file = "pillow-12.0.0.tar.gz", hash = "sha256:87d4f8125c9988bfbed67af47dd7a953e2fc7b0cc1e7800ec6d2080d490bb353"}, +] + +[package.extras] +docs = ["furo", "olefile", "sphinx (>=8.2)", "sphinx-autobuild", "sphinx-copybutton", "sphinx-inline-tabs", "sphinxext-opengraph"] +fpx = ["olefile"] +mic = ["olefile"] +test-arrow = ["arro3-compute", "arro3-core", "nanoarrow", "pyarrow"] +tests = ["check-manifest", "coverage (>=7.4.2)", "defusedxml", "markdown2", "olefile", "packaging", "pyroma 
(>=5)", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "trove-classifiers (>=2024.10.12)"] +xmp = ["defusedxml"] + +[[package]] +name = "pluggy" +version = "1.6.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746"}, + {file = "pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["coverage", "pytest", "pytest-benchmark"] + +[[package]] +name = "pyarrow" +version = "22.0.0" +description = "Python library for Apache Arrow" +optional = false +python-versions = ">=3.10" +groups = ["main"] +files = [ + {file = "pyarrow-22.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:77718810bd3066158db1e95a63c160ad7ce08c6b0710bc656055033e39cdad88"}, + {file = "pyarrow-22.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:44d2d26cda26d18f7af7db71453b7b783788322d756e81730acb98f24eb90ace"}, + {file = "pyarrow-22.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:b9d71701ce97c95480fecb0039ec5bb889e75f110da72005743451339262f4ce"}, + {file = "pyarrow-22.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:710624ab925dc2b05a6229d47f6f0dac1c1155e6ed559be7109f684eba048a48"}, + {file = "pyarrow-22.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f963ba8c3b0199f9d6b794c90ec77545e05eadc83973897a4523c9e8d84e9340"}, + {file = "pyarrow-22.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:bd0d42297ace400d8febe55f13fdf46e86754842b860c978dfec16f081e5c653"}, + {file = "pyarrow-22.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:00626d9dc0f5ef3a75fe63fd68b9c7c8302d2b5bbc7f74ecaedba83447a24f84"}, + {file = "pyarrow-22.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = 
"sha256:3e294c5eadfb93d78b0763e859a0c16d4051fc1c5231ae8956d61cb0b5666f5a"}, + {file = "pyarrow-22.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:69763ab2445f632d90b504a815a2a033f74332997052b721002298ed6de40f2e"}, + {file = "pyarrow-22.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:b41f37cabfe2463232684de44bad753d6be08a7a072f6a83447eeaf0e4d2a215"}, + {file = "pyarrow-22.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:35ad0f0378c9359b3f297299c3309778bb03b8612f987399a0333a560b43862d"}, + {file = "pyarrow-22.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8382ad21458075c2e66a82a29d650f963ce51c7708c7c0ff313a8c206c4fd5e8"}, + {file = "pyarrow-22.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1a812a5b727bc09c3d7ea072c4eebf657c2f7066155506ba31ebf4792f88f016"}, + {file = "pyarrow-22.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:ec5d40dd494882704fb876c16fa7261a69791e784ae34e6b5992e977bd2e238c"}, + {file = "pyarrow-22.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:bea79263d55c24a32b0d79c00a1c58bb2ee5f0757ed95656b01c0fb310c5af3d"}, + {file = "pyarrow-22.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:12fe549c9b10ac98c91cf791d2945e878875d95508e1a5d14091a7aaa66d9cf8"}, + {file = "pyarrow-22.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:334f900ff08ce0423407af97e6c26ad5d4e3b0763645559ece6fbf3747d6a8f5"}, + {file = "pyarrow-22.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:c6c791b09c57ed76a18b03f2631753a4960eefbbca80f846da8baefc6491fcfe"}, + {file = "pyarrow-22.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c3200cb41cdbc65156e5f8c908d739b0dfed57e890329413da2748d1a2cd1a4e"}, + {file = "pyarrow-22.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ac93252226cf288753d8b46280f4edf3433bf9508b6977f8dd8526b521a1bbb9"}, + {file = "pyarrow-22.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:44729980b6c50a5f2bfcc2668d36c569ce17f8b17bccaf470c4313dcbbf13c9d"}, + {file = 
"pyarrow-22.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:e6e95176209257803a8b3d0394f21604e796dadb643d2f7ca21b66c9c0b30c9a"}, + {file = "pyarrow-22.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:001ea83a58024818826a9e3f89bf9310a114f7e26dfe404a4c32686f97bd7901"}, + {file = "pyarrow-22.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:ce20fe000754f477c8a9125543f1936ea5b8867c5406757c224d745ed033e691"}, + {file = "pyarrow-22.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:e0a15757fccb38c410947df156f9749ae4a3c89b2393741a50521f39a8cf202a"}, + {file = "pyarrow-22.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cedb9dd9358e4ea1d9bce3665ce0797f6adf97ff142c8e25b46ba9cdd508e9b6"}, + {file = "pyarrow-22.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:252be4a05f9d9185bb8c18e83764ebcfea7185076c07a7a662253af3a8c07941"}, + {file = "pyarrow-22.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:a4893d31e5ef780b6edcaf63122df0f8d321088bb0dee4c8c06eccb1ca28d145"}, + {file = "pyarrow-22.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:f7fe3dbe871294ba70d789be16b6e7e52b418311e166e0e3cba9522f0f437fb1"}, + {file = "pyarrow-22.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:ba95112d15fd4f1105fb2402c4eab9068f0554435e9b7085924bcfaac2cc306f"}, + {file = "pyarrow-22.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:c064e28361c05d72eed8e744c9605cbd6d2bb7481a511c74071fd9b24bc65d7d"}, + {file = "pyarrow-22.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:6f9762274496c244d951c819348afbcf212714902742225f649cf02823a6a10f"}, + {file = "pyarrow-22.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a9d9ffdc2ab696f6b15b4d1f7cec6658e1d788124418cb30030afbae31c64746"}, + {file = "pyarrow-22.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ec1a15968a9d80da01e1d30349b2b0d7cc91e96588ee324ce1b5228175043e95"}, + {file = "pyarrow-22.0.0-cp313-cp313t-win_amd64.whl", hash = 
"sha256:bba208d9c7decf9961998edf5c65e3ea4355d5818dd6cd0f6809bec1afb951cc"}, + {file = "pyarrow-22.0.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:9bddc2cade6561f6820d4cd73f99a0243532ad506bc510a75a5a65a522b2d74d"}, + {file = "pyarrow-22.0.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:e70ff90c64419709d38c8932ea9fe1cc98415c4f87ea8da81719e43f02534bc9"}, + {file = "pyarrow-22.0.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:92843c305330aa94a36e706c16209cd4df274693e777ca47112617db7d0ef3d7"}, + {file = "pyarrow-22.0.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:6dda1ddac033d27421c20d7a7943eec60be44e0db4e079f33cc5af3b8280ccde"}, + {file = "pyarrow-22.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:84378110dd9a6c06323b41b56e129c504d157d1a983ce8f5443761eb5256bafc"}, + {file = "pyarrow-22.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:854794239111d2b88b40b6ef92aa478024d1e5074f364033e73e21e3f76b25e0"}, + {file = "pyarrow-22.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:b883fe6fd85adad7932b3271c38ac289c65b7337c2c132e9569f9d3940620730"}, + {file = "pyarrow-22.0.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:7a820d8ae11facf32585507c11f04e3f38343c1e784c9b5a8b1da5c930547fe2"}, + {file = "pyarrow-22.0.0-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:c6ec3675d98915bf1ec8b3c7986422682f7232ea76cad276f4c8abd5b7319b70"}, + {file = "pyarrow-22.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:3e739edd001b04f654b166204fc7a9de896cf6007eaff33409ee9e50ceaff754"}, + {file = "pyarrow-22.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:7388ac685cab5b279a41dfe0a6ccd99e4dbf322edfb63e02fc0443bf24134e91"}, + {file = "pyarrow-22.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:f633074f36dbc33d5c05b5dc75371e5660f1dbf9c8b1d95669def05e5425989c"}, + {file = "pyarrow-22.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:4c19236ae2402a8663a2c8f21f1870a03cc57f0bef7e4b6eb3238cc82944de80"}, + 
{file = "pyarrow-22.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:0c34fe18094686194f204a3b1787a27456897d8a2d62caf84b61e8dfbc0252ae"}, + {file = "pyarrow-22.0.0.tar.gz", hash = "sha256:3d600dc583260d845c7d8a6db540339dd883081925da2bd1c5cb808f720b3cd9"}, +] + +[[package]] +name = "pydantic" +version = "2.12.4" +description = "Data validation using Python type hints" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "pydantic-2.12.4-py3-none-any.whl", hash = "sha256:92d3d202a745d46f9be6df459ac5a064fdaa3c1c4cd8adcfa332ccf3c05f871e"}, + {file = "pydantic-2.12.4.tar.gz", hash = "sha256:0f8cb9555000a4b5b617f66bfd2566264c4984b27589d3b845685983e8ea85ac"}, +] + +[package.dependencies] +annotated-types = ">=0.6.0" +pydantic-core = "2.41.5" +typing-extensions = ">=4.14.1" +typing-inspection = ">=0.4.2" + +[package.extras] +email = ["email-validator (>=2.0.0)"] +timezone = ["tzdata ; python_version >= \"3.9\" and platform_system == \"Windows\""] + +[[package]] +name = "pydantic-core" +version = "2.41.5" +description = "Core functionality for Pydantic validation and serialization" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "pydantic_core-2.41.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:77b63866ca88d804225eaa4af3e664c5faf3568cea95360d21f4725ab6e07146"}, + {file = "pydantic_core-2.41.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dfa8a0c812ac681395907e71e1274819dec685fec28273a28905df579ef137e2"}, + {file = "pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5921a4d3ca3aee735d9fd163808f5e8dd6c6972101e4adbda9a4667908849b97"}, + {file = "pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e25c479382d26a2a41b7ebea1043564a937db462816ea07afa8a44c0866d52f9"}, + {file = "pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:f547144f2966e1e16ae626d8ce72b4cfa0caedc7fa28052001c94fb2fcaa1c52"}, + {file = "pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6f52298fbd394f9ed112d56f3d11aabd0d5bd27beb3084cc3d8ad069483b8941"}, + {file = "pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:100baa204bb412b74fe285fb0f3a385256dad1d1879f0a5cb1499ed2e83d132a"}, + {file = "pydantic_core-2.41.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:05a2c8852530ad2812cb7914dc61a1125dc4e06252ee98e5638a12da6cc6fb6c"}, + {file = "pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:29452c56df2ed968d18d7e21f4ab0ac55e71dc59524872f6fc57dcf4a3249ed2"}, + {file = "pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:d5160812ea7a8a2ffbe233d8da666880cad0cbaf5d4de74ae15c313213d62556"}, + {file = "pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:df3959765b553b9440adfd3c795617c352154e497a4eaf3752555cfb5da8fc49"}, + {file = "pydantic_core-2.41.5-cp310-cp310-win32.whl", hash = "sha256:1f8d33a7f4d5a7889e60dc39856d76d09333d8a6ed0f5f1190635cbec70ec4ba"}, + {file = "pydantic_core-2.41.5-cp310-cp310-win_amd64.whl", hash = "sha256:62de39db01b8d593e45871af2af9e497295db8d73b085f6bfd0b18c83c70a8f9"}, + {file = "pydantic_core-2.41.5-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:a3a52f6156e73e7ccb0f8cced536adccb7042be67cb45f9562e12b319c119da6"}, + {file = "pydantic_core-2.41.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7f3bf998340c6d4b0c9a2f02d6a400e51f123b59565d74dc60d252ce888c260b"}, + {file = "pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:378bec5c66998815d224c9ca994f1e14c0c21cb95d2f52b6021cc0b2a58f2a5a"}, + {file = "pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e7b576130c69225432866fe2f4a469a85a54ade141d96fd396dffcf607b558f8"}, + 
{file = "pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6cb58b9c66f7e4179a2d5e0f849c48eff5c1fca560994d6eb6543abf955a149e"}, + {file = "pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:88942d3a3dff3afc8288c21e565e476fc278902ae4d6d134f1eeda118cc830b1"}, + {file = "pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f31d95a179f8d64d90f6831d71fa93290893a33148d890ba15de25642c5d075b"}, + {file = "pydantic_core-2.41.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c1df3d34aced70add6f867a8cf413e299177e0c22660cc767218373d0779487b"}, + {file = "pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4009935984bd36bd2c774e13f9a09563ce8de4abaa7226f5108262fa3e637284"}, + {file = "pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:34a64bc3441dc1213096a20fe27e8e128bd3ff89921706e83c0b1ac971276594"}, + {file = "pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c9e19dd6e28fdcaa5a1de679aec4141f691023916427ef9bae8584f9c2fb3b0e"}, + {file = "pydantic_core-2.41.5-cp311-cp311-win32.whl", hash = "sha256:2c010c6ded393148374c0f6f0bf89d206bf3217f201faa0635dcd56bd1520f6b"}, + {file = "pydantic_core-2.41.5-cp311-cp311-win_amd64.whl", hash = "sha256:76ee27c6e9c7f16f47db7a94157112a2f3a00e958bc626e2f4ee8bec5c328fbe"}, + {file = "pydantic_core-2.41.5-cp311-cp311-win_arm64.whl", hash = "sha256:4bc36bbc0b7584de96561184ad7f012478987882ebf9f9c389b23f432ea3d90f"}, + {file = "pydantic_core-2.41.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f41a7489d32336dbf2199c8c0a215390a751c5b014c2c1c5366e817202e9cdf7"}, + {file = "pydantic_core-2.41.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0"}, + {file = "pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69"}, + {file = "pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75"}, + {file = "pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05"}, + {file = "pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc"}, + {file = "pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c"}, + {file = "pydantic_core-2.41.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d38548150c39b74aeeb0ce8ee1d8e82696f4a4e16ddc6de7b1d8823f7de4b9b5"}, + {file = "pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c"}, + {file = "pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294"}, + {file = "pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1"}, + {file = "pydantic_core-2.41.5-cp312-cp312-win32.whl", hash = "sha256:b74557b16e390ec12dca509bce9264c3bbd128f8a2c376eaa68003d7f327276d"}, + {file = "pydantic_core-2.41.5-cp312-cp312-win_amd64.whl", hash = "sha256:1962293292865bca8e54702b08a4f26da73adc83dd1fcf26fbc875b35d81c815"}, + {file = "pydantic_core-2.41.5-cp312-cp312-win_arm64.whl", hash = "sha256:1746d4a3d9a794cacae06a5eaaccb4b8643a131d45fbc9af23e353dc0a5ba5c3"}, + {file = "pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9"}, + {file = 
"pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34"}, + {file = "pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0"}, + {file = "pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33"}, + {file = "pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e"}, + {file = "pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2"}, + {file = "pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586"}, + {file = "pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d"}, + {file = "pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740"}, + {file = "pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e"}, + {file = "pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858"}, + {file = "pydantic_core-2.41.5-cp313-cp313-win32.whl", hash = "sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36"}, + {file = "pydantic_core-2.41.5-cp313-cp313-win_amd64.whl", hash = "sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11"}, + {file = "pydantic_core-2.41.5-cp313-cp313-win_arm64.whl", 
hash = "sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd"}, + {file = "pydantic_core-2.41.5-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:3f37a19d7ebcdd20b96485056ba9e8b304e27d9904d233d7b1015db320e51f0a"}, + {file = "pydantic_core-2.41.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1d1d9764366c73f996edd17abb6d9d7649a7eb690006ab6adbda117717099b14"}, + {file = "pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e1c2af0fce638d5f1988b686f3b3ea8cd7de5f244ca147c777769e798a9cd1"}, + {file = "pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:506d766a8727beef16b7adaeb8ee6217c64fc813646b424d0804d67c16eddb66"}, + {file = "pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4819fa52133c9aa3c387b3328f25c1facc356491e6135b459f1de698ff64d869"}, + {file = "pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b761d210c9ea91feda40d25b4efe82a1707da2ef62901466a42492c028553a2"}, + {file = "pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22f0fb8c1c583a3b6f24df2470833b40207e907b90c928cc8d3594b76f874375"}, + {file = "pydantic_core-2.41.5-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2782c870e99878c634505236d81e5443092fba820f0373997ff75f90f68cd553"}, + {file = "pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0177272f88ab8312479336e1d777f6b124537d47f2123f89cb37e0accea97f90"}, + {file = "pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:63510af5e38f8955b8ee5687740d6ebf7c2a0886d15a6d65c32814613681bc07"}, + {file = "pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:e56ba91f47764cc14f1daacd723e3e82d1a89d783f0f5afe9c364b8bb491ccdb"}, + {file = "pydantic_core-2.41.5-cp314-cp314-win32.whl", hash = 
"sha256:aec5cf2fd867b4ff45b9959f8b20ea3993fc93e63c7363fe6851424c8a7e7c23"}, + {file = "pydantic_core-2.41.5-cp314-cp314-win_amd64.whl", hash = "sha256:8e7c86f27c585ef37c35e56a96363ab8de4e549a95512445b85c96d3e2f7c1bf"}, + {file = "pydantic_core-2.41.5-cp314-cp314-win_arm64.whl", hash = "sha256:e672ba74fbc2dc8eea59fb6d4aed6845e6905fc2a8afe93175d94a83ba2a01a0"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:8566def80554c3faa0e65ac30ab0932b9e3a5cd7f8323764303d468e5c37595a"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b80aa5095cd3109962a298ce14110ae16b8c1aece8b72f9dafe81cf597ad80b3"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3006c3dd9ba34b0c094c544c6006cc79e87d8612999f1a5d43b769b89181f23c"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72f6c8b11857a856bcfa48c86f5368439f74453563f951e473514579d44aa612"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cb1b2f9742240e4bb26b652a5aeb840aa4b417c7748b6f8387927bc6e45e40d"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3d54f38609ff308209bd43acea66061494157703364ae40c951f83ba99a1a9"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ff4321e56e879ee8d2a879501c8e469414d948f4aba74a2d4593184eb326660"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0d2568a8c11bf8225044aa94409e21da0cb09dcdafe9ecd10250b2baad531a9"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:a39455728aabd58ceabb03c90e12f71fd30fa69615760a075b9fec596456ccc3"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = 
"sha256:239edca560d05757817c13dc17c50766136d21f7cd0fac50295499ae24f90fdf"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:2a5e06546e19f24c6a96a129142a75cee553cc018ffee48a460059b1185f4470"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = "sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008"}, + {file = "pydantic_core-2.41.5-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:8bfeaf8735be79f225f3fefab7f941c712aaca36f1128c9d7e2352ee1aa87bdf"}, + {file = "pydantic_core-2.41.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:346285d28e4c8017da95144c7f3acd42740d637ff41946af5ce6e5e420502dd5"}, + {file = "pydantic_core-2.41.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a75dafbf87d6276ddc5b2bf6fae5254e3d0876b626eb24969a574fff9149ee5d"}, + {file = "pydantic_core-2.41.5-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7b93a4d08587e2b7e7882de461e82b6ed76d9026ce91ca7915e740ecc7855f60"}, + {file = "pydantic_core-2.41.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e8465ab91a4bd96d36dde3263f06caa6a8a6019e4113f24dc753d79a8b3a3f82"}, + {file = "pydantic_core-2.41.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:299e0a22e7ae2b85c1a57f104538b2656e8ab1873511fd718a1c1c6f149b77b5"}, + {file = "pydantic_core-2.41.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:707625ef0983fcfb461acfaf14de2067c5942c6bb0f3b4c99158bed6fedd3cf3"}, + {file = "pydantic_core-2.41.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f41eb9797986d6ebac5e8edff36d5cef9de40def462311b3eb3eeded1431e425"}, + 
{file = "pydantic_core-2.41.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0384e2e1021894b1ff5a786dbf94771e2986ebe2869533874d7e43bc79c6f504"}, + {file = "pydantic_core-2.41.5-cp39-cp39-musllinux_1_1_armv7l.whl", hash = "sha256:f0cd744688278965817fd0839c4a4116add48d23890d468bc436f78beb28abf5"}, + {file = "pydantic_core-2.41.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:753e230374206729bf0a807954bcc6c150d3743928a73faffee51ac6557a03c3"}, + {file = "pydantic_core-2.41.5-cp39-cp39-win32.whl", hash = "sha256:873e0d5b4fb9b89ef7c2d2a963ea7d02879d9da0da8d9d4933dee8ee86a8b460"}, + {file = "pydantic_core-2.41.5-cp39-cp39-win_amd64.whl", hash = "sha256:e4f4a984405e91527a0d62649ee21138f8e3d0ef103be488c1dc11a80d7f184b"}, + {file = "pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:b96d5f26b05d03cc60f11a7761a5ded1741da411e7fe0909e27a5e6a0cb7b034"}, + {file = "pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:634e8609e89ceecea15e2d61bc9ac3718caaaa71963717bf3c8f38bfde64242c"}, + {file = "pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:93e8740d7503eb008aa2df04d3b9735f845d43ae845e6dcd2be0b55a2da43cd2"}, + {file = "pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f15489ba13d61f670dcc96772e733aad1a6f9c429cc27574c6cdaed82d0146ad"}, + {file = "pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:7da7087d756b19037bc2c06edc6c170eeef3c3bafcb8f532ff17d64dc427adfd"}, + {file = "pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:aabf5777b5c8ca26f7824cb4a120a740c9588ed58df9b2d196ce92fba42ff8dc"}, + {file = "pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:c007fe8a43d43b3969e8469004e9845944f1a80e6acd47c150856bb87f230c56"}, + {file = "pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b"}, + {file = "pydantic_core-2.41.5-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b5819cd790dbf0c5eb9f82c73c16b39a65dd6dd4d1439dcdea7816ec9adddab8"}, + {file = "pydantic_core-2.41.5-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:5a4e67afbc95fa5c34cf27d9089bca7fcab4e51e57278d710320a70b956d1b9a"}, + {file = "pydantic_core-2.41.5-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ece5c59f0ce7d001e017643d8d24da587ea1f74f6993467d85ae8a5ef9d4f42b"}, + {file = "pydantic_core-2.41.5-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:16f80f7abe3351f8ea6858914ddc8c77e02578544a0ebc15b4c2e1a0e813b0b2"}, + {file = "pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:33cb885e759a705b426baada1fe68cbb0a2e68e34c5d0d0289a364cf01709093"}, + {file = "pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:c8d8b4eb992936023be7dee581270af5c6e0697a8559895f527f5b7105ecd36a"}, + {file = "pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:242a206cd0318f95cd21bdacff3fcc3aab23e79bba5cac3db5a841c9ef9c6963"}, + {file = "pydantic_core-2.41.5-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d3a978c4f57a597908b7e697229d996d77a6d3c94901e9edee593adada95ce1a"}, + {file = "pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b2379fa7ed44ddecb5bfe4e48577d752db9fc10be00a6b7446e9663ba143de26"}, + {file = "pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:266fb4cbf5e3cbd0b53669a6d1b039c45e3ce651fd5442eff4d07c2cc8d66808"}, + {file = 
"pydantic_core-2.41.5-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58133647260ea01e4d0500089a8c4f07bd7aa6ce109682b1426394988d8aaacc"}, + {file = "pydantic_core-2.41.5-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:287dad91cfb551c363dc62899a80e9e14da1f0e2b6ebde82c806612ca2a13ef1"}, + {file = "pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:03b77d184b9eb40240ae9fd676ca364ce1085f203e1b1256f8ab9984dca80a84"}, + {file = "pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:a668ce24de96165bb239160b3d854943128f4334822900534f2fe947930e5770"}, + {file = "pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f14f8f046c14563f8eb3f45f499cc658ab8d10072961e07225e507adb700e93f"}, + {file = "pydantic_core-2.41.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:56121965f7a4dc965bff783d70b907ddf3d57f6eba29b6d2e5dabfaf07799c51"}, + {file = "pydantic_core-2.41.5.tar.gz", hash = "sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e"}, +] + +[package.dependencies] +typing-extensions = ">=4.14.1" + +[[package]] +name = "pyparsing" +version = "3.3.1" +description = "pyparsing - Classes and methods to define and execute parsing grammars" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "pyparsing-3.3.1-py3-none-any.whl", hash = "sha256:023b5e7e5520ad96642e2c6db4cb683d3970bd640cdf7115049a6e9c3682df82"}, + {file = "pyparsing-3.3.1.tar.gz", hash = "sha256:47fad0f17ac1e2cad3de3b458570fbc9b03560aa029ed5e16ee5554da9a2251c"}, +] + +[package.extras] +diagrams = ["jinja2", "railroad-diagrams"] + +[[package]] +name = "pytest" +version = "8.3.2" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "pytest-8.3.2-py3-none-any.whl", hash = 
"sha256:4ba08f9ae7dcf84ded419494d229b48d0903ea6407b030eaec46df5e6a73bba5"}, + {file = "pytest-8.3.2.tar.gz", hash = "sha256:c132345d12ce551242c87269de812483f5bcc87cdbb4722e48487ba194f9fdce"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=1.5,<2" + +[package.extras] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main"] +files = [ + {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, + {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, +] + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "pytz" +version = "2025.2" +description = "World timezone definitions, modern and historical" +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"}, + {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"}, +] + +[[package]] +name = "scikit-learn" +version = "1.8.0" +description = "A set of python modules for machine learning and data mining" +optional = false +python-versions = ">=3.11" +groups = ["main"] +files = [ + {file = "scikit_learn-1.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:146b4d36f800c013d267b29168813f7a03a43ecd2895d04861f1240b564421da"}, + {file = "scikit_learn-1.8.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:f984ca4b14914e6b4094c5d52a32ea16b49832c03bd17a110f004db3c223e8e1"}, + 
{file = "scikit_learn-1.8.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5e30adb87f0cc81c7690a84f7932dd66be5bac57cfe16b91cb9151683a4a2d3b"}, + {file = "scikit_learn-1.8.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ada8121bcb4dac28d930febc791a69f7cb1673c8495e5eee274190b73a4559c1"}, + {file = "scikit_learn-1.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:c57b1b610bd1f40ba43970e11ce62821c2e6569e4d74023db19c6b26f246cb3b"}, + {file = "scikit_learn-1.8.0-cp311-cp311-win_arm64.whl", hash = "sha256:2838551e011a64e3053ad7618dda9310175f7515f1742fa2d756f7c874c05961"}, + {file = "scikit_learn-1.8.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:5fb63362b5a7ddab88e52b6dbb47dac3fd7dafeee740dc6c8d8a446ddedade8e"}, + {file = "scikit_learn-1.8.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:5025ce924beccb28298246e589c691fe1b8c1c96507e6d27d12c5fadd85bfd76"}, + {file = "scikit_learn-1.8.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4496bb2cf7a43ce1a2d7524a79e40bc5da45cf598dbf9545b7e8316ccba47bb4"}, + {file = "scikit_learn-1.8.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0bcfe4d0d14aec44921545fd2af2338c7471de9cb701f1da4c9d85906ab847a"}, + {file = "scikit_learn-1.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:35c007dedb2ffe38fe3ee7d201ebac4a2deccd2408e8621d53067733e3c74809"}, + {file = "scikit_learn-1.8.0-cp312-cp312-win_arm64.whl", hash = "sha256:8c497fff237d7b4e07e9ef1a640887fa4fb765647f86fbe00f969ff6280ce2bb"}, + {file = "scikit_learn-1.8.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0d6ae97234d5d7079dc0040990a6f7aeb97cb7fa7e8945f1999a429b23569e0a"}, + {file = "scikit_learn-1.8.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:edec98c5e7c128328124a029bceb09eda2d526997780fef8d65e9a69eead963e"}, + {file = "scikit_learn-1.8.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:74b66d8689d52ed04c271e1329f0c61635bcaf5b926db9b12d58914cdc01fe57"}, + {file = "scikit_learn-1.8.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8fdf95767f989b0cfedb85f7ed8ca215d4be728031f56ff5a519ee1e3276dc2e"}, + {file = "scikit_learn-1.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:2de443b9373b3b615aec1bb57f9baa6bb3a9bd093f1269ba95c17d870422b271"}, + {file = "scikit_learn-1.8.0-cp313-cp313-win_arm64.whl", hash = "sha256:eddde82a035681427cbedded4e6eff5e57fa59216c2e3e90b10b19ab1d0a65c3"}, + {file = "scikit_learn-1.8.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:7cc267b6108f0a1499a734167282c00c4ebf61328566b55ef262d48e9849c735"}, + {file = "scikit_learn-1.8.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:fe1c011a640a9f0791146011dfd3c7d9669785f9fed2b2a5f9e207536cf5c2fd"}, + {file = "scikit_learn-1.8.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:72358cce49465d140cc4e7792015bb1f0296a9742d5622c67e31399b75468b9e"}, + {file = "scikit_learn-1.8.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:80832434a6cc114f5219211eec13dcbc16c2bac0e31ef64c6d346cde3cf054cb"}, + {file = "scikit_learn-1.8.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ee787491dbfe082d9c3013f01f5991658b0f38aa8177e4cd4bf434c58f551702"}, + {file = "scikit_learn-1.8.0-cp313-cp313t-win_arm64.whl", hash = "sha256:bf97c10a3f5a7543f9b88cbf488d33d175e9146115a451ae34568597ba33dcde"}, + {file = "scikit_learn-1.8.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:c22a2da7a198c28dd1a6e1136f19c830beab7fdca5b3e5c8bba8394f8a5c45b3"}, + {file = "scikit_learn-1.8.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:6b595b07a03069a2b1740dc08c2299993850ea81cce4fe19b2421e0c970de6b7"}, + {file = "scikit_learn-1.8.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:29ffc74089f3d5e87dfca4c2c8450f88bdc61b0fc6ed5d267f3988f19a1309f6"}, + {file = 
"scikit_learn-1.8.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fb65db5d7531bccf3a4f6bec3462223bea71384e2cda41da0f10b7c292b9e7c4"}, + {file = "scikit_learn-1.8.0-cp314-cp314-win_amd64.whl", hash = "sha256:56079a99c20d230e873ea40753102102734c5953366972a71d5cb39a32bc40c6"}, + {file = "scikit_learn-1.8.0-cp314-cp314-win_arm64.whl", hash = "sha256:3bad7565bc9cf37ce19a7c0d107742b320c1285df7aab1a6e2d28780df167242"}, + {file = "scikit_learn-1.8.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:4511be56637e46c25721e83d1a9cea9614e7badc7040c4d573d75fbe257d6fd7"}, + {file = "scikit_learn-1.8.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:a69525355a641bf8ef136a7fa447672fb54fe8d60cab5538d9eb7c6438543fb9"}, + {file = "scikit_learn-1.8.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c2656924ec73e5939c76ac4c8b026fc203b83d8900362eb2599d8aee80e4880f"}, + {file = "scikit_learn-1.8.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:15fc3b5d19cc2be65404786857f2e13c70c83dd4782676dd6814e3b89dc8f5b9"}, + {file = "scikit_learn-1.8.0-cp314-cp314t-win_amd64.whl", hash = "sha256:00d6f1d66fbcf4eba6e356e1420d33cc06c70a45bb1363cd6f6a8e4ebbbdece2"}, + {file = "scikit_learn-1.8.0-cp314-cp314t-win_arm64.whl", hash = "sha256:f28dd15c6bb0b66ba09728cf09fd8736c304be29409bd8445a080c1280619e8c"}, + {file = "scikit_learn-1.8.0.tar.gz", hash = "sha256:9bccbb3b40e3de10351f8f5068e105d0f4083b1a65fa07b6634fbc401a6287fd"}, +] + +[package.dependencies] +joblib = ">=1.3.0" +numpy = ">=1.24.1" +scipy = ">=1.10.0" +threadpoolctl = ">=3.2.0" + +[package.extras] +benchmark = ["matplotlib (>=3.6.1)", "memory_profiler (>=0.57.0)", "pandas (>=1.5.0)"] +build = ["cython (>=3.1.2)", "meson-python (>=0.17.1)", "numpy (>=1.24.1)", "scipy (>=1.10.0)"] +docs = ["Pillow (>=10.1.0)", "matplotlib (>=3.6.1)", "memory_profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.5.0)", "plotly (>=5.18.0)", "polars (>=0.20.30)", 
"pooch (>=1.8.0)", "pydata-sphinx-theme (>=0.15.3)", "scikit-image (>=0.22.0)", "seaborn (>=0.13.0)", "sphinx (>=7.3.7)", "sphinx-copybutton (>=0.5.2)", "sphinx-design (>=0.6.0)", "sphinx-gallery (>=0.17.1)", "sphinx-prompt (>=1.4.0)", "sphinx-remove-toctrees (>=1.0.0.post1)", "sphinxcontrib-sass (>=0.3.4)", "sphinxext-opengraph (>=0.9.1)", "towncrier (>=24.8.0)"] +examples = ["matplotlib (>=3.6.1)", "pandas (>=1.5.0)", "plotly (>=5.18.0)", "pooch (>=1.8.0)", "scikit-image (>=0.22.0)", "seaborn (>=0.13.0)"] +install = ["joblib (>=1.3.0)", "numpy (>=1.24.1)", "scipy (>=1.10.0)", "threadpoolctl (>=3.2.0)"] +maintenance = ["conda-lock (==3.0.1)"] +tests = ["matplotlib (>=3.6.1)", "mypy (>=1.15)", "numpydoc (>=1.2.0)", "pandas (>=1.5.0)", "polars (>=0.20.30)", "pooch (>=1.8.0)", "pyamg (>=5.0.0)", "pyarrow (>=12.0.0)", "pytest (>=7.1.2)", "pytest-cov (>=2.9.0)", "ruff (>=0.11.7)"] + +[[package]] +name = "scipy" +version = "1.14.1" +description = "Fundamental algorithms for scientific computing in Python" +optional = false +python-versions = ">=3.10" +groups = ["main"] +files = [ + {file = "scipy-1.14.1-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:b28d2ca4add7ac16ae8bb6632a3c86e4b9e4d52d3e34267f6e1b0c1f8d87e389"}, + {file = "scipy-1.14.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:d0d2821003174de06b69e58cef2316a6622b60ee613121199cb2852a873f8cf3"}, + {file = "scipy-1.14.1-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:8bddf15838ba768bb5f5083c1ea012d64c9a444e16192762bd858f1e126196d0"}, + {file = "scipy-1.14.1-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:97c5dddd5932bd2a1a31c927ba5e1463a53b87ca96b5c9bdf5dfd6096e27efc3"}, + {file = "scipy-1.14.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ff0a7e01e422c15739ecd64432743cf7aae2b03f3084288f399affcefe5222d"}, + {file = "scipy-1.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:8e32dced201274bf96899e6491d9ba3e9a5f6b336708656466ad0522d8528f69"}, + {file = "scipy-1.14.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8426251ad1e4ad903a4514712d2fa8fdd5382c978010d1c6f5f37ef286a713ad"}, + {file = "scipy-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:a49f6ed96f83966f576b33a44257d869756df6cf1ef4934f59dd58b25e0327e5"}, + {file = "scipy-1.14.1-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:2da0469a4ef0ecd3693761acbdc20f2fdeafb69e6819cc081308cc978153c675"}, + {file = "scipy-1.14.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:c0ee987efa6737242745f347835da2cc5bb9f1b42996a4d97d5c7ff7928cb6f2"}, + {file = "scipy-1.14.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3a1b111fac6baec1c1d92f27e76511c9e7218f1695d61b59e05e0fe04dc59617"}, + {file = "scipy-1.14.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:8475230e55549ab3f207bff11ebfc91c805dc3463ef62eda3ccf593254524ce8"}, + {file = "scipy-1.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:278266012eb69f4a720827bdd2dc54b2271c97d84255b2faaa8f161a158c3b37"}, + {file = "scipy-1.14.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fef8c87f8abfb884dac04e97824b61299880c43f4ce675dd2cbeadd3c9b466d2"}, + {file = "scipy-1.14.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b05d43735bb2f07d689f56f7b474788a13ed8adc484a85aa65c0fd931cf9ccd2"}, + {file = "scipy-1.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:716e389b694c4bb564b4fc0c51bc84d381735e0d39d3f26ec1af2556ec6aad94"}, + {file = "scipy-1.14.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:631f07b3734d34aced009aaf6fedfd0eb3498a97e581c3b1e5f14a04164a456d"}, + {file = "scipy-1.14.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:af29a935803cc707ab2ed7791c44288a682f9c8107bc00f0eccc4f92c08d6e07"}, + {file = "scipy-1.14.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:2843f2d527d9eebec9a43e6b406fb7266f3af25a751aa91d62ff416f54170bc5"}, + {file = 
"scipy-1.14.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:eb58ca0abd96911932f688528977858681a59d61a7ce908ffd355957f7025cfc"}, + {file = "scipy-1.14.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30ac8812c1d2aab7131a79ba62933a2a76f582d5dbbc695192453dae67ad6310"}, + {file = "scipy-1.14.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f9ea80f2e65bdaa0b7627fb00cbeb2daf163caa015e59b7516395fe3bd1e066"}, + {file = "scipy-1.14.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:edaf02b82cd7639db00dbff629995ef185c8df4c3ffa71a5562a595765a06ce1"}, + {file = "scipy-1.14.1-cp312-cp312-win_amd64.whl", hash = "sha256:2ff38e22128e6c03ff73b6bb0f85f897d2362f8c052e3b8ad00532198fbdae3f"}, + {file = "scipy-1.14.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1729560c906963fc8389f6aac023739ff3983e727b1a4d87696b7bf108316a79"}, + {file = "scipy-1.14.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:4079b90df244709e675cdc8b93bfd8a395d59af40b72e339c2287c91860deb8e"}, + {file = "scipy-1.14.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:e0cf28db0f24a38b2a0ca33a85a54852586e43cf6fd876365c86e0657cfe7d73"}, + {file = "scipy-1.14.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:0c2f95de3b04e26f5f3ad5bb05e74ba7f68b837133a4492414b3afd79dfe540e"}, + {file = "scipy-1.14.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b99722ea48b7ea25e8e015e8341ae74624f72e5f21fc2abd45f3a93266de4c5d"}, + {file = "scipy-1.14.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5149e3fd2d686e42144a093b206aef01932a0059c2a33ddfa67f5f035bdfe13e"}, + {file = "scipy-1.14.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e4f5a7c49323533f9103d4dacf4e4f07078f360743dec7f7596949149efeec06"}, + {file = "scipy-1.14.1-cp313-cp313-win_amd64.whl", hash = "sha256:baff393942b550823bfce952bb62270ee17504d02a1801d7fd0719534dfb9c84"}, + {file = "scipy-1.14.1.tar.gz", hash = 
"sha256:5a275584e726026a5699459aa72f828a610821006228e841b94275c4a7c08417"}, +] + +[package.dependencies] +numpy = ">=1.23.5,<2.3" + +[package.extras] +dev = ["cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy (==1.10.0)", "pycodestyle", "pydevtool", "rich-click", "ruff (>=0.0.292)", "types-psutil", "typing_extensions"] +doc = ["jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.13.1)", "jupytext", "matplotlib (>=3.5)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0,<=7.3.7)", "sphinx-design (>=0.4.0)"] +test = ["Cython", "array-api-strict (>=2.0)", "asv", "gmpy2", "hypothesis (>=6.30)", "meson", "mpmath", "ninja ; sys_platform != \"emscripten\"", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] + +[[package]] +name = "six" +version = "1.17.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main"] +files = [ + {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, + {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, +] + +[[package]] +name = "sniffio" +version = "1.3.1" +description = "Sniff out which async library your code is running under" +optional = false +python-versions = ">=3.7" +groups = ["dev"] +files = [ + {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, + {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, +] + +[[package]] +name = "starlette" +version = "0.49.3" +description = "The little ASGI library that shines." 
+optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "starlette-0.49.3-py3-none-any.whl", hash = "sha256:b579b99715fdc2980cf88c8ec96d3bf1ce16f5a8051a7c2b84ef9b1cdecaea2f"}, + {file = "starlette-0.49.3.tar.gz", hash = "sha256:1c14546f299b5901a1ea0e34410575bc33bbd741377a10484a54445588d00284"}, +] + +[package.dependencies] +anyio = ">=3.6.2,<5" +typing-extensions = {version = ">=4.10.0", markers = "python_version < \"3.13\""} + +[package.extras] +full = ["httpx (>=0.27.0,<0.29.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.18)", "pyyaml"] + +[[package]] +name = "threadpoolctl" +version = "3.5.0" +description = "threadpoolctl" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "threadpoolctl-3.5.0-py3-none-any.whl", hash = "sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467"}, + {file = "threadpoolctl-3.5.0.tar.gz", hash = "sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107"}, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +description = "Backported and Experimental Type Hints for Python 3.9+" +optional = false +python-versions = ">=3.9" +groups = ["main", "dev"] +files = [ + {file = "typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548"}, + {file = "typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466"}, +] +markers = {dev = "python_version < \"3.13\""} + +[[package]] +name = "typing-inspection" +version = "0.4.2" +description = "Runtime typing introspection tools" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7"}, + {file = "typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464"}, +] + 
+[package.dependencies] +typing-extensions = ">=4.12.0" + +[[package]] +name = "tzdata" +version = "2025.3" +description = "Provider of IANA time zone data" +optional = false +python-versions = ">=2" +groups = ["main"] +files = [ + {file = "tzdata-2025.3-py2.py3-none-any.whl", hash = "sha256:06a47e5700f3081aab02b2e513160914ff0694bce9947d6b76ebd6bf57cfc5d1"}, + {file = "tzdata-2025.3.tar.gz", hash = "sha256:de39c2ca5dc7b0344f2eba86f49d614019d29f060fc4ebc8a417896a620b56a7"}, +] + +[[package]] +name = "uvicorn" +version = "0.30.6" +description = "The lightning-fast ASGI server." +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "uvicorn-0.30.6-py3-none-any.whl", hash = "sha256:65fd46fe3fda5bdc1b03b94eb634923ff18cd35b2f084813ea79d1f103f711b5"}, + {file = "uvicorn-0.30.6.tar.gz", hash = "sha256:4b15decdda1e72be08209e860a1e10e92439ad5b97cf44cc945fcbee66fc5788"}, +] + +[package.dependencies] +click = ">=7.0" +h11 = ">=0.8" + +[package.extras] +standard = ["colorama (>=0.4) ; sys_platform == \"win32\"", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1) ; sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\"", "watchfiles (>=0.13)", "websockets (>=10.4)"] + +[metadata] +lock-version = "2.1" +python-versions = ">=3.11" +content-hash = "c13caf85eb3e16cc5fac0286cdcb43928c1f618d12acc794d58d6615f82e0399" diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/pyproject.toml b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/pyproject.toml new file mode 100644 index 
0000000000000000000000000000000000000000..2f2d035b5498925485dd7abbd4bcdf48c84f7fb8 --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/pyproject.toml @@ -0,0 +1,29 @@ +[tool.poetry] +name = "ocr-projet06" +version = "0.1.0" +description = "Credit scoring API and notebooks" +authors = ["OCR Project"] +readme = "README.md" +packages = [{ include = "app" }] + +[tool.poetry.dependencies] +python = ">=3.11" +fastapi = "0.121.0" +uvicorn = "0.30.6" +pydantic = "2.12.4" +numpy = ">=2.2.6" +pandas = "2.2.3" +pyarrow = ">=15.0.2" +matplotlib = "3.9.2" +scikit-learn = "1.8.0" +scipy = "1.14.1" +joblib = "1.5.2" +threadpoolctl = "3.5.0" + +[tool.poetry.group.dev.dependencies] +pytest = "8.3.2" +httpx = "0.27.2" + +[build-system] +requires = ["poetry-core>=1.8.0"] +build-backend = "poetry.core.masonry.api" diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/conftest.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/conftest.py new file mode 100644 index 0000000000000000000000000000000000000000..f743e35540f3f591477e0c64210cdd52cb7b28d4 --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/conftest.py @@ -0,0 +1,9 @@ +import sys +import os +from pathlib import Path + +os.environ.setdefault("ALLOW_MISSING_ARTIFACTS", "1") + +ROOT = 
Path(__file__).resolve().parents[1] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_api.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_api.py new file mode 100644 index 0000000000000000000000000000000000000000..0093ca065fcbbd2636e04466916ebfac42611fd6 --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_api.py @@ -0,0 +1,109 @@ +import pytest +from fastapi.testclient import TestClient + +from app.main import app + + +@pytest.fixture(scope="session") +def client(): + with TestClient(app) as test_client: + yield test_client + + +def _build_payload(preprocessor): + data = {} + for col in preprocessor.required_input_columns: + if col in preprocessor.numeric_medians: + data[col] = preprocessor.numeric_medians[col] + elif col in preprocessor.categorical_columns: + data[col] = "Unknown" + else: + data[col] = 0 + data["SK_ID_CURR"] = int(data.get("SK_ID_CURR", 100001)) + return {"data": data} + + +def _pick_required_column(preprocessor, exclude=None): + exclude = set(exclude or []) + for col in preprocessor.required_input_columns: + if col not in exclude: + return col + raise AssertionError("No required column available for test.") + + +def _pick_numeric_range(preprocessor): + for col, bounds in preprocessor.numeric_ranges.items(): + if col in preprocessor.numeric_required_columns: + return col, bounds + raise AssertionError("No numeric range 
available for test.") + + +def _pick_numeric_required(preprocessor): + for col in preprocessor.numeric_required_columns: + if col != "SK_ID_CURR": + return col + raise AssertionError("No numeric required column available for test.") + + +def test_health(client): + resp = client.get("/health") + assert resp.status_code == 200 + assert resp.json() == {"status": "ok"} + + +def test_features(client): + resp = client.get("/features") + assert resp.status_code == 200 + payload = resp.json() + assert "input_features" in payload + assert "required_input_features" in payload + assert "SK_ID_CURR" in payload["input_features"] + assert "EXT_SOURCE_2" in payload["input_features"] + + +def test_predict(client): + preprocessor = client.app.state.preprocessor + payload = _build_payload(preprocessor) + resp = client.post("/predict", json=payload) + assert resp.status_code == 200 + data = resp.json() + assert "predictions" in data + assert len(data["predictions"]) == 1 + result = data["predictions"][0] + assert "sk_id_curr" in result + assert "prediction" in result + assert "probability" in result + assert 0.0 <= result["probability"] <= 1.0 + + +def test_predict_missing_required_field(client): + preprocessor = client.app.state.preprocessor + payload = _build_payload(preprocessor) + missing_col = _pick_required_column(preprocessor, exclude={"SK_ID_CURR"}) + payload["data"].pop(missing_col, None) + resp = client.post("/predict", json=payload) + assert resp.status_code == 422 + detail = resp.json().get("detail", {}) + assert detail.get("message") == "Missing required input columns." 
+ + +def test_predict_invalid_type(client): + preprocessor = client.app.state.preprocessor + payload = _build_payload(preprocessor) + invalid_col = _pick_numeric_required(preprocessor) + payload["data"][invalid_col] = "not_a_number" + resp = client.post("/predict", json=payload) + assert resp.status_code == 422 + detail = resp.json().get("detail", {}) + assert detail.get("message") == "Invalid numeric values provided." + + +def test_predict_out_of_range(client): + preprocessor = client.app.state.preprocessor + payload = _build_payload(preprocessor) + col, (min_val, max_val) = _pick_numeric_range(preprocessor) + payload["data"][col] = max_val + 1 + resp = client.post("/predict", json=payload) + assert resp.status_code == 422 + detail = resp.json().get("detail", {}) + assert detail.get("message") == "Input contains values outside expected ranges." diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/drift_report.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/drift_report.py new file mode 100644 index 0000000000000000000000000000000000000000..4ece42c280ab59b10d46b1ad62bbb9d923b987fe --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/drift_report.py @@ -0,0 +1,233 @@ +from __future__ import annotations + +import argparse +import json +import re +from pathlib import Path + +import numpy as np +import pandas as pd +from scipy import stats + +try: + import matplotlib.pyplot as plt +except ImportError as exc: # pragma: no cover - optional 
plotting dependency + raise SystemExit( + "matplotlib is required for plots. Install it with: pip install matplotlib" + ) from exc + + +DEFAULT_FEATURES = [ + "EXT_SOURCE_2", + "EXT_SOURCE_3", + "AMT_ANNUITY", + "EXT_SOURCE_1", + "CODE_GENDER", + "DAYS_EMPLOYED", + "AMT_CREDIT", + "AMT_GOODS_PRICE", + "DAYS_BIRTH", + "FLAG_OWN_CAR", +] + +CATEGORICAL_FEATURES = {"CODE_GENDER", "FLAG_OWN_CAR"} + + +def _safe_name(value: str) -> str: + return re.sub(r"[^a-zA-Z0-9_-]+", "_", value) + + +def _load_logs(log_path: Path) -> tuple[pd.DataFrame, pd.DataFrame]: + entries: list[dict[str, object]] = [] + with log_path.open("r", encoding="utf-8") as handle: + for line in handle: + line = line.strip() + if not line: + continue + entries.append(json.loads(line)) + if not entries: + return pd.DataFrame(), pd.DataFrame() + inputs = [entry.get("inputs", {}) for entry in entries if entry.get("inputs")] + inputs_df = pd.DataFrame.from_records(inputs) + meta_df = pd.DataFrame.from_records(entries) + return inputs_df, meta_df + + +def _psi(reference: pd.Series, production: pd.Series, eps: float = 1e-6) -> float: + ref_freq = reference.value_counts(normalize=True, dropna=False) + prod_freq = production.value_counts(normalize=True, dropna=False) + categories = ref_freq.index.union(prod_freq.index) + ref_probs = ref_freq.reindex(categories, fill_value=0).to_numpy() + prod_probs = prod_freq.reindex(categories, fill_value=0).to_numpy() + ref_probs = np.clip(ref_probs, eps, None) + prod_probs = np.clip(prod_probs, eps, None) + return float(np.sum((ref_probs - prod_probs) * np.log(ref_probs / prod_probs))) + + +def _coerce_numeric(df: pd.DataFrame, columns: list[str]) -> pd.DataFrame: + out = df.copy() + for col in columns: + if col in out.columns: + out[col] = pd.to_numeric(out[col], errors="coerce") + return out + + +def _plot_numeric(ref: pd.Series, prod: pd.Series, output_path: Path) -> None: + plt.figure(figsize=(6, 4)) + plt.hist(ref.dropna(), bins=30, alpha=0.6, label="reference") + 
plt.hist(prod.dropna(), bins=30, alpha=0.6, label="production") + plt.title(f"Distribution: {ref.name}") + plt.legend() + plt.tight_layout() + plt.savefig(output_path) + plt.close() + + +def _plot_categorical(ref: pd.Series, prod: pd.Series, output_path: Path, max_categories: int = 10) -> None: + ref_series = ref.fillna("Unknown") + prod_series = prod.fillna("Unknown") + top = ref_series.value_counts().index[:max_categories] + ref_series = ref_series.where(ref_series.isin(top), other="__OTHER__") + prod_series = prod_series.where(prod_series.isin(top), other="__OTHER__") + ref_freq = ref_series.value_counts(normalize=True) + prod_freq = prod_series.value_counts(normalize=True) + plot_df = pd.DataFrame({"reference": ref_freq, "production": prod_freq}).fillna(0) + plot_df.sort_values("reference", ascending=False).plot(kind="bar", figsize=(7, 4)) + plt.title(f"Distribution: {ref.name}") + plt.tight_layout() + plt.savefig(output_path) + plt.close() + + +def generate_report( + log_path: Path, + reference_path: Path, + output_dir: Path, + sample_size: int, + psi_threshold: float, +) -> Path: + inputs_df, meta_df = _load_logs(log_path) + if inputs_df.empty: + raise SystemExit(f"No inputs found in logs: {log_path}") + + features = [col for col in DEFAULT_FEATURES if col in inputs_df.columns] + if not features: + raise SystemExit("No matching features found in production logs.") + + reference_df = pd.read_parquet(reference_path, columns=features) + if sample_size and len(reference_df) > sample_size: + reference_df = reference_df.sample(sample_size, random_state=42) + + numeric_features = [col for col in features if col not in CATEGORICAL_FEATURES] + production_df = _coerce_numeric(inputs_df, numeric_features) + reference_df = _coerce_numeric(reference_df, numeric_features) + + summary_rows: list[dict[str, object]] = [] + plots_dir = output_dir / "plots" + plots_dir.mkdir(parents=True, exist_ok=True) + + for feature in features: + if feature not in reference_df.columns: + 
continue + ref_series = reference_df[feature] + prod_series = production_df[feature] + if feature in CATEGORICAL_FEATURES: + psi_value = _psi(ref_series, prod_series) + summary_rows.append( + { + "feature": feature, + "type": "categorical", + "psi": round(psi_value, 4), + "drift_detected": psi_value >= psi_threshold, + } + ) + plot_path = plots_dir / f"{_safe_name(feature)}.png" + _plot_categorical(ref_series, prod_series, plot_path) + else: + ref_clean = ref_series.dropna() + prod_clean = prod_series.dropna() + if ref_clean.empty or prod_clean.empty: + continue + stat, pvalue = stats.ks_2samp(ref_clean, prod_clean) + summary_rows.append( + { + "feature": feature, + "type": "numeric", + "ks_stat": round(float(stat), 4), + "p_value": round(float(pvalue), 6), + "drift_detected": pvalue < 0.05, + } + ) + plot_path = plots_dir / f"{_safe_name(feature)}.png" + _plot_numeric(ref_series, prod_series, plot_path) + + summary_df = pd.DataFrame(summary_rows) + output_dir.mkdir(parents=True, exist_ok=True) + report_path = output_dir / "drift_report.html" + + total_calls = len(meta_df) + error_rate = float((meta_df.get("status_code", pd.Series(dtype=int)) >= 400).mean()) if total_calls else 0.0 + latency_ms = meta_df.get("latency_ms", pd.Series(dtype=float)).dropna() + latency_p50 = float(latency_ms.quantile(0.5)) if not latency_ms.empty else 0.0 + latency_p95 = float(latency_ms.quantile(0.95)) if not latency_ms.empty else 0.0 + + summary_html = summary_df.to_html(index=False, escape=False) + plots_html = "\n".join( + f"

{row['feature']}

" + for _, row in summary_df.iterrows() + ) + + html = f""" + + + + Drift Report + + + +

Production Monitoring Summary

+ +

Data Drift Summary

+ {summary_html} +

Feature Distributions

+ {plots_html} + + +""" + + report_path.write_text(html, encoding="utf-8") + return report_path + + +def main() -> None: + parser = argparse.ArgumentParser(description="Generate a drift report from production logs.") + parser.add_argument("--logs", type=Path, default=Path("logs/predictions.jsonl")) + parser.add_argument("--reference", type=Path, default=Path("data/data_final.parquet")) + parser.add_argument("--output-dir", type=Path, default=Path("reports")) + parser.add_argument("--sample-size", type=int, default=50000) + parser.add_argument("--psi-threshold", type=float, default=0.2) + args = parser.parse_args() + + report_path = generate_report( + log_path=args.logs, + reference_path=args.reference, + output_dir=args.output_dir, + sample_size=args.sample_size, + psi_threshold=args.psi_threshold, + ) + print(f"Drift report saved to {report_path}") + + +if __name__ == "__main__": + main() diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt index a440809e2fa72cccdfbefe8e394c0a84f2bca5f4..900b0c422455c914c48ba1df7957a9907b69a6e7 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt @@ -5,9 +5,11 @@ pydantic==2.12.4 numpy==2.2.6 pandas==2.2.3 pyarrow==22.0.0 
+matplotlib==3.9.2 scikit-learn==1.8.0 scipy==1.14.1 joblib==1.5.2 threadpoolctl==3.5.0 pytest==8.3.2 +pytest-cov==5.0.0 httpx==0.27.2 diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/drift_report.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/drift_report.py index 4ece42c280ab59b10d46b1ad62bbb9d923b987fe..ed57998f989c7dec8f1321094dcdb466e819fe07 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/drift_report.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/drift_report.py @@ -1,3 +1,5 @@ +# construire drift avec evidently + from __future__ import annotations import argparse @@ -10,6 +12,8 @@ import pandas as pd from scipy import stats try: + import matplotlib + matplotlib.use("Agg") import matplotlib.pyplot as plt except ImportError as exc: # pragma: no cover - optional plotting dependency raise SystemExit( @@ -99,12 +103,37 @@ def _plot_categorical(ref: pd.Series, prod: pd.Series, output_path: Path, max_ca plt.close() +def _plot_score_distribution(scores: pd.Series, output_path: Path, bins: int = 30) -> None: + plt.figure(figsize=(6, 4)) + plt.hist(scores.dropna(), bins=bins, range=(0, 1), alpha=0.8, color="#4C78A8") + plt.title("Prediction score distribution") + plt.xlabel("Predicted probability") + plt.ylabel("Count") + plt.tight_layout() + plt.savefig(output_path) + plt.close() + + +def 
_plot_prediction_rate(predictions: pd.Series, output_path: Path) -> None: + counts = predictions.value_counts(normalize=True, dropna=False).sort_index() + plt.figure(figsize=(4, 4)) + plt.bar(counts.index.astype(str), counts.values, color="#F58518") + plt.title("Prediction rate") + plt.xlabel("Predicted class") + plt.ylabel("Share") + plt.ylim(0, 1) + plt.tight_layout() + plt.savefig(output_path) + plt.close() + + def generate_report( log_path: Path, reference_path: Path, output_dir: Path, sample_size: int, psi_threshold: float, + score_bins: int, ) -> Path: inputs_df, meta_df = _load_logs(log_path) if inputs_df.empty: @@ -171,6 +200,45 @@ def generate_report( latency_p50 = float(latency_ms.quantile(0.5)) if not latency_ms.empty else 0.0 latency_p95 = float(latency_ms.quantile(0.95)) if not latency_ms.empty else 0.0 + valid_meta = meta_df + if "status_code" in meta_df.columns: + valid_meta = meta_df[meta_df["status_code"] < 400] + score_series = ( + pd.to_numeric(valid_meta.get("probability", pd.Series(dtype=float)), errors="coerce") + .dropna() + ) + pred_series = ( + pd.to_numeric(valid_meta.get("prediction", pd.Series(dtype=float)), errors="coerce") + .dropna() + ) + + score_metrics_html = "
  • No prediction scores available.
  • " + score_plots_html = "" + if not score_series.empty: + score_mean = float(score_series.mean()) + score_p50 = float(score_series.quantile(0.5)) + score_p95 = float(score_series.quantile(0.95)) + score_min = float(score_series.min()) + score_max = float(score_series.max()) + score_metrics = [ + f"
  • Score mean: {score_mean:.4f}
  • ", + f"
  • Score p50: {score_p50:.4f}
  • ", + f"
  • Score p95: {score_p95:.4f}
  • ", + f"
  • Score min: {score_min:.4f}
  • ", + f"
  • Score max: {score_max:.4f}
  • ", + ] + score_metrics_html = "\n".join(score_metrics) + score_plot_path = plots_dir / "score_distribution.png" + _plot_score_distribution(score_series, score_plot_path, bins=score_bins) + score_plots_html = "" + + if not pred_series.empty: + pred_rate = float(pred_series.mean()) + score_metrics_html += f"\n
  • Predicted default rate: {pred_rate:.2%}
  • " + pred_plot_path = plots_dir / "prediction_rate.png" + _plot_prediction_rate(pred_series, pred_plot_path) + score_plots_html += "\n" + summary_html = summary_df.to_html(index=False, escape=False) plots_html = "\n".join( f"

    {row['feature']}

    " @@ -198,6 +266,11 @@ def generate_report(
  • Latency p50: {latency_p50:.2f} ms
  • Latency p95: {latency_p95:.2f} ms
  • +

    Score Monitoring

    + + {score_plots_html}

    Data Drift Summary

    {summary_html}

    Feature Distributions

    @@ -217,6 +290,7 @@ def main() -> None: parser.add_argument("--output-dir", type=Path, default=Path("reports")) parser.add_argument("--sample-size", type=int, default=50000) parser.add_argument("--psi-threshold", type=float, default=0.2) + parser.add_argument("--score-bins", type=int, default=30) args = parser.parse_args() report_path = generate_report( @@ -225,6 +299,7 @@ def main() -> None: output_dir=args.output_dir, sample_size=args.sample_size, psi_threshold=args.psi_threshold, + score_bins=args.score_bins, ) print(f"Drift report saved to {report_path}") diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/poetry.lock b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/poetry.lock index a90d5239322405d2f6244e1e4578db10bf7b0969..842c70456593a29c82aab2bccdeb38c4928c5015 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/poetry.lock +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/poetry.lock @@ -1,5 +1,30 @@ # This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. +[[package]] +name = "altair" +version = "5.5.0" +description = "Vega-Altair: A declarative statistical visualization library for Python." 
+optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "altair-5.5.0-py3-none-any.whl", hash = "sha256:91a310b926508d560fe0148d02a194f38b824122641ef528113d029fcd129f8c"}, + {file = "altair-5.5.0.tar.gz", hash = "sha256:d960ebe6178c56de3855a68c47b516be38640b73fb3b5111c2a9ca90546dd73d"}, +] + +[package.dependencies] +jinja2 = "*" +jsonschema = ">=3.0" +narwhals = ">=1.14.2" +packaging = "*" +typing-extensions = {version = ">=4.10.0", markers = "python_version < \"3.14\""} + +[package.extras] +all = ["altair-tiles (>=0.3.0)", "anywidget (>=0.9.0)", "numpy", "pandas (>=1.1.3)", "pyarrow (>=11)", "vega-datasets (>=0.9.0)", "vegafusion[embed] (>=1.6.6)", "vl-convert-python (>=1.7.0)"] +dev = ["duckdb (>=1.0)", "geopandas", "hatch (>=1.13.0)", "ipython[kernel]", "mistune", "mypy", "pandas (>=1.1.3)", "pandas-stubs", "polars (>=0.20.3)", "pyarrow-stubs", "pytest", "pytest-cov", "pytest-xdist[psutil] (>=3.5,<4.0)", "ruff (>=0.6.0)", "types-jsonschema", "types-setuptools"] +doc = ["docutils", "jinja2", "myst-parser", "numpydoc", "pillow (>=9,<10)", "pydata-sphinx-theme (>=0.14.1)", "scipy", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinxext-altair"] +save = ["vl-convert-python (>=1.7.0)"] + [[package]] name = "annotated-doc" version = "0.0.4" @@ -43,18 +68,177 @@ typing_extensions = {version = ">=4.5", markers = "python_version < \"3.13\""} [package.extras] trio = ["trio (>=0.31.0) ; python_version < \"3.10\"", "trio (>=0.32.0) ; python_version >= \"3.10\""] +[[package]] +name = "attrs" +version = "25.4.0" +description = "Classes Without Boilerplate" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "attrs-25.4.0-py3-none-any.whl", hash = "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373"}, + {file = "attrs-25.4.0.tar.gz", hash = "sha256:16d5969b87f0859ef33a48b35d55ac1be6e42ae49d5e853b597db70c35c57e11"}, +] + +[[package]] +name = "blinker" +version = "1.9.0" +description = "Fast, 
simple object-to-object and broadcast signaling" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "blinker-1.9.0-py3-none-any.whl", hash = "sha256:ba0efaa9080b619ff2f3459d1d500c57bddea4a6b424b60a91141db6fd2f08bc"}, + {file = "blinker-1.9.0.tar.gz", hash = "sha256:b4ce2265a7abece45e7cc896e98dbebe6cead56bcf805a3d23136d145f5445bf"}, +] + +[[package]] +name = "cachetools" +version = "5.5.2" +description = "Extensible memoizing collections and decorators" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a"}, + {file = "cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4"}, +] + [[package]] name = "certifi" version = "2025.11.12" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.7" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "certifi-2025.11.12-py3-none-any.whl", hash = "sha256:97de8790030bbd5c2d96b7ec782fc2f7820ef8dba6db909ccf95449f2d062d4b"}, {file = "certifi-2025.11.12.tar.gz", hash = "sha256:d8ab5478f2ecd78af242878415affce761ca6bc54a22a27e026d7c25357c3316"}, ] +[[package]] +name = "charset-normalizer" +version = "3.4.4" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
+optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "charset_normalizer-3.4.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e824f1492727fa856dd6eda4f7cee25f8518a12f3c4a56a74e8095695089cf6d"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4bd5d4137d500351a30687c2d3971758aac9a19208fc110ccb9d7188fbe709e8"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:027f6de494925c0ab2a55eab46ae5129951638a49a34d87f4c3eda90f696b4ad"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f820802628d2694cb7e56db99213f930856014862f3fd943d290ea8438d07ca8"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:798d75d81754988d2565bff1b97ba5a44411867c0cf32b77a7e8f8d84796b10d"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d1bb833febdff5c8927f922386db610b49db6e0d4f4ee29601d71e7c2694313"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9cd98cdc06614a2f768d2b7286d66805f94c48cde050acdbbb7db2600ab3197e"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:077fbb858e903c73f6c9db43374fd213b0b6a778106bc7032446a8e8b5b38b93"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:244bfb999c71b35de57821b8ea746b24e863398194a4014e4c76adc2bbdfeff0"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:64b55f9dce520635f018f907ff1b0df1fdc31f2795a922fb49dd14fbcdf48c84"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_riscv64.whl", hash = 
"sha256:faa3a41b2b66b6e50f84ae4a68c64fcd0c44355741c6374813a800cd6695db9e"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:6515f3182dbe4ea06ced2d9e8666d97b46ef4c75e326b79bb624110f122551db"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cc00f04ed596e9dc0da42ed17ac5e596c6ccba999ba6bd92b0e0aef2f170f2d6"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-win32.whl", hash = "sha256:f34be2938726fc13801220747472850852fe6b1ea75869a048d6f896838c896f"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-win_amd64.whl", hash = "sha256:a61900df84c667873b292c3de315a786dd8dac506704dea57bc957bd31e22c7d"}, + {file = "charset_normalizer-3.4.4-cp310-cp310-win_arm64.whl", hash = "sha256:cead0978fc57397645f12578bfd2d5ea9138ea0fac82b2f63f7f7c6877986a69"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6e1fcf0720908f200cd21aa4e6750a48ff6ce4afe7ff5a79a90d5ed8a08296f8"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f819d5fe9234f9f82d75bdfa9aef3a3d72c4d24a6e57aeaebba32a704553aa0"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:a59cb51917aa591b1c4e6a43c132f0cdc3c76dbad6155df4e28ee626cc77a0a3"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8ef3c867360f88ac904fd3f5e1f902f13307af9052646963ee08ff4f131adafc"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d9e45d7faa48ee908174d8fe84854479ef838fc6a705c9315372eacbc2f02897"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:840c25fb618a231545cbab0564a799f101b63b9901f2569faecd6b222ac72381"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ca5862d5b3928c4940729dacc329aa9102900382fea192fc5e52eb69d6093815"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d9c7f57c3d666a53421049053eaacdd14bbd0a528e2186fcb2e672effd053bb0"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:277e970e750505ed74c832b4bf75dac7476262ee2a013f5574dd49075879e161"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:31fd66405eaf47bb62e8cd575dc621c56c668f27d46a61d975a249930dd5e2a4"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:0d3d8f15c07f86e9ff82319b3d9ef6f4bf907608f53fe9d92b28ea9ae3d1fd89"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:9f7fcd74d410a36883701fafa2482a6af2ff5ba96b9a620e9e0721e28ead5569"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ebf3e58c7ec8a8bed6d66a75d7fb37b55e5015b03ceae72a8e7c74495551e224"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-win32.whl", hash = "sha256:eecbc200c7fd5ddb9a7f16c7decb07b566c29fa2161a16cf67b8d068bd21690a"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:5ae497466c7901d54b639cf42d5b8c1b6a4fead55215500d2f486d34db48d016"}, + {file = "charset_normalizer-3.4.4-cp311-cp311-win_arm64.whl", hash = "sha256:65e2befcd84bc6f37095f5961e68a6f077bf44946771354a28ad434c2cce0ae1"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525"}, + {file = "charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-win32.whl", hash = "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14"}, + {file = "charset_normalizer-3.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash 
= "sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-win32.whl", hash = "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c"}, + {file = "charset_normalizer-3.4.4-cp314-cp314-win_arm64.whl", hash = 
"sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ce8a0633f41a967713a59c4139d29110c07e826d131a316b50ce11b1d79b4f84"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eaabd426fe94daf8fd157c32e571c85cb12e66692f15516a83a03264b08d06c3"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:c4ef880e27901b6cc782f1b95f82da9313c0eb95c3af699103088fa0ac3ce9ac"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2aaba3b0819274cc41757a1da876f810a3e4d7b6eb25699253a4effef9e8e4af"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:778d2e08eda00f4256d7f672ca9fef386071c9202f5e4607920b86d7803387f2"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f155a433c2ec037d4e8df17d18922c3a0d9b3232a396690f17175d2946f0218d"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a8bf8d0f749c5757af2142fe7903a9df1d2e8aa3841559b2bad34b08d0e2bcf3"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:194f08cbb32dc406d6e1aea671a68be0823673db2832b38405deba2fb0d88f63"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_armv7l.whl", hash = "sha256:6aee717dcfead04c6eb1ce3bd29ac1e22663cdea57f943c87d1eab9a025438d7"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:cd4b7ca9984e5e7985c12bc60a6f173f3c958eae74f3ef6624bb6b26e2abbae4"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_riscv64.whl", hash = 
"sha256:b7cf1017d601aa35e6bb650b6ad28652c9cd78ee6caff19f3c28d03e1c80acbf"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:e912091979546adf63357d7e2ccff9b44f026c075aeaf25a52d0e95ad2281074"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:5cb4d72eea50c8868f5288b7f7f33ed276118325c1dfd3957089f6b519e1382a"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-win32.whl", hash = "sha256:837c2ce8c5a65a2035be9b3569c684358dfbf109fd3b6969630a87535495ceaa"}, + {file = "charset_normalizer-3.4.4-cp38-cp38-win_amd64.whl", hash = "sha256:44c2a8734b333e0578090c4cd6b16f275e07aa6614ca8715e6c038e865e70576"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:a9768c477b9d7bd54bc0c86dbaebdec6f03306675526c9927c0e8a04e8f94af9"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1bee1e43c28aa63cb16e5c14e582580546b08e535299b8b6158a7c9c768a1f3d"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:fd44c878ea55ba351104cb93cc85e74916eb8fa440ca7903e57575e97394f608"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:0f04b14ffe5fdc8c4933862d8306109a2c51e0704acfa35d51598eb45a1e89fc"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:cd09d08005f958f370f539f186d10aec3377d55b9eeb0d796025d4886119d76e"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4fe7859a4e3e8457458e2ff592f15ccb02f3da787fcd31e0183879c3ad4692a1"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:fa09f53c465e532f4d3db095e0c55b615f010ad81803d383195b6b5ca6cbf5f3"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:7fa17817dc5625de8a027cb8b26d9fefa3ea28c8253929b8d6649e705d2835b6"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:5947809c8a2417be3267efc979c47d76a079758166f7d43ef5ae8e9f92751f88"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:4902828217069c3c5c71094537a8e623f5d097858ac6ca8252f7b4d10b7560f1"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:7c308f7e26e4363d79df40ca5b2be1c6ba9f02bdbccfed5abddb7859a6ce72cf"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:2c9d3c380143a1fedbff95a312aa798578371eb29da42106a29019368a475318"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:cb01158d8b88ee68f15949894ccc6712278243d95f344770fa7593fa2d94410c"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-win32.whl", hash = "sha256:2677acec1a2f8ef614c6888b5b4ae4060cc184174a938ed4e8ef690e15d3e505"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-win_amd64.whl", hash = "sha256:f8e160feb2aed042cd657a72acc0b481212ed28b1b9a95c0cee1621b524e1966"}, + {file = "charset_normalizer-3.4.4-cp39-cp39-win_arm64.whl", hash = "sha256:b5d84d37db046c5ca74ee7bb47dd6cbc13f80665fdde3e8040bdd3fb015ecb50"}, + {file = "charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f"}, + {file = "charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a"}, +] + [[package]] name = "click" version = "8.3.1" @@ -175,6 +359,111 @@ mypy = ["bokeh", "contourpy[bokeh,docs]", "docutils-stubs", "mypy (==1.17.0)", " test = ["Pillow", "contourpy[test-no-images]", "matplotlib"] test-no-images = ["pytest", "pytest-cov", "pytest-rerunfailures", 
"pytest-xdist", "wurlitzer"] +[[package]] +name = "coverage" +version = "7.13.1" +description = "Code coverage measurement for Python" +optional = false +python-versions = ">=3.10" +groups = ["dev"] +files = [ + {file = "coverage-7.13.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e1fa280b3ad78eea5be86f94f461c04943d942697e0dac889fa18fff8f5f9147"}, + {file = "coverage-7.13.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c3d8c679607220979434f494b139dfb00131ebf70bb406553d69c1ff01a5c33d"}, + {file = "coverage-7.13.1-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:339dc63b3eba969067b00f41f15ad161bf2946613156fb131266d8debc8e44d0"}, + {file = "coverage-7.13.1-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:db622b999ffe49cb891f2fff3b340cdc2f9797d01a0a202a0973ba2562501d90"}, + {file = "coverage-7.13.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d1443ba9acbb593fa7c1c29e011d7c9761545fe35e7652e85ce7f51a16f7e08d"}, + {file = "coverage-7.13.1-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c832ec92c4499ac463186af72f9ed4d8daec15499b16f0a879b0d1c8e5cf4a3b"}, + {file = "coverage-7.13.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:562ec27dfa3f311e0db1ba243ec6e5f6ab96b1edfcfc6cf86f28038bc4961ce6"}, + {file = "coverage-7.13.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:4de84e71173d4dada2897e5a0e1b7877e5eefbfe0d6a44edee6ce31d9b8ec09e"}, + {file = "coverage-7.13.1-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:a5a68357f686f8c4d527a2dc04f52e669c2fc1cbde38f6f7eb6a0e58cbd17cae"}, + {file = "coverage-7.13.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:77cc258aeb29a3417062758975521eae60af6f79e930d6993555eeac6a8eac29"}, + {file = "coverage-7.13.1-cp310-cp310-win32.whl", hash = "sha256:bb4f8c3c9a9f34423dba193f241f617b08ffc63e27f67159f60ae6baf2dcfe0f"}, + {file = 
"coverage-7.13.1-cp310-cp310-win_amd64.whl", hash = "sha256:c8e2706ceb622bc63bac98ebb10ef5da80ed70fbd8a7999a5076de3afaef0fb1"}, + {file = "coverage-7.13.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1a55d509a1dc5a5b708b5dad3b5334e07a16ad4c2185e27b40e4dba796ab7f88"}, + {file = "coverage-7.13.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4d010d080c4888371033baab27e47c9df7d6fb28d0b7b7adf85a4a49be9298b3"}, + {file = "coverage-7.13.1-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:d938b4a840fb1523b9dfbbb454f652967f18e197569c32266d4d13f37244c3d9"}, + {file = "coverage-7.13.1-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:bf100a3288f9bb7f919b87eb84f87101e197535b9bd0e2c2b5b3179633324fee"}, + {file = "coverage-7.13.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ef6688db9bf91ba111ae734ba6ef1a063304a881749726e0d3575f5c10a9facf"}, + {file = "coverage-7.13.1-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0b609fc9cdbd1f02e51f67f51e5aee60a841ef58a68d00d5ee2c0faf357481a3"}, + {file = "coverage-7.13.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c43257717611ff5e9a1d79dce8e47566235ebda63328718d9b65dd640bc832ef"}, + {file = "coverage-7.13.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e09fbecc007f7b6afdfb3b07ce5bd9f8494b6856dd4f577d26c66c391b829851"}, + {file = "coverage-7.13.1-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:a03a4f3a19a189919c7055098790285cc5c5b0b3976f8d227aea39dbf9f8bfdb"}, + {file = "coverage-7.13.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3820778ea1387c2b6a818caec01c63adc5b3750211af6447e8dcfb9b6f08dbba"}, + {file = "coverage-7.13.1-cp311-cp311-win32.whl", hash = "sha256:ff10896fa55167371960c5908150b434b71c876dfab97b69478f22c8b445ea19"}, + {file = "coverage-7.13.1-cp311-cp311-win_amd64.whl", hash = 
"sha256:a998cc0aeeea4c6d5622a3754da5a493055d2d95186bad877b0a34ea6e6dbe0a"}, + {file = "coverage-7.13.1-cp311-cp311-win_arm64.whl", hash = "sha256:fea07c1a39a22614acb762e3fbbb4011f65eedafcb2948feeef641ac78b4ee5c"}, + {file = "coverage-7.13.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6f34591000f06e62085b1865c9bc5f7858df748834662a51edadfd2c3bfe0dd3"}, + {file = "coverage-7.13.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b67e47c5595b9224599016e333f5ec25392597a89d5744658f837d204e16c63e"}, + {file = "coverage-7.13.1-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3e7b8bd70c48ffb28461ebe092c2345536fb18bbbf19d287c8913699735f505c"}, + {file = "coverage-7.13.1-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:c223d078112e90dc0e5c4e35b98b9584164bea9fbbd221c0b21c5241f6d51b62"}, + {file = "coverage-7.13.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:794f7c05af0763b1bbd1b9e6eff0e52ad068be3b12cd96c87de037b01390c968"}, + {file = "coverage-7.13.1-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0642eae483cc8c2902e4af7298bf886d605e80f26382124cddc3967c2a3df09e"}, + {file = "coverage-7.13.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9f5e772ed5fef25b3de9f2008fe67b92d46831bd2bc5bdc5dd6bfd06b83b316f"}, + {file = "coverage-7.13.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:45980ea19277dc0a579e432aef6a504fe098ef3a9032ead15e446eb0f1191aee"}, + {file = "coverage-7.13.1-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:e4f18eca6028ffa62adbd185a8f1e1dd242f2e68164dba5c2b74a5204850b4cf"}, + {file = "coverage-7.13.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f8dca5590fec7a89ed6826fce625595279e586ead52e9e958d3237821fbc750c"}, + {file = "coverage-7.13.1-cp312-cp312-win32.whl", hash = "sha256:ff86d4e85188bba72cfb876df3e11fa243439882c55957184af44a35bd5880b7"}, + {file = 
"coverage-7.13.1-cp312-cp312-win_amd64.whl", hash = "sha256:16cc1da46c04fb0fb128b4dc430b78fa2aba8a6c0c9f8eb391fd5103409a6ac6"}, + {file = "coverage-7.13.1-cp312-cp312-win_arm64.whl", hash = "sha256:8d9bc218650022a768f3775dd7fdac1886437325d8d295d923ebcfef4892ad5c"}, + {file = "coverage-7.13.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:cb237bfd0ef4d5eb6a19e29f9e528ac67ac3be932ea6b44fb6cc09b9f3ecff78"}, + {file = "coverage-7.13.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1dcb645d7e34dcbcc96cd7c132b1fc55c39263ca62eb961c064eb3928997363b"}, + {file = "coverage-7.13.1-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3d42df8201e00384736f0df9be2ced39324c3907607d17d50d50116c989d84cd"}, + {file = "coverage-7.13.1-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:fa3edde1aa8807de1d05934982416cb3ec46d1d4d91e280bcce7cca01c507992"}, + {file = "coverage-7.13.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9edd0e01a343766add6817bc448408858ba6b489039eaaa2018474e4001651a4"}, + {file = "coverage-7.13.1-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:985b7836931d033570b94c94713c6dba5f9d3ff26045f72c3e5dbc5fe3361e5a"}, + {file = "coverage-7.13.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ffed1e4980889765c84a5d1a566159e363b71d6b6fbaf0bebc9d3c30bc016766"}, + {file = "coverage-7.13.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:8842af7f175078456b8b17f1b73a0d16a65dcbdc653ecefeb00a56b3c8c298c4"}, + {file = "coverage-7.13.1-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:ccd7a6fca48ca9c131d9b0a2972a581e28b13416fc313fb98b6d24a03ce9a398"}, + {file = "coverage-7.13.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0403f647055de2609be776965108447deb8e384fe4a553c119e3ff6bfbab4784"}, + {file = "coverage-7.13.1-cp313-cp313-win32.whl", hash = 
"sha256:549d195116a1ba1e1ae2f5ca143f9777800f6636eab917d4f02b5310d6d73461"}, + {file = "coverage-7.13.1-cp313-cp313-win_amd64.whl", hash = "sha256:5899d28b5276f536fcf840b18b61a9fce23cc3aec1d114c44c07fe94ebeaa500"}, + {file = "coverage-7.13.1-cp313-cp313-win_arm64.whl", hash = "sha256:868a2fae76dfb06e87291bcbd4dcbcc778a8500510b618d50496e520bd94d9b9"}, + {file = "coverage-7.13.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:67170979de0dacac3f3097d02b0ad188d8edcea44ccc44aaa0550af49150c7dc"}, + {file = "coverage-7.13.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f80e2bb21bfab56ed7405c2d79d34b5dc0bc96c2c1d2a067b643a09fb756c43a"}, + {file = "coverage-7.13.1-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:f83351e0f7dcdb14d7326c3d8d8c4e915fa685cbfdc6281f9470d97a04e9dfe4"}, + {file = "coverage-7.13.1-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:bb3f6562e89bad0110afbe64e485aac2462efdce6232cdec7862a095dc3412f6"}, + {file = "coverage-7.13.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:77545b5dcda13b70f872c3b5974ac64c21d05e65b1590b441c8560115dc3a0d1"}, + {file = "coverage-7.13.1-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a4d240d260a1aed814790bbe1f10a5ff31ce6c21bc78f0da4a1e8268d6c80dbd"}, + {file = "coverage-7.13.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:d2287ac9360dec3837bfdad969963a5d073a09a85d898bd86bea82aa8876ef3c"}, + {file = "coverage-7.13.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:0d2c11f3ea4db66b5cbded23b20185c35066892c67d80ec4be4bab257b9ad1e0"}, + {file = "coverage-7.13.1-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:3fc6a169517ca0d7ca6846c3c5392ef2b9e38896f61d615cb75b9e7134d4ee1e"}, + {file = "coverage-7.13.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:d10a2ed46386e850bb3de503a54f9fe8192e5917fcbb143bfef653a9355e9a53"}, + 
{file = "coverage-7.13.1-cp313-cp313t-win32.whl", hash = "sha256:75a6f4aa904301dab8022397a22c0039edc1f51e90b83dbd4464b8a38dc87842"}, + {file = "coverage-7.13.1-cp313-cp313t-win_amd64.whl", hash = "sha256:309ef5706e95e62578cda256b97f5e097916a2c26247c287bbe74794e7150df2"}, + {file = "coverage-7.13.1-cp313-cp313t-win_arm64.whl", hash = "sha256:92f980729e79b5d16d221038dbf2e8f9a9136afa072f9d5d6ed4cb984b126a09"}, + {file = "coverage-7.13.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:97ab3647280d458a1f9adb85244e81587505a43c0c7cff851f5116cd2814b894"}, + {file = "coverage-7.13.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:8f572d989142e0908e6acf57ad1b9b86989ff057c006d13b76c146ec6a20216a"}, + {file = "coverage-7.13.1-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:d72140ccf8a147e94274024ff6fd8fb7811354cf7ef88b1f0a988ebaa5bc774f"}, + {file = "coverage-7.13.1-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:d3c9f051b028810f5a87c88e5d6e9af3c0ff32ef62763bf15d29f740453ca909"}, + {file = "coverage-7.13.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f398ba4df52d30b1763f62eed9de5620dcde96e6f491f4c62686736b155aa6e4"}, + {file = "coverage-7.13.1-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:132718176cc723026d201e347f800cd1a9e4b62ccd3f82476950834dad501c75"}, + {file = "coverage-7.13.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9e549d642426e3579b3f4b92d0431543b012dcb6e825c91619d4e93b7363c3f9"}, + {file = "coverage-7.13.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:90480b2134999301eea795b3a9dbf606c6fbab1b489150c501da84a959442465"}, + {file = "coverage-7.13.1-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:e825dbb7f84dfa24663dd75835e7257f8882629fc11f03ecf77d84a75134b864"}, + {file = "coverage-7.13.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = 
"sha256:623dcc6d7a7ba450bbdbeedbaa0c42b329bdae16491af2282f12a7e809be7eb9"}, + {file = "coverage-7.13.1-cp314-cp314-win32.whl", hash = "sha256:6e73ebb44dca5f708dc871fe0b90cf4cff1a13f9956f747cc87b535a840386f5"}, + {file = "coverage-7.13.1-cp314-cp314-win_amd64.whl", hash = "sha256:be753b225d159feb397bd0bf91ae86f689bad0da09d3b301478cd39b878ab31a"}, + {file = "coverage-7.13.1-cp314-cp314-win_arm64.whl", hash = "sha256:228b90f613b25ba0019361e4ab81520b343b622fc657daf7e501c4ed6a2366c0"}, + {file = "coverage-7.13.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:60cfb538fe9ef86e5b2ab0ca8fc8d62524777f6c611dcaf76dc16fbe9b8e698a"}, + {file = "coverage-7.13.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:57dfc8048c72ba48a8c45e188d811e5efd7e49b387effc8fb17e97936dde5bf6"}, + {file = "coverage-7.13.1-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3f2f725aa3e909b3c5fdb8192490bdd8e1495e85906af74fe6e34a2a77ba0673"}, + {file = "coverage-7.13.1-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9ee68b21909686eeb21dfcba2c3b81fee70dcf38b140dcd5aa70680995fa3aa5"}, + {file = "coverage-7.13.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:724b1b270cb13ea2e6503476e34541a0b1f62280bc997eab443f87790202033d"}, + {file = "coverage-7.13.1-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:916abf1ac5cf7eb16bc540a5bf75c71c43a676f5c52fcb9fe75a2bd75fb944e8"}, + {file = "coverage-7.13.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:776483fd35b58d8afe3acbd9988d5de592ab6da2d2a865edfdbc9fdb43e7c486"}, + {file = "coverage-7.13.1-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:b6f3b96617e9852703f5b633ea01315ca45c77e879584f283c44127f0f1ec564"}, + {file = "coverage-7.13.1-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:bd63e7b74661fed317212fab774e2a648bc4bb09b35f25474f8e3325d2945cd7"}, + {file = 
"coverage-7.13.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:933082f161bbb3e9f90d00990dc956120f608cdbcaeea15c4d897f56ef4fe416"}, + {file = "coverage-7.13.1-cp314-cp314t-win32.whl", hash = "sha256:18be793c4c87de2965e1c0f060f03d9e5aff66cfeae8e1dbe6e5b88056ec153f"}, + {file = "coverage-7.13.1-cp314-cp314t-win_amd64.whl", hash = "sha256:0e42e0ec0cd3e0d851cb3c91f770c9301f48647cb2877cb78f74bdaa07639a79"}, + {file = "coverage-7.13.1-cp314-cp314t-win_arm64.whl", hash = "sha256:eaecf47ef10c72ece9a2a92118257da87e460e113b83cc0d2905cbbe931792b4"}, + {file = "coverage-7.13.1-py3-none-any.whl", hash = "sha256:2016745cb3ba554469d02819d78958b571792bb68e31302610e898f80dd3a573"}, + {file = "coverage-7.13.1.tar.gz", hash = "sha256:b7593fe7eb5feaa3fbb461ac79aac9f9fc0387a5ca8080b0c6fe2ca27b091afd"}, +] + +[package.extras] +toml = ["tomli ; python_full_version <= \"3.11.0a6\""] + [[package]] name = "cycler" version = "0.12.1" @@ -287,6 +576,40 @@ type1 = ["xattr ; sys_platform == \"darwin\""] unicode = ["unicodedata2 (>=17.0.0) ; python_version <= \"3.14\""] woff = ["brotli (>=1.0.1) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\"", "zopfli (>=0.1.4)"] +[[package]] +name = "gitdb" +version = "4.0.12" +description = "Git Object Database" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "gitdb-4.0.12-py3-none-any.whl", hash = "sha256:67073e15955400952c6565cc3e707c554a4eea2e428946f7a4c162fab9bd9bcf"}, + {file = "gitdb-4.0.12.tar.gz", hash = "sha256:5ef71f855d191a3326fcfbc0d5da835f26b13fbcba60c32c21091c349ffdb571"}, +] + +[package.dependencies] +smmap = ">=3.0.1,<6" + +[[package]] +name = "gitpython" +version = "3.1.46" +description = "GitPython is a Python library used to interact with Git repositories" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "gitpython-3.1.46-py3-none-any.whl", hash = 
"sha256:79812ed143d9d25b6d176a10bb511de0f9c67b1fa641d82097b0ab90398a2058"}, + {file = "gitpython-3.1.46.tar.gz", hash = "sha256:400124c7d0ef4ea03f7310ac2fbf7151e09ff97f2a3288d64a440c584a29c37f"}, +] + +[package.dependencies] +gitdb = ">=4.0.1,<5" + +[package.extras] +doc = ["sphinx (>=7.1.2,<7.2)", "sphinx-autodoc-typehints", "sphinx_rtd_theme"] +test = ["coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock ; python_version < \"3.8\"", "mypy (==1.18.2) ; python_version >= \"3.9\"", "pre-commit", "pytest (>=7.3.1)", "pytest-cov", "pytest-instafail", "pytest-mock", "pytest-sugar", "typing-extensions ; python_version < \"3.11\""] + [[package]] name = "h11" version = "0.16.0" @@ -374,6 +697,24 @@ files = [ {file = "iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730"}, ] +[[package]] +name = "jinja2" +version = "3.1.6" +description = "A very fast and expressive template engine." +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67"}, + {file = "jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d"}, +] + +[package.dependencies] +MarkupSafe = ">=2.0" + +[package.extras] +i18n = ["Babel (>=2.7)"] + [[package]] name = "joblib" version = "1.5.2" @@ -386,6 +727,43 @@ files = [ {file = "joblib-1.5.2.tar.gz", hash = "sha256:3faa5c39054b2f03ca547da9b2f52fde67c06240c31853f306aea97f13647b55"}, ] +[[package]] +name = "jsonschema" +version = "4.25.1" +description = "An implementation of JSON Schema validation for Python" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "jsonschema-4.25.1-py3-none-any.whl", hash = "sha256:3fba0169e345c7175110351d456342c364814cfcf3b964ba4587f22915230a63"}, + {file = "jsonschema-4.25.1.tar.gz", hash = "sha256:e4a9655ce0da0c0b67a085847e00a3a51449e1157f4f75e9fb5aa545e122eb85"}, 
+] + +[package.dependencies] +attrs = ">=22.2.0" +jsonschema-specifications = ">=2023.03.6" +referencing = ">=0.28.4" +rpds-py = ">=0.7.1" + +[package.extras] +format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"] +format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "rfc3987-syntax (>=1.1.0)", "uri-template", "webcolors (>=24.6.0)"] + +[[package]] +name = "jsonschema-specifications" +version = "2025.9.1" +description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe"}, + {file = "jsonschema_specifications-2025.9.1.tar.gz", hash = "sha256:b540987f239e745613c7a9176f3edb72b832a4ac465cf02712288397832b5e8d"}, +] + +[package.dependencies] +referencing = ">=0.31.0" + [[package]] name = "kiwisolver" version = "1.4.9" @@ -497,6 +875,129 @@ files = [ {file = "kiwisolver-1.4.9.tar.gz", hash = "sha256:c3b22c26c6fd6811b0ae8363b95ca8ce4ea3c202d3d0975b2914310ceb1bcc4d"}, ] +[[package]] +name = "markdown-it-py" +version = "4.0.0" +description = "Python port of markdown-it. Markdown parsing, done right!" 
+optional = false +python-versions = ">=3.10" +groups = ["main"] +files = [ + {file = "markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147"}, + {file = "markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3"}, +] + +[package.dependencies] +mdurl = ">=0.1,<1.0" + +[package.extras] +benchmarking = ["psutil", "pytest", "pytest-benchmark"] +compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "markdown-it-pyrs", "mistletoe (>=1.0,<2.0)", "mistune (>=3.0,<4.0)", "panflute (>=2.3,<3.0)"] +linkify = ["linkify-it-py (>=1,<3)"] +plugins = ["mdit-py-plugins (>=0.5.0)"] +profiling = ["gprof2dot"] +rtd = ["ipykernel", "jupyter_sphinx", "mdit-py-plugins (>=0.5.0)", "myst-parser", "pyyaml", "sphinx", "sphinx-book-theme (>=1.0,<2.0)", "sphinx-copybutton", "sphinx-design"] +testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions", "requests"] + +[[package]] +name = "markupsafe" +version = "3.0.3" +description = "Safely add untrusted strings to HTML/XML markup." 
+optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "markupsafe-3.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2f981d352f04553a7171b8e44369f2af4055f888dfb147d55e42d29e29e74559"}, + {file = "markupsafe-3.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e1c1493fb6e50ab01d20a22826e57520f1284df32f2d8601fdd90b6304601419"}, + {file = "markupsafe-3.0.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1ba88449deb3de88bd40044603fafffb7bc2b055d626a330323a9ed736661695"}, + {file = "markupsafe-3.0.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f42d0984e947b8adf7dd6dde396e720934d12c506ce84eea8476409563607591"}, + {file = "markupsafe-3.0.3-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c0c0b3ade1c0b13b936d7970b1d37a57acde9199dc2aecc4c336773e1d86049c"}, + {file = "markupsafe-3.0.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:0303439a41979d9e74d18ff5e2dd8c43ed6c6001fd40e5bf2e43f7bd9bbc523f"}, + {file = "markupsafe-3.0.3-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:d2ee202e79d8ed691ceebae8e0486bd9a2cd4794cec4824e1c99b6f5009502f6"}, + {file = "markupsafe-3.0.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:177b5253b2834fe3678cb4a5f0059808258584c559193998be2601324fdeafb1"}, + {file = "markupsafe-3.0.3-cp310-cp310-win32.whl", hash = "sha256:2a15a08b17dd94c53a1da0438822d70ebcd13f8c3a95abe3a9ef9f11a94830aa"}, + {file = "markupsafe-3.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:c4ffb7ebf07cfe8931028e3e4c85f0357459a3f9f9490886198848f4fa002ec8"}, + {file = "markupsafe-3.0.3-cp310-cp310-win_arm64.whl", hash = "sha256:e2103a929dfa2fcaf9bb4e7c091983a49c9ac3b19c9061b6d5427dd7d14d81a1"}, + {file = "markupsafe-3.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1cc7ea17a6824959616c525620e387f6dd30fec8cb44f649e31712db02123dad"}, + {file = 
"markupsafe-3.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4bd4cd07944443f5a265608cc6aab442e4f74dff8088b0dfc8238647b8f6ae9a"}, + {file = "markupsafe-3.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b5420a1d9450023228968e7e6a9ce57f65d148ab56d2313fcd589eee96a7a50"}, + {file = "markupsafe-3.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0bf2a864d67e76e5c9a34dc26ec616a66b9888e25e7b9460e1c76d3293bd9dbf"}, + {file = "markupsafe-3.0.3-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc51efed119bc9cfdf792cdeaa4d67e8f6fcccab66ed4bfdd6bde3e59bfcbb2f"}, + {file = "markupsafe-3.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:068f375c472b3e7acbe2d5318dea141359e6900156b5b2ba06a30b169086b91a"}, + {file = "markupsafe-3.0.3-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:7be7b61bb172e1ed687f1754f8e7484f1c8019780f6f6b0786e76bb01c2ae115"}, + {file = "markupsafe-3.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f9e130248f4462aaa8e2552d547f36ddadbeaa573879158d721bbd33dfe4743a"}, + {file = "markupsafe-3.0.3-cp311-cp311-win32.whl", hash = "sha256:0db14f5dafddbb6d9208827849fad01f1a2609380add406671a26386cdf15a19"}, + {file = "markupsafe-3.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:de8a88e63464af587c950061a5e6a67d3632e36df62b986892331d4620a35c01"}, + {file = "markupsafe-3.0.3-cp311-cp311-win_arm64.whl", hash = "sha256:3b562dd9e9ea93f13d53989d23a7e775fdfd1066c33494ff43f5418bc8c58a5c"}, + {file = "markupsafe-3.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d53197da72cc091b024dd97249dfc7794d6a56530370992a5e1a08983ad9230e"}, + {file = "markupsafe-3.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce"}, + {file = "markupsafe-3.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d"}, + {file = "markupsafe-3.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6dd0be5b5b189d31db7cda48b91d7e0a9795f31430b7f271219ab30f1d3ac9d"}, + {file = "markupsafe-3.0.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:94c6f0bb423f739146aec64595853541634bde58b2135f27f61c1ffd1cd4d16a"}, + {file = "markupsafe-3.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:be8813b57049a7dc738189df53d69395eba14fb99345e0a5994914a3864c8a4b"}, + {file = "markupsafe-3.0.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:83891d0e9fb81a825d9a6d61e3f07550ca70a076484292a70fde82c4b807286f"}, + {file = "markupsafe-3.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:77f0643abe7495da77fb436f50f8dab76dbc6e5fd25d39589a0f1fe6548bfa2b"}, + {file = "markupsafe-3.0.3-cp312-cp312-win32.whl", hash = "sha256:d88b440e37a16e651bda4c7c2b930eb586fd15ca7406cb39e211fcff3bf3017d"}, + {file = "markupsafe-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:26a5784ded40c9e318cfc2bdb30fe164bdb8665ded9cd64d500a34fb42067b1c"}, + {file = "markupsafe-3.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f"}, + {file = "markupsafe-3.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e1cf1972137e83c5d4c136c43ced9ac51d0e124706ee1c8aa8532c1287fa8795"}, + {file = "markupsafe-3.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:116bb52f642a37c115f517494ea5feb03889e04df47eeff5b130b1808ce7c219"}, + {file = "markupsafe-3.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:133a43e73a802c5562be9bbcd03d090aa5a1fe899db609c29e8c8d815c5f6de6"}, + {file = "markupsafe-3.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccfcd093f13f0f0b7fdd0f198b90053bf7b2f02a3927a30e63f3ccc9df56b676"}, + 
{file = "markupsafe-3.0.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:509fa21c6deb7a7a273d629cf5ec029bc209d1a51178615ddf718f5918992ab9"}, + {file = "markupsafe-3.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4afe79fb3de0b7097d81da19090f4df4f8d3a2b3adaa8764138aac2e44f3af1"}, + {file = "markupsafe-3.0.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:795e7751525cae078558e679d646ae45574b47ed6e7771863fcc079a6171a0fc"}, + {file = "markupsafe-3.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8485f406a96febb5140bfeca44a73e3ce5116b2501ac54fe953e488fb1d03b12"}, + {file = "markupsafe-3.0.3-cp313-cp313-win32.whl", hash = "sha256:bdd37121970bfd8be76c5fb069c7751683bdf373db1ed6c010162b2a130248ed"}, + {file = "markupsafe-3.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:9a1abfdc021a164803f4d485104931fb8f8c1efd55bc6b748d2f5774e78b62c5"}, + {file = "markupsafe-3.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:7e68f88e5b8799aa49c85cd116c932a1ac15caaa3f5db09087854d218359e485"}, + {file = "markupsafe-3.0.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:218551f6df4868a8d527e3062d0fb968682fe92054e89978594c28e642c43a73"}, + {file = "markupsafe-3.0.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3524b778fe5cfb3452a09d31e7b5adefeea8c5be1d43c4f810ba09f2ceb29d37"}, + {file = "markupsafe-3.0.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4e885a3d1efa2eadc93c894a21770e4bc67899e3543680313b09f139e149ab19"}, + {file = "markupsafe-3.0.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8709b08f4a89aa7586de0aadc8da56180242ee0ada3999749b183aa23df95025"}, + {file = "markupsafe-3.0.3-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b8512a91625c9b3da6f127803b166b629725e68af71f8184ae7e7d54686a56d6"}, + {file = "markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = 
"sha256:9b79b7a16f7fedff2495d684f2b59b0457c3b493778c9eed31111be64d58279f"}, + {file = "markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:12c63dfb4a98206f045aa9563db46507995f7ef6d83b2f68eda65c307c6829eb"}, + {file = "markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8f71bc33915be5186016f675cd83a1e08523649b0e33efdb898db577ef5bb009"}, + {file = "markupsafe-3.0.3-cp313-cp313t-win32.whl", hash = "sha256:69c0b73548bc525c8cb9a251cddf1931d1db4d2258e9599c28c07ef3580ef354"}, + {file = "markupsafe-3.0.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1b4b79e8ebf6b55351f0d91fe80f893b4743f104bff22e90697db1590e47a218"}, + {file = "markupsafe-3.0.3-cp313-cp313t-win_arm64.whl", hash = "sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287"}, + {file = "markupsafe-3.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe"}, + {file = "markupsafe-3.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026"}, + {file = "markupsafe-3.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737"}, + {file = "markupsafe-3.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97"}, + {file = "markupsafe-3.0.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d"}, + {file = "markupsafe-3.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda"}, + {file = "markupsafe-3.0.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf"}, + {file = 
"markupsafe-3.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe"}, + {file = "markupsafe-3.0.3-cp314-cp314-win32.whl", hash = "sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9"}, + {file = "markupsafe-3.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581"}, + {file = "markupsafe-3.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4"}, + {file = "markupsafe-3.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab"}, + {file = "markupsafe-3.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175"}, + {file = "markupsafe-3.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634"}, + {file = "markupsafe-3.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50"}, + {file = "markupsafe-3.0.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e"}, + {file = "markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5"}, + {file = "markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523"}, + {file = "markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc"}, + {file = "markupsafe-3.0.3-cp314-cp314t-win32.whl", hash = 
"sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d"}, + {file = "markupsafe-3.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9"}, + {file = "markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa"}, + {file = "markupsafe-3.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:15d939a21d546304880945ca1ecb8a039db6b4dc49b2c5a400387cdae6a62e26"}, + {file = "markupsafe-3.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f71a396b3bf33ecaa1626c255855702aca4d3d9fea5e051b41ac59a9c1c41edc"}, + {file = "markupsafe-3.0.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0f4b68347f8c5eab4a13419215bdfd7f8c9b19f2b25520968adfad23eb0ce60c"}, + {file = "markupsafe-3.0.3-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e8fc20152abba6b83724d7ff268c249fa196d8259ff481f3b1476383f8f24e42"}, + {file = "markupsafe-3.0.3-cp39-cp39-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:949b8d66bc381ee8b007cd945914c721d9aba8e27f71959d750a46f7c282b20b"}, + {file = "markupsafe-3.0.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:3537e01efc9d4dccdf77221fb1cb3b8e1a38d5428920e0657ce299b20324d758"}, + {file = "markupsafe-3.0.3-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:591ae9f2a647529ca990bc681daebdd52c8791ff06c2bfa05b65163e28102ef2"}, + {file = "markupsafe-3.0.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:a320721ab5a1aba0a233739394eb907f8c8da5c98c9181d1161e77a0c8e36f2d"}, + {file = "markupsafe-3.0.3-cp39-cp39-win32.whl", hash = "sha256:df2449253ef108a379b8b5d6b43f4b1a8e81a061d6537becd5582fba5f9196d7"}, + {file = "markupsafe-3.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:7c3fb7d25180895632e5d3148dbdc29ea38ccb7fd210aa27acbd1201a1902c6e"}, + {file = "markupsafe-3.0.3-cp39-cp39-win_arm64.whl", hash = 
"sha256:38664109c14ffc9e7437e86b4dceb442b0096dfe3541d7864d9cbe1da4cf36c8"}, + {file = "markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698"}, +] + [[package]] name = "matplotlib" version = "3.9.2" @@ -561,6 +1062,43 @@ python-dateutil = ">=2.7" [package.extras] dev = ["meson-python (>=0.13.1)", "numpy (>=1.25)", "pybind11 (>=2.6)", "setuptools (>=64)", "setuptools_scm (>=7)"] +[[package]] +name = "mdurl" +version = "0.1.2" +description = "Markdown URL utilities" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, + {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, +] + +[[package]] +name = "narwhals" +version = "2.14.0" +description = "Extremely lightweight compatibility layer between dataframe libraries" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "narwhals-2.14.0-py3-none-any.whl", hash = "sha256:b56796c9a00179bd757d15282c540024e1d5c910b19b8c9944d836566c030acf"}, + {file = "narwhals-2.14.0.tar.gz", hash = "sha256:98be155c3599db4d5c211e565c3190c398c87e7bf5b3cdb157dece67641946e0"}, +] + +[package.extras] +cudf = ["cudf (>=24.10.0)"] +dask = ["dask[dataframe] (>=2024.8)"] +duckdb = ["duckdb (>=1.1)"] +ibis = ["ibis-framework (>=6.0.0)", "packaging", "pyarrow-hotfix", "rich"] +modin = ["modin"] +pandas = ["pandas (>=1.1.3)"] +polars = ["polars (>=0.20.4)"] +pyarrow = ["pyarrow (>=13.0.0)"] +pyspark = ["pyspark (>=3.5.0)"] +pyspark-connect = ["pyspark[connect] (>=3.5.0)"] +sqlframe = ["sqlframe (>=3.22.0,!=3.39.3)"] + [[package]] name = "numpy" version = "2.2.6" @@ -628,14 +1166,14 @@ files = [ [[package]] name = "packaging" -version = "25.0" +version = "24.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" groups = 
["main", "dev"] files = [ - {file = "packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484"}, - {file = "packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f"}, + {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, + {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, ] [[package]] @@ -726,111 +1264,100 @@ xml = ["lxml (>=4.9.2)"] [[package]] name = "pillow" -version = "12.0.0" -description = "Python Imaging Library (fork)" +version = "10.4.0" +description = "Python Imaging Library (Fork)" optional = false -python-versions = ">=3.10" +python-versions = ">=3.8" groups = ["main"] files = [ - {file = "pillow-12.0.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:3adfb466bbc544b926d50fe8f4a4e6abd8c6bffd28a26177594e6e9b2b76572b"}, - {file = "pillow-12.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1ac11e8ea4f611c3c0147424eae514028b5e9077dd99ab91e1bd7bc33ff145e1"}, - {file = "pillow-12.0.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d49e2314c373f4c2b39446fb1a45ed333c850e09d0c59ac79b72eb3b95397363"}, - {file = "pillow-12.0.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c7b2a63fd6d5246349f3d3f37b14430d73ee7e8173154461785e43036ffa96ca"}, - {file = "pillow-12.0.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d64317d2587c70324b79861babb9c09f71fbb780bad212018874b2c013d8600e"}, - {file = "pillow-12.0.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d77153e14b709fd8b8af6f66a3afbb9ed6e9fc5ccf0b6b7e1ced7b036a228782"}, - {file = "pillow-12.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:32ed80ea8a90ee3e6fa08c21e2e091bba6eda8eccc83dbc34c95169507a91f10"}, - {file = 
"pillow-12.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c828a1ae702fc712978bda0320ba1b9893d99be0badf2647f693cc01cf0f04fa"}, - {file = "pillow-12.0.0-cp310-cp310-win32.whl", hash = "sha256:bd87e140e45399c818fac4247880b9ce719e4783d767e030a883a970be632275"}, - {file = "pillow-12.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:455247ac8a4cfb7b9bc45b7e432d10421aea9fc2e74d285ba4072688a74c2e9d"}, - {file = "pillow-12.0.0-cp310-cp310-win_arm64.whl", hash = "sha256:6ace95230bfb7cd79ef66caa064bbe2f2a1e63d93471c3a2e1f1348d9f22d6b7"}, - {file = "pillow-12.0.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:0fd00cac9c03256c8b2ff58f162ebcd2587ad3e1f2e397eab718c47e24d231cc"}, - {file = "pillow-12.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a3475b96f5908b3b16c47533daaa87380c491357d197564e0ba34ae75c0f3257"}, - {file = "pillow-12.0.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:110486b79f2d112cf6add83b28b627e369219388f64ef2f960fef9ebaf54c642"}, - {file = "pillow-12.0.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5269cc1caeedb67e6f7269a42014f381f45e2e7cd42d834ede3c703a1d915fe3"}, - {file = "pillow-12.0.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aa5129de4e174daccbc59d0a3b6d20eaf24417d59851c07ebb37aeb02947987c"}, - {file = "pillow-12.0.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bee2a6db3a7242ea309aa7ee8e2780726fed67ff4e5b40169f2c940e7eb09227"}, - {file = "pillow-12.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:90387104ee8400a7b4598253b4c406f8958f59fcf983a6cea2b50d59f7d63d0b"}, - {file = "pillow-12.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bc91a56697869546d1b8f0a3ff35224557ae7f881050e99f615e0119bf934b4e"}, - {file = "pillow-12.0.0-cp311-cp311-win32.whl", hash = "sha256:27f95b12453d165099c84f8a8bfdfd46b9e4bda9e0e4b65f0635430027f55739"}, - {file = "pillow-12.0.0-cp311-cp311-win_amd64.whl", hash = 
"sha256:b583dc9070312190192631373c6c8ed277254aa6e6084b74bdd0a6d3b221608e"}, - {file = "pillow-12.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:759de84a33be3b178a64c8ba28ad5c135900359e85fb662bc6e403ad4407791d"}, - {file = "pillow-12.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:53561a4ddc36facb432fae7a9d8afbfaf94795414f5cdc5fc52f28c1dca90371"}, - {file = "pillow-12.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:71db6b4c1653045dacc1585c1b0d184004f0d7e694c7b34ac165ca70c0838082"}, - {file = "pillow-12.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2fa5f0b6716fc88f11380b88b31fe591a06c6315e955c096c35715788b339e3f"}, - {file = "pillow-12.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:82240051c6ca513c616f7f9da06e871f61bfd7805f566275841af15015b8f98d"}, - {file = "pillow-12.0.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:55f818bd74fe2f11d4d7cbc65880a843c4075e0ac7226bc1a23261dbea531953"}, - {file = "pillow-12.0.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b87843e225e74576437fd5b6a4c2205d422754f84a06942cfaf1dc32243e45a8"}, - {file = "pillow-12.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c607c90ba67533e1b2355b821fef6764d1dd2cbe26b8c1005ae84f7aea25ff79"}, - {file = "pillow-12.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:21f241bdd5080a15bc86d3466a9f6074a9c2c2b314100dd896ac81ee6db2f1ba"}, - {file = "pillow-12.0.0-cp312-cp312-win32.whl", hash = "sha256:dd333073e0cacdc3089525c7df7d39b211bcdf31fc2824e49d01c6b6187b07d0"}, - {file = "pillow-12.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:9fe611163f6303d1619bbcb653540a4d60f9e55e622d60a3108be0d5b441017a"}, - {file = "pillow-12.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:7dfb439562f234f7d57b1ac6bc8fe7f838a4bd49c79230e0f6a1da93e82f1fad"}, - {file = "pillow-12.0.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = 
"sha256:0869154a2d0546545cde61d1789a6524319fc1897d9ee31218eae7a60ccc5643"}, - {file = "pillow-12.0.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:a7921c5a6d31b3d756ec980f2f47c0cfdbce0fc48c22a39347a895f41f4a6ea4"}, - {file = "pillow-12.0.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:1ee80a59f6ce048ae13cda1abf7fbd2a34ab9ee7d401c46be3ca685d1999a399"}, - {file = "pillow-12.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c50f36a62a22d350c96e49ad02d0da41dbd17ddc2e29750dbdba4323f85eb4a5"}, - {file = "pillow-12.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5193fde9a5f23c331ea26d0cf171fbf67e3f247585f50c08b3e205c7aeb4589b"}, - {file = "pillow-12.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bde737cff1a975b70652b62d626f7785e0480918dece11e8fef3c0cf057351c3"}, - {file = "pillow-12.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a6597ff2b61d121172f5844b53f21467f7082f5fb385a9a29c01414463f93b07"}, - {file = "pillow-12.0.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0b817e7035ea7f6b942c13aa03bb554fc44fea70838ea21f8eb31c638326584e"}, - {file = "pillow-12.0.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f4f1231b7dec408e8670264ce63e9c71409d9583dd21d32c163e25213ee2a344"}, - {file = "pillow-12.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e51b71417049ad6ab14c49608b4a24d8fb3fe605e5dfabfe523b58064dc3d27"}, - {file = "pillow-12.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d120c38a42c234dc9a8c5de7ceaaf899cf33561956acb4941653f8bdc657aa79"}, - {file = "pillow-12.0.0-cp313-cp313-win32.whl", hash = "sha256:4cc6b3b2efff105c6a1656cfe59da4fdde2cda9af1c5e0b58529b24525d0a098"}, - {file = "pillow-12.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:4cf7fed4b4580601c4345ceb5d4cbf5a980d030fd5ad07c4d2ec589f95f09905"}, - {file = "pillow-12.0.0-cp313-cp313-win_arm64.whl", hash = 
"sha256:9f0b04c6b8584c2c193babcccc908b38ed29524b29dd464bc8801bf10d746a3a"}, - {file = "pillow-12.0.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:7fa22993bac7b77b78cae22bad1e2a987ddf0d9015c63358032f84a53f23cdc3"}, - {file = "pillow-12.0.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f135c702ac42262573fe9714dfe99c944b4ba307af5eb507abef1667e2cbbced"}, - {file = "pillow-12.0.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c85de1136429c524e55cfa4e033b4a7940ac5c8ee4d9401cc2d1bf48154bbc7b"}, - {file = "pillow-12.0.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:38df9b4bfd3db902c9c2bd369bcacaf9d935b2fff73709429d95cc41554f7b3d"}, - {file = "pillow-12.0.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7d87ef5795da03d742bf49439f9ca4d027cde49c82c5371ba52464aee266699a"}, - {file = "pillow-12.0.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:aff9e4d82d082ff9513bdd6acd4f5bd359f5b2c870907d2b0a9c5e10d40c88fe"}, - {file = "pillow-12.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:8d8ca2b210ada074d57fcee40c30446c9562e542fc46aedc19baf758a93532ee"}, - {file = "pillow-12.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:99a7f72fb6249302aa62245680754862a44179b545ded638cf1fef59befb57ef"}, - {file = "pillow-12.0.0-cp313-cp313t-win32.whl", hash = "sha256:4078242472387600b2ce8d93ade8899c12bf33fa89e55ec89fe126e9d6d5d9e9"}, - {file = "pillow-12.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2c54c1a783d6d60595d3514f0efe9b37c8808746a66920315bfd34a938d7994b"}, - {file = "pillow-12.0.0-cp313-cp313t-win_arm64.whl", hash = "sha256:26d9f7d2b604cd23aba3e9faf795787456ac25634d82cd060556998e39c6fa47"}, - {file = "pillow-12.0.0-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:beeae3f27f62308f1ddbcfb0690bf44b10732f2ef43758f169d5e9303165d3f9"}, - {file = "pillow-12.0.0-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = 
"sha256:d4827615da15cd59784ce39d3388275ec093ae3ee8d7f0c089b76fa87af756c2"}, - {file = "pillow-12.0.0-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:3e42edad50b6909089750e65c91aa09aaf1e0a71310d383f11321b27c224ed8a"}, - {file = "pillow-12.0.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:e5d8efac84c9afcb40914ab49ba063d94f5dbdf5066db4482c66a992f47a3a3b"}, - {file = "pillow-12.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:266cd5f2b63ff316d5a1bba46268e603c9caf5606d44f38c2873c380950576ad"}, - {file = "pillow-12.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:58eea5ebe51504057dd95c5b77d21700b77615ab0243d8152793dc00eb4faf01"}, - {file = "pillow-12.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f13711b1a5ba512d647a0e4ba79280d3a9a045aaf7e0cc6fbe96b91d4cdf6b0c"}, - {file = "pillow-12.0.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6846bd2d116ff42cba6b646edf5bf61d37e5cbd256425fa089fee4ff5c07a99e"}, - {file = "pillow-12.0.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c98fa880d695de164b4135a52fd2e9cd7b7c90a9d8ac5e9e443a24a95ef9248e"}, - {file = "pillow-12.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:fa3ed2a29a9e9d2d488b4da81dcb54720ac3104a20bf0bd273f1e4648aff5af9"}, - {file = "pillow-12.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d034140032870024e6b9892c692fe2968493790dd57208b2c37e3fb35f6df3ab"}, - {file = "pillow-12.0.0-cp314-cp314-win32.whl", hash = "sha256:1b1b133e6e16105f524a8dec491e0586d072948ce15c9b914e41cdadd209052b"}, - {file = "pillow-12.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:8dc232e39d409036af549c86f24aed8273a40ffa459981146829a324e0848b4b"}, - {file = "pillow-12.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:d52610d51e265a51518692045e372a4c363056130d922a7351429ac9f27e70b0"}, - {file = "pillow-12.0.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = 
"sha256:1979f4566bb96c1e50a62d9831e2ea2d1211761e5662afc545fa766f996632f6"}, - {file = "pillow-12.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b2e4b27a6e15b04832fe9bf292b94b5ca156016bbc1ea9c2c20098a0320d6cf6"}, - {file = "pillow-12.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fb3096c30df99fd01c7bf8e544f392103d0795b9f98ba71a8054bcbf56b255f1"}, - {file = "pillow-12.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7438839e9e053ef79f7112c881cef684013855016f928b168b81ed5835f3e75e"}, - {file = "pillow-12.0.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d5c411a8eaa2299322b647cd932586b1427367fd3184ffbb8f7a219ea2041ca"}, - {file = "pillow-12.0.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d7e091d464ac59d2c7ad8e7e08105eaf9dafbc3883fd7265ffccc2baad6ac925"}, - {file = "pillow-12.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:792a2c0be4dcc18af9d4a2dfd8a11a17d5e25274a1062b0ec1c2d79c76f3e7f8"}, - {file = "pillow-12.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:afbefa430092f71a9593a99ab6a4e7538bc9eabbf7bf94f91510d3503943edc4"}, - {file = "pillow-12.0.0-cp314-cp314t-win32.whl", hash = "sha256:3830c769decf88f1289680a59d4f4c46c72573446352e2befec9a8512104fa52"}, - {file = "pillow-12.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:905b0365b210c73afb0ebe9101a32572152dfd1c144c7e28968a331b9217b94a"}, - {file = "pillow-12.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:99353a06902c2e43b43e8ff74ee65a7d90307d82370604746738a1e0661ccca7"}, - {file = "pillow-12.0.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:b22bd8c974942477156be55a768f7aa37c46904c175be4e158b6a86e3a6b7ca8"}, - {file = "pillow-12.0.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:805ebf596939e48dbb2e4922a1d3852cfc25c38160751ce02da93058b48d252a"}, - {file = 
"pillow-12.0.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cae81479f77420d217def5f54b5b9d279804d17e982e0f2fa19b1d1e14ab5197"}, - {file = "pillow-12.0.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:aeaefa96c768fc66818730b952a862235d68825c178f1b3ffd4efd7ad2edcb7c"}, - {file = "pillow-12.0.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:09f2d0abef9e4e2f349305a4f8cc784a8a6c2f58a8c4892eea13b10a943bd26e"}, - {file = "pillow-12.0.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bdee52571a343d721fb2eb3b090a82d959ff37fc631e3f70422e0c2e029f3e76"}, - {file = "pillow-12.0.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:b290fd8aa38422444d4b50d579de197557f182ef1068b75f5aa8558638b8d0a5"}, - {file = "pillow-12.0.0.tar.gz", hash = "sha256:87d4f8125c9988bfbed67af47dd7a953e2fc7b0cc1e7800ec6d2080d490bb353"}, + {file = "pillow-10.4.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:4d9667937cfa347525b319ae34375c37b9ee6b525440f3ef48542fcf66f2731e"}, + {file = "pillow-10.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:543f3dc61c18dafb755773efc89aae60d06b6596a63914107f75459cf984164d"}, + {file = "pillow-10.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7928ecbf1ece13956b95d9cbcfc77137652b02763ba384d9ab508099a2eca856"}, + {file = "pillow-10.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4d49b85c4348ea0b31ea63bc75a9f3857869174e2bf17e7aba02945cd218e6f"}, + {file = "pillow-10.4.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:6c762a5b0997f5659a5ef2266abc1d8851ad7749ad9a6a5506eb23d314e4f46b"}, + {file = "pillow-10.4.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:a985e028fc183bf12a77a8bbf36318db4238a3ded7fa9df1b9a133f1cb79f8fc"}, + {file = "pillow-10.4.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = 
"sha256:812f7342b0eee081eaec84d91423d1b4650bb9828eb53d8511bcef8ce5aecf1e"}, + {file = "pillow-10.4.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ac1452d2fbe4978c2eec89fb5a23b8387aba707ac72810d9490118817d9c0b46"}, + {file = "pillow-10.4.0-cp310-cp310-win32.whl", hash = "sha256:bcd5e41a859bf2e84fdc42f4edb7d9aba0a13d29a2abadccafad99de3feff984"}, + {file = "pillow-10.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:ecd85a8d3e79cd7158dec1c9e5808e821feea088e2f69a974db5edf84dc53141"}, + {file = "pillow-10.4.0-cp310-cp310-win_arm64.whl", hash = "sha256:ff337c552345e95702c5fde3158acb0625111017d0e5f24bf3acdb9cc16b90d1"}, + {file = "pillow-10.4.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:0a9ec697746f268507404647e531e92889890a087e03681a3606d9b920fbee3c"}, + {file = "pillow-10.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:dfe91cb65544a1321e631e696759491ae04a2ea11d36715eca01ce07284738be"}, + {file = "pillow-10.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5dc6761a6efc781e6a1544206f22c80c3af4c8cf461206d46a1e6006e4429ff3"}, + {file = "pillow-10.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e84b6cc6a4a3d76c153a6b19270b3526a5a8ed6b09501d3af891daa2a9de7d6"}, + {file = "pillow-10.4.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:bbc527b519bd3aa9d7f429d152fea69f9ad37c95f0b02aebddff592688998abe"}, + {file = "pillow-10.4.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:76a911dfe51a36041f2e756b00f96ed84677cdeb75d25c767f296c1c1eda1319"}, + {file = "pillow-10.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:59291fb29317122398786c2d44427bbd1a6d7ff54017075b22be9d21aa59bd8d"}, + {file = "pillow-10.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:416d3a5d0e8cfe4f27f574362435bc9bae57f679a7158e0096ad2beb427b8696"}, + {file = "pillow-10.4.0-cp311-cp311-win32.whl", hash = "sha256:7086cc1d5eebb91ad24ded9f58bec6c688e9f0ed7eb3dbbf1e4800280a896496"}, + {file = 
"pillow-10.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:cbed61494057c0f83b83eb3a310f0bf774b09513307c434d4366ed64f4128a91"}, + {file = "pillow-10.4.0-cp311-cp311-win_arm64.whl", hash = "sha256:f5f0c3e969c8f12dd2bb7e0b15d5c468b51e5017e01e2e867335c81903046a22"}, + {file = "pillow-10.4.0-cp312-cp312-macosx_10_10_x86_64.whl", hash = "sha256:673655af3eadf4df6b5457033f086e90299fdd7a47983a13827acf7459c15d94"}, + {file = "pillow-10.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:866b6942a92f56300012f5fbac71f2d610312ee65e22f1aa2609e491284e5597"}, + {file = "pillow-10.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29dbdc4207642ea6aad70fbde1a9338753d33fb23ed6956e706936706f52dd80"}, + {file = "pillow-10.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf2342ac639c4cf38799a44950bbc2dfcb685f052b9e262f446482afaf4bffca"}, + {file = "pillow-10.4.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:f5b92f4d70791b4a67157321c4e8225d60b119c5cc9aee8ecf153aace4aad4ef"}, + {file = "pillow-10.4.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:86dcb5a1eb778d8b25659d5e4341269e8590ad6b4e8b44d9f4b07f8d136c414a"}, + {file = "pillow-10.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:780c072c2e11c9b2c7ca37f9a2ee8ba66f44367ac3e5c7832afcfe5104fd6d1b"}, + {file = "pillow-10.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:37fb69d905be665f68f28a8bba3c6d3223c8efe1edf14cc4cfa06c241f8c81d9"}, + {file = "pillow-10.4.0-cp312-cp312-win32.whl", hash = "sha256:7dfecdbad5c301d7b5bde160150b4db4c659cee2b69589705b6f8a0c509d9f42"}, + {file = "pillow-10.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:1d846aea995ad352d4bdcc847535bd56e0fd88d36829d2c90be880ef1ee4668a"}, + {file = "pillow-10.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:e553cad5179a66ba15bb18b353a19020e73a7921296a7979c4a2b7f6a5cd57f9"}, + {file = "pillow-10.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = 
"sha256:8bc1a764ed8c957a2e9cacf97c8b2b053b70307cf2996aafd70e91a082e70df3"}, + {file = "pillow-10.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6209bb41dc692ddfee4942517c19ee81b86c864b626dbfca272ec0f7cff5d9fb"}, + {file = "pillow-10.4.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bee197b30783295d2eb680b311af15a20a8b24024a19c3a26431ff83eb8d1f70"}, + {file = "pillow-10.4.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ef61f5dd14c300786318482456481463b9d6b91ebe5ef12f405afbba77ed0be"}, + {file = "pillow-10.4.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:297e388da6e248c98bc4a02e018966af0c5f92dfacf5a5ca22fa01cb3179bca0"}, + {file = "pillow-10.4.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:e4db64794ccdf6cb83a59d73405f63adbe2a1887012e308828596100a0b2f6cc"}, + {file = "pillow-10.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bd2880a07482090a3bcb01f4265f1936a903d70bc740bfcb1fd4e8a2ffe5cf5a"}, + {file = "pillow-10.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4b35b21b819ac1dbd1233317adeecd63495f6babf21b7b2512d244ff6c6ce309"}, + {file = "pillow-10.4.0-cp313-cp313-win32.whl", hash = "sha256:551d3fd6e9dc15e4c1eb6fc4ba2b39c0c7933fa113b220057a34f4bb3268a060"}, + {file = "pillow-10.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:030abdbe43ee02e0de642aee345efa443740aa4d828bfe8e2eb11922ea6a21ea"}, + {file = "pillow-10.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:5b001114dd152cfd6b23befeb28d7aee43553e2402c9f159807bf55f33af8a8d"}, + {file = "pillow-10.4.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:8d4d5063501b6dd4024b8ac2f04962d661222d120381272deea52e3fc52d3736"}, + {file = "pillow-10.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7c1ee6f42250df403c5f103cbd2768a28fe1a0ea1f0f03fe151c8741e1469c8b"}, + {file = "pillow-10.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:b15e02e9bb4c21e39876698abf233c8c579127986f8207200bc8a8f6bb27acf2"}, + {file = "pillow-10.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a8d4bade9952ea9a77d0c3e49cbd8b2890a399422258a77f357b9cc9be8d680"}, + {file = "pillow-10.4.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:43efea75eb06b95d1631cb784aa40156177bf9dd5b4b03ff38979e048258bc6b"}, + {file = "pillow-10.4.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:950be4d8ba92aca4b2bb0741285a46bfae3ca699ef913ec8416c1b78eadd64cd"}, + {file = "pillow-10.4.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:d7480af14364494365e89d6fddc510a13e5a2c3584cb19ef65415ca57252fb84"}, + {file = "pillow-10.4.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:73664fe514b34c8f02452ffb73b7a92c6774e39a647087f83d67f010eb9a0cf0"}, + {file = "pillow-10.4.0-cp38-cp38-win32.whl", hash = "sha256:e88d5e6ad0d026fba7bdab8c3f225a69f063f116462c49892b0149e21b6c0a0e"}, + {file = "pillow-10.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:5161eef006d335e46895297f642341111945e2c1c899eb406882a6c61a4357ab"}, + {file = "pillow-10.4.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:0ae24a547e8b711ccaaf99c9ae3cd975470e1a30caa80a6aaee9a2f19c05701d"}, + {file = "pillow-10.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:298478fe4f77a4408895605f3482b6cc6222c018b2ce565c2b6b9c354ac3229b"}, + {file = "pillow-10.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:134ace6dc392116566980ee7436477d844520a26a4b1bd4053f6f47d096997fd"}, + {file = "pillow-10.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:930044bb7679ab003b14023138b50181899da3f25de50e9dbee23b61b4de2126"}, + {file = "pillow-10.4.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:c76e5786951e72ed3686e122d14c5d7012f16c8303a674d18cdcd6d89557fc5b"}, + {file = "pillow-10.4.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = 
"sha256:b2724fdb354a868ddf9a880cb84d102da914e99119211ef7ecbdc613b8c96b3c"}, + {file = "pillow-10.4.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:dbc6ae66518ab3c5847659e9988c3b60dc94ffb48ef9168656e0019a93dbf8a1"}, + {file = "pillow-10.4.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:06b2f7898047ae93fad74467ec3d28fe84f7831370e3c258afa533f81ef7f3df"}, + {file = "pillow-10.4.0-cp39-cp39-win32.whl", hash = "sha256:7970285ab628a3779aecc35823296a7869f889b8329c16ad5a71e4901a3dc4ef"}, + {file = "pillow-10.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:961a7293b2457b405967af9c77dcaa43cc1a8cd50d23c532e62d48ab6cdd56f5"}, + {file = "pillow-10.4.0-cp39-cp39-win_arm64.whl", hash = "sha256:32cda9e3d601a52baccb2856b8ea1fc213c90b340c542dcef77140dfa3278a9e"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:5b4815f2e65b30f5fbae9dfffa8636d992d49705723fe86a3661806e069352d4"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:8f0aef4ef59694b12cadee839e2ba6afeab89c0f39a3adc02ed51d109117b8da"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9f4727572e2918acaa9077c919cbbeb73bd2b3ebcfe033b72f858fc9fbef0026"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ff25afb18123cea58a591ea0244b92eb1e61a1fd497bf6d6384f09bc3262ec3e"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:dc3e2db6ba09ffd7d02ae9141cfa0ae23393ee7687248d46a7507b75d610f4f5"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:02a2be69f9c9b8c1e97cf2713e789d4e398c751ecfd9967c18d0ce304efbf885"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:0755ffd4a0c6f267cccbae2e9903d95477ca2f77c4fcf3a3a09570001856c8a5"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = 
"sha256:a02364621fe369e06200d4a16558e056fe2805d3468350df3aef21e00d26214b"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:1b5dea9831a90e9d0721ec417a80d4cbd7022093ac38a568db2dd78363b00908"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b885f89040bb8c4a1573566bbb2f44f5c505ef6e74cec7ab9068c900047f04b"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87dd88ded2e6d74d31e1e0a99a726a6765cda32d00ba72dc37f0651f306daaa8"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:2db98790afc70118bd0255c2eeb465e9767ecf1f3c25f9a1abb8ffc8cfd1fe0a"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:f7baece4ce06bade126fb84b8af1c33439a76d8a6fd818970215e0560ca28c27"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:cfdd747216947628af7b259d274771d84db2268ca062dd5faf373639d00113a3"}, + {file = "pillow-10.4.0.tar.gz", hash = "sha256:166c1cd4d24309b30d61f79f4a9114b7b2313d7450912277855ff5dfd7cd4a06"}, ] [package.extras] -docs = ["furo", "olefile", "sphinx (>=8.2)", "sphinx-autobuild", "sphinx-copybutton", "sphinx-inline-tabs", "sphinxext-opengraph"] +docs = ["furo", "olefile", "sphinx (>=7.3)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinxext-opengraph"] fpx = ["olefile"] mic = ["olefile"] -test-arrow = ["arro3-compute", "arro3-core", "nanoarrow", "pyarrow"] -tests = ["check-manifest", "coverage (>=7.4.2)", "defusedxml", "markdown2", "olefile", "packaging", "pyroma (>=5)", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "trove-classifiers (>=2024.10.12)"] +tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] +typing = ["typing-extensions ; python_version < \"3.10\""] xmp = ["defusedxml"] [[package]] @@ -849,6 +1376,27 @@ files = [ dev = 
["pre-commit", "tox"] testing = ["coverage", "pytest", "pytest-benchmark"] +[[package]] +name = "protobuf" +version = "5.29.5" +description = "" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "protobuf-5.29.5-cp310-abi3-win32.whl", hash = "sha256:3f1c6468a2cfd102ff4703976138844f78ebd1fb45f49011afc5139e9e283079"}, + {file = "protobuf-5.29.5-cp310-abi3-win_amd64.whl", hash = "sha256:3f76e3a3675b4a4d867b52e4a5f5b78a2ef9565549d4037e06cf7b0942b1d3fc"}, + {file = "protobuf-5.29.5-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:e38c5add5a311f2a6eb0340716ef9b039c1dfa428b28f25a7838ac329204a671"}, + {file = "protobuf-5.29.5-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:fa18533a299d7ab6c55a238bf8629311439995f2e7eca5caaff08663606e9015"}, + {file = "protobuf-5.29.5-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:63848923da3325e1bf7e9003d680ce6e14b07e55d0473253a690c3a8b8fd6e61"}, + {file = "protobuf-5.29.5-cp38-cp38-win32.whl", hash = "sha256:ef91363ad4faba7b25d844ef1ada59ff1604184c0bcd8b39b8a6bef15e1af238"}, + {file = "protobuf-5.29.5-cp38-cp38-win_amd64.whl", hash = "sha256:7318608d56b6402d2ea7704ff1e1e4597bee46d760e7e4dd42a3d45e24b87f2e"}, + {file = "protobuf-5.29.5-cp39-cp39-win32.whl", hash = "sha256:6f642dc9a61782fa72b90878af134c5afe1917c89a568cd3476d758d3c3a0736"}, + {file = "protobuf-5.29.5-cp39-cp39-win_amd64.whl", hash = "sha256:470f3af547ef17847a28e1f47200a1cbf0ba3ff57b7de50d22776607cd2ea353"}, + {file = "protobuf-5.29.5-py3-none-any.whl", hash = "sha256:6cf42630262c59b2d8de33954443d94b746c952b01434fc58a417fdbd2e84bd5"}, + {file = "protobuf-5.29.5.tar.gz", hash = "sha256:bc1463bafd4b0929216c35f437a8e28731a2b7fe3d98bb77a600efced5a15c84"}, +] + [[package]] name = "pyarrow" version = "22.0.0" @@ -1065,6 +1613,41 @@ files = [ [package.dependencies] typing-extensions = ">=4.14.1" +[[package]] +name = "pydeck" +version = "0.9.1" +description = "Widget for deck.gl maps" +optional = false +python-versions = ">=3.8" 
+groups = ["main"] +files = [ + {file = "pydeck-0.9.1-py2.py3-none-any.whl", hash = "sha256:b3f75ba0d273fc917094fa61224f3f6076ca8752b93d46faf3bcfd9f9d59b038"}, + {file = "pydeck-0.9.1.tar.gz", hash = "sha256:f74475ae637951d63f2ee58326757f8d4f9cd9f2a457cf42950715003e2cb605"}, +] + +[package.dependencies] +jinja2 = ">=2.10.1" +numpy = ">=1.16.4" + +[package.extras] +carto = ["pydeck-carto"] +jupyter = ["ipykernel (>=5.1.2) ; python_version >= \"3.4\"", "ipython (>=5.8.0) ; python_version < \"3.4\"", "ipywidgets (>=7,<8)", "traitlets (>=4.3.2)"] + +[[package]] +name = "pygments" +version = "2.19.2" +description = "Pygments is a syntax highlighting package written in Python." +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b"}, + {file = "pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887"}, +] + +[package.extras] +windows-terminal = ["colorama (>=0.4.6)"] + [[package]] name = "pyparsing" version = "3.3.1" @@ -1101,6 +1684,25 @@ pluggy = ">=1.5,<2" [package.extras] dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] +[[package]] +name = "pytest-cov" +version = "5.0.0" +description = "Pytest plugin for measuring coverage." 
+optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "pytest-cov-5.0.0.tar.gz", hash = "sha256:5837b58e9f6ebd335b0f8060eecce69b662415b16dc503883a02f45dfeb14857"}, + {file = "pytest_cov-5.0.0-py3-none-any.whl", hash = "sha256:4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652"}, +] + +[package.dependencies] +coverage = {version = ">=5.2.1", extras = ["toml"]} +pytest = ">=4.6" + +[package.extras] +testing = ["fields", "hunter", "process-tests", "pytest-xdist", "virtualenv"] + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -1128,6 +1730,189 @@ files = [ {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"}, ] +[[package]] +name = "referencing" +version = "0.37.0" +description = "JSON Referencing + Python" +optional = false +python-versions = ">=3.10" +groups = ["main"] +files = [ + {file = "referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231"}, + {file = "referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8"}, +] + +[package.dependencies] +attrs = ">=22.2.0" +rpds-py = ">=0.7.0" +typing-extensions = {version = ">=4.4.0", markers = "python_version < \"3.13\""} + +[[package]] +name = "requests" +version = "2.32.5" +description = "Python HTTP for Humans." 
+optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6"}, + {file = "requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset_normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "rich" +version = "13.9.4" +description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" +optional = false +python-versions = ">=3.8.0" +groups = ["main"] +files = [ + {file = "rich-13.9.4-py3-none-any.whl", hash = "sha256:6049d5e6ec054bf2779ab3358186963bac2ea89175919d699e378b99738c2a90"}, + {file = "rich-13.9.4.tar.gz", hash = "sha256:439594978a49a09530cff7ebc4b5c7103ef57baf48d5ea3184f21d9a2befa098"}, +] + +[package.dependencies] +markdown-it-py = ">=2.2.0" +pygments = ">=2.13.0,<3.0.0" + +[package.extras] +jupyter = ["ipywidgets (>=7.5.1,<9)"] + +[[package]] +name = "rpds-py" +version = "0.30.0" +description = "Python bindings to Rust's persistent data structures (rpds)" +optional = false +python-versions = ">=3.10" +groups = ["main"] +files = [ + {file = "rpds_py-0.30.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:679ae98e00c0e8d68a7fda324e16b90fd5260945b45d3b824c892cec9eea3288"}, + {file = "rpds_py-0.30.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4cc2206b76b4f576934f0ed374b10d7ca5f457858b157ca52064bdfc26b9fc00"}, + {file = "rpds_py-0.30.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:389a2d49eded1896c3d48b0136ead37c48e221b391c052fba3f4055c367f60a6"}, + {file = "rpds_py-0.30.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:32c8528634e1bf7121f3de08fa85b138f4e0dc47657866630611b03967f041d7"}, + {file = "rpds_py-0.30.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f207f69853edd6f6700b86efb84999651baf3789e78a466431df1331608e5324"}, + {file = "rpds_py-0.30.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:67b02ec25ba7a9e8fa74c63b6ca44cf5707f2fbfadae3ee8e7494297d56aa9df"}, + {file = "rpds_py-0.30.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c0e95f6819a19965ff420f65578bacb0b00f251fefe2c8b23347c37174271f3"}, + {file = "rpds_py-0.30.0-cp310-cp310-manylinux_2_31_riscv64.whl", hash = "sha256:a452763cc5198f2f98898eb98f7569649fe5da666c2dc6b5ddb10fde5a574221"}, + {file = "rpds_py-0.30.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e0b65193a413ccc930671c55153a03ee57cecb49e6227204b04fae512eb657a7"}, + {file = "rpds_py-0.30.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:858738e9c32147f78b3ac24dc0edb6610000e56dc0f700fd5f651d0a0f0eb9ff"}, + {file = "rpds_py-0.30.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:da279aa314f00acbb803da1e76fa18666778e8a8f83484fba94526da5de2cba7"}, + {file = "rpds_py-0.30.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7c64d38fb49b6cdeda16ab49e35fe0da2e1e9b34bc38bd78386530f218b37139"}, + {file = "rpds_py-0.30.0-cp310-cp310-win32.whl", hash = "sha256:6de2a32a1665b93233cde140ff8b3467bdb9e2af2b91079f0333a0974d12d464"}, + {file = "rpds_py-0.30.0-cp310-cp310-win_amd64.whl", hash = "sha256:1726859cd0de969f88dc8673bdd954185b9104e05806be64bcd87badbe313169"}, + {file = "rpds_py-0.30.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:a2bffea6a4ca9f01b3f8e548302470306689684e61602aa3d141e34da06cf425"}, + {file = "rpds_py-0.30.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:dc4f992dfe1e2bc3ebc7444f6c7051b4bc13cd8e33e43511e8ffd13bf407010d"}, + {file = "rpds_py-0.30.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:422c3cb9856d80b09d30d2eb255d0754b23e090034e1deb4083f8004bd0761e4"}, + {file = "rpds_py-0.30.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:07ae8a593e1c3c6b82ca3292efbe73c30b61332fd612e05abee07c79359f292f"}, + {file = "rpds_py-0.30.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:12f90dd7557b6bd57f40abe7747e81e0c0b119bef015ea7726e69fe550e394a4"}, + {file = "rpds_py-0.30.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:99b47d6ad9a6da00bec6aabe5a6279ecd3c06a329d4aa4771034a21e335c3a97"}, + {file = "rpds_py-0.30.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33f559f3104504506a44bb666b93a33f5d33133765b0c216a5bf2f1e1503af89"}, + {file = "rpds_py-0.30.0-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:946fe926af6e44f3697abbc305ea168c2c31d3e3ef1058cf68f379bf0335a78d"}, + {file = "rpds_py-0.30.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:495aeca4b93d465efde585977365187149e75383ad2684f81519f504f5c13038"}, + {file = "rpds_py-0.30.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d9a0ca5da0386dee0655b4ccdf46119df60e0f10da268d04fe7cc87886872ba7"}, + {file = "rpds_py-0.30.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8d6d1cc13664ec13c1b84241204ff3b12f9bb82464b8ad6e7a5d3486975c2eed"}, + {file = "rpds_py-0.30.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3896fa1be39912cf0757753826bc8bdc8ca331a28a7c4ae46b7a21280b06bb85"}, + {file = "rpds_py-0.30.0-cp311-cp311-win32.whl", hash = "sha256:55f66022632205940f1827effeff17c4fa7ae1953d2b74a8581baaefb7d16f8c"}, + {file = "rpds_py-0.30.0-cp311-cp311-win_amd64.whl", hash = "sha256:a51033ff701fca756439d641c0ad09a41d9242fa69121c7d8769604a0a629825"}, + {file = "rpds_py-0.30.0-cp311-cp311-win_arm64.whl", hash = "sha256:47b0ef6231c58f506ef0b74d44e330405caa8428e770fec25329ed2cb971a229"}, + {file = "rpds_py-0.30.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = 
"sha256:a161f20d9a43006833cd7068375a94d035714d73a172b681d8881820600abfad"}, + {file = "rpds_py-0.30.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6abc8880d9d036ecaafe709079969f56e876fcf107f7a8e9920ba6d5a3878d05"}, + {file = "rpds_py-0.30.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca28829ae5f5d569bb62a79512c842a03a12576375d5ece7d2cadf8abe96ec28"}, + {file = "rpds_py-0.30.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a1010ed9524c73b94d15919ca4d41d8780980e1765babf85f9a2f90d247153dd"}, + {file = "rpds_py-0.30.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8d1736cfb49381ba528cd5baa46f82fdc65c06e843dab24dd70b63d09121b3f"}, + {file = "rpds_py-0.30.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d948b135c4693daff7bc2dcfc4ec57237a29bd37e60c2fabf5aff2bbacf3e2f1"}, + {file = "rpds_py-0.30.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47f236970bccb2233267d89173d3ad2703cd36a0e2a6e92d0560d333871a3d23"}, + {file = "rpds_py-0.30.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:2e6ecb5a5bcacf59c3f912155044479af1d0b6681280048b338b28e364aca1f6"}, + {file = "rpds_py-0.30.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a8fa71a2e078c527c3e9dc9fc5a98c9db40bcc8a92b4e8858e36d329f8684b51"}, + {file = "rpds_py-0.30.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:73c67f2db7bc334e518d097c6d1e6fed021bbc9b7d678d6cc433478365d1d5f5"}, + {file = "rpds_py-0.30.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5ba103fb455be00f3b1c2076c9d4264bfcb037c976167a6047ed82f23153f02e"}, + {file = "rpds_py-0.30.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7cee9c752c0364588353e627da8a7e808a66873672bcb5f52890c33fd965b394"}, + {file = "rpds_py-0.30.0-cp312-cp312-win32.whl", hash = "sha256:1ab5b83dbcf55acc8b08fc62b796ef672c457b17dbd7820a11d6c52c06839bdf"}, + {file = 
"rpds_py-0.30.0-cp312-cp312-win_amd64.whl", hash = "sha256:a090322ca841abd453d43456ac34db46e8b05fd9b3b4ac0c78bcde8b089f959b"}, + {file = "rpds_py-0.30.0-cp312-cp312-win_arm64.whl", hash = "sha256:669b1805bd639dd2989b281be2cfd951c6121b65e729d9b843e9639ef1fd555e"}, + {file = "rpds_py-0.30.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:f83424d738204d9770830d35290ff3273fbb02b41f919870479fab14b9d303b2"}, + {file = "rpds_py-0.30.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e7536cd91353c5273434b4e003cbda89034d67e7710eab8761fd918ec6c69cf8"}, + {file = "rpds_py-0.30.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2771c6c15973347f50fece41fc447c054b7ac2ae0502388ce3b6738cd366e3d4"}, + {file = "rpds_py-0.30.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0a59119fc6e3f460315fe9d08149f8102aa322299deaa5cab5b40092345c2136"}, + {file = "rpds_py-0.30.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:76fec018282b4ead0364022e3c54b60bf368b9d926877957a8624b58419169b7"}, + {file = "rpds_py-0.30.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:692bef75a5525db97318e8cd061542b5a79812d711ea03dbc1f6f8dbb0c5f0d2"}, + {file = "rpds_py-0.30.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9027da1ce107104c50c81383cae773ef5c24d296dd11c99e2629dbd7967a20c6"}, + {file = "rpds_py-0.30.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:9cf69cdda1f5968a30a359aba2f7f9aa648a9ce4b580d6826437f2b291cfc86e"}, + {file = "rpds_py-0.30.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a4796a717bf12b9da9d3ad002519a86063dcac8988b030e405704ef7d74d2d9d"}, + {file = "rpds_py-0.30.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5d4c2aa7c50ad4728a094ebd5eb46c452e9cb7edbfdb18f9e1221f597a73e1e7"}, + {file = "rpds_py-0.30.0-cp313-cp313-musllinux_1_2_i686.whl", hash = 
"sha256:ba81a9203d07805435eb06f536d95a266c21e5b2dfbf6517748ca40c98d19e31"}, + {file = "rpds_py-0.30.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:945dccface01af02675628334f7cf49c2af4c1c904748efc5cf7bbdf0b579f95"}, + {file = "rpds_py-0.30.0-cp313-cp313-win32.whl", hash = "sha256:b40fb160a2db369a194cb27943582b38f79fc4887291417685f3ad693c5a1d5d"}, + {file = "rpds_py-0.30.0-cp313-cp313-win_amd64.whl", hash = "sha256:806f36b1b605e2d6a72716f321f20036b9489d29c51c91f4dd29a3e3afb73b15"}, + {file = "rpds_py-0.30.0-cp313-cp313-win_arm64.whl", hash = "sha256:d96c2086587c7c30d44f31f42eae4eac89b60dabbac18c7669be3700f13c3ce1"}, + {file = "rpds_py-0.30.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:eb0b93f2e5c2189ee831ee43f156ed34e2a89a78a66b98cadad955972548be5a"}, + {file = "rpds_py-0.30.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:922e10f31f303c7c920da8981051ff6d8c1a56207dbdf330d9047f6d30b70e5e"}, + {file = "rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cdc62c8286ba9bf7f47befdcea13ea0e26bf294bda99758fd90535cbaf408000"}, + {file = "rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:47f9a91efc418b54fb8190a6b4aa7813a23fb79c51f4bb84e418f5476c38b8db"}, + {file = "rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1f3587eb9b17f3789ad50824084fa6f81921bbf9a795826570bda82cb3ed91f2"}, + {file = "rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:39c02563fc592411c2c61d26b6c5fe1e51eaa44a75aa2c8735ca88b0d9599daa"}, + {file = "rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51a1234d8febafdfd33a42d97da7a43f5dcb120c1060e352a3fbc0c6d36e2083"}, + {file = "rpds_py-0.30.0-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:eb2c4071ab598733724c08221091e8d80e89064cd472819285a9ab0f24bcedb9"}, + {file = 
"rpds_py-0.30.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6bdfdb946967d816e6adf9a3d8201bfad269c67efe6cefd7093ef959683c8de0"}, + {file = "rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c77afbd5f5250bf27bf516c7c4a016813eb2d3e116139aed0096940c5982da94"}, + {file = "rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:61046904275472a76c8c90c9ccee9013d70a6d0f73eecefd38c1ae7c39045a08"}, + {file = "rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c5f36a861bc4b7da6516dbdf302c55313afa09b81931e8280361a4f6c9a2d27"}, + {file = "rpds_py-0.30.0-cp313-cp313t-win32.whl", hash = "sha256:3d4a69de7a3e50ffc214ae16d79d8fbb0922972da0356dcf4d0fdca2878559c6"}, + {file = "rpds_py-0.30.0-cp313-cp313t-win_amd64.whl", hash = "sha256:f14fc5df50a716f7ece6a80b6c78bb35ea2ca47c499e422aa4463455dd96d56d"}, + {file = "rpds_py-0.30.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:68f19c879420aa08f61203801423f6cd5ac5f0ac4ac82a2368a9fcd6a9a075e0"}, + {file = "rpds_py-0.30.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ec7c4490c672c1a0389d319b3a9cfcd098dcdc4783991553c332a15acf7249be"}, + {file = "rpds_py-0.30.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f251c812357a3fed308d684a5079ddfb9d933860fc6de89f2b7ab00da481e65f"}, + {file = "rpds_py-0.30.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ac98b175585ecf4c0348fd7b29c3864bda53b805c773cbf7bfdaffc8070c976f"}, + {file = "rpds_py-0.30.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3e62880792319dbeb7eb866547f2e35973289e7d5696c6e295476448f5b63c87"}, + {file = "rpds_py-0.30.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4e7fc54e0900ab35d041b0601431b0a0eb495f0851a0639b6ef90f7741b39a18"}, + {file = "rpds_py-0.30.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:47e77dc9822d3ad616c3d5759ea5631a75e5809d5a28707744ef79d7a1bcfcad"}, + {file = "rpds_py-0.30.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:b4dc1a6ff022ff85ecafef7979a2c6eb423430e05f1165d6688234e62ba99a07"}, + {file = "rpds_py-0.30.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4559c972db3a360808309e06a74628b95eaccbf961c335c8fe0d590cf587456f"}, + {file = "rpds_py-0.30.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:0ed177ed9bded28f8deb6ab40c183cd1192aa0de40c12f38be4d59cd33cb5c65"}, + {file = "rpds_py-0.30.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:ad1fa8db769b76ea911cb4e10f049d80bf518c104f15b3edb2371cc65375c46f"}, + {file = "rpds_py-0.30.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:46e83c697b1f1c72b50e5ee5adb4353eef7406fb3f2043d64c33f20ad1c2fc53"}, + {file = "rpds_py-0.30.0-cp314-cp314-win32.whl", hash = "sha256:ee454b2a007d57363c2dfd5b6ca4a5d7e2c518938f8ed3b706e37e5d470801ed"}, + {file = "rpds_py-0.30.0-cp314-cp314-win_amd64.whl", hash = "sha256:95f0802447ac2d10bcc69f6dc28fe95fdf17940367b21d34e34c737870758950"}, + {file = "rpds_py-0.30.0-cp314-cp314-win_arm64.whl", hash = "sha256:613aa4771c99f03346e54c3f038e4cc574ac09a3ddfb0e8878487335e96dead6"}, + {file = "rpds_py-0.30.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:7e6ecfcb62edfd632e56983964e6884851786443739dbfe3582947e87274f7cb"}, + {file = "rpds_py-0.30.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a1d0bc22a7cdc173fedebb73ef81e07faef93692b8c1ad3733b67e31e1b6e1b8"}, + {file = "rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d08f00679177226c4cb8c5265012eea897c8ca3b93f429e546600c971bcbae7"}, + {file = "rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5965af57d5848192c13534f90f9dd16464f3c37aaf166cc1da1cae1fd5a34898"}, + {file = "rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:9a4e86e34e9ab6b667c27f3211ca48f73dba7cd3d90f8d5b11be56e5dbc3fb4e"}, + {file = "rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5d3e6b26f2c785d65cc25ef1e5267ccbe1b069c5c21b8cc724efee290554419"}, + {file = "rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:626a7433c34566535b6e56a1b39a7b17ba961e97ce3b80ec62e6f1312c025551"}, + {file = "rpds_py-0.30.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:acd7eb3f4471577b9b5a41baf02a978e8bdeb08b4b355273994f8b87032000a8"}, + {file = "rpds_py-0.30.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fe5fa731a1fa8a0a56b0977413f8cacac1768dad38d16b3a296712709476fbd5"}, + {file = "rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:74a3243a411126362712ee1524dfc90c650a503502f135d54d1b352bd01f2404"}, + {file = "rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:3e8eeb0544f2eb0d2581774be4c3410356eba189529a6b3e36bbbf9696175856"}, + {file = "rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:dbd936cde57abfee19ab3213cf9c26be06d60750e60a8e4dd85d1ab12c8b1f40"}, + {file = "rpds_py-0.30.0-cp314-cp314t-win32.whl", hash = "sha256:dc824125c72246d924f7f796b4f63c1e9dc810c7d9e2355864b3c3a73d59ade0"}, + {file = "rpds_py-0.30.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27f4b0e92de5bfbc6f86e43959e6edd1425c33b5e69aab0984a72047f2bcf1e3"}, + {file = "rpds_py-0.30.0-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c2262bdba0ad4fc6fb5545660673925c2d2a5d9e2e0fb603aad545427be0fc58"}, + {file = "rpds_py-0.30.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:ee6af14263f25eedc3bb918a3c04245106a42dfd4f5c2285ea6f997b1fc3f89a"}, + {file = "rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3adbb8179ce342d235c31ab8ec511e66c73faa27a47e076ccc92421add53e2bb"}, + {file = 
"rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:250fa00e9543ac9b97ac258bd37367ff5256666122c2d0f2bc97577c60a1818c"}, + {file = "rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9854cf4f488b3d57b9aaeb105f06d78e5529d3145b1e4a41750167e8c213c6d3"}, + {file = "rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:993914b8e560023bc0a8bf742c5f303551992dcb85e247b1e5c7f4a7d145bda5"}, + {file = "rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58edca431fb9b29950807e301826586e5bbf24163677732429770a697ffe6738"}, + {file = "rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:dea5b552272a944763b34394d04577cf0f9bd013207bc32323b5a89a53cf9c2f"}, + {file = "rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ba3af48635eb83d03f6c9735dfb21785303e73d22ad03d489e88adae6eab8877"}, + {file = "rpds_py-0.30.0-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:dff13836529b921e22f15cb099751209a60009731a68519630a24d61f0b1b30a"}, + {file = "rpds_py-0.30.0-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:1b151685b23929ab7beec71080a8889d4d6d9fa9a983d213f07121205d48e2c4"}, + {file = "rpds_py-0.30.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:ac37f9f516c51e5753f27dfdef11a88330f04de2d564be3991384b2f3535d02e"}, + {file = "rpds_py-0.30.0.tar.gz", hash = "sha256:dd8ff7cf90014af0c0f787eea34794ebf6415242ee1d6fa91eaba725cc441e84"}, +] + [[package]] name = "scikit-learn" version = "1.8.0" @@ -1253,6 +2038,18 @@ files = [ {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, ] +[[package]] +name = "smmap" +version = "5.0.2" +description = "A pure Python implementation of a sliding window memory map manager" +optional = false +python-versions = ">=3.7" +groups = 
["main"] +files = [ + {file = "smmap-5.0.2-py3-none-any.whl", hash = "sha256:b30115f0def7d7531d22a0fb6502488d879e75b260a9db4d0819cfb25403af5e"}, + {file = "smmap-5.0.2.tar.gz", hash = "sha256:26ea65a03958fa0c8a1c7e8c7a58fdc77221b8910f6be2131affade476898ad5"}, +] + [[package]] name = "sniffio" version = "1.3.1" @@ -1284,6 +2081,58 @@ typing-extensions = {version = ">=4.10.0", markers = "python_version < \"3.13\"" [package.extras] full = ["httpx (>=0.27.0,<0.29.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.18)", "pyyaml"] +[[package]] +name = "streamlit" +version = "1.37.1" +description = "A faster way to build and share data apps" +optional = false +python-versions = "!=3.9.7,>=3.8" +groups = ["main"] +files = [ + {file = "streamlit-1.37.1-py2.py3-none-any.whl", hash = "sha256:0651240fccc569900cc9450390b0a67473fda55be65f317e46285f99e2bddf04"}, + {file = "streamlit-1.37.1.tar.gz", hash = "sha256:bc7e3813d94a39dda56f15678437eb37830973c601e8e574f2225a7bf188ea5a"}, +] + +[package.dependencies] +altair = ">=4.0,<6" +blinker = ">=1.0.0,<2" +cachetools = ">=4.0,<6" +click = ">=7.0,<9" +gitpython = ">=3.0.7,<3.1.19 || >3.1.19,<4" +numpy = ">=1.20,<3" +packaging = ">=20,<25" +pandas = ">=1.3.0,<3" +pillow = ">=7.1.0,<11" +protobuf = ">=3.20,<6" +pyarrow = ">=7.0" +pydeck = ">=0.8.0b4,<1" +requests = ">=2.27,<3" +rich = ">=10.14.0,<14" +tenacity = ">=8.1.0,<9" +toml = ">=0.10.1,<2" +tornado = ">=6.0.3,<7" +typing-extensions = ">=4.3.0,<5" +watchdog = {version = ">=2.1.5,<5", markers = "platform_system != \"Darwin\""} + +[package.extras] +snowflake = ["snowflake-connector-python (>=2.8.0) ; python_version < \"3.12\"", "snowflake-snowpark-python (>=0.9.0) ; python_version < \"3.12\""] + +[[package]] +name = "tenacity" +version = "8.5.0" +description = "Retry code until it succeeds" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "tenacity-8.5.0-py3-none-any.whl", hash = 
"sha256:b594c2a5945830c267ce6b79a166228323ed52718f30302c1359836112346687"}, + {file = "tenacity-8.5.0.tar.gz", hash = "sha256:8bc6c0c8a09b31e6cad13c47afbed1a567518250a9a171418582ed8d9c20ca78"}, +] + +[package.extras] +doc = ["reno", "sphinx"] +test = ["pytest", "tornado (>=4.5)", "typeguard"] + [[package]] name = "threadpoolctl" version = "3.5.0" @@ -1296,6 +2145,40 @@ files = [ {file = "threadpoolctl-3.5.0.tar.gz", hash = "sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107"}, ] +[[package]] +name = "toml" +version = "0.10.2" +description = "Python Library for Tom's Obvious, Minimal Language" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" +groups = ["main"] +files = [ + {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, + {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, +] + +[[package]] +name = "tornado" +version = "6.5.4" +description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." 
+optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "tornado-6.5.4-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d6241c1a16b1c9e4cc28148b1cda97dd1c6cb4fb7068ac1bedc610768dff0ba9"}, + {file = "tornado-6.5.4-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:2d50f63dda1d2cac3ae1fa23d254e16b5e38153758470e9956cbc3d813d40843"}, + {file = "tornado-6.5.4-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d1cf66105dc6acb5af613c054955b8137e34a03698aa53272dbda4afe252be17"}, + {file = "tornado-6.5.4-cp39-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:50ff0a58b0dc97939d29da29cd624da010e7f804746621c78d14b80238669335"}, + {file = "tornado-6.5.4-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e5fb5e04efa54cf0baabdd10061eb4148e0be137166146fff835745f59ab9f7f"}, + {file = "tornado-6.5.4-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9c86b1643b33a4cd415f8d0fe53045f913bf07b4a3ef646b735a6a86047dda84"}, + {file = "tornado-6.5.4-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:6eb82872335a53dd063a4f10917b3efd28270b56a33db69009606a0312660a6f"}, + {file = "tornado-6.5.4-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6076d5dda368c9328ff41ab5d9dd3608e695e8225d1cd0fd1e006f05da3635a8"}, + {file = "tornado-6.5.4-cp39-abi3-win32.whl", hash = "sha256:1768110f2411d5cd281bac0a090f707223ce77fd110424361092859e089b38d1"}, + {file = "tornado-6.5.4-cp39-abi3-win_amd64.whl", hash = "sha256:fa07d31e0cd85c60713f2b995da613588aa03e1303d75705dca6af8babc18ddc"}, + {file = "tornado-6.5.4-cp39-abi3-win_arm64.whl", hash = "sha256:053e6e16701eb6cbe641f308f4c1a9541f91b6261991160391bfc342e8a551a1"}, + {file = "tornado-6.5.4.tar.gz", hash = "sha256:a22fa9047405d03260b483980635f0b041989d8bcc9a313f8fe18b411d84b1d7"}, +] + [[package]] name = "typing-extensions" version = "4.15.0" @@ -1336,6 +2219,24 @@ files = [ {file = 
"tzdata-2025.3.tar.gz", hash = "sha256:de39c2ca5dc7b0344f2eba86f49d614019d29f060fc4ebc8a417896a620b56a7"}, ] +[[package]] +name = "urllib3" +version = "2.6.2" +description = "HTTP library with thread-safe connection pooling, file post, and more." +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, + {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, +] + +[package.extras] +brotli = ["brotli (>=1.2.0) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=1.2.0.0) ; platform_python_implementation != \"CPython\""] +h2 = ["h2 (>=4,<5)"] +socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] +zstd = ["backports-zstd (>=1.0.0) ; python_version < \"3.14\""] + [[package]] name = "uvicorn" version = "0.30.6" @@ -1355,7 +2256,56 @@ h11 = ">=0.8" [package.extras] standard = ["colorama (>=0.4) ; sys_platform == \"win32\"", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1) ; sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\"", "watchfiles (>=0.13)", "websockets (>=10.4)"] +[[package]] +name = "watchdog" +version = "4.0.2" +description = "Filesystem events monitoring" +optional = false +python-versions = ">=3.8" +groups = ["main"] +markers = "platform_system != \"Darwin\"" +files = [ + {file = "watchdog-4.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ede7f010f2239b97cc79e6cb3c249e72962404ae3865860855d5cbe708b0fd22"}, + {file = "watchdog-4.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a2cffa171445b0efa0726c561eca9a27d00a1f2b83846dbd5a4f639c4f8ca8e1"}, + {file = "watchdog-4.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c50f148b31b03fbadd6d0b5980e38b558046b127dc483e5e4505fcef250f9503"}, + {file = 
"watchdog-4.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:7c7d4bf585ad501c5f6c980e7be9c4f15604c7cc150e942d82083b31a7548930"}, + {file = "watchdog-4.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:914285126ad0b6eb2258bbbcb7b288d9dfd655ae88fa28945be05a7b475a800b"}, + {file = "watchdog-4.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:984306dc4720da5498b16fc037b36ac443816125a3705dfde4fd90652d8028ef"}, + {file = "watchdog-4.0.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:1cdcfd8142f604630deef34722d695fb455d04ab7cfe9963055df1fc69e6727a"}, + {file = "watchdog-4.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d7ab624ff2f663f98cd03c8b7eedc09375a911794dfea6bf2a359fcc266bff29"}, + {file = "watchdog-4.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:132937547a716027bd5714383dfc40dc66c26769f1ce8a72a859d6a48f371f3a"}, + {file = "watchdog-4.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:cd67c7df93eb58f360c43802acc945fa8da70c675b6fa37a241e17ca698ca49b"}, + {file = "watchdog-4.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:bcfd02377be80ef3b6bc4ce481ef3959640458d6feaae0bd43dd90a43da90a7d"}, + {file = "watchdog-4.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:980b71510f59c884d684b3663d46e7a14b457c9611c481e5cef08f4dd022eed7"}, + {file = "watchdog-4.0.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:aa160781cafff2719b663c8a506156e9289d111d80f3387cf3af49cedee1f040"}, + {file = "watchdog-4.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f6ee8dedd255087bc7fe82adf046f0b75479b989185fb0bdf9a98b612170eac7"}, + {file = "watchdog-4.0.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0b4359067d30d5b864e09c8597b112fe0a0a59321a0f331498b013fb097406b4"}, + {file = "watchdog-4.0.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:770eef5372f146997638d737c9a3c597a3b41037cfbc5c41538fc27c09c3a3f9"}, + {file = "watchdog-4.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:eeea812f38536a0aa859972d50c76e37f4456474b02bd93674d1947cf1e39578"}, + {file = "watchdog-4.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b2c45f6e1e57ebb4687690c05bc3a2c1fb6ab260550c4290b8abb1335e0fd08b"}, + {file = "watchdog-4.0.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:10b6683df70d340ac3279eff0b2766813f00f35a1d37515d2c99959ada8f05fa"}, + {file = "watchdog-4.0.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:f7c739888c20f99824f7aa9d31ac8a97353e22d0c0e54703a547a218f6637eb3"}, + {file = "watchdog-4.0.2-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:c100d09ac72a8a08ddbf0629ddfa0b8ee41740f9051429baa8e31bb903ad7508"}, + {file = "watchdog-4.0.2-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:f5315a8c8dd6dd9425b974515081fc0aadca1d1d61e078d2246509fd756141ee"}, + {file = "watchdog-4.0.2-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:2d468028a77b42cc685ed694a7a550a8d1771bb05193ba7b24006b8241a571a1"}, + {file = "watchdog-4.0.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:f15edcae3830ff20e55d1f4e743e92970c847bcddc8b7509bcd172aa04de506e"}, + {file = "watchdog-4.0.2-py3-none-manylinux2014_aarch64.whl", hash = "sha256:936acba76d636f70db8f3c66e76aa6cb5136a936fc2a5088b9ce1c7a3508fc83"}, + {file = "watchdog-4.0.2-py3-none-manylinux2014_armv7l.whl", hash = "sha256:e252f8ca942a870f38cf785aef420285431311652d871409a64e2a0a52a2174c"}, + {file = "watchdog-4.0.2-py3-none-manylinux2014_i686.whl", hash = "sha256:0e83619a2d5d436a7e58a1aea957a3c1ccbf9782c43c0b4fed80580e5e4acd1a"}, + {file = "watchdog-4.0.2-py3-none-manylinux2014_ppc64.whl", hash = "sha256:88456d65f207b39f1981bf772e473799fcdc10801062c36fd5ad9f9d1d463a73"}, + {file = "watchdog-4.0.2-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:32be97f3b75693a93c683787a87a0dc8db98bb84701539954eef991fb35f5fbc"}, + {file = "watchdog-4.0.2-py3-none-manylinux2014_s390x.whl", hash = "sha256:c82253cfc9be68e3e49282831afad2c1f6593af80c0daf1287f6a92657986757"}, + 
{file = "watchdog-4.0.2-py3-none-manylinux2014_x86_64.whl", hash = "sha256:c0b14488bd336c5b1845cee83d3e631a1f8b4e9c5091ec539406e4a324f882d8"}, + {file = "watchdog-4.0.2-py3-none-win32.whl", hash = "sha256:0d8a7e523ef03757a5aa29f591437d64d0d894635f8a50f370fe37f913ce4e19"}, + {file = "watchdog-4.0.2-py3-none-win_amd64.whl", hash = "sha256:c344453ef3bf875a535b0488e3ad28e341adbd5a9ffb0f7d62cefacc8824ef2b"}, + {file = "watchdog-4.0.2-py3-none-win_ia64.whl", hash = "sha256:baececaa8edff42cd16558a639a9b0ddf425f93d892e8392a56bf904f5eff22c"}, + {file = "watchdog-4.0.2.tar.gz", hash = "sha256:b4dfbb6c49221be4535623ea4474a4d6ee0a9cef4a80b20c28db4d858b64e270"}, +] + +[package.extras] +watchmedo = ["PyYAML (>=3.10)"] + [metadata] lock-version = "2.1" python-versions = ">=3.11" -content-hash = "c13caf85eb3e16cc5fac0286cdcb43928c1f618d12acc794d58d6615f82e0399" +content-hash = "f033a146734efc7387eb6ce37c87c07579cc7fc24c3b982466fe4a333514ff6f" diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/pyproject.toml b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/pyproject.toml index 2f2d035b5498925485dd7abbd4bcdf48c84f7fb8..c93cb017c9f2a94ce575dc96ab30bf6a4ff68e3c 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/pyproject.toml +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/pyproject.toml @@ -11,17 +11,19 @@ python = ">=3.11" fastapi = "0.121.0" uvicorn = "0.30.6" 
pydantic = "2.12.4" -numpy = ">=2.2.6" +numpy = "2.2.6" pandas = "2.2.3" -pyarrow = ">=15.0.2" +pyarrow = "22.0.0" matplotlib = "3.9.2" scikit-learn = "1.8.0" scipy = "1.14.1" joblib = "1.5.2" threadpoolctl = "3.5.0" +streamlit = "1.37.1" [tool.poetry.group.dev.dependencies] pytest = "8.3.2" +pytest-cov = "5.0.0" httpx = "0.27.2" [build-system] diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_api.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_api.py index 0093ca065fcbbd2636e04466916ebfac42611fd6..740b42024d3ab37b61b2e89ce75ce58ac31c6908 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_api.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_api.py @@ -57,8 +57,9 @@ def test_features(client): payload = resp.json() assert "input_features" in payload assert "required_input_features" in payload + assert "feature_selection_method" in payload assert "SK_ID_CURR" in payload["input_features"] - assert "EXT_SOURCE_2" in payload["input_features"] + assert len(payload["input_features"]) >= 2 def test_predict(client): diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/__init__.py 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9219d4b54f6b0727bb311bf470acb39dc06e4504 --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/__init__.py @@ -0,0 +1 @@ +"""Monitoring package.""" diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/drift_report.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/drift_report.py index ed57998f989c7dec8f1321094dcdb466e819fe07..3a8502f66cc64ee2be123c7ed73434a95cbd67ab 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/drift_report.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/drift_report.py @@ -35,6 +35,37 @@ DEFAULT_FEATURES = [ ] CATEGORICAL_FEATURES = {"CODE_GENDER", "FLAG_OWN_CAR"} +MIN_PROD_SAMPLES_DEFAULT = 200 +PSI_EPS_DEFAULT = 1e-4 +RARE_CATEGORY_MIN_SHARE_DEFAULT = 0.01 +FDR_ALPHA_DEFAULT = 0.05 +DAYS_EMPLOYED_SENTINEL = 365243 + +CATEGORY_NORMALIZATION = { + "CODE_GENDER": { + "F": "F", + "FEMALE": "F", + "0": "F", + "W": "F", + "WOMAN": "F", + "M": "M", + "MALE": "M", + "1": "M", + 
"MAN": "M", + }, + "FLAG_OWN_CAR": { + "Y": "Y", + "YES": "Y", + "TRUE": "Y", + "1": "Y", + "T": "Y", + "N": "N", + "NO": "N", + "FALSE": "N", + "0": "N", + "F": "N", + }, +} def _safe_name(value: str) -> str: @@ -51,13 +82,60 @@ def _load_logs(log_path: Path) -> tuple[pd.DataFrame, pd.DataFrame]: entries.append(json.loads(line)) if not entries: return pd.DataFrame(), pd.DataFrame() - inputs = [entry.get("inputs", {}) for entry in entries if entry.get("inputs")] + inputs = [ + entry.get("inputs") if isinstance(entry.get("inputs"), dict) else {} + for entry in entries + ] inputs_df = pd.DataFrame.from_records(inputs) meta_df = pd.DataFrame.from_records(entries) return inputs_df, meta_df -def _psi(reference: pd.Series, production: pd.Series, eps: float = 1e-6) -> float: +def _normalize_category_value(value: object, mapping: dict[str, str]) -> object: + if pd.isna(value): + return np.nan + key = str(value).strip().upper() + if not key: + return np.nan + return mapping.get(key, "Unknown") + + +def _normalize_categories(df: pd.DataFrame) -> pd.DataFrame: + out = df.copy() + for feature, mapping in CATEGORY_NORMALIZATION.items(): + if feature in out.columns: + out[feature] = out[feature].apply(lambda v: _normalize_category_value(v, mapping)) + return out + + +def _replace_sentinel(series: pd.Series, sentinel: float) -> tuple[pd.Series, float]: + values = pd.to_numeric(series, errors="coerce") + sentinel_mask = values == sentinel + if sentinel_mask.any(): + series = series.copy() + series[sentinel_mask] = np.nan + return series, float(sentinel_mask.mean()) if len(values) else 0.0 + + +def _prepare_categorical( + reference: pd.Series, + production: pd.Series, + min_share: float, + max_categories: int | None = None, + other_label: str = "__OTHER__", +) -> tuple[pd.Series, pd.Series]: + ref_series = reference.fillna("Unknown") + prod_series = production.fillna("Unknown") + ref_freq = ref_series.value_counts(normalize=True) + keep = ref_freq[ref_freq >= 
min_share].index.tolist() + if max_categories is not None: + keep = keep[:max_categories] + ref_series = ref_series.where(ref_series.isin(keep), other=other_label) + prod_series = prod_series.where(prod_series.isin(keep), other=other_label) + return ref_series, prod_series + + +def _psi(reference: pd.Series, production: pd.Series, eps: float = PSI_EPS_DEFAULT) -> float: ref_freq = reference.value_counts(normalize=True, dropna=False) prod_freq = production.value_counts(normalize=True, dropna=False) categories = ref_freq.index.union(prod_freq.index) @@ -87,14 +165,9 @@ def _plot_numeric(ref: pd.Series, prod: pd.Series, output_path: Path) -> None: plt.close() -def _plot_categorical(ref: pd.Series, prod: pd.Series, output_path: Path, max_categories: int = 10) -> None: - ref_series = ref.fillna("Unknown") - prod_series = prod.fillna("Unknown") - top = ref_series.value_counts().index[:max_categories] - ref_series = ref_series.where(ref_series.isin(top), other="__OTHER__") - prod_series = prod_series.where(prod_series.isin(top), other="__OTHER__") - ref_freq = ref_series.value_counts(normalize=True) - prod_freq = prod_series.value_counts(normalize=True) +def _plot_categorical(ref: pd.Series, prod: pd.Series, output_path: Path) -> None: + ref_freq = ref.value_counts(normalize=True) + prod_freq = prod.value_counts(normalize=True) plot_df = pd.DataFrame({"reference": ref_freq, "production": prod_freq}).fillna(0) plot_df.sort_values("reference", ascending=False).plot(kind="bar", figsize=(7, 4)) plt.title(f"Distribution: {ref.name}") @@ -103,6 +176,132 @@ def _plot_categorical(ref: pd.Series, prod: pd.Series, output_path: Path, max_ca plt.close() +def _benjamini_hochberg(pvalues: list[float], alpha: float) -> tuple[list[float], list[bool]]: + if not pvalues: + return [], [] + pvals = np.array(pvalues, dtype=float) + order = np.argsort(pvals) + ranked = pvals[order] + m = len(pvals) + thresholds = alpha * (np.arange(1, m + 1) / m) + below = ranked <= thresholds + reject = 
np.zeros(m, dtype=bool) + if below.any(): + cutoff = np.max(np.where(below)[0]) + reject[order[:cutoff + 1]] = True + qvals = ranked * m / np.arange(1, m + 1) + qvals = np.minimum.accumulate(qvals[::-1])[::-1] + adjusted = np.empty_like(qvals) + adjusted[order] = qvals + return adjusted.tolist(), reject.tolist() + + +def _extract_data_quality(meta_df: pd.DataFrame) -> list[dict[str, object]]: + if "data_quality" not in meta_df.columns: + return [] + dq_entries = [] + for item in meta_df["data_quality"].dropna(): + if isinstance(item, dict): + dq_entries.append(item) + return dq_entries + + +def _dq_has_unknown(dq: dict[str, object], feature: str) -> bool: + unknown = dq.get("unknown_categories") + if isinstance(unknown, dict): + return feature in unknown + if isinstance(unknown, list): + return feature in unknown + return False + + +def _summarize_data_quality( + meta_df: pd.DataFrame, + production_df: pd.DataFrame, + sentinel_rates: dict[str, float], +) -> dict[str, object]: + dq_entries = _extract_data_quality(meta_df) + if dq_entries: + total = len(dq_entries) + missing_rate = np.mean( + [bool(dq.get("missing_required_columns")) for dq in dq_entries] + ) + invalid_rate = np.mean( + [bool(dq.get("invalid_numeric_columns")) for dq in dq_entries] + ) + out_of_range_rate = np.mean( + [bool(dq.get("out_of_range_columns")) for dq in dq_entries] + ) + nan_rate = np.mean([float(dq.get("nan_rate", 0.0)) for dq in dq_entries]) + unknown_gender = np.mean( + [_dq_has_unknown(dq, "CODE_GENDER") for dq in dq_entries] + ) + unknown_car = np.mean( + [_dq_has_unknown(dq, "FLAG_OWN_CAR") for dq in dq_entries] + ) + sentinel_rate = np.mean( + [bool(dq.get("days_employed_sentinel")) for dq in dq_entries] + ) + return { + "source": "log", + "sample_size": total, + "missing_required_rate": float(missing_rate), + "invalid_numeric_rate": float(invalid_rate), + "out_of_range_rate": float(out_of_range_rate), + "nan_rate": float(nan_rate), + "unknown_gender_rate": float(unknown_gender), + 
"unknown_car_rate": float(unknown_car), + "days_employed_sentinel_rate": float(sentinel_rate), + } + + if production_df.empty: + return {"source": "none"} + + missing_rate = float(production_df.isna().any(axis=1).mean()) + unknown_gender_rate = 0.0 + unknown_car_rate = 0.0 + if "CODE_GENDER" in production_df.columns: + unknown_gender_rate = float( + (production_df["CODE_GENDER"] == "Unknown").mean() + ) + if "FLAG_OWN_CAR" in production_df.columns: + unknown_car_rate = float((production_df["FLAG_OWN_CAR"] == "Unknown").mean()) + sentinel_rate = float(sentinel_rates.get("production", 0.0)) + return { + "source": "fallback", + "sample_size": len(production_df), + "missing_required_rate": missing_rate, + "unknown_gender_rate": unknown_gender_rate, + "unknown_car_rate": unknown_car_rate, + "days_employed_sentinel_rate": sentinel_rate, + } + + +def _filter_by_time( + meta_df: pd.DataFrame, + inputs_df: pd.DataFrame, + since: str | None, + until: str | None, +) -> tuple[pd.DataFrame, pd.DataFrame, str]: + if not since and not until: + return meta_df, inputs_df, "" + if "timestamp" not in meta_df.columns: + return meta_df, inputs_df, "timestamp_missing" + timestamps = pd.to_datetime(meta_df["timestamp"], errors="coerce", utc=True) + if timestamps.isna().all(): + return meta_df, inputs_df, "timestamp_invalid" + mask = pd.Series(True, index=meta_df.index) + if since: + since_dt = pd.to_datetime(since, errors="coerce", utc=True) + if not pd.isna(since_dt): + mask &= timestamps >= since_dt + if until: + until_dt = pd.to_datetime(until, errors="coerce", utc=True) + if not pd.isna(until_dt): + mask &= timestamps <= until_dt + return meta_df.loc[mask].reset_index(drop=True), inputs_df.loc[mask].reset_index(drop=True), "filtered" + + def _plot_score_distribution(scores: pd.Series, output_path: Path, bins: int = 30) -> None: plt.figure(figsize=(6, 4)) plt.hist(scores.dropna(), bins=bins, range=(0, 1), alpha=0.8, color="#4C78A8") @@ -134,11 +333,30 @@ def generate_report( 
sample_size: int, psi_threshold: float, score_bins: int, + min_prod_samples: int = MIN_PROD_SAMPLES_DEFAULT, + psi_eps: float = PSI_EPS_DEFAULT, + min_category_share: float = RARE_CATEGORY_MIN_SHARE_DEFAULT, + fdr_alpha: float = FDR_ALPHA_DEFAULT, + min_drift_features: int = 1, + prod_since: str | None = None, + prod_until: str | None = None, ) -> Path: inputs_df, meta_df = _load_logs(log_path) - if inputs_df.empty: + if meta_df.empty: raise SystemExit(f"No inputs found in logs: {log_path}") + meta_df, inputs_df, window_status = _filter_by_time( + meta_df, inputs_df, since=prod_since, until=prod_until + ) + valid_mask = pd.Series(True, index=meta_df.index) + if "status_code" in meta_df.columns: + valid_mask = meta_df["status_code"].fillna(0) < 400 + inputs_df = inputs_df.loc[valid_mask].reset_index(drop=True) + meta_df_valid = meta_df.loc[valid_mask].reset_index(drop=True) + + if inputs_df.empty: + raise SystemExit(f"No valid inputs found in logs: {log_path}") + features = [col for col in DEFAULT_FEATURES if col in inputs_df.columns] if not features: raise SystemExit("No matching features found in production logs.") @@ -148,12 +366,29 @@ def generate_report( reference_df = reference_df.sample(sample_size, random_state=42) numeric_features = [col for col in features if col not in CATEGORICAL_FEATURES] - production_df = _coerce_numeric(inputs_df, numeric_features) + production_df = _normalize_categories(inputs_df) + reference_df = _normalize_categories(reference_df) + production_df = _coerce_numeric(production_df, numeric_features) reference_df = _coerce_numeric(reference_df, numeric_features) + sentinel_rates = {} + if "DAYS_EMPLOYED" in production_df.columns: + production_df["DAYS_EMPLOYED"], prod_rate = _replace_sentinel( + production_df["DAYS_EMPLOYED"], DAYS_EMPLOYED_SENTINEL + ) + reference_df["DAYS_EMPLOYED"], ref_rate = _replace_sentinel( + reference_df["DAYS_EMPLOYED"], DAYS_EMPLOYED_SENTINEL + ) + sentinel_rates = { + "production": prod_rate, + "reference": 
ref_rate, + } + summary_rows: list[dict[str, object]] = [] plots_dir = output_dir / "plots" plots_dir.mkdir(parents=True, exist_ok=True) + n_prod = len(production_df) + n_ref = len(reference_df) for feature in features: if feature not in reference_df.columns: @@ -161,13 +396,24 @@ def generate_report( ref_series = reference_df[feature] prod_series = production_df[feature] if feature in CATEGORICAL_FEATURES: - psi_value = _psi(ref_series, prod_series) + feature_n_prod = int(prod_series.dropna().shape[0]) + feature_n_ref = int(ref_series.dropna().shape[0]) + ref_series, prod_series = _prepare_categorical( + ref_series, prod_series, min_share=min_category_share, other_label="OTHER" + ) + insufficient_sample = feature_n_prod < min_prod_samples + psi_value = None + if not insufficient_sample: + psi_value = _psi(ref_series, prod_series, eps=psi_eps) summary_rows.append( { "feature": feature, "type": "categorical", - "psi": round(psi_value, 4), - "drift_detected": psi_value >= psi_threshold, + "psi": round(psi_value, 4) if psi_value is not None else None, + "drift_detected": bool(psi_value is not None and psi_value >= psi_threshold), + "n_prod": feature_n_prod, + "n_ref": feature_n_ref, + "note": "insufficient_sample" if insufficient_sample else "", } ) plot_path = plots_dir / f"{_safe_name(feature)}.png" @@ -177,19 +423,40 @@ def generate_report( prod_clean = prod_series.dropna() if ref_clean.empty or prod_clean.empty: continue - stat, pvalue = stats.ks_2samp(ref_clean, prod_clean) + feature_n_prod = int(len(prod_clean)) + insufficient_sample = feature_n_prod < min_prod_samples + stat = None + pvalue = None + if not insufficient_sample: + stat, pvalue = stats.ks_2samp(ref_clean, prod_clean) summary_rows.append( { "feature": feature, "type": "numeric", - "ks_stat": round(float(stat), 4), - "p_value": round(float(pvalue), 6), - "drift_detected": pvalue < 0.05, + "ks_stat": round(float(stat), 4) if stat is not None else None, + "p_value": round(float(pvalue), 6) if pvalue 
is not None else None, + "p_value_fdr": None, + "drift_detected": bool(pvalue is not None and pvalue < 0.05), + "n_prod": feature_n_prod, + "n_ref": int(len(ref_clean)), + "note": "insufficient_sample" if insufficient_sample else "", } ) plot_path = plots_dir / f"{_safe_name(feature)}.png" _plot_numeric(ref_series, prod_series, plot_path) + numeric_rows = [ + (idx, row) + for idx, row in enumerate(summary_rows) + if row.get("type") == "numeric" and row.get("p_value") is not None + ] + if numeric_rows: + pvalues = [row["p_value"] for _, row in numeric_rows] + qvals, reject = _benjamini_hochberg(pvalues, alpha=fdr_alpha) + for (idx, _), qval, rejected in zip(numeric_rows, qvals, reject): + summary_rows[idx]["p_value_fdr"] = round(float(qval), 6) + summary_rows[idx]["drift_detected"] = bool(rejected) + summary_df = pd.DataFrame(summary_rows) output_dir.mkdir(parents=True, exist_ok=True) report_path = output_dir / "drift_report.html" @@ -200,9 +467,7 @@ def generate_report( latency_p50 = float(latency_ms.quantile(0.5)) if not latency_ms.empty else 0.0 latency_p95 = float(latency_ms.quantile(0.95)) if not latency_ms.empty else 0.0 - valid_meta = meta_df - if "status_code" in meta_df.columns: - valid_meta = meta_df[meta_df["status_code"] < 400] + valid_meta = meta_df_valid score_series = ( pd.to_numeric(valid_meta.get("probability", pd.Series(dtype=float)), errors="coerce") .dropna() @@ -239,12 +504,61 @@ def generate_report( _plot_prediction_rate(pred_series, pred_plot_path) score_plots_html += "\n" + drift_flags = summary_df.get("drift_detected", pd.Series(dtype=bool)).fillna(False) + drift_count = int(drift_flags.sum()) + overall_drift = drift_count >= max(min_drift_features, 1) and n_prod >= min_prod_samples + drift_features = summary_df.loc[drift_flags, "feature"].tolist() if not summary_df.empty else [] + + dq_metrics = _summarize_data_quality(meta_df, production_df, sentinel_rates) + if dq_metrics.get("source") == "none": + dq_html = "

    No data quality metrics available.

    " + else: + dq_items = [ + f"
  • Source: {dq_metrics.get('source')}
  • ", + f"
  • Sample size: {dq_metrics.get('sample_size')}
  • ", + f"
  • Missing required rate: {dq_metrics.get('missing_required_rate', 0.0):.2%}
  • ", + ] + if "invalid_numeric_rate" in dq_metrics: + dq_items.append(f"
  • Invalid numeric rate: {dq_metrics.get('invalid_numeric_rate', 0.0):.2%}
  • ") + if "out_of_range_rate" in dq_metrics: + dq_items.append(f"
  • Out-of-range rate: {dq_metrics.get('out_of_range_rate', 0.0):.2%}
  • ") + if "nan_rate" in dq_metrics: + dq_items.append(f"
  • NaN rate (avg): {dq_metrics.get('nan_rate', 0.0):.2%}
  • ") + dq_items.append( + f"
  • Unknown CODE_GENDER rate: {dq_metrics.get('unknown_gender_rate', 0.0):.2%}
  • " + ) + dq_items.append( + f"
  • Unknown FLAG_OWN_CAR rate: {dq_metrics.get('unknown_car_rate', 0.0):.2%}
  • " + ) + dq_items.append( + f"
  • DAYS_EMPLOYED sentinel rate: {dq_metrics.get('days_employed_sentinel_rate', 0.0):.2%}
  • " + ) + dq_html = "" + summary_html = summary_df.to_html(index=False, escape=False) plots_html = "\n".join( f"

    {row['feature']}

    " for _, row in summary_df.iterrows() ) + sample_badge = "" + if n_prod < min_prod_samples: + sample_badge = ( + "
    Sample insuffisant: " + f"{n_prod} < {min_prod_samples} (resultats non fiables).
    " + ) + drift_badge = ( + "
    No drift alert
    " + if not overall_drift + else "
    Drift alert
    " + ) + if not prod_since and not prod_until: + window_info = "full_log" + elif window_status in {"timestamp_missing", "timestamp_invalid"}: + window_info = f"{window_status} (no filter applied)" + else: + window_info = f"{prod_since or '...'} to {prod_until or '...'}" + html = f""" @@ -256,6 +570,10 @@ def generate_report( th, td {{ border: 1px solid #ddd; padding: 8px; }} th {{ background: #f3f3f3; }} img {{ max-width: 720px; }} + .badge {{ display: inline-block; padding: 6px 10px; border-radius: 6px; font-weight: bold; margin: 6px 0; }} + .badge.warning {{ background: #fde68a; color: #92400e; }} + .badge.ok {{ background: #d1fae5; color: #065f46; }} + .badge.alert {{ background: #fee2e2; color: #991b1b; }} @@ -266,12 +584,29 @@ def generate_report(
  • Latency p50: {latency_p50:.2f} ms
  • Latency p95: {latency_p95:.2f} ms
  • + {sample_badge}

    Score Monitoring

    {score_plots_html} +

    Data Quality

    + {dq_html}

    Data Drift Summary

    + {drift_badge} + {summary_html}

    Feature Distributions

    {plots_html} @@ -291,6 +626,13 @@ def main() -> None: parser.add_argument("--sample-size", type=int, default=50000) parser.add_argument("--psi-threshold", type=float, default=0.2) parser.add_argument("--score-bins", type=int, default=30) + parser.add_argument("--min-prod-samples", type=int, default=MIN_PROD_SAMPLES_DEFAULT) + parser.add_argument("--psi-eps", type=float, default=PSI_EPS_DEFAULT) + parser.add_argument("--min-category-share", type=float, default=RARE_CATEGORY_MIN_SHARE_DEFAULT) + parser.add_argument("--fdr-alpha", type=float, default=FDR_ALPHA_DEFAULT) + parser.add_argument("--min-drift-features", type=int, default=1) + parser.add_argument("--prod-since", type=str, default=None) + parser.add_argument("--prod-until", type=str, default=None) args = parser.parse_args() report_path = generate_report( @@ -300,6 +642,13 @@ def main() -> None: sample_size=args.sample_size, psi_threshold=args.psi_threshold, score_bins=args.score_bins, + min_prod_samples=args.min_prod_samples, + psi_eps=args.psi_eps, + min_category_share=args.min_category_share, + fdr_alpha=args.fdr_alpha, + min_drift_features=args.min_drift_features, + prod_since=args.prod_since, + prod_until=args.prod_until, ) print(f"Drift report saved to {report_path}") diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/streamlit_app.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/streamlit_app.py new file mode 100644 index 0000000000000000000000000000000000000000..b76e7bf60887c30da836f296e43377aea063de63 --- /dev/null +++ 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/streamlit_app.py @@ -0,0 +1,142 @@ +from __future__ import annotations + +from pathlib import Path +import sys + +import numpy as np +import pandas as pd +import streamlit as st +import matplotlib +matplotlib.use("Agg") +import matplotlib.pyplot as plt + +ROOT = Path(__file__).resolve().parents[1] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + +from monitoring.drift_report import generate_report, _load_logs + + +def _load_logs_safe(log_path: Path) -> tuple[pd.DataFrame, pd.DataFrame]: + if not log_path.exists(): + return pd.DataFrame(), pd.DataFrame() + return _load_logs(log_path) + + +st.set_page_config(page_title="Credit Scoring Monitoring", layout="wide") +st.title("Credit Scoring Monitoring") + +with st.sidebar: + st.header("Inputs") + log_path = Path(st.text_input("Logs path", "logs/predictions.jsonl")) + reference_path = Path(st.text_input("Reference data", "data/data_final.parquet")) + output_dir = Path(st.text_input("Output dir", "reports")) + sample_size = st.number_input("Sample size", min_value=1000, max_value=200000, value=50000, step=1000) + psi_threshold = st.number_input("PSI threshold", min_value=0.05, max_value=1.0, value=0.2, step=0.05) + score_bins = st.number_input("Score bins", min_value=10, max_value=100, value=30, step=5) + min_prod_samples = st.number_input("Min prod samples", min_value=10, max_value=5000, value=200, step=50) + psi_eps = st.number_input("PSI epsilon", min_value=1e-6, max_value=1e-2, value=1e-4, format="%.6f") + min_category_share = st.number_input( + "Min category share", + min_value=0.001, + max_value=0.2, + value=0.01, + step=0.005, + format="%.3f", + ) + fdr_alpha = st.number_input("FDR alpha", min_value=0.01, max_value=0.2, value=0.05, step=0.01, format="%.2f") + min_drift_features = st.number_input("Min 
drift features", min_value=1, max_value=10, value=1, step=1) + prod_since = st.text_input("Prod since (ISO)", "") + prod_until = st.text_input("Prod until (ISO)", "") + +inputs_df, meta_df = _load_logs_safe(log_path) + +if meta_df.empty: + st.warning("No logs found. Check the logs path.") + st.stop() + +total_calls = len(meta_df) +valid_mask = meta_df.get("status_code", pd.Series(dtype=int)).fillna(0) < 400 +prod_inputs = inputs_df.loc[valid_mask] if not inputs_df.empty else inputs_df +n_prod = len(prod_inputs) +error_rate = float((meta_df.get("status_code", pd.Series(dtype=int)) >= 400).mean()) if total_calls else 0.0 +latency_ms = meta_df.get("latency_ms", pd.Series(dtype=float)).dropna() +latency_p50 = float(latency_ms.quantile(0.5)) if not latency_ms.empty else 0.0 +latency_p95 = float(latency_ms.quantile(0.95)) if not latency_ms.empty else 0.0 + +valid_meta = meta_df +if "status_code" in meta_df.columns: + valid_meta = meta_df[meta_df["status_code"] < 400] +scores = pd.to_numeric(valid_meta.get("probability", pd.Series(dtype=float)), errors="coerce").dropna() +predictions = pd.to_numeric(valid_meta.get("prediction", pd.Series(dtype=float)), errors="coerce").dropna() + +col1, col2, col3, col4 = st.columns(4) +col1.metric("Total calls", f"{total_calls}") +col2.metric("Error rate", f"{error_rate:.2%}") +col3.metric("Latency p50", f"{latency_p50:.2f} ms") +col4.metric("Latency p95", f"{latency_p95:.2f} ms") +st.caption(f"Production sample size (status < 400): {n_prod}") +if n_prod < int(min_prod_samples): + st.warning("Sample insuffisant: drift non fiable (gate active).") + +st.subheader("Score Monitoring") +if not scores.empty: + score_stats = { + "mean": float(scores.mean()), + "p50": float(scores.quantile(0.5)), + "p95": float(scores.quantile(0.95)), + "min": float(scores.min()), + "max": float(scores.max()), + } + st.json(score_stats) + hist, bin_edges = np.histogram(scores, bins=int(score_bins), range=(0, 1)) + fig, ax = plt.subplots(figsize=(6, 3)) + 
ax.bar(bin_edges[:-1], hist, width=np.diff(bin_edges), align="edge", color="#4C78A8") + ax.set_xlabel("Predicted probability") + ax.set_ylabel("Count") + ax.set_title("Score distribution") + st.pyplot(fig, clear_figure=True) +else: + st.info("No probability scores available in logs.") + +if not predictions.empty: + pred_rate = float(predictions.mean()) + st.metric("Predicted default rate", f"{pred_rate:.2%}") + pred_counts = predictions.value_counts(normalize=True, dropna=False).sort_index() + fig, ax = plt.subplots(figsize=(4, 3)) + ax.bar(pred_counts.index.astype(str), pred_counts.values, color="#F58518") + ax.set_xlabel("Predicted class") + ax.set_ylabel("Share") + ax.set_ylim(0, 1) + ax.set_title("Prediction rate") + st.pyplot(fig, clear_figure=True) + +st.subheader("Data Drift") +if st.button("Generate drift report"): + try: + report_path = generate_report( + log_path=log_path, + reference_path=reference_path, + output_dir=output_dir, + sample_size=int(sample_size), + psi_threshold=float(psi_threshold), + score_bins=int(score_bins), + min_prod_samples=int(min_prod_samples), + psi_eps=float(psi_eps), + min_category_share=float(min_category_share), + fdr_alpha=float(fdr_alpha), + min_drift_features=int(min_drift_features), + prod_since=prod_since or None, + prod_until=prod_until or None, + ) + st.success(f"Generated: {report_path}") + except ImportError as exc: + st.error( + "Parquet engine missing. Install `pyarrow` in this environment or run " + "`python -m streamlit run monitoring/streamlit_app.py`." 
+ ) + st.exception(exc) + +report_file = output_dir / "drift_report.html" +if report_file.exists(): + st.markdown(f"Report available at `{report_file}`") diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/profiling/profile_inference.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/profiling/profile_inference.py new file mode 100644 index 0000000000000000000000000000000000000000..5174d6314f7871916caa518cbdc1df01287f74d8 --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/profiling/profile_inference.py @@ -0,0 +1,241 @@ +from __future__ import annotations + +import argparse +import json +import warnings +import time +from pathlib import Path +import sys +from typing import Any +import cProfile +import io +import pstats + +import numpy as np +import pandas as pd +from pandas.errors import PerformanceWarning + +try: + from sklearn.exceptions import InconsistentVersionWarning +except Exception: # pragma: no cover + InconsistentVersionWarning = Warning # type: ignore[misc] + +ROOT = Path(__file__).resolve().parents[1] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + +warnings.filterwarnings("ignore", category=PerformanceWarning) +warnings.filterwarnings("ignore", category=InconsistentVersionWarning) + +from app.main import ( + DATA_PATH, + MODEL_PATH, + ARTIFACTS_PATH, + load_model, + load_preprocessor, + preprocess_input, + new_features_creation, + _apply_correlated_imputation, + _ensure_required_columns, + _validate_numeric_inputs, + _validate_numeric_ranges, +) + + +def preprocess_input_legacy(df_raw: 
pd.DataFrame, artifacts) -> pd.DataFrame: + df = df_raw.copy() + + for col in artifacts.required_input_columns: + if col not in df.columns: + df[col] = np.nan + + _ensure_required_columns(df, artifacts.required_input_columns) + _validate_numeric_inputs(df, artifacts.numeric_required_columns) + _validate_numeric_ranges( + df, + {k: v for k, v in artifacts.numeric_ranges.items() if k in artifacts.numeric_required_columns}, + ) + + df["is_train"] = 0 + df["is_test"] = 1 + if "TARGET" not in df.columns: + df["TARGET"] = 0 + + df = new_features_creation(df) + df.replace([np.inf, -np.inf], np.nan, inplace=True) + + for col in artifacts.columns_keep: + if col not in df.columns: + df[col] = np.nan + df = df[artifacts.columns_keep] + + _apply_correlated_imputation(df, artifacts) + + for col, median in artifacts.numeric_medians.items(): + if col in df.columns: + df[col] = pd.to_numeric(df[col], errors="coerce") + df[col] = df[col].fillna(median) + + for col in artifacts.categorical_columns: + if col in df.columns: + df[col] = df[col].fillna("Unknown") + + _ensure_required_columns(df, artifacts.required_input_columns) + + if "CODE_GENDER" in df.columns and (df["CODE_GENDER"] == "XNA").any(): + raise ValueError("CODE_GENDER cannot be 'XNA' based on training rules.") + + for col, max_val in artifacts.outlier_maxes.items(): + if col in df.columns and (df[col] >= max_val).any(): + raise ValueError(f"Input contains outlier values removed during training: {col}") + + df_hot = pd.get_dummies(df, columns=artifacts.categorical_columns) + for col in artifacts.features_to_scaled: + if col not in df_hot.columns: + df_hot[col] = 0 + df_hot = df_hot[artifacts.features_to_scaled] + + scaled = artifacts.scaler.transform(df_hot) + return pd.DataFrame(scaled, columns=artifacts.features_to_scaled, index=df.index) + + +def _load_input_sample(data_path: Path, columns: list[str], sample_size: int) -> pd.DataFrame: + df = pd.read_parquet(data_path, columns=columns) + if sample_size and len(df) > 
sample_size: + df = df.sample(sample_size, random_state=42) + return df.reset_index(drop=True) + + +def _fill_required_inputs(df: pd.DataFrame, artifacts) -> pd.DataFrame: + df_filled = df.copy() + for col in artifacts.required_input_columns: + if col not in df_filled.columns: + df_filled[col] = np.nan + if col in artifacts.numeric_medians: + df_filled[col] = pd.to_numeric(df_filled[col], errors="coerce").fillna( + artifacts.numeric_medians[col] + ) + if col in artifacts.numeric_ranges: + min_val, max_val = artifacts.numeric_ranges[col] + df_filled[col] = df_filled[col].clip(min_val, max_val) + elif col in artifacts.categorical_columns: + df_filled[col] = df_filled[col].fillna("Unknown") + else: + df_filled[col] = df_filled[col].fillna(0) + if col in artifacts.outlier_maxes: + max_val = artifacts.outlier_maxes[col] + if pd.api.types.is_integer_dtype(df_filled[col]): + replace_val = max_val - 1 + else: + replace_val = np.nextafter(max_val, -np.inf) + df_filled.loc[df_filled[col] >= max_val, col] = replace_val + return df_filled + + +def _benchmark( + *, + name: str, + preprocess_fn, + model, + artifacts, + df_inputs: pd.DataFrame, + batch_size: int, + runs: int, +) -> dict[str, Any]: + durations = [] + for _ in range(runs): + for start in range(0, len(df_inputs), batch_size): + batch = df_inputs.iloc[start:start + batch_size] + t0 = time.perf_counter() + features = preprocess_fn(batch, artifacts) + if hasattr(model, "predict_proba"): + _ = model.predict_proba(features)[:, 1] + else: + _ = model.predict(features) + durations.append((time.perf_counter() - t0) * 1000.0) + durations = np.array(durations, dtype=float) + return { + "name": name, + "batches": int(len(durations)), + "batch_size": int(batch_size), + "mean_ms": float(durations.mean()) if durations.size else 0.0, + "p50_ms": float(np.percentile(durations, 50)) if durations.size else 0.0, + "p95_ms": float(np.percentile(durations, 95)) if durations.size else 0.0, + "throughput_rows_per_sec": float( + 
(batch_size / (durations.mean() / 1000.0)) if durations.size else 0.0 + ), + } + + +def _profile(preprocess_fn, model, artifacts, df_inputs: pd.DataFrame, batch_size: int) -> str: + profiler = cProfile.Profile() + batch = df_inputs.iloc[:batch_size] + profiler.enable() + features = preprocess_fn(batch, artifacts) + if hasattr(model, "predict_proba"): + _ = model.predict_proba(features)[:, 1] + else: + _ = model.predict(features) + profiler.disable() + stream = io.StringIO() + stats = pstats.Stats(profiler, stream=stream).sort_stats("cumulative") + stats.print_stats(30) + return stream.getvalue() + + +def main() -> None: + parser = argparse.ArgumentParser(description="Profile and benchmark inference latency.") + parser.add_argument("--data-path", type=Path, default=DATA_PATH) + parser.add_argument("--model-path", type=Path, default=MODEL_PATH) + parser.add_argument("--artifacts-path", type=Path, default=ARTIFACTS_PATH) + parser.add_argument("--sample-size", type=int, default=2000) + parser.add_argument("--batch-size", type=int, default=128) + parser.add_argument("--runs", type=int, default=3) + parser.add_argument("--output-json", type=Path, default=Path("docs/performance/benchmark_results.json")) + parser.add_argument("--output-profile", type=Path, default=Path("docs/performance/profile_summary.txt")) + args = parser.parse_args() + + preprocessor = load_preprocessor(args.data_path, args.artifacts_path) + model = load_model(args.model_path) + + input_cols = list(preprocessor.required_input_columns) + df_inputs = _load_input_sample(args.data_path, input_cols, args.sample_size) + df_inputs = _fill_required_inputs(df_inputs, preprocessor) + + results = [] + results.append( + _benchmark( + name="optimized_preprocess", + preprocess_fn=preprocess_input, + model=model, + artifacts=preprocessor, + df_inputs=df_inputs, + batch_size=args.batch_size, + runs=args.runs, + ) + ) + results.append( + _benchmark( + name="legacy_preprocess_alignment", + 
preprocess_fn=preprocess_input_legacy, + model=model, + artifacts=preprocessor, + df_inputs=df_inputs, + batch_size=args.batch_size, + runs=args.runs, + ) + ) + + args.output_json.parent.mkdir(parents=True, exist_ok=True) + args.output_json.write_text(json.dumps(results, indent=2), encoding="utf-8") + + profile_text = _profile(preprocess_input, model, preprocessor, df_inputs, args.batch_size) + args.output_profile.parent.mkdir(parents=True, exist_ok=True) + args.output_profile.write_text(profile_text, encoding="utf-8") + + print(f"Saved benchmarks to {args.output_json}") + print(f"Saved profile to {args.output_profile}") + + +if __name__ == "__main__": + main() diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt index 900b0c422455c914c48ba1df7957a9907b69a6e7..af08057a898d8c18fba107d8b4338f025e625925 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt @@ -1,15 +1,153 @@ -# Single requirements file aligned with pyproject.toml (Poetry). 
+aiofiles==23.2.1 +alembic==1.17.2 +altair==5.5.0 +annotated-doc==0.0.4 +annotated-types==0.7.0 +anyio==4.12.0 +attrs==25.4.0 +blinker==1.9.0 +build==1.3.0 +CacheControl==0.14.4 +cachetools==5.5.2 +certifi==2025.11.12 +cffi==2.0.0 +charset-normalizer==3.4.4 +cleo==2.1.0 +click==8.3.1 +cloudpickle==3.1.2 +contourpy==1.3.3 +coverage==7.13.1 +crashtest==0.4.1 +cryptography==46.0.3 +cycler==0.12.1 +databricks-sdk==0.74.0 +distlib==0.4.0 +docker==7.1.0 +dulwich==0.24.10 fastapi==0.121.0 -uvicorn==0.30.6 -pydantic==2.12.4 +fastjsonschema==2.21.2 +ffmpy==1.0.0 +filelock==3.20.1 +findpython==0.7.1 +Flask==3.1.2 +flask-cors==6.0.2 +fonttools==4.61.1 +fsspec==2025.12.0 +gitdb==4.0.12 +GitPython==3.1.45 +google-auth==2.45.0 +gradio==4.44.1 +gradio_client==1.3.0 +graphene==3.4.3 +graphql-core==3.2.7 +graphql-relay==3.2.0 +gunicorn==23.0.0 +h11==0.16.0 +hf-xet==1.2.0 +httpcore==1.0.9 +httpx==0.27.2 +huey==2.5.5 +huggingface_hub==1.2.3 +idna==3.11 +importlib_metadata==8.7.0 +importlib_resources==6.5.2 +iniconfig==2.3.0 +installer==0.7.0 +itsdangerous==2.2.0 +jaraco.classes==3.4.0 +jaraco.context==6.0.2 +jaraco.functools==4.4.0 +Jinja2==3.1.6 +joblib==1.5.2 +jsonschema==4.25.1 +jsonschema-specifications==2025.9.1 +keyring==25.7.0 +kiwisolver==1.4.9 +lightgbm==4.6.0 +lxml==6.0.2 +Mako==1.3.10 +markdown-it-py==4.0.0 +MarkupSafe==2.1.5 +matplotlib==3.9.2 +mdurl==0.1.2 +mlflow==3.7.0 +mlflow-skinny==3.7.0 +mlflow-tracing==3.7.0 +more-itertools==10.8.0 +msgpack==1.1.2 +narwhals==2.14.0 numpy==2.2.6 +opentelemetry-api==1.39.1 +opentelemetry-proto==1.39.1 +opentelemetry-sdk==1.39.1 +opentelemetry-semantic-conventions==0.60b1 +orjson==3.11.5 +packaging==24.2 pandas==2.2.3 +pbs-installer==2025.12.17 +pillow==10.4.0 +pkginfo==1.12.1.2 +platformdirs==4.5.1 +pluggy==1.6.0 +poetry==2.2.1 +poetry-core==2.2.1 +protobuf==5.29.5 pyarrow==22.0.0 -matplotlib==3.9.2 +pyasn1==0.6.1 +pyasn1_modules==0.4.2 +pycparser==2.23 +pydantic==2.12.4 +pydantic_core==2.41.5 +pydeck==0.9.1 +pydub==0.25.1 
+Pygments==2.19.2 +pyparsing==3.2.5 +pyproject_hooks==1.2.0 +pytest==8.3.2 +pytest-cov==5.0.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.2.1 +python-multipart==0.0.21 +python-pptx==1.0.2 +pytz==2025.2 +PyYAML==6.0.3 +RapidFuzz==3.14.3 +referencing==0.37.0 +requests==2.32.5 +requests-toolbelt==1.0.0 +rich==13.9.4 +rpds-py==0.30.0 +rsa==4.9.1 +ruff==0.14.10 scikit-learn==1.8.0 scipy==1.14.1 -joblib==1.5.2 +semantic-version==2.10.0 +shellingham==1.5.4 +six==1.17.0 +smmap==5.0.2 +sniffio==1.3.1 +SQLAlchemy==2.0.45 +sqlparse==0.5.4 +starlette==0.49.3 +streamlit==1.37.1 +tenacity==8.5.0 threadpoolctl==3.5.0 -pytest==8.3.2 -pytest-cov==5.0.0 -httpx==0.27.2 +toml==0.10.2 +tomlkit==0.12.0 +tornado==6.5.4 +tqdm==4.67.1 +trove-classifiers==2025.12.1.14 +typer==0.21.0 +typer-slim==0.21.0 +typing-inspection==0.4.2 +typing_extensions==4.15.0 +tzdata==2025.3 +urllib3==2.6.2 +uvicorn==0.30.6 +virtualenv==20.35.4 +websockets==12.0 +Werkzeug==3.1.4 +xattr==1.3.0 +xlsxwriter==3.2.9 +zipp==3.23.0 +zstandard==0.25.0 diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_api.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_api.py index 740b42024d3ab37b61b2e89ce75ce58ac31c6908..bf5088ab967d58763c701874745fae423a53a407 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_api.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_api.py @@ -108,3 +108,23 @@ def 
test_predict_out_of_range(client): assert resp.status_code == 422 detail = resp.json().get("detail", {}) assert detail.get("message") == "Input contains values outside expected ranges." + + +def test_predict_normalizes_categoricals(client): + preprocessor = client.app.state.preprocessor + payload = _build_payload(preprocessor) + if "CODE_GENDER" in payload["data"]: + payload["data"]["CODE_GENDER"] = "female" + if "FLAG_OWN_CAR" in payload["data"]: + payload["data"]["FLAG_OWN_CAR"] = "true" + resp = client.post("/predict", json=payload) + assert resp.status_code == 200 + + +def test_predict_days_employed_sentinel(client): + preprocessor = client.app.state.preprocessor + payload = _build_payload(preprocessor) + if "DAYS_EMPLOYED" in payload["data"]: + payload["data"]["DAYS_EMPLOYED"] = 365243 + resp = client.post("/predict", json=payload) + assert resp.status_code == 200 diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_drift_report.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_drift_report.py new file mode 100644 index 0000000000000000000000000000000000000000..8ddf286777ad5fe336273c93b12e331b3d2c7f10 --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_drift_report.py @@ -0,0 +1,80 @@ +import json +from pathlib import Path + +import pandas as pd + +from monitoring.drift_report import generate_report + + +def _write_jsonl(path: Path, entries: list[dict]) -> None: + with path.open("w", encoding="utf-8") as handle: + for entry in entries: + handle.write(json.dumps(entry, ensure_ascii=True) 
+ "\n") + + +def test_generate_report_insufficient_sample(tmp_path): + log_path = tmp_path / "predictions.jsonl" + entries = [ + { + "inputs": { + "EXT_SOURCE_2": 0.45, + "EXT_SOURCE_3": 0.62, + "AMT_ANNUITY": 24700.5, + "EXT_SOURCE_1": 0.41, + "CODE_GENDER": "F", + "DAYS_EMPLOYED": -1200, + "AMT_CREDIT": 406597.5, + "AMT_GOODS_PRICE": 351000.0, + "DAYS_BIRTH": -9461, + "FLAG_OWN_CAR": "N", + }, + "status_code": 200, + "probability": 0.4, + "prediction": 0, + } + ] + _write_jsonl(log_path, entries) + + reference_df = pd.DataFrame( + [ + { + "EXT_SOURCE_2": 0.44, + "EXT_SOURCE_3": 0.61, + "AMT_ANNUITY": 25500.0, + "EXT_SOURCE_1": 0.39, + "CODE_GENDER": "F", + "DAYS_EMPLOYED": -1500, + "AMT_CREDIT": 405000.0, + "AMT_GOODS_PRICE": 350000.0, + "DAYS_BIRTH": -9500, + "FLAG_OWN_CAR": "N", + }, + { + "EXT_SOURCE_2": 0.33, + "EXT_SOURCE_3": 0.55, + "AMT_ANNUITY": 21000.0, + "EXT_SOURCE_1": 0.35, + "CODE_GENDER": "M", + "DAYS_EMPLOYED": -2000, + "AMT_CREDIT": 300000.0, + "AMT_GOODS_PRICE": 250000.0, + "DAYS_BIRTH": -10000, + "FLAG_OWN_CAR": "Y", + }, + ] + ) + reference_path = tmp_path / "reference.parquet" + reference_df.to_parquet(reference_path) + + output_dir = tmp_path / "reports" + report_path = generate_report( + log_path=log_path, + reference_path=reference_path, + output_dir=output_dir, + sample_size=10, + psi_threshold=0.2, + score_bins=10, + min_prod_samples=200, + ) + html = report_path.read_text(encoding="utf-8") + assert "Sample insuffisant" in html diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/pyproject.toml b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/pyproject.toml index c93cb017c9f2a94ce575dc96ab30bf6a4ff68e3c..dba4ad5eb4c5837b07f81f5582248469e232bb1f 100644 --- 
a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/pyproject.toml +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/pyproject.toml @@ -20,6 +20,8 @@ scipy = "1.14.1" joblib = "1.5.2" threadpoolctl = "3.5.0" streamlit = "1.37.1" +gradio = "4.44.1" +huggingface_hub = "1.2.3" [tool.poetry.group.dev.dependencies] pytest = "8.3.2" diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/pyproject.toml b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/pyproject.toml index dba4ad5eb4c5837b07f81f5582248469e232bb1f..8752a86f93c214434cce2c682dc4b838b0cdf7cb 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/pyproject.toml +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/pyproject.toml @@ -21,7 +21,7 @@ joblib = "1.5.2" threadpoolctl = "3.5.0" streamlit = "1.37.1" gradio = "4.44.1" -huggingface_hub = "1.2.3" +huggingface_hub = "0.24.7" [tool.poetry.group.dev.dependencies] pytest = "8.3.2" diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt index af08057a898d8c18fba107d8b4338f025e625925..f19d1a870aad8ad095cb6525910aa25d0fcde3ba 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt @@ -47,7 +47,7 @@ hf-xet==1.2.0 httpcore==1.0.9 httpx==0.27.2 huey==2.5.5 -huggingface_hub==1.2.3 +huggingface_hub==0.24.7 idna==3.11 importlib_metadata==8.7.0 importlib_resources==6.5.2 diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/pyproject.toml b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/pyproject.toml index 8752a86f93c214434cce2c682dc4b838b0cdf7cb..d17d7ee9fedbbe55a535007a8904957055b41c06 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/pyproject.toml +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/pyproject.toml @@ -7,27 +7,196 @@ readme = "README.md" packages = [{ include = "app" }] [tool.poetry.dependencies] -python = ">=3.11" +python = ">=3.11,<3.13" +aiofiles = "23.2.1" +alembic = "1.17.2" +altair = "5.5.0" +annotated-doc = "0.0.4" +annotated-types = "0.7.0" +anyio = "4.12.0" +appnope = "0.1.4" +asttokens = "3.0.1" +attrs = "25.4.0" +blinker = "1.9.0" +build = "1.3.0" +CacheControl = "0.14.4" +cachetools = "5.5.2" +certifi = "2025.11.12" +cffi = "2.0.0" +charset-normalizer = 
"3.4.4" +cleo = "2.1.0" +click = "8.3.1" +cloudpickle = "3.1.2" +comm = "0.2.3" +contourpy = "1.3.3" +coverage = "7.13.1" +crashtest = "0.4.1" +cryptography = "46.0.3" +cycler = "0.12.1" +databricks-sdk = "0.74.0" +debugpy = "1.8.19" +decorator = "5.2.1" +distlib = "0.4.0" +docker = "7.1.0" +dulwich = "0.24.10" +executing = "2.2.1" fastapi = "0.121.0" -uvicorn = "0.30.6" -pydantic = "2.12.4" +fastjsonschema = "2.21.2" +ffmpy = "1.0.0" +filelock = "3.20.1" +findpython = "0.7.1" +Flask = "3.1.2" +flask-cors = "6.0.2" +fonttools = "4.61.1" +fsspec = "2025.12.0" +future = "1.0.0" +gitdb = "4.0.12" +GitPython = "3.1.45" +google-auth = "2.45.0" +gradio = "4.44.1" +gradio_client = "1.3.0" +graphene = "3.4.3" +graphql-core = "3.2.7" +graphql-relay = "3.2.0" +gunicorn = "23.0.0" +h11 = "0.16.0" +hf-xet = "1.2.0" +httpcore = "1.0.9" +httpx = "0.27.2" +huey = "2.5.5" +huggingface-hub = "0.24.7" +hyperopt = "0.2.7" +idna = "3.11" +importlib_metadata = "8.7.0" +importlib_resources = "6.5.2" +iniconfig = "2.3.0" +installer = "0.7.0" +ipykernel = "7.1.0" +ipython = "9.8.0" +ipython_pygments_lexers = "1.1.1" +itsdangerous = "2.2.0" +jaraco.classes = "3.4.0" +jaraco.context = "6.0.2" +jaraco.functools = "4.4.0" +jedi = "0.19.2" +Jinja2 = "3.1.6" +joblib = "1.5.2" +jsonschema = "4.25.1" +jsonschema-specifications = "2025.9.1" +jupyter_client = "8.7.0" +jupyter_core = "5.9.1" +keyring = "25.7.0" +kiwisolver = "1.4.9" +lightgbm = "4.6.0" +llvmlite = "0.46.0" +lxml = "6.0.2" +Mako = "1.3.10" +markdown-it-py = "4.0.0" +MarkupSafe = "2.1.5" +matplotlib = "3.9.2" +matplotlib-inline = "0.2.1" +mdurl = "0.1.2" +missingno = "0.5.2" +mlflow = "3.7.0" +mlflow-skinny = "3.7.0" +mlflow-tracing = "3.7.0" +more-itertools = "10.8.0" +msgpack = "1.1.2" +narwhals = "2.14.0" +nest-asyncio = "1.6.0" +networkx = "3.6.1" +numba = "0.63.1" numpy = "2.2.6" +opentelemetry-api = "1.39.1" +opentelemetry-proto = "1.39.1" +opentelemetry-sdk = "1.39.1" +opentelemetry-semantic-conventions = "0.60b1" +orjson = 
"3.11.5" +packaging = "25.0" pandas = "2.2.3" +parso = "0.8.5" +pbs-installer = "2025.12.17" +pexpect = "4.9.0" +pillow = "10.4.0" +pkginfo = "1.12.1.2" +platformdirs = "4.5.1" +pluggy = "1.6.0" +poetry = "2.2.1" +poetry-core = "2.2.1" +prompt_toolkit = "3.0.52" +protobuf = "5.29.5" +psutil = "7.2.1" +ptyprocess = "0.7.0" +pure_eval = "0.2.3" +py4j = "0.10.9.9" pyarrow = "22.0.0" -matplotlib = "3.9.2" +pyasn1 = "0.6.1" +pyasn1_modules = "0.4.2" +pycparser = "2.23" +pydantic = "2.12.4" +pydantic_core = "2.41.5" +pydeck = "0.9.1" +pydub = "0.25.1" +Pygments = "2.19.2" +pyparsing = "3.2.5" +pyproject_hooks = "1.2.0" +pytest = "8.3.2" +pytest-cov = "5.0.0" +python-dateutil = "2.9.0.post0" +python-dotenv = "1.2.1" +python-multipart = "0.0.21" +python-pptx = "1.0.2" +pytz = "2025.2" +PyYAML = "6.0.3" +pyzmq = "27.1.0" +RapidFuzz = "3.14.3" +referencing = "0.37.0" +requests = "2.32.5" +requests-toolbelt = "1.0.0" +rich = "13.9.4" +rpds-py = "0.30.0" +rsa = "4.9.1" +ruff = "0.14.10" scikit-learn = "1.8.0" scipy = "1.14.1" -joblib = "1.5.2" -threadpoolctl = "3.5.0" +seaborn = "0.13.2" +semantic-version = "2.10.0" +setuptools = "80.9.0" +shap = "0.46.0" +shellingham = "1.5.4" +six = "1.17.0" +slicer = "0.0.8" +smmap = "5.0.2" +sniffio = "1.3.1" +SQLAlchemy = "2.0.45" +sqlparse = "0.5.4" +stack-data = "0.6.3" +starlette = "0.49.3" streamlit = "1.37.1" -gradio = "4.44.1" -huggingface_hub = "0.24.7" - -[tool.poetry.group.dev.dependencies] -pytest = "8.3.2" -pytest-cov = "5.0.0" -httpx = "0.27.2" - +tenacity = "8.5.0" +threadpoolctl = "3.5.0" +toml = "0.10.2" +tomlkit = "0.12.0" +tornado = "6.5.4" +tqdm = "4.67.1" +traitlets = "5.14.3" +trove-classifiers = "2025.12.1.14" +typer = "0.21.0" +typer-slim = "0.21.0" +typing-inspection = "0.4.2" +typing_extensions = "4.15.0" +tzdata = "2025.3" +urllib3 = "2.6.2" +uvicorn = "0.30.6" +virtualenv = "20.35.4" +wcwidth = "0.2.14" +websockets = "12.0" +Werkzeug = "3.1.4" +xattr = "1.3.0" +xgboost = "3.1.2" +xlsxwriter = "3.2.9" +zipp = 
"3.23.0" +zstandard = "0.25.0" [build-system] requires = ["poetry-core>=1.8.0"] build-backend = "poetry.core.masonry.api" diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt index f19d1a870aad8ad095cb6525910aa25d0fcde3ba..e7f09f68e0fe34ec6755e637033e128edd00186b 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt @@ -4,6 +4,9 @@ altair==5.5.0 annotated-doc==0.0.4 annotated-types==0.7.0 anyio==4.12.0 +appdirs==1.4.4 +appnope==0.1.4 +asttokens==3.0.1 attrs==25.4.0 blinker==1.9.0 build==1.3.0 @@ -15,15 +18,24 @@ charset-normalizer==3.4.4 cleo==2.1.0 click==8.3.1 cloudpickle==3.1.2 +comm==0.2.3 contourpy==1.3.3 coverage==7.13.1 crashtest==0.4.1 cryptography==46.0.3 cycler==0.12.1 databricks-sdk==0.74.0 +debugpy==1.8.19 +decorator==5.2.1 +deprecation==2.1.0 distlib==0.4.0 +distro==1.9.0 docker==7.1.0 dulwich==0.24.10 +dynaconf==3.2.12 +evidently==0.7.18 +executing==2.2.1 +Faker==40.1.0 fastapi==0.121.0 fastjsonschema==2.21.2 ffmpy==1.0.0 @@ -33,6 +45,7 @@ Flask==3.1.2 flask-cors==6.0.2 fonttools==4.61.1 fsspec==2025.12.0 +future==1.0.0 gitdb==4.0.12 GitPython==3.1.45 google-auth==2.45.0 @@ -45,37 +58,63 @@ gunicorn==23.0.0 h11==0.16.0 hf-xet==1.2.0 httpcore==1.0.9 +httptools==0.7.1 httpx==0.27.2 huey==2.5.5 -huggingface_hub==0.24.7 +huggingface-hub==0.24.7 +hyperopt==0.2.7 idna==3.11 importlib_metadata==8.7.0 importlib_resources==6.5.2 iniconfig==2.3.0 installer==0.7.0 +ipykernel==7.1.0 +ipython==9.8.0 +ipython_pygments_lexers==1.1.1 +ipywidgets==8.1.8 
+iterative-telemetry==0.0.10 itsdangerous==2.2.0 jaraco.classes==3.4.0 jaraco.context==6.0.2 jaraco.functools==4.4.0 +jedi==0.19.2 Jinja2==3.1.6 joblib==1.5.2 jsonschema==4.25.1 jsonschema-specifications==2025.9.1 +jupyter_client==8.7.0 +jupyter_core==5.9.1 +jupyterlab_widgets==3.0.16 keyring==25.7.0 kiwisolver==1.4.9 lightgbm==4.6.0 +litestar==2.19.0 +litestar-htmx==0.5.0 +llvmlite==0.46.0 lxml==6.0.2 Mako==1.3.10 markdown-it-py==4.0.0 MarkupSafe==2.1.5 matplotlib==3.9.2 +matplotlib-inline==0.2.1 mdurl==0.1.2 +missingno==0.5.2 mlflow==3.7.0 mlflow-skinny==3.7.0 mlflow-tracing==3.7.0 more-itertools==10.8.0 msgpack==1.1.2 +msgspec==0.20.0 +multidict==6.7.0 +multipart==1.3.0 +mypy_extensions==1.1.0 narwhals==2.14.0 +nbclient==0.10.4 +nbformat==5.10.4 +nest-asyncio==1.6.0 +networkx==3.6.1 +nltk==3.9.2 +numba==0.63.1 numpy==2.2.6 opentelemetry-api==1.39.1 opentelemetry-proto==1.39.1 @@ -84,14 +123,24 @@ opentelemetry-semantic-conventions==0.60b1 orjson==3.11.5 packaging==24.2 pandas==2.2.3 +parso==0.8.5 +patsy==1.0.2 pbs-installer==2025.12.17 +pexpect==4.9.0 pillow==10.4.0 pkginfo==1.12.1.2 platformdirs==4.5.1 +plotly==5.24.1 pluggy==1.6.0 poetry==2.2.1 poetry-core==2.2.1 +polyfactory==3.2.0 +prompt_toolkit==3.0.52 protobuf==5.29.5 +psutil==7.2.1 +ptyprocess==0.7.0 +pure_eval==0.2.3 +py4j==0.10.9.9 pyarrow==22.0.0 pyasn1==0.6.1 pyasn1_modules==0.4.2 @@ -111,24 +160,33 @@ python-multipart==0.0.21 python-pptx==1.0.2 pytz==2025.2 PyYAML==6.0.3 +pyzmq==27.1.0 RapidFuzz==3.14.3 referencing==0.37.0 +regex==2025.11.3 requests==2.32.5 requests-toolbelt==1.0.0 rich==13.9.4 +rich-click==1.9.5 rpds-py==0.30.0 rsa==4.9.1 ruff==0.14.10 scikit-learn==1.8.0 scipy==1.14.1 +seaborn==0.13.2 semantic-version==2.10.0 +setuptools==80.9.0 +shap==0.46.0 shellingham==1.5.4 six==1.17.0 +slicer==0.0.8 smmap==5.0.2 sniffio==1.3.1 SQLAlchemy==2.0.45 sqlparse==0.5.4 +stack-data==0.6.3 starlette==0.49.3 +statsmodels==0.14.6 streamlit==1.37.1 tenacity==8.5.0 threadpoolctl==3.5.0 @@ -136,18 +194,28 
@@ toml==0.10.2 tomlkit==0.12.0 tornado==6.5.4 tqdm==4.67.1 +traitlets==5.14.3 trove-classifiers==2025.12.1.14 typer==0.21.0 typer-slim==0.21.0 +typing-inspect==0.9.0 typing-inspection==0.4.2 typing_extensions==4.15.0 tzdata==2025.3 +ujson==5.11.0 urllib3==2.6.2 +uuid6==2025.0.1 uvicorn==0.30.6 +uvloop==0.22.1 virtualenv==20.35.4 +watchdog==4.0.2 +watchfiles==1.1.1 +wcwidth==0.2.14 websockets==12.0 Werkzeug==3.1.4 +widgetsnbextension==4.0.15 xattr==1.3.0 +xgboost==3.1.2 xlsxwriter==3.2.9 zipp==3.23.0 zstandard==0.25.0 diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/drift_report.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/drift_report.py index 3a8502f66cc64ee2be123c7ed73434a95cbd67ab..67f0c6fc6189ad1c93f739b281b17176be9430a5 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/drift_report.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/drift_report.py @@ -1,657 +1,111 @@ -# construire drift avec evidently - -from __future__ import annotations +# scripts/drift_report.py import argparse import json -import re from pathlib import Path -import numpy as np import pandas as pd -from scipy import stats - -try: - import matplotlib - matplotlib.use("Agg") - import matplotlib.pyplot as plt -except ImportError as exc: # pragma: no cover - optional plotting dependency - raise SystemExit( - "matplotlib is required for plots. 
Install it with: pip install matplotlib" - ) from exc - - -DEFAULT_FEATURES = [ - "EXT_SOURCE_2", - "EXT_SOURCE_3", - "AMT_ANNUITY", - "EXT_SOURCE_1", - "CODE_GENDER", - "DAYS_EMPLOYED", - "AMT_CREDIT", - "AMT_GOODS_PRICE", - "DAYS_BIRTH", - "FLAG_OWN_CAR", -] - -CATEGORICAL_FEATURES = {"CODE_GENDER", "FLAG_OWN_CAR"} -MIN_PROD_SAMPLES_DEFAULT = 200 -PSI_EPS_DEFAULT = 1e-4 -RARE_CATEGORY_MIN_SHARE_DEFAULT = 0.01 -FDR_ALPHA_DEFAULT = 0.05 -DAYS_EMPLOYED_SENTINEL = 365243 - -CATEGORY_NORMALIZATION = { - "CODE_GENDER": { - "F": "F", - "FEMALE": "F", - "0": "F", - "W": "F", - "WOMAN": "F", - "M": "M", - "MALE": "M", - "1": "M", - "MAN": "M", - }, - "FLAG_OWN_CAR": { - "Y": "Y", - "YES": "Y", - "TRUE": "Y", - "1": "Y", - "T": "Y", - "N": "N", - "NO": "N", - "FALSE": "N", - "0": "N", - "F": "N", - }, -} - - -def _safe_name(value: str) -> str: - return re.sub(r"[^a-zA-Z0-9_-]+", "_", value) - - -def _load_logs(log_path: Path) -> tuple[pd.DataFrame, pd.DataFrame]: - entries: list[dict[str, object]] = [] - with log_path.open("r", encoding="utf-8") as handle: - for line in handle: - line = line.strip() - if not line: - continue - entries.append(json.loads(line)) - if not entries: - return pd.DataFrame(), pd.DataFrame() - inputs = [ - entry.get("inputs") if isinstance(entry.get("inputs"), dict) else {} - for entry in entries - ] - inputs_df = pd.DataFrame.from_records(inputs) - meta_df = pd.DataFrame.from_records(entries) - return inputs_df, meta_df - - -def _normalize_category_value(value: object, mapping: dict[str, str]) -> object: - if pd.isna(value): - return np.nan - key = str(value).strip().upper() - if not key: - return np.nan - return mapping.get(key, "Unknown") - - -def _normalize_categories(df: pd.DataFrame) -> pd.DataFrame: - out = df.copy() - for feature, mapping in CATEGORY_NORMALIZATION.items(): - if feature in out.columns: - out[feature] = out[feature].apply(lambda v: _normalize_category_value(v, mapping)) - return out - - -def _replace_sentinel(series: 
pd.Series, sentinel: float) -> tuple[pd.Series, float]: - values = pd.to_numeric(series, errors="coerce") - sentinel_mask = values == sentinel - if sentinel_mask.any(): - series = series.copy() - series[sentinel_mask] = np.nan - return series, float(sentinel_mask.mean()) if len(values) else 0.0 - - -def _prepare_categorical( - reference: pd.Series, - production: pd.Series, - min_share: float, - max_categories: int | None = None, - other_label: str = "__OTHER__", -) -> tuple[pd.Series, pd.Series]: - ref_series = reference.fillna("Unknown") - prod_series = production.fillna("Unknown") - ref_freq = ref_series.value_counts(normalize=True) - keep = ref_freq[ref_freq >= min_share].index.tolist() - if max_categories is not None: - keep = keep[:max_categories] - ref_series = ref_series.where(ref_series.isin(keep), other=other_label) - prod_series = prod_series.where(prod_series.isin(keep), other=other_label) - return ref_series, prod_series - - -def _psi(reference: pd.Series, production: pd.Series, eps: float = PSI_EPS_DEFAULT) -> float: - ref_freq = reference.value_counts(normalize=True, dropna=False) - prod_freq = production.value_counts(normalize=True, dropna=False) - categories = ref_freq.index.union(prod_freq.index) - ref_probs = ref_freq.reindex(categories, fill_value=0).to_numpy() - prod_probs = prod_freq.reindex(categories, fill_value=0).to_numpy() - ref_probs = np.clip(ref_probs, eps, None) - prod_probs = np.clip(prod_probs, eps, None) - return float(np.sum((ref_probs - prod_probs) * np.log(ref_probs / prod_probs))) - - -def _coerce_numeric(df: pd.DataFrame, columns: list[str]) -> pd.DataFrame: - out = df.copy() - for col in columns: - if col in out.columns: - out[col] = pd.to_numeric(out[col], errors="coerce") - return out - - -def _plot_numeric(ref: pd.Series, prod: pd.Series, output_path: Path) -> None: - plt.figure(figsize=(6, 4)) - plt.hist(ref.dropna(), bins=30, alpha=0.6, label="reference") - plt.hist(prod.dropna(), bins=30, alpha=0.6, label="production") - 
plt.title(f"Distribution: {ref.name}") - plt.legend() - plt.tight_layout() - plt.savefig(output_path) - plt.close() - - -def _plot_categorical(ref: pd.Series, prod: pd.Series, output_path: Path) -> None: - ref_freq = ref.value_counts(normalize=True) - prod_freq = prod.value_counts(normalize=True) - plot_df = pd.DataFrame({"reference": ref_freq, "production": prod_freq}).fillna(0) - plot_df.sort_values("reference", ascending=False).plot(kind="bar", figsize=(7, 4)) - plt.title(f"Distribution: {ref.name}") - plt.tight_layout() - plt.savefig(output_path) - plt.close() - - -def _benjamini_hochberg(pvalues: list[float], alpha: float) -> tuple[list[float], list[bool]]: - if not pvalues: - return [], [] - pvals = np.array(pvalues, dtype=float) - order = np.argsort(pvals) - ranked = pvals[order] - m = len(pvals) - thresholds = alpha * (np.arange(1, m + 1) / m) - below = ranked <= thresholds - reject = np.zeros(m, dtype=bool) - if below.any(): - cutoff = np.max(np.where(below)[0]) - reject[order[:cutoff + 1]] = True - qvals = ranked * m / np.arange(1, m + 1) - qvals = np.minimum.accumulate(qvals[::-1])[::-1] - adjusted = np.empty_like(qvals) - adjusted[order] = qvals - return adjusted.tolist(), reject.tolist() - - -def _extract_data_quality(meta_df: pd.DataFrame) -> list[dict[str, object]]: - if "data_quality" not in meta_df.columns: - return [] - dq_entries = [] - for item in meta_df["data_quality"].dropna(): - if isinstance(item, dict): - dq_entries.append(item) - return dq_entries - - -def _dq_has_unknown(dq: dict[str, object], feature: str) -> bool: - unknown = dq.get("unknown_categories") - if isinstance(unknown, dict): - return feature in unknown - if isinstance(unknown, list): - return feature in unknown - return False - - -def _summarize_data_quality( - meta_df: pd.DataFrame, - production_df: pd.DataFrame, - sentinel_rates: dict[str, float], -) -> dict[str, object]: - dq_entries = _extract_data_quality(meta_df) - if dq_entries: - total = len(dq_entries) - 
missing_rate = np.mean( - [bool(dq.get("missing_required_columns")) for dq in dq_entries] - ) - invalid_rate = np.mean( - [bool(dq.get("invalid_numeric_columns")) for dq in dq_entries] - ) - out_of_range_rate = np.mean( - [bool(dq.get("out_of_range_columns")) for dq in dq_entries] - ) - nan_rate = np.mean([float(dq.get("nan_rate", 0.0)) for dq in dq_entries]) - unknown_gender = np.mean( - [_dq_has_unknown(dq, "CODE_GENDER") for dq in dq_entries] - ) - unknown_car = np.mean( - [_dq_has_unknown(dq, "FLAG_OWN_CAR") for dq in dq_entries] - ) - sentinel_rate = np.mean( - [bool(dq.get("days_employed_sentinel")) for dq in dq_entries] - ) - return { - "source": "log", - "sample_size": total, - "missing_required_rate": float(missing_rate), - "invalid_numeric_rate": float(invalid_rate), - "out_of_range_rate": float(out_of_range_rate), - "nan_rate": float(nan_rate), - "unknown_gender_rate": float(unknown_gender), - "unknown_car_rate": float(unknown_car), - "days_employed_sentinel_rate": float(sentinel_rate), - } - - if production_df.empty: - return {"source": "none"} - - missing_rate = float(production_df.isna().any(axis=1).mean()) - unknown_gender_rate = 0.0 - unknown_car_rate = 0.0 - if "CODE_GENDER" in production_df.columns: - unknown_gender_rate = float( - (production_df["CODE_GENDER"] == "Unknown").mean() - ) - if "FLAG_OWN_CAR" in production_df.columns: - unknown_car_rate = float((production_df["FLAG_OWN_CAR"] == "Unknown").mean()) - sentinel_rate = float(sentinel_rates.get("production", 0.0)) - return { - "source": "fallback", - "sample_size": len(production_df), - "missing_required_rate": missing_rate, - "unknown_gender_rate": unknown_gender_rate, - "unknown_car_rate": unknown_car_rate, - "days_employed_sentinel_rate": sentinel_rate, - } - - -def _filter_by_time( - meta_df: pd.DataFrame, - inputs_df: pd.DataFrame, - since: str | None, - until: str | None, -) -> tuple[pd.DataFrame, pd.DataFrame, str]: - if not since and not until: - return meta_df, inputs_df, "" - if 
"timestamp" not in meta_df.columns: - return meta_df, inputs_df, "timestamp_missing" - timestamps = pd.to_datetime(meta_df["timestamp"], errors="coerce", utc=True) - if timestamps.isna().all(): - return meta_df, inputs_df, "timestamp_invalid" - mask = pd.Series(True, index=meta_df.index) - if since: - since_dt = pd.to_datetime(since, errors="coerce", utc=True) - if not pd.isna(since_dt): - mask &= timestamps >= since_dt - if until: - until_dt = pd.to_datetime(until, errors="coerce", utc=True) - if not pd.isna(until_dt): - mask &= timestamps <= until_dt - return meta_df.loc[mask].reset_index(drop=True), inputs_df.loc[mask].reset_index(drop=True), "filtered" - - -def _plot_score_distribution(scores: pd.Series, output_path: Path, bins: int = 30) -> None: - plt.figure(figsize=(6, 4)) - plt.hist(scores.dropna(), bins=bins, range=(0, 1), alpha=0.8, color="#4C78A8") - plt.title("Prediction score distribution") - plt.xlabel("Predicted probability") - plt.ylabel("Count") - plt.tight_layout() - plt.savefig(output_path) - plt.close() - - -def _plot_prediction_rate(predictions: pd.Series, output_path: Path) -> None: - counts = predictions.value_counts(normalize=True, dropna=False).sort_index() - plt.figure(figsize=(4, 4)) - plt.bar(counts.index.astype(str), counts.values, color="#F58518") - plt.title("Prediction rate") - plt.xlabel("Predicted class") - plt.ylabel("Share") - plt.ylim(0, 1) - plt.tight_layout() - plt.savefig(output_path) - plt.close() - - -def generate_report( - log_path: Path, +from evidently import Report + +# On peut réutiliser les fonctions de normalisation si elles sont dans un module partagé +# Pour cet exemple, je les recopie ici pour la clarté. 
+ +def _normalize_gender(series: pd.Series) -> pd.Series: + """Normalise la colonne CODE_GENDER.""" + return series.str.upper().replace({'XNA': 'F', 'MALE': 'M', 'FEMALE': 'F'}) + +def _replace_sentinel(df: pd.DataFrame) -> pd.DataFrame: + """Remplace la valeur sentinelle dans DAYS_EMPLOYED.""" + if 'DAYS_EMPLOYED' in df.columns: + df['DAYS_EMPLOYED'] = df['DAYS_EMPLOYED'].replace({365243: pd.NA}) + return df + +def _load_logs(log_path: Path) -> pd.DataFrame | None: + """Charge les inputs depuis les logs JSONL.""" + if not log_path.exists(): + print(f"Warning: Log file not found at {log_path}") + return None + + records = [] + with log_path.open('r', encoding='utf-8') as f: + for line in f: + try: + log_entry = json.loads(line) + # On ne garde que les appels réussis avec des inputs + if log_entry.get('status_code', 500) < 400 and 'inputs' in log_entry: + records.append(log_entry['inputs']) + except json.JSONDecodeError: + continue # Ignore les lignes mal formées + + if not records: + return None + + return pd.DataFrame(records) + + +def generate_evidently_report( reference_path: Path, - output_dir: Path, - sample_size: int, - psi_threshold: float, - score_bins: int, - min_prod_samples: int = MIN_PROD_SAMPLES_DEFAULT, - psi_eps: float = PSI_EPS_DEFAULT, - min_category_share: float = RARE_CATEGORY_MIN_SHARE_DEFAULT, - fdr_alpha: float = FDR_ALPHA_DEFAULT, - min_drift_features: int = 1, - prod_since: str | None = None, - prod_until: str | None = None, -) -> Path: - inputs_df, meta_df = _load_logs(log_path) - if meta_df.empty: - raise SystemExit(f"No inputs found in logs: {log_path}") - - meta_df, inputs_df, window_status = _filter_by_time( - meta_df, inputs_df, since=prod_since, until=prod_until - ) - valid_mask = pd.Series(True, index=meta_df.index) - if "status_code" in meta_df.columns: - valid_mask = meta_df["status_code"].fillna(0) < 400 - inputs_df = inputs_df.loc[valid_mask].reset_index(drop=True) - meta_df_valid = meta_df.loc[valid_mask].reset_index(drop=True) 
- - if inputs_df.empty: - raise SystemExit(f"No valid inputs found in logs: {log_path}") - - features = [col for col in DEFAULT_FEATURES if col in inputs_df.columns] - if not features: - raise SystemExit("No matching features found in production logs.") - - reference_df = pd.read_parquet(reference_path, columns=features) - if sample_size and len(reference_df) > sample_size: - reference_df = reference_df.sample(sample_size, random_state=42) - - numeric_features = [col for col in features if col not in CATEGORICAL_FEATURES] - production_df = _normalize_categories(inputs_df) - reference_df = _normalize_categories(reference_df) - production_df = _coerce_numeric(production_df, numeric_features) - reference_df = _coerce_numeric(reference_df, numeric_features) - - sentinel_rates = {} - if "DAYS_EMPLOYED" in production_df.columns: - production_df["DAYS_EMPLOYED"], prod_rate = _replace_sentinel( - production_df["DAYS_EMPLOYED"], DAYS_EMPLOYED_SENTINEL - ) - reference_df["DAYS_EMPLOYED"], ref_rate = _replace_sentinel( - reference_df["DAYS_EMPLOYED"], DAYS_EMPLOYED_SENTINEL - ) - sentinel_rates = { - "production": prod_rate, - "reference": ref_rate, - } - - summary_rows: list[dict[str, object]] = [] - plots_dir = output_dir / "plots" - plots_dir.mkdir(parents=True, exist_ok=True) - n_prod = len(production_df) - n_ref = len(reference_df) - - for feature in features: - if feature not in reference_df.columns: - continue - ref_series = reference_df[feature] - prod_series = production_df[feature] - if feature in CATEGORICAL_FEATURES: - feature_n_prod = int(prod_series.dropna().shape[0]) - feature_n_ref = int(ref_series.dropna().shape[0]) - ref_series, prod_series = _prepare_categorical( - ref_series, prod_series, min_share=min_category_share, other_label="OTHER" - ) - insufficient_sample = feature_n_prod < min_prod_samples - psi_value = None - if not insufficient_sample: - psi_value = _psi(ref_series, prod_series, eps=psi_eps) - summary_rows.append( - { - "feature": feature, - 
"type": "categorical", - "psi": round(psi_value, 4) if psi_value is not None else None, - "drift_detected": bool(psi_value is not None and psi_value >= psi_threshold), - "n_prod": feature_n_prod, - "n_ref": feature_n_ref, - "note": "insufficient_sample" if insufficient_sample else "", - } - ) - plot_path = plots_dir / f"{_safe_name(feature)}.png" - _plot_categorical(ref_series, prod_series, plot_path) - else: - ref_clean = ref_series.dropna() - prod_clean = prod_series.dropna() - if ref_clean.empty or prod_clean.empty: - continue - feature_n_prod = int(len(prod_clean)) - insufficient_sample = feature_n_prod < min_prod_samples - stat = None - pvalue = None - if not insufficient_sample: - stat, pvalue = stats.ks_2samp(ref_clean, prod_clean) - summary_rows.append( - { - "feature": feature, - "type": "numeric", - "ks_stat": round(float(stat), 4) if stat is not None else None, - "p_value": round(float(pvalue), 6) if pvalue is not None else None, - "p_value_fdr": None, - "drift_detected": bool(pvalue is not None and pvalue < 0.05), - "n_prod": feature_n_prod, - "n_ref": int(len(ref_clean)), - "note": "insufficient_sample" if insufficient_sample else "", - } - ) - plot_path = plots_dir / f"{_safe_name(feature)}.png" - _plot_numeric(ref_series, prod_series, plot_path) - - numeric_rows = [ - (idx, row) - for idx, row in enumerate(summary_rows) - if row.get("type") == "numeric" and row.get("p_value") is not None - ] - if numeric_rows: - pvalues = [row["p_value"] for _, row in numeric_rows] - qvals, reject = _benjamini_hochberg(pvalues, alpha=fdr_alpha) - for (idx, _), qval, rejected in zip(numeric_rows, qvals, reject): - summary_rows[idx]["p_value_fdr"] = round(float(qval), 6) - summary_rows[idx]["drift_detected"] = bool(rejected) - - summary_df = pd.DataFrame(summary_rows) - output_dir.mkdir(parents=True, exist_ok=True) - report_path = output_dir / "drift_report.html" - - total_calls = len(meta_df) - error_rate = float((meta_df.get("status_code", pd.Series(dtype=int)) >= 
400).mean()) if total_calls else 0.0 - latency_ms = meta_df.get("latency_ms", pd.Series(dtype=float)).dropna() - latency_p50 = float(latency_ms.quantile(0.5)) if not latency_ms.empty else 0.0 - latency_p95 = float(latency_ms.quantile(0.95)) if not latency_ms.empty else 0.0 - - valid_meta = meta_df_valid - score_series = ( - pd.to_numeric(valid_meta.get("probability", pd.Series(dtype=float)), errors="coerce") - .dropna() - ) - pred_series = ( - pd.to_numeric(valid_meta.get("prediction", pd.Series(dtype=float)), errors="coerce") - .dropna() - ) - - score_metrics_html = "
  • No prediction scores available.
  • " - score_plots_html = "" - if not score_series.empty: - score_mean = float(score_series.mean()) - score_p50 = float(score_series.quantile(0.5)) - score_p95 = float(score_series.quantile(0.95)) - score_min = float(score_series.min()) - score_max = float(score_series.max()) - score_metrics = [ - f"
  • Score mean: {score_mean:.4f}
  • ", - f"
  • Score p50: {score_p50:.4f}
  • ", - f"
  • Score p95: {score_p95:.4f}
  • ", - f"
  • Score min: {score_min:.4f}
  • ", - f"
  • Score max: {score_max:.4f}
  • ", - ] - score_metrics_html = "\n".join(score_metrics) - score_plot_path = plots_dir / "score_distribution.png" - _plot_score_distribution(score_series, score_plot_path, bins=score_bins) - score_plots_html = "" - - if not pred_series.empty: - pred_rate = float(pred_series.mean()) - score_metrics_html += f"\n
  • Predicted default rate: {pred_rate:.2%}
  • " - pred_plot_path = plots_dir / "prediction_rate.png" - _plot_prediction_rate(pred_series, pred_plot_path) - score_plots_html += "\n" - - drift_flags = summary_df.get("drift_detected", pd.Series(dtype=bool)).fillna(False) - drift_count = int(drift_flags.sum()) - overall_drift = drift_count >= max(min_drift_features, 1) and n_prod >= min_prod_samples - drift_features = summary_df.loc[drift_flags, "feature"].tolist() if not summary_df.empty else [] - - dq_metrics = _summarize_data_quality(meta_df, production_df, sentinel_rates) - if dq_metrics.get("source") == "none": - dq_html = "

    No data quality metrics available.

    " - else: - dq_items = [ - f"
  • Source: {dq_metrics.get('source')}
  • ", - f"
  • Sample size: {dq_metrics.get('sample_size')}
  • ", - f"
  • Missing required rate: {dq_metrics.get('missing_required_rate', 0.0):.2%}
  • ", - ] - if "invalid_numeric_rate" in dq_metrics: - dq_items.append(f"
  • Invalid numeric rate: {dq_metrics.get('invalid_numeric_rate', 0.0):.2%}
  • ") - if "out_of_range_rate" in dq_metrics: - dq_items.append(f"
  • Out-of-range rate: {dq_metrics.get('out_of_range_rate', 0.0):.2%}
  • ") - if "nan_rate" in dq_metrics: - dq_items.append(f"
  • NaN rate (avg): {dq_metrics.get('nan_rate', 0.0):.2%}
  • ") - dq_items.append( - f"
  • Unknown CODE_GENDER rate: {dq_metrics.get('unknown_gender_rate', 0.0):.2%}
  • " - ) - dq_items.append( - f"
  • Unknown FLAG_OWN_CAR rate: {dq_metrics.get('unknown_car_rate', 0.0):.2%}
  • " - ) - dq_items.append( - f"
  • DAYS_EMPLOYED sentinel rate: {dq_metrics.get('days_employed_sentinel_rate', 0.0):.2%}
  • " - ) - dq_html = "" - - summary_html = summary_df.to_html(index=False, escape=False) - plots_html = "\n".join( - f"

    {row['feature']}

    " - for _, row in summary_df.iterrows() - ) - - sample_badge = "" - if n_prod < min_prod_samples: - sample_badge = ( - "
    Sample insuffisant: " - f"{n_prod} < {min_prod_samples} (resultats non fiables).
    " - ) - drift_badge = ( - "
    No drift alert
    " - if not overall_drift - else "
    Drift alert
    " - ) - if not prod_since and not prod_until: - window_info = "full_log" - elif window_status in {"timestamp_missing", "timestamp_invalid"}: - window_info = f"{window_status} (no filter applied)" - else: - window_info = f"{prod_since or '...'} to {prod_until or '...'}" - - html = f""" - - - - Drift Report - - - -

    Production Monitoring Summary

    - - {sample_badge} -

    Score Monitoring

    - - {score_plots_html} -

    Data Quality

    - {dq_html} -

    Data Drift Summary

    - {drift_badge} - - {summary_html} -

    Feature Distributions

    - {plots_html} - - -""" - - report_path.write_text(html, encoding="utf-8") - return report_path + log_path: Path, + output_path: Path, + sample_size: int = 20000, +): + """Génère un rapport de dérive de données avec Evidently.""" + + print("1. Chargement des données de référence...") + reference_df = pd.read_parquet(reference_path) + if sample_size > 0 and sample_size < len(reference_df): + reference_df = reference_df.sample(n=sample_size, random_state=42) + + print("2. Chargement et préparation des données de production (logs)...") + prod_df = _load_logs(log_path) + + if prod_df is None or prod_df.empty: + print("Aucune donnée de production valide trouvée. Le rapport ne peut être généré.") + return + + # Appliquer le même pré-traitement pour une comparaison juste + reference_df = _replace_sentinel(reference_df) + prod_df = _replace_sentinel(prod_df) + + if 'CODE_GENDER' in reference_df.columns: + reference_df['CODE_GENDER'] = _normalize_gender(reference_df['CODE_GENDER']) + if 'CODE_GENDER' in prod_df.columns: + prod_df['CODE_GENDER'] = _normalize_gender(prod_df['CODE_GENDER']) + + # S'assurer que les colonnes correspondent + # Evidently gère les colonnes manquantes, mais c'est une bonne pratique + ref_cols = set(reference_df.columns) + prod_cols = set(prod_df.columns) + common_cols = list(ref_cols.intersection(prod_cols)) + + reference_df = reference_df[common_cols] + prod_df = prod_df[common_cols] + + print(f"3. 
Génération du rapport de dérive sur {len(common_cols)} features communes...") + data_drift_report = Report(metrics=[ + DataDriftPreset(), + ]) + + data_drift_report.run(reference_data=reference_df, current_data=prod_df) + + output_path.parent.mkdir(parents=True, exist_ok=True) + data_drift_report.save_html(str(output_path)) + + print(f"✅ Rapport de dérive généré avec succès : {output_path}") -def main() -> None: - parser = argparse.ArgumentParser(description="Generate a drift report from production logs.") - parser.add_argument("--logs", type=Path, default=Path("logs/predictions.jsonl")) - parser.add_argument("--reference", type=Path, default=Path("data/data_final.parquet")) - parser.add_argument("--output-dir", type=Path, default=Path("reports")) - parser.add_argument("--sample-size", type=int, default=50000) - parser.add_argument("--psi-threshold", type=float, default=0.2) - parser.add_argument("--score-bins", type=int, default=30) - parser.add_argument("--min-prod-samples", type=int, default=MIN_PROD_SAMPLES_DEFAULT) - parser.add_argument("--psi-eps", type=float, default=PSI_EPS_DEFAULT) - parser.add_argument("--min-category-share", type=float, default=RARE_CATEGORY_MIN_SHARE_DEFAULT) - parser.add_argument("--fdr-alpha", type=float, default=FDR_ALPHA_DEFAULT) - parser.add_argument("--min-drift-features", type=int, default=1) - parser.add_argument("--prod-since", type=str, default=None) - parser.add_argument("--prod-until", type=str, default=None) +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Generate a data drift report using Evidently AI.") + parser.add_argument("--reference-path", type=Path, default="data/data_final.parquet", help="Path to the reference dataset.") + parser.add_argument("--log-path", type=Path, default="logs/predictions.jsonl", help="Path to the production log file.") + parser.add_argument("--output-path", type=Path, default="reports/evidently_drift_report.html", help="Path to save the HTML report.") + 
parser.add_argument("--sample-size", type=int, default=20000, help="Size of the reference sample to use (0 for full dataset).") + args = parser.parse_args() - report_path = generate_report( - log_path=args.logs, - reference_path=args.reference, - output_dir=args.output_dir, + generate_evidently_report( + reference_path=args.reference_path, + log_path=args.log_path, + output_path=args.output_path, sample_size=args.sample_size, - psi_threshold=args.psi_threshold, - score_bins=args.score_bins, - min_prod_samples=args.min_prod_samples, - psi_eps=args.psi_eps, - min_category_share=args.min_category_share, - fdr_alpha=args.fdr_alpha, - min_drift_features=args.min_drift_features, - prod_since=args.prod_since, - prod_until=args.prod_until, ) - print(f"Drift report saved to {report_path}") - - -if __name__ == "__main__": - main() diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/drift_report_old.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/drift_report_old.py new file mode 100644 index 0000000000000000000000000000000000000000..2c3272f5575dda8c9ae9331b10bdc39f6498d002 --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/drift_report_old.py @@ -0,0 +1,719 @@ +# construire drift avec evidently + +from __future__ import annotations + +import argparse +import json +import re +from pathlib import Path + +import numpy as np +import pandas as pd +from scipy import stats + +try: + import matplotlib + matplotlib.use("Agg") + import matplotlib.pyplot as plt +except ImportError as exc: # pragma: no cover - optional plotting dependency + raise SystemExit( + "matplotlib is required for plots. 
Install it with: pip install matplotlib" + ) from exc + + +DEFAULT_FEATURES = [ + "EXT_SOURCE_2", + "EXT_SOURCE_3", + "AMT_ANNUITY", + "EXT_SOURCE_1", + "CODE_GENDER", + "DAYS_EMPLOYED", + "AMT_CREDIT", + "AMT_GOODS_PRICE", + "DAYS_BIRTH", + "FLAG_OWN_CAR", +] + +CATEGORICAL_FEATURES = {"CODE_GENDER", "FLAG_OWN_CAR"} +MIN_PROD_SAMPLES_DEFAULT = 200 +PSI_EPS_DEFAULT = 1e-4 +RARE_CATEGORY_MIN_SHARE_DEFAULT = 0.01 +FDR_ALPHA_DEFAULT = 0.05 +DAYS_EMPLOYED_SENTINEL = 365243 + +CATEGORY_NORMALIZATION = { + "CODE_GENDER": { + "F": "F", + "FEMALE": "F", + "0": "F", + "W": "F", + "WOMAN": "F", + "M": "M", + "MALE": "M", + "1": "M", + "MAN": "M", + }, + "FLAG_OWN_CAR": { + "Y": "Y", + "YES": "Y", + "TRUE": "Y", + "1": "Y", + "T": "Y", + "N": "N", + "NO": "N", + "FALSE": "N", + "0": "N", + "F": "N", + }, +} + + +def _safe_name(value: str) -> str: + return re.sub(r"[^a-zA-Z0-9_-]+", "_", value) + + +def _load_logs(log_path: Path) -> tuple[pd.DataFrame, pd.DataFrame]: + entries: list[dict[str, object]] = [] + with log_path.open("r", encoding="utf-8") as handle: + for line in handle: + line = line.strip() + if not line: + continue + entries.append(json.loads(line)) + if not entries: + return pd.DataFrame(), pd.DataFrame() + inputs = [ + entry.get("inputs") if isinstance(entry.get("inputs"), dict) else {} + for entry in entries + ] + inputs_df = pd.DataFrame.from_records(inputs) + meta_df = pd.DataFrame.from_records(entries) + return inputs_df, meta_df + + +def _normalize_category_value(value: object, mapping: dict[str, str]) -> object: + if pd.isna(value): + return np.nan + key = str(value).strip().upper() + if not key: + return np.nan + return mapping.get(key, "Unknown") + + +def _normalize_categories(df: pd.DataFrame) -> pd.DataFrame: + out = df.copy() + for feature, mapping in CATEGORY_NORMALIZATION.items(): + if feature in out.columns: + out[feature] = out[feature].apply(lambda v: _normalize_category_value(v, mapping)) + return out + + +def _replace_sentinel(series: 
pd.Series, sentinel: float) -> tuple[pd.Series, float]: + values = pd.to_numeric(series, errors="coerce") + sentinel_mask = values == sentinel + if sentinel_mask.any(): + series = series.copy() + series[sentinel_mask] = np.nan + return series, float(sentinel_mask.mean()) if len(values) else 0.0 + + +def _prepare_categorical( + reference: pd.Series, + production: pd.Series, + min_share: float, + max_categories: int | None = None, + other_label: str = "__OTHER__", +) -> tuple[pd.Series, pd.Series]: + ref_series = reference.fillna("Unknown") + prod_series = production.fillna("Unknown") + ref_freq = ref_series.value_counts(normalize=True) + keep = ref_freq[ref_freq >= min_share].index.tolist() + if max_categories is not None: + keep = keep[:max_categories] + ref_series = ref_series.where(ref_series.isin(keep), other=other_label) + prod_series = prod_series.where(prod_series.isin(keep), other=other_label) + return ref_series, prod_series + + +def _psi(reference: pd.Series, production: pd.Series, eps: float = PSI_EPS_DEFAULT) -> float: + ref_freq = reference.value_counts(normalize=True, dropna=False) + prod_freq = production.value_counts(normalize=True, dropna=False) + categories = ref_freq.index.union(prod_freq.index) + ref_probs = ref_freq.reindex(categories, fill_value=0).to_numpy() + prod_probs = prod_freq.reindex(categories, fill_value=0).to_numpy() + ref_probs = np.clip(ref_probs, eps, None) + prod_probs = np.clip(prod_probs, eps, None) + return float(np.sum((ref_probs - prod_probs) * np.log(ref_probs / prod_probs))) + + +def _coerce_numeric(df: pd.DataFrame, columns: list[str]) -> pd.DataFrame: + out = df.copy() + for col in columns: + if col in out.columns: + out[col] = pd.to_numeric(out[col], errors="coerce") + return out + + +def _plot_numeric(ref: pd.Series, prod: pd.Series, output_path: Path) -> None: + plt.figure(figsize=(6, 4)) + plt.hist(ref.dropna(), bins=30, alpha=0.6, label="reference") + plt.hist(prod.dropna(), bins=30, alpha=0.6, label="production") + 
plt.title(f"Distribution: {ref.name}") + plt.legend() + plt.tight_layout() + plt.savefig(output_path) + plt.close() + + +def _plot_categorical(ref: pd.Series, prod: pd.Series, output_path: Path) -> None: + ref_freq = ref.value_counts(normalize=True) + prod_freq = prod.value_counts(normalize=True) + plot_df = pd.DataFrame({"reference": ref_freq, "production": prod_freq}).fillna(0) + plot_df.sort_values("reference", ascending=False).plot(kind="bar", figsize=(7, 4)) + plt.title(f"Distribution: {ref.name}") + plt.tight_layout() + plt.savefig(output_path) + plt.close() + + +def _benjamini_hochberg(pvalues: list[float], alpha: float) -> tuple[list[float], list[bool]]: + if not pvalues: + return [], [] + pvals = np.array(pvalues, dtype=float) + order = np.argsort(pvals) + ranked = pvals[order] + m = len(pvals) + thresholds = alpha * (np.arange(1, m + 1) / m) + below = ranked <= thresholds + reject = np.zeros(m, dtype=bool) + if below.any(): + cutoff = np.max(np.where(below)[0]) + reject[order[:cutoff + 1]] = True + qvals = ranked * m / np.arange(1, m + 1) + qvals = np.minimum.accumulate(qvals[::-1])[::-1] + adjusted = np.empty_like(qvals) + adjusted[order] = qvals + return adjusted.tolist(), reject.tolist() + + +def _extract_data_quality(meta_df: pd.DataFrame) -> list[dict[str, object]]: + if "data_quality" not in meta_df.columns: + return [] + dq_entries = [] + for item in meta_df["data_quality"].dropna(): + if isinstance(item, dict): + dq_entries.append(item) + return dq_entries + + +def _normalize_error_message(value: object) -> str: + if value is None: + return "" + if isinstance(value, dict): + message = value.get("message") + return str(message) if message else json.dumps(value, ensure_ascii=True) + if isinstance(value, list): + return str(value[0]) if value else "" + if isinstance(value, str): + cleaned = value.strip() + if not cleaned: + return "" + try: + parsed = json.loads(cleaned) + except json.JSONDecodeError: + return cleaned + return 
_normalize_error_message(parsed) + return str(value) + + +def _summarize_errors(meta_df: pd.DataFrame, max_items: int = 5) -> list[tuple[str, int]]: + if "error" not in meta_df.columns: + return [] + errors = meta_df["error"].dropna().apply(_normalize_error_message) + errors = errors[errors != ""] + if errors.empty: + return [] + counts = errors.value_counts().head(max_items) + return list(zip(counts.index.tolist(), counts.tolist())) + + +def _dq_has_unknown(dq: dict[str, object], feature: str) -> bool: + unknown = dq.get("unknown_categories") + if isinstance(unknown, dict): + return feature in unknown + if isinstance(unknown, list): + return feature in unknown + return False + + +def _summarize_data_quality( + meta_df: pd.DataFrame, + production_df: pd.DataFrame, + sentinel_rates: dict[str, float], +) -> dict[str, object]: + dq_entries = _extract_data_quality(meta_df) + if dq_entries: + total = len(dq_entries) + missing_rate = np.mean( + [bool(dq.get("missing_required_columns")) for dq in dq_entries] + ) + invalid_rate = np.mean( + [bool(dq.get("invalid_numeric_columns")) for dq in dq_entries] + ) + out_of_range_rate = np.mean( + [bool(dq.get("out_of_range_columns")) for dq in dq_entries] + ) + outlier_rate = np.mean( + [bool(dq.get("outlier_columns")) for dq in dq_entries] + ) + nan_rate = np.mean([float(dq.get("nan_rate", 0.0)) for dq in dq_entries]) + unknown_gender = np.mean( + [_dq_has_unknown(dq, "CODE_GENDER") for dq in dq_entries] + ) + unknown_car = np.mean( + [_dq_has_unknown(dq, "FLAG_OWN_CAR") for dq in dq_entries] + ) + sentinel_rate = np.mean( + [bool(dq.get("days_employed_sentinel")) for dq in dq_entries] + ) + return { + "source": "log", + "sample_size": total, + "missing_required_rate": float(missing_rate), + "invalid_numeric_rate": float(invalid_rate), + "out_of_range_rate": float(out_of_range_rate), + "outlier_rate": float(outlier_rate), + "nan_rate": float(nan_rate), + "unknown_gender_rate": float(unknown_gender), + "unknown_car_rate": 
float(unknown_car), + "days_employed_sentinel_rate": float(sentinel_rate), + } + + if production_df.empty: + return {"source": "none"} + + missing_rate = float(production_df.isna().any(axis=1).mean()) + unknown_gender_rate = 0.0 + unknown_car_rate = 0.0 + if "CODE_GENDER" in production_df.columns: + unknown_gender_rate = float( + (production_df["CODE_GENDER"] == "Unknown").mean() + ) + if "FLAG_OWN_CAR" in production_df.columns: + unknown_car_rate = float((production_df["FLAG_OWN_CAR"] == "Unknown").mean()) + sentinel_rate = float(sentinel_rates.get("production", 0.0)) + return { + "source": "fallback", + "sample_size": len(production_df), + "missing_required_rate": missing_rate, + "unknown_gender_rate": unknown_gender_rate, + "unknown_car_rate": unknown_car_rate, + "days_employed_sentinel_rate": sentinel_rate, + } + + +def _filter_by_time( + meta_df: pd.DataFrame, + inputs_df: pd.DataFrame, + since: str | None, + until: str | None, +) -> tuple[pd.DataFrame, pd.DataFrame, str]: + if not since and not until: + return meta_df, inputs_df, "" + if "timestamp" not in meta_df.columns: + return meta_df, inputs_df, "timestamp_missing" + timestamps = pd.to_datetime(meta_df["timestamp"], errors="coerce", utc=True) + if timestamps.isna().all(): + return meta_df, inputs_df, "timestamp_invalid" + mask = pd.Series(True, index=meta_df.index) + if since: + since_dt = pd.to_datetime(since, errors="coerce", utc=True) + if not pd.isna(since_dt): + mask &= timestamps >= since_dt + if until: + until_dt = pd.to_datetime(until, errors="coerce", utc=True) + if not pd.isna(until_dt): + mask &= timestamps <= until_dt + return meta_df.loc[mask].reset_index(drop=True), inputs_df.loc[mask].reset_index(drop=True), "filtered" + + +def _plot_score_distribution(scores: pd.Series, output_path: Path, bins: int = 30) -> None: + plt.figure(figsize=(6, 4)) + plt.hist(scores.dropna(), bins=bins, range=(0, 1), alpha=0.8, color="#4C78A8") + plt.title("Prediction score distribution") + 
plt.xlabel("Predicted probability") + plt.ylabel("Count") + plt.tight_layout() + plt.savefig(output_path) + plt.close() + + +def _plot_prediction_rate(predictions: pd.Series, output_path: Path) -> None: + counts = predictions.value_counts(normalize=True, dropna=False).sort_index() + plt.figure(figsize=(4, 4)) + plt.bar(counts.index.astype(str), counts.values, color="#F58518") + plt.title("Prediction rate") + plt.xlabel("Predicted class") + plt.ylabel("Share") + plt.ylim(0, 1) + plt.tight_layout() + plt.savefig(output_path) + plt.close() + + +def generate_report( + log_path: Path, + reference_path: Path, + output_dir: Path, + sample_size: int, + psi_threshold: float, + score_bins: int, + min_prod_samples: int = MIN_PROD_SAMPLES_DEFAULT, + psi_eps: float = PSI_EPS_DEFAULT, + min_category_share: float = RARE_CATEGORY_MIN_SHARE_DEFAULT, + fdr_alpha: float = FDR_ALPHA_DEFAULT, + min_drift_features: int = 1, + prod_since: str | None = None, + prod_until: str | None = None, +) -> Path: + inputs_df, meta_df = _load_logs(log_path) + if meta_df.empty: + raise SystemExit(f"No inputs found in logs: {log_path}") + + meta_df, inputs_df, window_status = _filter_by_time( + meta_df, inputs_df, since=prod_since, until=prod_until + ) + meta_df_all = meta_df.copy() + inputs_df_all = inputs_df.copy() + valid_mask = pd.Series(True, index=meta_df.index) + if "status_code" in meta_df.columns: + valid_mask = meta_df["status_code"].fillna(0) < 400 + inputs_df = inputs_df.loc[valid_mask].reset_index(drop=True) + meta_df_valid = meta_df.loc[valid_mask].reset_index(drop=True) + + if inputs_df.empty: + raise SystemExit(f"No valid inputs found in logs: {log_path}") + + features = [col for col in DEFAULT_FEATURES if col in inputs_df.columns] + if not features: + raise SystemExit("No matching features found in production logs.") + + reference_df = pd.read_parquet(reference_path, columns=features) + if sample_size and len(reference_df) > sample_size: + reference_df = 
reference_df.sample(sample_size, random_state=42) + + numeric_features = [col for col in features if col not in CATEGORICAL_FEATURES] + production_df = _normalize_categories(inputs_df) + reference_df = _normalize_categories(reference_df) + production_df = _coerce_numeric(production_df, numeric_features) + reference_df = _coerce_numeric(reference_df, numeric_features) + + sentinel_rates = {} + if "DAYS_EMPLOYED" in production_df.columns: + production_df["DAYS_EMPLOYED"], prod_rate = _replace_sentinel( + production_df["DAYS_EMPLOYED"], DAYS_EMPLOYED_SENTINEL + ) + reference_df["DAYS_EMPLOYED"], ref_rate = _replace_sentinel( + reference_df["DAYS_EMPLOYED"], DAYS_EMPLOYED_SENTINEL + ) + sentinel_rates = { + "production": prod_rate, + "reference": ref_rate, + } + + summary_rows: list[dict[str, object]] = [] + plots_dir = output_dir / "plots" + plots_dir.mkdir(parents=True, exist_ok=True) + n_prod = len(production_df) + n_ref = len(reference_df) + + for feature in features: + if feature not in reference_df.columns: + continue + ref_series = reference_df[feature] + prod_series = production_df[feature] + if feature in CATEGORICAL_FEATURES: + feature_n_prod = int(prod_series.dropna().shape[0]) + feature_n_ref = int(ref_series.dropna().shape[0]) + ref_series, prod_series = _prepare_categorical( + ref_series, prod_series, min_share=min_category_share, other_label="OTHER" + ) + insufficient_sample = feature_n_prod < min_prod_samples + psi_value = None + if not insufficient_sample: + psi_value = _psi(ref_series, prod_series, eps=psi_eps) + summary_rows.append( + { + "feature": feature, + "type": "categorical", + "psi": round(psi_value, 4) if psi_value is not None else None, + "drift_detected": bool(psi_value is not None and psi_value >= psi_threshold), + "n_prod": feature_n_prod, + "n_ref": feature_n_ref, + "note": "insufficient_sample" if insufficient_sample else "", + } + ) + plot_path = plots_dir / f"{_safe_name(feature)}.png" + _plot_categorical(ref_series, prod_series, 
plot_path) + else: + ref_clean = ref_series.dropna() + prod_clean = prod_series.dropna() + if ref_clean.empty or prod_clean.empty: + continue + feature_n_prod = int(len(prod_clean)) + insufficient_sample = feature_n_prod < min_prod_samples + stat = None + pvalue = None + if not insufficient_sample: + stat, pvalue = stats.ks_2samp(ref_clean, prod_clean) + summary_rows.append( + { + "feature": feature, + "type": "numeric", + "ks_stat": round(float(stat), 4) if stat is not None else None, + "p_value": round(float(pvalue), 6) if pvalue is not None else None, + "p_value_fdr": None, + "drift_detected": bool(pvalue is not None and pvalue < 0.05), + "n_prod": feature_n_prod, + "n_ref": int(len(ref_clean)), + "note": "insufficient_sample" if insufficient_sample else "", + } + ) + plot_path = plots_dir / f"{_safe_name(feature)}.png" + _plot_numeric(ref_series, prod_series, plot_path) + + numeric_rows = [ + (idx, row) + for idx, row in enumerate(summary_rows) + if row.get("type") == "numeric" and row.get("p_value") is not None + ] + if numeric_rows: + pvalues = [row["p_value"] for _, row in numeric_rows] + qvals, reject = _benjamini_hochberg(pvalues, alpha=fdr_alpha) + for (idx, _), qval, rejected in zip(numeric_rows, qvals, reject): + summary_rows[idx]["p_value_fdr"] = round(float(qval), 6) + summary_rows[idx]["drift_detected"] = bool(rejected) + + summary_df = pd.DataFrame(summary_rows) + output_dir.mkdir(parents=True, exist_ok=True) + report_path = output_dir / "drift_report.html" + + total_calls = len(meta_df_all) + error_series = meta_df_all.get("status_code", pd.Series(dtype=int)) + error_rate = float((error_series >= 400).mean()) if total_calls else 0.0 + latency_ms = meta_df_all.get("latency_ms", pd.Series(dtype=float)).dropna() + latency_p50 = float(latency_ms.quantile(0.5)) if not latency_ms.empty else 0.0 + latency_p95 = float(latency_ms.quantile(0.95)) if not latency_ms.empty else 0.0 + calls_with_inputs = int(inputs_df_all.notna().any(axis=1).sum()) if not 
inputs_df_all.empty else 0 + calls_with_dq = int(meta_df_all.get("data_quality", pd.Series(dtype=object)).notna().sum()) if total_calls else 0 + calls_success = int(valid_mask.sum()) + + valid_meta = meta_df_valid + score_series = ( + pd.to_numeric(valid_meta.get("probability", pd.Series(dtype=float)), errors="coerce") + .dropna() + ) + pred_series = ( + pd.to_numeric(valid_meta.get("prediction", pd.Series(dtype=float)), errors="coerce") + .dropna() + ) + + score_metrics_html = "
<li>No prediction scores available.</li>" + score_plots_html = "" + if not score_series.empty: + score_mean = float(score_series.mean()) + score_p50 = float(score_series.quantile(0.5)) + score_p95 = float(score_series.quantile(0.95)) + score_min = float(score_series.min()) + score_max = float(score_series.max()) + score_metrics = [ + f"
<li>Score mean: {score_mean:.4f}</li>", + f"<li>Score p50: {score_p50:.4f}</li>", + f"<li>Score p95: {score_p95:.4f}</li>", + f"<li>Score min: {score_min:.4f}</li>", + f"<li>Score max: {score_max:.4f}</li>", + ] + score_metrics_html = "\n".join(score_metrics) + score_plot_path = plots_dir / "score_distribution.png" + _plot_score_distribution(score_series, score_plot_path, bins=score_bins) + score_plots_html = "<img src='plots/score_distribution.png' />" + + if not pred_series.empty: + pred_rate = float(pred_series.mean()) + score_metrics_html += f"\n<li>Predicted default rate: {pred_rate:.2%}</li>" + pred_plot_path = plots_dir / "prediction_rate.png" + _plot_prediction_rate(pred_series, pred_plot_path) + score_plots_html += "\n<img src='plots/prediction_rate.png' />" + + error_breakdown = _summarize_errors(meta_df_all[error_series >= 400]) + if error_breakdown: + error_items = "\n".join( + f"
<li>{message} ({count})</li>" for message, count in error_breakdown + ) + error_html = f"<ul>{error_items}</ul>" + else: + error_html = "<p>

No error details logged.</p>" + + drift_flags = summary_df.get("drift_detected", pd.Series(dtype=bool)).fillna(False) + drift_count = int(drift_flags.sum()) + overall_drift = drift_count >= max(min_drift_features, 1) and n_prod >= min_prod_samples + drift_features = summary_df.loc[drift_flags, "feature"].tolist() if not summary_df.empty else [] + + dq_metrics = _summarize_data_quality(meta_df, production_df, sentinel_rates) + if dq_metrics.get("source") == "none": + dq_html = "<p>

No data quality metrics available.</p>" + else: + dq_items = [ + f"
<li>Source: {dq_metrics.get('source')}</li>", + f"<li>Sample size: {dq_metrics.get('sample_size')}</li>", + f"<li>Missing required rate: {dq_metrics.get('missing_required_rate', 0.0):.2%}</li>", + ] + if "invalid_numeric_rate" in dq_metrics: + dq_items.append(f"<li>Invalid numeric rate: {dq_metrics.get('invalid_numeric_rate', 0.0):.2%}</li>") + if "out_of_range_rate" in dq_metrics: + dq_items.append(f"<li>Out-of-range rate: {dq_metrics.get('out_of_range_rate', 0.0):.2%}</li>") + if "outlier_rate" in dq_metrics: + dq_items.append(f"<li>Outlier rate: {dq_metrics.get('outlier_rate', 0.0):.2%}</li>") + if "nan_rate" in dq_metrics: + dq_items.append(f"<li>NaN rate (avg): {dq_metrics.get('nan_rate', 0.0):.2%}</li>") + dq_items.append( + f"<li>Unknown CODE_GENDER rate: {dq_metrics.get('unknown_gender_rate', 0.0):.2%}</li>" + ) + dq_items.append( + f"<li>Unknown FLAG_OWN_CAR rate: {dq_metrics.get('unknown_car_rate', 0.0):.2%}</li>" + ) + dq_items.append( + f"<li>DAYS_EMPLOYED sentinel rate: {dq_metrics.get('days_employed_sentinel_rate', 0.0):.2%}</li>" + ) + dq_html = "<ul>" + "\n".join(dq_items) + "</ul>" + + summary_html = summary_df.to_html(index=False, escape=False) + plots_html = "\n".join( + f"

<h3>{row['feature']}</h3><img src='plots/{_safe_name(row['feature'])}.png' />" + for _, row in summary_df.iterrows() + ) + + sample_badge = "" + if n_prod < min_prod_samples: + sample_badge = ( + "<div class='badge'>
Sample insuffisant: " + f"{n_prod} < {min_prod_samples} (resultats non fiables).</div>" + ) + if n_prod < min_prod_samples: + drift_badge = ( + "<div class='badge'>Drift non calcule " + f"(n_prod < {min_prod_samples}).</div>" + ) + elif overall_drift: + drift_badge = "<div class='badge'>Drift alert</div>" + else: + drift_badge = "<div class='badge'>No drift alert</div>" + if not prod_since and not prod_until: + window_info = "full_log" + elif window_status in {"timestamp_missing", "timestamp_invalid"}: + window_info = f"{window_status} (no filter applied)" + else: + window_info = f"{prod_since or '...'} to {prod_until or '...'}" + + html = f"""<!DOCTYPE html> +<html> +<head><title>Drift Report</title></head> +<body>

<h1>Production Monitoring Summary</h1> + <p>Production window: {window_info}</p> + <ul> + <li>Total calls: {total_calls} (success: {calls_success}, with inputs: {calls_with_inputs}, with DQ: {calls_with_dq})</li> + <li>Error rate: {error_rate:.2%}</li> + <li>Latency p50 / p95 (ms): {latency_p50:.1f} / {latency_p95:.1f}</li> + <li>Samples: n_prod={n_prod}, n_ref={n_ref}</li> + </ul> + <h2>Top error reasons</h2> + {error_html} + {sample_badge} + <h2>Score Monitoring</h2> + <ul> + {score_metrics_html} + </ul> + {score_plots_html} + <h2>Data Quality</h2> + {dq_html} + <h2>Data Drift Summary</h2> + {drift_badge} + + {summary_html} + <h2>Feature Distributions</h2>
    + {plots_html} + + +""" + + report_path.write_text(html, encoding="utf-8") + return report_path + + +def main() -> None: + parser = argparse.ArgumentParser(description="Generate a drift report from production logs.") + parser.add_argument("--logs", type=Path, default=Path("logs/predictions.jsonl")) + parser.add_argument("--reference", type=Path, default=Path("data/data_final.parquet")) + parser.add_argument("--output-dir", type=Path, default=Path("reports")) + parser.add_argument("--sample-size", type=int, default=50000) + parser.add_argument("--psi-threshold", type=float, default=0.2) + parser.add_argument("--score-bins", type=int, default=30) + parser.add_argument("--min-prod-samples", type=int, default=MIN_PROD_SAMPLES_DEFAULT) + parser.add_argument("--psi-eps", type=float, default=PSI_EPS_DEFAULT) + parser.add_argument("--min-category-share", type=float, default=RARE_CATEGORY_MIN_SHARE_DEFAULT) + parser.add_argument("--fdr-alpha", type=float, default=FDR_ALPHA_DEFAULT) + parser.add_argument("--min-drift-features", type=int, default=1) + parser.add_argument("--prod-since", type=str, default=None) + parser.add_argument("--prod-until", type=str, default=None) + args = parser.parse_args() + + report_path = generate_report( + log_path=args.logs, + reference_path=args.reference, + output_dir=args.output_dir, + sample_size=args.sample_size, + psi_threshold=args.psi_threshold, + score_bins=args.score_bins, + min_prod_samples=args.min_prod_samples, + psi_eps=args.psi_eps, + min_category_share=args.min_category_share, + fdr_alpha=args.fdr_alpha, + min_drift_features=args.min_drift_features, + prod_since=args.prod_since, + prod_until=args.prod_until, + ) + print(f"Drift report saved to {report_path}") + + +if __name__ == "__main__": + main() diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/streamlit_app.py 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/streamlit_app.py index b76e7bf60887c30da836f296e43377aea063de63..46e461cfd6df705e4bc2a19d14948bdc4e3964b1 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/streamlit_app.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/streamlit_app.py @@ -14,7 +14,7 @@ ROOT = Path(__file__).resolve().parents[1] if str(ROOT) not in sys.path: sys.path.insert(0, str(ROOT)) -from monitoring.drift_report import generate_report, _load_logs +from monitoring.drift_report_old import generate_report, _load_logs def _load_logs_safe(log_path: Path) -> tuple[pd.DataFrame, pd.DataFrame]: diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/dev_preprocess_utils.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/dev_preprocess_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..77c5abadf03a7bbf880f678dd7bffad12b3ee8af --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/dev_preprocess_utils.py @@ -0,0 +1,78 @@ +from __future__ import annotations + +from typing import Any + +import numpy as np +import pandas as pd + +from app.main import ( + DAYS_EMPLOYED_SENTINEL, + ENGINEERED_SOURCES, + IGNORE_FEATURES, + MISSING_INDICATOR_MIN_RATE, + OUTLIER_COLUMNS, + OUTLIER_LOWER_Q, + OUTLIER_UPPER_Q, + _apply_correlated_imputation, + _validate_numeric_inputs, + add_missingness_indicators, + apply_outlier_clipping, + compute_outlier_bounds, + new_features_creation, + select_missing_indicator_columns, +) + + +def 
preprocess_for_training(df_raw: pd.DataFrame, artifacts: Any) -> pd.DataFrame: + df = df_raw.copy() + + for col in artifacts.required_input_columns: + if col not in df.columns: + df[col] = np.nan + + _validate_numeric_inputs(df, artifacts.numeric_required_columns) + + df['is_train'] = 0 + df['is_test'] = 1 + if 'TARGET' not in df.columns: + df['TARGET'] = 0 + + df = new_features_creation( + df, + days_employed_sentinel=DAYS_EMPLOYED_SENTINEL, + engineered_sources=ENGINEERED_SOURCES, + ) + df.replace([np.inf, -np.inf], np.nan, inplace=True) + + df = df.reindex(columns=artifacts.columns_keep, fill_value=np.nan) + + indicator_cols = getattr(artifacts, 'missing_indicator_columns', None) or select_missing_indicator_columns( + df, + exclude_cols=set(IGNORE_FEATURES), + min_missing_rate=MISSING_INDICATOR_MIN_RATE, + ) + df = add_missingness_indicators(df, indicator_cols) + + outlier_bounds = getattr(artifacts, 'outlier_bounds', {}) or compute_outlier_bounds( + df, + OUTLIER_COLUMNS, + lower_q=OUTLIER_LOWER_Q, + upper_q=OUTLIER_UPPER_Q, + ) + df = apply_outlier_clipping(df, outlier_bounds) + + _apply_correlated_imputation(df, artifacts) + + for col, median in artifacts.numeric_medians.items(): + if col in df.columns: + df[col] = pd.to_numeric(df[col], errors='coerce') + df[col] = df[col].fillna(median) + + for col in artifacts.categorical_columns: + if col in df.columns: + df[col] = df[col].fillna('Unknown') + + df_hot = pd.get_dummies(df, columns=artifacts.categorical_columns) + df_hot = df_hot.reindex(columns=artifacts.features_to_scaled, fill_value=0) + scaled = artifacts.scaler.transform(df_hot) + return pd.DataFrame(scaled, columns=artifacts.features_to_scaled, index=df.index) diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/notebook_params.yaml 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/notebook_params.yaml new file mode 100644 index 0000000000000000000000000000000000000000..88fb77ce793c8e73d4d1638b94859637aa7585e5 --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/notebook_params.yaml @@ -0,0 +1,22 @@ +exploration: + SAMPLE_DATA: False + SAMPLE_FRACTION: 0.01 + +compare_tuning: + DATA_PATH: data/data_final.parquet + ARTIFACTS_PATH: artifacts/preprocessor.joblib + REPORT_PATH: reports/best_model.json + SEED: 42 + SAMPLE_SIZE: 50000 + TEST_SIZE: 0.2 + CV_SPLITS: 5 + N_JOBS: 1 + TUNING_TRIALS: 30 + LOG_ARTIFACTS: true + EXPERIMENT_NAME: Model-Compare-Tuning + SELECTION_RULE: 0.7*f1_best + 0.3*pr_auc + +modelisation: + RUN_LEGACY_PREPROCESSING: true + RUN_FULL_BENCHMARK: true + RUN_MLFLOW_LOGGING: true diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/run_notebooks.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/run_notebooks.py new file mode 100644 index 0000000000000000000000000000000000000000..ffe6fd3d6444873b453352089658ef8d21ba1057 --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/run_notebooks.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import subprocess +import sys +import time +from pathlib import Path +from datetime import datetime + + +def _resolve_notebook(notebooks_dir: Path, pattern: str) -> Path: + matches = sorted(notebooks_dir.glob(pattern)) + if len(matches) != 1: + raise FileNotFoundError( + f"Expected 1 notebook for pattern '{pattern}', found {len(matches)}: {matches}" + ) + return matches[0] + + +def 
_execute_with_nbclient(nb_path: Path, output_path: Path) -> None: + import nbformat + from nbclient import NotebookClient + + nb = nbformat.read(nb_path, as_version=4) + client = NotebookClient( + nb, + timeout=None, + resources={"metadata": {"path": str(nb_path.parent)}}, + ) + client.execute() + nbformat.write(nb, output_path) + + +def _execute_with_nbconvert(nb_path: Path, output_dir: Path) -> None: + cmd = [ + sys.executable, + "-m", + "jupyter", + "nbconvert", + "--to", + "notebook", + "--execute", + "--output", + nb_path.name, + "--output-dir", + str(output_dir), + str(nb_path), + ] + subprocess.run(cmd, check=True, cwd=str(nb_path.parent)) + + +def run_notebooks() -> None: + notebooks_dir = Path(__file__).resolve().parent + output_dir = notebooks_dir / "_executed" + output_dir.mkdir(exist_ok=True) + + notebooks = [ + _resolve_notebook(notebooks_dir, "P6_MANET_Stephane_notebook_exploration.ipynb"), + _resolve_notebook(notebooks_dir, "P6_MANET_Stephane_notebook_compare_tuning_mlflow.ipynb"), + _resolve_notebook(notebooks_dir, "P6_MANET_Stephane_notebook_mod*.ipynb"), + ] + + total = len(notebooks) + print(f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Starting run of {total} notebooks") + for idx, nb_path in enumerate(notebooks, start=1): + start = time.time() + print( + f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] " + f"Launching notebook {idx}/{total}: {nb_path.name}" + ) + output_path = output_dir / nb_path.name + try: + _execute_with_nbclient(nb_path, output_path) + except ModuleNotFoundError as exc: + if exc.name not in {"nbclient", "nbformat"}: + raise + print( + f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] " + "nbclient not available; falling back to nbconvert." 
+ ) + _execute_with_nbconvert(nb_path, output_dir) + elapsed = time.time() - start + print( + f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] " + f"Finished {nb_path.name} in {elapsed:.1f}s" + ) + print(f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] All notebooks completed") + + +if __name__ == "__main__": + run_notebooks() diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/src/__init__.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/src/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/src/feature_engineering.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/src/feature_engineering.py new file mode 100644 index 0000000000000000000000000000000000000000..83393633dce36840a85c48f793e9645934b5a7bc --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/src/feature_engineering.py @@ -0,0 +1,17 @@ +from src.features import ( + add_missingness_indicators, + apply_outlier_clipping, + compute_outlier_bounds, + new_features_creation, + safe_divide, + select_missing_indicator_columns, +) + +__all__ = [ + "add_missingness_indicators", + "apply_outlier_clipping", + "compute_outlier_bounds", + "new_features_creation", + "safe_divide", + "select_missing_indicator_columns", +] diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/src/features.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/src/features.py new file mode 100644 
index 0000000000000000000000000000000000000000..4af08d4c8c04ddf05be688bb47040fbd0bd80b4b --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/src/features.py @@ -0,0 +1,120 @@ +from __future__ import annotations + +from typing import Iterable + +import numpy as np +import pandas as pd + + +def safe_divide(numer: pd.Series, denom: pd.Series) -> tuple[pd.Series, pd.Series]: + numer = pd.to_numeric(numer, errors="coerce") + denom = pd.to_numeric(denom, errors="coerce") + denom_zero = denom.isna() | (denom == 0) + return numer / denom.replace(0, np.nan), denom_zero + + +def new_features_creation( + df: pd.DataFrame, + *, + days_employed_sentinel: int = 365243, + engineered_sources: Iterable[str] | None = None, +) -> pd.DataFrame: + df_features = df.copy() + if engineered_sources is not None: + for col in engineered_sources: + if col not in df_features.columns: + df_features[col] = np.nan + + if "DAYS_EMPLOYED" in df_features.columns: + if "DAYS_EMPLOYED_ANOM" not in df_features.columns: + sentinel_mask = df_features["DAYS_EMPLOYED"] == days_employed_sentinel + df_features["DAYS_EMPLOYED_ANOM"] = sentinel_mask.astype(int) + df_features.loc[sentinel_mask, "DAYS_EMPLOYED"] = np.nan + + def _add_ratio(numer_col: str, denom_col: str, ratio_name: str) -> None: + if numer_col not in df_features.columns or denom_col not in df_features.columns: + df_features[ratio_name] = np.nan + df_features[f"DENOM_ZERO_{ratio_name}"] = 1 + return + ratio, denom_zero = safe_divide(df_features[numer_col], df_features[denom_col]) + df_features[ratio_name] = ratio + df_features[f"DENOM_ZERO_{ratio_name}"] = denom_zero.astype(int) + + _add_ratio("DAYS_EMPLOYED", "DAYS_BIRTH", "DAYS_EMPLOYED_PERC") + _add_ratio("AMT_INCOME_TOTAL", "AMT_CREDIT", "INCOME_CREDIT_PERC") + _add_ratio("AMT_INCOME_TOTAL", "CNT_FAM_MEMBERS", "INCOME_PER_PERSON") + _add_ratio("AMT_ANNUITY", "AMT_INCOME_TOTAL", "ANNUITY_INCOME_PERC") + 
_add_ratio("AMT_ANNUITY", "AMT_CREDIT", "PAYMENT_RATE") + + df_features.replace([np.inf, -np.inf], np.nan, inplace=True) + return df_features + + +def select_missing_indicator_columns( + df: pd.DataFrame, + *, + exclude_cols: set[str] | None = None, + min_missing_rate: float = 0.0, +) -> list[str]: + exclude = exclude_cols or set() + numeric_cols = df.select_dtypes(include=["number"]).columns + missing_rate = df[numeric_cols].isna().mean() + cols: list[str] = [] + for col in numeric_cols: + if col in exclude: + continue + if col.startswith(("DENOM_ZERO_", "is_missing_", "is_outlier_")): + continue + if col.endswith("_ANOM"): + continue + if missing_rate.get(col, 0.0) <= min_missing_rate: + continue + cols.append(col) + return cols + + +def add_missingness_indicators( + df: pd.DataFrame, indicator_cols: list[str] +) -> pd.DataFrame: + for col in indicator_cols: + if col not in df.columns: + df[col] = np.nan + df[f"is_missing_{col}"] = pd.to_numeric(df[col], errors="coerce").isna().astype(int) + return df + + +def compute_outlier_bounds( + df: pd.DataFrame, + outlier_columns: list[str], + *, + lower_q: float = 0.01, + upper_q: float = 0.99, +) -> dict[str, tuple[float, float]]: + bounds: dict[str, tuple[float, float]] = {} + for col in outlier_columns: + if col not in df.columns: + continue + values = pd.to_numeric(df[col], errors="coerce") + if values.dropna().empty: + continue + bounds[col] = ( + float(values.quantile(lower_q)), + float(values.quantile(upper_q)), + ) + return bounds + + +def apply_outlier_clipping( + df: pd.DataFrame, outlier_bounds: dict[str, tuple[float, float]] +) -> pd.DataFrame: + if not outlier_bounds: + return df + df = df.copy() + for col, (low, high) in outlier_bounds.items(): + if col not in df.columns: + continue + values = pd.to_numeric(df[col], errors="coerce") + mask = values.notna() & ((values < low) | (values > high)) + df[f"is_outlier_{col}"] = mask.astype(int) + df[col] = values.clip(lower=low, upper=high) + return df diff --git 
a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_drift_report.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_drift_report.py index 8ddf286777ad5fe336273c93b12e331b3d2c7f10..ba0c4225a895d28daa1274351bee6a712d7d84e8 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_drift_report.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_drift_report.py @@ -3,7 +3,7 @@ from pathlib import Path import pandas as pd -from monitoring.drift_report import generate_report +from monitoring.drift_report_old import generate_report def _write_jsonl(path: Path, entries: list[dict]) -> None: diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt index e7f09f68e0fe34ec6755e637033e128edd00186b..404801f5a268d5590bf06077f2737cb0fb1967e1 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt @@ -106,7 +106,6 @@ more-itertools==10.8.0 msgpack==1.1.2 msgspec==0.20.0 multidict==6.7.0 -multipart==1.3.0 mypy_extensions==1.1.0 narwhals==2.14.0 nbclient==0.10.4 diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/streamlit_app.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/streamlit_app.py index 
46e461cfd6df705e4bc2a19d14948bdc4e3964b1..b76e7bf60887c30da836f296e43377aea063de63 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/streamlit_app.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/streamlit_app.py @@ -14,7 +14,7 @@ ROOT = Path(__file__).resolve().parents[1] if str(ROOT) not in sys.path: sys.path.insert(0, str(ROOT)) -from monitoring.drift_report_old import generate_report, _load_logs +from monitoring.drift_report import generate_report, _load_logs def _load_logs_safe(log_path: Path) -> tuple[pd.DataFrame, pd.DataFrame]: diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/postBuild b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/postBuild new file mode 100644 index 0000000000000000000000000000000000000000..e1217f14d1d4640dc2c2b5094dfed4b9901533a8 --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/postBuild @@ -0,0 +1,5 @@ +#!/usr/bin/env bash +set -e + +# Remove the conflicting "multipart" distribution so python-multipart provides the package. 
+python -m pip uninstall -y multipart || true diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/drift_report.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/drift_report.py index 67f0c6fc6189ad1c93f739b281b17176be9430a5..e0ad4f72dd42da31756250d381dcf1b31a683c70 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/drift_report.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/drift_report.py @@ -6,6 +6,9 @@ from pathlib import Path import pandas as pd from evidently import Report +from evidently.metric_preset import DataDriftPreset + + # On peut réutiliser les fonctions de normalisation si elles sont dans un module partagé # Pour cet exemple, je les recopie ici pour la clarté. @@ -109,3 +112,62 @@ if __name__ == "__main__": output_path=args.output_path, sample_size=args.sample_size, ) + +def _load_prod_inputs_for_evidently(log_path: Path) -> pd.DataFrame: + inputs_df, meta_df = _load_logs(log_path) # <-- réutilise ton parseur existant + if meta_df.empty: + return pd.DataFrame() + ok = meta_df.get("status_code", pd.Series(dtype=int)).fillna(0) < 400 + return inputs_df.loc[ok].copy() if not inputs_df.empty else pd.DataFrame() + +def generate_evidently_report( + reference_path: Path, + log_path: Path, + output_dir: Path, + sample_size: int = 50000, + output_name: str = "evidently_drift_report.html", +) -> Path: + # 1) Load reference + ref = pd.read_parquet(reference_path) + if sample_size > 0 and len(ref) > sample_size: + ref = ref.sample(sample_size, random_state=42) + + # 2) Load prod (inputs) depuis logs + cur = _load_prod_inputs_for_evidently(log_path) + if cur.empty: + raise ValueError("No valid production inputs found in logs (status_code < 400).") + + # 3) Pré-traitements minimalistes identiques des deux côtés (si tu en as 
besoin) + # (garde tes helpers si tu veux: sentinel, normalisation genre, etc.) + # ref = _replace_sentinel(ref); cur = _replace_sentinel(cur) + # ... + + # 4) Aligner les colonnes communes + common = sorted(set(ref.columns).intersection(cur.columns)) + if not common: + raise ValueError("No common columns between reference and production data.") + ref = ref[common] + cur = cur[common] + + # 5) Evidently report + report = Report(metrics=[ + DataDriftPreset(), + DataQualityPreset(), + ]) + + run_result = report.run(reference_data=ref, current_data=cur) + + output_dir.mkdir(parents=True, exist_ok=True) + out = output_dir / output_name + + # Compat versions Evidently : + # - certaines versions: report.save_html(...) + # - d'autres: run_result.save_html(...) + if hasattr(run_result, "save_html"): + run_result.save_html(str(out)) + elif hasattr(report, "save_html"): + report.save_html(str(out)) + else: + raise RuntimeError("Evidently save_html not available on this version.") + + return out diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/streamlit_app.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/streamlit_app.py index b76e7bf60887c30da836f296e43377aea063de63..b7fa91d0c4da7cf3bdcb2dbd4bef767385a601ee 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/streamlit_app.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/streamlit_app.py @@ -9,6 +9,10 @@ import streamlit as st import matplotlib matplotlib.use("Agg") import matplotlib.pyplot as plt +import streamlit.components.v1 as components + +from monitoring.drift_report import generate_report, generate_evidently_report, _load_logs + ROOT = Path(__file__).resolve().parents[1] if str(ROOT) not in sys.path: @@ -137,6 +141,37 @@ if st.button("Generate drift report"): ) 
st.exception(exc) +if st.button("Generate drift report"): + try: + if drift_engine == "Evidently": + report_path = generate_evidently_report( + reference_path=reference_path, + log_path=log_path, + output_dir=output_dir, + sample_size=int(sample_size), + ) + else: + report_path = generate_report( + log_path=log_path, + reference_path=reference_path, + output_dir=output_dir, + sample_size=int(sample_size), + psi_threshold=float(psi_threshold), + score_bins=int(score_bins), + min_prod_samples=int(min_prod_samples), + psi_eps=float(psi_eps), + min_category_share=float(min_category_share), + fdr_alpha=float(fdr_alpha), + min_drift_features=int(min_drift_features), + prod_since=prod_since or None, + prod_until=prod_until or None, + ) + st.success(f"Generated: {report_path}") + except Exception as exc: + st.error(str(exc)) + st.exception(exc) + + report_file = output_dir / "drift_report.html" if report_file.exists(): st.markdown(f"Report available at `{report_file}`") diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/drift_report.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/drift_report.py index e0ad4f72dd42da31756250d381dcf1b31a683c70..676591404d2345e8200b6961f4a17ab914e73db9 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/drift_report.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/drift_report.py @@ -5,8 +5,9 @@ import json from pathlib import Path import pandas as pd -from evidently import Report -from evidently.metric_preset import DataDriftPreset +#from evidently.report import Report +#from evidently.metric_preset import DataDriftPreset, DataQualityPreset # DataQualityPreset optionnel + diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/streamlit_app.py 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/streamlit_app.py index b7fa91d0c4da7cf3bdcb2dbd4bef767385a601ee..cae224e552842ba3376010bd706e98780997b841 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/streamlit_app.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/monitoring/streamlit_app.py @@ -11,7 +11,7 @@ matplotlib.use("Agg") import matplotlib.pyplot as plt import streamlit.components.v1 as components -from monitoring.drift_report import generate_report, generate_evidently_report, _load_logs +from drift_report import generate_report, generate_evidently_report, _load_logs ROOT = Path(__file__).resolve().parents[1] diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt index 404801f5a268d5590bf06077f2737cb0fb1967e1..18b39efbdac1130fe0ba144ee58cb5a77e28844d 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt @@ -33,7 +33,6 @@ distro==1.9.0 docker==7.1.0 dulwich==0.24.10 dynaconf==3.2.12 -evidently==0.7.18 executing==2.2.1 Faker==40.1.0 fastapi==0.121.0 @@ -88,7 +87,6 @@ jupyterlab_widgets==3.0.16 keyring==25.7.0 kiwisolver==1.4.9 lightgbm==4.6.0 -litestar==2.19.0 litestar-htmx==0.5.0 llvmlite==0.46.0 lxml==6.0.2 diff --git a/hf_space/monitoring/drift_report.py b/hf_space/monitoring/drift_report.py index 676591404d2345e8200b6961f4a17ab914e73db9..30852e950f05d37c0e28526d9229757ae232b688 100644 --- a/hf_space/monitoring/drift_report.py +++ b/hf_space/monitoring/drift_report.py @@ -1,174 +1,808 @@ -# scripts/drift_report.py +# construire drift avec evidently + +from __future__ import annotations import argparse 
import json +import re from pathlib import Path +from typing import Any +import numpy as np import pandas as pd -#from evidently.report import Report -#from evidently.metric_preset import DataDriftPreset, DataQualityPreset # DataQualityPreset optionnel - - - - -# On peut réutiliser les fonctions de normalisation si elles sont dans un module partagé -# Pour cet exemple, je les recopie ici pour la clarté. - -def _normalize_gender(series: pd.Series) -> pd.Series: - """Normalise la colonne CODE_GENDER.""" - return series.str.upper().replace({'XNA': 'F', 'MALE': 'M', 'FEMALE': 'F'}) - -def _replace_sentinel(df: pd.DataFrame) -> pd.DataFrame: - """Remplace la valeur sentinelle dans DAYS_EMPLOYED.""" - if 'DAYS_EMPLOYED' in df.columns: - df['DAYS_EMPLOYED'] = df['DAYS_EMPLOYED'].replace({365243: pd.NA}) - return df - -def _load_logs(log_path: Path) -> pd.DataFrame | None: - """Charge les inputs depuis les logs JSONL.""" - if not log_path.exists(): - print(f"Warning: Log file not found at {log_path}") - return None - - records = [] - with log_path.open('r', encoding='utf-8') as f: - for line in f: - try: - log_entry = json.loads(line) - # On ne garde que les appels réussis avec des inputs - if log_entry.get('status_code', 500) < 400 and 'inputs' in log_entry: - records.append(log_entry['inputs']) - except json.JSONDecodeError: - continue # Ignore les lignes mal formées - - if not records: - return None - - return pd.DataFrame(records) - - -def generate_evidently_report( - reference_path: Path, - log_path: Path, - output_path: Path, - sample_size: int = 20000, -): - """Génère un rapport de dérive de données avec Evidently.""" - - print("1. Chargement des données de référence...") - reference_df = pd.read_parquet(reference_path) - if sample_size > 0 and sample_size < len(reference_df): - reference_df = reference_df.sample(n=sample_size, random_state=42) - - print("2. 
Chargement et préparation des données de production (logs)...") - prod_df = _load_logs(log_path) - - if prod_df is None or prod_df.empty: - print("Aucune donnée de production valide trouvée. Le rapport ne peut être généré.") - return - - # Appliquer le même pré-traitement pour une comparaison juste - reference_df = _replace_sentinel(reference_df) - prod_df = _replace_sentinel(prod_df) - - if 'CODE_GENDER' in reference_df.columns: - reference_df['CODE_GENDER'] = _normalize_gender(reference_df['CODE_GENDER']) - if 'CODE_GENDER' in prod_df.columns: - prod_df['CODE_GENDER'] = _normalize_gender(prod_df['CODE_GENDER']) - - # S'assurer que les colonnes correspondent - # Evidently gère les colonnes manquantes, mais c'est une bonne pratique - ref_cols = set(reference_df.columns) - prod_cols = set(prod_df.columns) - common_cols = list(ref_cols.intersection(prod_cols)) - - reference_df = reference_df[common_cols] - prod_df = prod_df[common_cols] - - print(f"3. Génération du rapport de dérive sur {len(common_cols)} features communes...") - data_drift_report = Report(metrics=[ - DataDriftPreset(), - ]) - - data_drift_report.run(reference_data=reference_df, current_data=prod_df) - - output_path.parent.mkdir(parents=True, exist_ok=True) - data_drift_report.save_html(str(output_path)) - - print(f"✅ Rapport de dérive généré avec succès : {output_path}") +from scipy import stats +try: + import matplotlib + matplotlib.use("Agg") + import matplotlib.pyplot as plt +except ImportError as exc: # pragma: no cover - optional plotting dependency + raise SystemExit( + "matplotlib is required for plots. 
Install it with: pip install matplotlib" + ) from exc -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Generate a data drift report using Evidently AI.") - parser.add_argument("--reference-path", type=Path, default="data/data_final.parquet", help="Path to the reference dataset.") - parser.add_argument("--log-path", type=Path, default="logs/predictions.jsonl", help="Path to the production log file.") - parser.add_argument("--output-path", type=Path, default="reports/evidently_drift_report.html", help="Path to save the HTML report.") - parser.add_argument("--sample-size", type=int, default=20000, help="Size of the reference sample to use (0 for full dataset).") - - args = parser.parse_args() - generate_evidently_report( - reference_path=args.reference_path, - log_path=args.log_path, - output_path=args.output_path, - sample_size=args.sample_size, - ) +DEFAULT_FEATURES = [ + "EXT_SOURCE_2", + "EXT_SOURCE_3", + "AMT_ANNUITY", + "EXT_SOURCE_1", + "CODE_GENDER", + "DAYS_EMPLOYED", + "AMT_CREDIT", + "AMT_GOODS_PRICE", + "DAYS_BIRTH", + "FLAG_OWN_CAR", +] + +CATEGORICAL_FEATURES = {"CODE_GENDER", "FLAG_OWN_CAR"} +MIN_PROD_SAMPLES_DEFAULT = 50 +PSI_EPS_DEFAULT = 1e-4 +RARE_CATEGORY_MIN_SHARE_DEFAULT = 0.01 +FDR_ALPHA_DEFAULT = 0.05 +DAYS_EMPLOYED_SENTINEL = 365243 + +CATEGORY_NORMALIZATION = { + "CODE_GENDER": { + "F": "F", + "FEMALE": "F", + "0": "F", + "W": "F", + "WOMAN": "F", + "M": "M", + "MALE": "M", + "1": "M", + "MAN": "M", + }, + "FLAG_OWN_CAR": { + "Y": "Y", + "YES": "Y", + "TRUE": "Y", + "1": "Y", + "T": "Y", + "N": "N", + "NO": "N", + "FALSE": "N", + "0": "N", + "F": "N", + }, +} + + +def _safe_name(value: str) -> str: + return re.sub(r"[^a-zA-Z0-9_-]+", "_", value) + + +def _load_logs(log_path: Path) -> tuple[pd.DataFrame, pd.DataFrame]: + entries: list[dict[str, object]] = [] + with log_path.open("r", encoding="utf-8") as handle: + for line in handle: + line = line.strip() + if not line: + continue + entries.append(json.loads(line)) + 
if not entries: + return pd.DataFrame(), pd.DataFrame() + inputs = [ + entry.get("inputs") if isinstance(entry.get("inputs"), dict) else {} + for entry in entries + ] + inputs_df = pd.DataFrame.from_records(inputs) + meta_df = pd.DataFrame.from_records(entries) + return inputs_df, meta_df + + +def _normalize_category_value(value: object, mapping: dict[str, str]) -> object: + if pd.isna(value): + return np.nan + key = str(value).strip().upper() + if not key: + return np.nan + return mapping.get(key, "Unknown") + + +def _normalize_categories(df: pd.DataFrame) -> pd.DataFrame: + out = df.copy() + for feature, mapping in CATEGORY_NORMALIZATION.items(): + if feature in out.columns: + out[feature] = out[feature].apply(lambda v: _normalize_category_value(v, mapping)) + return out + + +def _replace_sentinel(series: pd.Series, sentinel: float) -> tuple[pd.Series, float]: + values = pd.to_numeric(series, errors="coerce") + sentinel_mask = values == sentinel + if sentinel_mask.any(): + series = series.copy() + series[sentinel_mask] = np.nan + return series, float(sentinel_mask.mean()) if len(values) else 0.0 + + +def _prepare_categorical( + reference: pd.Series, + production: pd.Series, + min_share: float, + max_categories: int | None = None, + other_label: str = "__OTHER__", +) -> tuple[pd.Series, pd.Series]: + ref_series = reference.fillna("Unknown") + prod_series = production.fillna("Unknown") + ref_freq = ref_series.value_counts(normalize=True) + keep = ref_freq[ref_freq >= min_share].index.tolist() + if max_categories is not None: + keep = keep[:max_categories] + ref_series = ref_series.where(ref_series.isin(keep), other=other_label) + prod_series = prod_series.where(prod_series.isin(keep), other=other_label) + return ref_series, prod_series + + +def _psi(reference: pd.Series, production: pd.Series, eps: float = PSI_EPS_DEFAULT) -> float: + ref_freq = reference.value_counts(normalize=True, dropna=False) + prod_freq = production.value_counts(normalize=True, dropna=False) 
+ categories = ref_freq.index.union(prod_freq.index) + ref_probs = ref_freq.reindex(categories, fill_value=0).to_numpy() + prod_probs = prod_freq.reindex(categories, fill_value=0).to_numpy() + ref_probs = np.clip(ref_probs, eps, None) + prod_probs = np.clip(prod_probs, eps, None) + return float(np.sum((ref_probs - prod_probs) * np.log(ref_probs / prod_probs))) + + +def _coerce_numeric(df: pd.DataFrame, columns: list[str]) -> pd.DataFrame: + out = df.copy() + for col in columns: + if col in out.columns: + out[col] = pd.to_numeric(out[col], errors="coerce") + return out + + +def _plot_numeric(ref: pd.Series, prod: pd.Series, output_path: Path) -> None: + plt.figure(figsize=(6, 4)) + plt.hist(ref.dropna(), bins=30, alpha=0.6, label="reference") + plt.hist(prod.dropna(), bins=30, alpha=0.6, label="production") + plt.title(f"Distribution: {ref.name}") + plt.legend() + plt.tight_layout() + plt.savefig(output_path) + plt.close() + + +def _plot_categorical(ref: pd.Series, prod: pd.Series, output_path: Path) -> None: + ref_freq = ref.value_counts(normalize=True) + prod_freq = prod.value_counts(normalize=True) + plot_df = pd.DataFrame({"reference": ref_freq, "production": prod_freq}).fillna(0) + plot_df.sort_values("reference", ascending=False).plot(kind="bar", figsize=(7, 4)) + plt.title(f"Distribution: {ref.name}") + plt.tight_layout() + plt.savefig(output_path) + plt.close() + + +def _benjamini_hochberg(pvalues: list[float], alpha: float) -> tuple[list[float], list[bool]]: + if not pvalues: + return [], [] + pvals = np.array(pvalues, dtype=float) + order = np.argsort(pvals) + ranked = pvals[order] + m = len(pvals) + thresholds = alpha * (np.arange(1, m + 1) / m) + below = ranked <= thresholds + reject = np.zeros(m, dtype=bool) + if below.any(): + cutoff = np.max(np.where(below)[0]) + reject[order[:cutoff + 1]] = True + qvals = ranked * m / np.arange(1, m + 1) + qvals = np.minimum.accumulate(qvals[::-1])[::-1] + adjusted = np.empty_like(qvals) + adjusted[order] = qvals + 
return adjusted.tolist(), reject.tolist() + + +def _extract_data_quality(meta_df: pd.DataFrame) -> list[dict[str, object]]: + if "data_quality" not in meta_df.columns: + return [] + dq_entries = [] + for item in meta_df["data_quality"].dropna(): + if isinstance(item, dict): + dq_entries.append(item) + return dq_entries + + +def _normalize_error_message(value: object) -> str: + if value is None: + return "" + if isinstance(value, dict): + message = value.get("message") + return str(message) if message else json.dumps(value, ensure_ascii=True) + if isinstance(value, list): + return str(value[0]) if value else "" + if isinstance(value, str): + cleaned = value.strip() + if not cleaned: + return "" + try: + parsed = json.loads(cleaned) + except json.JSONDecodeError: + return cleaned + return _normalize_error_message(parsed) + return str(value) + + +def _summarize_errors(meta_df: pd.DataFrame, max_items: int = 5) -> list[tuple[str, int]]: + if "error" not in meta_df.columns: + return [] + errors = meta_df["error"].dropna().apply(_normalize_error_message) + errors = errors[errors != ""] + if errors.empty: + return [] + counts = errors.value_counts().head(max_items) + return list(zip(counts.index.tolist(), counts.tolist())) + + +def _dq_has_unknown(dq: dict[str, object], feature: str) -> bool: + unknown = dq.get("unknown_categories") + if isinstance(unknown, dict): + return feature in unknown + if isinstance(unknown, list): + return feature in unknown + return False + + +def _summarize_data_quality( + meta_df: pd.DataFrame, + production_df: pd.DataFrame, + sentinel_rates: dict[str, float], +) -> dict[str, object]: + dq_entries = _extract_data_quality(meta_df) + if dq_entries: + total = len(dq_entries) + missing_rate = np.mean( + [bool(dq.get("missing_required_columns")) for dq in dq_entries] + ) + invalid_rate = np.mean( + [bool(dq.get("invalid_numeric_columns")) for dq in dq_entries] + ) + out_of_range_rate = np.mean( + [bool(dq.get("out_of_range_columns")) for dq in 
dq_entries] + ) + outlier_rate = np.mean( + [bool(dq.get("outlier_columns")) for dq in dq_entries] + ) + nan_rate = np.mean([float(dq.get("nan_rate", 0.0)) for dq in dq_entries]) + unknown_gender = np.mean( + [_dq_has_unknown(dq, "CODE_GENDER") for dq in dq_entries] + ) + unknown_car = np.mean( + [_dq_has_unknown(dq, "FLAG_OWN_CAR") for dq in dq_entries] + ) + sentinel_rate = np.mean( + [bool(dq.get("days_employed_sentinel")) for dq in dq_entries] + ) + return { + "source": "log", + "sample_size": total, + "missing_required_rate": float(missing_rate), + "invalid_numeric_rate": float(invalid_rate), + "out_of_range_rate": float(out_of_range_rate), + "outlier_rate": float(outlier_rate), + "nan_rate": float(nan_rate), + "unknown_gender_rate": float(unknown_gender), + "unknown_car_rate": float(unknown_car), + "days_employed_sentinel_rate": float(sentinel_rate), + } + + if production_df.empty: + return {"source": "none"} + + missing_rate = float(production_df.isna().any(axis=1).mean()) + unknown_gender_rate = 0.0 + unknown_car_rate = 0.0 + if "CODE_GENDER" in production_df.columns: + unknown_gender_rate = float( + (production_df["CODE_GENDER"] == "Unknown").mean() + ) + if "FLAG_OWN_CAR" in production_df.columns: + unknown_car_rate = float((production_df["FLAG_OWN_CAR"] == "Unknown").mean()) + sentinel_rate = float(sentinel_rates.get("production", 0.0)) + return { + "source": "fallback", + "sample_size": len(production_df), + "missing_required_rate": missing_rate, + "unknown_gender_rate": unknown_gender_rate, + "unknown_car_rate": unknown_car_rate, + "days_employed_sentinel_rate": sentinel_rate, + } + + +def _filter_by_time( + meta_df: pd.DataFrame, + inputs_df: pd.DataFrame, + since: str | None, + until: str | None, +) -> tuple[pd.DataFrame, pd.DataFrame, str]: + if not since and not until: + return meta_df, inputs_df, "" + if "timestamp" not in meta_df.columns: + return meta_df, inputs_df, "timestamp_missing" + timestamps = pd.to_datetime(meta_df["timestamp"], 
errors="coerce", utc=True) + if timestamps.isna().all(): + return meta_df, inputs_df, "timestamp_invalid" + mask = pd.Series(True, index=meta_df.index) + if since: + since_dt = pd.to_datetime(since, errors="coerce", utc=True) + if not pd.isna(since_dt): + mask &= timestamps >= since_dt + if until: + until_dt = pd.to_datetime(until, errors="coerce", utc=True) + if not pd.isna(until_dt): + mask &= timestamps <= until_dt + return meta_df.loc[mask].reset_index(drop=True), inputs_df.loc[mask].reset_index(drop=True), "filtered" + + +def _plot_score_distribution(scores: pd.Series, output_path: Path, bins: int = 30) -> None: + plt.figure(figsize=(6, 4)) + plt.hist(scores.dropna(), bins=bins, range=(0, 1), alpha=0.8, color="#4C78A8") + plt.title("Prediction score distribution") + plt.xlabel("Predicted probability") + plt.ylabel("Count") + plt.tight_layout() + plt.savefig(output_path) + plt.close() + + +def _plot_prediction_rate(predictions: pd.Series, output_path: Path) -> None: + counts = predictions.value_counts(normalize=True, dropna=False).sort_index() + plt.figure(figsize=(4, 4)) + plt.bar(counts.index.astype(str), counts.values, color="#F58518") + plt.title("Prediction rate") + plt.xlabel("Predicted class") + plt.ylabel("Share") + plt.ylim(0, 1) + plt.tight_layout() + plt.savefig(output_path) + plt.close() -def _load_prod_inputs_for_evidently(log_path: Path) -> pd.DataFrame: - inputs_df, meta_df = _load_logs(log_path) # <-- réutilise ton parseur existant - if meta_df.empty: - return pd.DataFrame() - ok = meta_df.get("status_code", pd.Series(dtype=int)).fillna(0) < 400 - return inputs_df.loc[ok].copy() if not inputs_df.empty else pd.DataFrame() -def generate_evidently_report( +def summarize_errors(meta_df: pd.DataFrame, max_items: int = 5) -> list[tuple[str, int]]: + return _summarize_errors(meta_df, max_items=max_items) + + +def summarize_data_quality( + meta_df: pd.DataFrame, + production_df: pd.DataFrame, + sentinel_rates: dict[str, float], +) -> dict[str, object]: 
+ return _summarize_data_quality(meta_df, production_df, sentinel_rates) + + +def compute_drift_summary( + log_path: Path, reference_path: Path, + sample_size: int, + psi_threshold: float, + score_bins: int, + min_prod_samples: int = MIN_PROD_SAMPLES_DEFAULT, + psi_eps: float = PSI_EPS_DEFAULT, + min_category_share: float = RARE_CATEGORY_MIN_SHARE_DEFAULT, + fdr_alpha: float = FDR_ALPHA_DEFAULT, + min_drift_features: int = 1, + prod_since: str | None = None, + prod_until: str | None = None, +) -> dict[str, Any]: + inputs_df, meta_df = _load_logs(log_path) + if meta_df.empty: + raise SystemExit(f"No inputs found in logs: {log_path}") + + meta_df, inputs_df, window_status = _filter_by_time( + meta_df, inputs_df, since=prod_since, until=prod_until + ) + meta_df_all = meta_df.copy() + inputs_df_all = inputs_df.copy() + valid_mask = pd.Series(True, index=meta_df.index) + if "status_code" in meta_df.columns: + valid_mask = meta_df["status_code"].fillna(0) < 400 + inputs_df = inputs_df.loc[valid_mask].reset_index(drop=True) + meta_df_valid = meta_df.loc[valid_mask].reset_index(drop=True) + + if inputs_df.empty: + raise SystemExit(f"No valid inputs found in logs: {log_path}") + + features = [col for col in DEFAULT_FEATURES if col in inputs_df.columns] + if not features: + raise SystemExit("No matching features found in production logs.") + + reference_df = pd.read_parquet(reference_path, columns=features) + if sample_size and len(reference_df) > sample_size: + reference_df = reference_df.sample(sample_size, random_state=42) + + numeric_features = [col for col in features if col not in CATEGORICAL_FEATURES] + production_df = _normalize_categories(inputs_df) + reference_df = _normalize_categories(reference_df) + production_df = _coerce_numeric(production_df, numeric_features) + reference_df = _coerce_numeric(reference_df, numeric_features) + + sentinel_rates = {} + if "DAYS_EMPLOYED" in production_df.columns: + production_df["DAYS_EMPLOYED"], prod_rate = _replace_sentinel( + 
production_df["DAYS_EMPLOYED"], DAYS_EMPLOYED_SENTINEL + ) + reference_df["DAYS_EMPLOYED"], ref_rate = _replace_sentinel( + reference_df["DAYS_EMPLOYED"], DAYS_EMPLOYED_SENTINEL + ) + sentinel_rates = { + "production": prod_rate, + "reference": ref_rate, + } + + summary_rows: list[dict[str, object]] = [] + n_prod = len(production_df) + n_ref = len(reference_df) + + for feature in features: + if feature not in reference_df.columns: + continue + ref_series = reference_df[feature] + prod_series = production_df[feature] + if feature in CATEGORICAL_FEATURES: + feature_n_prod = int(prod_series.dropna().shape[0]) + feature_n_ref = int(ref_series.dropna().shape[0]) + ref_series, prod_series = _prepare_categorical( + ref_series, prod_series, min_share=min_category_share, other_label="OTHER" + ) + insufficient_sample = feature_n_prod < min_prod_samples + psi_value = None + if not insufficient_sample: + psi_value = _psi(ref_series, prod_series, eps=psi_eps) + summary_rows.append( + { + "feature": feature, + "type": "categorical", + "psi": round(psi_value, 4) if psi_value is not None else None, + "drift_detected": bool(psi_value is not None and psi_value >= psi_threshold), + "n_prod": feature_n_prod, + "n_ref": feature_n_ref, + "note": "insufficient_sample" if insufficient_sample else "", + } + ) + else: + ref_clean = ref_series.dropna() + prod_clean = prod_series.dropna() + if ref_clean.empty or prod_clean.empty: + continue + feature_n_prod = int(len(prod_clean)) + insufficient_sample = feature_n_prod < min_prod_samples + stat = None + pvalue = None + if not insufficient_sample: + stat, pvalue = stats.ks_2samp(ref_clean, prod_clean) + summary_rows.append( + { + "feature": feature, + "type": "numeric", + "ks_stat": round(float(stat), 4) if stat is not None else None, + "p_value": round(float(pvalue), 6) if pvalue is not None else None, + "p_value_fdr": None, + "drift_detected": bool(pvalue is not None and pvalue < 0.05), + "n_prod": feature_n_prod, + "n_ref": 
int(len(ref_clean)), + "note": "insufficient_sample" if insufficient_sample else "", + } + ) + + numeric_rows = [ + (idx, row) + for idx, row in enumerate(summary_rows) + if row.get("type") == "numeric" and row.get("p_value") is not None + ] + if numeric_rows: + pvalues = [row["p_value"] for _, row in numeric_rows] + qvals, reject = _benjamini_hochberg(pvalues, alpha=fdr_alpha) + for (idx, _), qval, rejected in zip(numeric_rows, qvals, reject): + summary_rows[idx]["p_value_fdr"] = round(float(qval), 6) + summary_rows[idx]["drift_detected"] = bool(rejected) + + summary_df = pd.DataFrame(summary_rows) + drift_flags = summary_df.get("drift_detected", pd.Series(dtype=bool)).fillna(False) + drift_count = int(drift_flags.sum()) + overall_drift = drift_count >= max(min_drift_features, 1) and n_prod >= min_prod_samples + drift_features = summary_df.loc[drift_flags, "feature"].tolist() if not summary_df.empty else [] + + if not prod_since and not prod_until: + window_info = "full_log" + elif window_status in {"timestamp_missing", "timestamp_invalid"}: + window_info = f"{window_status} (no filter applied)" + else: + window_info = f"{prod_since or '...'} to {prod_until or '...'}" + + return { + "summary_df": summary_df, + "production_df": production_df, + "reference_df": reference_df, + "meta_df_all": meta_df_all, + "inputs_df_all": inputs_df_all, + "meta_df_valid": meta_df_valid, + "valid_mask": valid_mask, + "sentinel_rates": sentinel_rates, + "features": features, + "n_prod": n_prod, + "n_ref": n_ref, + "window_info": window_info, + "drift_features": drift_features, + "drift_count": drift_count, + "overall_drift": overall_drift, + } + + +def generate_report( log_path: Path, + reference_path: Path, output_dir: Path, - sample_size: int = 50000, - output_name: str = "evidently_drift_report.html", + sample_size: int, + psi_threshold: float, + score_bins: int, + min_prod_samples: int = MIN_PROD_SAMPLES_DEFAULT, + psi_eps: float = PSI_EPS_DEFAULT, + min_category_share: float = 
RARE_CATEGORY_MIN_SHARE_DEFAULT, + fdr_alpha: float = FDR_ALPHA_DEFAULT, + min_drift_features: int = 1, + prod_since: str | None = None, + prod_until: str | None = None, ) -> Path: - # 1) Load reference - ref = pd.read_parquet(reference_path) - if sample_size > 0 and len(ref) > sample_size: - ref = ref.sample(sample_size, random_state=42) - - # 2) Load prod (inputs) depuis logs - cur = _load_prod_inputs_for_evidently(log_path) - if cur.empty: - raise ValueError("No valid production inputs found in logs (status_code < 400).") - - # 3) Pré-traitements minimalistes identiques des deux côtés (si tu en as besoin) - # (garde tes helpers si tu veux: sentinel, normalisation genre, etc.) - # ref = _replace_sentinel(ref); cur = _replace_sentinel(cur) - # ... - - # 4) Aligner les colonnes communes - common = sorted(set(ref.columns).intersection(cur.columns)) - if not common: - raise ValueError("No common columns between reference and production data.") - ref = ref[common] - cur = cur[common] - - # 5) Evidently report - report = Report(metrics=[ - DataDriftPreset(), - DataQualityPreset(), - ]) - - run_result = report.run(reference_data=ref, current_data=cur) + summary = compute_drift_summary( + log_path=log_path, + reference_path=reference_path, + sample_size=sample_size, + psi_threshold=psi_threshold, + score_bins=score_bins, + min_prod_samples=min_prod_samples, + psi_eps=psi_eps, + min_category_share=min_category_share, + fdr_alpha=fdr_alpha, + min_drift_features=min_drift_features, + prod_since=prod_since, + prod_until=prod_until, + ) + + summary_df = summary["summary_df"] + production_df = summary["production_df"] + reference_df = summary["reference_df"] + meta_df_all = summary["meta_df_all"] + inputs_df_all = summary["inputs_df_all"] + meta_df_valid = summary["meta_df_valid"] + valid_mask = summary["valid_mask"] + sentinel_rates = summary["sentinel_rates"] + features = summary["features"] + n_prod = summary["n_prod"] + n_ref = summary["n_ref"] + window_info = 
summary["window_info"] + drift_features = summary["drift_features"] + overall_drift = summary["overall_drift"] + + plots_dir = output_dir / "plots" + plots_dir.mkdir(parents=True, exist_ok=True) + + for feature in features: + if feature not in reference_df.columns or feature not in production_df.columns: + continue + ref_series = reference_df[feature] + prod_series = production_df[feature] + if feature in CATEGORICAL_FEATURES: + ref_series, prod_series = _prepare_categorical( + ref_series, prod_series, min_share=min_category_share, other_label="OTHER" + ) + plot_path = plots_dir / f"{_safe_name(feature)}.png" + _plot_categorical(ref_series, prod_series, plot_path) + else: + plot_path = plots_dir / f"{_safe_name(feature)}.png" + _plot_numeric(ref_series, prod_series, plot_path) output_dir.mkdir(parents=True, exist_ok=True) - out = output_dir / output_name - - # Compat versions Evidently : - # - certaines versions: report.save_html(...) - # - d'autres: run_result.save_html(...) - if hasattr(run_result, "save_html"): - run_result.save_html(str(out)) - elif hasattr(report, "save_html"): - report.save_html(str(out)) + report_path = output_dir / "drift_report.html" + + total_calls = len(meta_df_all) + error_series = meta_df_all.get("status_code", pd.Series(dtype=int)) + error_rate = float((error_series >= 400).mean()) if total_calls else 0.0 + latency_ms = meta_df_all.get("latency_ms", pd.Series(dtype=float)).dropna() + latency_p50 = float(latency_ms.quantile(0.5)) if not latency_ms.empty else 0.0 + latency_p95 = float(latency_ms.quantile(0.95)) if not latency_ms.empty else 0.0 + calls_with_inputs = int(inputs_df_all.notna().any(axis=1).sum()) if not inputs_df_all.empty else 0 + calls_with_dq = int(meta_df_all.get("data_quality", pd.Series(dtype=object)).notna().sum()) if total_calls else 0 + calls_success = int(valid_mask.sum()) + + valid_meta = meta_df_valid + score_series = ( + pd.to_numeric(valid_meta.get("probability", pd.Series(dtype=float)), errors="coerce") + 
.dropna() + ) + pred_series = ( + pd.to_numeric(valid_meta.get("prediction", pd.Series(dtype=float)), errors="coerce") + .dropna() + ) + + score_metrics_html = "
  • No prediction scores available.
  • " + score_plots_html = "" + if not score_series.empty: + score_mean = float(score_series.mean()) + score_p50 = float(score_series.quantile(0.5)) + score_p95 = float(score_series.quantile(0.95)) + score_min = float(score_series.min()) + score_max = float(score_series.max()) + score_metrics = [ + f"
  • Score mean: {score_mean:.4f}
  • ", + f"
  • Score p50: {score_p50:.4f}
  • ", + f"
  • Score p95: {score_p95:.4f}
  • ", + f"
  • Score min: {score_min:.4f}
  • ", + f"
  • Score max: {score_max:.4f}
  • ", + ] + score_metrics_html = "\n".join(score_metrics) + score_plot_path = plots_dir / "score_distribution.png" + _plot_score_distribution(score_series, score_plot_path, bins=score_bins) + score_plots_html = "" + + if not pred_series.empty: + pred_rate = float(pred_series.mean()) + score_metrics_html += f"\n
  • Predicted default rate: {pred_rate:.2%}
  • " + pred_plot_path = plots_dir / "prediction_rate.png" + _plot_prediction_rate(pred_series, pred_plot_path) + score_plots_html += "\n" + + error_breakdown = summarize_errors(meta_df_all[error_series >= 400]) + if error_breakdown: + error_items = "\n".join( + f"
  • {message} ({count})
  • " for message, count in error_breakdown + ) + error_html = "" else: - raise RuntimeError("Evidently save_html not available on this version.") + error_html = "

    No error details logged.

    " - return out + dq_metrics = summarize_data_quality(meta_df_all, production_df, sentinel_rates) + if dq_metrics.get("source") == "none": + dq_html = "

    No data quality metrics available.

    " + else: + dq_items = [ + f"
  • Source: {dq_metrics.get('source')}
  • ", + f"
  • Sample size: {dq_metrics.get('sample_size')}
  • ", + f"
  • Missing required rate: {dq_metrics.get('missing_required_rate', 0.0):.2%}
  • ", + ] + if "invalid_numeric_rate" in dq_metrics: + dq_items.append(f"
  • Invalid numeric rate: {dq_metrics.get('invalid_numeric_rate', 0.0):.2%}
  • ") + if "out_of_range_rate" in dq_metrics: + dq_items.append(f"
  • Out-of-range rate: {dq_metrics.get('out_of_range_rate', 0.0):.2%}
  • ") + if "outlier_rate" in dq_metrics: + dq_items.append(f"
  • Outlier rate: {dq_metrics.get('outlier_rate', 0.0):.2%}
  • ") + if "nan_rate" in dq_metrics: + dq_items.append(f"
  • NaN rate (avg): {dq_metrics.get('nan_rate', 0.0):.2%}
  • ") + dq_items.append( + f"
  • Unknown CODE_GENDER rate: {dq_metrics.get('unknown_gender_rate', 0.0):.2%}
  • " + ) + dq_items.append( + f"
  • Unknown FLAG_OWN_CAR rate: {dq_metrics.get('unknown_car_rate', 0.0):.2%}
  • " + ) + dq_items.append( + f"
  • DAYS_EMPLOYED sentinel rate: {dq_metrics.get('days_employed_sentinel_rate', 0.0):.2%}
  • " + ) + dq_html = "" + + summary_html = summary_df.to_html(index=False, escape=False) + plots_html = "\n".join( + f"

    {row['feature']}

    " + for _, row in summary_df.iterrows() + ) + + sample_badge = "" + if n_prod < min_prod_samples: + sample_badge = ( + "
    Sample insuffisant: " + f"{n_prod} < {min_prod_samples} (resultats non fiables).
    " + ) + if n_prod < min_prod_samples: + drift_badge = ( + "
    Drift non calcule " + f"(n_prod < {min_prod_samples}).
    " + ) + elif overall_drift: + drift_badge = "
    Drift alert
    " + else: + drift_badge = "
    No drift alert
    " + + html = f""" + + + + Drift Report + + + +

    Production Monitoring Summary

    + +

    Top error reasons

    + {error_html} + {sample_badge} +

    Score Monitoring

    + + {score_plots_html} +

    Data Quality

    + {dq_html} +

    Data Drift Summary

    + {drift_badge} + + {summary_html} +

    Feature Distributions

    + {plots_html} + + +""" + + report_path.write_text(html, encoding="utf-8") + return report_path + + +def main() -> None: + parser = argparse.ArgumentParser(description="Generate a drift report from production logs.") + parser.add_argument("--logs", type=Path, default=Path("logs/predictions.jsonl")) + parser.add_argument("--reference", type=Path, default=Path("data/data_final.parquet")) + parser.add_argument("--output-dir", type=Path, default=Path("reports")) + parser.add_argument("--sample-size", type=int, default=50000) + parser.add_argument("--psi-threshold", type=float, default=0.2) + parser.add_argument("--score-bins", type=int, default=30) + parser.add_argument("--min-prod-samples", type=int, default=MIN_PROD_SAMPLES_DEFAULT) + parser.add_argument("--psi-eps", type=float, default=PSI_EPS_DEFAULT) + parser.add_argument("--min-category-share", type=float, default=RARE_CATEGORY_MIN_SHARE_DEFAULT) + parser.add_argument("--fdr-alpha", type=float, default=FDR_ALPHA_DEFAULT) + parser.add_argument("--min-drift-features", type=int, default=1) + parser.add_argument("--prod-since", type=str, default=None) + parser.add_argument("--prod-until", type=str, default=None) + args = parser.parse_args() + + report_path = generate_report( + log_path=args.logs, + reference_path=args.reference, + output_dir=args.output_dir, + sample_size=args.sample_size, + psi_threshold=args.psi_threshold, + score_bins=args.score_bins, + min_prod_samples=args.min_prod_samples, + psi_eps=args.psi_eps, + min_category_share=args.min_category_share, + fdr_alpha=args.fdr_alpha, + min_drift_features=args.min_drift_features, + prod_since=args.prod_since, + prod_until=args.prod_until, + ) + print(f"Drift report saved to {report_path}") + + +if __name__ == "__main__": + main() diff --git a/hf_space/monitoring/streamlit_app.py b/hf_space/monitoring/streamlit_app.py index cae224e552842ba3376010bd706e98780997b841..c5d503793a24eb76fdfdb5c4b7eaa9f5a5e72925 100644 --- a/hf_space/monitoring/streamlit_app.py 
+++ b/hf_space/monitoring/streamlit_app.py @@ -1,5 +1,6 @@ from __future__ import annotations +from collections import Counter from pathlib import Path import sys @@ -9,16 +10,22 @@ import streamlit as st import matplotlib matplotlib.use("Agg") import matplotlib.pyplot as plt -import streamlit.components.v1 as components - -from drift_report import generate_report, generate_evidently_report, _load_logs ROOT = Path(__file__).resolve().parents[1] if str(ROOT) not in sys.path: sys.path.insert(0, str(ROOT)) -from monitoring.drift_report import generate_report, _load_logs +from monitoring.drift_report import ( + CATEGORICAL_FEATURES, + DAYS_EMPLOYED_SENTINEL, + compute_drift_summary, + generate_report, + summarize_data_quality, + summarize_errors, + _load_logs, + _prepare_categorical, +) def _load_logs_safe(log_path: Path) -> tuple[pd.DataFrame, pd.DataFrame]: @@ -27,6 +34,81 @@ def _load_logs_safe(log_path: Path) -> tuple[pd.DataFrame, pd.DataFrame]: return _load_logs(log_path) +def _filter_by_time( + meta_df: pd.DataFrame, + inputs_df: pd.DataFrame, + since: str | None, + until: str | None, +) -> tuple[pd.DataFrame, pd.DataFrame, str]: + if not since and not until: + return meta_df, inputs_df, "" + if "timestamp" not in meta_df.columns: + return meta_df, inputs_df, "timestamp_missing" + timestamps = pd.to_datetime(meta_df["timestamp"], errors="coerce", utc=True) + if timestamps.isna().all(): + return meta_df, inputs_df, "timestamp_invalid" + mask = pd.Series(True, index=meta_df.index) + if since: + since_dt = pd.to_datetime(since, errors="coerce", utc=True) + if not pd.isna(since_dt): + mask &= timestamps >= since_dt + if until: + until_dt = pd.to_datetime(until, errors="coerce", utc=True) + if not pd.isna(until_dt): + mask &= timestamps <= until_dt + return meta_df.loc[mask].reset_index(drop=True), inputs_df.loc[mask].reset_index(drop=True), "filtered" + + +def _count_dq_columns(meta_df: pd.DataFrame, key: str) -> Counter: + counts: Counter = Counter() + if 
"data_quality" not in meta_df.columns: + return counts + for item in meta_df["data_quality"].dropna(): + if not isinstance(item, dict): + continue + values = item.get(key) + if isinstance(values, list): + counts.update(values) + return counts + + +def _counts_to_frame(counts: Counter, limit: int = 5) -> pd.DataFrame: + if not counts: + return pd.DataFrame() + return pd.DataFrame(counts.most_common(limit), columns=["feature", "count"]) + + +@st.cache_data(show_spinner=False) +def _cached_drift_summary( + log_path: Path, + reference_path: Path, + sample_size: int, + psi_threshold: float, + score_bins: int, + min_prod_samples: int, + psi_eps: float, + min_category_share: float, + fdr_alpha: float, + min_drift_features: int, + prod_since: str | None, + prod_until: str | None, +) -> dict[str, object]: + return compute_drift_summary( + log_path=log_path, + reference_path=reference_path, + sample_size=sample_size, + psi_threshold=psi_threshold, + score_bins=score_bins, + min_prod_samples=min_prod_samples, + psi_eps=psi_eps, + min_category_share=min_category_share, + fdr_alpha=fdr_alpha, + min_drift_features=min_drift_features, + prod_since=prod_since, + prod_until=prod_until, + ) + + st.set_page_config(page_title="Credit Scoring Monitoring", layout="wide") st.title("Credit Scoring Monitoring") @@ -38,7 +120,7 @@ with st.sidebar: sample_size = st.number_input("Sample size", min_value=1000, max_value=200000, value=50000, step=1000) psi_threshold = st.number_input("PSI threshold", min_value=0.05, max_value=1.0, value=0.2, step=0.05) score_bins = st.number_input("Score bins", min_value=10, max_value=100, value=30, step=5) - min_prod_samples = st.number_input("Min prod samples", min_value=10, max_value=5000, value=200, step=50) + min_prod_samples = st.number_input("Min prod samples", min_value=10, max_value=5000, value=50, step=50) psi_eps = st.number_input("PSI epsilon", min_value=1e-6, max_value=1e-2, value=1e-4, format="%.6f") min_category_share = st.number_input( "Min 
category share", @@ -52,6 +134,9 @@ with st.sidebar: min_drift_features = st.number_input("Min drift features", min_value=1, max_value=10, value=1, step=1) prod_since = st.text_input("Prod since (ISO)", "") prod_until = st.text_input("Prod until (ISO)", "") + time_bucket = st.selectbox("Time bucket", ["1H", "6H", "1D"], index=2) + show_preview = st.checkbox("Show log preview", value=False) + preview_rows = st.number_input("Preview rows", min_value=10, max_value=1000, value=200, step=50) inputs_df, meta_df = _load_logs_safe(log_path) @@ -59,30 +144,69 @@ if meta_df.empty: st.warning("No logs found. Check the logs path.") st.stop() +meta_df, inputs_df, window_status = _filter_by_time( + meta_df, inputs_df, prod_since or None, prod_until or None +) +if window_status in {"timestamp_missing", "timestamp_invalid"}: + st.info(f"Time filter ignored ({window_status}).") + total_calls = len(meta_df) valid_mask = meta_df.get("status_code", pd.Series(dtype=int)).fillna(0) < 400 +valid_meta = meta_df.loc[valid_mask] prod_inputs = inputs_df.loc[valid_mask] if not inputs_df.empty else inputs_df -n_prod = len(prod_inputs) +success_rate = float(valid_mask.mean()) if total_calls else 0.0 error_rate = float((meta_df.get("status_code", pd.Series(dtype=int)) >= 400).mean()) if total_calls else 0.0 latency_ms = meta_df.get("latency_ms", pd.Series(dtype=float)).dropna() latency_p50 = float(latency_ms.quantile(0.5)) if not latency_ms.empty else 0.0 latency_p95 = float(latency_ms.quantile(0.95)) if not latency_ms.empty else 0.0 +latency_p99 = float(latency_ms.quantile(0.99)) if not latency_ms.empty else 0.0 +latency_mean = float(latency_ms.mean()) if not latency_ms.empty else 0.0 + +col1, col2, col3, col4, col5 = st.columns(5) +col1.metric("Total calls", f"{total_calls}") +col2.metric("Success rate", f"{success_rate:.2%}") +col3.metric("Error rate", f"{error_rate:.2%}") +col4.metric("Latency p50", f"{latency_p50:.2f} ms") +col5.metric("Latency p95", f"{latency_p95:.2f} ms") 
+st.caption(f"Latency p99: {latency_p99:.2f} ms | Mean: {latency_mean:.2f} ms") + +st.subheader("Log Storage") +if log_path.exists(): + log_stat = log_path.stat() + st.write(f"Path: `{log_path}`") + st.write(f"Size: {log_stat.st_size / (1024 * 1024):.2f} MB") + st.write(f"Last modified: {pd.to_datetime(log_stat.st_mtime, unit='s')}") + if show_preview: + st.dataframe(meta_df.tail(int(preview_rows)), use_container_width=True) +else: + st.info("Log file not found.") + +st.subheader("Traffic & Latency") +timestamps = pd.to_datetime(meta_df.get("timestamp", pd.Series(dtype=object)), errors="coerce", utc=True) +if not timestamps.isna().all(): + ts_df = meta_df.copy() + ts_df["timestamp"] = timestamps + ts_df = ts_df.dropna(subset=["timestamp"]) + if not ts_df.empty: + calls_series = ts_df.set_index("timestamp").resample(time_bucket).size() + st.line_chart(calls_series.rename("calls")) + if "latency_ms" in ts_df.columns: + latency_series = ts_df.set_index("timestamp")["latency_ms"].resample(time_bucket).median() + st.line_chart(latency_series.rename("latency_p50_ms")) +else: + st.info("No valid timestamps available for time series charts.") + +if not latency_ms.empty: + fig, ax = plt.subplots(figsize=(6, 3)) + ax.hist(latency_ms, bins=30, color="#4C78A8", alpha=0.8) + ax.set_xlabel("Latency (ms)") + ax.set_ylabel("Count") + ax.set_title("Latency distribution") + st.pyplot(fig, clear_figure=True) -valid_meta = meta_df -if "status_code" in meta_df.columns: - valid_meta = meta_df[meta_df["status_code"] < 400] scores = pd.to_numeric(valid_meta.get("probability", pd.Series(dtype=float)), errors="coerce").dropna() predictions = pd.to_numeric(valid_meta.get("prediction", pd.Series(dtype=float)), errors="coerce").dropna() -col1, col2, col3, col4 = st.columns(4) -col1.metric("Total calls", f"{total_calls}") -col2.metric("Error rate", f"{error_rate:.2%}") -col3.metric("Latency p50", f"{latency_p50:.2f} ms") -col4.metric("Latency p95", f"{latency_p95:.2f} ms") 
-st.caption(f"Production sample size (status < 400): {n_prod}") -if n_prod < int(min_prod_samples): - st.warning("Sample insuffisant: drift non fiable (gate active).") - st.subheader("Score Monitoring") if not scores.empty: score_stats = { @@ -115,7 +239,139 @@ if not predictions.empty: ax.set_title("Prediction rate") st.pyplot(fig, clear_figure=True) +if not valid_meta.empty and "timestamp" in valid_meta.columns and not scores.empty: + score_ts = valid_meta.copy() + score_ts["timestamp"] = pd.to_datetime(score_ts["timestamp"], errors="coerce", utc=True) + score_ts["score"] = pd.to_numeric(score_ts.get("probability", pd.Series(dtype=float)), errors="coerce") + score_ts = score_ts.dropna(subset=["timestamp", "score"]) + if not score_ts.empty: + score_series = score_ts.set_index("timestamp")["score"].resample(time_bucket).mean() + st.line_chart(score_series.rename("avg_score")) + +st.subheader("Data Quality & Errors") +sentinel_rate = 0.0 +if "DAYS_EMPLOYED" in prod_inputs.columns: + sentinel_rate = float( + (pd.to_numeric(prod_inputs["DAYS_EMPLOYED"], errors="coerce") == DAYS_EMPLOYED_SENTINEL).mean() + ) +dq_metrics = summarize_data_quality(meta_df, prod_inputs, {"production": sentinel_rate}) +if dq_metrics.get("source") == "none": + st.info("No data quality metrics available.") +else: + dq_table = pd.DataFrame( + [ + {"metric": "missing_required_rate", "value": dq_metrics.get("missing_required_rate", 0.0)}, + {"metric": "invalid_numeric_rate", "value": dq_metrics.get("invalid_numeric_rate", 0.0)}, + {"metric": "out_of_range_rate", "value": dq_metrics.get("out_of_range_rate", 0.0)}, + {"metric": "outlier_rate", "value": dq_metrics.get("outlier_rate", 0.0)}, + {"metric": "nan_rate", "value": dq_metrics.get("nan_rate", 0.0)}, + {"metric": "unknown_gender_rate", "value": dq_metrics.get("unknown_gender_rate", 0.0)}, + {"metric": "unknown_car_rate", "value": dq_metrics.get("unknown_car_rate", 0.0)}, + {"metric": "days_employed_sentinel_rate", "value": 
dq_metrics.get("days_employed_sentinel_rate", 0.0)}, + ] + ) + dq_table["value"] = dq_table["value"].map(lambda v: f"{float(v):.2%}") + st.table(dq_table) + +issues = { + "missing_required_columns": "Missing required", + "invalid_numeric_columns": "Invalid numeric", + "out_of_range_columns": "Out of range", + "outlier_columns": "Outliers", + "unknown_categories": "Unknown categories", +} +for key, label in issues.items(): + df_counts = _counts_to_frame(_count_dq_columns(meta_df, key)) + if not df_counts.empty: + st.caption(label) + st.dataframe(df_counts, hide_index=True, use_container_width=True) + +error_breakdown = summarize_errors(meta_df[meta_df.get("status_code", pd.Series(dtype=int)) >= 400]) +if error_breakdown: + st.caption("Top error reasons") + st.table(pd.DataFrame(error_breakdown, columns=["error", "count"])) + st.subheader("Data Drift") +if not reference_path.exists(): + st.warning("Reference dataset not found. Drift summary disabled.") +else: + try: + summary = _cached_drift_summary( + log_path=log_path, + reference_path=reference_path, + sample_size=int(sample_size), + psi_threshold=float(psi_threshold), + score_bins=int(score_bins), + min_prod_samples=int(min_prod_samples), + psi_eps=float(psi_eps), + min_category_share=float(min_category_share), + fdr_alpha=float(fdr_alpha), + min_drift_features=int(min_drift_features), + prod_since=prod_since or None, + prod_until=prod_until or None, + ) + summary_df = summary["summary_df"] + n_prod = summary["n_prod"] + n_ref = summary["n_ref"] + drift_count = summary["drift_count"] + drift_features = summary["drift_features"] + if n_prod < int(min_prod_samples): + st.warning("Sample insuffisant: drift non fiable (gate active).") + st.metric("Drifted features", f"{drift_count}") + if drift_features: + st.write(f"Drifted: {', '.join(drift_features)}") + + show_only_drifted = st.checkbox("Show only drifted features", value=False) + table_df = summary_df + if show_only_drifted: + table_df = 
summary_df[summary_df["drift_detected"] == True] + st.dataframe(table_df, use_container_width=True, hide_index=True) + + if not summary_df.empty: + feature = st.selectbox("Feature to inspect", summary_df["feature"].tolist()) + row = summary_df.loc[summary_df["feature"] == feature].iloc[0] + production_df = summary["production_df"] + reference_df = summary["reference_df"] + fig, ax = plt.subplots(figsize=(6, 3)) + if feature in CATEGORICAL_FEATURES: + ref_series, prod_series = _prepare_categorical( + reference_df[feature], + production_df[feature], + min_share=float(min_category_share), + other_label="OTHER", + ) + plot_df = pd.DataFrame( + { + "reference": ref_series.value_counts(normalize=True), + "production": prod_series.value_counts(normalize=True), + } + ).fillna(0) + plot_df.plot(kind="bar", ax=ax) + ax.set_title(f"Distribution: {feature}") + ax.set_ylabel("Share") + psi_value = row.get("psi") + if psi_value is not None: + st.caption(f"PSI: {psi_value} | n_prod: {row.get('n_prod')} | n_ref: {row.get('n_ref')}") + else: + ax.hist(reference_df[feature].dropna(), bins=30, alpha=0.6, label="reference") + ax.hist(production_df[feature].dropna(), bins=30, alpha=0.6, label="production") + ax.set_title(f"Distribution: {feature}") + ax.legend() + st.caption( + f"KS: {row.get('ks_stat')} | p_value: {row.get('p_value_fdr') or row.get('p_value')}" + ) + st.pyplot(fig, clear_figure=True) + + if row.get("drift_detected"): + st.warning("Drift detected: investigate data pipeline and model stability.") + else: + st.success("No drift signal for this feature.") + except SystemExit as exc: + st.warning(str(exc)) + except Exception as exc: + st.error(str(exc)) + +st.subheader("Generate Drift Report") if st.button("Generate drift report"): try: report_path = generate_report( @@ -141,37 +397,6 @@ if st.button("Generate drift report"): ) st.exception(exc) -if st.button("Generate drift report"): - try: - if drift_engine == "Evidently": - report_path = generate_evidently_report( - 
reference_path=reference_path, - log_path=log_path, - output_dir=output_dir, - sample_size=int(sample_size), - ) - else: - report_path = generate_report( - log_path=log_path, - reference_path=reference_path, - output_dir=output_dir, - sample_size=int(sample_size), - psi_threshold=float(psi_threshold), - score_bins=int(score_bins), - min_prod_samples=int(min_prod_samples), - psi_eps=float(psi_eps), - min_category_share=float(min_category_share), - fdr_alpha=float(fdr_alpha), - min_drift_features=int(min_drift_features), - prod_since=prod_since or None, - prod_until=prod_until or None, - ) - st.success(f"Generated: {report_path}") - except Exception as exc: - st.error(str(exc)) - st.exception(exc) - - report_file = output_dir / "drift_report.html" if report_file.exists(): st.markdown(f"Report available at `{report_file}`") diff --git a/hf_space/tests/test_drift_report.py b/hf_space/tests/test_drift_report.py index ba0c4225a895d28daa1274351bee6a712d7d84e8..8ddf286777ad5fe336273c93b12e331b3d2c7f10 100644 --- a/hf_space/tests/test_drift_report.py +++ b/hf_space/tests/test_drift_report.py @@ -3,7 +3,7 @@ from pathlib import Path import pandas as pd -from monitoring.drift_report_old import generate_report +from monitoring.drift_report import generate_report def _write_jsonl(path: Path, entries: list[dict]) -> None: