Spaces:

PCelia
/

Pret-a-depenser

Configuration error

App Files Community

celpri committed on Feb 15

Commit

4b22893

1 Parent(s): 4b2bde0

Notebooks DataDrift

Browse files

Files changed (7) hide show

data_drift_analysis.ipynb +0 -159
monitoring/data_drift_report.html +0 -0
monitoring/drif_analysis.ipynb +0 -0
tests/fonctionnal/test_api.py +3 -6
tests/unit/test_input_validation.py +0 -1
tests/unit/test_model_loading.py +3 -3
tests/unit/test_preprocessing.py +1 -1

data_drift_analysis.ipynb DELETED Viewed

@@ -1,159 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "57a500a1",
-   "metadata": {},
-   "source": [
-    "Charger le dataset initial"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "265ff33b",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pandas as pd\n",
-    "from sklearn.model_selection import train_test_split\n",
-    "\n",
-    "df = pd.read_csv(\"Data/features_clients.csv\")\n",
-    "df = df.drop(columns=[\"SK_ID_CURR\"])"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "55f5c7f9",
-   "metadata": {},
-   "source": [
-    "Train/Test Split"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "id": "33025b1c",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df_train, df_test = train_test_split(\n",
-    "    df,\n",
-    "    test_size=0.3,\n",
-    "    random_state=42\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "id": "ee84412a",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "0.7.20\n",
-      "c:\\Users\\User\\Desktop\\Formation IA\\projet8\\projet8\\Lib\\site-packages\\evidently\\__init__.py\n"
-     ]
-    }
-   ],
-   "source": [
-    "import evidently\n",
-    "print(evidently.__version__)\n",
-    "print(evidently.__file__)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "id": "dc5d67c4",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "['AbsMaxError', 'Accuracy', 'AlmostConstantColumnsCount', 'AlmostDuplicatedColumnsCount', 'CategoryCount', 'ColumnCorrelationMatrix', 'ColumnCorrelations', 'ColumnCount', 'ConstantColumnsCount', 'CorrelationMatrix', 'DatasetCorrelations', 'DatasetMissingValueCount', 'Diversity', 'DriftedColumnsCount', 'DummyAccuracy', 'DummyF1Score', 'DummyFNR', 'DummyFPR', 'DummyLogLoss', 'DummyMAE', 'DummyMAPE', 'DummyPrecision', 'DummyRMSE', 'DummyRecall', 'DummyRocAuc', 'DummyTNR', 'DummyTPR', 'DuplicatedColumnsCount', 'DuplicatedRowCount', 'EmptyColumnsCount', 'EmptyRowsCount', 'F1ByLabel', 'F1Score', 'FBetaTopK', 'FNR', 'FPR', 'GroupBy', 'HitRate', 'InListValueCount', 'InRangeValueCount', 'ItemBias', 'LogLoss', 'MAE', 'MAP', 'MAPE', 'MRR', 'MaxValue', 'MeanError', 'MeanValue', 'MedianValue', 'MinValue', 'MissingValueCount', 'NDCG', 'Novelty', 'OutListValueCount', 'OutRangeValueCount', 'Personalization', 'PopularityBiasMetric', 'Precision', 'PrecisionByLabel', 'PrecisionTopK', 'QuantileValue', 'R2Score', 'RMSE', 'RecCasesTable', 'Recall', 'RecallByLabel', 'RecallTopK', 'RocAuc', 'RocAucByLabel', 'RowCount', 'RowTestSummary', 'ScoreDistribution', 'Serendipity', 'StdValue', 'SumValue', 'TNR', 'TPR', 'UniqueValueCount', 'UserBias', 'ValueDrift', '__all__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', '_legacy', 'classification', 'column_statistics', 'data_quality', 'dataset_statistics', 'group_by', 'recsys', 'regression', 'row_test_summary']\n"
-     ]
-    }
-   ],
-   "source": [
-    "import evidently.metrics\n",
-    "print(dir(evidently.metrics))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "4a4bba44",
-   "metadata": {},
-   "source": [
-    "Lancer Evidently"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "ba976620",
-   "metadata": {},
-   "outputs": [
-    {
-     "ename": "AttributeError",
-     "evalue": "'Snapshot' object has no attribute 'save'",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
-      "\u001b[31mAttributeError\u001b[39m                            Traceback (most recent call last)",
-      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[16]\u001b[39m\u001b[32m, line 21\u001b[39m\n\u001b[32m     14\u001b[39m report.run(reference_data=reference, current_data=current)\n\u001b[32m     16\u001b[39m snapshot = report.run(\n\u001b[32m     17\u001b[39m     reference_data=reference,\n\u001b[32m     18\u001b[39m     current_data=current\n\u001b[32m     19\u001b[39m )\n\u001b[32m---> \u001b[39m\u001b[32m21\u001b[39m \u001b[43msnapshot\u001b[49m\u001b[43m.\u001b[49m\u001b[43msave\u001b[49m(\u001b[33m\"\u001b[39m\u001b[33mdata_drift_report.html\u001b[39m\u001b[33m\"\u001b[39m)\n",
-      "\u001b[31mAttributeError\u001b[39m: 'Snapshot' object has no attribute 'save'"
-     ]
-    }
-   ],
-   "source": [
-    "from evidently import Report, Dataset\n",
-    "from evidently.metrics import ValueDrift\n",
-    "\n",
-    "reference = Dataset.from_pandas(df_train)\n",
-    "current = Dataset.from_pandas(df_test)\n",
-    "\n",
-    "metrics = []\n",
-    "\n",
-    "for col in df_train.columns:\n",
-    "    metrics.append(ValueDrift(column=col))\n",
-    "\n",
-    "report = Report(metrics)\n",
-    "\n",
-    "report.run(reference_data=reference, current_data=current)\n",
-    "\n",
-    "snapshot = report.run(\n",
-    "    reference_data=reference,\n",
-    "    current_data=current\n",
-    ")\n",
-    "\n",
-    "snapshot.save_html(\"data_drift_report.html\")"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "projet8 (3.12.10)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.10"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}

monitoring/data_drift_report.html ADDED Viewed

The diff for this file is too large to render. See raw diff

monitoring/drif_analysis.ipynb ADDED Viewed

File without changes

tests/fonctionnal/test_api.py CHANGED Viewed

@@ -6,13 +6,13 @@ from contextlib import asynccontextmanager
 from src.api.main import app
-# ---- Neutralise le lifespan ----
 @asynccontextmanager
 async def empty_lifespan(app):
     yield
 app.router.lifespan_context = empty_lifespan
-# --------------------------------
 class DummyModel:
@@ -23,10 +23,7 @@ class DummyModel:
 def get_client():
     app.state.model = DummyModel()
     app.state.features = pd.DataFrame({
-        "SK_ID_CURR": [100002],
-        "feature_1": [0.5],
-        "feature_2": [1.2],
-    })
     return TestClient(app)

 from src.api.main import app
+# Neutralise le lifespan
 @asynccontextmanager
 async def empty_lifespan(app):
     yield
 app.router.lifespan_context = empty_lifespan
 class DummyModel:
 def get_client():
     app.state.model = DummyModel()
     app.state.features = pd.DataFrame({
+        "SK_ID_CURR": [100002],})
     return TestClient(app)

tests/unit/test_input_validation.py CHANGED Viewed

@@ -2,7 +2,6 @@ import pytest
 def validate_input(age: int, income: float):
-    # remplace par ta vraie fonction si elle existe
     if age < 0:
         raise ValueError("Age must be positive")
     if income <= 0:

 def validate_input(age: int, income: float):
     if age < 0:
         raise ValueError("Age must be positive")
     if income <= 0:

tests/unit/test_model_loading.py CHANGED Viewed

@@ -4,7 +4,7 @@ from unittest.mock import patch, MagicMock
 from src.model.model import load_model
-# 1️⃣ HF OK
 @patch("src.model.model.joblib.load")
 @patch("src.model.model.hf_hub_download")
 def test_load_model_from_hf(mock_hf, mock_joblib):
@@ -16,7 +16,7 @@ def test_load_model_from_hf(mock_hf, mock_joblib):
     assert model == "MODEL"
-# 2️⃣ HF échoue → MLflow OK
 @patch("src.model.model.hf_hub_download", side_effect=Exception("HF fail"))
 @patch("mlflow.sklearn.load_model")
 def test_load_model_fallback_mlflow(mock_mlflow, mock_hf):
@@ -27,7 +27,7 @@ def test_load_model_fallback_mlflow(mock_mlflow, mock_hf):
     assert model == "MLFLOW_MODEL"
-# 3️⃣ Tout échoue → FileNotFoundError
 @patch("src.model.model.hf_hub_download", side_effect=Exception("HF fail"))
 @patch("mlflow.sklearn.load_model", side_effect=Exception("MLflow fail"))
 def test_load_model_raises_error(mock_mlflow, mock_hf):

 from src.model.model import load_model
+# HF OK
 @patch("src.model.model.joblib.load")
 @patch("src.model.model.hf_hub_download")
 def test_load_model_from_hf(mock_hf, mock_joblib):
     assert model == "MODEL"
+# HF échoue → MLflow OK
 @patch("src.model.model.hf_hub_download", side_effect=Exception("HF fail"))
 @patch("mlflow.sklearn.load_model")
 def test_load_model_fallback_mlflow(mock_mlflow, mock_hf):
     assert model == "MLFLOW_MODEL"
+# Tout échoue → FileNotFoundError
 @patch("src.model.model.hf_hub_download", side_effect=Exception("HF fail"))
 @patch("mlflow.sklearn.load_model", side_effect=Exception("MLflow fail"))
 def test_load_model_raises_error(mock_mlflow, mock_hf):

tests/unit/test_preprocessing.py CHANGED Viewed

@@ -3,7 +3,7 @@ import numpy as np
 def dummy_preprocess(df: pd.DataFrame):
-    # remplace par ta vraie fonction si elle existe
     return df.fillna(0)


 def dummy_preprocess(df: pd.DataFrame):
+
     return df.fillna(0)
9