Spaces:
Configuration error
Configuration error
celpri committed on
Commit ·
4b22893
1
Parent(s): 4b2bde0
Notebooks DataDrift
Browse files
data_drift_analysis.ipynb
DELETED
|
@@ -1,159 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"id": "57a500a1",
|
| 6 |
-
"metadata": {},
|
| 7 |
-
"source": [
|
| 8 |
-
"Charger le dataset initial"
|
| 9 |
-
]
|
| 10 |
-
},
|
| 11 |
-
{
|
| 12 |
-
"cell_type": "code",
|
| 13 |
-
"execution_count": 1,
|
| 14 |
-
"id": "265ff33b",
|
| 15 |
-
"metadata": {},
|
| 16 |
-
"outputs": [],
|
| 17 |
-
"source": [
|
| 18 |
-
"import pandas as pd\n",
|
| 19 |
-
"from sklearn.model_selection import train_test_split\n",
|
| 20 |
-
"\n",
|
| 21 |
-
"df = pd.read_csv(\"Data/features_clients.csv\")\n",
|
| 22 |
-
"df = df.drop(columns=[\"SK_ID_CURR\"])"
|
| 23 |
-
]
|
| 24 |
-
},
|
| 25 |
-
{
|
| 26 |
-
"cell_type": "markdown",
|
| 27 |
-
"id": "55f5c7f9",
|
| 28 |
-
"metadata": {},
|
| 29 |
-
"source": [
|
| 30 |
-
"Train/Test Split"
|
| 31 |
-
]
|
| 32 |
-
},
|
| 33 |
-
{
|
| 34 |
-
"cell_type": "code",
|
| 35 |
-
"execution_count": 2,
|
| 36 |
-
"id": "33025b1c",
|
| 37 |
-
"metadata": {},
|
| 38 |
-
"outputs": [],
|
| 39 |
-
"source": [
|
| 40 |
-
"df_train, df_test = train_test_split(\n",
|
| 41 |
-
" df,\n",
|
| 42 |
-
" test_size=0.3,\n",
|
| 43 |
-
" random_state=42\n",
|
| 44 |
-
")"
|
| 45 |
-
]
|
| 46 |
-
},
|
| 47 |
-
{
|
| 48 |
-
"cell_type": "code",
|
| 49 |
-
"execution_count": 8,
|
| 50 |
-
"id": "ee84412a",
|
| 51 |
-
"metadata": {},
|
| 52 |
-
"outputs": [
|
| 53 |
-
{
|
| 54 |
-
"name": "stdout",
|
| 55 |
-
"output_type": "stream",
|
| 56 |
-
"text": [
|
| 57 |
-
"0.7.20\n",
|
| 58 |
-
"c:\\Users\\User\\Desktop\\Formation IA\\projet8\\projet8\\Lib\\site-packages\\evidently\\__init__.py\n"
|
| 59 |
-
]
|
| 60 |
-
}
|
| 61 |
-
],
|
| 62 |
-
"source": [
|
| 63 |
-
"import evidently\n",
|
| 64 |
-
"print(evidently.__version__)\n",
|
| 65 |
-
"print(evidently.__file__)"
|
| 66 |
-
]
|
| 67 |
-
},
|
| 68 |
-
{
|
| 69 |
-
"cell_type": "code",
|
| 70 |
-
"execution_count": 14,
|
| 71 |
-
"id": "dc5d67c4",
|
| 72 |
-
"metadata": {},
|
| 73 |
-
"outputs": [
|
| 74 |
-
{
|
| 75 |
-
"name": "stdout",
|
| 76 |
-
"output_type": "stream",
|
| 77 |
-
"text": [
|
| 78 |
-
"['AbsMaxError', 'Accuracy', 'AlmostConstantColumnsCount', 'AlmostDuplicatedColumnsCount', 'CategoryCount', 'ColumnCorrelationMatrix', 'ColumnCorrelations', 'ColumnCount', 'ConstantColumnsCount', 'CorrelationMatrix', 'DatasetCorrelations', 'DatasetMissingValueCount', 'Diversity', 'DriftedColumnsCount', 'DummyAccuracy', 'DummyF1Score', 'DummyFNR', 'DummyFPR', 'DummyLogLoss', 'DummyMAE', 'DummyMAPE', 'DummyPrecision', 'DummyRMSE', 'DummyRecall', 'DummyRocAuc', 'DummyTNR', 'DummyTPR', 'DuplicatedColumnsCount', 'DuplicatedRowCount', 'EmptyColumnsCount', 'EmptyRowsCount', 'F1ByLabel', 'F1Score', 'FBetaTopK', 'FNR', 'FPR', 'GroupBy', 'HitRate', 'InListValueCount', 'InRangeValueCount', 'ItemBias', 'LogLoss', 'MAE', 'MAP', 'MAPE', 'MRR', 'MaxValue', 'MeanError', 'MeanValue', 'MedianValue', 'MinValue', 'MissingValueCount', 'NDCG', 'Novelty', 'OutListValueCount', 'OutRangeValueCount', 'Personalization', 'PopularityBiasMetric', 'Precision', 'PrecisionByLabel', 'PrecisionTopK', 'QuantileValue', 'R2Score', 'RMSE', 'RecCasesTable', 'Recall', 'RecallByLabel', 'RecallTopK', 'RocAuc', 'RocAucByLabel', 'RowCount', 'RowTestSummary', 'ScoreDistribution', 'Serendipity', 'StdValue', 'SumValue', 'TNR', 'TPR', 'UniqueValueCount', 'UserBias', 'ValueDrift', '__all__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', '_legacy', 'classification', 'column_statistics', 'data_quality', 'dataset_statistics', 'group_by', 'recsys', 'regression', 'row_test_summary']\n"
|
| 79 |
-
]
|
| 80 |
-
}
|
| 81 |
-
],
|
| 82 |
-
"source": [
|
| 83 |
-
"import evidently.metrics\n",
|
| 84 |
-
"print(dir(evidently.metrics))"
|
| 85 |
-
]
|
| 86 |
-
},
|
| 87 |
-
{
|
| 88 |
-
"cell_type": "markdown",
|
| 89 |
-
"id": "4a4bba44",
|
| 90 |
-
"metadata": {},
|
| 91 |
-
"source": [
|
| 92 |
-
"Lancer Evidently"
|
| 93 |
-
]
|
| 94 |
-
},
|
| 95 |
-
{
|
| 96 |
-
"cell_type": "code",
|
| 97 |
-
"execution_count": null,
|
| 98 |
-
"id": "ba976620",
|
| 99 |
-
"metadata": {},
|
| 100 |
-
"outputs": [
|
| 101 |
-
{
|
| 102 |
-
"ename": "AttributeError",
|
| 103 |
-
"evalue": "'Snapshot' object has no attribute 'save'",
|
| 104 |
-
"output_type": "error",
|
| 105 |
-
"traceback": [
|
| 106 |
-
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
| 107 |
-
"\u001b[31mAttributeError\u001b[39m Traceback (most recent call last)",
|
| 108 |
-
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[16]\u001b[39m\u001b[32m, line 21\u001b[39m\n\u001b[32m 14\u001b[39m report.run(reference_data=reference, current_data=current)\n\u001b[32m 16\u001b[39m snapshot = report.run(\n\u001b[32m 17\u001b[39m reference_data=reference,\n\u001b[32m 18\u001b[39m current_data=current\n\u001b[32m 19\u001b[39m )\n\u001b[32m---> \u001b[39m\u001b[32m21\u001b[39m \u001b[43msnapshot\u001b[49m\u001b[43m.\u001b[49m\u001b[43msave\u001b[49m(\u001b[33m\"\u001b[39m\u001b[33mdata_drift_report.html\u001b[39m\u001b[33m\"\u001b[39m)\n",
|
| 109 |
-
"\u001b[31mAttributeError\u001b[39m: 'Snapshot' object has no attribute 'save'"
|
| 110 |
-
]
|
| 111 |
-
}
|
| 112 |
-
],
|
| 113 |
-
"source": [
|
| 114 |
-
"from evidently import Report, Dataset\n",
|
| 115 |
-
"from evidently.metrics import ValueDrift\n",
|
| 116 |
-
"\n",
|
| 117 |
-
"reference = Dataset.from_pandas(df_train)\n",
|
| 118 |
-
"current = Dataset.from_pandas(df_test)\n",
|
| 119 |
-
"\n",
|
| 120 |
-
"metrics = []\n",
|
| 121 |
-
"\n",
|
| 122 |
-
"for col in df_train.columns:\n",
|
| 123 |
-
" metrics.append(ValueDrift(column=col))\n",
|
| 124 |
-
"\n",
|
| 125 |
-
"report = Report(metrics)\n",
|
| 126 |
-
"\n",
|
| 127 |
-
"report.run(reference_data=reference, current_data=current)\n",
|
| 128 |
-
"\n",
|
| 129 |
-
"snapshot = report.run(\n",
|
| 130 |
-
" reference_data=reference,\n",
|
| 131 |
-
" current_data=current\n",
|
| 132 |
-
")\n",
|
| 133 |
-
"\n",
|
| 134 |
-
"snapshot.save_html(\"data_drift_report.html\")"
|
| 135 |
-
]
|
| 136 |
-
}
|
| 137 |
-
],
|
| 138 |
-
"metadata": {
|
| 139 |
-
"kernelspec": {
|
| 140 |
-
"display_name": "projet8 (3.12.10)",
|
| 141 |
-
"language": "python",
|
| 142 |
-
"name": "python3"
|
| 143 |
-
},
|
| 144 |
-
"language_info": {
|
| 145 |
-
"codemirror_mode": {
|
| 146 |
-
"name": "ipython",
|
| 147 |
-
"version": 3
|
| 148 |
-
},
|
| 149 |
-
"file_extension": ".py",
|
| 150 |
-
"mimetype": "text/x-python",
|
| 151 |
-
"name": "python",
|
| 152 |
-
"nbconvert_exporter": "python",
|
| 153 |
-
"pygments_lexer": "ipython3",
|
| 154 |
-
"version": "3.12.10"
|
| 155 |
-
}
|
| 156 |
-
},
|
| 157 |
-
"nbformat": 4,
|
| 158 |
-
"nbformat_minor": 5
|
| 159 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
monitoring/data_drift_report.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
monitoring/drif_analysis.ipynb
ADDED
|
File without changes
|
tests/fonctionnal/test_api.py
CHANGED
|
@@ -6,13 +6,13 @@ from contextlib import asynccontextmanager
|
|
| 6 |
from src.api.main import app
|
| 7 |
|
| 8 |
|
| 9 |
-
#
|
| 10 |
@asynccontextmanager
|
| 11 |
async def empty_lifespan(app):
|
| 12 |
yield
|
| 13 |
|
| 14 |
app.router.lifespan_context = empty_lifespan
|
| 15 |
-
|
| 16 |
|
| 17 |
|
| 18 |
class DummyModel:
|
|
@@ -23,10 +23,7 @@ class DummyModel:
|
|
| 23 |
def get_client():
|
| 24 |
app.state.model = DummyModel()
|
| 25 |
app.state.features = pd.DataFrame({
|
| 26 |
-
"SK_ID_CURR": [100002],
|
| 27 |
-
"feature_1": [0.5],
|
| 28 |
-
"feature_2": [1.2],
|
| 29 |
-
})
|
| 30 |
return TestClient(app)
|
| 31 |
|
| 32 |
|
|
|
|
| 6 |
from src.api.main import app
|
| 7 |
|
| 8 |
|
| 9 |
+
# Neutralise le lifespan
|
| 10 |
@asynccontextmanager
|
| 11 |
async def empty_lifespan(app):
|
| 12 |
yield
|
| 13 |
|
| 14 |
app.router.lifespan_context = empty_lifespan
|
| 15 |
+
|
| 16 |
|
| 17 |
|
| 18 |
class DummyModel:
|
|
|
|
| 23 |
def get_client():
|
| 24 |
app.state.model = DummyModel()
|
| 25 |
app.state.features = pd.DataFrame({
|
| 26 |
+
"SK_ID_CURR": [100002],})
|
|
|
|
|
|
|
|
|
|
| 27 |
return TestClient(app)
|
| 28 |
|
| 29 |
|
tests/unit/test_input_validation.py
CHANGED
|
@@ -2,7 +2,6 @@ import pytest
|
|
| 2 |
|
| 3 |
|
| 4 |
def validate_input(age: int, income: float):
|
| 5 |
-
# remplace par ta vraie fonction si elle existe
|
| 6 |
if age < 0:
|
| 7 |
raise ValueError("Age must be positive")
|
| 8 |
if income <= 0:
|
|
|
|
| 2 |
|
| 3 |
|
| 4 |
def validate_input(age: int, income: float):
|
|
|
|
| 5 |
if age < 0:
|
| 6 |
raise ValueError("Age must be positive")
|
| 7 |
if income <= 0:
|
tests/unit/test_model_loading.py
CHANGED
|
@@ -4,7 +4,7 @@ from unittest.mock import patch, MagicMock
|
|
| 4 |
from src.model.model import load_model
|
| 5 |
|
| 6 |
|
| 7 |
-
#
|
| 8 |
@patch("src.model.model.joblib.load")
|
| 9 |
@patch("src.model.model.hf_hub_download")
|
| 10 |
def test_load_model_from_hf(mock_hf, mock_joblib):
|
|
@@ -16,7 +16,7 @@ def test_load_model_from_hf(mock_hf, mock_joblib):
|
|
| 16 |
assert model == "MODEL"
|
| 17 |
|
| 18 |
|
| 19 |
-
#
|
| 20 |
@patch("src.model.model.hf_hub_download", side_effect=Exception("HF fail"))
|
| 21 |
@patch("mlflow.sklearn.load_model")
|
| 22 |
def test_load_model_fallback_mlflow(mock_mlflow, mock_hf):
|
|
@@ -27,7 +27,7 @@ def test_load_model_fallback_mlflow(mock_mlflow, mock_hf):
|
|
| 27 |
assert model == "MLFLOW_MODEL"
|
| 28 |
|
| 29 |
|
| 30 |
-
#
|
| 31 |
@patch("src.model.model.hf_hub_download", side_effect=Exception("HF fail"))
|
| 32 |
@patch("mlflow.sklearn.load_model", side_effect=Exception("MLflow fail"))
|
| 33 |
def test_load_model_raises_error(mock_mlflow, mock_hf):
|
|
|
|
| 4 |
from src.model.model import load_model
|
| 5 |
|
| 6 |
|
| 7 |
+
# HF OK
|
| 8 |
@patch("src.model.model.joblib.load")
|
| 9 |
@patch("src.model.model.hf_hub_download")
|
| 10 |
def test_load_model_from_hf(mock_hf, mock_joblib):
|
|
|
|
| 16 |
assert model == "MODEL"
|
| 17 |
|
| 18 |
|
| 19 |
+
# HF échoue → MLflow OK
|
| 20 |
@patch("src.model.model.hf_hub_download", side_effect=Exception("HF fail"))
|
| 21 |
@patch("mlflow.sklearn.load_model")
|
| 22 |
def test_load_model_fallback_mlflow(mock_mlflow, mock_hf):
|
|
|
|
| 27 |
assert model == "MLFLOW_MODEL"
|
| 28 |
|
| 29 |
|
| 30 |
+
# Tout échoue → FileNotFoundError
|
| 31 |
@patch("src.model.model.hf_hub_download", side_effect=Exception("HF fail"))
|
| 32 |
@patch("mlflow.sklearn.load_model", side_effect=Exception("MLflow fail"))
|
| 33 |
def test_load_model_raises_error(mock_mlflow, mock_hf):
|
tests/unit/test_preprocessing.py
CHANGED
|
@@ -3,7 +3,7 @@ import numpy as np
|
|
| 3 |
|
| 4 |
|
| 5 |
def dummy_preprocess(df: pd.DataFrame):
|
| 6 |
-
|
| 7 |
return df.fillna(0)
|
| 8 |
|
| 9 |
|
|
|
|
| 3 |
|
| 4 |
|
| 5 |
def dummy_preprocess(df: pd.DataFrame):
|
| 6 |
+
|
| 7 |
return df.fillna(0)
|
| 8 |
|
| 9 |
|