CI Bot committed on
Commit
5effd82
·
1 Parent(s): 3a7a131

CI deploy Mon Nov 24 10:58:16 UTC 2025

Browse files
coverage.xml CHANGED
@@ -1,5 +1,5 @@
1
  <?xml version="1.0" ?>
2
- <coverage version="7.12.0" timestamp="1763724997948" lines-valid="290" lines-covered="242" line-rate="0.8345" branches-valid="16" branches-covered="7" branch-rate="0.4375" complexity="0">
3
  <!-- Generated by coverage.py: https://coverage.readthedocs.io/en/7.12.0 -->
4
  <!-- Based on https://raw.githubusercontent.com/cobertura/web/master/htdocs/xml/coverage-04.dtd -->
5
  <sources>
 
1
  <?xml version="1.0" ?>
2
+ <coverage version="7.12.0" timestamp="1763981841477" lines-valid="290" lines-covered="242" line-rate="0.8345" branches-valid="16" branches-covered="7" branch-rate="0.4375" complexity="0">
3
  <!-- Generated by coverage.py: https://coverage.readthedocs.io/en/7.12.0 -->
4
  <!-- Based on https://raw.githubusercontent.com/cobertura/web/master/htdocs/xml/coverage-04.dtd -->
5
  <sources>
src/data/models/__init__.py CHANGED
@@ -1,7 +1,11 @@
1
  from .base import Base
2
  from .predict_logs import PredictLogs
 
 
3
 
4
  __all__ = [
5
  "Base",
6
- "PredictLogs"
 
 
7
  ]
 
1
  from .base import Base
2
  from .predict_logs import PredictLogs
3
+ from .drift_run import DriftRun
4
+ from .drift_feature_metric import DriftFeatureMetric
5
 
6
  __all__ = [
7
  "Base",
8
+ "PredictLogs",
9
+ "DriftRun",
10
+ "DriftFeatureMetric"
11
  ]
src/data/models/drift_feature_metric.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # models/drift_feature_metric.py
2
+ from sqlalchemy import Column, Integer, String, Float, Boolean, ForeignKey
3
+ from sqlalchemy.orm import relationship
4
+ from src.data.models.base import Base
5
+
6
class DriftFeatureMetric(Base):
    """Per-feature drift metrics recorded for one specific drift run."""

    __tablename__ = "drift_feature_metric"

    id = Column(Integer, autoincrement=True, primary_key=True)

    # Foreign key pointing at the parent row in the drift_run table.
    run_id = Column(Integer, ForeignKey("drift_run.id"), index=True, nullable=False)

    feature_name = Column(String(100), index=True, nullable=False)

    drift_detected = Column(Boolean, nullable=False)
    drift_score = Column(Float, nullable=True)
    # Kind of statistical test used for this feature.
    stattest_name = Column(String(50), nullable=True)

    # Relationship giving access to the parent run from a feature metric;
    # the backref is declared under the name `feature_metrics`.
    drift_run = relationship("DriftRun", backref="feature_metrics")

    def __repr__(self):
        """Return a short debug representation with the identifying fields."""
        template = "<DriftFeatureMetric(id={}, run_id={}, feature={}, drift={})>"
        return template.format(
            self.id,
            self.run_id,
            self.feature_name,
            self.drift_detected,
        )
src/data/models/drift_run.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # models/drift_run.py
2
+ from datetime import datetime
3
+ from sqlalchemy import Column, Integer, String, Boolean, DateTime, Float
4
+ from src.data.models.base import Base
5
+
6
class DriftRun(Base):
    """One global (dataset-level) drift monitoring run."""

    __tablename__ = "drift_run"

    id = Column(Integer, autoincrement=True, primary_key=True)

    # Timestamp of the computation; defaults to datetime.utcnow at insert time.
    date = Column(DateTime, default=datetime.utcnow, index=True, nullable=False)

    # Whether a global drift was detected for the dataset.
    dataset_drift = Column(Boolean, nullable=False)

    # Global drift score (share of columns that drifted).
    drift_score = Column(Float, nullable=True)

    def __repr__(self):
        """Return a short debug representation with the run's main fields."""
        template = "<DriftRun(id={}, date={}, dataset_drift={}, drift_score={})>"
        return template.format(
            self.id,
            self.date,
            self.dataset_drift,
            self.drift_score,
        )
src/drift/monitoring.py CHANGED
@@ -6,6 +6,10 @@ from sqlalchemy import text
6
  from evidently import Report
7
  from evidently.presets import DataDriftPreset
8
 
 
 
 
 
9
  # Ajuste ce chemin à ton projet si besoin
10
  ROOT_DIR = Path(__file__).resolve().parents[2]
11
  sys.path.insert(0, str(ROOT_DIR))
@@ -15,7 +19,7 @@ from src.data.database import get_db
15
  # Config
16
  DATA_DIR = ROOT_DIR / ".data"
17
  TRAIN_PATH = DATA_DIR / "application_train.csv"
18
- WINDOW_DAYS = 100
19
  REPORT_OUTPUT = DATA_DIR / "drift" / "report.html"
20
 
21
  def extract_prod_data() -> pd.DataFrame:
@@ -45,6 +49,51 @@ def load_reference_data() -> pd.DataFrame:
45
  return pd.read_csv(TRAIN_PATH)
46
 
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  def generate_drift_report(reference_data: pd.DataFrame, current_data: pd.DataFrame) -> None:
49
  """Génère un rapport HTML de drift avec Evidently."""
50
 
@@ -60,6 +109,14 @@ def generate_drift_report(reference_data: pd.DataFrame, current_data: pd.DataFra
60
 
61
  print(f"Colonnes communes détectées: {len(common_cols)}")
62
 
 
 
 
 
 
 
 
 
63
  reference_subset = reference_data[common_cols]
64
  current_subset = current_data[common_cols]
65
 
@@ -80,6 +137,9 @@ def generate_drift_report(reference_data: pd.DataFrame, current_data: pd.DataFra
80
  # Cette version‑là de Report a bien save_html
81
  eval.save_html(str(REPORT_OUTPUT))
82
 
 
 
 
83
  print(f"Rapport de drift généré: {REPORT_OUTPUT}")
84
 
85
 
@@ -101,7 +161,7 @@ def main():
101
  print("Aucune donnée de production trouvée!")
102
  return
103
 
104
- print(f"Données de production: {current_data.shape}")
105
 
106
  # 3. Générer le rapport de drift
107
  generate_drift_report(reference_data, current_data)
 
6
  from evidently import Report
7
  from evidently.presets import DataDriftPreset
8
 
9
+ from src.data.models import DriftRun, DriftFeatureMetric
10
+ from src.data.database import get_db
11
+ from datetime import datetime
12
+
13
  # Ajuste ce chemin à ton projet si besoin
14
  ROOT_DIR = Path(__file__).resolve().parents[2]
15
  sys.path.insert(0, str(ROOT_DIR))
 
19
  # Config
20
  DATA_DIR = ROOT_DIR / ".data"
21
  TRAIN_PATH = DATA_DIR / "application_train.csv"
22
+ WINDOW_DAYS = 365
23
  REPORT_OUTPUT = DATA_DIR / "drift" / "report.html"
24
 
25
  def extract_prod_data() -> pd.DataFrame:
 
49
  return pd.read_csv(TRAIN_PATH)
50
 
51
 
52
def save_drift_to_db(result_dict: dict):
    """Persist a drift result as one DriftRun row plus one row per feature.

    Args:
        result_dict: Dictionary holding a "metrics" list. Each entry is read
            through its "config" mapping ("type", and for ValueDrift entries
            "column", optional "threshold" and "method") and its "value".

    Raises:
        Exception: Any error raised while reading the metrics or writing to
            the database is re-raised after the session has been rolled back.
    """
    from src.data.database import get_db
    from datetime import datetime

    value_drift_type = "evidently:metric_v2:ValueDrift"
    drifted_count_type = "evidently:metric_v2:DriftedColumnsCount"
    default_threshold = 0.1

    db = next(get_db())

    try:
        metrics = result_dict.get("metrics", [])

        # Evaluate every per-column drift metric exactly once so the run-level
        # flag and the per-feature rows can never disagree.
        feature_results = []
        for metric in metrics:
            config = metric["config"]
            if config.get("type") != value_drift_type:
                continue

            score = float(metric["value"])
            # dict.get only falls back to its default when the key is absent;
            # a "threshold" key explicitly set to None must also use the
            # default, otherwise `score > None` raises TypeError.
            threshold = config.get("threshold")
            if threshold is None:
                threshold = default_threshold

            # NOTE(review): "greater than threshold" suits distance-style
            # scores; if the configured method reports a p-value the
            # comparison direction may need to be inverted - confirm.
            feature_results.append((config, score, score > threshold))

        # Global score: the "share" reported by the first DriftedColumnsCount
        # metric, or None when no such metric is present.
        global_score = next(
            (
                float(metric["value"]["share"])
                for metric in metrics
                if metric["config"].get("type") == drifted_count_type
            ),
            None,
        )

        # Create the run; it is flagged as drifted as soon as any single
        # feature exceeds its threshold.
        drift_run = DriftRun(
            date=datetime.utcnow(),
            dataset_drift=any(drifted for _, _, drifted in feature_results),
            drift_score=global_score,
        )
        db.add(drift_run)
        # Flush before adding children so drift_run.id can be used as their
        # foreign key.
        db.flush()

        # Add one row per feature.
        for config, score, drifted in feature_results:
            db.add(DriftFeatureMetric(
                run_id=drift_run.id,
                feature_name=config["column"],
                drift_detected=drifted,
                drift_score=score,
                stattest_name=config.get("method"),
            ))

        db.commit()

    except Exception:
        # Undo the partially written run before propagating the error.
        db.rollback()
        raise
    finally:
        db.close()
96
+
97
  def generate_drift_report(reference_data: pd.DataFrame, current_data: pd.DataFrame) -> None:
98
  """Génère un rapport HTML de drift avec Evidently."""
99
 
 
109
 
110
  print(f"Colonnes communes détectées: {len(common_cols)}")
111
 
112
+ # Colonnes à exclure de l'analyse de drift
113
+ EXCLUDE_COLS = ['SK_ID_CURR']
114
+
115
+ # Exclure les colonnes
116
+ common_cols = [col for col in common_cols if col not in EXCLUDE_COLS]
117
+ print(
118
+ f"Colonnes exclues: {[col for col in EXCLUDE_COLS if col in set(reference_data.columns) & set(current_data.columns)]}")
119
+
120
  reference_subset = reference_data[common_cols]
121
  current_subset = current_data[common_cols]
122
 
 
137
  # Cette version‑là de Report a bien save_html
138
  eval.save_html(str(REPORT_OUTPUT))
139
 
140
+ result_dict = eval.dict()
141
+ save_drift_to_db(result_dict)
142
+
143
  print(f"Rapport de drift généré: {REPORT_OUTPUT}")
144
 
145
 
 
161
  print("Aucune donnée de production trouvée!")
162
  return
163
 
164
+ print(f"Données de production: {current_data.shape} (lignes: {len(current_data)})")
165
 
166
  # 3. Générer le rapport de drift
167
  generate_drift_report(reference_data, current_data)
src/scripts/api_simulation.py CHANGED
@@ -157,5 +157,5 @@ if __name__ == "__main__":
157
  # Génère 100 prédictions à partir de l'index 0
158
  results = generate_production_data(
159
  start_index=0,
160
- num_records=500
161
  )
 
157
  # Génère les prédictions de production à partir de l'index 0
158
  results = generate_production_data(
159
  start_index=0,
160
+ num_records=4275
161
  )