KLEB38 committed on
Commit
aedb20e
·
1 Parent(s): 6758ac3

fix: logging for unknown categorical values in input

Browse files
Files changed (1) hide show
  1. app/main.py +38 -13
app/main.py CHANGED
@@ -3,11 +3,31 @@ import pandas as pd
3
  import joblib
4
  from app.schemas import EmployeeInput
5
  import shap
 
 
 
 
 
6
 
7
  app = FastAPI() # On crée l'outil (le guichet)
8
 
9
  # Au démarrage, on charge ton pipeline
10
- model = joblib.load('app/pipeline_rh.joblib')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  def inconsistency(df):
13
  if df["departement"] == "Commercial":
@@ -61,10 +81,21 @@ def developpement(df):
61
  def depart(x):
62
  if x == 0:
63
  return "The staff has a LOW probability of resigning"
64
- if x==1:
65
  return "The staff has a HIGH probability of resigning"
66
 
67
 
 
 
 
 
 
 
 
 
 
 
 
68
  @app.get("/") # La page d'accueil de ton API
69
  def read_root():
70
  return {"message": "Welcome to the FUTURISYS HR predictor API"}
@@ -74,6 +105,11 @@ def predict(data: EmployeeInput):
74
  # 1. On transforme le dictionnaire reçu en DataFrame pandas
75
  df = pd.DataFrame([data.model_dump()])
76
 
 
 
 
 
 
77
  # Encodage binaire non inclus dans le pipeline:
78
  df['genre']= df["genre"].map({"M": 1, "F": 0})
79
  df['heure_supplementaires']= df["heure_supplementaires"].map({"Oui": 1, "Non": 0})
@@ -105,17 +141,6 @@ def predict(data: EmployeeInput):
105
  shap_series = pd.Series(shap_values_obj.values[0], index=feature_names)
106
  top_factors = shap_series.abs().nlargest(5)
107
 
108
- def interpret_shap(rank: int, value: float) -> str:
109
- intensity = {
110
- 0: "Primary driver",
111
- 1: "Strong factor",
112
- 2: "Moderate factor",
113
- 3: "Contributing factor",
114
- 4: "Notable factor"
115
- }
116
- direction = "increases resignation risk" if value > 0 else "decreases resignation risk"
117
- return f"{intensity[rank]} — {direction}"
118
-
119
  # 3. On renvoie le résultat au format JSON
120
  return {
121
  "statut_employe": depart(prediction),
 
3
  import joblib
4
  from app.schemas import EmployeeInput
5
  import shap
6
+ import os
7
+ import logging
8
+
9
+ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
10
+
11
 
12
  app = FastAPI() # On crée l'outil (le guichet)
13
 
14
  # Au démarrage, on charge ton pipeline
15
+ model = joblib.load(os.path.join(BASE_DIR, 'pipeline_rh.joblib'))
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+ known_values = {
20
+ "departement": ["Consulting", "Commercial", "Ressources Humaines"],
21
+ "statut_marital": ["Marié(e)", "Célibataire", "Divorcé(e)"],
22
+ "frequence_deplacement": ["Aucun", "Occasionnel", "Frequent"],
23
+ "poste": ['Cadre Commercial', 'Assistant de Direction', 'Consultant',
24
+ 'Tech Lead', 'Manager', 'Senior Manager',
25
+ 'Représentant Commercial', 'Directeur Technique',
26
+ 'Ressources Humaines'],
27
+ "domaine_etude": ['Infra & Cloud', 'Autre', 'Transformation Digitale', 'Marketing',
28
+ 'Entrepreunariat', 'Ressources Humaines'],
29
+ }
30
+
31
 
32
  def inconsistency(df):
33
  if df["departement"] == "Commercial":
 
81
  def depart(x):
82
  if x == 0:
83
  return "The staff has a LOW probability of resigning"
84
+ if x == 1:
85
  return "The staff has a HIGH probability of resigning"
86
 
87
 
88
+ def interpret_shap(rank: int, value: float) -> str:
89
+ intensity = {
90
+ 0: "Primary driver",
91
+ 1: "Strong factor",
92
+ 2: "Moderate factor",
93
+ 3: "Contributing factor",
94
+ 4: "Notable factor"
95
+ }
96
+ direction = "increases resignation risk" if value > 0 else "decreases resignation risk"
97
+ return f"{intensity[rank]} — {direction}"
98
+
99
  @app.get("/") # La page d'accueil de ton API
100
  def read_root():
101
  return {"message": "Welcome to the FUTURISYS HR predictor API"}
 
105
  # 1. On transforme le dictionnaire reçu en DataFrame pandas
106
  df = pd.DataFrame([data.model_dump()])
107
 
108
+ for col, known in known_values.items():
109
+ val = df[col].values[0]
110
+ if val not in known:
111
+ logger.warning(f"Unknown value '{val}' for column '{col}' — prediction may be unreliable")
112
+
113
  # Encodage binaire non inclus dans le pipeline:
114
  df['genre']= df["genre"].map({"M": 1, "F": 0})
115
  df['heure_supplementaires']= df["heure_supplementaires"].map({"Oui": 1, "Non": 0})
 
141
  shap_series = pd.Series(shap_values_obj.values[0], index=feature_names)
142
  top_factors = shap_series.abs().nlargest(5)
143
 
 
 
 
 
 
 
 
 
 
 
 
144
  # 3. On renvoie le résultat au format JSON
145
  return {
146
  "statut_employe": depart(prediction),