Spaces:

Zeyadd-Mostaffa
/

NTI_ML_Project

Sleeping

App Files Files Community

Zeyadd-Mostaffa committed on May 21, 2025

Commit

4339fe4

verified ·

1 Parent(s): abb49f7

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -10

app.py CHANGED Viewed

@@ -5,10 +5,9 @@ import joblib
 import warnings
 from huggingface_hub import hf_hub_download
-# Suppress warnings
 warnings.filterwarnings("ignore")
-# Load ensemble model from Hugging Face Hub
 def load_model():
     model_path = hf_hub_download(
         repo_id="Zeyadd-Mostaffa/final_ensemble_model",
@@ -18,7 +17,18 @@ def load_model():
     print("✅ Ensemble model loaded successfully.")
     return model
 model = load_model()
 # Define prediction function
 def predict_employee_status(
@@ -26,7 +36,6 @@ def predict_employee_status(
     average_monthly_hours, time_spend_company,
     work_accident, promotion_last_5years, salary, department, threshold=0.5
 ):
-    # Expected columns from training
     expected_columns = [
         'satisfaction_level', 'last_evaluation', 'number_project', 'average_monthly_hours',
         'time_spend_company', 'Work_accident', 'promotion_last_5years', 'salary',
@@ -36,17 +45,14 @@ def predict_employee_status(
         'department_sales', 'department_support', 'department_technical'
     ]
-    # Construct department one-hot features
     department_features = {col: 0 for col in expected_columns if col.startswith("department_")}
     dept_key = f"department_{department}"
     if dept_key in department_features:
         department_features[dept_key] = 1
-    # Create interaction features
     satisfaction_evaluation = satisfaction_level * last_evaluation
     work_balance = average_monthly_hours / number_project
-    # Create input dataframe
     input_data = {
         "satisfaction_level": [satisfaction_level],
         "last_evaluation": [last_evaluation],
@@ -63,12 +69,19 @@ def predict_employee_status(
     input_df = pd.DataFrame(input_data)
-    # Ensure all expected columns are present and ordered
     for col in expected_columns:
         if col not in input_df.columns:
             input_df[col] = 0
     input_df = input_df[expected_columns]
     try:
         prob = model.predict_proba(input_df)[0][1]
         result = "✅ Employee is likely to quit." if prob >= threshold else "✅ Employee is likely to stay."
@@ -76,7 +89,7 @@ def predict_employee_status(
     except Exception as e:
         return f"❌ Prediction error: {str(e)}"
-# Gradio Interface
 def gradio_interface():
     interface = gr.Interface(
         fn=predict_employee_status,
@@ -104,5 +117,3 @@ def gradio_interface():
     interface.launch()
 gradio_interface()

 import warnings
 from huggingface_hub import hf_hub_download
 warnings.filterwarnings("ignore")
+# Load ensemble model
 def load_model():
     model_path = hf_hub_download(
         repo_id="Zeyadd-Mostaffa/final_ensemble_model",
     print("✅ Ensemble model loaded successfully.")
     return model
+# Load scaler
+def load_scaler():
+    scaler_path = hf_hub_download(
+        repo_id="Zeyadd-Mostaffa/final_ensemble_model",
+        filename="scaler.pkl"
+    )
+    scaler = joblib.load(scaler_path)
+    print("✅ Scaler loaded successfully.")
+    return scaler
 model = load_model()
+scaler = load_scaler()
 # Define prediction function
 def predict_employee_status(
     average_monthly_hours, time_spend_company,
     work_accident, promotion_last_5years, salary, department, threshold=0.5
 ):
     expected_columns = [
         'satisfaction_level', 'last_evaluation', 'number_project', 'average_monthly_hours',
         'time_spend_company', 'Work_accident', 'promotion_last_5years', 'salary',
         'department_sales', 'department_support', 'department_technical'
     ]
     department_features = {col: 0 for col in expected_columns if col.startswith("department_")}
     dept_key = f"department_{department}"
     if dept_key in department_features:
         department_features[dept_key] = 1
     satisfaction_evaluation = satisfaction_level * last_evaluation
     work_balance = average_monthly_hours / number_project
     input_data = {
         "satisfaction_level": [satisfaction_level],
         "last_evaluation": [last_evaluation],
     input_df = pd.DataFrame(input_data)
+    # Ensure all expected columns exist
     for col in expected_columns:
         if col not in input_df.columns:
             input_df[col] = 0
     input_df = input_df[expected_columns]
+    # Apply scaling to same numerical columns as training
+    numeric_cols = [
+        'satisfaction_level', 'last_evaluation',
+        'average_monthly_hours', 'number_project', 'work_balance'
+    ]
+    input_df[numeric_cols] = scaler.transform(input_df[numeric_cols])
     try:
         prob = model.predict_proba(input_df)[0][1]
         result = "✅ Employee is likely to quit." if prob >= threshold else "✅ Employee is likely to stay."
     except Exception as e:
         return f"❌ Prediction error: {str(e)}"
+# Gradio UI
 def gradio_interface():
     interface = gr.Interface(
         fn=predict_employee_status,
     interface.launch()
 gradio_interface()