chkp-talexm committed on
Commit
c25d9fa
·
1 Parent(s): e3af011
Files changed (1) hide show
  1. app.py +43 -18
app.py CHANGED
@@ -1,9 +1,9 @@
1
- import os
2
- import joblib
3
- import shutil
4
- from huggingface_hub import hf_hub_download
5
  import streamlit as st
6
  import pandas as pd
 
 
 
 
7
 
8
  # Hugging Face Model Repo
9
  MODEL_REPO = "chagu13/is_click_predictor"
@@ -20,16 +20,42 @@ CATBOOST_MODEL_PATH = os.path.join(MODEL_DIR, "catboost_model.pkl")
20
  XGB_MODEL_PATH = os.path.join(MODEL_DIR, "xgb_model.pkl")
21
  RF_MODEL_PATH = os.path.join(MODEL_DIR, "rf_model.pkl")
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- def download_model(filename, local_path):
25
- """Download model from Hugging Face and move it to the correct location."""
26
- temp_path = hf_hub_download(repo_id=MODEL_REPO, filename=filename, local_dir=MODEL_DIR)
 
 
27
 
28
- # Ensure correct file placement
29
- if temp_path != local_path:
30
- shutil.move(temp_path, local_path)
31
 
32
- return local_path
33
 
34
 
35
  def load_models():
@@ -37,20 +63,18 @@ def load_models():
37
  try:
38
  print("πŸ”„ Checking and downloading models...")
39
 
40
- # Ensure models are downloaded and placed correctly
41
  if not os.path.exists(CATBOOST_MODEL_PATH):
42
  print("πŸš€ Downloading CatBoost model...")
43
- download_model(CATBOOST_MODEL_FILENAME, CATBOOST_MODEL_PATH)
44
 
45
  if not os.path.exists(XGB_MODEL_PATH):
46
  print("πŸš€ Downloading XGBoost model...")
47
- download_model(XGB_MODEL_FILENAME, XGB_MODEL_PATH)
48
 
49
  if not os.path.exists(RF_MODEL_PATH):
50
  print("πŸš€ Downloading RandomForest model...")
51
- download_model(RF_MODEL_FILENAME, RF_MODEL_PATH)
52
 
53
- # ✅ Load models
54
  print("πŸ“¦ Loading models...")
55
  catboost_model = joblib.load(CATBOOST_MODEL_PATH)
56
  xgb_model = joblib.load(XGB_MODEL_PATH)
@@ -64,7 +88,7 @@ def load_models():
64
  return None, None, None
65
 
66
 
67
- # Load models at startup
68
  st.title("Is_Click Predictor - ML Model Inference")
69
  st.info("Upload a CSV file, and the trained models will predict click probability.")
70
 
@@ -74,7 +98,8 @@ catboost, xgb, rf = load_models()
74
  uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
75
  if uploaded_file:
76
  input_df = pd.read_csv(uploaded_file)
77
- st.success("File uploaded successfully!")
 
78
 
79
  # Make Predictions
80
  st.subheader("Predictions in Progress...")
 
 
 
 
 
1
  import streamlit as st
2
  import pandas as pd
3
+ import numpy as np
4
+ import joblib
5
+ from huggingface_hub import hf_hub_download
6
+ from sklearn.preprocessing import LabelEncoder, StandardScaler
7
 
8
  # Hugging Face Model Repo
9
  MODEL_REPO = "chagu13/is_click_predictor"
 
20
  XGB_MODEL_PATH = os.path.join(MODEL_DIR, "xgb_model.pkl")
21
  RF_MODEL_PATH = os.path.join(MODEL_DIR, "rf_model.pkl")
22
 
23
+ # Define feature lists
24
+ CATEGORICAL_COLUMNS = ["gender", "product", "campaign_id", "webpage_id"]
25
+ NUMERICAL_COLUMNS = [
26
+ "age_level", "city_development_index", "user_group_id", "user_depth", "var_1",
27
+ "click_sum_age_sex_prod", "click_count_age_sex_prod",
28
+ "unique_campaigns_age_sex_prod", "unique_webpages_age_sex_prod",
29
+ "click_sum_city_age_prod", "click_count_city_age_prod",
30
+ "unique_campaigns_city_age_prod", "unique_webpages_city_age_prod"
31
+ ]
32
+
33
+ FEATURE_COLUMNS = CATEGORICAL_COLUMNS + NUMERICAL_COLUMNS
34
+
35
+
36
def preprocess_input(input_df):
    """Preprocess raw input data into the feature matrix used for prediction.

    Operates on a copy of ``input_df`` (the caller's frame is not modified):
    missing values are filled with -1, categorical columns are cast to
    strings and label-encoded, numerical columns are standardized, and only
    the columns listed in ``FEATURE_COLUMNS`` are returned, in that order.

    Args:
        input_df: DataFrame that must contain every column named in
            ``FEATURE_COLUMNS``; any extra columns are dropped from the result.

    Returns:
        A new DataFrame restricted to ``FEATURE_COLUMNS``.

    Raises:
        KeyError: If one or more required feature columns are absent.
    """
    # Validate up front so a bad upload fails with one clear message instead
    # of an opaque KeyError raised halfway through the transformations.
    missing = [col for col in FEATURE_COLUMNS if col not in input_df.columns]
    if missing:
        raise KeyError(f"Input is missing required feature columns: {missing}")

    input_df = input_df.copy()

    # Fill missing values
    input_df.fillna(-1, inplace=True)

    # Convert categorical features to strings and label-encode them.
    # NOTE(review): each encoder is fit on the uploaded data itself, so the
    # integer codes depend on which values appear in this particular file.
    # Confirm this matches how the models were trained, or load the encoders
    # that were fitted at training time.
    for col in CATEGORICAL_COLUMNS:
        as_text = input_df[col].astype(str).replace("nan", "missing")
        input_df[col] = LabelEncoder().fit_transform(as_text)

    # Normalize numerical features.
    # NOTE(review): the scaler is likewise fit on the uploaded data, so the
    # scaling statistics come from this file rather than the training set.
    # Verify that this is intended.
    scaler = StandardScaler()
    input_df[NUMERICAL_COLUMNS] = scaler.fit_transform(input_df[NUMERICAL_COLUMNS])

    return input_df[FEATURE_COLUMNS]  # Return only necessary columns
59
 
60
 
61
  def load_models():
 
63
  try:
64
  print("πŸ”„ Checking and downloading models...")
65
 
 
66
  if not os.path.exists(CATBOOST_MODEL_PATH):
67
  print("πŸš€ Downloading CatBoost model...")
68
+ hf_hub_download(repo_id=MODEL_REPO, filename=CATBOOST_MODEL_FILENAME, local_dir=MODEL_DIR)
69
 
70
  if not os.path.exists(XGB_MODEL_PATH):
71
  print("πŸš€ Downloading XGBoost model...")
72
+ hf_hub_download(repo_id=MODEL_REPO, filename=XGB_MODEL_FILENAME, local_dir=MODEL_DIR)
73
 
74
  if not os.path.exists(RF_MODEL_PATH):
75
  print("πŸš€ Downloading RandomForest model...")
76
+ hf_hub_download(repo_id=MODEL_REPO, filename=RF_MODEL_FILENAME, local_dir=MODEL_DIR)
77
 
 
78
  print("πŸ“¦ Loading models...")
79
  catboost_model = joblib.load(CATBOOST_MODEL_PATH)
80
  xgb_model = joblib.load(XGB_MODEL_PATH)
 
88
  return None, None, None
89
 
90
 
91
+ # Streamlit UI
92
  st.title("Is_Click Predictor - ML Model Inference")
93
  st.info("Upload a CSV file, and the trained models will predict click probability.")
94
 
 
98
  uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
99
  if uploaded_file:
100
  input_df = pd.read_csv(uploaded_file)
101
+ input_df = preprocess_input(input_df) # βœ… Apply preprocessing
102
+ st.success("File uploaded and preprocessed successfully!")
103
 
104
  # Make Predictions
105
  st.subheader("Predictions in Progress...")