chkp-talexm
committed on
Commit
·
c25d9fa
1
Parent(s):
e3af011
update
Browse files
app.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import joblib
|
| 3 |
-
import shutil
|
| 4 |
-
from huggingface_hub import hf_hub_download
|
| 5 |
import streamlit as st
|
| 6 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
# Hugging Face Model Repo
|
| 9 |
MODEL_REPO = "chagu13/is_click_predictor"
|
|
@@ -20,16 +20,42 @@ CATBOOST_MODEL_PATH = os.path.join(MODEL_DIR, "catboost_model.pkl")
|
|
| 20 |
XGB_MODEL_PATH = os.path.join(MODEL_DIR, "xgb_model.pkl")
|
| 21 |
RF_MODEL_PATH = os.path.join(MODEL_DIR, "rf_model.pkl")
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
|
|
|
|
|
|
| 27 |
|
| 28 |
-
#
|
| 29 |
-
|
| 30 |
-
|
| 31 |
|
| 32 |
-
return
|
| 33 |
|
| 34 |
|
| 35 |
def load_models():
|
|
@@ -37,20 +63,18 @@ def load_models():
|
|
| 37 |
try:
|
| 38 |
print("π Checking and downloading models...")
|
| 39 |
|
| 40 |
-
# Ensure models are downloaded and placed correctly
|
| 41 |
if not os.path.exists(CATBOOST_MODEL_PATH):
|
| 42 |
print("π Downloading CatBoost model...")
|
| 43 |
-
|
| 44 |
|
| 45 |
if not os.path.exists(XGB_MODEL_PATH):
|
| 46 |
print("π Downloading XGBoost model...")
|
| 47 |
-
|
| 48 |
|
| 49 |
if not os.path.exists(RF_MODEL_PATH):
|
| 50 |
print("π Downloading RandomForest model...")
|
| 51 |
-
|
| 52 |
|
| 53 |
-
# β
Load models
|
| 54 |
print("π¦ Loading models...")
|
| 55 |
catboost_model = joblib.load(CATBOOST_MODEL_PATH)
|
| 56 |
xgb_model = joblib.load(XGB_MODEL_PATH)
|
|
@@ -64,7 +88,7 @@ def load_models():
|
|
| 64 |
return None, None, None
|
| 65 |
|
| 66 |
|
| 67 |
-
#
|
| 68 |
st.title("Is_Click Predictor - ML Model Inference")
|
| 69 |
st.info("Upload a CSV file, and the trained models will predict click probability.")
|
| 70 |
|
|
@@ -74,7 +98,8 @@ catboost, xgb, rf = load_models()
|
|
| 74 |
uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
|
| 75 |
if uploaded_file:
|
| 76 |
input_df = pd.read_csv(uploaded_file)
|
| 77 |
-
|
|
|
|
| 78 |
|
| 79 |
# Make Predictions
|
| 80 |
st.subheader("Predictions in Progress...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
import joblib
|
| 5 |
+
from huggingface_hub import hf_hub_download
|
| 6 |
+
from sklearn.preprocessing import LabelEncoder, StandardScaler
|
| 7 |
|
| 8 |
# Hugging Face Model Repo
|
| 9 |
MODEL_REPO = "chagu13/is_click_predictor"
|
|
|
|
| 20 |
XGB_MODEL_PATH = os.path.join(MODEL_DIR, "xgb_model.pkl")
|
| 21 |
RF_MODEL_PATH = os.path.join(MODEL_DIR, "rf_model.pkl")
|
| 22 |
|
| 23 |
+
# Define feature lists
|
| 24 |
+
CATEGORICAL_COLUMNS = ["gender", "product", "campaign_id", "webpage_id"]
|
| 25 |
+
NUMERICAL_COLUMNS = [
|
| 26 |
+
"age_level", "city_development_index", "user_group_id", "user_depth", "var_1",
|
| 27 |
+
"click_sum_age_sex_prod", "click_count_age_sex_prod",
|
| 28 |
+
"unique_campaigns_age_sex_prod", "unique_webpages_age_sex_prod",
|
| 29 |
+
"click_sum_city_age_prod", "click_count_city_age_prod",
|
| 30 |
+
"unique_campaigns_city_age_prod", "unique_webpages_city_age_prod"
|
| 31 |
+
]
|
| 32 |
+
|
| 33 |
+
FEATURE_COLUMNS = CATEGORICAL_COLUMNS + NUMERICAL_COLUMNS
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def preprocess_input(input_df):
|
| 37 |
+
"""Preprocess input data for prediction."""
|
| 38 |
+
input_df = input_df.copy()
|
| 39 |
+
|
| 40 |
+
# Fill missing values
|
| 41 |
+
input_df.fillna(-1, inplace=True)
|
| 42 |
+
|
| 43 |
+
# Convert categorical features to string
|
| 44 |
+
for col in CATEGORICAL_COLUMNS:
|
| 45 |
+
if col in input_df.columns:
|
| 46 |
+
input_df[col] = input_df[col].astype(str).replace("nan", "missing")
|
| 47 |
|
| 48 |
+
# Label encode categorical variables (same as training)
|
| 49 |
+
label_encoders = {}
|
| 50 |
+
for col in CATEGORICAL_COLUMNS:
|
| 51 |
+
le = LabelEncoder()
|
| 52 |
+
input_df[col] = le.fit_transform(input_df[col].astype(str))
|
| 53 |
|
| 54 |
+
# Normalize numerical features
|
| 55 |
+
scaler = StandardScaler()
|
| 56 |
+
input_df[NUMERICAL_COLUMNS] = scaler.fit_transform(input_df[NUMERICAL_COLUMNS])
|
| 57 |
|
| 58 |
+
return input_df[FEATURE_COLUMNS] # Return only necessary columns
|
| 59 |
|
| 60 |
|
| 61 |
def load_models():
|
|
|
|
| 63 |
try:
|
| 64 |
print("π Checking and downloading models...")
|
| 65 |
|
|
|
|
| 66 |
if not os.path.exists(CATBOOST_MODEL_PATH):
|
| 67 |
print("π Downloading CatBoost model...")
|
| 68 |
+
hf_hub_download(repo_id=MODEL_REPO, filename=CATBOOST_MODEL_FILENAME, local_dir=MODEL_DIR)
|
| 69 |
|
| 70 |
if not os.path.exists(XGB_MODEL_PATH):
|
| 71 |
print("π Downloading XGBoost model...")
|
| 72 |
+
hf_hub_download(repo_id=MODEL_REPO, filename=XGB_MODEL_FILENAME, local_dir=MODEL_DIR)
|
| 73 |
|
| 74 |
if not os.path.exists(RF_MODEL_PATH):
|
| 75 |
print("π Downloading RandomForest model...")
|
| 76 |
+
hf_hub_download(repo_id=MODEL_REPO, filename=RF_MODEL_FILENAME, local_dir=MODEL_DIR)
|
| 77 |
|
|
|
|
| 78 |
print("π¦ Loading models...")
|
| 79 |
catboost_model = joblib.load(CATBOOST_MODEL_PATH)
|
| 80 |
xgb_model = joblib.load(XGB_MODEL_PATH)
|
|
|
|
| 88 |
return None, None, None
|
| 89 |
|
| 90 |
|
| 91 |
+
# Streamlit UI
|
| 92 |
st.title("Is_Click Predictor - ML Model Inference")
|
| 93 |
st.info("Upload a CSV file, and the trained models will predict click probability.")
|
| 94 |
|
|
|
|
| 98 |
uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
|
| 99 |
if uploaded_file:
|
| 100 |
input_df = pd.read_csv(uploaded_file)
|
| 101 |
+
input_df = preprocess_input(input_df) # β
Apply preprocessing
|
| 102 |
+
st.success("File uploaded and preprocessed successfully!")
|
| 103 |
|
| 104 |
# Make Predictions
|
| 105 |
st.subheader("Predictions in Progress...")
|