Spaces:

KaiquanMah
/

is_click

Sleeping

App Files Files Community

KaiquanMah committed on Feb 15, 2025

Commit

e31c88e

verified ·

1 Parent(s): e4860b5

Upload 7 files

Browse files

Files changed (8) hide show

.gitattributes +1 -0
app.py +131 -0
main.py +45 -0
modelConnector.py +104 -0
models/catboost_model.cbm +3 -0
models/rf_model.pkl +3 -0
models/xgb_model.json +0 -0
requirements.txt +13 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+models/catboost_model.cbm filter=lfs diff=lfs merge=lfs -text

app.py ADDED Viewed

	@@ -0,0 +1,131 @@

+import streamlit as st
+import pandas as pd
+import joblib
+from huggingface_hub import hf_hub_download
+from modelConnector import ModelConnector
+# ===========================
+#  LOAD MODEL & DATASET
+# ===========================
+st.title("📊 Is Click Predictor")
+# Download and load the trained model from Hugging Face
+model_path = hf_hub_download(repo_id="taimax13/is_click_predictor", filename="rf_model.pkl")
+rf_model = joblib.load(model_path)
+st.success("✅ Model Loaded Successfully!")
+# ===========================
+#  LOAD DATA FROM HUGGING FACE
+# ===========================
+st.sidebar.header("Dataset Selection")
+# # Download required dataset files
+# X_test_path = hf_hub_download(repo_id="taimax13/is_click_data", filename="X_test_1st(1).csv")
+# y_test_path = hf_hub_download(repo_id="taimax13/is_click_data", filename="y_test_1st.csv")
+# train_data_path = hf_hub_download(repo_id="taimax13/is_click_data", filename="train_dataset_full - train_dataset_full (1).csv")
+X_test_path = "HuggingFaceRepo/data/y_test_1st (1).csv"
+y_test_path = "HuggingFaceRepo/data/y_test_1st.csv"
+train_data_path = "HuggingFaceRepo/data/train_dataset_full - train_dataset_full.csv"
+# Load datasets
+X_test = pd.read_csv(X_test_path)
+y_test = pd.read_csv(y_test_path, header=None)  # Ensure labels match test dataset index
+train_data = pd.read_csv(train_data_path)
+st.info(f"✅ Loaded datasets: **Train: {len(train_data)} rows**, **Test: {len(X_test)} rows**")
+# Initialize Model Connector
+model_connector = ModelConnector()
+st.title("📊 Is Click Predictor - Train, Retrain, and Predict")
+# ===========================
+#  CHECK MODEL STATUS
+# ===========================
+if model_connector.model:
+    st.success("✅ Model Loaded Successfully!")
+else:
+    st.warning("⚠ No model found. Please train one first.")
+# ===========================
+#  TRAIN MODEL IF NOT FOUND
+# ===========================
+if st.button("🚀 Train Model"):
+    st.info("🔄 Training model...")
+    message = model_connector.train_model()
+    st.success(message)
+# ===========================
+#  RETRAIN MODEL
+# ===========================
+if st.button("🔄 Retrain Model"):
+    st.info("🔄 Retraining model with latest data...")
+    message = model_connector.retrain_model()
+    st.success(message)
+# ===========================
+#  SELECT A DATA SAMPLE
+# ===========================
+st.sidebar.header("Select a Test Sample for Prediction")
+# Merge X_test with y_test for selection (without labels affecting prediction)
+X_test["actual_click"] = y_test.values
+# Allow user to pick a row
+selected_index = st.sidebar.selectbox("Choose a test sample index", X_test.index)
+selected_row = X_test.loc[selected_index].drop("actual_click")  # Exclude actual label
+# Display selected row
+st.write("### Selected Data Sample:")
+st.dataframe(selected_row.to_frame().T)  # Display as a table
+# ===========================
+#  MAKE PREDICTION & EXPORT CSV
+# ===========================
+if st.button("Predict Click"):
+    # Convert selected row to DataFrame for model input
+    input_data = selected_row.to_frame().T
+    # Make prediction
+    prediction = rf_model.predict(input_data)[0]
+    # Add prediction to DataFrame
+    input_data["is_click_predicted"] = prediction
+    # Save prediction as CSV
+    csv_filename = "prediction_result.csv"
+    input_data.to_csv(csv_filename, index=False)
+    # Display Prediction Result
+    st.subheader("Prediction Result")
+    if prediction == 1:
+        st.success("🟢 The model predicts: **User WILL CLICK on the ad!**")
+    else:
+        st.warning("🔴 The model predicts: **User WILL NOT CLICK on the ad.**")
+    # Provide download button for prediction result
+    st.download_button(
+        label="📥 Download Prediction Result",
+        data=input_data.to_csv(index=False).encode("utf-8"),
+        file_name="prediction_result.csv",
+        mime="text/csv",
+    )
+st.markdown("---")
+st.info("Select a test row from the **left panel**, click **'Predict Click'**, and download the prediction result as a CSV.")

main.py ADDED Viewed

	@@ -0,0 +1,45 @@

+import argparse
+import os
+from data_loader import load_and_process_data, CATEGORICAL_COLUMNS
+from model_trainer import train_models
+from model_manager import save_models, load_models
+from model_predictor import predict
+from config import MODEL_DIR
+# ===========================
+#  MAIN FUNCTION
+# ===========================
+def main(train=True, retrain=False):
+    """ Main entry point to train, retrain or predict """
+    # Create model directory if it doesn't exist
+    if not os.path.exists(MODEL_DIR):
+        os.makedirs(MODEL_DIR)
+    print("\n🚀 Loading data...")
+    X_train, X_val, y_train, y_val, test_df = load_and_process_data()
+    if train or retrain:
+        print("\n🚀 Training models...")
+        models = train_models(X_train, y_train, CATEGORICAL_COLUMNS)
+        save_models(models)
+    else:
+        print("\n🚀 Loading existing models...")
+        models = load_models()
+    print("\n🔍 Making predictions...")
+    predictions = predict(models, test_df)
+    # Save final predictions
+    predictions.to_csv("final_predictions.csv", index=False)
+    print("\n✅ Predictions saved successfully as 'final_predictions.csv'!")
+# ===========================
+#  COMMAND-LINE EXECUTION
+# ===========================
+if __name__ == "__main__":
+    # parser = argparse.ArgumentParser(description="Train, retrain or make predictions")
+    # parser.add_argument("--train", action="store_true", help="Train new models")
+    # parser.add_argument("--retrain", action="store_true", help="Retrain models with updated data")
+    #
+    # args = parser.parse_args()
+    main(train=False, retrain=False)  # flags are hard-coded while the argparse CLI above is commented out: load saved models and predict only

modelConnector.py ADDED Viewed

	@@ -0,0 +1,104 @@

+import os
+import joblib
+import pandas as pd
+from huggingface_hub import hf_hub_download, HfApi
+from model_trainer import train_models  # Assumes model_trainer.py exists with train_models function
+# Hugging Face model & dataset locations used by ModelConnector
+MODEL_REPO = "taimax13/is_click_predictor"  # Hub repo the model file is downloaded from and uploaded to
+MODEL_FILENAME = "rf_model.pkl"  # name of the model file inside MODEL_REPO
+DATA_REPO = "taimax13/is_click_data"  # Hub repo the training CSV is downloaded from
+LOCAL_MODEL_PATH = f"models/{MODEL_FILENAME}"  # local file the model is dumped to before each upload
+# Hugging Face API client, created once at import time
+api = HfApi()  # used for the upload_file calls after training / retraining
+class ModelConnector:
+    def __init__(self):
+        """Initialize model connector and check if model exists."""
+        os.makedirs("models", exist_ok=True)
+        self.model = self.load_model()
+    def check_model_exists(self):
+        """Check if the model exists on Hugging Face."""
+        try:
+            hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME)
+            return True
+        except Exception:
+            return False
+    def load_model(self):
+        """Download and load the model from Hugging Face."""
+        if self.check_model_exists():
+            model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME)
+            return joblib.load(model_path)
+        return None
+    def train_model(self):
+        """Train a new model and upload it to Hugging Face."""
+        try:
+            # Load dataset
+            train_data_path = hf_hub_download(repo_id=DATA_REPO, filename="train_dataset_full.csv")
+            train_data = pd.read_csv(train_data_path)
+            X_train = train_data.drop(columns=["is_click"])
+            y_train = train_data["is_click"]
+            # Train model
+            models = train_models(X_train, y_train)
+            rf_model = models["RandomForest"]
+            # Save locally
+            joblib.dump(rf_model, LOCAL_MODEL_PATH)
+            # Upload to Hugging Face
+            api.upload_file(
+                path_or_fileobj=LOCAL_MODEL_PATH,
+                path_in_repo=MODEL_FILENAME,
+                repo_id=MODEL_REPO,
+            )
+            self.model = rf_model  # Update instance with trained model
+            return "Model trained and uploaded successfully!"
+        except Exception as e:
+            return f"Error during training: {str(e)}"
+    def retrain_model(self):
+        """Retrain the existing model with new data."""
+        try:
+            # Load dataset
+            train_data_path = hf_hub_download(repo_id=DATA_REPO, filename="train_dataset_full.csv")
+            train_data = pd.read_csv(train_data_path)
+            X_train = train_data.drop(columns=["is_click"])
+            y_train = train_data["is_click"]
+            if self.model is None:
+                return "No existing model found. Train a new model first."
+            # Retrain the model
+            self.model.fit(X_train, y_train)
+            # Save & upload retrained model
+            joblib.dump(self.model, LOCAL_MODEL_PATH)
+            api.upload_file(
+                path_or_fileobj=LOCAL_MODEL_PATH,
+                path_in_repo=MODEL_FILENAME,
+                repo_id=MODEL_REPO,
+            )
+            return "Model retrained and uploaded successfully!"
+        except Exception as e:
+            return f"Error during retraining: {str(e)}"
+    def predict(self, input_data):
+        """Make predictions using the loaded model."""
+        if self.model is None:
+            return "No model found. Train the model first."
+        input_df = pd.DataFrame([input_data])
+        prediction = self.model.predict(input_df)[0]
+        return int(prediction)

models/catboost_model.cbm ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bac1133bf0f84dd880f2a00b19d395c6b866e26eb6e0bdec12fe02879d528499
+size 907908

models/rf_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cc29e7a3fa34217333f2d715d96df473c65e03bfd4ce6bdae6716e783d44f306
+size 111639881

models/xgb_model.json ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt ADDED Viewed

	@@ -0,0 +1,13 @@

+pandas
+numpy
+scikit-learn
+imbalanced-learn
+matplotlib
+seaborn
+catboost
+xgboost
+joblib
+streamlit
+pandas
+joblib
+huggingface_hub