KaiquanMah commited on
Commit
e31c88e
·
verified ·
1 Parent(s): e4860b5

Upload 7 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ models/catboost_model.cbm filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import joblib
4
+ from huggingface_hub import hf_hub_download
5
+
6
+ from modelConnector import ModelConnector
7
+
8
# ===========================
# LOAD MODEL & DATASET
# ===========================

st.title("📊 Is Click Predictor")

# Download the trained model file from Hugging Face and deserialize it.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only point this at a model repository you trust.
model_path = hf_hub_download(repo_id="taimax13/is_click_predictor", filename="rf_model.pkl")
rf_model = joblib.load(model_path)
st.success("✅ Model Loaded Successfully!")

# ===========================
# LOAD DATA FROM HUGGING FACE
# ===========================

st.sidebar.header("Dataset Selection")

# The datasets are currently read from local paths; the Hub downloads below
# are the disabled alternative.
# # Download required dataset files
# X_test_path = hf_hub_download(repo_id="taimax13/is_click_data", filename="X_test_1st(1).csv")
# y_test_path = hf_hub_download(repo_id="taimax13/is_click_data", filename="y_test_1st.csv")
# train_data_path = hf_hub_download(repo_id="taimax13/is_click_data", filename="train_dataset_full - train_dataset_full (1).csv")

# NOTE(review): X_test_path points at a file named "y_test_1st (1).csv",
# whereas the disabled download above fetched "X_test_1st(1).csv" -- confirm
# that this file really contains the test features.
X_test_path = "HuggingFaceRepo/data/y_test_1st (1).csv"
y_test_path = "HuggingFaceRepo/data/y_test_1st.csv"
train_data_path = "HuggingFaceRepo/data/train_dataset_full - train_dataset_full.csv"

# Load datasets
X_test = pd.read_csv(X_test_path)
y_test = pd.read_csv(y_test_path, header=None)  # labels file is read without a header row
train_data = pd.read_csv(train_data_path)

st.info(f"✅ Loaded datasets: **Train: {len(train_data)} rows**, **Test: {len(X_test)} rows**")

# Initialize Model Connector (its constructor tries to load the published model)
model_connector = ModelConnector()

st.title("📊 Is Click Predictor - Train, Retrain, and Predict")

# ===========================
# CHECK MODEL STATUS
# ===========================

# Compare against None explicitly: the truthiness of a model object depends on
# whatever __len__/__bool__ it happens to define, not on whether it was loaded.
if model_connector.model is not None:
    st.success("✅ Model Loaded Successfully!")
else:
    st.warning("⚠ No model found. Please train one first.")

# ===========================
# TRAIN MODEL IF NOT FOUND
# ===========================

if st.button("🚀 Train Model"):
    st.info("🔄 Training model...")
    message = model_connector.train_model()
    st.success(message)

# ===========================
# RETRAIN MODEL
# ===========================

if st.button("🔄 Retrain Model"):
    st.info("🔄 Retraining model with latest data...")
    message = model_connector.retrain_model()
    st.success(message)

# ===========================
# SELECT A DATA SAMPLE
# ===========================

st.sidebar.header("Select a Test Sample for Prediction")

# The labels are attached positionally, so both files must have the same
# number of rows. Report a mismatch explicitly instead of letting the column
# assignment below fail with a raw pandas ValueError.
if len(X_test) != len(y_test):
    st.error(
        f"❌ Test features ({len(X_test)} rows) and labels ({len(y_test)} rows) "
        "do not line up. Check the dataset paths."
    )
    st.stop()

# Merge X_test with y_test for selection (without labels affecting prediction)
X_test["actual_click"] = y_test.values

# Allow user to pick a row
selected_index = st.sidebar.selectbox("Choose a test sample index", X_test.index)

# Select with a list so the result stays a one-row DataFrame that keeps each
# column's dtype; Series.to_frame().T would upcast mixed-dtype rows to object.
# The label column is dropped so it can never reach the model.
selected_sample = X_test.loc[[selected_index]].drop(columns=["actual_click"])

# Display selected row
st.write("### Selected Data Sample:")
st.dataframe(selected_sample)

# ===========================
# MAKE PREDICTION & EXPORT CSV
# ===========================

if st.button("Predict Click"):
    # Work on a copy so adding the prediction column does not touch the
    # frame shown above.
    input_data = selected_sample.copy()

    # Make prediction
    prediction = rf_model.predict(input_data)[0]

    # Add prediction to DataFrame
    input_data["is_click_predicted"] = prediction

    # Save prediction as CSV next to the app
    csv_filename = "prediction_result.csv"
    input_data.to_csv(csv_filename, index=False)

    # Display Prediction Result
    st.subheader("Prediction Result")
    if prediction == 1:
        st.success("🟢 The model predicts: **User WILL CLICK on the ad!**")
    else:
        st.warning("🔴 The model predicts: **User WILL NOT CLICK on the ad.**")

    # Provide download button for prediction result
    st.download_button(
        label="📥 Download Prediction Result",
        data=input_data.to_csv(index=False).encode("utf-8"),
        file_name=csv_filename,
        mime="text/csv",
    )

st.markdown("---")
st.info("Select a test row from the **left panel**, click **'Predict Click'**, and download the prediction result as a CSV.")
131
+
main.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import os
3
+ from data_loader import load_and_process_data, CATEGORICAL_COLUMNS
4
+ from model_trainer import train_models
5
+ from model_manager import save_models, load_models
6
+ from model_predictor import predict
7
+ from config import MODEL_DIR
8
+ # ===========================
9
+ # MAIN FUNCTION
10
+ # ===========================
11
+
12
def main(train=True, retrain=False):
    """Train (or load) the models, predict on the test set and save the result.

    Args:
        train: When true, train new models and save them.
        retrain: Treated exactly like ``train``; either flag triggers
            training on the loaded training split.

    When both flags are false, previously saved models are loaded instead.
    In every case the predictions for the test set are written to
    ``final_predictions.csv`` in the current working directory.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(MODEL_DIR, exist_ok=True)

    print("\n🚀 Loading data...")
    # The validation split is returned by the loader but not used here.
    X_train, _X_val, y_train, _y_val, test_df = load_and_process_data()

    if train or retrain:
        print("\n🚀 Training models...")
        models = train_models(X_train, y_train, CATEGORICAL_COLUMNS)
        save_models(models)
    else:
        print("\n🚀 Loading existing models...")
        models = load_models()

    print("\n🔍 Making predictions...")
    predictions = predict(models, test_df)

    # Save final predictions
    predictions.to_csv("final_predictions.csv", index=False)
    print("\n✅ Predictions saved successfully as 'final_predictions.csv'!")
35
+
36
+ # ===========================
37
+ # COMMAND-LINE EXECUTION
38
+ # ===========================
39
if __name__ == "__main__":
    # Both flags default to False, so running the script without arguments
    # keeps the previous behaviour: load existing models and only predict.
    parser = argparse.ArgumentParser(description="Train, retrain or make predictions")
    parser.add_argument("--train", action="store_true", help="Train new models")
    parser.add_argument("--retrain", action="store_true", help="Retrain models with updated data")

    args = parser.parse_args()
    main(train=args.train, retrain=args.retrain)
modelConnector.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import joblib
3
+ import pandas as pd
4
+ from huggingface_hub import hf_hub_download, HfApi
5
+ from model_trainer import train_models # Assumes model_trainer.py exists with train_models function
6
+
7
# Hugging Face Model & Dataset Information
MODEL_REPO = "taimax13/is_click_predictor"  # repo id the model file is downloaded from and uploaded to
MODEL_FILENAME = "rf_model.pkl"  # name of the model file inside MODEL_REPO
# Repo id the training CSV is downloaded from.
# NOTE(review): hf_hub_download is called on this repo without repo_type; if it
# is a dataset repository rather than a model repository, confirm whether
# repo_type="dataset" is required.
DATA_REPO = "taimax13/is_click_data"
LOCAL_MODEL_PATH = f"models/{MODEL_FILENAME}"  # local file the model is dumped to before upload

# Hugging Face API client used for uploads.
# NOTE(review): uploading presumably needs write credentials from the
# environment or a prior login -- confirm for the deployment.
api = HfApi()
15
+
16
+
17
class ModelConnector:
    """Load, train, retrain and query the click model kept on Hugging Face.

    Attributes:
        model: The deserialized model, or ``None`` when the model file could
            not be downloaded from ``MODEL_REPO``.
    """

    def __init__(self):
        """Create the local model directory and try to load the published model."""
        os.makedirs("models", exist_ok=True)
        self.model = self.load_model()

    @staticmethod
    def _download_model():
        """Return the local path of the downloaded model file, or None on failure."""
        try:
            return hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME)
        except Exception:
            # Deliberate best effort: any download failure (missing repo or
            # file, no network, ...) is reported as "model not available".
            return None

    @staticmethod
    def _load_training_data():
        """Download the training CSV and split it into features and the ``is_click`` target."""
        # NOTE(review): app.py reads a differently named training file
        # ("train_dataset_full - train_dataset_full.csv") -- confirm that this
        # file name exists in DATA_REPO.
        train_data_path = hf_hub_download(repo_id=DATA_REPO, filename="train_dataset_full.csv")
        train_data = pd.read_csv(train_data_path)
        return train_data.drop(columns=["is_click"]), train_data["is_click"]

    @staticmethod
    def _save_and_upload(model):
        """Dump ``model`` to ``LOCAL_MODEL_PATH`` and upload that file to ``MODEL_REPO``."""
        joblib.dump(model, LOCAL_MODEL_PATH)
        api.upload_file(
            path_or_fileobj=LOCAL_MODEL_PATH,
            path_in_repo=MODEL_FILENAME,
            repo_id=MODEL_REPO,
        )

    def check_model_exists(self):
        """Return True if the model file can be downloaded from Hugging Face."""
        return self._download_model() is not None

    def load_model(self):
        """Download and deserialize the model.

        Returns:
            The loaded model, or ``None`` if the file could not be downloaded.
            The file is fetched once (the previous implementation requested
            it twice: once to probe, once to load).
        """
        model_path = self._download_model()
        if model_path is None:
            return None
        # NOTE(review): joblib.load unpickles the file and can execute
        # arbitrary code; MODEL_REPO must be a trusted source.
        return joblib.load(model_path)

    def train_model(self):
        """Train a new model, save it locally and upload it to Hugging Face.

        Returns:
            A human-readable status message. Failures are reported in the
            message instead of being raised.
        """
        try:
            X_train, y_train = self._load_training_data()

            # NOTE(review): main.py calls train_models with a third argument
            # (CATEGORICAL_COLUMNS) -- confirm that the two-argument call is
            # supported by model_trainer.train_models.
            models = train_models(X_train, y_train)
            rf_model = models["RandomForest"]

            self._save_and_upload(rf_model)

            self.model = rf_model  # Update instance with trained model
            return "Model trained and uploaded successfully!"

        except Exception as e:
            return f"Error during training: {e}"

    def retrain_model(self):
        """Refit the loaded model on the training data and upload the result.

        Returns:
            A human-readable status message. Failures are reported in the
            message instead of being raised.
        """
        # Check for a model first so a missing model is reported as such and
        # no dataset is downloaded and parsed for nothing.
        if self.model is None:
            return "No existing model found. Train a new model first."

        try:
            X_train, y_train = self._load_training_data()

            # Retrain the model
            self.model.fit(X_train, y_train)

            # Save & upload retrained model
            self._save_and_upload(self.model)

            return "Model retrained and uploaded successfully!"

        except Exception as e:
            return f"Error during retraining: {e}"

    def predict(self, input_data):
        """Predict for a single sample with the loaded model.

        Args:
            input_data: One sample; it is wrapped in a list and turned into a
                one-row DataFrame before being passed to the model.

        Returns:
            The first prediction converted to ``int``, or the message string
            ``"No model found. Train the model first."`` when no model is
            loaded (kept for backward compatibility with existing callers).
        """
        if self.model is None:
            return "No model found. Train the model first."

        input_df = pd.DataFrame([input_data])
        prediction = self.model.predict(input_df)[0]
        return int(prediction)
models/catboost_model.cbm ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bac1133bf0f84dd880f2a00b19d395c6b866e26eb6e0bdec12fe02879d528499
3
+ size 907908
models/rf_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc29e7a3fa34217333f2d715d96df473c65e03bfd4ce6bdae6716e783d44f306
3
+ size 111639881
models/xgb_model.json ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pandas
2
+ numpy
3
+ scikit-learn
4
+ imbalanced-learn
5
+ matplotlib
6
+ seaborn
7
+ catboost
8
+ xgboost
9
+ joblib
10
+ streamlit
11
+ pandas
12
+ joblib
13
+ huggingface_hub