chkp-talexm committed on
Commit
c50bbfb
·
1 Parent(s): 6c67532
Files changed (1) hide show
  1. app.py +90 -66
app.py CHANGED
@@ -1,69 +1,93 @@
import streamlit as st
import pandas as pd
import os
from huggingface_hub import HfApi

# Hugging Face Dataset Repository Info
HF_DATASET_REPO = "chagu13/is_click_data"
HF_USER = "chagu13"  # Your HF username (required for authentication)
UPLOAD_DIR = "uploaded_files"

# Ensure the local staging directory exists before any write happens.
os.makedirs(UPLOAD_DIR, exist_ok=True)

st.title("Is_click Predictor - Upload to Hugging Face")

# Step 1: Upload File
st.header("Upload a File")

uploaded_file = st.file_uploader("Choose a file", type=["csv", "txt"])
file_name = st.text_input("Enter a name to save the file")

if uploaded_file and file_name:
    # Preserve the original extension; the user only supplies the base name.
    file_extension = uploaded_file.name.split(".")[-1]
    file_path = os.path.join(UPLOAD_DIR, f"{file_name}.{file_extension}")

    with open(file_path, "wb") as f:
        f.write(uploaded_file.read())

    st.success(f"File saved as **{file_name}.{file_extension}**")

    # Step 2: Upload to Hugging Face
    if st.button("Upload to Hugging Face"):
        api = HfApi()
        try:
            api.upload_file(
                path_or_fileobj=file_path,
                path_in_repo=f"{file_name}.{file_extension}",
                repo_id=HF_DATASET_REPO,
                # BUG FIX: the target repo is a *dataset* repository, but
                # upload_file defaults to repo_type="model". Without this
                # argument the upload targets the wrong repo type and fails.
                repo_type="dataset",
            )
            st.success(f"✅ Successfully uploaded to Hugging Face: {HF_DATASET_REPO}/{file_name}.{file_extension}")
        except Exception as e:
            # Surface the failure to the user instead of crashing the app.
            st.error(f"❌ Failed to upload: {e}")

# Step 3: Display and Download File
st.header("View and Download File")

files = os.listdir(UPLOAD_DIR)
selected_file = st.selectbox("Select a file to view", files if files else ["No files available"])

if selected_file and selected_file != "No files available":
    file_path = os.path.join(UPLOAD_DIR, selected_file)

    # Check file size (bytes -> MB) to decide whether to truncate the preview.
    file_size = os.path.getsize(file_path) / (1024 * 1024)
    is_large = file_size > 1
    if is_large:
        st.warning("File is too large. Displaying only the first 5 rows.")

    # Read and display content. For large files, read only what is shown
    # instead of loading the whole file into memory first.
    if selected_file.endswith(".csv"):
        df = pd.read_csv(file_path, nrows=5) if is_large else pd.read_csv(file_path)
        st.dataframe(df)
    else:
        with open(file_path, "r", encoding="utf-8") as f:
            if is_large:
                # Pull at most 5 lines lazily rather than f.readlines() on a big file.
                preview_lines = [line for _, line in zip(range(5), f)]
                st.text("".join(preview_lines))
            else:
                st.text(f.read())

    # Button to download the full (untruncated) file.
    with open(file_path, "rb") as f:
        st.download_button("Download Full File", f, file_name=selected_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import streamlit as st
import pandas as pd
import os
import joblib
from huggingface_hub import hf_hub_download

# Hugging Face Model Repo
MODEL_REPO = "chagu13/is_click_predictor"
MODEL_DIR = "models"
os.makedirs(MODEL_DIR, exist_ok=True)  # Ensure directory exists

# Filenames as they appear *inside the repo* (under a "models/" subfolder).
_REPO_MODEL_FILES = {
    "catboost": "models/catboost_model.pkl",
    "xgb": "models/xgb_model.pkl",
    "rf": "models/rf_model.pkl",
}

# Local paths mirror the repo layout: hf_hub_download(local_dir=MODEL_DIR)
# preserves the repo subdirectory, so files land in MODEL_DIR/models/*.pkl.
CATBOOST_MODEL_PATH = os.path.join(MODEL_DIR, "models", "catboost_model.pkl")
XGB_MODEL_PATH = os.path.join(MODEL_DIR, "models", "xgb_model.pkl")
RF_MODEL_PATH = os.path.join(MODEL_DIR, "models", "rf_model.pkl")


def _fetch_model(repo_filename):
    """Return the local path of *repo_filename*, downloading it if absent.

    BUG FIX: the original code downloaded with local_dir=MODEL_DIR but then
    loaded from MODEL_DIR/<basename>. hf_hub_download preserves the repo's
    "models/" subfolder under local_dir, so the file actually lands at
    MODEL_DIR/models/<basename> — the existence check never passed (forcing a
    re-download on every run) and joblib.load raised FileNotFoundError.
    Using the path returned by hf_hub_download makes both consistent.
    """
    expected_path = os.path.join(MODEL_DIR, *repo_filename.split("/"))
    if os.path.exists(expected_path):
        return expected_path
    return hf_hub_download(repo_id=MODEL_REPO, filename=repo_filename, local_dir=MODEL_DIR)


def load_models():
    """Download (if needed) and deserialize the three trained models.

    Returns:
        tuple: (catboost_model, xgb_model, rf_model) as loaded by joblib.
    """
    catboost_model = joblib.load(_fetch_model(_REPO_MODEL_FILES["catboost"]))
    xgb_model = joblib.load(_fetch_model(_REPO_MODEL_FILES["xgb"]))
    rf_model = joblib.load(_fetch_model(_REPO_MODEL_FILES["rf"]))
    return catboost_model, xgb_model, rf_model


# Load models at startup
st.title("Is_Click Predictor - ML Model Inference")
st.info("Upload a CSV file, and the trained models will predict click probability.")

catboost, xgb, rf = load_models()

# Upload File
uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
if uploaded_file:
    input_df = pd.read_csv(uploaded_file)
    st.success("File uploaded successfully!")

    # Make Predictions
    st.subheader("Predictions in Progress...")

    catboost_preds = catboost.predict(input_df)
    xgb_preds = xgb.predict(input_df)
    rf_preds = rf.predict(input_df)

    # Positive-class probability (column 1 of predict_proba) for each model.
    catboost_probs = catboost.predict_proba(input_df)[:, 1]
    xgb_probs = xgb.predict_proba(input_df)[:, 1]
    rf_probs = rf.predict_proba(input_df)[:, 1]

    # Combine binary results
    predictions_df = pd.DataFrame({
        "CatBoost": catboost_preds,
        "XGBoost": xgb_preds,
        "RandomForest": rf_preds
    })

    # Ensemble rule: predict click if at least one model predicts 1
    # (row-wise max of the 0/1 columns).
    predictions_df["is_click_predicted"] = predictions_df.max(axis=1)

    # Generate probability file
    probabilities_df = pd.DataFrame({
        "CatBoost_Prob": catboost_probs,
        "XGBoost_Prob": xgb_probs,
        "RandomForest_Prob": rf_probs
    })

    # Save results
    binary_predictions_path = "binary_predictions.csv"
    filtered_predictions_path = "filtered_predictions.csv"
    probabilities_path = "model_probabilities.csv"

    predictions_df.to_csv(binary_predictions_path, index=False)
    predictions_df[predictions_df["is_click_predicted"] == 1].to_csv(filtered_predictions_path, index=False)
    probabilities_df.to_csv(probabilities_path, index=False)

    st.success("Predictions completed! Download results below.")

    # Download Buttons
    with open(binary_predictions_path, "rb") as f:
        st.download_button("Download Binary Predictions (0/1)", f, file_name="binary_predictions.csv")

    with open(filtered_predictions_path, "rb") as f:
        st.download_button("Download Clicked Predictions (Only 1s)", f, file_name="filtered_predictions.csv")

    with open(probabilities_path, "rb") as f:
        st.download_button("Download Probability Predictions", f, file_name="model_probabilities.csv")