chkp-talexm committed on
Commit
c50bbfb
·
1 Parent(s): 6c67532
Files changed (1) hide show
  1. app.py +90 -66
app.py CHANGED
@@ -1,69 +1,93 @@
import streamlit as st
import pandas as pd
import os
from huggingface_hub import HfApi

# Hugging Face Dataset Repository Info
HF_DATASET_REPO = "chagu13/is_click_data"
HF_USER = "chagu13"  # Your HF username (required for authentication)
UPLOAD_DIR = "uploaded_files"

# Ensure the local staging directory exists before any write happens.
os.makedirs(UPLOAD_DIR, exist_ok=True)

st.title("Is_click Predictor - Upload to Hugging Face")

# Step 1: Upload File
st.header("Upload a File")

uploaded_file = st.file_uploader("Choose a file", type=["csv", "txt"])
file_name = st.text_input("Enter a name to save the file")

if uploaded_file and file_name:
    # Preserve the original extension; the user only supplies the base name.
    file_extension = uploaded_file.name.split(".")[-1]
    file_path = os.path.join(UPLOAD_DIR, f"{file_name}.{file_extension}")

    with open(file_path, "wb") as f:
        f.write(uploaded_file.read())

    st.success(f"File saved as **{file_name}.{file_extension}**")

    # Step 2: Upload to Hugging Face
    if st.button("Upload to Hugging Face"):
        api = HfApi()
        try:
            api.upload_file(
                path_or_fileobj=file_path,
                path_in_repo=f"{file_name}.{file_extension}",
                repo_id=HF_DATASET_REPO,
                # BUG FIX: the target repo is a *dataset* repository, but
                # upload_file defaults to repo_type="model". Without this
                # argument the upload targets the wrong repo type and fails.
                repo_type="dataset",
            )
            st.success(f"✅ Successfully uploaded to Hugging Face: {HF_DATASET_REPO}/{file_name}.{file_extension}")
        except Exception as e:
            # Surface the failure to the user instead of crashing the app.
            st.error(f"❌ Failed to upload: {e}")

# Step 3: Display and Download File
st.header("View and Download File")

files = os.listdir(UPLOAD_DIR)
selected_file = st.selectbox("Select a file to view", files if files else ["No files available"])

if selected_file and selected_file != "No files available":
    file_path = os.path.join(UPLOAD_DIR, selected_file)

    # Check file size (bytes -> MB) to decide whether to truncate the preview.
    file_size = os.path.getsize(file_path) / (1024 * 1024)
    is_large = file_size > 1
    if is_large:
        st.warning("File is too large. Displaying only the first 5 rows.")

    # Read and display content. For large files, read only what is shown
    # instead of loading the whole file into memory first.
    if selected_file.endswith(".csv"):
        df = pd.read_csv(file_path, nrows=5) if is_large else pd.read_csv(file_path)
        st.dataframe(df)
    else:
        with open(file_path, "r", encoding="utf-8") as f:
            if is_large:
                # Pull at most 5 lines lazily rather than f.readlines() on a big file.
                preview_lines = [line for _, line in zip(range(5), f)]
                st.text("".join(preview_lines))
            else:
                st.text(f.read())

    # Button to download the full (untruncated) file.
    with open(file_path, "rb") as f:
        st.download_button("Download Full File", f, file_name=selected_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import streamlit as st
import pandas as pd
import os
import joblib
from huggingface_hub import hf_hub_download

# Hugging Face Model Repo
MODEL_REPO = "chagu13/is_click_predictor"
MODEL_DIR = "models"
os.makedirs(MODEL_DIR, exist_ok=True)  # Ensure directory exists

# Filenames as they appear *inside the repo* (under a "models/" subfolder).
_REPO_MODEL_FILES = {
    "catboost": "models/catboost_model.pkl",
    "xgb": "models/xgb_model.pkl",
    "rf": "models/rf_model.pkl",
}

# Local paths mirror the repo layout: hf_hub_download(local_dir=MODEL_DIR)
# preserves the repo subdirectory, so files land in MODEL_DIR/models/*.pkl.
CATBOOST_MODEL_PATH = os.path.join(MODEL_DIR, "models", "catboost_model.pkl")
XGB_MODEL_PATH = os.path.join(MODEL_DIR, "models", "xgb_model.pkl")
RF_MODEL_PATH = os.path.join(MODEL_DIR, "models", "rf_model.pkl")


def _fetch_model(repo_filename):
    """Return the local path of *repo_filename*, downloading it if absent.

    BUG FIX: the original code downloaded with local_dir=MODEL_DIR but then
    loaded from MODEL_DIR/<basename>. hf_hub_download preserves the repo's
    "models/" subfolder under local_dir, so the file actually lands at
    MODEL_DIR/models/<basename> — the existence check never passed (forcing a
    re-download on every run) and joblib.load raised FileNotFoundError.
    Using the path returned by hf_hub_download makes both consistent.
    """
    expected_path = os.path.join(MODEL_DIR, *repo_filename.split("/"))
    if os.path.exists(expected_path):
        return expected_path
    return hf_hub_download(repo_id=MODEL_REPO, filename=repo_filename, local_dir=MODEL_DIR)


def load_models():
    """Download (if needed) and deserialize the three trained models.

    Returns:
        tuple: (catboost_model, xgb_model, rf_model) as loaded by joblib.
    """
    catboost_model = joblib.load(_fetch_model(_REPO_MODEL_FILES["catboost"]))
    xgb_model = joblib.load(_fetch_model(_REPO_MODEL_FILES["xgb"]))
    rf_model = joblib.load(_fetch_model(_REPO_MODEL_FILES["rf"]))
    return catboost_model, xgb_model, rf_model


# Load models at startup
st.title("Is_Click Predictor - ML Model Inference")
st.info("Upload a CSV file, and the trained models will predict click probability.")

catboost, xgb, rf = load_models()

# Upload File
uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
if uploaded_file:
    input_df = pd.read_csv(uploaded_file)
    st.success("File uploaded successfully!")

    # Make Predictions
    st.subheader("Predictions in Progress...")

    catboost_preds = catboost.predict(input_df)
    xgb_preds = xgb.predict(input_df)
    rf_preds = rf.predict(input_df)

    # Positive-class probability (column 1 of predict_proba) for each model.
    catboost_probs = catboost.predict_proba(input_df)[:, 1]
    xgb_probs = xgb.predict_proba(input_df)[:, 1]
    rf_probs = rf.predict_proba(input_df)[:, 1]

    # Combine binary results
    predictions_df = pd.DataFrame({
        "CatBoost": catboost_preds,
        "XGBoost": xgb_preds,
        "RandomForest": rf_preds
    })

    # Ensemble rule: predict click if at least one model predicts 1
    # (row-wise max of the 0/1 columns).
    predictions_df["is_click_predicted"] = predictions_df.max(axis=1)

    # Generate probability file
    probabilities_df = pd.DataFrame({
        "CatBoost_Prob": catboost_probs,
        "XGBoost_Prob": xgb_probs,
        "RandomForest_Prob": rf_probs
    })

    # Save results
    binary_predictions_path = "binary_predictions.csv"
    filtered_predictions_path = "filtered_predictions.csv"
    probabilities_path = "model_probabilities.csv"

    predictions_df.to_csv(binary_predictions_path, index=False)
    predictions_df[predictions_df["is_click_predicted"] == 1].to_csv(filtered_predictions_path, index=False)
    probabilities_df.to_csv(probabilities_path, index=False)

    st.success("Predictions completed! Download results below.")

    # Download Buttons
    with open(binary_predictions_path, "rb") as f:
        st.download_button("Download Binary Predictions (0/1)", f, file_name="binary_predictions.csv")

    with open(filtered_predictions_path, "rb") as f:
        st.download_button("Download Clicked Predictions (Only 1s)", f, file_name="filtered_predictions.csv")

    with open(probabilities_path, "rb") as f:
        st.download_button("Download Probability Predictions", f, file_name="model_probabilities.csv")