Spaces:

jackfroooot
/

AssignmentExtraaLearnBackend

No application file

App Files Files Community

jackfroooot committed on Oct 18, 2025

Commit

40a8964

verified ·

1 Parent(s): 253a404

Upload folder using huggingface_hub

Browse files

Files changed (3) hide show

Dockerfile +10 -9
app.py +60 -95
requirements.txt +1 -5

Dockerfile CHANGED Viewed

@@ -1,16 +1,17 @@
 FROM python:3.9-slim
-# Set the working directory inside the container
 WORKDIR /app
-# Copy all files from the current directory to the container's working directory
 COPY . .
-# Install dependencies from the requirements file without using cache to reduce image size
-RUN pip install --no-cache-dir --upgrade -r requirements.txt
-# Define the command to start the application using Gunicorn with 4 worker processes
-# - `-w 4`: Uses 4 worker processes for handling requests
-# - `-b 0.0.0.0:7860`: Binds the server to port 7860 on all network interfaces
-# - `app:app`: Runs the Flask app (assuming `app.py` contains the Flask instance named `app`)
-CMD ["gunicorn", "-w", "4", "-b", "0.0.0.0:7860", "app:cust_predictor_api"]

+# Use a minimal base image with Python 3.9 installed
 FROM python:3.9-slim
+# Set the working directory inside the container to /app
 WORKDIR /app
+# Copy all files from the current directory on the host to the container's /app directory
 COPY . .
+# Install Python dependencies listed in requirements.txt
+RUN pip3 install -r requirements.txt
+# Define the command to run the Streamlit app on port 7860 and make it accessible externally
+#CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0", "--server.enableXsrfProtection=false"]
+CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0", "--server.enableXsrfProtection=false"]
+# NOTE: Disable XSRF protection for easier external access in order to make batch predictions

app.py CHANGED Viewed

@@ -1,96 +1,61 @@
-# Import necessary libraries
-import numpy as np
-import joblib  # For loading the serialized model
-import pandas as pd  # For data manipulation
-from flask import Flask, request, jsonify  # For creating the Flask API
-# Initialize the Flask application
-cust_predictor_api = Flask("ExtraaLearn Customer Predictor")
-# Load the trained machine learning model
-model = joblib.load("customer_prediction_model_v1_0.joblib")
-# Define a route for the home page (GET request)
-@cust_predictor_api.get('/')
-def home():
-    """
-    This function handles GET requests to the root URL ('/') of the API.
-    It returns a simple welcome message.
-    """
-    return "Welcome to the ExtraaLearn Customer Prediction API!"
-classification_threshold = 0.45
-# Define an endpoint for customer prediction (POST request)
-@cust_predictor_api.post('/v1/cust_lead')
-def predict_cust_lead():
-    """
-    This function handles POST requests to the '/v1/cust_lead' endpoint.
-    It expects a JSON payload containing customer details and returns
-    the predicted customer probability as a JSON response.
-    """
-    # Get the JSON data from the request body
-    cust_data = request.get_json()
-    # Extract relevant features from the JSON data
-    sample = {
-		'age' 	: cust_data['age'],
-		'current_occupation' 	: cust_data['current_occupation'],
-		'first_interaction' 	: cust_data['first_interaction'],
-		'profile_completed' 	: cust_data['profile_completed'],
-		'website_visits' 	: cust_data['website_visits'],
-		'time_spent_on_website' 	: cust_data['time_spent_on_website'],
-		'page_views_per_visit' 	: cust_data['page_views_per_visit'],
-		'last_activity' 	: cust_data['last_activity'],
-		'print_media_type1' 	: cust_data['print_media_type1'],
-		'print_media_type2' 	: cust_data['print_media_type2'],
-		'digital_media' 	: cust_data['digital_media'],
-		'educational_channels' 	: cust_data['educational_channels'],
-		'referral' 	: cust_data['referral']
-    }
-    # Convert the extracted data into a Pandas DataFrame
-    input_data = pd.DataFrame([sample])
-    # Make prediction
-    predicted_cust = model.predict_proba(input_data)[0][1]
-    # convert continuous prob as 0/1
-    predicted_cust = (predicted_cust >= classification_threshold).astype(int)
-    # Return the actual prediction status
-    return jsonify({'Predicted customer status': predicted_cust})
-# Define an endpoint for batch prediction (POST request)
-@cust_predictor_api.post('/v1/cust_lead_batch')
-def predict_cust_lead_batch():
-    """
-    This function handles POST requests to the '/v1/cust_lead_batch' endpoint.
-    It expects a CSV file containing property details for multiple properties
-    and returns the predicted status as a dictionary in the JSON response.
-    """
-    # Get the uploaded CSV file from the request
-    file = request.files['file']
-    # Read the CSV file into a Pandas DataFrame
-    input_data = pd.read_csv(file)
-    # Make predictions for all properties in the DataFrame (get log_prices)
-    predicted_cust_list = model.predict_proba(input_data)[0][1]
-    predicted_cust_list = predicted_cust_list.tolist()
-    # Calculate actual prices
-    predicted_cust_list = [round(float(np.exp(log_price)), 2) for log_price in predicted_log_prices]
-    predicted_cust_list = [(predicted_cust >= classification_threshold).astype(int)  for predicted_cust in predicted_cust_list]
-    # Create a dictionary of predictions with customer IDs as keys
-    ids = input_data['ID'].tolist()
-    output_dict = dict(zip(ids, predicted_cust_list))
-    # Return the predictions dictionary as a JSON response
-    return output_dict
-# Run the Flask application in debug mode if this script is executed directly
-if __name__ == '__main__':
-    cust_predictor_api.run(debug=True)

+import streamlit as st
+import pandas as pd
+import joblib
+import warnings
+warnings.filterwarnings("ignore", message=".*ScriptRunContext.*")
+# Load the trained model once and reuse it across reruns.
+# Streamlit re-executes this script on every widget interaction; st.cache_resource
+# keeps the deserialized model in memory instead of re-reading the file each time.
+@st.cache_resource
+def load_model():
+    """Return the serialized customer-prediction model loaded from disk."""
+    return joblib.load("customer_prediction_model_v1_0.joblib")
+model = load_model()
+# Set the title of the Streamlit app
+st.title("ExtraaLearn Customer Predictor")
+st.subheader("Online Prediction")
+# Collect user input for each lead feature, one widget per dataset column.
+# Numeric features: `age` is bounded to 5-90; the remaining counters only have a lower bound of 0.
+age                        = st.number_input("age", min_value=5, max_value=90, step=1, value=30)
+website_visits             = st.number_input("website_visits", min_value=0, step=1, value=1)
+time_spent_on_website      = st.number_input("time_spent_on_website", min_value=0, step=1, value=1)
+page_views_per_visit       = st.number_input("page_views_per_visit", min_value=0, step=1, value=1)
+# Categorical features are picked from fixed option lists.
+# NOTE(review): presumably these options must match the category values the model was trained on - confirm.
+current_occupation         = st.selectbox("current_occupation", ["Professional", "Student", "Unemployed"])
+first_interaction          = st.selectbox("first_interaction", ["Mobile App", "Website"])
+profile_completed          = st.selectbox("profile_completed", ["Medium", "High", "Low"])
+last_activity              = st.selectbox("last_activity", ["Website Activity", "Email Activity", "Phone Activity"])
+# Yes/No flags for the marketing channels and referral columns.
+print_media_type1          = st.selectbox("print_media_type1", ["Yes", "No"])
+print_media_type2          = st.selectbox("print_media_type2", ["Yes", "No"])
+digital_media              = st.selectbox("digital_media", ["Yes", "No"])
+educational_channels       = st.selectbox("educational_channels", ["Yes", "No"])
+referral                   = st.selectbox("referral", ["Yes", "No"])
+# Convert user input into a single-row DataFrame keyed by the dataset column names.
+# Each value must be the widget variable itself (not a quoted string); otherwise the
+# model receives the column names as data and the user's selections are ignored.
+input_data = pd.DataFrame([{
+    'age': age,
+    'website_visits': website_visits,
+    'time_spent_on_website': time_spent_on_website,
+    'page_views_per_visit': page_views_per_visit,
+    'current_occupation': current_occupation,
+    'first_interaction': first_interaction,
+    'profile_completed': profile_completed,
+    'last_activity': last_activity,
+    'print_media_type1': print_media_type1,
+    'print_media_type2': print_media_type2,
+    'digital_media': digital_media,
+    'educational_channels': educational_channels,
+    'referral': referral,
+}])
+# Minimum positive-class probability for a lead to be labelled "Join"
+classification_threshold = 0.5
+# Run the model only when the user clicks the button
+if st.button("Predict"):
+    # Row 0 is the single input row; column 1 is the probability of the second class
+    # (assumed to be the positive "Join" label - verify against model.classes_).
+    prediction_proba = float(model.predict_proba(input_data)[0, 1])
+    prediction = int(prediction_proba >= classification_threshold)
+    result = "Join" if prediction == 1 else "not join"
+    st.write(f"Prediction: The customer is likely to **{result}**.")
+    # This is the probability behind the "Join" label, so it must not be reported as churn.
+    st.write(f"Join Probability: {prediction_proba:.2f}")

requirements.txt CHANGED Viewed

@@ -1,11 +1,7 @@
 pandas==2.2.2
 numpy==2.0.2
 scikit-learn==1.6.1
 xgboost==2.1.4
 joblib==1.4.2
-Werkzeug==2.2.2
-flask==2.2.2
-gunicorn==20.1.0
-requests==2.28.1
-uvicorn[standard]
 streamlit==1.43.2

 pandas==2.2.2
 numpy==2.0.2
 scikit-learn==1.6.1
 xgboost==2.1.4
 joblib==1.4.2
 streamlit==1.43.2