surnellas committed on
Commit b10edef · verified · 1 Parent(s): 549fa3a

Upload folder using huggingface_hub

Files changed (5)
  1. Dockerfile +16 -21
  2. XGBoost_best_model.joblib +3 -0
  3. app.py +73 -0
  4. custom_transformers.py +73 -0
  5. requirements.txt +11 -3
Dockerfile CHANGED
@@ -1,21 +1,16 @@
- FROM python:3.9-slim
-
- WORKDIR /app
-
- RUN apt-get update && apt-get install -y \
-     build-essential \
-     curl \
-     software-properties-common \
-     git \
-     && rm -rf /var/lib/apt/lists/*
-
- COPY requirements.txt ./
- COPY src/ ./src/
-
- RUN pip3 install -r requirements.txt
-
- EXPOSE 8501
-
- HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
-
- ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
+ FROM python:3.9-slim
+
+ # Set the working directory inside the container
+ WORKDIR /app
+
+ # Copy all files from the current directory to the container's working directory
+ COPY . .
+
+ # Install dependencies from the requirements file without using cache to reduce image size
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+ # Define the command to start the application using Gunicorn with 4 worker processes
+ # - `-w 4`: uses 4 worker processes for handling requests
+ # - `-b 0.0.0.0:7860`: binds the server to port 7860 on all network interfaces
+ # - `app:sales_predictor_api`: runs the Flask instance named `sales_predictor_api` defined in `app.py`
+ CMD ["gunicorn", "-w", "4", "-b", "0.0.0.0:7860", "app:sales_predictor_api"]
 
 
 
 
 
XGBoost_best_model.joblib ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:37682de00f58050b36c832de82200b17a7703b6a95573e6f20a5beee36aa6b0b
+ size 2510935
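The entry above is a Git LFS pointer rather than the model binary itself; the `oid` is the SHA-256 of the actual file. A short sketch for verifying a downloaded copy against the pointer, assuming the file was fetched to the working directory under the same name:

```python
# Check that a locally downloaded XGBoost_best_model.joblib matches the LFS pointer's oid.
import hashlib

with open("XGBoost_best_model.joblib", "rb") as f:
    digest = hashlib.sha256(f.read()).hexdigest()

print(digest)
print(digest == "37682de00f58050b36c832de82200b17a7703b6a95573e6f20a5beee36aa6b0b")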
app.py ADDED
@@ -0,0 +1,73 @@
+ # Import necessary libraries
+ import sys
+
+ import joblib  # For loading the serialized model pipeline
+ import pandas as pd  # For data manipulation
+ from flask import Flask, request, jsonify  # For creating the Flask API
+
+ from custom_transformers import SkewnessCapper
+
+ # The pipeline was pickled in a script where SkewnessCapper lived in __main__,
+ # so expose the class there again before unpickling.
+ print("Overriding SkewnessCapper in __main__")
+ sys.modules['__main__'].SkewnessCapper = SkewnessCapper
+ print("Overriding SkewnessCapper succeeded")
+
+ print("Loading XGBoost model with joblib")
+ model = joblib.load("XGBoost_best_model.joblib")
+ print("Model loaded successfully!")
+
+ # Initialize the Flask application
+ sales_predictor_api = Flask("SuperKart Sales Prediction")
+
+ # Define a route for the home page (GET request)
+ @sales_predictor_api.get('/')
+ def home():
+     """
+     Handle GET requests to the root URL ('/') of the API and
+     return a simple welcome message.
+     """
+     return "Welcome to the SuperKart Sales Prediction API!"
+
+ # Define an endpoint for a single sales prediction (POST request)
+ @sales_predictor_api.post('/v1/sales')
+ def predict_sales():
+     """
+     Handle POST requests to the '/v1/sales' endpoint.
+     Expects a JSON payload with product and store details and returns
+     the predicted sales figure as a JSON response.
+     """
+     # Get the JSON data from the request body
+     request_data = request.get_json()
+
+     # Extract the relevant features from the JSON data
+     sample = {
+         'Product_Weight': request_data['Product_Weight'],
+         'Product_Allocated_Area': request_data['Product_Allocated_Area'],
+         'Product_MRP': request_data['Product_MRP'],
+         'Product_Sugar_Content': request_data['Product_Sugar_Content'],
+         'Product_Type': request_data['Product_Type'],
+         'Store_Establishment_Year': request_data['Store_Establishment_Year'],
+         'Store_Size': request_data['Store_Size'],
+         'Store_Location_City_Type': request_data['Store_Location_City_Type'],
+         'Store_Type': request_data['Store_Type']
+     }
+
+     # Convert the extracted data into a single-row Pandas DataFrame
+     input_data = pd.DataFrame([sample])
+
+     # Run the pipeline and cast the numpy scalar to a plain float so it is JSON-serializable
+     predicted_sales = float(model.predict(input_data)[0])
+
+     # Return the predicted sales value
+     return jsonify({'Predicted Sales': predicted_sales})
+
+ # Run the Flask development server if this script is executed directly
+ if __name__ == '__main__':
+     sales_predictor_api.run(debug=True)
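For reference, a hedged client-side example of calling the `/v1/sales` endpoint. The field names match the keys `app.py` extracts; the sample values are purely illustrative, and the base URL assumes a local container published on port 7860:

```python
# Illustrative call to the /v1/sales endpoint. Field names come from app.py;
# the values below are made-up examples, not real SuperKart records.
import requests

payload = {
    "Product_Weight": 12.5,
    "Product_Allocated_Area": 0.05,
    "Product_MRP": 150.0,
    "Product_Sugar_Content": "Low Sugar",
    "Product_Type": "Frozen Foods",
    "Store_Establishment_Year": 2009,
    "Store_Size": "Medium",
    "Store_Location_City_Type": "Tier 2",
    "Store_Type": "Supermarket Type1",
}

response = requests.post("http://localhost:7860/v1/sales", json=payload)
print(response.json())  # e.g. {"Predicted Sales": ...}
```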
custom_transformers.py ADDED
@@ -0,0 +1,73 @@
+ import numpy as np
+ import pandas as pd
+ from sklearn.base import BaseEstimator, TransformerMixin
+ from sklearn.preprocessing import OrdinalEncoder
+ from sklearn.pipeline import Pipeline
+ from sklearn.compose import ColumnTransformer
+
+ # ===== Custom Transformer for Skewness and Capping =====
+ class SkewnessCapper(BaseEstimator, TransformerMixin):
+     def __init__(self, skew_threshold=0.5, cap_factor=3.0):
+         self.skew_threshold = skew_threshold
+         self.cap_factor = cap_factor
+         self.skewed_features_ = None
+         self.feature_caps_ = {}
+
+     def fit(self, X, y=None):
+         X = pd.DataFrame(X)
+         # Flag columns whose absolute skewness exceeds the threshold
+         skewness = X.apply(lambda col: col.skew(skipna=True))
+         self.skewed_features_ = skewness[abs(skewness) > self.skew_threshold].index.tolist()
+
+         # Store caps (mean ± cap_factor * std) for each feature
+         for col in X.columns:
+             mean, std = X[col].mean(), X[col].std()
+             self.feature_caps_[col] = (mean - self.cap_factor * std, mean + self.cap_factor * std)
+         return self
+
+     def transform(self, X):
+         X = pd.DataFrame(X).copy()
+         # Cap values to the ranges learned during fit
+         for col, (lower, upper) in self.feature_caps_.items():
+             X[col] = np.clip(X[col], lower, upper)
+         # Log-transform skewed columns
+         for col in self.skewed_features_:
+             X[col] = np.log1p(X[col] - X[col].min() + 1)
+         return X
+
+ # ===== Example usage (runs only when the module is executed directly) =====
+ if __name__ == '__main__':
+     df = pd.DataFrame({
+         'num1': [1, 2, 3, 100, 5],
+         'num2': [10, 15, 14, 13, 1000],
+         'cat1': ['A', 'B', 'A', 'C', 'B'],
+         'cat2': ['X', 'X', 'Y', 'Z', 'Y']
+     })
+
+     # Separate numeric and categorical columns
+     num_features = df.select_dtypes(include=[np.number]).columns.tolist()
+     cat_features = df.select_dtypes(exclude=[np.number]).columns.tolist()
+
+     # ===== Pipelines for Numeric & Categorical =====
+     numeric_pipeline = Pipeline(steps=[
+         ('skew_cap', SkewnessCapper())
+     ])
+
+     categorical_pipeline = Pipeline(steps=[
+         ('encode', OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1))
+     ])
+
+     # ===== Combine into a ColumnTransformer =====
+     preprocessor = ColumnTransformer(
+         transformers=[
+             ('num', numeric_pipeline, num_features),
+             ('cat', categorical_pipeline, cat_features)
+         ]
+     )
+
+     # ===== Full Pipeline =====
+     full_pipeline = Pipeline(steps=[
+         ('preprocessor', preprocessor)
+     ])
+
+     # ===== Transform the Data =====
+     df_transformed = full_pipeline.fit_transform(df)
+     print(pd.DataFrame(df_transformed))
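A quick, hedged check (not part of the commit) that `SkewnessCapper` survives a joblib round trip when it is imported from `custom_transformers` rather than defined in `__main__`; in that case no `sys.modules` alias is needed at load time. The scratch file name and the toy column are illustrative:

```python
# Fit a SkewnessCapper on a tiny frame, dump it with joblib, and reload it.
import joblib
import pandas as pd
from custom_transformers import SkewnessCapper

capper = SkewnessCapper(skew_threshold=0.5, cap_factor=3.0)
capper.fit(pd.DataFrame({'x': [1.0, 2.0, 3.0, 100.0, 5.0]}))

joblib.dump(capper, "capper_roundtrip.joblib")   # hypothetical scratch file
restored = joblib.load("capper_roundtrip.joblib")
print(restored.feature_caps_)                    # caps learned during fit are preserved
```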
requirements.txt CHANGED
@@ -1,3 +1,11 @@
- altair
- pandas
- streamlit
+ pandas==2.2.2
+ numpy==2.0.2
+ scikit-learn==1.6.1
+ xgboost==2.1.4
+ joblib==1.4.2
+ Werkzeug==2.2.2
+ flask==2.2.2
+ gunicorn==20.1.0
+ requests==2.28.1
+ uvicorn[standard]
+ streamlit==1.43.2
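A small, hedged sanity check that the pinned runtime dependencies resolved as expected inside the image (package names taken from the list above; run after `pip install -r requirements.txt`):

```python
# Print the installed versions of the core pinned packages to confirm the pins took effect.
from importlib.metadata import version

for pkg in ["pandas", "numpy", "scikit-learn", "xgboost", "joblib", "flask", "gunicorn"]:
    print(pkg, version(pkg))
```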