Spaces:

gauravsahu1990
/

Store-Capacity-Predictor-Backend

Sleeping

App Files Files Community

gauravsahu1990 committed on Oct 8, 2025

Commit

72bce8e

verified ·

1 Parent(s): 3585319

Upload folder using huggingface_hub

Browse files

Files changed (6) hide show

CatBoostWrapper.py +38 -0
Dockerfile +16 -0
FeatureEngineering.py +25 -0
app.py +139 -0
catbooster_model_v1_0.joblib +3 -0
requirements.txt +13 -0

CatBoostWrapper.py ADDED Viewed

	@@ -0,0 +1,38 @@

+from sklearn.base import BaseEstimator, TransformerMixin
+import numpy as np
+from catboost import CatBoostRegressor
+import pandas as pd
+# ---------------------------
+# Full  CatBoost
+# ---------------------------
+class CatBoostWrapper(BaseEstimator):
+    def __init__(self, iterations=2000, learning_rate=0.03, depth=6, l2_leaf_reg=5, random_seed=42):
+        self.iterations = iterations
+        self.learning_rate = learning_rate
+        self.depth = depth
+        self.l2_leaf_reg = l2_leaf_reg
+        self.random_seed = random_seed
+        self.model = None
+    def fit(self, X, y):
+        self.model = CatBoostRegressor(
+            iterations=self.iterations,
+            learning_rate=self.learning_rate,
+            depth=self.depth,
+            l2_leaf_reg=self.l2_leaf_reg,
+            eval_metric='RMSE',
+            random_seed=self.random_seed,
+            early_stopping_rounds=100,
+            verbose=100
+        )
+        self.model.fit(X, y)
+        return self
+    def predict(self, X):
+        return self.model.predict(X)
+    def feature_importances_(self, feature_names):
+        return pd.DataFrame({
+            'Feature': feature_names,
+            'Importance': self.model.get_feature_importance()
+        }).sort_values(by='Importance', ascending=False)

Dockerfile ADDED Viewed

	@@ -0,0 +1,16 @@

+FROM python:3.9-slim
+# Set the working directory inside the container
+WORKDIR /app
+# Copy only the dependency list first so the pip layer below is rebuilt
+# only when requirements.txt changes, not on every source edit
+COPY requirements.txt .
+# Install dependencies from the requirements file without using cache to reduce image size
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the rest of the project (application code and model artifact)
+COPY . .
+# Define the command to start the application using Gunicorn with 4 worker processes
+# - `-w 4`: Uses 4 worker processes for handling requests
+# - `-b 0.0.0.0:7860`: Binds the server to port 7860 on all network interfaces
+# - `app:app`: Runs the Flask app (`app.py` contains the Flask instance named `app`)
+CMD ["gunicorn", "-w", "4", "-b", "0.0.0.0:7860", "app:app"]

FeatureEngineering.py ADDED Viewed

	@@ -0,0 +1,25 @@

+from sklearn.base import BaseEstimator, TransformerMixin
+import numpy as np
+from catboost import CatBoostRegressor
+import pandas as pd
+# ---------------------------
+# Custom Feature Engineering Transformer
+# ---------------------------
+class FeatureEngineering(BaseEstimator, TransformerMixin):
+    def __init__(self):
+        pass
+    def fit(self, X, y=None):
+        return self
+    def transform(self, X):
+        X_ = X.copy()
+        X_['StaffRatio'] = X_['StaffOnline'] / X_['StaffEmployed']
+        X_['TotalArea'] = X_['StoreArea'] + X_['PickingArea']
+        X_['Year'] = X_['Date'].dt.year
+        X_['Month'] = X_['Date'].dt.month
+        X_['Weekday'] = X_['Date'].dt.weekday
+        X_['IsSpecialEvent'] = X_['SpecialEvent'].apply(lambda x: 0 if x=="" else 1)
+        X_['SlotHour'] = X_['Slot'].str.split(":").str[0].astype(int)
+        return X_

app.py ADDED Viewed

	@@ -0,0 +1,139 @@

+import pandas as pd
+from flask import Flask, request, jsonify
+from CatBoostWrapper import CatBoostWrapper
+from FeatureEngineering import FeatureEngineering
+# To serialize the model
+import joblib
+# Initialize Flask app with a name
+app = Flask("Store Capacity Predictor")
+# Load the trained store-capacity prediction model once, at import time, so
+# every request handler can reuse the same object.
+# NOTE: joblib.load unpickles arbitrary objects - only load artifacts from a
+# trusted source. CatBoostWrapper and FeatureEngineering are imported above
+# presumably so the pickled object can resolve those classes - confirm
+# against the training code.
+model = joblib.load("catbooster_model_v1_0.joblib")
+# Define a route for the home page
+@app.get('/')
+def home():
+    return "Welcome to the Store Capacity Prediction API"
+# Define an endpoint to predict churn for a single customer
+@app.post('/v1/predict')
+def predict_capacity():
+    """
+    Expects JSON input with store and slot information.
+    Example JSON:
+    {
+        "Store": "BU1",
+        "QueueType": "Normal",
+        "StaffEmployed": 20,
+        "StaffOnline": 15,
+        "StaffLeave": 2,
+        "StoreArea": 1000,
+        "PickingArea": 200,
+        "TechAdvancement": 3,
+        "SpecialEvent": "",
+        "Date": "2025-10-06",
+        "Slot": "14:00"
+    }
+    """
+    try:
+        # Get JSON data from request
+        sales_data = request.get_json()
+        # Extract relevant features for the model
+        sample = {
+            'Store': sales_data['Store'],
+            'QueueType': sales_data['QueueType'],
+            'StaffEmployed': sales_data['StaffEmployed'],
+            'StaffOnline': sales_data['StaffOnline'],
+            'StaffLeave': sales_data['StaffLeave'],
+            'StoreArea': sales_data['StoreArea'],
+            'PickingArea': sales_data['PickingArea'],
+            'TechAdvancement': sales_data['TechAdvancement'],
+            'SpecialEvent': sales_data['SpecialEvent'],
+            'Date': sales_data['Date'],
+            'Slot': sales_data['Slot']
+        }
+        # Convert to DataFrame
+        input_data = pd.DataFrame([sample])
+        data_set = input_data.copy()  # optional backup
+        # Drop any IDs if your pipeline doesn’t need them
+        # input_data.drop(["Store_Id"], axis=1, inplace=True)  # example
+        # Predict using the trained pipeline
+        prediction = pipeline.predict(input_data).tolist()[0]
+        # Return prediction as JSON
+        return jsonify({'Predicted_Capacity': prediction})
+    except Exception as e:
+        return jsonify({'error': str(e)}), 400
+# Define an endpoint to predict churn for a batch of customers
+@app.post('/predict_batch')
+def predict_capacity_batch():
+    """
+    Expects JSON input with a list of store capacity data.
+    Example JSON:
+    {
+        "data": [
+            {
+                "Store": "BU1",
+                "QueueType": "Normal",
+                "StaffEmployed": 20,
+                "StaffOnline": 15,
+                "StaffLeave": 2,
+                "StoreArea": 1000,
+                "PickingArea": 200,
+                "TechAdvancement": 3,
+                "SpecialEvent": "",
+                "Date": "2025-10-06",
+                "Slot": "14:00"
+            },
+            {
+                "Store": "BU2",
+                "QueueType": "Express",
+                "StaffEmployed": 25,
+                "StaffOnline": 20,
+                "StaffLeave": 1,
+                "StoreArea": 1200,
+                "PickingArea": 250,
+                "TechAdvancement": 4,
+                "SpecialEvent": "BlackFriday",
+                "Date": "2025-10-06",
+                "Slot": "10:00"
+            }
+        ]
+    }
+    """
+    try:
+        # Get JSON data from request
+        input_json = request.get_json()
+        data_list = input_json.get("data", [])
+        # Convert list of dicts to DataFrame
+        input_data = pd.DataFrame(data_list)
+        # Predict using pipeline
+        predictions = pipeline.predict(input_data).tolist()
+        # Prepare output DataFrame with Date, Store, Predicted_Capacity
+        output_df = pd.DataFrame({
+            "Date": input_data["Date"],
+            "Store": input_data["Store"],
+            "Predicted_Capacity": predictions
+        })
+        # Return as HTML table
+        return output_df.to_html(index=False)
+    except Exception as e:
+        return jsonify({"error": str(e)}), 400
+# Run the Flask development server in debug mode when this file is executed
+# directly (the Dockerfile in this commit starts the app with Gunicorn instead).
+# NOTE(review): debug=True enables the interactive debugger - keep this entry
+# point for local use only and never expose it publicly.
+if __name__ == '__main__':
+    app.run(debug=True)

catbooster_model_v1_0.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d20a6e0fe08874508db62951b496151b06da2b57b78a1b5166e2a2090e067f71
+size 1673030

requirements.txt ADDED Viewed

	@@ -0,0 +1,13 @@

+pandas==2.2.2
+numpy==2.0.2
+scikit-learn==1.6.1
+xgboost==2.1.4
+joblib==1.4.2
+Werkzeug==2.2.2
+flask==2.2.2
+gunicorn==20.1.0
+requests==2.28.1
+catboost==1.2.8
+dill==0.3.8
+scipy==1.13.1
+uvicorn[standard]