gauravsahu1990 committed on
Commit
72bce8e
·
verified ·
1 Parent(s): 3585319

Upload folder using huggingface_hub

Browse files
CatBoostWrapper.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sklearn.base import BaseEstimator, TransformerMixin
2
+ import numpy as np
3
+ from catboost import CatBoostRegressor
4
+ import pandas as pd
5
+ # ---------------------------
6
+ # Full CatBoost
7
+ # ---------------------------
8
class CatBoostWrapper(BaseEstimator):
    """Thin sklearn-compatible wrapper around CatBoostRegressor.

    Hyper-parameters are stored verbatim on the instance (required by
    sklearn's get_params/set_params contract); the underlying CatBoost
    model is built lazily in fit().
    """

    def __init__(self, iterations=2000, learning_rate=0.03, depth=6, l2_leaf_reg=5, random_seed=42):
        # Store constructor args unchanged — sklearn clones estimators by
        # re-reading these attributes, so do not transform them here.
        self.iterations = iterations
        self.learning_rate = learning_rate
        self.depth = depth
        self.l2_leaf_reg = l2_leaf_reg
        self.random_seed = random_seed
        self.model = None  # populated by fit()

    def fit(self, X, y):
        """Train a fresh CatBoostRegressor on (X, y) and return self."""
        self.model = CatBoostRegressor(
            iterations=self.iterations,
            learning_rate=self.learning_rate,
            depth=self.depth,
            l2_leaf_reg=self.l2_leaf_reg,
            eval_metric='RMSE',
            random_seed=self.random_seed,
            # NOTE(review): early stopping only takes effect when an eval
            # set is supplied to fit(); none is passed below — confirm intent.
            early_stopping_rounds=100,
            verbose=100
        )
        self.model.fit(X, y)
        return self

    def predict(self, X):
        """Predict with the fitted model.

        Raises:
            RuntimeError: if called before fit() — previously this failed
            with an opaque ``AttributeError`` on ``None``.
        """
        if self.model is None:
            raise RuntimeError("CatBoostWrapper.predict called before fit().")
        return self.model.predict(X)

    def feature_importances_(self, feature_names):
        """Return a DataFrame of (Feature, Importance), sorted descending.

        NOTE(review): despite the trailing underscore this is a *method*,
        not the sklearn ``feature_importances_`` attribute convention; the
        name is kept unchanged for backward compatibility with callers.
        """
        if self.model is None:
            raise RuntimeError("CatBoostWrapper.feature_importances_ called before fit().")
        return pd.DataFrame({
            'Feature': feature_names,
            'Importance': self.model.get_feature_importance()
        }).sort_values(by='Importance', ascending=False)
Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.9-slim

# Set the working directory inside the container
WORKDIR /app

# Copy only the dependency manifest first so the expensive pip layer is
# cached by Docker and not rebuilt on every source-code change.
COPY requirements.txt .

# Install dependencies without pip's cache to reduce image size
RUN pip install --no-cache-dir -r requirements.txt

# Now copy the application source (changes here no longer bust the pip layer)
COPY . .

# Define the command to start the application using Gunicorn with 4 worker processes
# - `-w 4`: Uses 4 worker processes for handling requests
# - `-b 0.0.0.0:7860`: Binds the server to port 7860 on all network interfaces
# - `app:app`: Runs the Flask app (assuming `app.py` contains the Flask instance named `app`)
CMD ["gunicorn", "-w", "4", "-b", "0.0.0.0:7860", "app:app"]
FeatureEngineering.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from sklearn.base import BaseEstimator, TransformerMixin
3
+ import numpy as np
4
+ from catboost import CatBoostRegressor
5
+ import pandas as pd
6
+ # ---------------------------
7
+ # Custom Feature Engineering Transformer
8
+ # ---------------------------
9
class FeatureEngineering(BaseEstimator, TransformerMixin):
    """Stateless feature builder for the store-capacity model.

    Derives staffing-ratio, area, calendar, special-event and slot-hour
    features. Expects columns: StaffOnline, StaffEmployed, StoreArea,
    PickingArea, Date, SpecialEvent, Slot.
    """

    def __init__(self):
        pass

    def fit(self, X, y=None):
        # Nothing is learned; present only for sklearn pipeline compatibility.
        return self

    def transform(self, X):
        """Return a copy of X with the engineered columns appended."""
        X_ = X.copy()
        # BUG FIX: the API layer sends Date as an ISO string (e.g.
        # "2025-10-06"); `.dt` accessors below would raise on a string
        # column. Coercing is a no-op when the column is already datetime.
        X_['Date'] = pd.to_datetime(X_['Date'])
        X_['StaffRatio'] = X_['StaffOnline'] / X_['StaffEmployed']
        X_['TotalArea'] = X_['StoreArea'] + X_['PickingArea']
        X_['Year'] = X_['Date'].dt.year
        X_['Month'] = X_['Date'].dt.month
        X_['Weekday'] = X_['Date'].dt.weekday
        # Missing (NaN) special events mean "no event" → 0; previously NaN
        # compared unequal to "" and was mis-flagged as 1.
        X_['IsSpecialEvent'] = X_['SpecialEvent'].fillna("").apply(lambda x: 0 if x == "" else 1)
        # "14:00" → 14; assumes Slot is always "HH:MM" — TODO confirm upstream.
        X_['SlotHour'] = X_['Slot'].str.split(":").str[0].astype(int)
        return X_
app.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pandas as pd
from flask import Flask, request, jsonify
# NOTE(review): these two imports look unused, but unpickling the saved
# pipeline requires their classes to be importable from this process —
# do not remove them.
from CatBoostWrapper import CatBoostWrapper
from FeatureEngineering import FeatureEngineering
# To serialize/deserialize the model
import joblib

# Initialize Flask app with a display name
app = Flask("Store Capacity Predictor")

# Load the trained pipeline once at startup (replace joblib.load with
# dill.load if the model was saved with dill)
model = joblib.load("catbooster_model_v1_0.joblib")

# Define a route for the home page
@app.get('/')
def home():
    # Simple landing/liveness endpoint.
    return "Welcome to the Store Capacity Prediction API"
19
+
20
# Define an endpoint to predict capacity for a single store/slot record
@app.post('/v1/predict')
def predict_capacity():
    """
    Expects JSON input with store and slot information.
    Example JSON:
    {
        "Store": "BU1",
        "QueueType": "Normal",
        "StaffEmployed": 20,
        "StaffOnline": 15,
        "StaffLeave": 2,
        "StoreArea": 1000,
        "PickingArea": 200,
        "TechAdvancement": 3,
        "SpecialEvent": "",
        "Date": "2025-10-06",
        "Slot": "14:00"
    }
    Returns JSON {"Predicted_Capacity": <float>} or {"error": ...} with 400.
    """
    try:
        # Get JSON data from request
        sales_data = request.get_json()

        # Extract relevant features for the model; a missing key raises
        # KeyError, which is reported via the 400 handler below.
        sample = {
            'Store': sales_data['Store'],
            'QueueType': sales_data['QueueType'],
            'StaffEmployed': sales_data['StaffEmployed'],
            'StaffOnline': sales_data['StaffOnline'],
            'StaffLeave': sales_data['StaffLeave'],
            'StoreArea': sales_data['StoreArea'],
            'PickingArea': sales_data['PickingArea'],
            'TechAdvancement': sales_data['TechAdvancement'],
            'SpecialEvent': sales_data['SpecialEvent'],
            'Date': sales_data['Date'],
            'Slot': sales_data['Slot']
        }

        # Convert to a single-row DataFrame for the pipeline
        input_data = pd.DataFrame([sample])

        # BUG FIX: the loaded estimator is bound to `model`, not `pipeline`;
        # the old `pipeline.predict(...)` raised NameError on every request
        # (silently surfaced as a 400 by the except clause).
        prediction = model.predict(input_data).tolist()[0]

        # Return prediction as JSON
        return jsonify({'Predicted_Capacity': prediction})

    except Exception as e:
        # Report malformed input / prediction failures as a client error.
        return jsonify({'error': str(e)}), 400
74
+
75
# Define an endpoint to predict capacity for a batch of store/slot records
@app.post('/predict_batch')
def predict_capacity_batch():
    """
    Expects JSON input with a list of store capacity data.
    Example JSON:
    {
        "data": [
            {
                "Store": "BU1",
                "QueueType": "Normal",
                "StaffEmployed": 20,
                "StaffOnline": 15,
                "StaffLeave": 2,
                "StoreArea": 1000,
                "PickingArea": 200,
                "TechAdvancement": 3,
                "SpecialEvent": "",
                "Date": "2025-10-06",
                "Slot": "14:00"
            },
            {
                "Store": "BU2",
                "QueueType": "Express",
                "StaffEmployed": 25,
                "StaffOnline": 20,
                "StaffLeave": 1,
                "StoreArea": 1200,
                "PickingArea": 250,
                "TechAdvancement": 4,
                "SpecialEvent": "BlackFriday",
                "Date": "2025-10-06",
                "Slot": "10:00"
            }
        ]
    }
    Returns an HTML table of Date, Store, Predicted_Capacity,
    or {"error": ...} with 400.
    """
    try:
        # Get JSON data from request; default to an empty batch
        input_json = request.get_json()
        data_list = input_json.get("data", [])

        # Convert list of dicts to DataFrame
        input_data = pd.DataFrame(data_list)

        # BUG FIX: `pipeline` was never defined — the loaded estimator is
        # bound to `model`, so every batch request raised NameError.
        predictions = model.predict(input_data).tolist()

        # Prepare output DataFrame with Date, Store, Predicted_Capacity
        output_df = pd.DataFrame({
            "Date": input_data["Date"],
            "Store": input_data["Store"],
            "Predicted_Capacity": predictions
        })

        # Return as HTML table (NOTE(review): inconsistent with the JSON
        # response of /v1/predict — confirm consumers expect HTML here).
        return output_df.to_html(index=False)

    except Exception as e:
        return jsonify({"error": str(e)}), 400
136
+
137
# Run the Flask built-in dev server when executed directly.
# NOTE(review): debug=True enables the Werkzeug debugger/auto-reloader and
# must not be used in production — the Docker image serves via gunicorn instead.
if __name__ == '__main__':
    app.run(debug=True)
catbooster_model_v1_0.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d20a6e0fe08874508db62951b496151b06da2b57b78a1b5166e2a2090e067f71
3
+ size 1673030
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pandas==2.2.2
2
+ numpy==2.0.2
3
+ scikit-learn==1.6.1
4
+ xgboost==2.1.4
5
+ joblib==1.4.2
6
+ Werkzeug==2.2.2
7
+ flask==2.2.2
8
+ gunicorn==20.1.0
9
+ requests==2.28.1
10
+ catboost==1.2.8
11
+ dill==0.3.8
12
+ scipy==1.13.1
13
+ uvicorn[standard]