codedfortamara committed on
Commit
d3cb070
·
verified ·
1 Parent(s): d952716

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. Dockerfile +6 -0
  2. app.py +57 -0
  3. best_model_pipeline.pkl +3 -0
  4. requirements.txt +14 -0
Dockerfile ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
# Slim Python base image keeps the final image small.
FROM python:3.9-slim

WORKDIR /app

# Install dependencies BEFORE copying the application code so this layer is
# cached unless requirements.txt itself changes — editing app.py no longer
# triggers a full reinstall of every package.
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt

# Copy the application code and model artifact (app.py, best_model_pipeline.pkl, ...).
COPY . .

# Hugging Face Docker Spaces route external traffic to port 7860.
EXPOSE 7860

# Serve the Flask app with gunicorn (production WSGI server, not Flask's dev server).
CMD ["gunicorn", "-b", "0.0.0.0:7860", "app:app"]
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from flask import Flask, request, jsonify
import joblib
import pandas as pd

# Initialise the Flask application.
app = Flask(__name__)

# Load the serialised model pipeline (preprocessor + model in one object).
# This artifact is produced by the training/serialisation step and shipped
# alongside the app in the Docker image.
model = joblib.load("best_model_pipeline.pkl")

# Exact feature names the model expects, in the order used during training.
EXPECTED_FEATURES = [
    'Product_Weight', 'Product_Sugar_Content', 'Product_Allocated_Area',
    'Product_Type', 'Product_MRP', 'Store_Size', 'Store_Location_City_Type',
    'Store_Type', 'Store_Age', 'Product_Category'
]


@app.route('/')
def home():
    """Health check: confirms the API is up at the root URL (e.g. https://your-space.hf.space/)."""
    return jsonify({"status": "running", "message": "SuperKart Sales Forecasting API is live."})


@app.route('/predict', methods=['POST'])
def predict():
    """Predict sales revenue from product/store features.

    Accepts a single JSON object or a list of objects in the POST body and
    returns {"predictions": [...]} as JSON. Responds 400 for malformed or
    incomplete client input and 500 for unexpected server-side errors.
    """
    try:
        # Parse the JSON body; force=True accepts requests that omit the
        # application/json Content-Type header.
        data = request.get_json(force=True)

        # Support both a single record (dict) and a batch (list of dicts).
        if isinstance(data, dict):
            data = [data]

        # Any other valid-JSON payload (string, number, null, nested lists) is
        # a CLIENT error — reject with 400 instead of letting pd.DataFrame
        # raise and surface it as a misleading 500.
        if not isinstance(data, list) or not all(isinstance(rec, dict) for rec in data):
            return jsonify({"error": "Payload must be a JSON object or a list of JSON objects."}), 400

        # Convert to a DataFrame so the pipeline can process it.
        input_df = pd.DataFrame(data)

        # Verify every feature the model needs is present in the input.
        # (sorted => deterministic error message regardless of set ordering.)
        missing = set(EXPECTED_FEATURES) - set(input_df.columns)
        if missing:
            return jsonify({"error": f"Missing features: {sorted(missing)}"}), 400

        # Reorder columns to match the column order used during training.
        input_df = input_df[EXPECTED_FEATURES]

        # Run the full pipeline: preprocessing (encoding + scaling) then the
        # model prediction.
        predictions = model.predict(input_df)

        # Return predictions as a JSON list.
        return jsonify({"predictions": predictions.tolist()})

    except Exception as e:
        # Any remaining failure is reported as a 500 server error.
        return jsonify({"error": str(e)}), 500


# Dev-server entry point on port 7860 (Hugging Face's default port for Docker
# Spaces); in the container, gunicorn serves `app:app` instead of this.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)
best_model_pipeline.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fbcdab28a9165856d92cb6bc5509816d4a40d5d850c2a4b51084ca77f1e83f3
3
+ size 1478341
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Flask: the web framework that handles HTTP requests and routing
2
+ flask==3.1.1
3
+ # Joblib: loads the serialised model pipeline from disk
4
+ joblib==1.4.2
5
+ # Pandas: converts incoming JSON to a DataFrame for the pipeline
6
+ pandas==2.2.2
7
+ # Numpy: required by scikit-learn and the model internally
8
+ numpy==2.0.2
9
+ # Scikit-learn: contains the preprocessor (OneHotEncoder, StandardScaler) and model
10
+ scikit-learn==1.6.1
11
+ # XGBoost: required if the best model is XGBoost
12
+ xgboost==2.1.4
13
+ # Gunicorn: production-grade WSGI server (replaces Flask's dev server)
14
+ gunicorn==23.0.0