File size: 2,415 Bytes
d3cb070
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
from flask import Flask, request, jsonify
import joblib
import pandas as pd

# Create the Flask application instance for this module.
app = Flask(__name__)

# Load the serialised pipeline (preprocessor + estimator bundled in one
# object). The artifact is produced by the training/serialisation step.
model = joblib.load("best_model_pipeline.pkl")

# Feature names the pipeline was fitted on, listed in training order so
# incoming payloads can be aligned before prediction.
EXPECTED_FEATURES = [
    'Product_Weight',
    'Product_Sugar_Content',
    'Product_Allocated_Area',
    'Product_Type',
    'Product_MRP',
    'Store_Size',
    'Store_Location_City_Type',
    'Store_Type',
    'Store_Age',
    'Product_Category',
]

# Health-check endpoint: a GET on the root URL (e.g.
# https://your-space.hf.space/) confirms the API process is alive.
@app.route('/')
def home():
    """Return a small JSON payload confirming the service is running."""
    payload = {"status": "running", "message": "SuperKart Sales Forecasting API is live."}
    return jsonify(payload)

# Prediction endpoint: accepts POST requests whose JSON body is either one
# record (object) or a batch (array of objects) of product/store features,
# and returns the predicted sales revenue as JSON.
@app.route('/predict', methods=['POST'])
def predict():
    """Predict sales for a single record or a batch of records.

    Returns:
        200 with {"predictions": [...]} on success,
        400 with {"error": ...} for malformed or mis-shaped input,
        500 with {"error": ...} for unexpected failures.
    """
    try:
        # silent=True makes malformed JSON yield None instead of raising,
        # so bad client input becomes a 400 rather than a generic 500.
        data = request.get_json(force=True, silent=True)
        if data is None:
            return jsonify({"error": "Request body must be valid JSON."}), 400

        # Normalise a single record (dict) into a one-element batch.
        if isinstance(data, dict):
            data = [data]

        # Anything other than a list of JSON objects is a client error;
        # validating here avoids a confusing pandas error surfacing as 500.
        if not isinstance(data, list) or not all(isinstance(row, dict) for row in data):
            return jsonify({"error": "Expected a JSON object or a list of JSON objects."}), 400

        # An empty batch has a well-defined answer: no predictions.
        if not data:
            return jsonify({"predictions": []})

        # Convert to a DataFrame so the pipeline can process it.
        input_df = pd.DataFrame(data)

        # Check that every training feature is present; sorted() keeps the
        # error message deterministic (set iteration order is not).
        missing = set(EXPECTED_FEATURES) - set(input_df.columns)
        if missing:
            return jsonify({"error": f"Missing features: {sorted(missing)}"}), 400

        # Reorder (and drop any extra) columns to match the training layout.
        input_df = input_df[EXPECTED_FEATURES]

        # Run the full pipeline: preprocessing (encoding + scaling) followed
        # by the model's prediction.
        predictions = model.predict(input_df)

        # .tolist() converts the numpy array into a JSON-serialisable list.
        return jsonify({"predictions": predictions.tolist()})

    except Exception as e:
        # Boundary catch-all: surface unexpected failures as a 500 payload
        # instead of an HTML stack trace.
        return jsonify({"error": str(e)}), 500

# Script entry point: bind to all interfaces so the container is reachable.
if __name__ == '__main__':
    # 7860 is the port HuggingFace Docker Spaces expose by default.
    hf_port = 7860
    app.run(host='0.0.0.0', port=hf_port)