File size: 5,077 Bytes
b8c339f
 
 
 
 
 
 
ef2e6ab
 
 
 
371ed35
ef2e6ab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import os
from datetime import date

import joblib
import pandas as pd

# must import Flask, request, jsonify before using them
from flask import Flask, request, jsonify


# ----------------------------
# Config / Model path
# ----------------------------
# Allow the model location to be overridden at deploy time (e.g. inside a
# container) while keeping the original filename as the default, so existing
# deployments keep working unchanged.
MODEL_PATH = os.environ.get(
    "MODEL_PATH", "superKart_price_prediction_model_v1_0.joblib"
)

# ----------------------------
# Initialize app and load model
# ----------------------------
app = Flask("SuperKart Sales Predictor")

# Load the model eagerly at startup so a missing artifact fails fast at boot
# rather than erroring on the first prediction request.
if not os.path.exists(MODEL_PATH):
    raise FileNotFoundError(
        f"Model file not found at {MODEL_PATH}. "
        "Set the MODEL_PATH environment variable or place the .joblib file "
        "next to the application."
    )
model = joblib.load(MODEL_PATH)

# Raw input feature names expected by the pipeline *before* preprocessing.
NUMERIC_COLS = ['Product_Weight', 'Product_Allocated_Area', 'Product_MRP', 'Store_Age']
CATEGORICAL_COLS = ['Product_Sugar_Content', 'Product_Type', 'Store_Size',
                    'Store_Location_City_Type', 'Store_Type']

# Column order used to slice incoming data before it is fed to the model.
EXPECTED_COLUMNS = NUMERIC_COLS + CATEGORICAL_COLS

# ----------------------------
# Utility function
# ----------------------------
def validate_and_prepare_input(df: pd.DataFrame):
    """
    Ensure the dataframe has the required columns.

    If ``Store_Establishment_Year`` is provided instead of ``Store_Age``,
    it is converted to ``Store_Age`` using the current calendar year.
    (The previous implementation hard-coded 2025, which silently goes
    stale as time passes.)

    Parameters
    ----------
    df : pd.DataFrame
        Raw input rows. The input is not mutated; a copy is taken.

    Returns
    -------
    tuple[pd.DataFrame, list[str]]
        The prepared dataframe and the list of still-missing required
        columns (empty if the input is complete).
    """
    df = df.copy()
    missing = [c for c in EXPECTED_COLUMNS if c not in df.columns]

    # Derive Store_Age when the caller sent the establishment year instead.
    if 'Store_Establishment_Year' in df.columns and 'Store_Age' in missing:
        df['Store_Age'] = date.today().year - df['Store_Establishment_Year']
        # Re-check: Store_Age may now be satisfied.
        missing = [c for c in EXPECTED_COLUMNS if c not in df.columns]

    return df, missing

# ----------------------------
# Routes
# ----------------------------
@app.get("/")
def home():
    """Health check / landing page for the prediction service."""
    payload = {
        "service": "SuperKart Sales Predictor",
        "status": "running",
    }
    return jsonify(payload)

@app.post("/v1/predict")
def predict_single():
    """
    Predict sales for a single product-store record.

    Expected JSON schema (example):
    {
      "Product_Weight": 12.5,
      "Product_Allocated_Area": 0.056,
      "Product_MRP": 149.0,
      "Store_Age": 16,
      "Product_Sugar_Content": "Low Sugar",
      "Product_Type": "Dairy",
      "Store_Size": "High",
      "Store_Location_City_Type": "Tier 1",
      "Store_Type": "Supermarket Type 1"
    }
    """
    try:
        payload = request.get_json(force=True)

        # Guard clause: reject anything that is not a single JSON object.
        if not isinstance(payload, dict):
            return jsonify({"error": "Input JSON must be an object/dict"}), 400

        # One-row frame, then normalize/validate against the expected schema.
        frame, missing = validate_and_prepare_input(pd.DataFrame([payload]))
        if missing:
            return jsonify({"error": "Missing required columns", "missing_columns": missing}), 400

        # Restrict to the pipeline's expected columns; extra fields are dropped.
        frame = frame[EXPECTED_COLUMNS]

        # The loaded pipeline applies its own preprocessing before predicting.
        prediction = float(model.predict(frame)[0])
        return jsonify({"prediction": prediction}), 200

    except Exception as e:
        return jsonify({"error": "Exception during prediction", "details": str(e)}), 500

@app.post("/v1/predict_batch")
def predict_batch():
    """
    Predict sales for a batch of records supplied as a CSV file upload.

    The CSV must contain the expected columns (or Store_Establishment_Year
    in place of Store_Age, which is converted automatically).
    """
    try:
        # Guard clauses: the multipart form must carry a non-empty 'file' part.
        if 'file' not in request.files:
            return jsonify({"error": "No file part in the request. Upload a CSV file with key 'file'."}), 400

        upload = request.files['file']
        if upload.filename == "":
            return jsonify({"error": "Empty filename. Please upload a CSV file."}), 400

        # Parse, then normalize/validate against the expected schema.
        frame, missing = validate_and_prepare_input(pd.read_csv(upload))
        if missing:
            return jsonify({"error": "Missing required columns in uploaded CSV", "missing_columns": missing}), 400

        # Slice to the model's expected columns and predict for all rows.
        frame = frame[EXPECTED_COLUMNS]
        predictions = model.predict(frame)

        # Attach predictions alongside the (column-filtered) inputs so the
        # caller can line each result up with the row that produced it.
        result_df = frame.copy()
        result_df['predicted_Product_Store_Sales_Total'] = predictions.astype(float)

        # Convert to records for JSON response (limit size if necessary).
        records = result_df.reset_index().to_dict(orient='records')

        return jsonify({"predictions_count": len(records), "predictions": records}), 200

    except Exception as e:
        return jsonify({"error": "Exception during batch prediction", "details": str(e)}), 500

# ----------------------------
# Run app
# ----------------------------
if __name__ == "__main__":
    # Bind to 0.0.0.0 so the server is reachable from outside a container.
    # In dev, use port 7860 or 5000 as the environment requires.
    app.run(host="0.0.0.0", port=7860, debug=False)