# SuperKart Sales Predictor — Flask inference service.
# (Extracted from a hosted file viewer; original metadata: 5,077 bytes,
# commits b8c339f / ef2e6ab / 371ed35.)
import os
import joblib
import pandas as pd
# must import Flask, request, jsonify before using them
from flask import Flask, request, jsonify
# ----------------------------
# Config / Model path
# ----------------------------
# Filename of the serialized prediction model (joblib artifact), resolved
# relative to the process working directory.
MODEL_PATH = "superKart_price_prediction_model_v1_0.joblib"
# ----------------------------
# Initialize app and load model
# ----------------------------
app = Flask("SuperKart Sales Predictor")
# Load model once at import time. Fail fast with a clear error if the
# artifact is missing, rather than erroring on the first request.
if not os.path.exists(MODEL_PATH):
    raise FileNotFoundError(f"Model file not found at {MODEL_PATH}. ")
model = joblib.load(MODEL_PATH)
# These are the raw input feature names before preprocessing
NUMERIC_COLS = ['Product_Weight', 'Product_Allocated_Area', 'Product_MRP', 'Store_Age']
CATEGORICAL_COLS = ['Product_Sugar_Content', 'Product_Type', 'Store_Size',
                    'Store_Location_City_Type', 'Store_Type']
EXPECTED_COLUMNS = NUMERIC_COLS + CATEGORICAL_COLS
# ----------------------------
# Utility function
# ----------------------------
def validate_and_prepare_input(df: pd.DataFrame, reference_year: int = 2025):
    """
    Ensure the dataframe has the required columns.

    If Store_Establishment_Year is provided instead of Store_Age, it is
    converted to Store_Age as ``reference_year - Store_Establishment_Year``.

    Args:
        df: Raw input records, one row per product-store pair.
        reference_year: Year used to derive Store_Age from
            Store_Establishment_Year. Defaults to 2025 (the previously
            hard-coded value) for backward compatibility.

    Returns:
        tuple[pd.DataFrame, list[str]]: The prepared dataframe and the list
        of columns still missing (empty when the input is complete).
    """
    df = df.copy()  # never mutate the caller's dataframe
    missing = [c for c in EXPECTED_COLUMNS if c not in df.columns]
    # Derive Store_Age when the caller supplied the establishment year instead.
    if 'Store_Establishment_Year' in df.columns and 'Store_Age' in missing:
        df['Store_Age'] = reference_year - df['Store_Establishment_Year']
        # Recompute after the derivation so Store_Age no longer counts as missing.
        missing = [c for c in EXPECTED_COLUMNS if c not in df.columns]
    return df, missing
# ----------------------------
# Routes
# ----------------------------
@app.get("/")
def home():
    """Health check / Landing page"""
    payload = {
        "service": "SuperKart Sales Predictor",
        "status": "running",
    }
    return jsonify(payload)
@app.post("/v1/predict")
def predict_single():
    """
    Predict sales for a single product-store record.

    Expected JSON schema (example):
        {
            "Product_Weight": 12.5,
            "Product_Allocated_Area": 0.056,
            "Product_MRP": 149.0,
            "Store_Age": 16,
            "Product_Sugar_Content": "Low Sugar",
            "Product_Type": "Dairy",
            "Store_Size": "High",
            "Store_Location_City_Type": "Tier 1",
            "Store_Type": "Supermarket Type 1"
        }

    Returns 200 with {"prediction": <float>}, 400 on bad/incomplete input,
    500 on any unexpected failure during prediction.
    """
    try:
        payload = request.get_json(force=True)
        # Reject lists/scalars early: a single record must be a JSON object.
        if not isinstance(payload, dict):
            return jsonify({"error": "Input JSON must be an object/dict"}), 400
        # One-row frame so the sklearn pipeline sees a tabular input.
        frame = pd.DataFrame([payload])
        frame, missing = validate_and_prepare_input(frame)
        if missing:
            return jsonify({"error": "Missing required columns", "missing_columns": missing}), 400
        # Drop any extra fields and fix column order before predicting;
        # the pipeline applies its own preprocessing.
        frame = frame[EXPECTED_COLUMNS]
        result = model.predict(frame)
        return jsonify({"prediction": float(result[0])}), 200
    except Exception as e:
        # Boundary handler: surface the failure as JSON instead of an HTML 500.
        return jsonify({"error": "Exception during prediction", "details": str(e)}), 500
@app.post("/v1/predict_batch")
def predict_batch():
    """
    Predict sales for a batch of records supplied as a CSV file upload
    (multipart form key 'file').

    The CSV must contain the expected columns, or Store_Establishment_Year
    in place of Store_Age (converted automatically). Returns 200 with the
    input rows plus a predicted_Product_Store_Sales_Total column, 400 on
    bad/incomplete input, 500 on any unexpected failure.
    """
    try:
        # Guard clauses: a file part must exist and carry a real filename.
        if 'file' not in request.files:
            return jsonify({"error": "No file part in the request. Upload a CSV file with key 'file'."}), 400
        upload = request.files['file']
        if upload.filename == "":
            return jsonify({"error": "Empty filename. Please upload a CSV file."}), 400
        frame = pd.read_csv(upload)
        frame, missing = validate_and_prepare_input(frame)
        if missing:
            return jsonify({"error": "Missing required columns in uploaded CSV", "missing_columns": missing}), 400
        # Restrict to the expected columns (extras ignored) and predict;
        # the pipeline applies its own preprocessing.
        frame = frame[EXPECTED_COLUMNS]
        predictions = model.predict(frame)
        enriched = frame.copy()
        enriched['predicted_Product_Store_Sales_Total'] = predictions.astype(float)
        # reset_index keeps the original row index visible in each record.
        records = enriched.reset_index().to_dict(orient='records')
        return jsonify({"predictions_count": len(records), "predictions": records}), 200
    except Exception as e:
        # Boundary handler: surface the failure as JSON instead of an HTML 500.
        return jsonify({"error": "Exception during batch prediction", "details": str(e)}), 500
# ----------------------------
# Run app
# ----------------------------
if __name__ == "__main__":
    # Listen on 0.0.0.0 so the server is reachable from outside a container.
    # Port 7860 is conventional for hosted Spaces; use 5000 locally if needed.
    # debug=False: never enable the Werkzeug debugger in a deployed service.
    app.run(host="0.0.0.0", port=7860, debug=False)