# NOTE(review): the three lines below ("Spaces: / Sleeping / Sleeping") are a
# Hugging Face Spaces status banner captured when the page was copied; they are
# not source code and are preserved here as a comment so the file stays valid.
# Spaces: Sleeping
import os

import joblib
import pandas as pd
# Flask must be imported before the app object and routes below can use it.
from flask import Flask, request, jsonify

# ----------------------------
# Config / Model path
# ----------------------------
MODEL_PATH = "superKart_price_prediction_model_v1_0.joblib"

# ----------------------------
# Initialize app and load model
# ----------------------------
# NOTE(review): Flask's first argument is the *import name*, conventionally
# __name__; a display string works but is unusual — confirm before changing,
# since it affects how Flask locates templates/static resources.
app = Flask("SuperKart Sales Predictor")

# Fail fast at startup with a clear message if the model artifact is missing,
# instead of failing on the first prediction request.
if not os.path.exists(MODEL_PATH):
    raise FileNotFoundError(f"Model file not found at {MODEL_PATH}. ")
model = joblib.load(MODEL_PATH)

# Raw input feature names the model pipeline expects (before preprocessing).
NUMERIC_COLS = ['Product_Weight', 'Product_Allocated_Area', 'Product_MRP', 'Store_Age']
CATEGORICAL_COLS = ['Product_Sugar_Content', 'Product_Type', 'Store_Size',
                    'Store_Location_City_Type', 'Store_Type']
EXPECTED_COLUMNS = NUMERIC_COLS + CATEGORICAL_COLS
# ----------------------------
# Utility function
# ----------------------------
def validate_and_prepare_input(df: pd.DataFrame, reference_year: int = 2025):
    """
    Ensure the dataframe has the required columns.

    If ``Store_Establishment_Year`` is provided instead of ``Store_Age``, the
    age is derived as ``reference_year - Store_Establishment_Year``.

    Parameters
    ----------
    df : pd.DataFrame
        Raw input records. The input is not mutated; a copy is returned.
    reference_year : int, default 2025
        Year used to convert an establishment year into a store age.
        Previously a hard-coded magic number; exposed as a parameter so it
        can be kept in sync with the model's training reference year.

    Returns
    -------
    tuple[pd.DataFrame, list[str]]
        The prepared copy of the dataframe and the list of still-missing
        expected columns (empty when the input is complete).
    """
    df = df.copy()
    missing = [c for c in EXPECTED_COLUMNS if c not in df.columns]
    # If the user provided Store_Establishment_Year, derive Store_Age from it
    # and re-check which expected columns are still absent.
    if 'Store_Establishment_Year' in df.columns and 'Store_Age' in missing:
        df['Store_Age'] = reference_year - df['Store_Establishment_Year']
        missing = [c for c in EXPECTED_COLUMNS if c not in df.columns]
    return df, missing
# ----------------------------
# Routes
# ----------------------------
# Defect fixed: the view functions in this section were never registered with
# Flask (no @app.route decorators), so none of the endpoints were reachable.
# NOTE(review): URL paths chosen by convention — confirm against the clients.
@app.route("/")
def home():
    """Health check / landing page: confirms the service is running."""
    return jsonify({
        "service": "SuperKart Sales Predictor",
        "status": "running"
    })
@app.route("/predict", methods=["POST"])
def predict_single():
    """
    Predict sales for a single product-store record.

    Defect fixed: this view function was never registered with Flask (no
    @app.route decorator), so the endpoint was unreachable.
    NOTE(review): URL path chosen by convention — confirm against the clients.

    Expected JSON schema (example):
    {
        "Product_Weight": 12.5,
        "Product_Allocated_Area": 0.056,
        "Product_MRP": 149.0,
        "Store_Age": 16,
        "Product_Sugar_Content": "Low Sugar",
        "Product_Type": "Dairy",
        "Store_Size": "High",
        "Store_Location_City_Type": "Tier 1",
        "Store_Type": "Supermarket Type 1"
    }

    Returns: JSON {"prediction": <float>} with 200 on success, a JSON error
    payload with 400 on bad input, or 500 on an unexpected failure.
    """
    try:
        data = request.get_json(force=True)
        if not isinstance(data, dict):
            return jsonify({"error": "Input JSON must be an object/dict"}), 400
        # Wrap the single record in a one-row DataFrame for the pipeline.
        input_df = pd.DataFrame([data])
        # Validate and prepare (handles Store_Establishment_Year -> Store_Age).
        input_df, missing = validate_and_prepare_input(input_df)
        if missing:
            return jsonify({"error": "Missing required columns", "missing_columns": missing}), 400
        # Keep only expected columns (ignore extra fields) and fix column order.
        input_df = input_df[EXPECTED_COLUMNS]
        # The loaded pipeline applies its own preprocessing before predicting.
        pred = model.predict(input_df)
        prediction_value = float(pred[0])
        return jsonify({"prediction": prediction_value}), 200
    except Exception as e:
        # Top-level request boundary: report the failure to the client as 500.
        return jsonify({"error": "Exception during prediction", "details": str(e)}), 500
@app.route("/predict_batch", methods=["POST"])
def predict_batch():
    """
    Predict sales for a batch of records supplied as a CSV file upload.

    The CSV should contain the expected columns (or Store_Establishment_Year
    instead of Store_Age, which will be converted automatically). The file
    must be sent as multipart/form-data under the key 'file'.

    Defect fixed: this view function was never registered with Flask (no
    @app.route decorator), so the endpoint was unreachable.
    NOTE(review): URL path chosen by convention — confirm against the clients.

    Returns: JSON {"predictions_count": N, "predictions": [...]} with 200 on
    success, a JSON error payload with 400 on bad input, or 500 on failure.
    """
    try:
        if 'file' not in request.files:
            return jsonify({"error": "No file part in the request. Upload a CSV file with key 'file'."}), 400
        file = request.files['file']
        if file.filename == "":
            return jsonify({"error": "Empty filename. Please upload a CSV file."}), 400
        # Parse the uploaded CSV straight from the file stream.
        input_df = pd.read_csv(file)
        input_df, missing = validate_and_prepare_input(input_df)
        if missing:
            return jsonify({"error": "Missing required columns in uploaded CSV", "missing_columns": missing}), 400
        # Keep only expected columns (fixes order, drops extras) and predict.
        input_df = input_df[EXPECTED_COLUMNS]
        preds = model.predict(input_df)
        # Return predictions aligned with the original input index.
        output = input_df.copy()
        output['predicted_Product_Store_Sales_Total'] = preds.astype(float)
        # Convert to records for the JSON response (limit size if necessary).
        results = output.reset_index().to_dict(orient='records')
        return jsonify({"predictions_count": len(results), "predictions": results}), 200
    except Exception as e:
        # Top-level request boundary: report the failure to the client as 500.
        return jsonify({"error": "Exception during batch prediction", "details": str(e)}), 500
# ----------------------------
# Run app
# ----------------------------
if __name__ == "__main__":
    # Listen on 0.0.0.0 so the service is reachable inside a container.
    # Port 7860 is the Hugging Face Spaces convention; use 5000 for plain
    # local Flask development if required.
    app.run(host="0.0.0.0", port=7860, debug=False)