"""Flask API serving a SuperKart store-sales regression model.

The trained pipeline is downloaded from the Hugging Face Hub at start-up
and exposed through a single-record JSON endpoint (/v1/predict) and a CSV
batch endpoint (/v1/batch).
"""

# Import necessary libraries
import numpy as np
import pandas as pd
from flask import Flask, request, jsonify
from huggingface_hub import hf_hub_download
import joblib
import tempfile
import streamlit as st  # NOTE(review): unused in this Flask service; kept for compatibility
import io  # NOTE(review): unused here; kept for compatibility

# Hugging Face Hub location of the trained pipeline.
REPO_ID = "omoral02/RevenuePrediction"
FILENAME = "store-sales-prediction-model-v1-0.joblib"

# Input columns the model was trained on, shared by both endpoints.
# The loaded pipeline is assumed to accept these raw columns directly
# (the old manual binning/encoding step was removed) — TODO confirm.
EXPECTED_FIELDS = [
    'Product_Type', 'Store_Type', 'Store_Location_City_Type',
    'Store_Size', 'Product_Sugar_Content',
    'Product_Weight', 'Product_MRP', 'Product_Allocated_Area',
    'Store_Establishment_Year',
]

# Download to the temp directory (writable in Hugging Face Spaces) and load
# the model once at import time; the module-level global acts as the
# process-wide cache, so no extra caching wrapper is needed.
temp_dir = tempfile.gettempdir()
model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME, cache_dir=temp_dir)
model = joblib.load(model_path)

# Initialize the Flask app
superkart_api = Flask("SuperKart Sales Predictor")


# Define root endpoint
@superkart_api.get('/')
def home():
    """Health-check / welcome endpoint."""
    return jsonify({"okay": "Welcome to the SuperKart Sales Prediction API!"})


# Endpoint for single record prediction
@superkart_api.post('/v1/predict')
def predict_sales():
    """Predict total sales for one record supplied as a JSON object.

    Returns 400 when the body is not a JSON object or required fields are
    missing, 500 when the model is unavailable or prediction fails.
    """
    if model is None:
        return jsonify({"error": "Model not loaded"}), 500
    try:
        input_json = request.get_json(silent=True)
        # get_json returns None for a missing/invalid body; reject early
        # with a 400 instead of letting a TypeError surface as a 500.
        if not isinstance(input_json, dict):
            return jsonify({'error': 'Request body must be a JSON object.'}), 400

        missing = [f for f in EXPECTED_FIELDS if f not in input_json]
        if missing:
            return jsonify({
                'error': 'Missing required input fields.',
                'missing_fields': missing,
                'received_fields': list(input_json.keys())
            }), 400

        # Extract relevant inputs (must match training columns, in order).
        features = {field: input_json[field] for field in EXPECTED_FIELDS}
        input_df = pd.DataFrame([features])

        prediction = model.predict(input_df)[0]
        return jsonify({'Predicted_Store_Sales_Total': round(float(prediction), 2)})
    except Exception as e:
        print(f"Error during single prediction: {e}")  # Added print for debugging
        return jsonify({"error": str(e), "message": "Prediction failed"}), 500


# Endpoint for batch prediction using CSV
@superkart_api.post('/v1/batch')
def predict_sales_batch():
    """Predict total sales for every row of an uploaded CSV file.

    Expects a multipart form upload under the field name 'file'. Returns
    400 for a missing file or missing columns, 500 on prediction failure.
    """
    try:
        # Guard the lookup so a missing upload yields a clear 400 rather
        # than a KeyError-driven 500.
        if 'file' not in request.files:
            return jsonify({'error': "No file uploaded; expected form field 'file'."}), 400
        input_df = pd.read_csv(request.files['file'])

        missing = [f for f in EXPECTED_FIELDS if f not in input_df.columns]
        if missing:
            return jsonify({
                'error': 'Missing required columns in uploaded CSV.',
                'missing_columns': missing,
                'received_columns': list(input_df.columns)
            }), 400

        predictions = model.predict(input_df).tolist()
        rounded_preds = [round(float(p), 2) for p in predictions]
        return jsonify({'Predicted_Store_Sales_Total': rounded_preds})
    except Exception as e:
        print(f"Error during batch prediction: {e}")  # Added print for debugging
        return jsonify({"error": str(e), "message": "Prediction failed"}), 500


# Run the Flask app
if __name__ == '__main__':
    superkart_api.run(host="0.0.0.0", port=7860)