# NOTE(review): the three lines below were Hugging Face web-UI residue
# (uploader avatar caption, commit message, commit hash) accidentally saved
# into the source file; kept here as comments so the file parses as Python.
# omoral02's picture
# Upload folder using huggingface_hub
# dffad5b verified
# Import necessary libraries
import numpy as np
import joblib
import pandas as pd
from flask import Flask, request, jsonify
from huggingface_hub import hf_hub_download
import joblib
import tempfile
import streamlit as st
import io # Import io module
# Hugging Face Hub coordinates of the serialized model artifact.
REPO_ID = "omoral02/RevenuePrediction"
FILENAME = "store-sales-prediction-model-v1-0.joblib"
# Write model to temp directory (writable in Hugging Face Spaces)
temp_dir = tempfile.gettempdir()
# Download (or reuse a cached copy of) the joblib file from the Hub.
# NOTE(review): this runs at import time — a network failure here prevents
# the whole app from starting; confirm that is acceptable for this Space.
model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME, cache_dir=temp_dir)
# Deserialize the trained model/pipeline (presumably scikit-learn — it is
# only ever used through .predict below).
model = joblib.load(model_path)
# Initialize the Flask app
superkart_api = Flask("SuperKart Sales Predictor")
# NOTE(review): dead code kept for reference — this feature-engineering step
# was disabled, and raw columns are now passed straight to model.predict,
# which suggests the deployed joblib pipeline performs its own preprocessing
# (verify against the training notebook). `bin_categorical` referenced below
# is not defined anywhere in this file.
# def transform_input_for_model(df_raw):
# # Binning
# df_raw['Product_MRP_bin'] = pd.qcut(df_raw['Product_MRP'], q=5, duplicates='drop')
# df_raw['Product_Weight_bin'] = pd.qcut(df_raw['Product_Weight'], q=5, duplicates='drop')
# df_raw['Product_Type_Binned'] = bin_categorical(df_raw['Product_Type'])
# df_raw['Store_Type_Binned'] = bin_categorical(df_raw['Store_Type'])
# df_raw['Store_Location_City_Type_Binned'] = bin_categorical(df_raw['Store_Location_City_Type'])
# # Dummy encoding
# df_encoded = pd.get_dummies(df_raw, columns=[
# 'Product_Type_Binned', 'Store_Type_Binned',
# 'Store_Location_City_Type_Binned',
# 'Product_MRP_bin', 'Product_Weight_bin'
# ], drop_first=False)
# # Drop original fields
# df_encoded = df_encoded.select_dtypes(include=['number']).copy()
# return df_encoded
# Load the trained model
def load_model():
    """Return the model object deserialized at import time.

    NOTE(review): the original wrapped this in Streamlit's
    ``@st.cache_resource``, but this service runs under Flask, where that
    decorator has no Streamlit runtime to cache into and only couples the
    app to an unused framework — removed. The function itself is kept so
    existing callers of ``load_model()`` keep working.
    """
    return model


model = load_model()
# Define root endpoint
@superkart_api.get('/')
def home():
    """Root endpoint: returns a small JSON welcome/liveness message."""
    payload = {"okay": "Welcome to the SuperKart Sales Prediction API!"}
    return jsonify(payload)
# Endpoint for single record prediction
@superkart_api.post('/v1/predict')
def predict_sales():
    """Predict total store sales for a single record supplied as JSON.

    The request body must be a JSON object containing every field listed in
    ``expected_fields``. Responses:
      200 — {'Predicted_Store_Sales_Total': <float rounded to 2 dp>}
      400 — missing/invalid JSON body, or required fields absent
      500 — model unavailable or prediction raised
    """
    if model is None:
        return jsonify({"error": "Model not loaded"}), 500
    try:
        # silent=True returns None (instead of raising) for a missing or
        # non-JSON body, letting us answer with a clean 400 rather than
        # falling through to the generic 500 handler.
        input_json = request.get_json(silent=True)
        if input_json is None:
            return jsonify({"error": "Request body must be a JSON object."}), 400
        expected_fields = [
            'Product_Type', 'Store_Type', 'Store_Location_City_Type',
            'Store_Size', 'Product_Sugar_Content', 'Product_Weight',
            'Product_MRP', 'Product_Allocated_Area', 'Store_Establishment_Year'
        ]
        missing = [f for f in expected_fields if f not in input_json]
        if missing:
            return jsonify({
                'error': 'Missing required input fields.',
                'missing_fields': missing,
                'received_fields': list(input_json.keys())
            }), 400
        # Keep only the expected columns, in this fixed order — must match
        # the columns the model was trained on.
        features = {field: input_json[field] for field in expected_fields}
        input_df = pd.DataFrame([features])
        prediction = model.predict(input_df)[0]
        return jsonify({'Predicted_Store_Sales_Total': round(float(prediction), 2)})
    except Exception as e:
        print(f"Error during single prediction: {e}")  # visible in Space logs
        return jsonify({"error": str(e), "message": "Prediction failed"}), 500
# Endpoint for batch prediction using CSV
@superkart_api.post('/v1/batch')
def predict_sales_batch():
    """Predict total store sales for many records uploaded as a CSV file.

    The CSV must be sent as multipart form field ``file`` and contain every
    column in ``expected_fields``. Responses:
      200 — {'Predicted_Store_Sales_Total': [<float rounded to 2 dp>, ...]}
      400 — file missing, or required columns absent
      500 — model unavailable or prediction raised
    """
    if model is None:  # mirror the single-record endpoint's guard
        return jsonify({"error": "Model not loaded"}), 500
    if 'file' not in request.files:
        # Previously a missing upload raised KeyError and surfaced as a 500;
        # a client mistake should be reported as a 400 instead.
        return jsonify({'error': "No file uploaded; send the CSV as form field 'file'."}), 400
    try:
        input_df = pd.read_csv(request.files['file'])
        expected_fields = [
            'Product_Type', 'Store_Type', 'Store_Location_City_Type',
            'Store_Size', 'Product_Sugar_Content', 'Product_Weight',
            'Product_MRP', 'Product_Allocated_Area', 'Store_Establishment_Year'
        ]
        missing = [f for f in expected_fields if f not in input_df.columns]
        if missing:
            return jsonify({
                'error': 'Missing required columns in uploaded CSV.',
                'missing_columns': missing,
                'received_columns': list(input_df.columns)
            }), 400
        predictions = model.predict(input_df).tolist()
        rounded_preds = [round(float(p), 2) for p in predictions]
        return jsonify({'Predicted_Store_Sales_Total': rounded_preds})
    except Exception as e:
        print(f"Error during batch prediction: {e}")  # visible in Space logs
        return jsonify({"error": str(e), "message": "Prediction failed"}), 500
# Run the Flask app when executed directly. Binding 0.0.0.0:7860 matches the
# port Hugging Face Spaces exposes for web apps.
if __name__ == '__main__':
    superkart_api.run(host="0.0.0.0", port=7860)