Upload folder using huggingface_hub
- Dockerfile +6 -0
- app.py +57 -0
- best_model_pipeline.pkl +3 -0
- requirements.txt +14 -0
Dockerfile
ADDED
@@ -0,0 +1,6 @@
FROM python:3.9-slim
WORKDIR /app
COPY . .
RUN pip3 install --no-cache-dir -r requirements.txt
EXPOSE 7860
CMD ["gunicorn", "-b", "0.0.0.0:7860", "app:app"]
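With this Dockerfile, the Space builds the image and starts gunicorn on port 7860. Once the container is up (locally, roughly: docker build -t superkart . followed by docker run -p 7860:7860 superkart, where the image name is just an example; on the Space this happens automatically), the root endpoint works as a smoke test. A minimal sketch, assuming the API is reachable at http://localhost:7860 (substitute the Space URL once deployed):

import requests

# Base URL is an assumption: localhost for a local Docker run,
# or the Space URL (e.g. https://your-space.hf.space) once deployed.
BASE_URL = "http://localhost:7860"

# GET / should return the health-check JSON defined in app.py
resp = requests.get(f"{BASE_URL}/")
print(resp.status_code)  # expected: 200
print(resp.json())       # expected: {"status": "running", "message": "..."}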
app.py
ADDED
@@ -0,0 +1,57 @@
from flask import Flask, request, jsonify
import joblib
import pandas as pd

# Initialise the Flask application
app = Flask(__name__)

# Load the serialised model pipeline (preprocessor + model in one object); this file is created in the serialisation step
model = joblib.load("best_model_pipeline.pkl")

# Define the exact feature names the model expects, in the order used during training
EXPECTED_FEATURES = [
    'Product_Weight', 'Product_Sugar_Content', 'Product_Allocated_Area',
    'Product_Type', 'Product_MRP', 'Store_Size', 'Store_Location_City_Type',
    'Store_Type', 'Store_Age', 'Product_Category'
]

# Health-check endpoint: confirms the API is running at the root URL (e.g. https://your-space.hf.space/)
@app.route('/')
def home():
    return jsonify({"status": "running", "message": "SuperKart Sales Forecasting API is live."})

# Prediction endpoint: accepts POST requests with JSON (product/store features) and returns the predicted sales revenue as JSON
@app.route('/predict', methods=['POST'])
def predict():
    try:
        # Parse incoming JSON data from the request body
        data = request.get_json(force=True)

        # Support both single predictions (dict) and batch predictions (list of dicts)
        if isinstance(data, dict):
            data = [data]

        # Convert to a DataFrame so that the pipeline can process it
        input_df = pd.DataFrame(data)

        # Check that all required features are present in the input
        missing = set(EXPECTED_FEATURES) - set(input_df.columns)
        if missing:
            return jsonify({"error": f"Missing features: {list(missing)}"}), 400

        # Reorder columns to match the order used during training
        input_df = input_df[EXPECTED_FEATURES]

        # Run the full pipeline: preprocessing (encoding + scaling) followed by the model prediction
        predictions = model.predict(input_df)

        # Return predictions as a JSON list
        return jsonify({"predictions": predictions.tolist()})

    except Exception as e:
        # Catch any errors and return them as a 500 response
        return jsonify({"error": str(e)}), 500

# Run the app on port 7860 (Hugging Face's default port for Docker Spaces)
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)
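For reference, a call to the /predict endpoint could look like the sketch below. The field names match EXPECTED_FEATURES, but every value is a made-up placeholder rather than a record from the SuperKart data, and the URL assumes a local run on port 7860 (a deployed Space would use its own URL).

import requests

# Hypothetical example record: field names match EXPECTED_FEATURES,
# but every value here is a placeholder, not taken from the real dataset.
sample = {
    "Product_Weight": 12.5,
    "Product_Sugar_Content": "Low Sugar",        # placeholder category
    "Product_Allocated_Area": 0.05,
    "Product_Type": "Snack Foods",               # placeholder category
    "Product_MRP": 150.0,
    "Store_Size": "Medium",                      # placeholder category
    "Store_Location_City_Type": "Tier 2",        # placeholder category
    "Store_Type": "Supermarket Type2",           # placeholder category
    "Store_Age": 15,
    "Product_Category": "Food",                  # placeholder category
}

# A single dict works; a list of dicts would return batch predictions.
resp = requests.post("http://localhost:7860/predict", json=sample)
print(resp.json())  # e.g. {"predictions": [<forecasted sales value>]}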
best_model_pipeline.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7fbcdab28a9165856d92cb6bc5509816d4a40d5d850c2a4b51084ca77f1e83f3
size 1478341
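This is only the Git LFS pointer; the actual pickle lives in LFS storage. The file itself comes from the serialisation step mentioned in app.py, which is not part of this upload. It could look roughly like the sketch below, assuming a scikit-learn Pipeline that bundles the preprocessor with the chosen model; the numeric/categorical column split and the XGBRegressor choice are assumptions here, not the actual training code.

import joblib
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from xgboost import XGBRegressor  # assumption: XGBoost was the best model

# Assumed split of EXPECTED_FEATURES into numeric and categorical columns
numeric_cols = ["Product_Weight", "Product_Allocated_Area", "Product_MRP", "Store_Age"]
categorical_cols = ["Product_Sugar_Content", "Product_Type", "Store_Size",
                    "Store_Location_City_Type", "Store_Type", "Product_Category"]

preprocessor = ColumnTransformer([
    ("num", StandardScaler(), numeric_cols),
    ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_cols),
])

# Preprocessing + model in one object, so app.py only needs a single joblib.load()
best_model_pipeline = Pipeline([
    ("preprocessor", preprocessor),
    ("model", XGBRegressor()),
])

# best_model_pipeline.fit(X_train, y_train)  # fitted on the training data elsewhere
joblib.dump(best_model_pipeline, "best_model_pipeline.pkl")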
requirements.txt
ADDED
@@ -0,0 +1,14 @@
# Flask: the web framework that handles HTTP requests and routing
flask==3.1.1
# Joblib: loads the serialised model pipeline from disk
joblib==1.4.2
# Pandas: converts incoming JSON to a DataFrame for the pipeline
pandas==2.2.2
# Numpy: required by scikit-learn and the model internally
numpy==2.0.2
# Scikit-learn: contains the preprocessor (OneHotEncoder, StandardScaler) and model
scikit-learn==1.6.1
# XGBoost: required if the best model is XGBoost
xgboost==2.1.4
# Gunicorn: production-grade WSGI server (replaces Flask's dev server)
gunicorn==23.0.0
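Since app.py unpickles the pipeline, the scikit-learn, xgboost, numpy and joblib versions at serving time should match the ones used when the pipeline was dumped; otherwise loading may warn or fail. A small sanity check, just a sketch whose expected versions simply mirror the pins above:

from importlib.metadata import version

# Expected versions mirror the pins in requirements.txt
expected = {
    "flask": "3.1.1",
    "joblib": "1.4.2",
    "pandas": "2.2.2",
    "numpy": "2.0.2",
    "scikit-learn": "1.6.1",
    "xgboost": "2.1.4",
    "gunicorn": "23.0.0",
}

for package, pinned in expected.items():
    installed = version(package)
    status = "OK" if installed == pinned else "MISMATCH"
    print(f"{package}: installed {installed}, pinned {pinned} -> {status}")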