Upload folder using huggingface_hub
- Dockerfile +10 -36
- app.py +62 -154
- requirements.txt +9 -6
Dockerfile
CHANGED
@@ -1,42 +1,16 @@
-
 FROM python:3.9-slim
 
+# Set the working directory inside the container
 WORKDIR /app
 
-#
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
-    build-essential \
-    gcc \
-    g++ \
-    curl \
-    && rm -rf /var/lib/apt/lists/*
-
-# Copy requirements first
-COPY requirements.txt /app/requirements.txt
-
-# Install Python dependencies with compatible versions
-# Force reinstallation with --no-binary for key packages
-RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
-    pip install --no-cache-dir -r requirements.txt && \
-    pip install --no-cache-dir --force-reinstall numpy pandas scikit-learn
-
-# Copy application files
-COPY superkart_model_v1_0.joblib /app/superkart_model_v1_0.joblib
-COPY app.py /app/app.py
-
-# Expose the port
-EXPOSE 5000
-
-# Simple shell script to test if the model can be loaded before starting the server
-RUN echo '#!/bin/bash\n\
-python -c "import joblib; print(\"Testing model loading...\"); try: joblib.load(\"/app/superkart_model_v1_0.joblib\"); print(\"Model loads successfully\"); except Exception as e: print(f\"Model loading error: {e}\"); exit(1)" && \
-exec gunicorn --workers=1 --timeout=600 --graceful-timeout=300 --log-level=debug -b 0.0.0.0:5000 app:app\
-' > /app/startup.sh && chmod +x /app/startup.sh
+# Copy all files from the current directory to the container's working directory
+COPY . .
 
-#
-
-CMD curl -f http://localhost:5000/ || exit 1
+# Install dependencies from the requirements file without using cache to reduce image size
+RUN pip install --no-cache-dir -r requirements.txt
 
-#
-
+# Define the command to start the application using Gunicorn with 4 worker processes
+# - `-w 4`: Uses 4 worker processes for handling requests
+# - `-b 0.0.0.0:7860`: Binds the server to port 7860 on all network interfaces
+# - `app:app`: Runs the Flask app (assuming `app.py` contains the Flask instance named `app`)
+CMD ["gunicorn", "-w", "4", "-b", "0.0.0.0:7860", "app:app"]
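
The rewritten Dockerfile drops the system build tools and the model-loading startup script: it copies the whole repository into the image, installs the pinned requirements, and serves the Flask app with Gunicorn on port 7860, the default port a Docker-based Hugging Face Space is expected to listen on. Below is a minimal smoke test for a locally running container, assuming the image was built from this folder and started with the port published (e.g. docker run -p 7860:7860 <image>); the base URL is an illustrative assumption:

import requests

# Assumed local deployment; adjust the host/port to wherever the container runs.
BASE_URL = "http://localhost:7860"

# The home route defined in app.py should answer with the welcome message.
resp = requests.get(f"{BASE_URL}/")
print(resp.status_code, resp.text)
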
app.py
CHANGED
@@ -1,166 +1,74 @@
-
 import joblib
-import os
-import time
-import json
+import pandas as pd
 from flask import Flask, request, jsonify
 
-#
-app = Flask(
+# Initialize Flask app with a name
+app = Flask("Superkart sales forecasting")
 
-#
-
-loaded_model = None
-model_loading = False
+# Load the trained sales forecasting model
+model = joblib.load("superkart_model_v1_0.joblib")
 
-#
-
+# Define a route for the home page
+@app.route('/')
+def home():
+    return "Welcome to the Superkart Sales Forecasting API"
 
-#
-
-
-
-
-    if model_loading:
-        return None
-
-    try:
-        model_loading = True
-        model_load_attempted = True
-        print(f"Starting model loading from {MODEL_PATH}...")
-
-        # Check if model file exists
-        if not os.path.exists(MODEL_PATH):
-            print(f"ERROR: Model file not found at {MODEL_PATH}")
-            model_loading = False
-            return None
-
-        # Check model file size
-        file_size = os.path.getsize(MODEL_PATH) / (1024 * 1024)  # Size in MB
-        print(f"Model file size: {file_size:.2f} MB")
-
-        start_time = time.time()
-        loaded_model = joblib.load(MODEL_PATH)
-        load_time = time.time() - start_time
-        print(f"Model loaded successfully in {load_time:.2f} seconds")
-
-        return loaded_model
-
-    except Exception as e:
-        print(f"ERROR loading model: {str(e)}")
-        return None
-
-    finally:
-        model_loading = False
+# Define an endpoint to predict sales for a single product in a store
+@app.route('/v1/predict', methods=['POST'])
+def predict_sales():
+    # Get JSON data from the request
+    product_store_data = request.get_json()
 
-#
-
-
-
-
-
-
+    # Extract relevant features from the input data
+    sample = {
+        'Product_Weight': product_store_data['Product_Weight'],
+        'Product_Sugar_Content': product_store_data['Product_Sugar_Content'],
+        'Product_Allocated_Area': product_store_data['Product_Allocated_Area'],
+        'Product_Type': product_store_data['Product_Type'],
+        'Product_MRP': product_store_data['Product_MRP'],
+        'Store_Establishment_Year': product_store_data['Store_Establishment_Year'],
+        'Store_Size': product_store_data['Store_Size'],
+        'Store_Location_City_Type': product_store_data['Store_Location_City_Type'],
+        'Store_Type': product_store_data['Store_Type']
+    }
 
-#
-
-def status():
-    global loaded_model, model_load_attempted
-
-    # Try to load model if not loaded and not attempted yet
-    if loaded_model is None and not model_load_attempted:
-        try:
-            load_model()
-        except Exception as e:
-            print(f"Auto-load attempt failed: {str(e)}")
-
-    return jsonify({
-        "status": "running",
-        "model_loaded": loaded_model is not None,
-        "load_attempted": model_load_attempted
-    })
+    # Convert the extracted data into a DataFrame
+    input_data = pd.DataFrame([sample])
 
-#
-
-def predict():
-    global loaded_model
-
-    # Try to load model if not loaded yet
-    if loaded_model is None:
-        loaded_model = load_model()
-
-    # Check if model loaded successfully
-    if loaded_model is None:
-        return jsonify({
-            "error": "Model could not be loaded",
-            "details": "See server logs for more information"
-        }), 500
-
-    try:
-        # Get JSON data
-        data = request.get_json()
-
-        # Very basic validation
-        if not data:
-            return jsonify({"error": "No input data provided"}), 400
-
-        # Convert prediction to list
-        # We're using minimal code here to avoid compatibility issues
-        result = {
-            "status": "success",
-            "message": "Prediction would happen here - model is loaded"
-        }
-
-        # For debugging only
-        result["model_type"] = str(type(loaded_model))
-
-        return jsonify(result)
-
-    except Exception as e:
-        return jsonify({
-            "error": str(e),
-            "stack": str(e.__traceback__)
-        }), 500
+    # Make a sales prediction using the trained model
+    prediction = model.predict(input_data).tolist()[0]
 
-#
-
-def server_error(e):
-    return jsonify({
-        "error": "Internal server error",
-        "message": str(e)
-    }), 500
+    # Return the prediction as a JSON response
+    return jsonify({'Predicted_Sales': prediction})
 
-#
-@app.route('/
-def
-
-
-
-#
-
-
-
-
-
-
-
-
-#
-    try:
-        import numpy as np
-        debug_info["numpy_version"] = np.__version__
-    except:
-        debug_info["numpy_version"] = "not available"
-
-    try:
-        import pandas as pd
-        debug_info["pandas_version"] = pd.__version__
-    except:
-        debug_info["pandas_version"] = "not available"
-
+# Define an endpoint to predict sales for a batch of products/stores
+@app.route('/v1/batch_predict', methods=['POST'])
+def predict_sales_batch():
+    # Get the uploaded CSV file from the request
+    file = request.files['file']
+
+    # Read the file into a DataFrame
+    input_data = pd.read_csv(file)
+
+    # Make predictions for the batch data
+    predictions = model.predict(input_data).tolist()
+
+    # Assuming the input CSV has a unique identifier column (e.g., 'Product_Store_ID' or a combination)
+    # You'll need to adjust the column name based on your CSV structure
+    # If there's no single ID, you might need to return predictions in the same order as the input rows
+    # For simplicity, let's assume a 'Product_Store_ID' column exists for mapping
+    # If not, you might just return the list of predictions
     try:
-
-
-
-
-
+        product_store_ids = input_data['Product_Id'].values.tolist()  # Assuming Product_Id is sufficient or create a combined ID
+        output_dict = dict(zip(product_store_ids, predictions))
+        return jsonify(output_dict)
+    except KeyError:
+        # If no ID column is available, just return the list of predictions
+        return jsonify({'Predictions': predictions})
+
+
+# Run the Flask app in debug mode
+if __name__ == '__main__':
+    # In a production environment, you would typically use a production-ready WSGI server like Gunicorn
+    # For local testing and development, debug=True is fine.
+    app.run(debug=True)
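
The new app.py exposes two prediction routes: /v1/predict accepts a single JSON record containing the nine product and store fields read in predict_sales(), and /v1/batch_predict accepts an uploaded CSV whose columns match the model's training features. Below is a sketch of calling both endpoints with the requests library (pinned in requirements.txt); the base URL, field values, and CSV filename are illustrative assumptions, not values taken from this repository:

import requests

BASE_URL = "http://localhost:7860"  # assumed deployment address

# Single-record prediction: keys mirror those read in predict_sales();
# the values below are made-up examples.
sample = {
    "Product_Weight": 12.5,
    "Product_Sugar_Content": "Low Sugar",
    "Product_Allocated_Area": 0.027,
    "Product_Type": "Frozen Foods",
    "Product_MRP": 117.08,
    "Store_Establishment_Year": 2009,
    "Store_Size": "Medium",
    "Store_Location_City_Type": "Tier 2",
    "Store_Type": "Supermarket Type2",
}
resp = requests.post(f"{BASE_URL}/v1/predict", json=sample)
print(resp.json())  # e.g. {"Predicted_Sales": ...}

# Batch prediction: the multipart field must be named "file", matching
# request.files['file'] in predict_sales_batch(); the CSV path is hypothetical.
with open("products_batch.csv", "rb") as f:
    resp = requests.post(f"{BASE_URL}/v1/batch_predict", files={"file": f})
print(resp.json())
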
requirements.txt
CHANGED
@@ -1,7 +1,10 @@
-
-
+pandas==2.2.2
+numpy==2.0.2
+scikit-learn==1.6.1
+xgboost==2.1.4
+joblib==1.4.2
+Werkzeug==2.2.2
+flask==2.2.2
 gunicorn==20.1.0
-
-
-pandas==1.3.5  # Compatible with numpy 1.21.6
-scikit-learn==1.0.2  # Compatible with both, known stable
+requests==2.28.1
+uvicorn[standard]