pawanmall committed on
Commit
7174399
·
verified ·
1 Parent(s): d97b45b

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. Dockerfile +10 -36
  2. app.py +62 -154
  3. requirements.txt +9 -6
Dockerfile CHANGED
@@ -1,42 +1,16 @@
1
-
2
  FROM python:3.9-slim
3
 
 
4
  WORKDIR /app
5
 
6
- # Install build dependencies and curl
7
- RUN apt-get update && \
8
- apt-get install -y --no-install-recommends \
9
- build-essential \
10
- gcc \
11
- g++ \
12
- curl \
13
- && rm -rf /var/lib/apt/lists/*
14
-
15
- # Copy requirements first
16
- COPY requirements.txt /app/requirements.txt
17
-
18
- # Install Python dependencies with compatible versions
19
- # Force reinstallation with --no-binary for key packages
20
- RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
21
- pip install --no-cache-dir -r requirements.txt && \
22
- pip install --no-cache-dir --force-reinstall numpy pandas scikit-learn
23
-
24
- # Copy application files
25
- COPY superkart_model_v1_0.joblib /app/superkart_model_v1_0.joblib
26
- COPY app.py /app/app.py
27
-
28
- # Expose the port
29
- EXPOSE 5000
30
-
31
- # Simple shell script to test if the model can be loaded before starting the server
32
- RUN echo '#!/bin/bash\n\
33
- python -c "import joblib; print(\"Testing model loading...\"); try: joblib.load(\"/app/superkart_model_v1_0.joblib\"); print(\"Model loads successfully\"); except Exception as e: print(f\"Model loading error: {e}\"); exit(1)" && \
34
- exec gunicorn --workers=1 --timeout=600 --graceful-timeout=300 --log-level=debug -b 0.0.0.0:5000 app:app\
35
- ' > /app/startup.sh && chmod +x /app/startup.sh
36
 
37
- # Health check
38
- HEALTHCHECK --interval=30s --timeout=30s --start-period=30s --retries=3 \
39
- CMD curl -f http://localhost:5000/ || exit 1
40
 
41
- # Run with a single worker to reduce memory usage
42
- CMD ["/app/startup.sh"]
 
 
 
 
 
1
  FROM python:3.9-slim
2
 
3
+ # Set the working directory inside the container
4
  WORKDIR /app
5
 
6
+ # Copy all files from the current directory to the container's working directory
7
+ COPY . .
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
+ # Install dependencies from the requirements file without using cache to reduce image size
10
+ RUN pip install --no-cache-dir -r requirements.txt
 
11
 
12
+ # Define the command to start the application using Gunicorn with 4 worker processes
13
+ # - `-w 4`: Uses 4 worker processes for handling requests
14
+ # - `-b 0.0.0.0:7860`: Binds the server to port 7860 on all network interfaces
15
+ # - `app:app`: Runs the Flask app (assuming `app.py` contains the Flask instance named `app`)
16
+ CMD ["gunicorn", "-w", "4", "-b", "0.0.0.0:7860", "app:app"]
app.py CHANGED
@@ -1,166 +1,74 @@
1
-
2
  import joblib
3
- import os
4
- import time
5
- import json
6
  from flask import Flask, request, jsonify
7
 
8
- # Define the Flask API
9
- app = Flask(__name__)
10
 
11
- # Global variables
12
- MODEL_PATH = "superkart_model_v1_0.joblib"
13
- loaded_model = None
14
- model_loading = False
15
 
16
- # Mark whether we've attempted to load the model yet
17
- model_load_attempted = False
 
 
18
 
19
- # Simple wrapper to safely load model
20
- def load_model():
21
- global loaded_model, model_loading, model_load_attempted
22
-
23
- # Don't try to load if already loading
24
- if model_loading:
25
- return None
26
-
27
- try:
28
- model_loading = True
29
- model_load_attempted = True
30
- print(f"Starting model loading from {MODEL_PATH}...")
31
-
32
- # Check if model file exists
33
- if not os.path.exists(MODEL_PATH):
34
- print(f"ERROR: Model file not found at {MODEL_PATH}")
35
- model_loading = False
36
- return None
37
-
38
- # Check model file size
39
- file_size = os.path.getsize(MODEL_PATH) / (1024 * 1024) # Size in MB
40
- print(f"Model file size: {file_size:.2f} MB")
41
-
42
- start_time = time.time()
43
- loaded_model = joblib.load(MODEL_PATH)
44
- load_time = time.time() - start_time
45
- print(f"Model loaded successfully in {load_time:.2f} seconds")
46
-
47
- return loaded_model
48
-
49
- except Exception as e:
50
- print(f"ERROR loading model: {str(e)}")
51
- return None
52
-
53
- finally:
54
- model_loading = False
55
 
56
- # Health check endpoint
57
- @app.route('/')
58
- def index():
59
- return jsonify({
60
- "status": "running",
61
- "message": "SuperKart Sales Forecasting API"
62
- })
 
 
 
 
 
63
 
64
- # Status endpoint
65
- @app.route('/status')
66
- def status():
67
- global loaded_model, model_load_attempted
68
-
69
- # Try to load model if not loaded and not attempted yet
70
- if loaded_model is None and not model_load_attempted:
71
- try:
72
- load_model()
73
- except Exception as e:
74
- print(f"Auto-load attempt failed: {str(e)}")
75
-
76
- return jsonify({
77
- "status": "running",
78
- "model_loaded": loaded_model is not None,
79
- "load_attempted": model_load_attempted
80
- })
81
 
82
- # Simple predict endpoint - no complex processing to avoid compatibility issues
83
- @app.route('/predict', methods=['POST'])
84
- def predict():
85
- global loaded_model
86
-
87
- # Try to load model if not loaded yet
88
- if loaded_model is None:
89
- loaded_model = load_model()
90
-
91
- # Check if model loaded successfully
92
- if loaded_model is None:
93
- return jsonify({
94
- "error": "Model could not be loaded",
95
- "details": "See server logs for more information"
96
- }), 500
97
-
98
- try:
99
- # Get JSON data
100
- data = request.get_json()
101
-
102
- # Very basic validation
103
- if not data:
104
- return jsonify({"error": "No input data provided"}), 400
105
-
106
- # Convert prediction to list
107
- # We're using minimal code here to avoid compatibility issues
108
- result = {
109
- "status": "success",
110
- "message": "Prediction would happen here - model is loaded"
111
- }
112
-
113
- # For debugging only
114
- result["model_type"] = str(type(loaded_model))
115
-
116
- return jsonify(result)
117
-
118
- except Exception as e:
119
- return jsonify({
120
- "error": str(e),
121
- "stack": str(e.__traceback__)
122
- }), 500
123
 
124
- # Error handler
125
- @app.errorhandler(500)
126
- def server_error(e):
127
- return jsonify({
128
- "error": "Internal server error",
129
- "message": str(e)
130
- }), 500
131
 
132
- # Add a simple debug endpoint
133
- @app.route('/debug')
134
- def debug():
135
- import sys
136
- import platform
137
-
138
- # Collect debugging information
139
- debug_info = {
140
- "python_version": sys.version,
141
- "platform": platform.platform(),
142
- "architecture": platform.architecture(),
143
- "path": sys.path,
144
- "env": {k: v for k, v in os.environ.items() if not k.startswith('_')}
145
- }
146
-
147
- # Import diagnostics
148
- try:
149
- import numpy as np
150
- debug_info["numpy_version"] = np.__version__
151
- except:
152
- debug_info["numpy_version"] = "not available"
153
-
154
- try:
155
- import pandas as pd
156
- debug_info["pandas_version"] = pd.__version__
157
- except:
158
- debug_info["pandas_version"] = "not available"
159
-
160
  try:
161
- import sklearn
162
- debug_info["sklearn_version"] = sklearn.__version__
163
- except:
164
- debug_info["sklearn_version"] = "not available"
165
-
166
- return jsonify(debug_info)
 
 
 
 
 
 
 
 
 
1
  import joblib
2
+ import pandas as pd
 
 
3
  from flask import Flask, request, jsonify
4
 
5
+ # Initialize Flask app with a name
6
+ app = Flask("Superkart sales forecasting")
7
 
8
+ # Load the trained sales forecasting model
9
+ model = joblib.load("superkart_model_v1_0.joblib")
 
 
10
 
11
+ # Define a route for the home page
12
+ @app.route('/')
13
+ def home():
14
+ return "Welcome to the Superkart Sales Forecasting API"
15
 
16
+ # Define an endpoint to predict sales for a single product in a store
17
+ @app.route('/v1/predict', methods=['POST'])
18
+ def predict_sales():
19
+ # Get JSON data from the request
20
+ product_store_data = request.get_json()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
+ # Extract relevant features from the input data
23
+ sample = {
24
+ 'Product_Weight': product_store_data['Product_Weight'],
25
+ 'Product_Sugar_Content': product_store_data['Product_Sugar_Content'],
26
+ 'Product_Allocated_Area': product_store_data['Product_Allocated_Area'],
27
+ 'Product_Type': product_store_data['Product_Type'],
28
+ 'Product_MRP': product_store_data['Product_MRP'],
29
+ 'Store_Establishment_Year': product_store_data['Store_Establishment_Year'],
30
+ 'Store_Size': product_store_data['Store_Size'],
31
+ 'Store_Location_City_Type': product_store_data['Store_Location_City_Type'],
32
+ 'Store_Type': product_store_data['Store_Type']
33
+ }
34
 
35
+ # Convert the extracted data into a DataFrame
36
+ input_data = pd.DataFrame([sample])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
+ # Make a sales prediction using the trained model
39
+ prediction = model.predict(input_data).tolist()[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
+ # Return the prediction as a JSON response
42
+ return jsonify({'Predicted_Sales': prediction})
 
 
 
 
 
43
 
44
+ # Define an endpoint to predict sales for a batch of products/stores
45
+ @app.route('/v1/batch_predict', methods=['POST'])
46
+ def predict_sales_batch():
47
+ # Get the uploaded CSV file from the request
48
+ file = request.files['file']
49
+
50
+ # Read the file into a DataFrame
51
+ input_data = pd.read_csv(file)
52
+
53
+ # Make predictions for the batch data
54
+ predictions = model.predict(input_data).tolist()
55
+
56
+ # Map each prediction to its row identifier when the input CSV provides one.
56
+ # The lookup below uses a 'Product_Id' column; adjust the column name to match your CSV structure.
57
+ # If there is no single ID column, predictions are returned in the same order as the input rows.
58
+ # For simplicity, assume a 'Product_Id' column exists for mapping.
59
+ # If it does not, the KeyError fallback below returns just the list of predictions.
 
 
 
 
 
 
 
 
 
 
 
61
  try:
62
+ product_store_ids = input_data['Product_Id'].values.tolist() # Assuming Product_Id is sufficient or create a combined ID
63
+ output_dict = dict(zip(product_store_ids, predictions))
64
+ return jsonify(output_dict)
65
+ except KeyError:
66
+ # If no ID column is available, just return the list of predictions
67
+ return jsonify({'Predictions': predictions})
68
+
69
+
70
+ # Run the Flask development server in debug mode only when this file is executed directly
71
+ if __name__ == '__main__':
72
+ # In a production environment, you would typically use a production-ready WSGI server like Gunicorn
73
+ # For local testing and development, debug=True is fine.
74
+ app.run(debug=True)
requirements.txt CHANGED
@@ -1,7 +1,10 @@
1
- flask==2.0.1
2
- joblib==1.1.1
 
 
 
 
 
3
  gunicorn==20.1.0
4
- # Install binary packages with compatible versions
5
- numpy==1.21.6 # Last version that supports Python 3.9 well
6
- pandas==1.3.5 # Compatible with numpy 1.21.6
7
- scikit-learn==1.0.2 # Compatible with both, known stable
 
1
+ pandas==2.2.2
2
+ numpy==2.0.2
3
+ scikit-learn==1.6.1
4
+ xgboost==2.1.4
5
+ joblib==1.4.2
6
+ Werkzeug==2.2.2
7
+ flask==2.2.2
8
  gunicorn==20.1.0
9
+ requests==2.28.1
10
+ uvicorn[standard]