Shalyn committed on
Commit
55dd870
·
verified ·
1 Parent(s): 1015936

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. Dockerfile +9 -9
  2. app.py +78 -52
  3. requirements.txt +10 -8
Dockerfile CHANGED
@@ -1,16 +1,16 @@
1
- # Use a minimal base image with Python 3.9 installed
2
  FROM python:3.9-slim
3
 
4
- # Set the working directory inside the container to /app
5
  WORKDIR /app
6
 
7
- # Copy all files from the current directory on the host to the container's /app directory
8
  COPY . .
9
 
10
- # Install Python dependencies listed in requirements.txt
11
- RUN pip3 install -r requirements.txt
12
 
13
- # Define the command to run the Streamlit app on port 8501 and make it accessible externally
14
- CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0", "--server.enableXsrfProtection=false"]
15
-
16
- # NOTE: Disable XSRF protection for easier external access in order to make batch predictions
 
 
 
1
FROM python:3.9-slim

# Set the working directory inside the container
WORKDIR /app

# Copy all files from the current directory to the container's working directory
COPY . .

# Install dependencies from the requirements file without using cache to reduce image size
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Define the command to start the application using Gunicorn with 4 worker processes
# - `-w 4`: Uses 4 worker processes for handling requests
# - `-b 0.0.0.0:7860`: Binds the server to port 7860 on all network interfaces
# - `app:sales_forecast_api`: serves the Flask instance named `sales_forecast_api`
#   defined in app.py (NOT an instance named `app`)
CMD ["gunicorn", "-w", "4", "-b", "0.0.0.0:7860", "app:sales_forecast_api"]
app.py CHANGED
@@ -1,54 +1,80 @@
1
- import requests
2
- import streamlit as st
3
  import pandas as pd
 
4
 
5
- st.title("Sales Prediction for SuperKart")
6
-
7
- # Batch Prediction
8
- st.subheader("Online Prediction")
9
-
10
- # Input fields for product data
11
- Product_Weight=st.number_input("Product_Weight")
12
- Product_Sugar_Content=st.selectbox("Sugar content",["Low Sugar","Regular","No Sugar","reg"])
13
- Product_Allocated_Area=st.number_input("Product_Allocated_Area")
14
- Product_Type=st.selectbox("Product Type",["Frozen Foods","Dairy","Canned","Baking Goods","Health and Hygiene","Snack Foods","Meat","Household","Hard Drinks","Fruits and Vegetables","Breads","Soft Drinks","Breakfast","Others","Starchy Foods","Seafood"])
15
- Product_MRP=st.number_input("Product_MRP")
16
- Store_Establishment_Year=st.number_input("Store_Establishment_Year",min_value=1900, max_value=9999)
17
- Store_Size=st.selectbox("Store size", ["Medium","High","Small"])
18
- Store_Location_City_Type=st.selectbox("Store_Location_City_Type",["Tier 1","Tier 2","Tier 3"])
19
- Store_Type=st.selectbox("Store_Type",["Food Mart","Supermarket Type1","Supermarket Type2","Departmental Store"])
20
-
21
- sales_data={
22
- 'Product_Weight': Product_Weight,
23
- 'Product_Sugar_Content': Product_Sugar_Content,
24
- 'Product_Allocated_Area': Product_Allocated_Area,
25
- 'Product_Type': Product_Type,
26
- 'Product_MRP': Product_MRP,
27
- 'Store_Establishment_Year': Store_Establishment_Year,
28
- 'Store_Size': Store_Size,
29
- 'Store_Location_City_Type': Store_Location_City_Type,
30
- 'Store_Type': Store_Type
31
- }
32
-
33
- if st.button("Predict", type='primary'):
34
- response = requests.post("https://Shalyn-backend.hf.space/v1/sales", json=sales_data)
35
- if response.status_code == 200:
36
- result = response.json()
37
- sales_prediction = result["Prediction"] # Extract only the value
38
- st.write(f"Based on the information provided, the sales forecast is likely to be {sales_prediction}.")
39
- else:
40
- st.error("Error in API request")
41
-
42
- # Batch Prediction
43
- st.subheader("Batch Prediction")
44
-
45
- file = st.file_uploader("Upload CSV file", type=["csv"])
46
- if file is not None:
47
- if st.button("Predict for Batch", type='primary'):
48
- response = requests.post("https://Shalyn-backend.hf.space/v1/salesbatch", files={"file": file})
49
- if response.status_code == 200:
50
- result = response.json()
51
- st.header("Batch Prediction Results")
52
- st.write(result)
53
- else:
54
- st.error("Error in API request")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import joblib
import pandas as pd
from flask import Flask, request, jsonify

# Initialise the Flask app. The Docker CMD serves this instance via
# gunicorn as "app:sales_forecast_api", so the variable name matters.
sales_forecast_api = Flask('Sales forecasting')

# Load the trained model artifact once at import time so every request
# reuses the same in-memory model.
model = joblib.load('deployment_files/sales_forecast_v1_0.joblib')
11
+
12
# Define the home page (simple health-check / landing endpoint).
@sales_forecast_api.get('/')
def home():
    """Return a plain-text welcome message confirming the API is up."""
    # Fixed typo in the user-facing message: 'forecase' -> 'forecast'.
    return 'Welcome to the sales forecast api'
16
+
17
# Define an endpoint for a single-record sales prediction.
@sales_forecast_api.post('/v1/sales')
def sales_predict():
    """Predict sales for one product/store record.

    Expects a JSON body containing the nine feature keys listed below.
    Returns a JSON object of the form {'Prediction': <float>}.
    A missing key raises KeyError (Flask turns this into a 500; callers
    should send the full payload).
    """
    # Get the payload from the JSON request body.
    sales_data = request.get_json()

    # Extract only the features the model was trained on.
    sample = {
        'Product_Weight': sales_data['Product_Weight'],
        'Product_Sugar_Content': sales_data['Product_Sugar_Content'],
        'Product_Allocated_Area': sales_data['Product_Allocated_Area'],
        'Product_Type': sales_data['Product_Type'],
        'Product_MRP': sales_data['Product_MRP'],
        'Store_Establishment_Year': sales_data['Store_Establishment_Year'],
        'Store_Size': sales_data['Store_Size'],
        'Store_Location_City_Type': sales_data['Store_Location_City_Type'],
        'Store_Type': sales_data['Store_Type']
    }

    input_data = pd.DataFrame([sample])

    # One-hot encode the categorical columns.
    categorical_columns_for_dummies = ['Product_Sugar_Content', 'Product_Type', 'Store_Size',
                                       'Store_Location_City_Type', 'Store_Type']
    input_df_dummies = pd.get_dummies(input_data, columns=categorical_columns_for_dummies,
                                      drop_first=True)

    # BUG FIX: the original reindexed against `X_train.columns`, but X_train
    # is never defined in this module (it only exists in the training
    # environment), so every request raised NameError. Use the feature list
    # the fitted model stores itself (scikit-learn >= 1.0; requirements pin
    # scikit-learn==1.4.2).
    input_aligned = input_df_dummies.reindex(columns=model.feature_names_in_, fill_value=0)

    # Run the model on the aligned single-row frame.
    prediction = model.predict(input_aligned)

    # Cast to a plain float: jsonify cannot serialise numpy scalar types.
    return jsonify({'Prediction': float(prediction[0])})
46
+
47
+
48
# Define an endpoint for batch prediction from an uploaded CSV file.
@sales_forecast_api.post('/v1/salesbatch')
def sales_batch_predict():
    """Predict sales for every row of an uploaded CSV.

    Expects a multipart upload under the key 'file' with columns
    Product_Id, Store_Id and the model's feature columns. Returns a JSON
    list of {'Product_Id', 'Store_Id', 'Prediction'} objects, one per row.
    """
    # Get the uploaded file from the request and read it into a DataFrame.
    file = request.files['file']
    input_data = pd.read_csv(file)

    # One-hot encode the categorical columns.
    # NOTE(review): drop_first=False here is inconsistent with the
    # single-record endpoint (drop_first=True); the reindex below drops any
    # extra columns, so the outputs match either way.
    categorical_columns_for_dummies = ['Product_Sugar_Content', 'Product_Type', 'Store_Size',
                                       'Store_Location_City_Type', 'Store_Type']
    input_df_dummies = pd.get_dummies(input_data, columns=categorical_columns_for_dummies,
                                      drop_first=False)

    # BUG FIX: the original reindexed against `X_train.columns`, but X_train
    # is never defined in this module, so every request raised NameError.
    # Align to the feature list stored on the fitted model instead
    # (scikit-learn >= 1.0).
    input_df_aligned = input_df_dummies.reindex(columns=model.feature_names_in_, fill_value=0)

    # Predict, casting to plain Python floats so jsonify can serialise them.
    predictions = [float(p) for p in model.predict(input_df_aligned)]

    # Pair each prediction with its identifying Product_Id / Store_Id.
    output_list = [
        {'Product_Id': product_id, 'Store_Id': store_id, 'Prediction': pred}
        for product_id, store_id, pred in zip(input_data.Product_Id.tolist(),
                                              input_data.Store_Id.tolist(),
                                              predictions)
    ]

    return jsonify(output_list)
77
+
78
# Run the Flask development server when this file is executed directly.
# NOTE(review): the Docker image serves the app via gunicorn
# ("app:sales_forecast_api"), which never triggers this branch, so
# debug=True only affects local development runs; it must not be used in
# production.
if __name__ == '__main__':
    sales_forecast_api.run(debug=True)
requirements.txt CHANGED
@@ -1,11 +1,13 @@
1
- pandas==2.2.2
2
- numpy==2.0.2
3
- scikit-learn==1.6.1
4
- xgboost==2.1.4
5
- joblib==1.4.2
6
- Werkzeug==2.2.2
7
- flask==2.2.2
8
- gunicorn==20.1.0
 
9
  requests==2.28.1
10
  uvicorn[standard]
11
  streamlit==1.43.2
 
 
1
+ scikit-learn==1.4.2
2
+ pandas==2.0.3
3
+ numpy==1.25.2
4
+ matplotlib==3.7.1
5
+ seaborn==0.13.1
6
+ joblib==1.3.2
7
+ huggingface_hub==0.20.3
8
+ Flask==3.0.2
9
+ gunicorn==21.2.0
10
  requests==2.28.1
11
  uvicorn[standard]
12
  streamlit==1.43.2
13
+ Werkzeug>=3.0.0