grkavi0912 committed on
Commit
63d0f3b
·
verified ·
1 Parent(s): 5956579

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. Dockerfile +14 -12
  2. app.py +98 -0
  3. requirements.txt +7 -3
Dockerfile CHANGED
@@ -1,20 +1,22 @@
1
- FROM python:3.13.5-slim
 
2
 
 
3
  WORKDIR /app
4
 
5
- RUN apt-get update && apt-get install -y \
6
- build-essential \
7
- curl \
8
- git \
9
- && rm -rf /var/lib/apt/lists/*
10
 
11
- COPY requirements.txt ./
12
- COPY src/ ./src/
13
 
14
- RUN pip3 install -r requirements.txt
 
 
 
15
 
 
16
  EXPOSE 8501
17
 
18
- HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
19
-
20
- ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
1
# Use a lightweight Python image as the base image
FROM python:3.9-slim

# Set the working directory in the container
WORKDIR /app

# Copy requirements.txt first so the dependency-install layer is cached
# independently of application-code changes
COPY requirements.txt .

# Install the dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application files (app.py and the saved model) into the container
COPY app.py .
COPY best_random_forest_pipeline.joblib .
# If you have other necessary files, copy them here as well

# Expose the port that Streamlit runs on
EXPOSE 8501

# Command to run the Streamlit application.
# --server.address=0.0.0.0 is required inside a container: Streamlit's
# default bind (localhost) would make the published port 8501 unreachable
# from outside the container.
CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
app.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""SuperKart sales-forecasting Streamlit app.

Collects product and store attributes from sidebar widgets, assembles them
into a single-row DataFrame, and feeds it to a pre-trained scikit-learn
pipeline (random forest) to predict total product-store sales.
"""

import streamlit as st
import pandas as pd
import numpy as np
import joblib


@st.cache_resource
def _load_pipeline(path='best_random_forest_pipeline.joblib'):
    """Deserialize the trained model pipeline once and cache it.

    Streamlit re-executes this whole script on every widget interaction;
    ``st.cache_resource`` keeps a single loaded pipeline across reruns
    instead of re-reading the joblib file each time.
    """
    return joblib.load(path)


# Load the trained model pipeline (cached across reruns)
model_pipeline = _load_pipeline()

# App title and description
st.title('SuperKart Sales Forecasting App')
st.write('Enter the product and store details to get a sales forecast.')

# Features the pipeline was fitted on:
#   Numerical:   Product_Weight, Product_Allocated_Area, Product_MRP,
#                Store_Establishment_Year
#   Categorical: Product_Sugar_Content, Product_Type, Store_Id, Store_Size,
#                Store_Location_City_Type, Store_Type

st.sidebar.header('Product and Store Details')

# Numerical inputs
product_weight = st.sidebar.number_input('Product Weight', min_value=0.0, value=10.0)
product_allocated_area = st.sidebar.number_input('Product Allocated Area', min_value=0.0, value=0.05)
product_mrp = st.sidebar.number_input('Product MRP', min_value=0.0, value=100.0)
store_establishment_year = st.sidebar.number_input('Store Establishment Year', min_value=1900, max_value=2024, value=2000)

# Categorical inputs — option lists mirror the unique values in the
# training data; update them if the training data changes.
sugar_content_options = ['Low Sugar', 'Regular', 'No Sugar']
product_type_options = ['Frozen Foods', 'Dairy', 'Canned', 'Baking Goods', 'Health and Hygiene', 'Snack Foods', 'Household', 'Meat', 'Soft Drinks', 'Breads', 'Hard Drinks', 'Others', 'Starchy Foods', 'Breakfast', 'Seafood', 'Fruits and Vegetables']
store_id_options = ['OUT004', 'OUT003', 'OUT001', 'OUT002']
store_size_options = ['Medium', 'High', 'Small']
store_location_options = ['Tier 2', 'Tier 1', 'Tier 3']
store_type_options = ['Supermarket Type2', 'Departmental Store', 'Supermarket Type1', 'Food Mart']

product_sugar_content = st.sidebar.selectbox('Product Sugar Content', sugar_content_options)
product_type = st.sidebar.selectbox('Product Type', product_type_options)
store_id = st.sidebar.selectbox('Store ID', store_id_options)
store_size = st.sidebar.selectbox('Store Size', store_size_options)
store_location_city_type = st.sidebar.selectbox('Store Location City Type', store_location_options)
store_type = st.sidebar.selectbox('Store Type', store_type_options)

# Assemble the widget values into a single-row DataFrame whose column names
# match the feature names the pipeline was fitted on. The pipeline's own
# preprocessor performs the encoding/scaling, so the raw frame is passed
# directly to predict().
input_data = {
    'Product_Weight': product_weight,
    'Product_Allocated_Area': product_allocated_area,
    'Product_MRP': product_mrp,
    'Store_Establishment_Year': store_establishment_year,
    'Product_Sugar_Content': product_sugar_content,
    'Product_Type': product_type,
    'Store_Id': store_id,
    'Store_Size': store_size,
    'Store_Location_City_Type': store_location_city_type,
    'Store_Type': store_type
}
input_df = pd.DataFrame([input_data])

# Echo the input back to the user
st.subheader('Input Details:')
st.write(input_df)

# Make a prediction when the button is clicked
if st.button('Predict Sales'):
    try:
        prediction = model_pipeline.predict(input_df)
        st.subheader('Predicted Product Store Sales Total:')
        st.write(f'{prediction[0]:,.2f}')
    except Exception as e:
        # Surface pipeline errors (e.g. unseen categories, missing columns)
        # to the user instead of crashing the app.
        st.error(f"An error occurred during prediction: {e}")
        st.write("Please ensure the input features are correct and match the expected format.")
requirements.txt CHANGED
@@ -1,3 +1,7 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
 
1
+ streamlit==1.36.0
2
+ pandas==2.2.2
3
+ numpy==2.0.2
4
+ scikit-learn==1.6.1
5
+ joblib==1.4.2
6
+ xgboost==2.1.4
7
+ # Add any other libraries you used in your notebook if not already listed