DataWiz-6939 committed on
Commit
74efdba
·
verified ·
1 Parent(s): a429b3b

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. Dockerfile +15 -12
  2. app.py +86 -0
  3. requirements.txt +8 -3
Dockerfile CHANGED
@@ -1,20 +1,23 @@
1
- FROM python:3.13.5-slim
 
2
 
 
3
  WORKDIR /app
4
 
5
- RUN apt-get update && apt-get install -y \
6
- build-essential \
7
- curl \
8
- git \
9
- && rm -rf /var/lib/apt/lists/*
10
-
11
- COPY requirements.txt ./
12
- COPY src/ ./src/
13
 
 
14
  RUN pip3 install -r requirements.txt
15
 
16
- EXPOSE 8501
 
 
 
 
 
17
 
18
- HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
19
 
20
- ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
 
# Container image for the Streamlit sales-prediction app.
# Note: python:3.9 is the full (non-slim) official Python 3.9 image.
FROM python:3.9

# Install dependencies from a directory that contains only requirements.txt,
# so this layer is rebuilt only when the dependency list changes and the
# application sources are not copied into the image twice.
WORKDIR /app
COPY requirements.txt .

# Install Python dependencies listed in requirements.txt (system-wide, as root).
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip3 install --no-cache-dir -r requirements.txt

# Create an unprivileged user with UID 1000 and run everything below as it.
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# The application lives in, and is started from, the user's home directory.
WORKDIR $HOME/app

# Copy the application sources, owned by the unprivileged user.
COPY --chown=user . $HOME/app

# Run the Streamlit app on port 8501, listening on all interfaces.
# NOTE(review): XSRF protection is explicitly disabled here; confirm this is
# still required by the hosting setup before keeping it.
CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0", "--server.enableXsrfProtection=false"]
app.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit front end for the Superkart sales prediction model.

The script collects product and store attributes from the user, applies the
feature engineering steps below, and displays the prediction returned by a
model artifact downloaded from the Hugging Face Hub.
"""

import streamlit as st
import pandas as pd
from huggingface_hub import hf_hub_download
import joblib
import numpy as np


# Location of the serialized model on the Hugging Face Hub.
MODEL_REPO_ID = "DataWiz-6939/sales-prediction-model"
MODEL_FILENAME = "best_sales_prediction_model_v1.joblib"

# Feature-engineering constants. The original author's notes say these must
# match the values used in prep.py (not visible here) -- keep them in sync.
PRODUCT_CATEGORY_MAP = {
    'DR': 'Drinks',
    'NC': 'Non-Consumable',
    'FD': 'Food & Veg',
}
PERISHABLE_TYPES = ['Meat', 'Dairy', 'Fruits and Vegetables', 'Breakfast', 'Seafood']
REFERENCE_YEAR = 2024


@st.cache_resource
def load_model():
    """Download the model artifact and deserialize it once per server process.

    Streamlit re-executes this script on every widget interaction; caching
    the loaded object avoids repeating the download and load on each rerun.

    Ensure the HF token is available in the environment if the repo is
    private/gated.

    Returns:
        The object stored in the joblib file (used below via ``.predict``).
    """
    model_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename=MODEL_FILENAME)
    # NOTE(security): joblib.load unpickles the file and can execute arbitrary
    # code; only point MODEL_REPO_ID at a repository you trust.
    return joblib.load(model_path)


def engineer_features(raw_df: pd.DataFrame) -> pd.DataFrame:
    """Derive the engineered columns from the raw input frame.

    Args:
        raw_df: Frame containing at least ``Product_Id``,
            ``Store_Establishment_Year`` and ``Product_Type``.

    Returns:
        A new frame in which ``Product_Id`` and ``Store_Establishment_Year``
        are replaced by ``Product_Category`` and ``Store_Age`` and a
        ``Food_Type`` column is appended. ``raw_df`` is not modified.
    """
    features = raw_df.copy()

    # 1. Product category comes from the first two characters of Product_Id;
    #    prefixes missing from the map fall back to 'Other'.
    prefixes = features['Product_Id'].str[:2]
    features['Product_Category'] = prefixes.map(PRODUCT_CATEGORY_MAP).fillna('Other')
    features = features.drop(columns=['Product_Id'])

    # 2. Store age relative to the fixed reference year.
    features['Store_Age'] = REFERENCE_YEAR - features['Store_Establishment_Year']
    features = features.drop(columns=['Store_Establishment_Year'])

    # 3. Perishable / non-perishable flag derived from the product type.
    is_perishable = features['Product_Type'].isin(PERISHABLE_TYPES)
    features['Food_Type'] = is_perishable.map({True: 'Perishable', False: 'Non-Perishable'})

    return features


model = load_model()

# Streamlit UI for Superkart Sales Prediction
st.title("Superkart Sales Prediction App")
st.write("""
This application predicts the total sales for a given product in a specific store.
Please enter the product and store details below to get a sales forecast.
""")

# User input fields based on the dataset features
st.header("Product Details")
product_id_prefix = st.selectbox("Product Category Prefix (from Product_Id)", ['DR', 'NC', 'FD'])

# Draw the random default once per session. Re-drawing it on every rerun would
# change the widget's default value each time, which makes Streamlit treat it
# as a new widget and discard whatever the user typed.
if "product_id_default" not in st.session_state:
    st.session_state["product_id_default"] = int(np.random.randint(0, 100))
product_id_num = st.number_input(
    "Product ID Number (e.g., 001 for Px001)",
    min_value=0,
    max_value=999,
    value=st.session_state["product_id_default"],
)
product_id_dummy = f"{product_id_prefix}{product_id_num:03d}"

product_weight = st.number_input("Product Weight", min_value=1.0, max_value=50.0, value=10.0, step=0.1)
product_sugar_content = st.selectbox("Product Sugar Content", ['low sugar', 'regular', 'no sugar'])
product_allocated_area = st.number_input("Product Allocated Area (ratio)", min_value=0.0, max_value=0.5, value=0.05, step=0.01)
product_type = st.selectbox("Product Type", [
    'Dairy', 'Snack Foods', 'Household', 'Frozen Foods', 'Fruits and Vegetables',
    'Meat', 'Breakfast', 'Seafood', 'Hard Drinks', 'Canned', 'Soft Drinks',
    'Health and Hygiene', 'Baking Goods', 'Bread', 'Starchy Foods', 'Others'
])
product_mrp = st.number_input("Product MRP (Maximum Retail Price)", min_value=10.0, max_value=1000.0, value=150.0, step=1.0)

st.header("Store Details")
store_id = st.text_input("Store ID (e.g., S001)", 'S001')
store_establishment_year = st.number_input("Store Establishment Year", min_value=1900, max_value=2023, value=2000)
store_size = st.selectbox("Store Size", ['High', 'Medium', 'Low'])
store_location_city_type = st.selectbox("Store Location City Type", ['Tier 1', 'Tier 2', 'Tier 3'])
store_type = st.selectbox("Store Type", ['Supermarket Type 1', 'Departmental Store', 'Supermarket Type 2', 'Food Mart'])

# Assemble the raw inputs into a single-row DataFrame, then engineer features.
raw_input_df = pd.DataFrame([{
    'Product_Id': product_id_dummy,  # only its two-letter prefix is used
    'Product_Weight': product_weight,
    'Product_Sugar_Content': product_sugar_content,
    'Product_Allocated_Area': product_allocated_area,
    'Product_Type': product_type,
    'Product_MRP': product_mrp,
    'Store_Id': store_id,
    'Store_Establishment_Year': store_establishment_year,
    'Store_Size': store_size,
    'Store_Location_City_Type': store_location_city_type,
    'Store_Type': store_type,
}])
input_df = engineer_features(raw_input_df)

if st.button("Predict Sales"):
    prediction = model.predict(input_df)[0]
    st.subheader("Prediction Result:")
    st.success(f"The predicted total sales for this product in the store is: **${prediction:,.2f}**")
requirements.txt CHANGED
@@ -1,3 +1,8 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
 
 
1
+ pandas==2.2.2
2
+ huggingface_hub==0.32.6
3
+ streamlit==1.43.2
4
+ joblib==1.5.1
5
+ scikit-learn==1.6.0
6
+ xgboost==2.1.4
7
+ mlflow==3.0.1
8
+