Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files
app.py
CHANGED
|
@@ -7,7 +7,6 @@ import os
|
|
| 7 |
import traceback
|
| 8 |
from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
| 9 |
from sklearn.compose import make_column_transformer
|
| 10 |
-
from sklearn.pipeline import Pipeline
|
| 11 |
|
| 12 |
# Page configuration
|
| 13 |
st.set_page_config(
|
|
@@ -44,27 +43,78 @@ st.markdown("""
|
|
| 44 |
</style>
|
| 45 |
""", unsafe_allow_html=True)
|
| 46 |
|
| 47 |
-
# Create
|
| 48 |
-
def
|
| 49 |
-
"""Create a preprocessor
|
| 50 |
|
| 51 |
# Define numerical and categorical columns
|
| 52 |
numerical_columns = ['Product_Weight', 'Product_Allocated_Area', 'Product_MRP', 'Store_Establishment_Year']
|
| 53 |
categorical_columns = ['Product_Sugar_Content', 'Product_Type', 'Store_Size', 'Store_Location_City_Type', 'Store_Type']
|
| 54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
# Create preprocessor
|
| 56 |
preprocessor = make_column_transformer(
|
| 57 |
(StandardScaler(), numerical_columns),
|
| 58 |
(OneHotEncoder(drop='first', handle_unknown='ignore'), categorical_columns),
|
| 59 |
-
remainder='drop' # Drop Product_Id and Store_Id
|
| 60 |
)
|
| 61 |
|
|
|
|
|
|
|
|
|
|
| 62 |
return preprocessor
|
| 63 |
|
| 64 |
-
# Load model and create
|
| 65 |
@st.cache_resource
|
| 66 |
-
def
|
| 67 |
-
"""Load model and create a
|
| 68 |
try:
|
| 69 |
# Check if model file exists
|
| 70 |
model_file = 'best_model.pkl'
|
|
@@ -76,9 +126,9 @@ def load_model_and_create_preprocessor():
|
|
| 76 |
model = joblib.load(model_file)
|
| 77 |
st.success(f"✅ Model loaded successfully. Type: {type(model)}")
|
| 78 |
|
| 79 |
-
# Create
|
| 80 |
-
preprocessor =
|
| 81 |
-
st.success("β
|
| 82 |
|
| 83 |
return model, preprocessor, True
|
| 84 |
|
|
@@ -88,7 +138,7 @@ def load_model_and_create_preprocessor():
|
|
| 88 |
return None, None, False
|
| 89 |
|
| 90 |
# Load model and preprocessor
|
| 91 |
-
model, preprocessor, model_loaded =
|
| 92 |
|
| 93 |
def generate_ids():
|
| 94 |
"""Generate Product_Id and Store_Id"""
|
|
@@ -183,7 +233,7 @@ def single_prediction_page():
|
|
| 183 |
st.info("π Making prediction...")
|
| 184 |
st.info(f"Generated Product ID: {product_id}, Store ID: {store_id}")
|
| 185 |
|
| 186 |
-
# Transform the data using the
|
| 187 |
transformed_data = preprocessor.transform(input_data)
|
| 188 |
st.info(f"✅ Data transformed successfully. Shape: {transformed_data.shape}")
|
| 189 |
|
|
@@ -250,7 +300,7 @@ def batch_prediction_page():
|
|
| 250 |
if 'Store_Id' not in df.columns:
|
| 251 |
df['Store_Id'] = [f"ST{np.random.randint(1000, 9999)}" for _ in range(len(df))]
|
| 252 |
|
| 253 |
-
# Transform the data using the
|
| 254 |
transformed_data = preprocessor.transform(df)
|
| 255 |
st.info(f"✅ Data transformed successfully. Shape: {transformed_data.shape}")
|
| 256 |
|
|
@@ -285,7 +335,7 @@ def about_page():
|
|
| 285 |
- **Batch Prediction**: Process multiple predictions at once
|
| 286 |
- **Standalone Operation**: No backend API required - works directly with model files
|
| 287 |
- **Auto ID Generation**: Automatically generates Product_Id and Store_Id when needed
|
| 288 |
-
- **
|
| 289 |
|
| 290 |
## Model Information
|
| 291 |
The forecasting model uses ensemble learning techniques including:
|
|
|
|
| 7 |
import traceback
|
| 8 |
from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
| 9 |
from sklearn.compose import make_column_transformer
|
|
|
|
| 10 |
|
| 11 |
# Page configuration
|
| 12 |
st.set_page_config(
|
|
|
|
| 43 |
</style>
|
| 44 |
""", unsafe_allow_html=True)
|
| 45 |
|
| 46 |
+
# Create and fit a preprocessor with sample data
|
| 47 |
+
def create_and_fit_preprocessor():
|
| 48 |
+
"""Create and fit a preprocessor with comprehensive sample data"""
|
| 49 |
|
| 50 |
# Define numerical and categorical columns
|
| 51 |
numerical_columns = ['Product_Weight', 'Product_Allocated_Area', 'Product_MRP', 'Store_Establishment_Year']
|
| 52 |
categorical_columns = ['Product_Sugar_Content', 'Product_Type', 'Store_Size', 'Store_Location_City_Type', 'Store_Type']
|
| 53 |
|
| 54 |
+
# Create comprehensive sample data to fit the preprocessor
|
| 55 |
+
sample_data = pd.DataFrame({
|
| 56 |
+
'Product_Id': ['PR0001', 'PR0002', 'PR0003', 'PR0004', 'PR0005'],
|
| 57 |
+
'Product_Weight': [10.5, 15.2, 8.7, 20.1, 12.3],
|
| 58 |
+
'Product_Sugar_Content': ['Low Sugar', 'Regular', 'No Sugar', 'Low Sugar', 'Regular'],
|
| 59 |
+
'Product_Allocated_Area': [0.05, 0.08, 0.03, 0.12, 0.06],
|
| 60 |
+
'Product_Type': ['Frozen Foods', 'Dairy', 'Canned', 'Baking Goods', 'Health and Hygiene'],
|
| 61 |
+
'Product_MRP': [150.0, 200.0, 100.0, 300.0, 180.0],
|
| 62 |
+
'Store_Id': ['ST0001', 'ST0002', 'ST0003', 'ST0004', 'ST0005'],
|
| 63 |
+
'Store_Establishment_Year': [2010, 2015, 2008, 2020, 2012],
|
| 64 |
+
'Store_Size': ['High', 'Medium', 'Small', 'High', 'Medium'],
|
| 65 |
+
'Store_Location_City_Type': ['Tier 1', 'Tier 2', 'Tier 3', 'Tier 1', 'Tier 2'],
|
| 66 |
+
'Store_Type': ['Supermarket Type1', 'Departmental Store', 'Food Mart', 'Supermarket Type2', 'Supermarket Type1']
|
| 67 |
+
})
|
| 68 |
+
|
| 69 |
+
# Add more sample data to cover all possible categorical values
|
| 70 |
+
additional_samples = pd.DataFrame({
|
| 71 |
+
'Product_Id': ['PR0006', 'PR0007', 'PR0008', 'PR0009', 'PR0010'],
|
| 72 |
+
'Product_Weight': [25.0, 5.5, 18.9, 30.2, 7.8],
|
| 73 |
+
'Product_Sugar_Content': ['No Sugar', 'Regular', 'Low Sugar', 'No Sugar', 'Regular'],
|
| 74 |
+
'Product_Allocated_Area': [0.15, 0.02, 0.10, 0.20, 0.04],
|
| 75 |
+
'Product_Type': ['Snack Foods', 'Meat', 'Household', 'Soft Drinks', 'Hard Drinks'],
|
| 76 |
+
'Product_MRP': [250.0, 80.0, 400.0, 120.0, 350.0],
|
| 77 |
+
'Store_Id': ['ST0006', 'ST0007', 'ST0008', 'ST0009', 'ST0010'],
|
| 78 |
+
'Store_Establishment_Year': [2005, 2018, 2011, 2019, 2013],
|
| 79 |
+
'Store_Size': ['Small', 'High', 'Medium', 'High', 'Small'],
|
| 80 |
+
'Store_Location_City_Type': ['Tier 3', 'Tier 1', 'Tier 2', 'Tier 1', 'Tier 3'],
|
| 81 |
+
'Store_Type': ['Food Mart', 'Supermarket Type1', 'Departmental Store', 'Supermarket Type2', 'Food Mart']
|
| 82 |
+
})
|
| 83 |
+
|
| 84 |
+
# Add more samples for all product types
|
| 85 |
+
more_product_types = pd.DataFrame({
|
| 86 |
+
'Product_Id': ['PR0011', 'PR0012', 'PR0013', 'PR0014', 'PR0015'],
|
| 87 |
+
'Product_Weight': [14.0, 22.0, 9.5, 16.8, 11.2],
|
| 88 |
+
'Product_Sugar_Content': ['Low Sugar', 'Regular', 'No Sugar', 'Low Sugar', 'Regular'],
|
| 89 |
+
'Product_Allocated_Area': [0.07, 0.11, 0.05, 0.09, 0.06],
|
| 90 |
+
'Product_Type': ['Bread', 'Breakfast', 'Fruits and Vegetables', 'Seafood', 'Starchy Foods'],
|
| 91 |
+
'Product_MRP': [160.0, 220.0, 90.0, 280.0, 140.0],
|
| 92 |
+
'Store_Id': ['ST0011', 'ST0012', 'ST0013', 'ST0014', 'ST0015'],
|
| 93 |
+
'Store_Establishment_Year': [2014, 2017, 2009, 2021, 2016],
|
| 94 |
+
'Store_Size': ['Medium', 'High', 'Small', 'High', 'Medium'],
|
| 95 |
+
'Store_Location_City_Type': ['Tier 2', 'Tier 1', 'Tier 3', 'Tier 1', 'Tier 2'],
|
| 96 |
+
'Store_Type': ['Supermarket Type1', 'Supermarket Type2', 'Food Mart', 'Departmental Store', 'Supermarket Type1']
|
| 97 |
+
})
|
| 98 |
+
|
| 99 |
+
# Combine all sample data
|
| 100 |
+
all_sample_data = pd.concat([sample_data, additional_samples, more_product_types], ignore_index=True)
|
| 101 |
+
|
| 102 |
# Create preprocessor
|
| 103 |
preprocessor = make_column_transformer(
|
| 104 |
(StandardScaler(), numerical_columns),
|
| 105 |
(OneHotEncoder(drop='first', handle_unknown='ignore'), categorical_columns),
|
| 106 |
+
remainder='drop' # Drop Product_Id and Store_Id
|
| 107 |
)
|
| 108 |
|
| 109 |
+
# Fit the preprocessor with sample data
|
| 110 |
+
preprocessor.fit(all_sample_data)
|
| 111 |
+
|
| 112 |
return preprocessor
|
| 113 |
|
| 114 |
+
# Load model and create fitted preprocessor
|
| 115 |
@st.cache_resource
|
| 116 |
+
def load_model_and_create_fitted_preprocessor():
|
| 117 |
+
"""Load model and create a fitted preprocessor"""
|
| 118 |
try:
|
| 119 |
# Check if model file exists
|
| 120 |
model_file = 'best_model.pkl'
|
|
|
|
| 126 |
model = joblib.load(model_file)
|
| 127 |
st.success(f"✅ Model loaded successfully. Type: {type(model)}")
|
| 128 |
|
| 129 |
+
# Create and fit preprocessor
|
| 130 |
+
preprocessor = create_and_fit_preprocessor()
|
| 131 |
+
st.success("✅ Preprocessor created and fitted successfully")
|
| 132 |
|
| 133 |
return model, preprocessor, True
|
| 134 |
|
|
|
|
| 138 |
return None, None, False
|
| 139 |
|
| 140 |
# Load model and preprocessor
|
| 141 |
+
model, preprocessor, model_loaded = load_model_and_create_fitted_preprocessor()
|
| 142 |
|
| 143 |
def generate_ids():
|
| 144 |
"""Generate Product_Id and Store_Id"""
|
|
|
|
| 233 |
st.info("π Making prediction...")
|
| 234 |
st.info(f"Generated Product ID: {product_id}, Store ID: {store_id}")
|
| 235 |
|
| 236 |
+
# Transform the data using the fitted preprocessor
|
| 237 |
transformed_data = preprocessor.transform(input_data)
|
| 238 |
st.info(f"✅ Data transformed successfully. Shape: {transformed_data.shape}")
|
| 239 |
|
|
|
|
| 300 |
if 'Store_Id' not in df.columns:
|
| 301 |
df['Store_Id'] = [f"ST{np.random.randint(1000, 9999)}" for _ in range(len(df))]
|
| 302 |
|
| 303 |
+
# Transform the data using the fitted preprocessor
|
| 304 |
transformed_data = preprocessor.transform(df)
|
| 305 |
st.info(f"✅ Data transformed successfully. Shape: {transformed_data.shape}")
|
| 306 |
|
|
|
|
| 335 |
- **Batch Prediction**: Process multiple predictions at once
|
| 336 |
- **Standalone Operation**: No backend API required - works directly with model files
|
| 337 |
- **Auto ID Generation**: Automatically generates Product_Id and Store_Id when needed
|
| 338 |
+
- **Fitted Preprocessor**: Creates and fits a preprocessor with comprehensive sample data
|
| 339 |
|
| 340 |
## Model Information
|
| 341 |
The forecasting model uses ensemble learning techniques including:
|