ananttripathiak committed on
Commit
09060d1
·
verified ·
1 Parent(s): 0224753

Upload 2 files

Browse files
Files changed (1) hide show
  1. app.py +65 -15
app.py CHANGED
@@ -7,7 +7,6 @@ import os
7
  import traceback
8
  from sklearn.preprocessing import StandardScaler, OneHotEncoder
9
  from sklearn.compose import make_column_transformer
10
- from sklearn.pipeline import Pipeline
11
 
12
  # Page configuration
13
  st.set_page_config(
@@ -44,27 +43,78 @@ st.markdown("""
44
  </style>
45
  """, unsafe_allow_html=True)
46
 
47
- # Create a new preprocessor that works with raw data
48
- def create_new_preprocessor():
49
- """Create a preprocessor that works with raw input data"""
50
 
51
  # Define numerical and categorical columns
52
  numerical_columns = ['Product_Weight', 'Product_Allocated_Area', 'Product_MRP', 'Store_Establishment_Year']
53
  categorical_columns = ['Product_Sugar_Content', 'Product_Type', 'Store_Size', 'Store_Location_City_Type', 'Store_Type']
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  # Create preprocessor
56
  preprocessor = make_column_transformer(
57
  (StandardScaler(), numerical_columns),
58
  (OneHotEncoder(drop='first', handle_unknown='ignore'), categorical_columns),
59
- remainder='drop' # Drop Product_Id and Store_Id as they're not needed for prediction
60
  )
61
 
 
 
 
62
  return preprocessor
63
 
64
- # Load model and create new preprocessor
65
  @st.cache_resource
66
- def load_model_and_create_preprocessor():
67
- """Load model and create a working preprocessor"""
68
  try:
69
  # Check if model file exists
70
  model_file = 'best_model.pkl'
@@ -76,9 +126,9 @@ def load_model_and_create_preprocessor():
76
  model = joblib.load(model_file)
77
  st.success(f"βœ… Model loaded successfully. Type: {type(model)}")
78
 
79
- # Create new preprocessor
80
- preprocessor = create_new_preprocessor()
81
- st.success("βœ… New preprocessor created successfully")
82
 
83
  return model, preprocessor, True
84
 
@@ -88,7 +138,7 @@ def load_model_and_create_preprocessor():
88
  return None, None, False
89
 
90
  # Load model and preprocessor
91
- model, preprocessor, model_loaded = load_model_and_create_preprocessor()
92
 
93
  def generate_ids():
94
  """Generate Product_Id and Store_Id"""
@@ -183,7 +233,7 @@ def single_prediction_page():
183
  st.info("πŸ”„ Making prediction...")
184
  st.info(f"Generated Product ID: {product_id}, Store ID: {store_id}")
185
 
186
- # Transform the data using the new preprocessor
187
  transformed_data = preprocessor.transform(input_data)
188
  st.info(f"βœ… Data transformed successfully. Shape: {transformed_data.shape}")
189
 
@@ -250,7 +300,7 @@ def batch_prediction_page():
250
  if 'Store_Id' not in df.columns:
251
  df['Store_Id'] = [f"ST{np.random.randint(1000, 9999)}" for _ in range(len(df))]
252
 
253
- # Transform the data using the new preprocessor
254
  transformed_data = preprocessor.transform(df)
255
  st.info(f"βœ… Data transformed successfully. Shape: {transformed_data.shape}")
256
 
@@ -285,7 +335,7 @@ def about_page():
285
  - **Batch Prediction**: Process multiple predictions at once
286
  - **Standalone Operation**: No backend API required - works directly with model files
287
  - **Auto ID Generation**: Automatically generates Product_Id and Store_Id when needed
288
- - **New Preprocessor**: Creates a fresh preprocessor that works with raw data
289
 
290
  ## Model Information
291
  The forecasting model uses ensemble learning techniques including:
 
7
  import traceback
8
  from sklearn.preprocessing import StandardScaler, OneHotEncoder
9
  from sklearn.compose import make_column_transformer
 
10
 
11
  # Page configuration
12
  st.set_page_config(
 
43
  </style>
44
  """, unsafe_allow_html=True)
45
 
46
# Build and fit the input preprocessor used by the prediction pages
def create_and_fit_preprocessor():
    """Create a column transformer and fit it on hand-written sample rows.

    The fifteen synthetic records below enumerate every categorical level the
    app's input widgets can produce, so the OneHotEncoder learns the complete
    category vocabulary up front (unseen values are still ignored at transform
    time via handle_unknown='ignore').

    NOTE(review): StandardScaler statistics are derived from these synthetic
    rows, not from the model's original training data — confirm the loaded
    model tolerates this scaling.

    Returns:
        A fitted sklearn ColumnTransformer that scales the numeric columns,
        one-hot encodes the categorical columns, and drops everything else
        (Product_Id / Store_Id).
    """

    # Columns routed to each transformer; all other columns are dropped.
    numerical_columns = ['Product_Weight', 'Product_Allocated_Area', 'Product_MRP', 'Store_Establishment_Year']
    categorical_columns = ['Product_Sugar_Content', 'Product_Type', 'Store_Size', 'Store_Location_City_Type', 'Store_Type']

    # Fifteen synthetic fitting records, assembled column-wise in one frame
    # (values are grouped five per row to mirror the three original batches).
    all_sample_data = pd.DataFrame({
        'Product_Id': [f'PR{i:04d}' for i in range(1, 16)],
        'Product_Weight': [10.5, 15.2, 8.7, 20.1, 12.3,
                           25.0, 5.5, 18.9, 30.2, 7.8,
                           14.0, 22.0, 9.5, 16.8, 11.2],
        'Product_Sugar_Content': ['Low Sugar', 'Regular', 'No Sugar', 'Low Sugar', 'Regular',
                                  'No Sugar', 'Regular', 'Low Sugar', 'No Sugar', 'Regular',
                                  'Low Sugar', 'Regular', 'No Sugar', 'Low Sugar', 'Regular'],
        'Product_Allocated_Area': [0.05, 0.08, 0.03, 0.12, 0.06,
                                   0.15, 0.02, 0.10, 0.20, 0.04,
                                   0.07, 0.11, 0.05, 0.09, 0.06],
        'Product_Type': ['Frozen Foods', 'Dairy', 'Canned', 'Baking Goods', 'Health and Hygiene',
                         'Snack Foods', 'Meat', 'Household', 'Soft Drinks', 'Hard Drinks',
                         'Bread', 'Breakfast', 'Fruits and Vegetables', 'Seafood', 'Starchy Foods'],
        'Product_MRP': [150.0, 200.0, 100.0, 300.0, 180.0,
                        250.0, 80.0, 400.0, 120.0, 350.0,
                        160.0, 220.0, 90.0, 280.0, 140.0],
        'Store_Id': [f'ST{i:04d}' for i in range(1, 16)],
        'Store_Establishment_Year': [2010, 2015, 2008, 2020, 2012,
                                     2005, 2018, 2011, 2019, 2013,
                                     2014, 2017, 2009, 2021, 2016],
        'Store_Size': ['High', 'Medium', 'Small', 'High', 'Medium',
                       'Small', 'High', 'Medium', 'High', 'Small',
                       'Medium', 'High', 'Small', 'High', 'Medium'],
        'Store_Location_City_Type': ['Tier 1', 'Tier 2', 'Tier 3', 'Tier 1', 'Tier 2',
                                     'Tier 3', 'Tier 1', 'Tier 2', 'Tier 1', 'Tier 3',
                                     'Tier 2', 'Tier 1', 'Tier 3', 'Tier 1', 'Tier 2'],
        'Store_Type': ['Supermarket Type1', 'Departmental Store', 'Food Mart', 'Supermarket Type2', 'Supermarket Type1',
                       'Food Mart', 'Supermarket Type1', 'Departmental Store', 'Supermarket Type2', 'Food Mart',
                       'Supermarket Type1', 'Supermarket Type2', 'Food Mart', 'Departmental Store', 'Supermarket Type1'],
    })

    # Scale numerics, one-hot the categoricals, drop Product_Id / Store_Id.
    preprocessor = make_column_transformer(
        (StandardScaler(), numerical_columns),
        (OneHotEncoder(drop='first', handle_unknown='ignore'), categorical_columns),
        remainder='drop'  # Drop Product_Id and Store_Id
    )

    # Fit on the synthetic records so transform() works on raw app input.
    preprocessor.fit(all_sample_data)

    return preprocessor
113
 
114
+ # Load model and create fitted preprocessor
115
  @st.cache_resource
116
+ def load_model_and_create_fitted_preprocessor():
117
+ """Load model and create a fitted preprocessor"""
118
  try:
119
  # Check if model file exists
120
  model_file = 'best_model.pkl'
 
126
  model = joblib.load(model_file)
127
  st.success(f"βœ… Model loaded successfully. Type: {type(model)}")
128
 
129
+ # Create and fit preprocessor
130
+ preprocessor = create_and_fit_preprocessor()
131
+ st.success("βœ… Preprocessor created and fitted successfully")
132
 
133
  return model, preprocessor, True
134
 
 
138
  return None, None, False
139
 
140
  # Load model and preprocessor
141
+ model, preprocessor, model_loaded = load_model_and_create_fitted_preprocessor()
142
 
143
  def generate_ids():
144
  """Generate Product_Id and Store_Id"""
 
233
  st.info("πŸ”„ Making prediction...")
234
  st.info(f"Generated Product ID: {product_id}, Store ID: {store_id}")
235
 
236
+ # Transform the data using the fitted preprocessor
237
  transformed_data = preprocessor.transform(input_data)
238
  st.info(f"βœ… Data transformed successfully. Shape: {transformed_data.shape}")
239
 
 
300
  if 'Store_Id' not in df.columns:
301
  df['Store_Id'] = [f"ST{np.random.randint(1000, 9999)}" for _ in range(len(df))]
302
 
303
+ # Transform the data using the fitted preprocessor
304
  transformed_data = preprocessor.transform(df)
305
  st.info(f"βœ… Data transformed successfully. Shape: {transformed_data.shape}")
306
 
 
335
  - **Batch Prediction**: Process multiple predictions at once
336
  - **Standalone Operation**: No backend API required - works directly with model files
337
  - **Auto ID Generation**: Automatically generates Product_Id and Store_Id when needed
338
+ - **Fitted Preprocessor**: Creates and fits a preprocessor with comprehensive sample data
339
 
340
  ## Model Information
341
  The forecasting model uses ensemble learning techniques including: