ananttripathiak committed on
Commit
09060d1
·
verified ·
1 Parent(s): 0224753

Upload 2 files

Browse files
Files changed (1) hide show
  1. app.py +65 -15
app.py CHANGED
@@ -7,7 +7,6 @@ import os
7
  import traceback
8
  from sklearn.preprocessing import StandardScaler, OneHotEncoder
9
  from sklearn.compose import make_column_transformer
10
- from sklearn.pipeline import Pipeline
11
 
12
  # Page configuration
13
  st.set_page_config(
@@ -44,27 +43,78 @@ st.markdown("""
44
  </style>
45
  """, unsafe_allow_html=True)
46
 
47
- # Create a new preprocessor that works with raw data
48
- def create_new_preprocessor():
49
- """Create a preprocessor that works with raw input data"""
50
 
51
  # Define numerical and categorical columns
52
  numerical_columns = ['Product_Weight', 'Product_Allocated_Area', 'Product_MRP', 'Store_Establishment_Year']
53
  categorical_columns = ['Product_Sugar_Content', 'Product_Type', 'Store_Size', 'Store_Location_City_Type', 'Store_Type']
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  # Create preprocessor
56
  preprocessor = make_column_transformer(
57
  (StandardScaler(), numerical_columns),
58
  (OneHotEncoder(drop='first', handle_unknown='ignore'), categorical_columns),
59
- remainder='drop' # Drop Product_Id and Store_Id as they're not needed for prediction
60
  )
61
 
 
 
 
62
  return preprocessor
63
 
64
- # Load model and create new preprocessor
65
  @st.cache_resource
66
- def load_model_and_create_preprocessor():
67
- """Load model and create a working preprocessor"""
68
  try:
69
  # Check if model file exists
70
  model_file = 'best_model.pkl'
@@ -76,9 +126,9 @@ def load_model_and_create_preprocessor():
76
  model = joblib.load(model_file)
77
  st.success(f"βœ… Model loaded successfully. Type: {type(model)}")
78
 
79
- # Create new preprocessor
80
- preprocessor = create_new_preprocessor()
81
- st.success("βœ… New preprocessor created successfully")
82
 
83
  return model, preprocessor, True
84
 
@@ -88,7 +138,7 @@ def load_model_and_create_preprocessor():
88
  return None, None, False
89
 
90
  # Load model and preprocessor
91
- model, preprocessor, model_loaded = load_model_and_create_preprocessor()
92
 
93
  def generate_ids():
94
  """Generate Product_Id and Store_Id"""
@@ -183,7 +233,7 @@ def single_prediction_page():
183
  st.info("πŸ”„ Making prediction...")
184
  st.info(f"Generated Product ID: {product_id}, Store ID: {store_id}")
185
 
186
- # Transform the data using the new preprocessor
187
  transformed_data = preprocessor.transform(input_data)
188
  st.info(f"βœ… Data transformed successfully. Shape: {transformed_data.shape}")
189
 
@@ -250,7 +300,7 @@ def batch_prediction_page():
250
  if 'Store_Id' not in df.columns:
251
  df['Store_Id'] = [f"ST{np.random.randint(1000, 9999)}" for _ in range(len(df))]
252
 
253
- # Transform the data using the new preprocessor
254
  transformed_data = preprocessor.transform(df)
255
  st.info(f"βœ… Data transformed successfully. Shape: {transformed_data.shape}")
256
 
@@ -285,7 +335,7 @@ def about_page():
285
  - **Batch Prediction**: Process multiple predictions at once
286
  - **Standalone Operation**: No backend API required - works directly with model files
287
  - **Auto ID Generation**: Automatically generates Product_Id and Store_Id when needed
288
- - **New Preprocessor**: Creates a fresh preprocessor that works with raw data
289
 
290
  ## Model Information
291
  The forecasting model uses ensemble learning techniques including:
 
7
  import traceback
8
  from sklearn.preprocessing import StandardScaler, OneHotEncoder
9
  from sklearn.compose import make_column_transformer
 
10
 
11
  # Page configuration
12
  st.set_page_config(
 
43
  </style>
44
  """, unsafe_allow_html=True)
45
 
46
# Build and fit the input preprocessor used by the prediction pages
def create_and_fit_preprocessor():
    """Create a column transformer and fit it on hand-written sample rows.

    The fifteen synthetic records below enumerate every categorical level the
    app's input widgets can produce, so the OneHotEncoder learns the complete
    category vocabulary up front (unseen values are still ignored at transform
    time via handle_unknown='ignore').

    NOTE(review): StandardScaler statistics are derived from these synthetic
    rows, not from the model's original training data — confirm the loaded
    model tolerates this scaling.

    Returns:
        A fitted sklearn ColumnTransformer that scales the numeric columns,
        one-hot encodes the categorical columns, and drops everything else
        (Product_Id / Store_Id).
    """

    # Columns routed to each transformer; all other columns are dropped.
    numerical_columns = ['Product_Weight', 'Product_Allocated_Area', 'Product_MRP', 'Store_Establishment_Year']
    categorical_columns = ['Product_Sugar_Content', 'Product_Type', 'Store_Size', 'Store_Location_City_Type', 'Store_Type']

    # Fifteen synthetic fitting records, assembled column-wise in one frame
    # (values are grouped five per row to mirror the three original batches).
    all_sample_data = pd.DataFrame({
        'Product_Id': [f'PR{i:04d}' for i in range(1, 16)],
        'Product_Weight': [10.5, 15.2, 8.7, 20.1, 12.3,
                           25.0, 5.5, 18.9, 30.2, 7.8,
                           14.0, 22.0, 9.5, 16.8, 11.2],
        'Product_Sugar_Content': ['Low Sugar', 'Regular', 'No Sugar', 'Low Sugar', 'Regular',
                                  'No Sugar', 'Regular', 'Low Sugar', 'No Sugar', 'Regular',
                                  'Low Sugar', 'Regular', 'No Sugar', 'Low Sugar', 'Regular'],
        'Product_Allocated_Area': [0.05, 0.08, 0.03, 0.12, 0.06,
                                   0.15, 0.02, 0.10, 0.20, 0.04,
                                   0.07, 0.11, 0.05, 0.09, 0.06],
        'Product_Type': ['Frozen Foods', 'Dairy', 'Canned', 'Baking Goods', 'Health and Hygiene',
                         'Snack Foods', 'Meat', 'Household', 'Soft Drinks', 'Hard Drinks',
                         'Bread', 'Breakfast', 'Fruits and Vegetables', 'Seafood', 'Starchy Foods'],
        'Product_MRP': [150.0, 200.0, 100.0, 300.0, 180.0,
                        250.0, 80.0, 400.0, 120.0, 350.0,
                        160.0, 220.0, 90.0, 280.0, 140.0],
        'Store_Id': [f'ST{i:04d}' for i in range(1, 16)],
        'Store_Establishment_Year': [2010, 2015, 2008, 2020, 2012,
                                     2005, 2018, 2011, 2019, 2013,
                                     2014, 2017, 2009, 2021, 2016],
        'Store_Size': ['High', 'Medium', 'Small', 'High', 'Medium',
                       'Small', 'High', 'Medium', 'High', 'Small',
                       'Medium', 'High', 'Small', 'High', 'Medium'],
        'Store_Location_City_Type': ['Tier 1', 'Tier 2', 'Tier 3', 'Tier 1', 'Tier 2',
                                     'Tier 3', 'Tier 1', 'Tier 2', 'Tier 1', 'Tier 3',
                                     'Tier 2', 'Tier 1', 'Tier 3', 'Tier 1', 'Tier 2'],
        'Store_Type': ['Supermarket Type1', 'Departmental Store', 'Food Mart', 'Supermarket Type2', 'Supermarket Type1',
                       'Food Mart', 'Supermarket Type1', 'Departmental Store', 'Supermarket Type2', 'Food Mart',
                       'Supermarket Type1', 'Supermarket Type2', 'Food Mart', 'Departmental Store', 'Supermarket Type1'],
    })

    # Scale numerics, one-hot the categoricals, drop Product_Id / Store_Id.
    preprocessor = make_column_transformer(
        (StandardScaler(), numerical_columns),
        (OneHotEncoder(drop='first', handle_unknown='ignore'), categorical_columns),
        remainder='drop'  # Drop Product_Id and Store_Id
    )

    # Fit on the synthetic records so transform() works on raw app input.
    preprocessor.fit(all_sample_data)

    return preprocessor
113
 
114
+ # Load model and create fitted preprocessor
115
  @st.cache_resource
116
+ def load_model_and_create_fitted_preprocessor():
117
+ """Load model and create a fitted preprocessor"""
118
  try:
119
  # Check if model file exists
120
  model_file = 'best_model.pkl'
 
126
  model = joblib.load(model_file)
127
  st.success(f"βœ… Model loaded successfully. Type: {type(model)}")
128
 
129
+ # Create and fit preprocessor
130
+ preprocessor = create_and_fit_preprocessor()
131
+ st.success("βœ… Preprocessor created and fitted successfully")
132
 
133
  return model, preprocessor, True
134
 
 
138
  return None, None, False
139
 
140
  # Load model and preprocessor
141
+ model, preprocessor, model_loaded = load_model_and_create_fitted_preprocessor()
142
 
143
  def generate_ids():
144
  """Generate Product_Id and Store_Id"""
 
233
  st.info("πŸ”„ Making prediction...")
234
  st.info(f"Generated Product ID: {product_id}, Store ID: {store_id}")
235
 
236
+ # Transform the data using the fitted preprocessor
237
  transformed_data = preprocessor.transform(input_data)
238
  st.info(f"βœ… Data transformed successfully. Shape: {transformed_data.shape}")
239
 
 
300
  if 'Store_Id' not in df.columns:
301
  df['Store_Id'] = [f"ST{np.random.randint(1000, 9999)}" for _ in range(len(df))]
302
 
303
+ # Transform the data using the fitted preprocessor
304
  transformed_data = preprocessor.transform(df)
305
  st.info(f"βœ… Data transformed successfully. Shape: {transformed_data.shape}")
306
 
 
335
  - **Batch Prediction**: Process multiple predictions at once
336
  - **Standalone Operation**: No backend API required - works directly with model files
337
  - **Auto ID Generation**: Automatically generates Product_Id and Store_Id when needed
338
+ - **Fitted Preprocessor**: Creates and fits a preprocessor with comprehensive sample data
339
 
340
  ## Model Information
341
  The forecasting model uses ensemble learning techniques including: