Dattaluri committed on
Commit
8f9892a
·
verified ·
1 Parent(s): 49d4a0a

Upload backend app files

Browse files
Files changed (1) hide show
  1. app.py +42 -14
app.py CHANGED
@@ -1,25 +1,28 @@
1
  from flask import Flask, request, jsonify
2
  import joblib
3
  import pandas as pd
 
4
 
5
  app = Flask(__name__)
6
 
7
  # Load the serialized full pipeline
8
  try:
9
  full_pipeline = joblib.load('deployment_files/SuperKart_model_v1_0.joblib')
10
- # Get the list of columns from the training data used by the pipeline
11
- # Adjusting to correctly get column names after one-hot encoding and scaling
12
- # This part might need refinement based on the exact structure of your pipeline's preprocessor
13
- # A safer approach is to save the column names of X_train during preprocessing
14
- # For now, let's assume the order is numerical followed by one-hot encoded categorical
15
- # We need to get the feature names from the OneHotEncoder and combine with numerical names
16
- categorical_feature_names = full_pipeline.named_steps['preprocessor'].transformers_[0][1].get_feature_names_out(
17
- full_pipeline.named_steps['preprocessor'].transformers_[0][2]
18
- )
19
- numerical_feature_names = full_pipeline.named_steps['scaler'].feature_names_in_
20
-
21
- # Combine numerical and categorical feature names in the correct order
22
- pipeline_columns = list(numerical_feature_names) + list(categorical_feature_names)
 
 
23
 
24
  except Exception as e:
25
  full_pipeline = None
@@ -33,11 +36,31 @@ def predict():
33
  try:
34
  data = request.get_json(force=True)
35
 
36
- # Convert input data to DataFrame, ensuring column order matches training data
37
  input_df = pd.DataFrame([data])
38
 
 
 
 
 
 
 
 
 
 
 
 
39
  # Reorder columns to match the order expected by the pipeline
40
  # This assumes all expected columns are present in the input data
 
 
 
 
 
 
 
 
 
41
  input_df = input_df[pipeline_columns]
42
 
43
 
@@ -51,4 +74,9 @@ def predict():
51
  return jsonify({'error': str(e)}), 400
52
 
53
  if __name__ == '__main__':
 
 
 
 
 
54
  app.run(debug=True, host='0.0.0.0', port=5000)
 
1
  from flask import Flask, request, jsonify
2
  import joblib
3
  import pandas as pd
4
+ import numpy as np
5
 
6
  app = Flask(__name__)
7
 
8
  # Load the serialized full pipeline
9
  try:
10
  full_pipeline = joblib.load('deployment_files/SuperKart_model_v1_0.joblib')
11
+
12
+ # Get the column names from the preprocessor step
13
+ # This handles both one-hot encoded and passthrough columns
14
+ # We need to access the transformers and their get_feature_names_out methods
15
+ transformer = full_pipeline.named_steps['preprocessor']
16
+ ohe_features = list(transformer.named_transformers_['onehotencoder'].get_feature_names_out(transformer.transformers_[0][2]))
17
+ passthrough_features = list(transformer.named_transformers_['remainder'].get_feature_names_out())
18
+
19
+ # Combine the feature names in the order they appear in the processed data
20
+ # The order should be passthrough features followed by one-hot encoded features
21
+ pipeline_columns = passthrough_features + ohe_features
22
+
23
+ # Note: This assumes the numerical columns (handled by StandardScaler in the pipeline)
24
+ # are the 'remainder' and appear before the one-hot encoded columns.
25
+ # If the order is different in your pipeline, adjust the concatenation accordingly.
26
 
27
  except Exception as e:
28
  full_pipeline = None
 
36
  try:
37
  data = request.get_json(force=True)
38
 
39
+ # Convert input data to DataFrame
40
  input_df = pd.DataFrame([data])
41
 
42
+ # Create 'Store_Age' feature if not present
43
+ if 'Store_Establishment_Year' in input_df.columns and 'Store_Age' not in input_df.columns:
44
+ input_df['Store_Age'] = 2025 - input_df['Store_Establishment_Year']
45
+ input_df = input_df.drop('Store_Establishment_Year', axis=1) # Drop the original year column
46
+
47
+
48
+ # Ensure the input DataFrame has the same columns as the training data and in the same order
49
+ # We need to reindex the input_df to match the columns the pipeline expects
50
+ # This requires knowing the exact columns expected by the pipeline after preprocessing
51
+ # A safer way is to define the expected columns based on the pipeline structure
52
+
53
  # Reorder columns to match the order expected by the pipeline
54
  # This assumes all expected columns are present in the input data
55
+ # We need the list of columns the pipeline expects after preprocessing
56
+ # This list was constructed during pipeline loading
57
+
58
+ # Ensure all expected columns are in the input data, add missing ones with default values (e.g., 0 for one-hot encoded)
59
+ for col in pipeline_columns:
60
+ if col not in input_df.columns:
61
+ input_df[col] = 0 # Assuming 0 is a safe default for missing features
62
+
63
+ # Reorder columns to match the pipeline's expected order
64
  input_df = input_df[pipeline_columns]
65
 
66
 
 
74
  return jsonify({'error': str(e)}), 400
75
 
76
  if __name__ == '__main__':
77
+ # Create the backend_app directory if it doesn't exist
78
+ import os
79
+ os.makedirs('backend_app', exist_ok=True)
80
+ # Ensure deployment_files directory exists
81
+ os.makedirs('deployment_files', exist_ok=True)
82
  app.run(debug=True, host='0.0.0.0', port=5000)