Dattaluri committed on
Commit
8f9892a
·
verified ·
1 Parent(s): 49d4a0a

Upload backend app files

Browse files
Files changed (1) hide show
  1. app.py +42 -14
app.py CHANGED
@@ -1,25 +1,28 @@
1
  from flask import Flask, request, jsonify
2
  import joblib
3
  import pandas as pd
 
4
 
5
  app = Flask(__name__)
6
 
7
  # Load the serialized full pipeline
8
  try:
9
  full_pipeline = joblib.load('deployment_files/SuperKart_model_v1_0.joblib')
10
- # Get the list of columns from the training data used by the pipeline
11
- # Adjusting to correctly get column names after one-hot encoding and scaling
12
- # This part might need refinement based on the exact structure of your pipeline's preprocessor
13
- # A safer approach is to save the column names of X_train during preprocessing
14
- # For now, let's assume the order is numerical followed by one-hot encoded categorical
15
- # We need to get the feature names from the OneHotEncoder and combine with numerical names
16
- categorical_feature_names = full_pipeline.named_steps['preprocessor'].transformers_[0][1].get_feature_names_out(
17
- full_pipeline.named_steps['preprocessor'].transformers_[0][2]
18
- )
19
- numerical_feature_names = full_pipeline.named_steps['scaler'].feature_names_in_
20
-
21
- # Combine numerical and categorical feature names in the correct order
22
- pipeline_columns = list(numerical_feature_names) + list(categorical_feature_names)
 
 
23
 
24
  except Exception as e:
25
  full_pipeline = None
@@ -33,11 +36,31 @@ def predict():
33
  try:
34
  data = request.get_json(force=True)
35
 
36
- # Convert input data to DataFrame, ensuring column order matches training data
37
  input_df = pd.DataFrame([data])
38
 
 
 
 
 
 
 
 
 
 
 
 
39
  # Reorder columns to match the order expected by the pipeline
40
  # This assumes all expected columns are present in the input data
 
 
 
 
 
 
 
 
 
41
  input_df = input_df[pipeline_columns]
42
 
43
 
@@ -51,4 +74,9 @@ def predict():
51
  return jsonify({'error': str(e)}), 400
52
 
53
  if __name__ == '__main__':
 
 
 
 
 
54
  app.run(debug=True, host='0.0.0.0', port=5000)
 
1
  from flask import Flask, request, jsonify
2
  import joblib
3
  import pandas as pd
4
+ import numpy as np
5
 
6
  app = Flask(__name__)
7
 
8
  # Load the serialized full pipeline
9
  try:
10
  full_pipeline = joblib.load('deployment_files/SuperKart_model_v1_0.joblib')
11
+
12
+ # Get the column names from the preprocessor step
13
+ # This handles both one-hot encoded and passthrough columns
14
+ # We need to access the transformers and their get_feature_names_out methods
15
+ transformer = full_pipeline.named_steps['preprocessor']
16
+ ohe_features = list(transformer.named_transformers_['onehotencoder'].get_feature_names_out(transformer.transformers_[0][2]))
17
+ passthrough_features = list(transformer.named_transformers_['remainder'].get_feature_names_out())
18
+
19
+ # Combine the feature names in the order they appear in the processed data
20
+ # The order should be passthrough features followed by one-hot encoded features
21
+ pipeline_columns = passthrough_features + ohe_features
22
+
23
+ # Note: This assumes the numerical columns (handled by StandardScaler in the pipeline)
24
+ # are the 'remainder' and appear before the one-hot encoded columns.
25
+ # If the order is different in your pipeline, adjust the concatenation accordingly.
26
 
27
  except Exception as e:
28
  full_pipeline = None
 
36
  try:
37
  data = request.get_json(force=True)
38
 
39
+ # Convert input data to DataFrame
40
  input_df = pd.DataFrame([data])
41
 
42
+ # Create 'Store_Age' feature if not present
43
+ if 'Store_Establishment_Year' in input_df.columns and 'Store_Age' not in input_df.columns:
44
+ input_df['Store_Age'] = 2025 - input_df['Store_Establishment_Year']
45
+ input_df = input_df.drop('Store_Establishment_Year', axis=1) # Drop the original year column
46
+
47
+
48
+ # Ensure the input DataFrame has the same columns as the training data and in the same order
49
+ # We need to reindex the input_df to match the columns the pipeline expects
50
+ # This requires knowing the exact columns expected by the pipeline after preprocessing
51
+ # A safer way is to define the expected columns based on the pipeline structure
52
+
53
  # Reorder columns to match the order expected by the pipeline
54
  # This assumes all expected columns are present in the input data
55
+ # We need the list of columns the pipeline expects after preprocessing
56
+ # This list was constructed during pipeline loading
57
+
58
+ # Ensure all expected columns are in the input data, add missing ones with default values (e.g., 0 for one-hot encoded)
59
+ for col in pipeline_columns:
60
+ if col not in input_df.columns:
61
+ input_df[col] = 0 # Assuming 0 is a safe default for missing features
62
+
63
+ # Reorder columns to match the pipeline's expected order
64
  input_df = input_df[pipeline_columns]
65
 
66
 
 
74
  return jsonify({'error': str(e)}), 400
75
 
76
  if __name__ == '__main__':
77
+ # Create the backend_app directory if it doesn't exist
78
+ import os
79
+ os.makedirs('backend_app', exist_ok=True)
80
+ # Ensure deployment_files directory exists
81
+ os.makedirs('deployment_files', exist_ok=True)
82
  app.run(debug=True, host='0.0.0.0', port=5000)