HarshilRamiAISV committed on
Commit
466d49d
·
verified ·
1 Parent(s): 5fac565

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -0
app.py CHANGED
@@ -3,6 +3,7 @@ import pandas as pd
3
  import pickle
4
  from sklearn.impute import SimpleImputer
5
  from sklearn.utils.validation import check_is_fitted
 
6
 
7
  # Load the trained model and preprocessing objects using pickle
8
  with open('random_forest_model.pkl', 'rb') as f:
@@ -83,6 +84,11 @@ def preprocess_new_data(df):
83
  df['state'].fillna("other", inplace=True)
84
  df['country'].fillna("other", inplace=True)
85
 
 
 
 
 
 
86
  df = df[['Ref No', 'Earnest Money', 'Estimated Cost', 'DocFees', 'Ownership', ' Type of Tender ', 'days_left', 'city', 'state', 'country']]
87
 
88
  imputer = SimpleImputer(strategy='median')
@@ -90,6 +96,12 @@ def preprocess_new_data(df):
90
 
91
  for column in ['Ownership', ' Type of Tender ', 'city', 'state', 'country']:
92
  le = label_encoders[column]
 
 
 
 
 
 
93
  df[column] = df[column].apply(lambda x: x if x in le.classes_ else 'other')
94
  df[column] = le.transform(df[column])
95
 
@@ -98,6 +110,8 @@ def preprocess_new_data(df):
98
 
99
  return df
100
 
 
 
101
  def predict_new_data(new_data):
102
  preprocessed_data = preprocess_new_data(new_data)
103
  X_new = preprocessed_data.drop(columns=['Ref No'])
 
3
  import pickle
4
  from sklearn.impute import SimpleImputer
5
  from sklearn.utils.validation import check_is_fitted
6
+ import numpy as np
7
 
8
  # Load the trained model and preprocessing objects using pickle
9
  with open('random_forest_model.pkl', 'rb') as f:
 
84
  df['state'].fillna("other", inplace=True)
85
  df['country'].fillna("other", inplace=True)
86
 
87
+ # Remove commas and convert numerical columns to floats
88
+ numerical_columns = ['Earnest Money', 'Estimated Cost', 'DocFees']
89
+ for col in numerical_columns:
90
+ df[col] = df[col].replace({',': ''}, regex=True).astype(float)
91
+
92
  df = df[['Ref No', 'Earnest Money', 'Estimated Cost', 'DocFees', 'Ownership', ' Type of Tender ', 'days_left', 'city', 'state', 'country']]
93
 
94
  imputer = SimpleImputer(strategy='median')
 
96
 
97
  for column in ['Ownership', ' Type of Tender ', 'city', 'state', 'country']:
98
  le = label_encoders[column]
99
+
100
+ # Add 'other' to the classes if it's not already there
101
+ if 'other' not in le.classes_:
102
+ le.classes_ = np.append(le.classes_, 'other')
103
+
104
+ # Replace unseen labels with 'other'
105
  df[column] = df[column].apply(lambda x: x if x in le.classes_ else 'other')
106
  df[column] = le.transform(df[column])
107
 
 
110
 
111
  return df
112
 
113
+
114
+
115
  def predict_new_data(new_data):
116
  preprocessed_data = preprocess_new_data(new_data)
117
  X_new = preprocessed_data.drop(columns=['Ref No'])