Spaces:

youdata-ai
/

Tender-Selection

Sleeping

HarshilRamiAISV committed on Aug 14, 2024

Commit

466d49d

verified ·

1 Parent(s): 5fac565

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import pandas as pd
 import pickle
 from sklearn.impute import SimpleImputer
 from sklearn.utils.validation import check_is_fitted
 # Load the trained model and preprocessing objects using pickle
 with open('random_forest_model.pkl', 'rb') as f:
@@ -83,6 +84,11 @@ def preprocess_new_data(df):
     df['state'].fillna("other", inplace=True)
     df['country'].fillna("other", inplace=True)
     df = df[['Ref No', 'Earnest Money', 'Estimated Cost', 'DocFees', 'Ownership', ' Type of Tender ', 'days_left', 'city', 'state', 'country']]
     imputer = SimpleImputer(strategy='median')
@@ -90,6 +96,12 @@ def preprocess_new_data(df):
     for column in ['Ownership', ' Type of Tender ', 'city', 'state', 'country']:
         le = label_encoders[column]
         df[column] = df[column].apply(lambda x: x if x in le.classes_ else 'other')
         df[column] = le.transform(df[column])
@@ -98,6 +110,8 @@ def preprocess_new_data(df):
     return df
 def predict_new_data(new_data):
     preprocessed_data = preprocess_new_data(new_data)
     X_new = preprocessed_data.drop(columns=['Ref No'])

 import pickle
 from sklearn.impute import SimpleImputer
 from sklearn.utils.validation import check_is_fitted
+import numpy as np
 # Load the trained model and preprocessing objects using pickle
 with open('random_forest_model.pkl', 'rb') as f:
     df['state'].fillna("other", inplace=True)
     df['country'].fillna("other", inplace=True)
+    # Remove commas and convert numerical columns to floats
+    numerical_columns = ['Earnest Money', 'Estimated Cost', 'DocFees']
+    for col in numerical_columns:
+        df[col] = df[col].replace({',': ''}, regex=True).astype(float)
     df = df[['Ref No', 'Earnest Money', 'Estimated Cost', 'DocFees', 'Ownership', ' Type of Tender ', 'days_left', 'city', 'state', 'country']]
     imputer = SimpleImputer(strategy='median')
     for column in ['Ownership', ' Type of Tender ', 'city', 'state', 'country']:
         le = label_encoders[column]
+        # Add 'other' to the classes if it's not already there
+        if 'other' not in le.classes_:
+            le.classes_ = np.append(le.classes_, 'other')
+        # Replace unseen labels with 'other'
         df[column] = df[column].apply(lambda x: x if x in le.classes_ else 'other')
         df[column] = le.transform(df[column])
     return df
 def predict_new_data(new_data):
     preprocessed_data = preprocess_new_data(new_data)
     X_new = preprocessed_data.drop(columns=['Ref No'])