# Streamlit app (Hugging Face Space): tender selection prediction.
| import streamlit as st | |
| import pandas as pd | |
| import pickle | |
| from sklearn.impute import SimpleImputer | |
| from sklearn.utils.validation import check_is_fitted | |
| import numpy as np | |
# Load the trained model and preprocessing objects using pickle.
# NOTE(review): pickle.load executes arbitrary code on load — acceptable only
# because these artifacts are produced by our own training pipeline; never
# point these paths at untrusted files.
with open('random_forest_model.pkl', 'rb') as f:
    random_forest_model = pickle.load(f)  # fitted classifier used in predict_new_data
with open('scaler.pkl', 'rb') as f:
    scaler = pickle.load(f)  # fitted scaler applied to the numeric feature columns
with open('label_encoders.pkl', 'rb') as f:
    label_encoders = pickle.load(f)  # mapping: column name -> fitted LabelEncoder
# State corrections and valid states/UTs.
# Maps legacy/alternate state spellings (lower-case) to their canonical names;
# catch-all buckets ('multi state', 'not classified') collapse to 'other'.
state_corrections = {
    'uttaranchal': 'uttarakhand',
    'orissa (odisha)': 'odisha',
    'kashmir': 'jammu and kashmir',
    'multi state': 'other',
    'not classified': 'other'
}
# Canonical lower-case names of Indian states and union territories;
# anything outside this list is normalised to 'other' during extraction.
valid_states_uts = [
    'andhra pradesh', 'arunachal pradesh', 'assam', 'bihar', 'chhattisgarh', 'goa',
    'gujarat', 'haryana', 'himachal pradesh', 'jharkhand', 'karnataka', 'kerala',
    'madhya pradesh', 'maharashtra', 'manipur', 'meghalaya', 'mizoram', 'nagaland',
    'odisha', 'punjab', 'rajasthan', 'sikkim', 'tamil nadu', 'telangana', 'tripura',
    'uttar pradesh', 'uttarakhand', 'west bengal', 'andaman and nicobar islands',
    'chandigarh', 'dadra and nagar haveli and daman and diu', 'lakshadweep', 'delhi',
    'puducherry', 'jammu and kashmir', 'ladakh'
]
# Extract city, state, and country from the hyphen-separated Location field.
def extract_city(x):
    """Return the lower-cased city portion of a Location string.

    A four-segment "a - b - state - country" location yields "a b" (two-word
    city); any other segment count yields the first segment. Non-string
    input maps to "other".
    """
    if not isinstance(x, str):
        return "other"
    segments = [part.strip().lower() for part in x.split("-")]
    if len(segments) == 4:
        return f"{segments[0]} {segments[1]}"
    return segments[0]
def extract_state(x):
    """Return the canonical lower-case state/UT from a Location string.

    The state is taken as the second-to-last hyphen-separated segment,
    run through `state_corrections`, and validated against
    `valid_states_uts`; anything unrecognised (including non-string
    input) maps to "other".
    """
    if not isinstance(x, str):
        return "other"
    parts = x.split("-")
    # BUG FIX: a location with no '-' produces a single segment, and
    # indexing [-2] on it raised IndexError; treat it as having no state.
    if len(parts) < 2:
        return "other"
    state = parts[-2].strip().lower()
    return state_corrections.get(state, state if state in valid_states_uts else 'other')
def extract_country(x):
    """Return the trailing (country) segment of a Location string, lower-cased.

    Non-string input maps to "other".
    """
    return x.split("-")[-1].strip().lower() if isinstance(x, str) else "other"
def preprocess_new_data(df):
    """Transform a raw tender DataFrame into the model's feature matrix.

    Normalises categorical text, derives `days_left` from the date columns,
    extracts city/state/country from `Location`, label-encodes categoricals
    (unseen labels become 'other'), and scales the numeric features with the
    training-time `scaler`. Mutates the shared columns of *df* in place and
    returns a new frame containing 'Ref No' plus the model features.

    Relies on module-level `label_encoders`, `scaler`, and the extract_*
    helpers.
    """
    # Normalise categorical text so it matches the training-time casing.
    df['Ownership'] = df['Ownership'].str.lower().str.strip()
    df[' Type of Tender '] = df[' Type of Tender '].str.lower().str.strip()

    def parse_closing_date(date_str):
        # Closing dates sometimes arrive as ranges ("<open> to <close>");
        # keep the closing end. Anything unparseable becomes NaT.
        try:
            return pd.to_datetime(date_str)
        except Exception:
            # BUG FIX: guard the `in` check — a non-string reaching this
            # path raised TypeError instead of falling through to NaT.
            if isinstance(date_str, str) and " to " in date_str:
                return pd.to_datetime(date_str.split(" to ")[-1], errors='coerce')
            return pd.NaT

    df['Closing Date'] = df['Closing Date'].apply(parse_closing_date)
    df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
    df['days_left'] = (df['Closing Date'] - df['Date']).dt.days

    df['city'] = df['Location'].apply(extract_city)
    df['state'] = df['Location'].apply(extract_state)
    df['country'] = df['Location'].apply(extract_country)
    # BUG FIX: chained `df[col].fillna(..., inplace=True)` is deprecated
    # (raises in pandas 3.0); assign the filled Series back instead.
    for col in ('city', 'state', 'country'):
        df[col] = df[col].fillna("other")

    # Remove thousands separators and coerce numeric columns to float.
    numerical_columns = ['Earnest Money', 'Estimated Cost', 'DocFees']
    for col in numerical_columns:
        df[col] = df[col].replace({',': ''}, regex=True).astype(float)

    # Take an explicit copy of the feature subset to avoid
    # SettingWithCopy issues on the assignments below.
    df = df[['Ref No', 'Earnest Money', 'Estimated Cost', 'DocFees', 'Ownership',
             ' Type of Tender ', 'days_left', 'city', 'state', 'country']].copy()

    # NOTE(review): the imputer is fit on the incoming batch, not on the
    # training data, so the median used can drift from training time —
    # confirm whether a persisted training-time imputer should be loaded.
    imputer = SimpleImputer(strategy='median')
    df['days_left'] = imputer.fit_transform(df[['days_left']])

    for column in ['Ownership', ' Type of Tender ', 'city', 'state', 'country']:
        le = label_encoders[column]
        # Extend the encoder with an 'other' class so unseen labels encode.
        if 'other' not in le.classes_:
            le.classes_ = np.append(le.classes_, 'other')
        known = set(le.classes_)  # O(1) membership instead of array scan per row
        df[column] = df[column].apply(lambda x: x if x in known else 'other')
        df[column] = le.transform(df[column])

    numerical_features = ['Earnest Money', 'Estimated Cost', 'DocFees', 'days_left']
    df[numerical_features] = scaler.transform(df[numerical_features])
    return df
def predict_new_data(new_data):
    """Run the trained random-forest model over freshly uploaded tender rows.

    Preprocesses *new_data*, predicts with `random_forest_model`, and
    returns a DataFrame pairing each 'Ref No' with its predicted label.
    """
    features = preprocess_new_data(new_data)
    ref_numbers = features['Ref No']
    predictions = random_forest_model.predict(features.drop(columns=['Ref No']))
    return pd.DataFrame({'Ref No': ref_numbers, 'predictions': predictions})
st.title("Tender Selection Prediction")

# Upload a raw tender CSV, score it, and display the rows the model selects.
uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"])
if uploaded_file is not None:
    new_data = pd.read_csv(uploaded_file)
    prediction_results = predict_new_data(new_data)
    # Ref Nos the model predicted as selected ("yes"), as strings for matching.
    selected_tenders = prediction_results[prediction_results['predictions'] == "yes"]['Ref No'].astype(str).to_list()
    new_data['Ref No'] = new_data['Ref No'].astype(str)
    st.write("Selected Tenders:")
    selected_rows = new_data[new_data['Ref No'].isin(selected_tenders)]
    # BUG FIX: dropping 'Unnamed: 0' crashed with KeyError when the uploaded
    # CSV had no stray index column — errors='ignore' makes it optional.
    # reset_index(drop=True) replaces the reset_index()/drop('index') two-step.
    st.write(selected_rows.drop(columns=['Unnamed: 0'], errors='ignore').reset_index(drop=True))