Spaces:
Sleeping
Sleeping
| import json | |
| import pandas as pd | |
| from sklearn.preprocessing import StandardScaler | |
# Module-level scaler instance shared by scaling() below, which calls
# fit_transform() on it each time it runs.
scale = StandardScaler()
# Label -> integer code lookup tables, keyed by DataFrame column name.
# Within each table a label's code is its 0-based position in the tuple
# below, and every tuple is listed in ascending string order.
# NOTE(review): presumably these codes must match the encoding used when the
# downstream model was trained -- confirm before adding or reordering labels,
# because inserting a label shifts every code after it.
x = {
    column: {label: code for code, label in enumerate(labels)}
    for column, labels in (
        (
            "State",
            (
                "AK", "AL", "AR", "AZ", "CA", "CO", "CT", "DC", "DE", "FL",
                "GA", "HI", "IA", "ID", "IL", "IN", "KS", "KY", "LA", "MA",
                "MD", "ME", "MI", "MN", "MO", "MS", "MT", "NC", "ND", "NE",
                "NH", "NJ", "NM", "NV", "NY", "OH", "OK", "OR", "PA", "RI",
                "SC", "SD", "TN", "TX", "UT", "VA", "VT", "WA", "WI", "WV",
                "WY",
            ),
        ),
        (
            "BankState",
            (
                "AK", "AL", "AR", "AZ", "CA", "CO", "CT", "DC", "DE", "EN",
                "FL", "GA", "GU", "HI", "IA", "ID", "IL", "IN", "KS", "KY",
                "LA", "MA", "MD", "ME", "MI", "MN", "MO", "MS", "MT", "NC",
                "ND", "NE", "NH", "NJ", "NM", "NV", "NY", "OH", "OK", "OR",
                "PA", "PR", "RI", "SC", "SD", "TN", "TX", "UT", "VA", "VT",
                "WA", "WI", "WV", "WY",
            ),
        ),
        (
            "Industry",
            (
                "Accom/Food_serv",
                "Admin_sup/Waste_Mgmt_Rem",
                "Ag/For/Fish/Hunt",
                "Arts/Entertain/Rec",
                "Construction",
                "Educational",
                "Finance/Insurance",
                "Healthcare/Social_assist",
                "Information",
                "Manufacturing",
                "Mgmt_comp",
                "Min/Quar/Oil_Gas_ext",
                "Other_no_pub",
                "Prof/Science/Tech",
                "Public_Admin",
                "RE/Rental/Lease",
                "Retail_trade",
                "Trans/Ware",
                "Unknown",
                "Utilities",
                "Wholesale_trade",
            ),
        ),
    )
}
def clean_data(df):
    """Replace the categorical label columns of *df* with integer codes.

    The ``State``, ``BankState`` and ``Industry`` columns are each passed
    through ``Series.map`` using the lookup table of the same name in the
    module-level ``x`` dict. Labels that are missing from a table come back
    from ``map`` as NaN.

    The frame is modified in place and the same object is returned.
    """
    for column in ("State", "BankState", "Industry"):
        df[column] = df[column].map(x[column])
    return df
def scaling(df):
    """Standardize the numeric columns of *df* in place and return it.

    Numeric columns are picked with ``select_dtypes(include=['number'])`` and
    overwritten with the output of ``fit_transform`` on the module-level
    ``scale`` object. Non-numeric columns are left untouched.

    A frame with no numeric columns, or with no rows, is returned unchanged
    instead of being handed to the scaler, which has nothing to fit on in
    either case.

    NOTE(review): ``fit_transform`` refits the shared scaler on every call,
    so the output depends only on the rows passed in. If this is also used on
    a single row at prediction time, the statistics come from that row alone
    -- confirm this is intended rather than ``transform`` with a pre-fitted
    scaler.
    """
    # Only scale numerical columns
    num_cols = df.select_dtypes(include=['number']).columns
    if num_cols.empty or len(df) == 0:
        return df
    df[num_cols] = scale.fit_transform(df[num_cols])
    return df