Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import streamlit as st | |
| import joblib | |
| from sklearn.preprocessing import StandardScaler, OneHotEncoder | |
| from sklearn.compose import ColumnTransformer | |
# Read the raw dataset and discard the identifier and label columns.
df = pd.read_csv('dataset.csv').drop(['id', 'Class'], axis=1)

# Normalise the column names: whitespace and dots become underscores,
# 'Gender&Type' becomes 'Gender_Type', and '#' characters are removed.
df.columns = (
    df.columns
    .str.replace(r'[\s\.]', '_', regex=True)
    .str.replace(r'Gender&Type', 'Gender_Type', regex=True)
    .str.replace(r'#', '', regex=True)
)

# Columns coerced to integers; values that fail to parse become 0.
int_columns = [
    'Monthly_Period', 'Credit1', 'InstallmentRate', 'Tenancy_Period',
    'Age', 'Credits', 'Authorities',
]
# Columns coerced to floats; values that fail to parse become 0.0.
float_columns = ['InstallmentCredit', 'Yearly_Period']
# Columns that are one-hot encoded by the preprocessor.
categorical_columns = [
    "Account1", "History", "Motive", "Account2", "Employment_Period",
    "Gender_Type", "Sponsors", "Plotsize", "Plan", "Housing", "Post",
    "Phone", "Expatriate",
]

df[int_columns] = (
    df[int_columns].apply(pd.to_numeric, errors='coerce').fillna(0).astype(int)
)
df[float_columns] = (
    df[float_columns].apply(pd.to_numeric, errors='coerce').fillna(0).astype(float)
)

# Every remaining column is used as a feature.
x = df

# Preprocessing: standardise the numeric columns, one-hot encode the rest.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), int_columns + float_columns),
        ('cat', OneHotEncoder(), categorical_columns),
    ]
)
| # Streamlit application | |
def cluster_pred(Account1, Monthly_Period, History, Motive, Credit1, Account2, Employment_Period, InstallmentRate, Gender_Type, Sponsors, Tenancy_Period,
                 Plotsize, Age, Plan, Housing, Credits, Post, Authorities, Phone, Expatriate, InstallmentCredit, Yearly_Period):
    """Predict the model output for a single record.

    Each parameter carries the value for the dataset column of the same
    name. The values are assembled into a one-row DataFrame, passed through
    the module-level ``preprocessor`` and handed to the model stored in
    ``Veri.pkl``.

    Returns:
        The first element of ``model.predict(...)`` converted to ``float``.
    """
    input_data = pd.DataFrame({
        'Account1': [Account1],
        'Monthly_Period': [Monthly_Period],
        'History': [History],
        'Motive': [Motive],
        'Credit1': [Credit1],
        'Account2': [Account2],
        'Employment_Period': [Employment_Period],
        'InstallmentRate': [InstallmentRate],
        'Gender_Type': [Gender_Type],
        'Sponsors': [Sponsors],
        'Tenancy_Period': [Tenancy_Period],
        'Plotsize': [Plotsize],
        'Age': [Age],
        'Plan': [Plan],
        'Housing': [Housing],
        'Credits': [Credits],
        'Post': [Post],
        'Authorities': [Authorities],
        'Phone': [Phone],
        'Expatriate': [Expatriate],
        'InstallmentCredit': [InstallmentCredit],
        'Yearly_Period': [Yearly_Period],
    })

    # NOTE: joblib.load unpickles the file; only ship a trusted 'Veri.pkl'.
    model = joblib.load('Veri.pkl')

    # Fit the preprocessor on the full dataset, not on the single input row.
    # Fitting on one row would scale every numeric feature to 0 and emit one
    # one-hot column per categorical, so the features would not line up with
    # a matrix built from the whole dataset.
    # NOTE(review): assumes the model was trained on features produced by
    # this same preprocessor over this dataset - confirm against training.
    preprocessor.fit(x)
    input_data_transformed = preprocessor.transform(input_data)

    prediction = model.predict(input_data_transformed)
    return float(prediction[0])
st.title("KMeans Clustering Model")
st.write("Enter Input Data to Predict Cluster")


def _choice(column):
    # Select box offering the distinct values present in df[column].
    return st.selectbox(column, df[column].unique())


def _int_slider(column):
    # Integer slider bounded by the column's min and max; integer bounds
    # make Streamlit return an int.
    return st.slider(column, int(df[column].min()), int(df[column].max()))


def _float_slider(column):
    # Float slider bounded by the column's min and max.
    return st.slider(column, float(df[column].min()), float(df[column].max()))


# Widgets are created in dataset column order; labels equal the column names.
Account1 = _choice('Account1')
Monthly_Period = _int_slider('Monthly_Period')
History = _choice('History')
Motive = _choice('Motive')
Credit1 = _int_slider('Credit1')
Account2 = _choice('Account2')
Employment_Period = _choice('Employment_Period')
InstallmentRate = _int_slider('InstallmentRate')
Gender_Type = _choice('Gender_Type')
Sponsors = _choice('Sponsors')
Tenancy_Period = _int_slider('Tenancy_Period')
Plotsize = _choice('Plotsize')
Age = _int_slider('Age')
Plan = _choice('Plan')
Housing = _choice('Housing')
# Credits and Authorities are cast to int when the dataset is loaded, so
# they get integer sliders like the other integer columns (a float slider
# would let the user pick fractional values).
Credits = _int_slider('Credits')
Post = _choice('Post')
Authorities = _int_slider('Authorities')
Phone = _choice('Phone')
Expatriate = st.selectbox('Expatriate', [True, False])
InstallmentCredit = _float_slider('InstallmentCredit')
Yearly_Period = _float_slider('Yearly_Period')

if st.button('Predict Cluster'):
    cluster = cluster_pred(
        Account1, Monthly_Period, History, Motive, Credit1, Account2,
        Employment_Period, InstallmentRate, Gender_Type, Sponsors,
        Tenancy_Period, Plotsize, Age, Plan, Housing, Credits, Post,
        Authorities, Phone, Expatriate, InstallmentCredit, Yearly_Period,
    )
    st.write(f'The predicted cluster is: {cluster}')