| | import streamlit as st |
| | import pandas as pd |
| | import numpy as np |
| | import pickle |
| |
|
| | |
def _load_kmeans_model(path):
    """Deserialize and return the pickled clustering model stored at *path*."""
    # NOTE: unpickling can execute arbitrary code, so the model file must
    # come from a trusted source.
    with open(path, "rb") as model_file:
        return pickle.load(model_file)


# Loaded once when the script starts; the prediction step further down
# calls ``kmeans.predict``.
kmeans = _load_kmeans_model("kmeans_model.pkl")
| |
|
| | |
def preprocess_data(data: pd.DataFrame) -> pd.DataFrame:
    """Turn the raw uploaded table into the feature frame used for prediction.

    Steps, in order:

    1. Discard the ``Zone`` column (if present).
    2. Drop rows whose ``ID`` is missing.
    3. Fill every remaining missing value with the mode of its column.
    4. Discard the identifier and the columns that are not used as features.
    5. Replace the categorical columns with integer category codes.
    6. Apply ``log1p`` to the continuous columns.

    The input frame is not modified; a new frame is returned. Row labels of
    the surviving rows are preserved, so the result can be aligned with the
    input through its index.

    Args:
        data: Uploaded table. Must contain ``ID`` plus the categorical and
            continuous feature columns listed below.

    Returns:
        A DataFrame with the feature columns only. It has zero rows when no
        input row has an ``ID``.

    Raises:
        KeyError: If ``ID`` or one of the feature columns is missing.
    """
    # Columns that are thrown away and never feed the features. Their absence
    # cannot change the result, so they are dropped leniently.
    unused = [
        "ID",
        "At Risk Rate",
        "Disabled Rate",
        "Total Amount Post Upfront Amount",
        "Gender",
    ]
    cats = ["County", "Area", "Accounts Product Family"]
    conts = ["Age", "Repayment Speed", "Total Amount Paid"]

    data = data.drop(columns="Zone", errors="ignore")
    data = data.dropna(subset=["ID"])

    # ``mode()`` of an empty frame has no first row, so only impute when at
    # least one row survived the ID filter.
    if not data.empty:
        data = data.fillna(data.mode().iloc[0])

    X = data.drop(columns=unused, errors="ignore")

    # Codes are derived from the values present in *this* frame, so the same
    # label may map to a different code in another upload.
    for col in cats:
        X[col] = pd.Categorical(X[col]).codes

    X[conts] = np.log1p(X[conts])

    return X
| |
|
| | |
# Page heading shown at the top of the app.
st.title("Anomaly Detection on Repayment Speed")

# Sidebar: a section header followed by the upload widget, which accepts
# CSV and Excel (.xlsx) files only.
with st.sidebar:
    st.header("Upload your data")
    uploaded_file = st.file_uploader("Choose a file", type=["csv", "xlsx"])
| |
|
| | |
def _read_uploaded_table(upload):
    """Parse an uploaded CSV/XLSX file into a DataFrame.

    Returns ``None`` when the upload is neither a CSV nor an XLSX file.
    """
    # Check the file extension first and fall back to the MIME type: the
    # reported MIME type is not guaranteed to be one of the two literals
    # below, and relying on it alone leaves the table undefined.
    file_name = (upload.name or "").lower()
    if file_name.endswith(".csv") or upload.type == "text/csv":
        return pd.read_csv(upload)
    if file_name.endswith(".xlsx") or upload.type == (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    ):
        return pd.read_excel(upload)
    return None


# Everything below runs only once the user has uploaded a file.
if uploaded_file is not None:
    data = _read_uploaded_table(uploaded_file)

    if data is None:
        st.error("Unsupported file type. Please upload a .csv or .xlsx file.")
    elif "ID" not in data.columns:
        st.error("The uploaded file must contain an 'ID' column.")
    else:
        # Stringify only the IDs that are present. A plain ``astype(str)``
        # turns missing IDs into the text "nan", which would defeat the
        # ``dropna(subset=['ID'])`` filtering done during preprocessing.
        has_id = data["ID"].notna()
        data["ID"] = data["ID"].astype(str).where(has_id)

        st.subheader("Your data")
        st.write(data)

        if not has_id.any():
            st.warning("No rows with an ID were found, so there is nothing to predict.")
        else:
            X = preprocess_data(data)

            # The model is queried with the (log-transformed) repayment
            # speed as its only feature.
            prediction = kmeans.predict(X[["Repayment Speed"]])

            # Select the rows that survived preprocessing by index label so
            # each prediction lands on the row it was computed from.
            data_p = data.loc[X.index, ["ID"]].copy()
            data_p["Prediction"] = prediction
            data_p["Prediction"] = data_p["Prediction"].replace(
                {0: "slower", 1: "faster", 2: "medium"}
            )

            st.subheader("prediction")
            st.write(data_p)
| |
|