File size: 2,347 Bytes
3c8d407
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import streamlit as st
import pandas as pd
import numpy as np
import pickle

# Restore the k-means model from its pickle file. The relative path is
# resolved against the current working directory.
# NOTE: unpickling can execute arbitrary code, so this file must come from a
# trusted source.
with open("kmeans_model.pkl", mode="rb") as model_file:
    kmeans = pickle.load(model_file)

def preprocess_data(data):
    """Build the model feature frame from an uploaded dataset.

    Steps, in order:
      1. drop the "Zone" column when present,
      2. drop rows whose "ID" is missing,
      3. fill every remaining missing value with its column's mode,
      4. drop the identifier / unused columns,
      5. integer-encode the categorical columns,
      6. apply ``log1p`` to the numerical columns.

    Args:
        data: DataFrame containing at least the columns "ID", "At Risk Rate",
            "Disabled Rate", "Total Amount Post Upfront Amount", "Gender",
            "County", "Area", "Accounts Product Family", "Age",
            "Repayment Speed" and "Total Amount Paid". It is not modified.

    Returns:
        A new DataFrame with one row per input row that has an ID, holding
        the encoded categorical and log-transformed numerical features.

    Raises:
        KeyError: if any required column other than "Zone" is missing.
    """
    # "Zone" is only dropped, never used, so an upload that omits it is fine.
    data = data.drop(columns="Zone", errors="ignore")
    # Drop the rows with missing ID
    data = data.dropna(subset=["ID"])
    # Fill the other missing values with the column mode. mode() of an empty
    # frame has no rows, so there is nothing to look up (or fill) in that case.
    if not data.empty:
        modes = data.mode().iloc[0]
        data = data.fillna(modes)
    # Select the relevant features
    X = data.drop(
        columns=[
            "ID",
            "At Risk Rate",
            "Disabled Rate",
            "Total Amount Post Upfront Amount",
            "Gender",
        ]
    )
    # Convert categorical features to codes. Columns are assigned one at a
    # time because DataFrame.apply cannot handle a frame with zero rows here.
    cats = ["County", "Area", "Accounts Product Family"]
    for col in cats:
        X[col] = pd.Categorical(X[col]).codes
    # Apply log transformation to numerical features
    conts = ["Age", "Repayment Speed", "Total Amount Paid"]
    for col in conts:
        X[col] = np.log1p(X[col])
    return X

# Page heading
st.title("Anomaly Detection on Repayment Speed")

# Sidebar section that holds the upload control
st.sidebar.header("Upload your data")

# File picker restricted to csv and xlsx uploads; the result is None until
# the user has chosen a file
uploaded_file = st.sidebar.file_uploader(
    label="Choose a file",
    type=["csv", "xlsx"],
)

# If the user uploads a file, display it and make a prediction
if uploaded_file is not None:
    # Choose the parser from the file extension instead of the MIME type.
    # The browser-reported MIME type of a .csv is not guaranteed to be
    # "text/csv", and an unmatched type would leave `data` undefined.
    # The uploader only accepts csv and xlsx, so anything that is not a csv
    # is read as Excel.
    if uploaded_file.name.lower().endswith(".csv"):
        data = pd.read_csv(uploaded_file)
    else:
        data = pd.read_excel(uploaded_file)

    # Show IDs as text, but keep missing IDs missing. A plain astype(str)
    # would turn NaN into the string "nan", so rows without an ID could never
    # be dropped by the dropna(subset=["ID"]) calls further down.
    has_id = data["ID"].notna()
    data["ID"] = data["ID"].astype(str).where(has_id)

    # Display the uploaded data
    st.subheader("Your data")
    st.write(data)

    # Preprocess the data
    X = preprocess_data(data)

    # Make a prediction using the k-means model; only the preprocessed
    # "Repayment Speed" column is passed to it
    prediction = kmeans.predict(X["Repayment Speed"].to_frame())
    # preprocess_data drops rows without an ID, so drop the same rows here to
    # keep the predictions aligned with the rows they belong to
    data_p = data.dropna(subset=["ID"]).copy()
    data_p["Prediction"] = prediction
    data_p["Prediction"] = data_p["Prediction"].replace({0: 'slower', 1: 'faster', 2: 'medium'})
    data_p = data_p[["ID", "Prediction"]]

    # Display the prediction
    st.subheader("prediction")
    st.write(data_p)