Solab committed on
Commit
3c8d407
·
1 Parent(s): 498298c

Upload 3 files

Browse files
Files changed (4) hide show
  1. .gitattributes +1 -0
  2. Data.xlsx +3 -0
  3. app.py +65 -0
  4. kmeans_model.pkl +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ Data.xlsx filter=lfs diff=lfs merge=lfs -text
Data.xlsx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a69032ecc9f5b1992d714c699626f9c2c7318d33e989c9fc2780a0934bf23e27
3
+ size 3265210
app.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import pickle
5
+
6
# Deserialize the pre-trained k-means model stored next to this script.
# NOTE(review): pickle executes arbitrary code on load; only ship a
# kmeans_model.pkl that comes from a trusted source.
with open("kmeans_model.pkl", "rb") as model_file:
    kmeans = pickle.load(model_file)
9
+
10
+ # Define a function to preprocess the uploaded data
11
def preprocess_data(data):
    """Return the model-ready feature frame derived from an uploaded table.

    Steps, in order:

    1. Drop the ``Zone`` column when it is present.
    2. Drop rows whose ``ID`` is missing.
    3. Fill remaining missing values with each column's mode.
    4. Drop identifier/unused columns (``ID``, ``At Risk Rate``,
       ``Disabled Rate``, ``Total Amount Post Upfront Amount``, ``Gender``)
       when present.
    5. Replace ``County``, ``Area`` and ``Accounts Product Family`` with
       integer category codes.
    6. Apply ``log1p`` to ``Age``, ``Repayment Speed`` and
       ``Total Amount Paid``.

    Args:
        data: pandas DataFrame holding the uploaded records. It is not
            modified; every step works on a new frame.

    Returns:
        A DataFrame with the remaining feature columns. Its index is the
        subset of ``data.index`` whose ``ID`` is present, so callers can
        align results back to the input rows.

    Raises:
        KeyError: if ``ID`` or any categorical/numerical feature column
            listed above is missing from ``data``.
    """
    # errors="ignore": uploads without a Zone column are accepted as-is
    # instead of failing with KeyError.
    data = data.drop(columns="Zone", errors="ignore")
    # Drop the rows with missing ID
    data = data.dropna(subset=["ID"])
    # Fill the other missing values with the per-column mode. mode() has no
    # rows when the frame is empty (or every column is entirely missing), so
    # guard the .iloc[0] lookup that would otherwise raise IndexError.
    modes = data.mode()
    if not modes.empty:
        data = data.fillna(modes.iloc[0])
    # Select the relevant features; columns that are not used downstream are
    # dropped only if they exist.
    X = data.drop(
        columns=[
            "ID",
            "At Risk Rate",
            "Disabled Rate",
            "Total Amount Post Upfront Amount",
            "Gender",
        ],
        errors="ignore",
    )
    # Convert categorical features to codes.
    # NOTE(review): codes are derived from the values present in this frame,
    # so the same category can map to a different code in another upload.
    cats = ["County", "Area", "Accounts Product Family"]
    for col in cats:
        X[col] = pd.Categorical(X[col]).codes
    # Apply log transformation to numerical features
    conts = ["Age", "Repayment Speed", "Total Amount Paid"]
    X[conts] = np.log1p(X[conts])
    # Return the preprocessed data
    return X
29
+
30
# Create a title for the app
st.title("Anomaly Detection on Repayment Speed")

# Create a sidebar for user input
st.sidebar.header("Upload your data")

# Allow the user to upload a file in csv or excel format
uploaded_file = st.sidebar.file_uploader("Choose a file", type=["csv", "xlsx"])

# If the user uploads a file, display it and make a prediction
if uploaded_file is not None:
    # Pick the reader from the file extension instead of the browser-reported
    # MIME type: the MIME type is client-dependent (CSV files may arrive as
    # e.g. "application/vnd.ms-excel"), and an unmatched value used to leave
    # `data` undefined. The uploader only accepts csv/xlsx, so anything that
    # is not .csv is read as Excel.
    if uploaded_file.name.lower().endswith(".csv"):
        data = pd.read_csv(uploaded_file)
    else:
        data = pd.read_excel(uploaded_file)

    # Fail with a readable message rather than a KeyError traceback.
    if "ID" not in data.columns:
        st.error("The uploaded file must contain an 'ID' column.")
        st.stop()

    # Show IDs as text, but keep missing IDs missing: a plain astype(str)
    # turns NaN into the string "nan", which defeats the later
    # dropna(subset=["ID"]) in preprocess_data.
    has_id = data["ID"].notna()
    data["ID"] = data["ID"].astype(str).where(has_id)

    # Display the uploaded data
    st.subheader("Your data")
    st.write(data)

    # Preprocess the data
    X = preprocess_data(data)

    # Nothing to score if every row was dropped for lacking an ID.
    if X.empty:
        st.warning("No rows with a valid ID were found in the uploaded file.")
        st.stop()

    # Make a prediction using the k-means model; only the (log-transformed)
    # "Repayment Speed" column is passed to the model.
    # NOTE(review): presumably the model was fitted on the same log1p-scaled
    # feature - confirm against the training code.
    prediction = kmeans.predict(X["Repayment Speed"].to_frame())

    # Align predictions with the rows that survived preprocessing by index,
    # so the two can never drift out of step.
    data_p = data.loc[X.index, ["ID"]].copy()
    data_p["Prediction"] = prediction
    # Translate cluster labels into the names used by the app.
    data_p["Prediction"] = data_p["Prediction"].replace({0: "slower", 1: "faster", 2: "medium"})

    # Display the prediction
    st.subheader("prediction")
    st.write(data_p)
kmeans_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ff40dd290dea505c119c4241b4aa8e2423613437fff93f068b79e8d586ca2e4
3
+ size 200191