Spaces:

Solab
/

SunKing

Runtime error

App Files Files Community

Solab committed on Jul 4, 2023

Commit

3c8d407

1 Parent(s): 498298c

Upload 3 files

Browse files

Files changed (4) hide show

.gitattributes +1 -0
Data.xlsx +3 -0
app.py +65 -0
kmeans_model.pkl +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+Data.xlsx filter=lfs diff=lfs merge=lfs -text

Data.xlsx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a69032ecc9f5b1992d714c699626f9c2c7318d33e989c9fc2780a0934bf23e27
+size 3265210

app.py ADDED Viewed

	@@ -0,0 +1,65 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+import pickle
+# Deserialize the bundled k-means estimator once at start-up.
+# NOTE: unpickling runs arbitrary code, so this file must come from a trusted source.
+with open("kmeans_model.pkl", mode="rb") as model_file:
+    kmeans = pickle.load(model_file)
+# Define a function to preprocess the uploaded data
+def preprocess_data(data):
+    """Turn an uploaded customer table into the model's feature frame.
+
+    The input frame is not modified. Steps, in order: drop the optional
+    "Zone" column, drop rows whose "ID" is missing, fill remaining gaps with
+    each column's mode, remove the non-feature columns, integer-encode the
+    categorical columns and apply log1p to the continuous ones.
+
+    Args:
+        data: pandas DataFrame holding at least the columns "ID",
+            "At Risk Rate", "Disabled Rate",
+            "Total Amount Post Upfront Amount", "Gender", "County", "Area",
+            "Accounts Product Family", "Age", "Repayment Speed" and
+            "Total Amount Paid".
+
+    Returns:
+        A new DataFrame with one row per input row that has an ID, keeping
+        the input's index labels for those rows.
+
+    Raises:
+        KeyError: if one of the required columns is absent.
+    """
+    # Zone is expected to be entirely NaN; tolerate uploads that omit it
+    # instead of failing with a KeyError.
+    data = data.drop(columns="Zone", errors="ignore")
+    # Drop the rows with missing ID
+    data = data.dropna(subset=["ID"])
+    # Fill the other missing values with the column mode. mode() is empty
+    # when no rows are left, so only index into it when a mode exists.
+    modes = data.mode()
+    if not modes.empty:
+        data = data.fillna(modes.iloc[0])
+    # Select the relevant features
+    X = data.drop(columns=["ID", "At Risk Rate", "Disabled Rate", "Total Amount Post Upfront Amount", "Gender"])
+    # Convert categorical features to codes. A per-column loop is used
+    # because DataFrame.apply does not return per-column codes on an empty frame.
+    # NOTE(review): codes are derived from the uploaded values, so they are
+    # only stable across uploads that contain the same categories.
+    cats = ["County", "Area", "Accounts Product Family"]
+    for col in cats:
+        X[col] = pd.Categorical(X[col]).codes
+    # Apply log transformation to numerical features
+    conts = ["Age", "Repayment Speed", "Total Amount Paid"]
+    for col in conts:
+        X[col] = np.log1p(X[col])
+    # Return the preprocessed data
+    return X
+# Create a title for the app
+st.title("Anomaly Detection on Repayment Speed")
+# Create a sidebar for user input
+st.sidebar.header("Upload your data")
+# Allow the user to upload a file in csv or excel format
+uploaded_file = st.sidebar.file_uploader("Choose a file", type=["csv", "xlsx"])
+# If the user uploads a file, display it and make a prediction
+if uploaded_file is not None:
+    # Choose the reader from the file extension. The browser-reported MIME
+    # type is not reliable (CSV files are often sent as
+    # "application/vnd.ms-excel"), and an unmatched type previously left
+    # `data` undefined. The uploader only accepts csv and xlsx.
+    if uploaded_file.name.lower().endswith(".csv"):
+        data = pd.read_csv(uploaded_file)
+    else:
+        data = pd.read_excel(uploaded_file)
+    # Stringify the IDs but keep missing ones missing: a plain astype(str)
+    # turns NaN into the text "nan", which dropna() can no longer detect.
+    ids = data["ID"]
+    data["ID"] = ids.astype(str).mask(ids.isna())
+    # Display the uploaded data
+    st.subheader("Your data")
+    st.write(data)
+    # Nothing can be scored when no row carries an ID (this also covers an
+    # empty upload), so stop here with a message instead of a traceback.
+    if data["ID"].isna().all():
+        st.warning("The uploaded file has no rows with an ID.")
+        st.stop()
+    # Preprocess the data
+    X = preprocess_data(data)
+    # Make a prediction using the k-means model (it only uses Repayment Speed)
+    prediction = kmeans.predict(X["Repayment Speed"].to_frame())
+    # Attach labels by index so each one stays on the row it was computed
+    # for, whichever rows preprocessing dropped.
+    data_p = data.loc[X.index, ["ID"]].copy()
+    # NOTE(review): the label names are tied to the cluster numbering of the
+    # pickled model; re-check them if the model is retrained.
+    data_p["Prediction"] = pd.Series(prediction, index=X.index).replace({0: 'slower', 1: 'faster', 2: 'medium'})
+    # Display the prediction
+    st.subheader("prediction")
+    st.write(data_p)

kmeans_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5ff40dd290dea505c119c4241b4aa8e2423613437fff93f068b79e8d586ca2e4
+size 200191