Spaces:

Solab
/

SunKing

Runtime error

App Files Files Community

SunKing / app.py

Solab

Upload 3 files

3c8d407 over 2 years ago

raw

history blame contribute delete

2.35 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	import pickle

	# Load the pre-trained k-means model from the pickle file next to the app.
	# NOTE: pickle.load can execute arbitrary code while deserialising, so
	# "kmeans_model.pkl" must only ever be a trusted, project-produced artifact.
	with open("kmeans_model.pkl", "rb") as f:
	    kmeans = pickle.load(f)

	# Define a function to preprocess the uploaded data
	def preprocess_data(data: pd.DataFrame) -> pd.DataFrame:
	    """Build the feature frame for an uploaded customer table.

	    Rows without an ``ID`` are dropped, remaining gaps are filled with each
	    column's most frequent value, the categorical columns are replaced by
	    integer codes and the numeric columns are ``log1p``-transformed.

	    Args:
	        data: Uploaded table. Must contain ``ID`` plus the categorical
	            columns (``County``, ``Area``, ``Accounts Product Family``) and
	            the numeric columns (``Age``, ``Repayment Speed``,
	            ``Total Amount Paid``). ``Zone`` and the other non-feature
	            columns are dropped when present and ignored when absent.

	    Returns:
	        A new DataFrame with the input columns minus the non-feature ones,
	        keeping the index labels of the surviving rows. The input frame is
	        not modified.

	    Raises:
	        KeyError: If ``ID`` or one of the feature columns is missing.
	    """
	    # Drop the Zone column since it is all NaN; tolerate files without it
	    data = data.drop(columns="Zone", errors="ignore")
	    # Drop the rows with missing ID
	    data = data.dropna(subset=["ID"])
	    # Fill the other missing values with the per-column mode. mode() has no
	    # rows when nothing survived the ID filter, so guard the row lookup.
	    modes = data.mode()
	    if not modes.empty:
	        # Non-inplace fill: avoids writing into a frame derived from dropna
	        data = data.fillna(modes.iloc[0])
	    # Select the relevant features (drop returns a fresh frame, so the
	    # column assignments below never touch the caller's data)
	    X = data.drop(
	        columns=[
	            "ID",
	            "At Risk Rate",
	            "Disabled Rate",
	            "Total Amount Post Upfront Amount",
	            "Gender",
	        ],
	        errors="ignore",
	    )
	    # Convert categorical features to codes.
	    # NOTE(review): codes are derived from the values present in this upload,
	    # so they are only comparable within one file - confirm that is intended.
	    cats = ["County", "Area", "Accounts Product Family"]
	    for col in cats:
	        X[col] = pd.Categorical(X[col]).codes
	    # Apply log transformation to numerical features
	    conts = ["Age", "Repayment Speed", "Total Amount Paid"]
	    X[conts] = X[conts].apply(np.log1p)
	    # Return the preprocessed data
	    return X

	# Create a title for the app
	st.title("Anomaly Detection on Repayment Speed")

	# Create a sidebar for user input
	st.sidebar.header("Upload your data")

	# Allow the user to upload a file in csv or excel format
	uploaded_file = st.sidebar.file_uploader("Choose a file", type=["csv", "xlsx"])

	# If the user uploads a file, display it and make a prediction
	if uploaded_file is not None:
	    # Choose the reader from the file extension (the uploader already limits
	    # it to csv/xlsx). The browser-reported MIME type is unreliable - a CSV
	    # may arrive as "application/vnd.ms-excel" - and matching on it left
	    # `data` unbound in that case.
	    if uploaded_file.name.lower().endswith(".csv"):
	        data = pd.read_csv(uploaded_file)
	    else:
	        data = pd.read_excel(uploaded_file)

	    # Show IDs as text but keep missing IDs missing: a plain astype(str)
	    # turns NaN into the string "nan", which defeats the later
	    # "drop rows with missing ID" step.
	    data["ID"] = data["ID"].astype(str).where(data["ID"].notna())

	    # Display the uploaded data
	    st.subheader("Your data")
	    st.write(data)

	    # Preprocess the data (rows without an ID are dropped here)
	    X = preprocess_data(data)

	    # Make a prediction using the k-means model, which is fed the
	    # preprocessed "Repayment Speed" column only
	    prediction = kmeans.predict(X["Repayment Speed"].to_frame())

	    # Attach each label to its row through X's index, so predictions stay
	    # aligned with the rows that survived preprocessing
	    labels = pd.Series(prediction, index=X.index).replace(
	        {0: 'slower', 1: 'faster', 2: 'medium'}
	    )
	    data_p = data.loc[X.index, ["ID"]].assign(Prediction=labels)

	    # Display the prediction
	    st.subheader("prediction")
	    st.write(data_p)