Tryfonas's picture
Upload folder using huggingface_hub
e4c3069 verified
raw
history blame
1.55 kB
# Import necessary libraries
import streamlit as st
import pandas as pd
import altair as alt
import matplotlib.pyplot as plt
from scipy.stats import zscore
# Location of the Kiva loans export, relative to the working directory.
DATA_PATH = 'kiva_loans.csv'

# Free-text / timestamp columns that no view in this app reads.
UNUSED_COLUMNS = ['use', 'disbursed_time', 'funded_time', 'posted_time', 'tags']

# Rows missing either of these are dropped. This does not affect the
# aggregations below, but keeps the frame consistent for later use.
REQUIRED_COLUMNS = ['partner_id', 'borrower_genders']

# A loan is an outlier when its funded amount is more than this many
# standard deviations away from the mean.
OUTLIER_Z_THRESHOLD = 3


def load_loans(path: str) -> pd.DataFrame:
    """Read the loans CSV at *path* and apply the fixed clean-up steps.

    Drops the columns in ``UNUSED_COLUMNS`` and every row that has a missing
    value in any of ``REQUIRED_COLUMNS``.
    """
    loans = pd.read_csv(path)
    loans = loans.drop(UNUSED_COLUMNS, axis=1)
    return loans.dropna(subset=REQUIRED_COLUMNS)


def flag_funded_amount_outliers(
    loans: pd.DataFrame, threshold: float = OUTLIER_Z_THRESHOLD
) -> pd.DataFrame:
    """Return a copy of *loans* with a boolean ``outlier_funded_amount`` column.

    A row is flagged when the z-score of its ``funded_amount`` is above
    ``threshold`` or below ``-threshold``. The input frame is not modified.
    """
    flagged = loans.copy()
    # nan_policy='omit': with the default ('propagate') a single missing
    # amount turns every z-score into NaN and no row would ever be flagged.
    # Rows whose amount is missing get a NaN score and are left unflagged.
    z_scores = zscore(
        flagged['funded_amount'].to_numpy(dtype=float), nan_policy='omit'
    )
    flagged['outlier_funded_amount'] = (z_scores > threshold) | (z_scores < -threshold)
    return flagged


def aggregate_loans(loans: pd.DataFrame, group_by: str, metric: str) -> pd.DataFrame:
    """Aggregate *loans* per value of *group_by*, largest value first.

    When *metric* is ``'count'`` the result holds the number of rows per
    group in a ``count`` column; otherwise it holds the per-group sum of the
    column named *metric*.
    """
    if metric == 'count':
        result = loans.groupby(group_by).size().reset_index(name='count')
    else:
        result = loans.groupby(group_by)[metric].sum().reset_index()
    return result.sort_values(by=metric, ascending=False)


def main() -> None:
    """Render the app: sidebar controls plus the aggregated table."""
    st.title('my shitty app ')

    # Streamlit reruns the whole script on every widget interaction; cache the
    # CSV load so it is not re-read each time. Older Streamlit releases have
    # no st.cache_data, in which case the loader is called directly.
    cache_data = getattr(st, 'cache_data', None)
    cached_load = cache_data(load_loans) if cache_data is not None else load_loans
    loans = cached_load(DATA_PATH)

    selected = st.sidebar.selectbox(
        "Select Variable of Interest", ['country', 'sector', 'repayment_interval']
    )
    selected2 = st.sidebar.selectbox(
        "Select Metric to Aggregate", ['funded_amount', 'count']
    )

    # Outlier removal is opt-in so the default table still covers all loans.
    if st.sidebar.checkbox("Exclude funded_amount outliers (|z| > 3)", value=False):
        flagged = flag_funded_amount_outliers(loans)
        loans = flagged[~flagged['outlier_funded_amount']]

    st.title("Aggregated Data")
    st.table(aggregate_loans(loans, selected, selected2))


# `streamlit run` executes the script as __main__, so the app still renders;
# importing this module (e.g. from a test) no longer triggers any UI or I/O.
if __name__ == "__main__":
    main()