Tryfonas's picture
Upload folder using huggingface_hub
85cd1e1 verified
raw
history blame
1.57 kB
# Import necessary libraries
import streamlit as st
import pandas as pd
import altair as alt
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import zscore
# Page heading shown at the top of the app.
st.title('my shitty app ')

# Read the loans data from a CSV located relative to the working directory.
file_path = 'kiva_loans.csv'
df_kiva_loans = pd.read_csv(file_path)

# Remove the columns listed here before any further processing.
df_kiva_loans = df_kiva_loans.drop(
    columns=['use', 'disbursed_time', 'funded_time', 'posted_time', 'tags']
)

# Only rows missing 'partner_id' or 'borrower_genders' are discarded; missing
# values in other columns are kept. This does not affect the current task but
# may matter for later use.
df_kiva_loans.dropna(subset=['partner_id', 'borrower_genders'], inplace=True)

# Standardise 'funded_amount' and flag every row whose z-score lies more than
# three standard deviations away from the mean (in either direction).
z_scores = zscore(df_kiva_loans['funded_amount'])
df_kiva_loans['outlier_funded_amount'] = abs(z_scores) > 3

# Copy of the data without the rows flagged as outliers.
df_kiva_loans_cleaned = df_kiva_loans.loc[~df_kiva_loans['outlier_funded_amount']]
# Sidebar controls: the first selectbox chooses the column to group by, the
# second chooses what to aggregate per group ('funded_amount' is summed,
# 'count' is the number of rows in each group).
# NOTE(review): both selectboxes carry the same label, so they are hard to
# tell apart in the UI - consider giving the second one a distinct label.
selected = st.sidebar.selectbox("Select Variable of Interest", ['country', 'sector', 'repayment_interval'])
selected2 = st.sidebar.selectbox("Select Variable of Interest", ['funded_amount', 'count'])

# NOTE(review): the aggregation runs on df_kiva_loans (outlier rows included);
# df_kiva_loans_cleaned is not used here - confirm this is intended.
grouped = df_kiva_loans.groupby(selected)
if selected2 == 'count':
    # Number of loans per group, exposed as a 'count' column.
    result = grouped.size().reset_index(name='count')
else:
    # Total of the chosen numeric column per group.
    result = grouped[selected2].sum().reset_index()

# In both branches the aggregated column is named after selected2, so a single
# sort puts the largest groups first.
result = result.sort_values(by=selected2, ascending=False)

# Render the aggregated table under its own heading.
st.title("Aggregated Data")
st.table(result)