Spaces:
Sleeping
Sleeping
"""Streamlit page that aggregates Kiva loans by a user-selected column.

The script loads ``kiva_loans.csv``, drops a few columns, removes rows with
missing ``partner_id`` / ``borrower_genders``, flags ``funded_amount``
outliers by z-score, and renders a table with either the summed
``funded_amount`` or the row count per group.
"""

# Import necessary libraries
import streamlit as st
import pandas as pd
import altair as alt
import matplotlib.pyplot as plt
from scipy.stats import zscore

# Columns removed right after loading.
DROPPED_COLUMNS = ['use', 'disbursed_time', 'funded_time', 'posted_time', 'tags']
# Rows missing any of these columns are discarded.
REQUIRED_COLUMNS = ['partner_id', 'borrower_genders']
# A loan is flagged as an outlier when |z-score of funded_amount| exceeds this.
OUTLIER_Z_THRESHOLD = 3


@st.cache_data
def load_loans(path):
    """Read the loans CSV at *path* and return the prepared DataFrame.

    Decorated with ``st.cache_data`` so the file is not re-read and
    re-processed each time Streamlit reruns the script after a widget change.

    The returned frame has ``DROPPED_COLUMNS`` removed, rows with missing
    ``REQUIRED_COLUMNS`` dropped, and a boolean ``outlier_funded_amount``
    column added.
    """
    loans = pd.read_csv(path)
    loans = loans.drop(DROPPED_COLUMNS, axis=1)
    # Drop NAs on specific columns only, not all of them. This does not
    # affect the current aggregation but may be needed for later use.
    loans = loans.dropna(subset=REQUIRED_COLUMNS)

    # Calculate z-scores and flag rows more than the threshold away from the
    # mean in either direction.
    z_scores = zscore(loans['funded_amount'])
    loans['outlier_funded_amount'] = abs(z_scores) > OUTLIER_Z_THRESHOLD
    return loans


def aggregate_loans(loans, group_by, metric):
    """Return one row per *group_by* value, sorted by *metric* descending.

    When *metric* is ``'count'`` the value is the number of rows in each
    group; otherwise it is the sum of the *metric* column within each group.
    """
    grouped = loans.groupby(group_by)
    if metric == 'count':
        summary = grouped.size().reset_index(name='count')
    else:
        summary = grouped[metric].sum().reset_index()
    # In the count branch the value column is itself named 'count', so
    # sorting by `metric` is correct for both branches.
    return summary.sort_values(by=metric, ascending=False)


st.title('my shitty app ')

file_path = 'kiva_loans.csv'
df_kiva_loans = load_loans(file_path)

# NOTE(review): the outlier-free frame is built but never read below; the
# aggregation still runs on all loans. Confirm which frame is intended.
df_kiva_loans_cleaned = df_kiva_loans[~df_kiva_loans['outlier_funded_amount']]

# Sidebar controls: the column to group by and the metric to show per group.
# NOTE(review): both selectboxes share the same label; consider distinct ones.
selected = st.sidebar.selectbox(
    "Select Variable of Interest",
    ['country', 'sector', 'repayment_interval'],
)
selected2 = st.sidebar.selectbox(
    "Select Variable of Interest",
    ['funded_amount', 'count'],
)

# Group by the selected column and compute the sum of funded_amount or the
# number of loans per group.
result = aggregate_loans(df_kiva_loans, selected, selected2)

st.title("Aggregated Data")
st.table(result)