Spaces:

Tryfonas
/

WeeklyAssignment_Part2

Sleeping

Upload folder using huggingface_hub

85cd1e1 verified over 1 year ago

1.57 kB

	# Import necessary libraries
	import streamlit as st
	import pandas as pd
	import altair as alt
	import matplotlib.pyplot as plt
	import seaborn as sns
	from scipy.stats import zscore

	# Page heading rendered at the top of the Streamlit app.
	st.title('my shitty app ')

	# Load the loans dataset; the path is resolved relative to the working directory.
	file_path = 'kiva_loans.csv'
	df_kiva_loans = pd.read_csv(file_path)

	# Remove these five columns before any further processing.
	df_kiva_loans = df_kiva_loans.drop(
	    columns=['use', 'disbursed_time', 'funded_time', 'posted_time', 'tags']
	)

	# Drop rows with missing values only in these two columns rather than in
	# every column. This does not affect the current task but may be needed later.
	df_kiva_loans.dropna(subset=['partner_id', 'borrower_genders'], inplace=True)

	# Standardise the funded amounts so outliers can be flagged by z-score.
	# NOTE(review): zscore propagates NaN by default; this assumes
	# 'funded_amount' has no missing values -- confirm against the data.
	z_scores = zscore(df_kiva_loans['funded_amount'])

	# Flag rows more than 3 standard deviations from the mean in either direction.
	# `|` is the element-wise OR; the previous `\|` was an escaping artifact and
	# is not valid Python.
	df_kiva_loans['outlier_funded_amount'] = (z_scores > 3) | (z_scores < -3)

	# Copy of the data with the flagged outlier rows removed.
	df_kiva_loans_cleaned = df_kiva_loans[~df_kiva_loans['outlier_funded_amount']]

	# Let the user pick a grouping column and a metric in the sidebar, then show
	# the aggregated table: either the number of rows per group ('count') or the
	# sum of 'funded_amount' per group, sorted from largest to smallest.
	# NOTE(review): both select boxes share the same label; consider distinct
	# labels so users can tell the grouping column from the metric.
	selected = st.sidebar.selectbox(
	    "Select Variable of Interest", ['country', 'sector', 'repayment_interval']
	)
	selected2 = st.sidebar.selectbox(
	    "Select Variable of Interest", ['funded_amount', 'count']
	)

	# NOTE(review): the aggregation reads df_kiva_loans, which still contains the
	# rows flagged as outliers; df_kiva_loans_cleaned is not used here -- confirm
	# which frame is intended.
	if selected2 == 'count':
	    # Number of rows in each group, exposed as a 'count' column.
	    result = df_kiva_loans.groupby(selected).size().reset_index(name='count')
	else:
	    # Total of the chosen numeric column within each group.
	    result = df_kiva_loans.groupby(selected)[selected2].sum().reset_index()

	# In both branches the metric column is named after `selected2`.
	result = result.sort_values(by=selected2, ascending=False)

	st.title("Aggregated Data")
	st.table(result)