Spaces:

Tryfonas
/

WeeklyAssignment_Part2

Sleeping

Upload folder using huggingface_hub

e4c3069 verified over 1 year ago

1.55 kB

	# Import necessary libraries
	import streamlit as st
	import pandas as pd
	import altair as alt
	import matplotlib.pyplot as plt
	from scipy.stats import zscore

	# Page heading rendered at the top of the Streamlit app.
	st.title('my shitty app ')

	# Load the Kiva loans dataset; the path is resolved relative to the
	# process working directory.
	file_path = 'kiva_loans.csv'
	df_kiva_loans = pd.read_csv(file_path)

	# Drop the columns that the rest of this script does not need.
	df_kiva_loans = df_kiva_loans.drop(
		['use', 'disbursed_time', 'funded_time', 'posted_time', 'tags'],
		axis=1,
	)

	# Drop rows with missing values only in these two columns rather than in
	# every column. This does not affect the aggregation done below, but the
	# cleaned columns may be needed for later use.
	df_kiva_loans.dropna(subset=['partner_id', 'borrower_genders'], inplace=True)

	# Z-score of every loan's funded amount, computed after the row filtering
	# above so the result lines up one-to-one with the remaining rows.
	z_scores = zscore(df_kiva_loans['funded_amount'])

	# Flag rows whose funded amount lies more than 3 standard deviations from
	# the mean. The two comparisons are combined with the element-wise OR
	# operator `|` (a backslash-escaped pipe is not valid Python).
	df_kiva_loans['outlier_funded_amount'] = (z_scores > 3) | (z_scores < -3)

	# Copy of the data without the flagged outlier rows.
	# NOTE(review): nothing in the visible part of this script reads
	# df_kiva_loans_cleaned -- confirm whether the aggregation should use it.
	df_kiva_loans_cleaned = df_kiva_loans[~df_kiva_loans['outlier_funded_amount']]

	# Group the loans by a user-selected column and show either the number of
	# loans per group or the total funded amount per group, largest first.

	# Sidebar controls: the grouping column and the measure to aggregate.
	selected = st.sidebar.selectbox("Select Variable of Interest", ['country', 'sector', 'repayment_interval'])
	selected2 = st.sidebar.selectbox("Select Variable of Interest", ['funded_amount', 'count'])

	# NOTE(review): the aggregation runs on df_kiva_loans, i.e. rows flagged as
	# funded-amount outliers are included -- confirm this is intended.
	if selected2 == 'count':
		# Number of rows per group, stored in a column named 'count'.
		result = df_kiva_loans.groupby(selected).size().reset_index(name='count')
	else:
		# Sum of the chosen numeric column per group.
		result = df_kiva_loans.groupby(selected)[selected2].sum().reset_index()

	# In both branches the value column is named after `selected2`
	# ('count' or 'funded_amount'), so one descending sort covers both.
	result = result.sort_values(by=selected2, ascending=False)

	st.title("Aggregated Data")
	st.table(result)