# Streamlit dashboard for the Kiva loans CSV: for a user-selected categorical
# column it plots the total funded amount (bars, left axis) and the number of
# loans (line, right axis) for the categories with the largest totals.

# Import necessary libraries
import streamlit as st
import pandas as pd
import altair as alt
import matplotlib.pyplot as plt
from scipy.stats import zscore
import seaborn as sns

file_path = 'kiva_loans.csv'


@st.cache_data
def _load_kiva_loans(path):
    """Read the loans CSV at *path* and return the prepared DataFrame.

    Steps performed on the freshly read frame:
      * drop the columns 'use', 'disbursed_time', 'funded_time',
        'posted_time' and 'tags';
      * drop rows with a missing 'partner_id' or 'borrower_genders';
      * add a boolean 'outlier_funded_amount' column that is True where the
        z-score of 'funded_amount' is above 3 or below -3.

    The function is cached because Streamlit re-executes the script from the
    top on each widget interaction; without the cache the CSV would be parsed
    again every time the sidebar changes.
    """
    loans = pd.read_csv(path)
    loans = loans.drop(
        ['use', 'disbursed_time', 'funded_time', 'posted_time', 'tags'],
        axis=1,
    )
    # Drop NAs on specific columns only, not all of them. This does not
    # affect the current chart but may be needed for later use.
    loans = loans.dropna(subset=['partner_id', 'borrower_genders'])

    # Calculate z-scores and flag rows more than 3 standard deviations out.
    z_scores = zscore(loans['funded_amount'])
    loans['outlier_funded_amount'] = (z_scores > 3) | (z_scores < -3)
    return loans


df_kiva_loans = _load_kiva_loans(file_path)

# NOTE(review): this outlier-free view is built but the chart below still
# aggregates the unfiltered frame -- confirm whether the plot is meant to
# exclude outliers before switching it over.
df_kiva_loans_cleaned = df_kiva_loans[~df_kiva_loans['outlier_funded_amount']]

st.title('BDS24_Weekly_Assignment_Week 2| Tryfonas Karmiris')

# Sidebar selection for the variable to group by
plot_type = st.sidebar.selectbox(
    "Select Variable to Display",
    ['country', 'repayment_interval', 'sector'],
)

# Slider to select the number of top values to display
num_columns = st.sidebar.slider(
    "Select Number of Columns to Display",
    min_value=5,
    max_value=20,
    value=10,  # default value
    step=1,
)

# The selected option is itself the column name, so a single aggregation
# covers every choice: total and count of funded_amount per category, keeping
# the num_columns categories with the largest totals.
x_column = plot_type
count_column = 'count'
top_values = (
    df_kiva_loans.groupby(x_column)['funded_amount']
    .agg(['sum', 'count'])
    .nlargest(num_columns, 'sum')
    .reset_index()
)

# Create a bar plot with dual axes
fig, ax1 = plt.subplots(figsize=(12, 9))
# Set the rotation on the axes object itself rather than through pyplot's
# "current axes", so it does not depend on call order.
ax1.tick_params(axis='x', labelrotation=90)

# Bar plot for funded_amount
color = 'tab:blue'
ax1.set_xlabel(x_column.replace("_", " ").title())
ax1.set_ylabel('Funded Amount', color=color)
ax1.bar(top_values[x_column], top_values['sum'], color=color, alpha=0.6, label='Funded Amount')
ax1.tick_params(axis='y', labelcolor=color)

# Create a second y-axis for count
ax2 = ax1.twinx()
color = 'tab:red'
ax2.set_ylabel('Count', color=color)
ax2.plot(
    top_values[x_column],
    top_values[count_column],
    color=color,
    marker='o',
    linestyle='-',
    linewidth=2,
    label='Count',
)
ax2.tick_params(axis='y', labelcolor=color)

# Add the title. nlargest returns at most num_columns rows, so report the
# number of categories actually plotted instead of the slider value.
ax1.set_title(f'Top {len(top_values)} by {plot_type.replace("_", " ").title()}')
fig.tight_layout()

# Display the plot in Streamlit
st.pyplot(fig)

# Figures made with plt.subplots stay registered with pyplot until closed;
# close this one so they do not pile up across script reruns.
plt.close(fig)