Spaces:
Sleeping
Sleeping
| # Import necessary libraries | |
| import streamlit as st | |
| import pandas as pd | |
| import altair as alt | |
| import matplotlib.pyplot as plt | |
| from scipy.stats import zscore | |
| import seaborn as sns | |
# Load the Kiva loans dataset from a CSV file in the working directory.
# NOTE(review): Streamlit re-runs the script on widget interaction, so this
# read is repeated each time; consider caching the load if it becomes slow.
file_path = 'kiva_loans.csv'
df_kiva_loans = pd.read_csv(file_path)

# Drop columns that are not needed by the analysis below.
df_kiva_loans = df_kiva_loans.drop(
    columns=['use', 'disbursed_time', 'funded_time', 'posted_time', 'tags']
)

# Remove rows with missing values only in these two columns rather than in
# every column; not required for the charts below, kept for later use.
df_kiva_loans = df_kiva_loans.dropna(subset=['partner_id', 'borrower_genders'])

# Z-scores of the funded amount. nan_policy='omit' keeps a single missing
# amount from turning every score into NaN (the default 'propagate' would,
# which silently flags nothing as an outlier).
z_scores = zscore(df_kiva_loans['funded_amount'], nan_policy='omit')

# Flag rows more than 3 standard deviations from the mean on either side.
# A NaN score compares False, so rows with a missing amount are not flagged.
df_kiva_loans['outlier_funded_amount'] = abs(z_scores) > 3

# Copy of the data without the flagged outliers.
df_kiva_loans_cleaned = df_kiva_loans[~df_kiva_loans['outlier_funded_amount']]
# Page heading.
st.title('BDS24_Weekly_Assignment_Week 2| Tryfonas Karmiris')

# Sidebar drop-down: which column the loans are grouped by.
plot_type = st.sidebar.selectbox(
    label="Select Variable to Display",
    options=['country', 'repayment_interval', 'sector'],
)

# Sidebar slider: how many of the top-ranked groups to display
# (5 to 20 in steps of 1, starting at 10).
num_columns = st.sidebar.slider("Select Number of Columns to Display", min_value=5, max_value=20, value=10, step=1)
# Rank the groups of the selected variable by total funded amount and keep
# the top `num_columns` of them, together with the number of loans per group.
#
# Every selectable variable is aggregated in exactly the same way, so the
# selected column name drives a single pipeline instead of one copy of the
# pipeline per option. Anything other than the two explicit options falls
# back to 'sector', matching the previous if/elif/else behaviour.
#
# NOTE(review): this aggregates df_kiva_loans, i.e. rows flagged as outliers
# are included; df_kiva_loans_cleaned is not used here -- confirm intended.
x_column = plot_type if plot_type in ('country', 'repayment_interval') else 'sector'
count_column = 'count'

top_values = (
    df_kiva_loans.groupby(x_column)['funded_amount']
    .agg(['sum', 'count'])           # total funded amount and number of loans
    .nlargest(num_columns, 'sum')    # keep the groups with the largest totals
    .reset_index()                   # turn the group key back into a column
)
# Dual-axis chart for the selected groups: bars show the total funded amount
# (left y-axis), the line shows the number of loans (right y-axis).
fig, ax1 = plt.subplots(figsize=(12, 9))

# Left axis: funded amount as semi-transparent bars.
bar_color = 'tab:blue'
ax1.bar(top_values[x_column], top_values['sum'], color=bar_color, alpha=0.6, label='Funded Amount')
ax1.set_xlabel(x_column.replace("_", " ").title())
ax1.set_ylabel('Funded Amount', color=bar_color)
ax1.tick_params(axis='y', labelcolor=bar_color)
# Rotate the category labels through the axes object so the setting does not
# depend on which axes pyplot currently considers active.
ax1.tick_params(axis='x', labelrotation=90)

# Right axis: loan count as a line sharing the same x-axis.
line_color = 'tab:red'
ax2 = ax1.twinx()
ax2.plot(top_values[x_column], top_values[count_column], color=line_color, marker='o', linestyle='-', linewidth=2, label='Count')
ax2.set_ylabel('Count', color=line_color)
ax2.tick_params(axis='y', labelcolor=line_color)

# Title and layout.
ax1.set_title(f'Top {num_columns} by {plot_type.replace("_", " ").title()}')
fig.tight_layout()

# Render the figure in Streamlit.
st.pyplot(fig)

# Release the figure: the script is re-executed on every interaction, and
# figures created through pyplot stay registered until explicitly closed.
plt.close(fig)