File size: 2,905 Bytes
e962e08
 
 
 
 
85cd1e1
0c3cbdf
e962e08
 
85cd1e1
e962e08
85cd1e1
e962e08
85cd1e1
e962e08
85cd1e1
 
e962e08
85cd1e1
 
e962e08
85cd1e1
 
 
e962e08
00549a0
 
 
e962e08
00549a0
 
 
 
 
 
 
 
e962e08
00549a0
 
 
 
 
 
 
 
 
 
 
 
 
e962e08
00549a0
 
 
e962e08
00549a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# Import necessary libraries
import streamlit as st
import pandas as pd
import altair as alt
import matplotlib.pyplot as plt
from scipy.stats import zscore
import seaborn as sns


# Location of the Kiva loans dataset, relative to the working directory.
file_path = 'kiva_loans.csv'


@st.cache_data
def _load_kiva_loans(csv_path):
    """Read the loans CSV and return it prepared for plotting.

    Streamlit re-executes the whole script on every widget interaction, so
    the parse and the z-score pass are cached here instead of being repeated
    on each rerun. ``st.cache_data`` keys the cache on ``csv_path`` and gives
    every rerun its own copy of the result; a change to the file on disk is
    therefore only picked up after the cache is cleared or the app restarts.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        A DataFrame without the 'use', 'disbursed_time', 'funded_time',
        'posted_time' and 'tags' columns, without rows lacking 'partner_id'
        or 'borrower_genders', and with a boolean 'outlier_funded_amount'
        column that is True where the z-score of 'funded_amount' is above 3
        or below -3.
    """
    loans = pd.read_csv(csv_path)
    loans = loans.drop(
        ['use', 'disbursed_time', 'funded_time', 'posted_time', 'tags'],
        axis=1,
    )

    # Only rows missing these two columns are dropped (not every NaN row);
    # per the original note this is not needed by the chart below but is
    # kept for later use.
    loans = loans.dropna(subset=['partner_id', 'borrower_genders'])

    # Flag funded amounts more than three standard deviations from the mean.
    z_scores = zscore(loans['funded_amount'])
    loans['outlier_funded_amount'] = (z_scores > 3) | (z_scores < -3)
    return loans


df_kiva_loans = _load_kiva_loans(file_path)

# Same data with the flagged outlier rows removed.
df_kiva_loans_cleaned = df_kiva_loans[~df_kiva_loans['outlier_funded_amount']]

# Page heading.
st.title('BDS24_Weekly_Assignment_Week 2| Tryfonas Karmiris')

# Sidebar control: which categorical column the chart is grouped by.
_variable_choices = ['country', 'repayment_interval', 'sector']
plot_type = st.sidebar.selectbox("Select Variable to Display", _variable_choices)

# Sidebar control: how many of the top groups to show (5 to 20, starts at 10).
num_columns = st.sidebar.slider(
    "Select Number of Columns to Display", min_value=5, max_value=20, value=10, step=1
)

# Aggregate funded_amount (sum and row count) per group of the selected
# variable and keep the num_columns groups with the largest sum.
#
# The sidebar option names are exactly the DataFrame column names, so a
# single code path serves all of them. Any other value falls back to
# 'sector', matching the original else-branch.
x_column = plot_type if plot_type in ('country', 'repayment_interval') else 'sector'
count_column = 'count'

# NOTE(review): this aggregates df_kiva_loans, i.e. outlier rows are included;
# df_kiva_loans_cleaned is never used in this script - confirm that is intended.
top_values = (
    df_kiva_loans.groupby(x_column)['funded_amount']
    .agg(['sum', 'count'])
    .nlargest(num_columns, 'sum')
    .reset_index()
)

# Dual-axis chart: bars show the total funded amount per group (left axis),
# the line shows the number of loans per group (right axis).
fig, ax1 = plt.subplots(figsize=(12, 9))

# Left axis: funded amount as bars.
bar_color = 'tab:blue'
ax1.bar(top_values[x_column], top_values['sum'], color=bar_color, alpha=0.6, label='Funded Amount')
ax1.set_xlabel(x_column.replace("_", " ").title())
ax1.set_ylabel('Funded Amount', color=bar_color)
ax1.tick_params(axis='y', labelcolor=bar_color)
# Category names can be long, so draw them vertically. Set on the Axes
# itself rather than through pyplot's implicit "current axes".
ax1.tick_params(axis='x', labelrotation=90)

# Right axis: loan count as a line sharing the same x positions.
ax2 = ax1.twinx()
line_color = 'tab:red'
ax2.plot(top_values[x_column], top_values[count_column], color=line_color, marker='o', linestyle='-', linewidth=2, label='Count')
ax2.set_ylabel('Count', color=line_color)
ax2.tick_params(axis='y', labelcolor=line_color)

# Title goes on the primary axes explicitly; plt.title() would target
# whichever axes happens to be current (the twin, after twinx()).
ax1.set_title(f'Top {num_columns} by {plot_type.replace("_", " ").title()}')
fig.tight_layout()

# Display the plot in Streamlit.
st.pyplot(fig)

# The script is re-executed on every widget change; without closing, each
# rerun leaves another figure registered with pyplot and memory grows.
plt.close(fig)