Tryfonas committed on
Commit
be26401
·
verified ·
1 Parent(s): fb89c2f

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +303 -137
app.py CHANGED
@@ -6,146 +6,312 @@ import matplotlib.pyplot as plt
6
  import seaborn as sns
7
  from scipy.stats import zscore
8
 
9
- # Load data
10
- file_path = 'kiva_loans.csv'
11
- df_kiva_loans = pd.read_csv(file_path)
 
 
 
 
 
 
12
 
13
- # Clean data
14
- df_kiva_loans = df_kiva_loans.drop(['use', 'disbursed_time', 'funded_time', 'posted_time', 'tags'], axis=1)
15
- df_kiva_loans.dropna(subset=['partner_id', 'borrower_genders'], inplace=True)
 
 
 
16
 
17
- # Calculate Z-scores
18
- z_scores = zscore(df_kiva_loans['funded_amount'])
19
- df_kiva_loans['outlier_funded_amount'] = (z_scores > 3) | (z_scores < -3)
20
- df_kiva_loans_cleaned = df_kiva_loans[~df_kiva_loans['outlier_funded_amount']]
21
 
22
  # Streamlit App Title
23
  st.title('BDS24_Weekly_Assignment_Week 2 | Tryfonas Karmiris')
24
 
25
- # Display the cleaned data table
26
- st.table(df_kiva_loans_cleaned.head())
27
-
28
- # Dropdown and slider for Altair chart
29
- st.subheader('Distribution of Funded Amounts')
30
- # Altair chart: simple distribution of funded amounts
31
- chart = alt.Chart(df_kiva_loans_cleaned).mark_bar().encode(
32
- alt.X('funded_amount', bin=alt.Bin(maxbins=50)), # Use funded_amount for distribution
33
- y='count()',
34
- ).properties(
35
- title='Distribution of Funded Amounts'
36
- )
37
- st.altair_chart(chart, use_container_width=True)
38
-
39
- # Dropdown and slider for Matplotlib dual-axis plot
40
- st.subheader('Top Values by Selected Variable')
41
-
42
- # Dropdown for plot type
43
- plot_type = st.selectbox("Select Variable to Display", ['country', 'repayment_interval', 'sector'])
44
-
45
- # Slider to select the number of top values to display
46
- num_columns = st.slider(
47
- "Select Number of Columns to Display",
48
- min_value=5,
49
- max_value=50,
50
- value=10, # default value
51
- step=1
52
- )
53
-
54
- # Select the top values based on the selected variable and number of columns
55
- if plot_type == 'country':
56
- top_values = df_kiva_loans.groupby('country')['funded_amount'].agg(['sum', 'count']).nlargest(num_columns, 'sum').reset_index()
57
- x_column = 'country'
58
- count_column = 'count'
59
- elif plot_type == 'repayment_interval':
60
- top_values = df_kiva_loans.groupby('repayment_interval')['funded_amount'].agg(['sum', 'count']).nlargest(num_columns, 'sum').reset_index()
61
- x_column = 'repayment_interval'
62
- count_column = 'count'
63
- else: # sector
64
- top_values = df_kiva_loans.groupby('sector')['funded_amount'].agg(['sum', 'count']).nlargest(num_columns, 'sum').reset_index()
65
- x_column = 'sector'
66
- count_column = 'count'
67
-
68
- # Create a dual-axis bar plot using Matplotlib
69
- fig, ax1 = plt.subplots(figsize=(12, 9))
70
- plt.xticks(rotation=90)
71
-
72
- # Bar plot for funded_amount
73
- color = 'tab:blue'
74
- ax1.set_xlabel(x_column.replace("_", " ").title())
75
- ax1.set_ylabel('Funded Amount', color=color)
76
- ax1.bar(top_values[x_column], top_values['sum'], color=color, alpha=0.6, label='Funded Amount')
77
- ax1.tick_params(axis='y', labelcolor=color)
78
-
79
- # Create a second y-axis for count
80
- ax2 = ax1.twinx()
81
- color = 'tab:red'
82
- ax2.set_ylabel('Count', color=color)
83
- ax2.plot(top_values[x_column], top_values[count_column], color=color, marker='o', linestyle='-', linewidth=2, label='Count')
84
- ax2.tick_params(axis='y', labelcolor=color)
85
-
86
- # Add titles and labels
87
- plt.title(f'Top {num_columns} by {plot_type.replace("_", " ").title()}')
88
- fig.tight_layout()
89
- st.pyplot(fig)
90
-
91
- # Boxplot (or Violin Plot) after the dual-axis plot
92
- st.subheader('Funded Amount vs. Selected Variable')
93
-
94
- # Filter the data based on the selected variable and number of top values
95
- if plot_type == 'sector':
96
- top_values_boxplot = df_kiva_loans.groupby('sector')['funded_amount'].agg('sum').nlargest(num_columns).index
97
- filtered_df_boxplot = df_kiva_loans_cleaned[df_kiva_loans_cleaned['sector'].isin(top_values_boxplot)]
98
- elif plot_type == 'country':
99
- top_values_boxplot = df_kiva_loans.groupby('country')['funded_amount'].agg('sum').nlargest(num_columns).index
100
- filtered_df_boxplot = df_kiva_loans_cleaned[df_kiva_loans_cleaned['country'].isin(top_values_boxplot)]
101
- else: # repayment_interval
102
- filtered_df_boxplot = df_kiva_loans_cleaned
103
-
104
- # Create a boxplot
105
- fig, ax = plt.subplots(figsize=(12, 6))
106
- if plot_type != 'repayment_interval':
107
- # Use sorted values for 'sector' and 'country'
108
- top_values_sorted = df_kiva_loans.groupby(plot_type)['funded_amount'].agg('sum').nlargest(num_columns).index
109
- sns.boxplot(x=plot_type, y='funded_amount', data=filtered_df_boxplot, order=top_values_sorted, ax=ax)
110
- else:
111
- # No specific sorting needed for 'repayment_interval'
112
- sns.boxplot(x=plot_type, y='funded_amount', data=filtered_df_boxplot, ax=ax)
113
-
114
- plt.title('Funded Amount by Selected Variable')
115
- plt.xlabel(plot_type)
116
- plt.ylabel('Funded Amount')
117
- plt.xticks(rotation=45)
118
- st.pyplot(fig)
119
-
120
- # Dropdown for Seaborn countplot
121
- st.subheader('Repayment Interval by Selected Variable')
122
-
123
- # Dropdown for selecting variable for Seaborn countplot
124
- plot_var = st.selectbox("Select Variable for Countplot", ['sector', 'country'])
125
-
126
- # Slider to select the number of top values to display for Seaborn countplot
127
- num_top_values = st.slider(
128
- "Select Number of Top Values to Display",
129
- min_value=5,
130
- max_value=50,
131
- value=10, # default value
132
- step=1
133
- )
134
-
135
- # Filter the data based on the selected variable and number of top values
136
- if plot_var == 'sector':
137
- top_values_plot = df_kiva_loans.groupby('sector')['funded_amount'].agg('count').nlargest(num_top_values).index
138
- filtered_df_plot = df_kiva_loans_cleaned[df_kiva_loans_cleaned['sector'].isin(top_values_plot)]
139
- elif plot_var == 'country':
140
- top_values_plot = df_kiva_loans.groupby('country')['funded_amount'].agg('count').nlargest(num_top_values).index
141
- filtered_df_plot = df_kiva_loans_cleaned[df_kiva_loans_cleaned['country'].isin(top_values_plot)]
142
-
143
- # Create Seaborn countplot
144
- fig, ax = plt.subplots(figsize=(10, 6))
145
- sns.countplot(x='repayment_interval', hue=plot_var, data=filtered_df_plot, ax=ax)
146
- plt.title(f'Repayment Interval by {plot_var.replace("_", " ").title()}')
147
- plt.xlabel('Repayment Interval')
148
- plt.xticks(rotation=90)
149
- plt.ylabel('Count')
150
- plt.legend(title=plot_var.replace("_", " ").title(), bbox_to_anchor=(1.05, 1), loc='upper left')
151
- st.pyplot(fig)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  import seaborn as sns
7
  from scipy.stats import zscore
8
 
9
# Function to load and clean data
@st.cache_data
def load_and_clean_data(file_path):
    """Load the Kiva loans CSV and return it without funded-amount outliers.

    The function is wrapped in ``st.cache_data``.

    Cleaning steps, in order:
      1. Drop the ``use``, ``disbursed_time``, ``funded_time``,
         ``posted_time`` and ``tags`` columns.
      2. Drop rows with a missing ``partner_id`` or ``borrower_genders``.
      3. Flag rows whose ``funded_amount`` z-score lies outside [-3, 3]
         and discard them.

    Args:
        file_path: Path of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        The cleaned DataFrame. It still carries the boolean helper column
        ``outlier_funded_amount`` (False for every remaining row).
    """
    df_kiva_loans = pd.read_csv(file_path)

    # Reassign instead of mutating in place so each step yields a fresh frame.
    df_kiva_loans = df_kiva_loans.drop(
        columns=['use', 'disbursed_time', 'funded_time', 'posted_time', 'tags']
    )
    df_kiva_loans = df_kiva_loans.dropna(subset=['partner_id', 'borrower_genders'])

    # nan_policy='omit': with the default ('propagate') a single missing
    # funded_amount turns every z-score into NaN, so no row would ever be
    # flagged and the outlier filter would silently do nothing.
    z_scores = zscore(df_kiva_loans['funded_amount'], nan_policy='omit')
    df_kiva_loans['outlier_funded_amount'] = (z_scores > 3) | (z_scores < -3)

    return df_kiva_loans[~df_kiva_loans['outlier_funded_amount']]
25
 
26
# Load the cleaned data
file_path = 'kiva_loans.csv'
df_kiva_loans_cleaned = load_and_clean_data(file_path)

# Streamlit App Title
st.title('BDS24_Weekly_Assignment_Week 2 | Tryfonas Karmiris')

# Sidebar for navigation: one radio entry per page handled by the
# if/elif chain on `page` that follows.
page_names = [
    "Introduction",
    "Data Overview",
    "Top Values by Selected Variable",
    "Repayment Interval by Selected Variable",
    "Country Comparison Deepdive",
    "Sector Comparison Deepdive",
]
st.sidebar.title("Navigation")
page = st.sidebar.radio("Select a page:", page_names)
36
+
37
# Introduction Page
if page == "Introduction":
    st.subheader("Introduction")

    # Short description of what the app offers, shown as the landing text.
    intro_text = """
    This application provides insights into Kiva loans data.
    You can explore the distribution of funded amounts,
    analyze top values by selected variables, and visualize
    relationships between funded amounts and various factors.
    """
    st.write(intro_text)
46
+
47
+ # Data Overview Page
48
+ elif page == "Data Overview":
49
+ st.subheader("Data Overview")
50
+ st.write("Here is a preview of the cleaned Kiva loans data:")
51
+
52
+ # Display the cleaned data table
53
+ st.table(df_kiva_loans_cleaned.head())
54
+
55
+ # Distribution of Funded Amounts
56
+ st.subheader('Distribution of Funded Amounts')
57
+ chart = alt.Chart(df_kiva_loans_cleaned).mark_bar().encode(
58
+ alt.X('funded_amount', bin=alt.Bin(maxbins=50)), # Use funded_amount for distribution
59
+ y='count()',
60
+ ).properties(
61
+ title='Distribution of Funded Amounts'
62
+ )
63
+ st.altair_chart(chart, use_container_width=True)
64
+ st.write("This chart shows the distribution of funded amounts for Kiva loans. The x-axis represents the funded amount, while the y-axis shows the count of loans that fall within each bin.")
65
+
66
+ # Page 3: Top Values by Selected Variable
67
+ elif page == "Top Values by Selected Variable":
68
+ st.subheader('Top Values by Selected Variable')
69
+
70
+ # Dropdown for plot type
71
+ plot_type = st.selectbox("Select Variable to Display", ['country', 'repayment_interval', 'sector'])
72
+
73
+ # Slider to select the number of top values to display
74
+ num_columns = st.slider(
75
+ "Select Number of Columns to Display",
76
+ min_value=5,
77
+ max_value=50,
78
+ value=10, # default value
79
+ step=1
80
+ )
81
+
82
+ # Select the top values based on the selected variable and number of columns
83
+ if plot_type == 'country':
84
+ top_values = df_kiva_loans_cleaned.groupby('country')['funded_amount'].agg(['sum', 'count']).nlargest(num_columns, 'sum').reset_index()
85
+ x_column = 'country'
86
+ count_column = 'count'
87
+ description = f"This chart displays the top {num_columns} countries by total funded amount. The blue bars represent the total funded amount, while the red line indicates the count of loans."
88
+ elif plot_type == 'repayment_interval':
89
+ top_values = df_kiva_loans_cleaned.groupby('repayment_interval')['funded_amount'].agg(['sum', 'count']).nlargest(num_columns, 'sum').reset_index()
90
+ x_column = 'repayment_interval'
91
+ count_column = 'count'
92
+ description = f"This chart shows the top {num_columns} repayment intervals by total funded amount. The blue bars represent the total funded amount, while the red line indicates the count of loans."
93
+ else: # sector
94
+ top_values = df_kiva_loans_cleaned.groupby('sector')['funded_amount'].agg(['sum', 'count']).nlargest(num_columns, 'sum').reset_index()
95
+ x_column = 'sector'
96
+ count_column = 'count'
97
+ description = f"This chart illustrates the top {num_columns} sectors by total funded amount. The blue bars represent the total funded amount, while the red line indicates the count of loans."
98
+
99
+ # Display description
100
+ st.write(description)
101
+
102
+ # Create a dual-axis bar plot using Matplotlib
103
+ fig, ax1 = plt.subplots(figsize=(12, 9))
104
+ plt.xticks(rotation=90)
105
+
106
+ # Bar plot for funded_amount
107
+ color = 'tab:blue'
108
+ ax1.set_xlabel(x_column.replace("_", " ").title())
109
+ ax1.set_ylabel('Funded Amount', color=color)
110
+ ax1.bar(top_values[x_column], top_values['sum'], color=color, alpha=0.6, label='Funded Amount')
111
+ ax1.tick_params(axis='y', labelcolor=color)
112
+
113
+ # Create a second y-axis for count
114
+ ax2 = ax1.twinx()
115
+ color = 'tab:red'
116
+ ax2.set_ylabel('Count', color=color)
117
+ ax2.plot(top_values[x_column], top_values[count_column], color=color, marker='o', linestyle='-', linewidth=2, label='Count')
118
+ ax2.tick_params(axis='y', labelcolor=color)
119
+
120
+ # Add titles and labels
121
+ plt.title(f'Top {num_columns} by {plot_type.replace("_", " ").title()}')
122
+ fig.tight_layout()
123
+ st.pyplot(fig)
124
+
125
+ # Boxplot after the dual-axis plot
126
+ st.subheader('Funded Amount vs. Selected Variable')
127
+
128
+ # Filter the data based on the selected variable and number of top values
129
+ if plot_type == 'sector':
130
+ top_values_boxplot = df_kiva_loans_cleaned.groupby('sector')['funded_amount'].agg('sum').nlargest(num_columns).index
131
+ filtered_df_boxplot = df_kiva_loans_cleaned[df_kiva_loans_cleaned['sector'].isin(top_values_boxplot)]
132
+ elif plot_type == 'country':
133
+ top_values_boxplot = df_kiva_loans_cleaned.groupby('country')['funded_amount'].agg('sum').nlargest(num_columns).index
134
+ filtered_df_boxplot = df_kiva_loans_cleaned[df_kiva_loans_cleaned['country'].isin(top_values_boxplot)]
135
+ else: # repayment_interval
136
+ filtered_df_boxplot = df_kiva_loans_cleaned
137
+
138
+ # Create a boxplot
139
+ fig, ax = plt.subplots(figsize=(12, 6))
140
+ if plot_type != 'repayment_interval':
141
+ top_values_sorted = df_kiva_loans_cleaned.groupby(plot_type)['funded_amount'].agg('sum').nlargest(num_columns).index
142
+ sns.boxplot(x=plot_type, y='funded_amount', data=filtered_df_boxplot, order=top_values_sorted, ax=ax)
143
+ else:
144
+ sns.boxplot(x=plot_type, y='funded_amount', data=filtered_df_boxplot, ax=ax)
145
+
146
+ plt.title('Funded Amount by Selected Variable')
147
+ plt.xlabel(plot_type)
148
+ plt.ylabel('Funded Amount')
149
+ plt.xticks(rotation=45)
150
+ st.pyplot(fig)
151
+
152
+ # Display description for boxplot
153
+ st.write(f"This boxplot shows the distribution of funded amounts for the top {num_columns} {plot_type.replace('_', ' ')}. It provides insights into the spread and outliers of funded amounts.")
154
+
155
+ # Page 4: Other Plots
156
+ elif page == "Repayment Interval by Selected Variable":
157
+ st.subheader('Repayment Interval by Selected Variable')
158
+
159
+ # Dropdown for selecting variable for Seaborn countplot
160
+ plot_var = st.selectbox("Select Variable for Countplot", ['sector', 'country'])
161
+
162
+ # Slider to select the number of top values to display for Seaborn countplot
163
+ num_top_values = st.slider(
164
+ "Select Number of Top Values to Display",
165
+ min_value=5,
166
+ max_value=50,
167
+ value=10, # default value
168
+ step=1
169
+ )
170
+
171
+ # Filter the data based on the selected variable and number of top values
172
+ if plot_var == 'sector':
173
+ top_values_plot = df_kiva_loans_cleaned.groupby('sector')['funded_amount'].agg('count').nlargest(num_top_values).index
174
+ filtered_df_plot = df_kiva_loans_cleaned[df_kiva_loans_cleaned['sector'].isin(top_values_plot)]
175
+ description = f"This countplot shows the distribution of repayment intervals for the top {num_top_values} sectors based on the number of loans."
176
+ elif plot_var == 'country':
177
+ top_values_plot = df_kiva_loans_cleaned.groupby('country')['funded_amount'].agg('count').nlargest(num_top_values).index
178
+ filtered_df_plot = df_kiva_loans_cleaned[df_kiva_loans_cleaned['country'].isin(top_values_plot)]
179
+ description = f"This countplot illustrates the distribution of repayment intervals for the top {num_top_values} countries based on the number of loans."
180
+
181
+ # Display description
182
+ st.write(description)
183
+
184
+ # Create a count plot
185
+ fig, ax = plt.subplots(figsize=(10, 6))
186
+
187
+ # Count the occurrences of repayment intervals for the filtered data
188
+ count_data = filtered_df_plot.groupby('repayment_interval')[plot_var].value_counts().unstack(fill_value=0)
189
+
190
+ # Calculate total counts for sorting
191
+ total_counts = count_data.sum(axis=1)
192
+
193
+ # Sort the repayment intervals based on the total count of loans in descending order
194
+ sorted_index = total_counts.sort_values(ascending=False).index
195
+ count_data = count_data.loc[sorted_index]
196
+
197
+ # Create a grouped bar plot
198
+ count_data.plot(kind='bar', ax=ax, position=0, width=0.8)
199
+ plt.title(f'Repayment Interval by {plot_var.replace("_", " ").title()}')
200
+ plt.xlabel('Repayment Interval')
201
+ plt.ylabel('Count of Loans')
202
+ plt.xticks(rotation=45)
203
+ plt.legend(title=plot_var.replace("_", " ").title(), bbox_to_anchor=(1.05, 1), loc='upper left')
204
+ st.pyplot(fig)
205
+
206
+
207
+
208
+
209
+ # Page 5: Country Comparison
210
+ elif page == "Country Comparison Deepdive":
211
+ st.subheader("Country Comparison Deepdive")
212
+
213
+ # Multi-select for countries
214
+ selected_countries = st.multiselect("Select Countries to Compare", options=df_kiva_loans_cleaned['country'].unique())
215
+
216
+ # Option to choose between count or sum of funded amounts
217
+ aggregation_option = st.radio("Select Aggregation Type:", ("Count", "Sum"))
218
+
219
+ if selected_countries:
220
+ # Filter the data based on selected countries
221
+ filtered_data = df_kiva_loans_cleaned[df_kiva_loans_cleaned['country'].isin(selected_countries)]
222
+
223
+ # Create a combined bar plot for sector summary
224
+ st.subheader("Total Funded Amounts by Sector for Selected Countries")
225
+ if aggregation_option == "Sum":
226
+ sector_summary = filtered_data.groupby(['country', 'sector']).agg(
227
+ total_funded_amount=('funded_amount', 'sum')
228
+ ).reset_index()
229
+ else: # Count
230
+ sector_summary = filtered_data.groupby(['country', 'sector']).agg(
231
+ total_funded_amount=('funded_amount', 'count')
232
+ ).reset_index()
233
+
234
+ fig, ax = plt.subplots(figsize=(12, 6))
235
+ sns.barplot(x='sector', y='total_funded_amount', hue='country', data=sector_summary, ax=ax)
236
+ plt.title(f'Total Funded Amount by Sector for Selected Countries ({aggregation_option})')
237
+ plt.xlabel('Sector')
238
+ plt.ylabel('Total Funded Amount' if aggregation_option == "Sum" else 'Count of Loans')
239
+ plt.xticks(rotation=45)
240
+ st.pyplot(fig)
241
+
242
+ # Create a combined bar plot for repayment summary
243
+ st.subheader("Total Funded Amounts by Repayment Interval for Selected Countries")
244
+ if aggregation_option == "Sum":
245
+ repayment_summary = filtered_data.groupby(['country', 'repayment_interval']).agg(
246
+ total_funded_amount=('funded_amount', 'sum')
247
+ ).reset_index()
248
+ else: # Count
249
+ repayment_summary = filtered_data.groupby(['country', 'repayment_interval']).agg(
250
+ total_funded_amount=('funded_amount', 'count')
251
+ ).reset_index()
252
+
253
+ fig, ax = plt.subplots(figsize=(12, 6))
254
+ sns.barplot(x='repayment_interval', y='total_funded_amount', hue='country', data=repayment_summary, ax=ax)
255
+ plt.title(f'Total Funded Amount by Repayment Interval for Selected Countries ({aggregation_option})')
256
+ plt.xlabel('Repayment Interval')
257
+ plt.ylabel('Total Funded Amount' if aggregation_option == "Sum" else 'Count of Loans')
258
+ plt.xticks(rotation=45)
259
+ st.pyplot(fig)
260
+ else:
261
+ st.write("Please select one or more countries to compare.")
262
+
263
+ # Page 6: Sector Comparison
264
+ elif page == "Sector Comparison Deepdive":
265
+ st.subheader("Sector Comparison Deepdive")
266
+
267
+ # Multi-select for sectors
268
+ selected_sectors = st.multiselect("Select Sectors to Compare", options=df_kiva_loans_cleaned['sector'].unique())
269
+
270
+ # Option to choose between count or sum of funded amounts
271
+ aggregation_option = st.radio("Select Aggregation Type:", ("Count", "Sum"))
272
+
273
+ if selected_sectors:
274
+ # Filter the data based on selected sectors
275
+ filtered_data = df_kiva_loans_cleaned[df_kiva_loans_cleaned['sector'].isin(selected_sectors)]
276
+
277
+ # Create a combined bar plot for sector summary by country
278
+ st.subheader("Total Funded Amounts by Country for Selected Sectors")
279
+ if aggregation_option == "Sum":
280
+ country_summary = filtered_data.groupby(['country', 'sector']).agg(
281
+ total_funded_amount=('funded_amount', 'sum')
282
+ ).reset_index()
283
+ else: # Count
284
+ country_summary = filtered_data.groupby(['country', 'sector']).agg(
285
+ total_funded_amount=('funded_amount', 'count')
286
+ ).reset_index()
287
+
288
+ fig, ax = plt.subplots(figsize=(12, 6))
289
+ sns.barplot(x='country', y='total_funded_amount', hue='sector', data=country_summary, ax=ax)
290
+ plt.title(f'Total Funded Amount by Country for Selected Sectors ({aggregation_option})')
291
+ plt.xlabel('Country')
292
+ plt.ylabel('Total Funded Amount' if aggregation_option == "Sum" else 'Count of Loans')
293
+ plt.legend(title='Sector', bbox_to_anchor=(1.05, 1), loc='upper left')
294
+ plt.xticks(rotation=90)
295
+ st.pyplot(fig)
296
+
297
+ # Create a combined bar plot for repayment summary
298
+ st.subheader("Total Funded Amounts by Repayment Interval for Selected Sectors")
299
+ if aggregation_option == "Sum":
300
+ repayment_summary = filtered_data.groupby(['repayment_interval', 'sector']).agg(
301
+ total_funded_amount=('funded_amount', 'sum')
302
+ ).reset_index()
303
+ else: # Count
304
+ repayment_summary = filtered_data.groupby(['repayment_interval', 'sector']).agg(
305
+ total_funded_amount=('funded_amount', 'count')
306
+ ).reset_index()
307
+
308
+ fig, ax = plt.subplots(figsize=(12, 6))
309
+ sns.barplot(x='repayment_interval', y='total_funded_amount', hue='sector', data=repayment_summary, ax=ax)
310
+ plt.title(f'Total Funded Amount by Repayment Interval for Selected Sectors ({aggregation_option})')
311
+ plt.xlabel('Repayment Interval')
312
+ plt.ylabel('Total Funded Amount' if aggregation_option == "Sum" else 'Count of Loans')
313
+ plt.legend(title='Sector', bbox_to_anchor=(1.05, 1), loc='upper left')
314
+ plt.xticks(rotation=90)
315
+ st.pyplot(fig)
316
+ else:
317
+ st.write("Please select one or more sectors to compare.")