Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files
app.py
CHANGED
|
@@ -6,7 +6,6 @@ import matplotlib.pyplot as plt
|
|
| 6 |
from scipy.stats import zscore
|
| 7 |
import seaborn as sns
|
| 8 |
|
| 9 |
-
st.title('my shitty app ')
|
| 10 |
|
| 11 |
file_path= 'kiva_loans.csv'
|
| 12 |
|
|
@@ -24,18 +23,55 @@ z_scores = zscore(df_kiva_loans['funded_amount'])
|
|
| 24 |
df_kiva_loans['outlier_funded_amount'] = (z_scores > 3) | (z_scores < -3)
|
| 25 |
df_kiva_loans_cleaned = df_kiva_loans[~df_kiva_loans['outlier_funded_amount']]
|
| 26 |
|
| 27 |
-
|
|
|
|
|
|
|
| 28 |
|
| 29 |
-
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
-
|
| 40 |
-
|
|
|
|
| 41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
from scipy.stats import zscore
|
| 7 |
import seaborn as sns
|
| 8 |
|
|
|
|
| 9 |
|
| 10 |
file_path= 'kiva_loans.csv'
|
| 11 |
|
|
|
|
| 23 |
df_kiva_loans['outlier_funded_amount'] = (z_scores > 3) | (z_scores < -3)
|
| 24 |
df_kiva_loans_cleaned = df_kiva_loans[~df_kiva_loans['outlier_funded_amount']]
|
| 25 |
|
| 26 |
+
# Page heading rendered at the top of the app.
st.title('BDS24_Weekly_Assignment_Week 2| Tryfonas Karmiris')

# Sidebar drop-down: the categorical column the chart is grouped by.
plot_type = st.sidebar.selectbox(
    "Select Variable to Display",
    ['country', 'repayment_interval', 'sector'],
)

# Sidebar slider: how many of the top groups to show (5 to 20, default 10).
num_columns = st.sidebar.slider(
    "Select Number of Columns to Display", min_value=5, max_value=20, value=10, step=1
)
|
| 38 |
|
| 39 |
+
# Aggregate funded_amount per value of the selected variable (total and number
# of loans) and keep the num_columns groups with the largest total.
# Every selectbox option is itself the name of the column to group by, so one
# groupby on plot_type replaces the former per-option branches, which were
# identical apart from that column name.
# NOTE(review): this aggregates df_kiva_loans, not the outlier-filtered
# df_kiva_loans_cleaned computed earlier in the script -- confirm which frame
# is intended.
x_column = plot_type
count_column = 'count'
top_values = (
    df_kiva_loans.groupby(x_column)['funded_amount']
    .agg(['sum', 'count'])
    .nlargest(num_columns, 'sum')
    .reset_index()
)
|
| 52 |
|
| 53 |
+
# Dual-axis chart: bars for the total funded amount per group (left y-axis)
# and a line for the number of loans per group (right y-axis).
fig, ax1 = plt.subplots(figsize=(12, 9))

# Left axis: summed funded_amount as bars.
color = 'tab:blue'
ax1.set_xlabel(x_column.replace("_", " ").title())
ax1.set_ylabel('Funded Amount', color=color)
ax1.bar(top_values[x_column], top_values['sum'], color=color, alpha=0.6, label='Funded Amount')
ax1.tick_params(axis='y', labelcolor=color)
# Rotate the category labels through the Axes API instead of plt.xticks, so the
# setting does not depend on pyplot's implicit "current axes" state.
ax1.tick_params(axis='x', labelrotation=90)

# Right axis: loan count as a line sharing the same x positions.
ax2 = ax1.twinx()
color = 'tab:red'
ax2.set_ylabel('Count', color=color)
ax2.plot(top_values[x_column], top_values[count_column], color=color, marker='o', linestyle='-', linewidth=2, label='Count')
ax2.tick_params(axis='y', labelcolor=color)

# Title is set on an explicit Axes rather than via plt.title for the same reason.
ax1.set_title(f'Top {num_columns} by {plot_type.replace("_", " ").title()}')
fig.tight_layout()

# Render the figure in Streamlit, then close it: figures created through
# plt.subplots stay registered in pyplot until closed, and this script is
# re-executed on every widget interaction.
st.pyplot(fig)
plt.close(fig)
|