Spaces:
Sleeping
Sleeping
Update parsing.py
Browse files- parsing.py +42 -1
parsing.py
CHANGED
|
@@ -311,11 +311,15 @@ def plot_line(df, x_column, y_columns, figsize=(12, 10), color='orange', title=N
|
|
| 311 |
|
| 312 |
return fig
|
| 313 |
|
|
|
|
| 314 |
def plot_bar(df, x_column, y_column, figsize=(12, 10), color='orange', title=None):
|
| 315 |
fig, ax = plt.subplots(figsize=figsize)
|
| 316 |
|
| 317 |
sns.barplot(data=df, x=x_column, y=y_column, color=color, ax=ax)
|
| 318 |
|
|
|
|
|
|
|
|
|
|
| 319 |
ax.set_title(title if title else f'{y_column} by {x_column}', color=color, fontweight='bold')
|
| 320 |
ax.set_xlabel(x_column, color=color)
|
| 321 |
ax.set_ylabel(y_column, color=color)
|
|
@@ -336,7 +340,6 @@ def plot_bar(df, x_column, y_column, figsize=(12, 10), color='orange', title=Non
|
|
| 336 |
ax.tick_params(axis='y', colors='orange')
|
| 337 |
ax.title.set_color('orange')
|
| 338 |
ax.legend(loc='upper right', bbox_to_anchor=(1, 1), facecolor='black', framealpha=.4, labelcolor='orange', edgecolor='orange')
|
| 339 |
-
|
| 340 |
return fig
|
| 341 |
|
| 342 |
def plot_grouped_bar(df, x_columns, y_column, figsize=(12, 10), colors=None, title=None):
|
|
@@ -490,6 +493,44 @@ def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 490 |
if len(user_date_input) == 2:
|
| 491 |
user_date_input = tuple(map(pd.to_datetime, user_date_input))
|
| 492 |
start_date, end_date = user_date_input
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 493 |
df_ = df_.loc[df_[column].between(start_date, end_date)]
|
| 494 |
|
| 495 |
date_column = column
|
|
|
|
| 311 |
|
| 312 |
return fig
|
| 313 |
|
| 314 |
+
|
| 315 |
def plot_bar(df, x_column, y_column, figsize=(12, 10), color='orange', title=None):
|
| 316 |
fig, ax = plt.subplots(figsize=figsize)
|
| 317 |
|
| 318 |
sns.barplot(data=df, x=x_column, y=y_column, color=color, ax=ax)
|
| 319 |
|
| 320 |
+
# Rotate x-axis labels
|
| 321 |
+
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
|
| 322 |
+
|
| 323 |
ax.set_title(title if title else f'{y_column} by {x_column}', color=color, fontweight='bold')
|
| 324 |
ax.set_xlabel(x_column, color=color)
|
| 325 |
ax.set_ylabel(y_column, color=color)
|
|
|
|
| 340 |
ax.tick_params(axis='y', colors='orange')
|
| 341 |
ax.title.set_color('orange')
|
| 342 |
ax.legend(loc='upper right', bbox_to_anchor=(1, 1), facecolor='black', framealpha=.4, labelcolor='orange', edgecolor='orange')
|
|
|
|
| 343 |
return fig
|
| 344 |
|
| 345 |
def plot_grouped_bar(df, x_columns, y_column, figsize=(12, 10), colors=None, title=None):
|
|
|
|
| 493 |
if len(user_date_input) == 2:
|
| 494 |
user_date_input = tuple(map(pd.to_datetime, user_date_input))
|
| 495 |
start_date, end_date = user_date_input
|
| 496 |
+
# Determine the most appropriate time unit for plot
|
| 497 |
+
time_units = {
|
| 498 |
+
'year': df_[column].dt.year,
|
| 499 |
+
'month': df_[column].dt.to_period('M'),
|
| 500 |
+
'day': df_[column].dt.date
|
| 501 |
+
}
|
| 502 |
+
unique_counts = {unit: col.nunique() for unit, col in time_units.items()}
|
| 503 |
+
closest_to_36 = min(unique_counts, key=lambda k: abs(unique_counts[k] - 36))
|
| 504 |
+
|
| 505 |
+
# Group by the most appropriate time unit and count occurrences
|
| 506 |
+
grouped = df_.groupby(time_units[closest_to_36]).size().reset_index(name='count')
|
| 507 |
+
grouped.columns = [column, 'count']
|
| 508 |
+
|
| 509 |
+
# Create a complete date range
|
| 510 |
+
if closest_to_36 == 'year':
|
| 511 |
+
date_range = pd.date_range(start=f"{start_date.year}-01-01", end=f"{end_date.year}-12-31", freq='YS')
|
| 512 |
+
elif closest_to_36 == 'month':
|
| 513 |
+
date_range = pd.date_range(start=start_date.replace(day=1), end=end_date + pd.offsets.MonthEnd(0), freq='MS')
|
| 514 |
+
else: # day
|
| 515 |
+
date_range = pd.date_range(start=start_date, end=end_date, freq='D')
|
| 516 |
+
|
| 517 |
+
# Create a DataFrame with the complete date range
|
| 518 |
+
complete_range = pd.DataFrame({column: date_range})
|
| 519 |
+
|
| 520 |
+
# Convert the date column to the appropriate format based on closest_to_36
|
| 521 |
+
if closest_to_36 == 'year':
|
| 522 |
+
complete_range[column] = complete_range[column].dt.year
|
| 523 |
+
elif closest_to_36 == 'month':
|
| 524 |
+
complete_range[column] = complete_range[column].dt.to_period('M')
|
| 525 |
+
|
| 526 |
+
# Merge the complete range with the grouped data
|
| 527 |
+
final_data = pd.merge(complete_range, grouped, on=column, how='left').fillna(0)
|
| 528 |
+
|
| 529 |
+
with st.status(f"Date Distributions: {column}", expanded=False) as stat:
|
| 530 |
+
try:
|
| 531 |
+
st.pyplot(plot_bar(final_data, column, 'count'))
|
| 532 |
+
except Exception as e:
|
| 533 |
+
st.error(f"Error plotting bar chart: {e}")
|
| 534 |
df_ = df_.loc[df_[column].between(start_date, end_date)]
|
| 535 |
|
| 536 |
date_column = column
|