Spaces:
Sleeping
Sleeping
Update analyzing.py
Browse files- analyzing.py +45 -1
analyzing.py
CHANGED
|
@@ -191,7 +191,8 @@ def plot_line(df, x_column, y_columns, figsize=(12, 10), color='orange', title=N
|
|
| 191 |
|
| 192 |
return fig
|
| 193 |
|
| 194 |
-
|
|
|
|
| 195 |
fig, ax = plt.subplots(figsize=figsize)
|
| 196 |
|
| 197 |
sns.barplot(data=df, x=x_column, y=y_column, color=color, ax=ax)
|
|
@@ -203,6 +204,8 @@ def plot_bar(df, x_column, y_column, figsize=(12, 10), color='orange', title=Non
|
|
| 203 |
ax.tick_params(axis='x', colors=color)
|
| 204 |
ax.tick_params(axis='y', colors=color)
|
| 205 |
|
|
|
|
|
|
|
| 206 |
# Remove background
|
| 207 |
fig.patch.set_alpha(0)
|
| 208 |
ax.patch.set_alpha(0)
|
|
@@ -219,6 +222,7 @@ def plot_bar(df, x_column, y_column, figsize=(12, 10), color='orange', title=Non
|
|
| 219 |
|
| 220 |
return fig
|
| 221 |
|
|
|
|
| 222 |
def plot_grouped_bar(df, x_columns, y_column, figsize=(12, 10), colors=None, title=None):
|
| 223 |
fig, ax = plt.subplots(figsize=figsize)
|
| 224 |
|
|
@@ -370,6 +374,46 @@ def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 370 |
if len(user_date_input) == 2:
|
| 371 |
user_date_input = tuple(map(pd.to_datetime, user_date_input))
|
| 372 |
start_date, end_date = user_date_input
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 373 |
df_ = df_.loc[df_[column].between(start_date, end_date)]
|
| 374 |
|
| 375 |
date_column = column
|
|
|
|
| 191 |
|
| 192 |
return fig
|
| 193 |
|
| 194 |
+
|
| 195 |
+
def plot_bar(df, x_column, y_column, figsize=(12, 10), color='orange', title=None, rotation=45):
|
| 196 |
fig, ax = plt.subplots(figsize=figsize)
|
| 197 |
|
| 198 |
sns.barplot(data=df, x=x_column, y=y_column, color=color, ax=ax)
|
|
|
|
| 204 |
ax.tick_params(axis='x', colors=color)
|
| 205 |
ax.tick_params(axis='y', colors=color)
|
| 206 |
|
| 207 |
+
plt.xticks(rotation=rotation)
|
| 208 |
+
|
| 209 |
# Remove background
|
| 210 |
fig.patch.set_alpha(0)
|
| 211 |
ax.patch.set_alpha(0)
|
|
|
|
| 222 |
|
| 223 |
return fig
|
| 224 |
|
| 225 |
+
|
| 226 |
def plot_grouped_bar(df, x_columns, y_column, figsize=(12, 10), colors=None, title=None):
|
| 227 |
fig, ax = plt.subplots(figsize=figsize)
|
| 228 |
|
|
|
|
| 374 |
if len(user_date_input) == 2:
|
| 375 |
user_date_input = tuple(map(pd.to_datetime, user_date_input))
|
| 376 |
start_date, end_date = user_date_input
|
| 377 |
+
|
| 378 |
+
# Determine the most appropriate time unit for plot
|
| 379 |
+
time_units = {
|
| 380 |
+
'year': df_[column].dt.year,
|
| 381 |
+
'month': df_[column].dt.to_period('M'),
|
| 382 |
+
'day': df_[column].dt.date
|
| 383 |
+
}
|
| 384 |
+
unique_counts = {unit: col.nunique() for unit, col in time_units.items()}
|
| 385 |
+
closest_to_36 = min(unique_counts, key=lambda k: abs(unique_counts[k] - 36))
|
| 386 |
+
|
| 387 |
+
# Group by the most appropriate time unit and count occurrences
|
| 388 |
+
grouped = df_.groupby(time_units[closest_to_36]).size().reset_index(name='count')
|
| 389 |
+
grouped.columns = [column, 'count']
|
| 390 |
+
|
| 391 |
+
# Create a complete date range
|
| 392 |
+
if closest_to_36 == 'year':
|
| 393 |
+
date_range = pd.date_range(start=f"{start_date.year}-01-01", end=f"{end_date.year}-12-31", freq='YS')
|
| 394 |
+
elif closest_to_36 == 'month':
|
| 395 |
+
date_range = pd.date_range(start=start_date.replace(day=1), end=end_date + pd.offsets.MonthEnd(0), freq='MS')
|
| 396 |
+
else: # day
|
| 397 |
+
date_range = pd.date_range(start=start_date, end=end_date, freq='D')
|
| 398 |
+
|
| 399 |
+
# Create a DataFrame with the complete date range
|
| 400 |
+
complete_range = pd.DataFrame({column: date_range})
|
| 401 |
+
|
| 402 |
+
# Convert the date column to the appropriate format based on closest_to_36
|
| 403 |
+
if closest_to_36 == 'year':
|
| 404 |
+
complete_range[column] = complete_range[column].dt.year
|
| 405 |
+
elif closest_to_36 == 'month':
|
| 406 |
+
complete_range[column] = complete_range[column].dt.to_period('M')
|
| 407 |
+
|
| 408 |
+
# Merge the complete range with the grouped data
|
| 409 |
+
final_data = pd.merge(complete_range, grouped, on=column, how='left').fillna(0)
|
| 410 |
+
|
| 411 |
+
with st.status(f"Date Distributions: {column}", expanded=False) as stat:
|
| 412 |
+
try:
|
| 413 |
+
st.pyplot(plot_bar(final_data, column, 'count'))
|
| 414 |
+
except Exception as e:
|
| 415 |
+
st.error(f"Error plotting bar chart: {e}")
|
| 416 |
+
|
| 417 |
df_ = df_.loc[df_[column].between(start_date, end_date)]
|
| 418 |
|
| 419 |
date_column = column
|