Spaces:
Paused
Paused
| # src/chart_generator.py | |
| import matplotlib | |
| matplotlib.use('Agg') # Use a non-interactive backend suitable for scripts/web apps | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| import pandas as pd | |
| import os # Needed for path operations | |
| import numpy as np # For radar charts | |
| # Shared colors and palettes, defined once so every chart in this module uses the same visual scheme | |
| COLOR_FEEDBACK = '#3498db' # Blue; feedback-star bars in the average-score bar charts | |
| COLOR_INSTRUCTOR = '#e74c3c' # Red; instructor-rating bars in the average-score bar charts | |
| PALETTE_DISTRIBUTION_FB = 'Blues' # Sequential palette for the feedback star distribution bars | |
| PALETTE_DISTRIBUTION_IR = 'Reds' # Sequential palette for the instructor rating distribution bars | |
| PALETTE_CORR = 'coolwarm' # Diverging colormap for the correlation heatmap | |
| COLOR_RADAR_FB = '#1f77b4' # Matplotlib default blue; feedback line and fill on the radar chart | |
| COLOR_RADAR_IR = '#ff7f0e' # Matplotlib default orange; instructor line and fill on the radar chart | |
| # Helper function to save charts | |
def save_chart(fig: plt.Figure, output_dir: str, filename: str) -> str | None:
    """Save a Matplotlib figure as a PNG file and return the path written.

    Args:
        fig: Figure to write. It is always closed before this function
            returns, whether or not the save succeeded, so callers must not
            reuse it afterwards.
        output_dir: Directory to write into; created if it does not exist.
            An empty string means the current working directory.
        filename: Target file name. ``.png`` is appended when the name does
            not already end with it (case-insensitive).

    Returns:
        The path of the saved file, or ``None`` if saving failed. Failures
        are printed rather than raised.
    """
    try:
        if not filename.lower().endswith(".png"):
            filename += ".png"
        # os.makedirs("") raises FileNotFoundError, so only create a directory
        # when one was actually named; "" then resolves to the working directory.
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        filepath = os.path.join(output_dir, filename)
        # Save with good resolution and a tight bounding box.
        fig.savefig(filepath, format='png', bbox_inches='tight', dpi=150)
        print(f"Chart saved: {filepath}")
        return filepath
    except Exception as e:
        # Best-effort boundary: a failed chart must not abort the caller.
        print(f"Error saving chart '{filename}' to '{output_dir}': {e}")
        return None
    finally:
        plt.close(fig)  # Always release the figure to free memory.
| # --- Updated Plotting Functions --- | |
| def plot_feedback_distribution_per_subject( | |
| feedback_dist: dict, | |
| subject_name: str, | |
| output_dir: str, | |
| filename: str | |
| ) -> str | None: | |
| """Plots feedback distribution and saves as PNG, returning filepath.""" | |
| if subject_name not in feedback_dist or not feedback_dist[subject_name]: | |
| print(f"Chart Gen: No feedback data to plot for {subject_name}.") | |
| return None | |
| data = feedback_dist[subject_name] | |
| stars = sorted(data.keys()) | |
| counts = [data[s] for s in stars] | |
| if not stars or not counts: return None | |
| fig, ax = plt.subplots(figsize=(8, 5)) | |
| sns.barplot(x=stars, y=counts, hue=stars, palette=PALETTE_DISTRIBUTION_FB, order=stars, legend=False, ax=ax) | |
| ax.set_title(f'Feedback Star Distribution: {subject_name}', fontsize=16, fontweight='bold', pad=15) | |
| ax.set_xlabel('Star Rating', fontsize=12) | |
| ax.set_ylabel('Number of Responses', fontsize=12) | |
| ax.tick_params(axis='both', which='major', labelsize=10) | |
| ax.yaxis.grid(True, linestyle='--', linewidth=0.5, alpha=0.7) # Add horizontal grid lines | |
| ax.spines['top'].set_visible(False) # Remove top border | |
| ax.spines['right'].set_visible(False) # Remove right border | |
| # Add count labels on top of bars | |
| for i, count in enumerate(counts): | |
| ax.text(i, count + max(counts)*0.02, f'{count}', ha='center', va='bottom', fontsize=9) | |
| plt.tight_layout() | |
| return save_chart(fig, output_dir, filename) | |
| def plot_instructor_rating_distribution_per_subject( | |
| instructor_rating_dist: dict, | |
| subject_name: str, | |
| output_dir: str, | |
| filename: str | |
| ) -> str | None: | |
| """Plots instructor rating distribution and saves as PNG, returning filepath.""" | |
| if subject_name not in instructor_rating_dist or not instructor_rating_dist[subject_name]: | |
| print(f"Chart Gen: No instructor rating data to plot for {subject_name}.") | |
| return None | |
| data = instructor_rating_dist[subject_name] | |
| stars = sorted(data.keys()) | |
| counts = [data[s] for s in stars] | |
| if not stars or not counts: return None | |
| fig, ax = plt.subplots(figsize=(8, 5)) | |
| sns.barplot(x=stars, y=counts, hue=stars, palette=PALETTE_DISTRIBUTION_IR, order=stars, legend=False, ax=ax) | |
| ax.set_title(f'Instructor Rating Distribution: {subject_name}', fontsize=16, fontweight='bold', pad=15) | |
| ax.set_xlabel('Star Rating', fontsize=12) | |
| ax.set_ylabel('Number of Responses', fontsize=12) | |
| ax.tick_params(axis='both', which='major', labelsize=10) | |
| ax.yaxis.grid(True, linestyle='--', linewidth=0.5, alpha=0.7) | |
| ax.spines['top'].set_visible(False) | |
| ax.spines['right'].set_visible(False) | |
| # Add count labels on top of bars | |
| for i, count in enumerate(counts): | |
| ax.text(i, count + max(counts)*0.02, f'{count}', ha='center', va='bottom', fontsize=9) | |
| plt.tight_layout() | |
| return save_chart(fig, output_dir, filename) | |
| def plot_avg_scores_per_subject( | |
| avg_scores_subject_df: pd.DataFrame, | |
| output_dir: str, | |
| filename: str | |
| ) -> str | None: | |
| """Plots average scores per subject and saves as PNG, returning filepath.""" | |
| if avg_scores_subject_df is None or avg_scores_subject_df.empty: return None | |
| plot_df = avg_scores_subject_df.copy() | |
| plot_df['Average_Feedback_Stars'] = pd.to_numeric(plot_df['Average_Feedback_Stars'], errors='coerce') | |
| plot_df['Average_Instructor_Rating'] = pd.to_numeric(plot_df['Average_Instructor_Rating'], errors='coerce') | |
| plot_df.dropna(subset=['Average_Feedback_Stars', 'Average_Instructor_Rating'], how='all', inplace=True) | |
| if plot_df.empty: return None | |
| try: | |
| fig, ax = plt.subplots(figsize=(max(10, len(plot_df['Subject'].unique()) * 0.9), 6)) # Adjusted size | |
| plot_df_indexed = plot_df.set_index('Subject') | |
| # Use specified colors | |
| plot_df_indexed[['Average_Feedback_Stars', 'Average_Instructor_Rating']].plot( | |
| kind='bar', ax=ax, width=0.8, color=[COLOR_FEEDBACK, COLOR_INSTRUCTOR] | |
| ) | |
| ax.set_title('Average Scores per Subject', fontsize=18, fontweight='bold', pad=20) | |
| ax.set_ylabel('Average Rating (1-5)', fontsize=13) | |
| ax.set_xlabel(None) # Remove x-axis label if subjects are clear | |
| ax.tick_params(axis='x', rotation=45, labelsize=11) | |
| plt.setp(ax.get_xticklabels(), ha="right", rotation_mode="anchor") | |
| ax.tick_params(axis='y', labelsize=11) | |
| ax.legend(['Avg Feedback Stars', 'Avg Instructor Rating'], title='Score Type', fontsize=11, title_fontsize=12) | |
| ax.set_ylim(0, 5.5) | |
| ax.yaxis.grid(True, linestyle='--', linewidth=0.5, alpha=0.7) | |
| ax.spines['top'].set_visible(False) | |
| ax.spines['right'].set_visible(False) | |
| ax.spines['left'].set_visible(False) # Hide left axis line for cleaner look | |
| ax.tick_params(axis='y', which='both', left=False) # Hide y-axis ticks | |
| # Add value labels (rounded to 1 decimal) | |
| for container in ax.containers: | |
| ax.bar_label(container, fmt='%.1f', label_type='edge', fontsize=9, padding=3, color='dimgray') | |
| plt.tight_layout(rect=[0, 0.03, 1, 0.95]) # Adjust layout slightly | |
| return save_chart(fig, output_dir, filename) | |
| except Exception as e: | |
| print(f"Chart Gen: Error plotting average scores per subject: {e}") | |
| if 'fig' in locals(): plt.close(fig) | |
| return None | |
| def plot_avg_scores_per_department( | |
| avg_scores_dept_df: pd.DataFrame, | |
| output_dir: str, | |
| filename: str | |
| ) -> str | None: | |
| """Plots average scores per department and saves as PNG, returning filepath.""" | |
| if avg_scores_dept_df is None or avg_scores_dept_df.empty: return None | |
| plot_df = avg_scores_dept_df.copy() | |
| plot_df['Average_Feedback_Stars'] = pd.to_numeric(plot_df['Average_Feedback_Stars'], errors='coerce') | |
| plot_df['Average_Instructor_Rating'] = pd.to_numeric(plot_df['Average_Instructor_Rating'], errors='coerce') | |
| plot_df.dropna(subset=['Average_Feedback_Stars', 'Average_Instructor_Rating'], how='all', inplace=True) | |
| if plot_df.empty: return None | |
| try: | |
| fig, ax = plt.subplots(figsize=(max(10, len(plot_df['Department'].unique()) * 0.9), 6)) | |
| plot_df_indexed = plot_df.set_index('Department') | |
| plot_df_indexed[['Average_Feedback_Stars', 'Average_Instructor_Rating']].plot( | |
| kind='bar', ax=ax, width=0.8, color=[COLOR_FEEDBACK, COLOR_INSTRUCTOR] | |
| ) | |
| ax.set_title('Average Scores per Department', fontsize=18, fontweight='bold', pad=20) | |
| ax.set_ylabel('Average Rating (1-5)', fontsize=13) | |
| ax.set_xlabel(None) | |
| ax.tick_params(axis='x', rotation=45, labelsize=11) | |
| plt.setp(ax.get_xticklabels(), ha="right", rotation_mode="anchor") | |
| ax.tick_params(axis='y', labelsize=11) | |
| ax.legend(['Avg Feedback Stars', 'Avg Instructor Rating'], title='Score Type', fontsize=11, title_fontsize=12) | |
| ax.set_ylim(0, 5.5) | |
| ax.yaxis.grid(True, linestyle='--', linewidth=0.5, alpha=0.7) | |
| ax.spines['top'].set_visible(False) | |
| ax.spines['right'].set_visible(False) | |
| ax.spines['left'].set_visible(False) | |
| ax.tick_params(axis='y', which='both', left=False) | |
| for container in ax.containers: | |
| ax.bar_label(container, fmt='%.1f', label_type='edge', fontsize=9, padding=3, color='dimgray') | |
| plt.tight_layout(rect=[0, 0.03, 1, 0.95]) | |
| return save_chart(fig, output_dir, filename) | |
| except Exception as e: | |
| print(f"Chart Gen: Error plotting average scores per department: {e}") | |
| if 'fig' in locals(): plt.close(fig) | |
| return None | |
| def plot_correlation_heatmap( | |
| df: pd.DataFrame, | |
| output_dir: str, | |
| filename: str, | |
| title_suffix: str = "Overall" | |
| ) -> str | None: | |
| """Plots correlation heatmap and saves as PNG, returning filepath.""" | |
| if df is None or df.empty: return None | |
| corr_df = df[['Feedback_Stars', 'Instructor_Rating']].copy() | |
| corr_df['Feedback_Stars'] = pd.to_numeric(corr_df['Feedback_Stars'], errors='coerce') | |
| corr_df['Instructor_Rating'] = pd.to_numeric(corr_df['Instructor_Rating'], errors='coerce') | |
| corr_df.dropna(how='any', inplace=True) | |
| if len(corr_df) < 2 or corr_df['Feedback_Stars'].nunique() < 2 or corr_df['Instructor_Rating'].nunique() < 2: return None | |
| correlation_matrix = corr_df.corr() | |
| if correlation_matrix.isnull().all().all(): return None | |
| fig, ax = plt.subplots(figsize=(6, 5)) | |
| sns.heatmap( | |
| correlation_matrix, | |
| annot=True, | |
| cmap=PALETTE_CORR, | |
| fmt=".2f", # Keep .2f for correlation precision | |
| ax=ax, | |
| vmin=-1, vmax=1, | |
| annot_kws={"size": 12}, # Slightly larger annotation | |
| linewidths=.5, # Add lines between cells | |
| linecolor='lightgray' | |
| ) | |
| ax.set_title(f'Correlation ({title_suffix})', fontsize=14, fontweight='bold', pad=15) | |
| # Improve tick labels | |
| ax.set_xticklabels(ax.get_xticklabels(), rotation=0, fontsize=11) | |
| ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=11) | |
| plt.tight_layout(rect=[0, 0, 1, 0.95]) | |
| return save_chart(fig, output_dir, filename) | |
| def plot_radar_chart_subject_department( | |
| avg_scores_subject_dept_df: pd.DataFrame, | |
| department_name: str, | |
| output_dir: str, | |
| filename: str | |
| ) -> str | None: | |
| """Plots radar chart and saves as PNG, returning filepath.""" | |
| if avg_scores_subject_dept_df is None or avg_scores_subject_dept_df.empty: return None | |
| dept_data_full = avg_scores_subject_dept_df[avg_scores_subject_dept_df['Department'] == department_name] | |
| if dept_data_full.empty: return None | |
| dept_data = dept_data_full.copy() | |
| dept_data['Average_Feedback_Stars'] = pd.to_numeric(dept_data['Average_Feedback_Stars'], errors='coerce') | |
| dept_data['Average_Instructor_Rating'] = pd.to_numeric(dept_data['Average_Instructor_Rating'], errors='coerce') | |
| dept_data.dropna(subset=['Average_Feedback_Stars', 'Average_Instructor_Rating'], how='all', inplace=True) | |
| # Use .loc for fillna | |
| dept_data.loc[:, 'Average_Feedback_Stars'] = dept_data['Average_Feedback_Stars'].fillna(0) | |
| dept_data.loc[:, 'Average_Instructor_Rating'] = dept_data['Average_Instructor_Rating'].fillna(0) | |
| if dept_data.empty: return None | |
| labels = dept_data['Subject'].values | |
| num_vars = len(labels) | |
| if num_vars < 3: | |
| print(f"Chart Gen: Not enough subjects ({num_vars}) for radar chart (Dept: {department_name}). Needs >= 3.") | |
| return None | |
| angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist() | |
| angles += angles[:1] | |
| fig, ax = plt.subplots(figsize=(7, 7), subplot_kw=dict(polar=True)) | |
| # Feedback Stars | |
| feedback_values = dept_data['Average_Feedback_Stars'].tolist() | |
| feedback_values += feedback_values[:1] | |
| ax.plot(angles, feedback_values, linewidth=2, linestyle='solid', label='Avg Feedback Stars', color=COLOR_RADAR_FB, marker='o', markersize=5) | |
| ax.fill(angles, feedback_values, COLOR_RADAR_FB, alpha=0.25) | |
| # Instructor Rating | |
| instructor_values = dept_data['Average_Instructor_Rating'].tolist() | |
| instructor_values += instructor_values[:1] | |
| ax.plot(angles, instructor_values, linewidth=2, linestyle='solid', label='Avg Instructor Rating', color=COLOR_RADAR_IR, marker='o', markersize=5) | |
| ax.fill(angles, instructor_values, COLOR_RADAR_IR, alpha=0.25) | |
| ax.set_theta_offset(np.pi / 2) | |
| ax.set_theta_direction(-1) | |
| ax.set_xticks(angles[:-1]) | |
| # Wrap long subject labels if necessary | |
| wrapped_labels = ['\n'.join(label[i:i+15] for i in range(0, len(label), 15)) for label in labels] | |
| ax.set_xticklabels(wrapped_labels, fontsize=9) # Use wrapped labels | |
| ax.set_yticks(np.arange(0, 6, 1)) | |
| ax.set_yticklabels([str(i) for i in np.arange(0, 6, 1)], fontsize=10, color="grey") | |
| ax.set_ylim(0, 5) | |
| ax.grid(color="grey", linestyle='--', linewidth=0.5) # Style grid lines | |
| plt.title(f'Performance Radar: {department_name}', size=16, fontweight='bold', y=1.12) # Adjust title position | |
| ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1), fontsize=10) # Adjust legend position | |
| # fig.patch.set_alpha(0) # Make background transparent if needed for embedding | |
| # ax.patch.set_alpha(0) | |
| # Note: tight_layout might not work well with polar plots + adjusted legend/title. Manual adjustment might be needed if overlaps occur. | |
| # plt.tight_layout() # Use cautiously with polar plots | |
| return save_chart(fig, output_dir, filename) | |