import os
import tempfile
import json
import html
from typing import TypedDict, List, Dict, Any, Annotated

import gradio as gr
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import markdown
from plotly.subplots import make_subplots
from langgraph.graph import StateGraph, END
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.prompts import ChatPromptTemplate


class EvaluationState(TypedDict):
    """Shared state threaded through the LangGraph pipeline nodes."""
    raw_data: pd.DataFrame          # uploaded evaluation table
    processed_data: Dict[str, Any]  # per-subject + overall aggregates
    visualizations: List[Dict[str, Any]]  # {'title', 'fig', 'description'} dicts
    insights: str                   # Markdown analysis text
    report_html: str                # final standalone HTML report


# Node 1: Data Preprocessing
def preprocess_data(state: "EvaluationState") -> "EvaluationState":
    """Clean the raw ratings table and aggregate per-subject metrics.

    Expects columns: 'subject', 'feedback_rating' (1-5),
    'instructor_rating' (1-5). Writes the aggregates into
    state['processed_data'] and returns the state.
    """
    # Work on a copy so the caller's DataFrame is not mutated in place.
    df = state["raw_data"].copy()

    # Coerce ratings to numeric; non-numeric entries become NaN.
    for col in ("feedback_rating", "instructor_rating"):
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce")

    processed_data: Dict[str, Any] = {"subjects": []}

    for subject in df["subject"].unique():
        subject_data = df[df["subject"] == subject]

        # Count of responses at each rating level 1-5.
        feedback_distribution = {
            i: int(subject_data["feedback_rating"].eq(i).sum()) for i in range(1, 6)
        }
        instructor_distribution = {
            i: int(subject_data["instructor_rating"].eq(i).sum()) for i in range(1, 6)
        }

        correlation = subject_data["feedback_rating"].corr(
            subject_data["instructor_rating"]
        )

        processed_data["subjects"].append({
            "name": subject,
            "feedback_distribution": feedback_distribution,
            "instructor_distribution": instructor_distribution,
            "avg_feedback": subject_data["feedback_rating"].mean(),
            "avg_instructor": subject_data["instructor_rating"].mean(),
            # corr() is NaN with < 2 paired responses or zero variance;
            # fall back to 0.0 so downstream ':.2f' formatting works.
            "correlation": correlation if pd.notna(correlation) else 0.0,
            "sample_size": len(subject_data),
        })

    # Compute the overall correlation once (the original evaluated it twice).
    overall_corr = df["feedback_rating"].corr(df["instructor_rating"])
    processed_data["overall"] = {
        "avg_feedback": df["feedback_rating"].mean(),
        "avg_instructor": df["instructor_rating"].mean(),
        "total_responses": len(df),
        "correlation": overall_corr if pd.notna(overall_corr) else 0.0,
    }

    state["processed_data"] = processed_data
    return state
# Node 2: Create Visualizations
def create_visualizations(state: "EvaluationState") -> "EvaluationState":
    """Build the Plotly figures for the report from processed metrics.

    Appends {'title', 'fig', 'description'} dicts to
    state['visualizations'] and returns the state. Each chart is only
    created when it has data to show.
    """
    processed_data = state["processed_data"]
    subjects = processed_data["subjects"]
    visualizations: List[Dict[str, Any]] = []

    def add_distribution_chart(dist_key, rating_label):
        # Shared builder for the feedback/instructor grouped bar charts
        # (the original duplicated this code verbatim for both).
        records = [
            {
                "Subject": s["name"],
                "Rating": rating,
                "Count": count,
                "Type": rating_label,
            }
            for s in subjects
            for rating, count in s[dist_key].items()
        ]
        if not records:  # Only create plot if data exists
            return
        fig = px.bar(
            pd.DataFrame(records),
            x="Rating",
            y="Count",
            color="Subject",
            title=f"Distribution of {rating_label} Ratings by Subject",
            labels={"Rating": "Rating (1-5)", "Count": "Number of Responses"},
            barmode="group",
        )
        visualizations.append({
            "title": f"{rating_label.lower()}_distribution",
            "fig": fig,
            "description": f"Distribution of {rating_label.lower()} ratings across different subjects",
        })

    # 1 & 2: grouped bar charts of the two rating distributions.
    add_distribution_chart("feedback_distribution", "Feedback")
    add_distribution_chart("instructor_distribution", "Instructor")

    # 3: radar chart comparing per-subject average ratings.
    categories = [s["name"] for s in subjects]
    if categories:  # Only create plot if categories exist
        fig_radar = go.Figure()
        fig_radar.add_trace(go.Scatterpolar(
            r=[s["avg_feedback"] for s in subjects],
            theta=categories,
            fill="toself",
            name="Avg. Feedback Rating",
        ))
        fig_radar.add_trace(go.Scatterpolar(
            r=[s["avg_instructor"] for s in subjects],
            theta=categories,
            fill="toself",
            name="Avg. Instructor Rating",
        ))
        fig_radar.update_layout(
            # Ratings are on a fixed 1-5 scale, so pin the radial axis.
            polar=dict(radialaxis=dict(visible=True, range=[0, 5])),
            title_text="Average Ratings Comparison by Subject",  # Use title_text
        )
        visualizations.append({
            "title": "radar_comparison",
            "fig": fig_radar,
            "description": "Comparison of average feedback and instructor ratings by subject",
        })

    # 4: per-subject correlation scatter, bubble-sized by sample count.
    correlation_records = [
        {
            "Subject": s["name"],
            "Correlation": s["correlation"],
            "Sample Size": s["sample_size"],
        }
        for s in subjects
    ]
    if correlation_records:  # Only create plot if data exists
        fig_corr = px.scatter(
            pd.DataFrame(correlation_records),
            x="Subject",
            y="Correlation",
            size="Sample Size",
            title="Correlation between Feedback and Instructor Ratings",
            labels={"Correlation": "Pearson Correlation Coefficient"},
            color="Correlation",
            color_continuous_scale=px.colors.diverging.RdBu,
            color_continuous_midpoint=0,
            # Ensure hover data is meaningful
            hover_name="Subject",
            hover_data={"Subject": False, "Correlation": ":.2f", "Sample Size": True},
        )
        visualizations.append({
            "title": "correlation_analysis",
            "fig": fig_corr,
            "description": "Correlation between feedback and instructor ratings by subject",
        })

    state["visualizations"] = visualizations
    return state
# Node 3: Generate Insights using LangChain + Google Generative AI
def generate_insights(state: "EvaluationState") -> "EvaluationState":
    """Produce a Markdown analysis of the processed metrics.

    Tries Google Generative AI via LangChain (requires GOOGLE_API_KEY in
    the environment). On any failure — including a missing key — falls
    back to a deterministic, data-driven Markdown summary so report
    generation never blocks. Writes state['insights'] and returns state.
    """
    import os  # local import: 'os' is not guaranteed to exist at module scope

    processed_data = state["processed_data"]

    # Ensure GOOGLE_API_KEY is set in your environment for the LLM path,
    # e.g. export GOOGLE_API_KEY="your_actual_api_key".
    google_api_key = os.getenv("GOOGLE_API_KEY")
    try:
        if not google_api_key:
            raise ValueError("GOOGLE_API_KEY not found in environment.")

        # Serialize per-subject metrics into plain text for the prompt.
        subjects_info = []
        for subject in processed_data["subjects"]:
            subject_info = f"Subject: {subject['name']}\n"
            subject_info += f"  Average Feedback Rating: {subject['avg_feedback']:.2f}/5\n"
            subject_info += f"  Average Instructor Rating: {subject['avg_instructor']:.2f}/5\n"
            subject_info += f"  Correlation between Feedback and Instructor Rating: {subject['correlation']:.2f}\n"
            subject_info += f"  Sample Size: {subject['sample_size']} responses\n"
            subject_info += "  Feedback Rating Distribution (Rating: Count): "
            subject_info += ", ".join([f"{k}: {v}" for k, v in subject["feedback_distribution"].items()])
            subject_info += "\n  Instructor Rating Distribution (Rating: Count): "
            subject_info += ", ".join([f"{k}: {v}" for k, v in subject["instructor_distribution"].items()])
            subjects_info.append(subject_info)

        overall_info = f"Overall Average Feedback Rating: {processed_data['overall']['avg_feedback']:.2f}/5\n"
        overall_info += f"Overall Average Instructor Rating: {processed_data['overall']['avg_instructor']:.2f}/5\n"
        overall_info += f"Overall Correlation: {processed_data['overall']['correlation']:.2f}\n"
        overall_info += f"Total Responses: {processed_data['overall']['total_responses']}"

        prompt = ChatPromptTemplate.from_messages([
            ("system", """You are an expert data analyst . Your task is to analyze employees evaluation data and provide comprehensive, actionable insights. Format your response in Markdown.
Your analysis should cover:
1. **Executive Summary**: A brief overview of key findings.
2. **Overall Performance Patterns**: Discuss general trends, average ratings, and overall sentiment.
3. **Subject-Specific Analysis**:
   * Highlight subjects with notably high or low ratings (both feedback and instructor).
   * Discuss variations between subjects.
   * Analyze the correlation between feedback and instructor ratings for each subject.
4. **Key Observations & Potential Issues**: Identify any outliers, significant discrepancies (e.g., large gap between feedback and instructor rating for a subject), or patterns that warrant attention.
5. **Actionable Recommendations**: Based on the data, provide specific, data-driven recommendations for improvement (e.g., for specific subjects, for instructor development, for curriculum adjustments).
Be specific, use the data provided, and ensure your insights are clear and well-structured.
"""),
            ("user", """Please analyze the following student evaluation data:

**Individual Subject Information:**
{subjects_info}

**Overall Statistics:**
{overall_info}

Provide your analysis in Markdown format.
"""),
        ])

        llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest", api_key=google_api_key)  # Example model
        chain = prompt | llm
        response = chain.invoke({
            "subjects_info": "\n\n".join(subjects_info),
            "overall_info": overall_info,
        })
        state["insights"] = response.content
    except Exception as e:
        import traceback
        error_message = f"Error in generate_insights: {e}\n{traceback.format_exc()}"
        print(error_message)

        # Enhanced fallback insights (Markdown) built purely from the data.
        fallback_insights = f"""
# AI-Generated Insights (Fallback)

Due to an issue connecting to the AI service, a basic analysis is provided below.

## Executive Summary
The overall average feedback rating is **{processed_data['overall']['avg_feedback']:.2f}/5**, and the overall average instructor rating is **{processed_data['overall']['avg_instructor']:.2f}/5**. The correlation between feedback and instructor ratings across all subjects is **{processed_data['overall']['correlation']:.2f}**. A total of **{processed_data['overall']['total_responses']}** responses were analyzed.

## Overall Performance Patterns
The data indicates a general performance level around the average ratings mentioned. Further subject-specific details are below.

## Subject-Specific Analysis
"""
        for subject in processed_data["subjects"]:
            fallback_insights += f"""
### {subject['name']}
- **Average Feedback Rating**: {subject['avg_feedback']:.2f}/5
- **Average Instructor Rating**: {subject['avg_instructor']:.2f}/5
- **Correlation (Feedback vs. Instructor)**: {subject['correlation']:.2f}
- **Sample Size**: {subject['sample_size']} responses
- **Feedback Distribution** (Rating: Count): {', '.join([f'{k}: {v}' for k, v in subject['feedback_distribution'].items()])}
- **Instructor Distribution** (Rating: Count): {', '.join([f'{k}: {v}' for k, v in subject['instructor_distribution'].items()])}
"""
        fallback_insights += """
## Recommendations (Generic)
1. **Investigate Low-Performing Subjects**: Focus on subjects with ratings significantly below average to identify areas for improvement.
2. **Analyze Rating Correlations**: Understand why correlations between student feedback and instructor ratings vary across subjects.
3. **Gather Qualitative Feedback**: For subjects with polarized or unexpectedly low ratings, consider gathering more detailed qualitative feedback.
4. **Share Best Practices**: Identify practices from highly-rated courses/instructors and explore ways to share them.

**Note**: This is a fallback analysis. For detailed, AI-powered insights, please ensure the Google Generative AI integration is correctly configured with a valid API key and model name.
"""
        state["insights"] = fallback_insights
    return state
# Node 4: Compile HTML Report
def compile_report(state: "EvaluationState") -> "EvaluationState":
    """Assemble insights + Plotly charts into a standalone HTML report.

    NOTE(review): the original HTML/JS template in this file was corrupted
    (markup stripped). This body is a reconstruction of its visible
    structure — metric cards, per-subject summary table, one div per chart
    with a Plotly.newPlot call guarded by try/catch, and the Markdown
    insights rendered to HTML. Confirm styling details against the original.
    Writes state['report_html'] and returns the state.
    """
    from datetime import datetime

    visualizations = state["visualizations"]
    insights = state["insights"]
    processed_data = state["processed_data"]

    date_generated = datetime.now().strftime("%B %d, %Y")

    # Per-subject summary table rows.
    subject_rows = ""
    for subject in processed_data["subjects"]:
        subject_rows += f"""
        <tr>
            <td>{html.escape(str(subject['name']))}</td>
            <td>{subject['avg_feedback']:.2f}</td>
            <td>{subject['avg_instructor']:.2f}</td>
            <td>{subject['correlation']:.2f}</td>
            <td>{subject['sample_size']}</td>
        </tr>"""

    # One container div per chart plus a matching Plotly.newPlot call; the
    # figure is embedded as JSON so the report is self-contained apart from
    # the Plotly CDN script tag.
    chart_divs = ""
    plotly_js_calls = []
    for viz_data in visualizations:
        div_id = f"plot_{viz_data['title']}"
        chart_divs += f"""
        <div class="chart-container">
            <h3>{html.escape(viz_data['description'])}</h3>
            <div id="{div_id}" style="width:100%;height:450px;"></div>
        </div>"""
        if viz_data.get("fig") is not None:
            try:
                fig_json = viz_data["fig"].to_json()
                plotly_js_calls.append(f"""
                try {{
                    var figData = {fig_json};
                    Plotly.newPlot('{div_id}', figData.data, figData.layout, {{responsive: true}});
                }} catch (plotError) {{
                    document.getElementById('{div_id}').innerHTML =
                        '<p style="color:red;">Error rendering this chart: ' + plotError.message + '</p>';
                }}""")
            except Exception as e:
                print(f"Error converting figure {viz_data['title']} to JSON: {e}")
                plotly_js_calls.append(f"""
                document.getElementById('{div_id}').innerHTML =
                    '<p style="color:red;">Error preparing chart data for {html.escape(viz_data['description'])}.</p>';""")
        else:
            plotly_js_calls.append(f"""
            document.getElementById('{div_id}').innerHTML =
                '<p>No data to display for {html.escape(viz_data['description'])}.</p>';""")

    # Join all individual plot rendering calls.
    plotly_script_content = "\n".join(plotly_js_calls)

    # Convert insights from Markdown to HTML. The 'nl2br' extension turns
    # single newlines into <br>; 'tables' and 'fenced_code' cover common
    # LLM-generated Markdown constructs.
    insights_html = markdown.markdown(insights, extensions=["nl2br", "tables", "fenced_code"])

    overall = processed_data["overall"]
    report_html = f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Evaluation Analytics Report</title>
<script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
<style>
    body {{ font-family: Arial, sans-serif; margin: 20px; color: #333; }}
    .metric-card {{ display: inline-block; border: 1px solid #ddd; border-radius: 8px; padding: 16px; margin: 8px; text-align: center; }}
    .metric-value {{ font-size: 2em; font-weight: bold; }}
    table {{ border-collapse: collapse; width: 100%; margin: 16px 0; }}
    th, td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
    th {{ background: #f5f5f5; }}
    .chart-container {{ margin: 24px 0; }}
</style>
</head>
<body>
<h1>Formation Evaluation Analytics Report</h1>
<p>Generated on {date_generated}</p>

<div class="metric-card">
    <div class="metric-value">{overall['avg_feedback']:.2f}/5</div>
    <div>Average Feedback Rating</div>
    <div>Based on {overall['total_responses']} total responses</div>
</div>
<div class="metric-card">
    <div class="metric-value">{overall['avg_instructor']:.2f}/5</div>
    <div>Average Instructor Rating</div>
    <div>Correlation with feedback: {overall['correlation']:.2f}</div>
</div>

<h2>Subject Summary</h2>
<table>
    <tr><th>Subject</th><th>Avg. Feedback</th><th>Avg. Instructor</th><th>Correlation</th><th>Sample Size</th></tr>
    {subject_rows}
</table>

<h2>Visualizations</h2>
{chart_divs}

<h2>Insights</h2>
<div class="insights">{insights_html}</div>

<script>
{plotly_script_content}
</script>
</body>
</html>"""

    state["report_html"] = report_html
    return state


# --- Build the LangGraph pipeline -------------------------------------------
# NOTE(review): the original graph-wiring code was lost to file corruption;
# reconstructed from the four node functions above and the
# 'evaluation_app.invoke(initial_state)' call in generate_report_gradio.
# Confirm node names and edge order against the original.
workflow = StateGraph(EvaluationState)
workflow.add_node("preprocess", preprocess_data)
workflow.add_node("visualize", create_visualizations)
workflow.add_node("generate_insights", generate_insights)
workflow.add_node("compile_report", compile_report)
workflow.set_entry_point("preprocess")
workflow.add_edge("preprocess", "visualize")
workflow.add_edge("visualize", "generate_insights")
workflow.add_edge("generate_insights", "compile_report")
workflow.add_edge("compile_report", END)
evaluation_app = workflow.compile()
", None, gr.update(visible=False) try: file_path = file_obj.name if file_path.lower().endswith('.csv'): df = pd.read_csv(file_path) elif file_path.lower().endswith(('.xls', '.xlsx')): df = pd.read_excel(file_path) else: return "Unsupported file type. Please upload a CSV or Excel file.
", None, gr.update(visible=False) required_columns = ['subject', 'feedback_rating', 'instructor_rating'] if not all(col in df.columns for col in required_columns): missing = [col for col in required_columns if col not in df.columns] return f"Missing required columns: {', '.join(missing)}.
", None, gr.update(visible=False) except Exception as e: return f"Error reading file: {html.escape(str(e))}
", None, gr.update(visible=False) initial_state = {"raw_data": df, "processed_data": {}, "visualizations": [], "insights": "", "report_html": ""} try: final_state = evaluation_app.invoke(initial_state) report_html_content = final_state["report_html"] except Exception as e: import traceback tb_str = traceback.format_exc() error_msg = f"An error occurred during report generation: {html.escape(str(e))}" print(f"{error_msg}\n{tb_str}") return f"{error_msg}
", None, gr.update(visible=False) try: # Use a more robust way to name the temporary file for download base_name = os.path.splitext(os.path.basename(file_path))[0] report_file_name = f"{base_name}_evaluation_report.html" with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".html", prefix=f"{base_name}_report_", encoding="utf-8") as tmp_file: tmp_file.write(report_html_content) temp_file_path = tmp_file.name return report_html_content, gr.update(value=temp_file_path, label=f"Download: {report_file_name}", visible=True), gr.update(visible=True) except Exception as e: return f"Error creating download file: {html.escape(str(e))}
", None, gr.update(visible=False) # --- GRADIO APP USING gr.Blocks --- with gr.Blocks( title="Evaluation Analytics System", theme=gr.themes.Soft(primary_hue=gr.themes.colors.blue, secondary_hue=gr.themes.colors.sky) ) as app: gr.Markdown( """ # 📊 Formation Evaluation Analytics Dashboard Upload evaluation data (CSV or Excel format) to generate an interactive report. Required columns: `subject`, `feedback_rating` (1-5), `instructor_rating` (1-5). """ ) with gr.Row(): with gr.Column(scale=1, min_width=300): # Input column file_input = gr.File( label="Upload Evaluation Data", file_types=['.csv', '.xls', '.xlsx'], # type="filepath" # 'filepath' is often better for NamedTemporaryFile ) generate_button = gr.Button("Generate Report", variant="primary") # Placeholder for download link, initially hidden download_file_output = gr.File(label="Download Full Report", visible=False, interactive=False) with gr.Column(scale=3): # Output column - larger for report preview report_output_html = gr.HTML( label="Analysis Report Preview", value="Upload a file and click 'Generate Report' to see the analysis.
" ) # Define interactions generate_button.click( fn=generate_report_gradio, inputs=[file_input], outputs=[report_output_html, download_file_output, download_file_output] # download_file_output is updated twice for value and visibility ) app.launch(ssr_mode=False)