# formation / app.py — Formation Evaluation Analytics (Gradio app)
# Author: ArkenB — commit 5847257 (verified): "Update app.py"
import html
import json
import os
import tempfile
from typing import TypedDict, List, Dict, Any, Annotated

import gradio as gr
import markdown
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from langchain.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langgraph.graph import StateGraph, END
class EvaluationState(TypedDict):
    """Shared state threaded through the LangGraph pipeline nodes."""
    raw_data: pd.DataFrame                 # uploaded evaluation data (set by the Gradio handler)
    processed_data: Dict[str, Any]         # per-subject + overall metrics (set by preprocess_data)
    visualizations: List[Dict[str, Any]]   # plotly figures with title/description (set by create_visualizations)
    insights: str                          # Markdown narrative analysis (set by generate_insights)
    report_html: str                       # final standalone HTML report (set by compile_report)
# Node 1: Data Preprocessing
def preprocess_data(state: "EvaluationState") -> "EvaluationState":
    """Process the raw CSV/Excel data and extract key metrics.

    Expects ``state["raw_data"]`` to contain the columns ``subject``,
    ``feedback_rating`` (1-5) and ``instructor_rating`` (1-5).

    Populates ``state["processed_data"]`` with, per subject: the 1-5 rating
    distributions, average ratings, the feedback/instructor correlation and
    the sample size — plus an ``overall`` summary — then returns the state.
    """
    df = state["raw_data"]

    # Coerce ratings to numeric; malformed entries become NaN ('coerce')
    # so they are excluded from the counts/means/correlations below.
    if 'feedback_rating' in df.columns:
        df['feedback_rating'] = pd.to_numeric(df['feedback_rating'], errors='coerce')
    if 'instructor_rating' in df.columns:
        df['instructor_rating'] = pd.to_numeric(df['instructor_rating'], errors='coerce')

    processed_data = {'subjects': []}

    for subject in df['subject'].unique():
        subject_data = df[df['subject'] == subject]

        # Count responses at each rating level 1-5 for both rating types.
        feedback_distribution = {}
        instructor_distribution = {}
        for i in range(1, 6):
            feedback_distribution[i] = int(subject_data['feedback_rating'].eq(i).sum())
            instructor_distribution[i] = int(subject_data['instructor_rating'].eq(i).sum())

        avg_feedback = subject_data['feedback_rating'].mean()
        avg_instructor = subject_data['instructor_rating'].mean()

        # Pearson correlation between the two rating columns. pandas yields
        # NaN for degenerate cases (single response, constant column);
        # report those as 0.0 so downstream formatting never sees NaN.
        correlation = subject_data['feedback_rating'].corr(subject_data['instructor_rating'])

        processed_data['subjects'].append({
            'name': subject,
            'feedback_distribution': feedback_distribution,
            'instructor_distribution': instructor_distribution,
            'avg_feedback': avg_feedback,
            'avg_instructor': avg_instructor,
            'correlation': correlation if pd.notna(correlation) else 0.0,
            'sample_size': len(subject_data)
        })

    # Overall metrics. Fix: compute the overall correlation once instead of
    # twice (the original re-evaluated df[...].corr(...) inside the NaN check).
    overall_correlation = df['feedback_rating'].corr(df['instructor_rating'])
    processed_data['overall'] = {
        'avg_feedback': df['feedback_rating'].mean(),
        'avg_instructor': df['instructor_rating'].mean(),
        'total_responses': len(df),
        'correlation': overall_correlation if pd.notna(overall_correlation) else 0.0
    }

    state["processed_data"] = processed_data
    return state
# Node 2: Create Visualizations
def create_visualizations(state: EvaluationState) -> EvaluationState:
    """Create plotly visualizations based on the processed data.

    Builds up to four charts — two grouped bar charts (feedback and
    instructor rating distributions), a radar chart of average ratings, and
    a correlation scatter plot — and stores them in
    ``state["visualizations"]`` as dicts with ``title``, ``fig`` and
    ``description`` keys. Charts whose underlying data is empty are skipped.
    """
    processed = state["processed_data"]
    charts = []

    def long_form_rows(dist_key, type_label):
        # Flatten each subject's {rating: count} mapping into long-form
        # records suitable for px.bar.
        records = []
        for subj in processed['subjects']:
            for rating, count in subj[dist_key].items():
                records.append({
                    'Subject': subj['name'],
                    'Rating': rating,
                    'Count': count,
                    'Type': type_label,
                })
        return records

    # 1. Grouped bar chart: feedback-rating distribution per subject.
    feedback_rows = long_form_rows('feedback_distribution', 'Feedback')
    if feedback_rows:  # skip the chart entirely when there is no data
        charts.append({
            'title': 'feedback_distribution',
            'fig': px.bar(
                pd.DataFrame(feedback_rows),
                x='Rating',
                y='Count',
                color='Subject',
                title='Distribution of Feedback Ratings by Subject',
                labels={'Rating': 'Rating (1-5)', 'Count': 'Number of Responses'},
                barmode='group'
            ),
            'description': 'Distribution of feedback ratings across different subjects'
        })

    # 2. Grouped bar chart: instructor-rating distribution per subject.
    instructor_rows = long_form_rows('instructor_distribution', 'Instructor')
    if instructor_rows:
        charts.append({
            'title': 'instructor_distribution',
            'fig': px.bar(
                pd.DataFrame(instructor_rows),
                x='Rating',
                y='Count',
                color='Subject',
                title='Distribution of Instructor Ratings by Subject',
                labels={'Rating': 'Rating (1-5)', 'Count': 'Number of Responses'},
                barmode='group'
            ),
            'description': 'Distribution of instructor ratings across different subjects'
        })

    # 3. Radar chart comparing the two average ratings across subjects.
    subject_names = [subj['name'] for subj in processed['subjects']]
    if subject_names:
        radar = go.Figure()
        radar.add_trace(go.Scatterpolar(
            r=[subj['avg_feedback'] for subj in processed['subjects']],
            theta=subject_names,
            fill='toself',
            name='Avg. Feedback Rating'
        ))
        radar.add_trace(go.Scatterpolar(
            r=[subj['avg_instructor'] for subj in processed['subjects']],
            theta=subject_names,
            fill='toself',
            name='Avg. Instructor Rating'
        ))
        # Fixed 0-5 radial axis so both traces share the rating scale.
        radar.update_layout(
            polar=dict(
                radialaxis=dict(
                    visible=True,
                    range=[0, 5]
                )
            ),
            title_text="Average Ratings Comparison by Subject"
        )
        charts.append({
            'title': 'radar_comparison',
            'fig': radar,
            'description': 'Comparison of average feedback and instructor ratings by subject'
        })

    # 4. Scatter of per-subject correlation, point size = sample size,
    #    diverging colour scale centred at zero correlation.
    corr_rows = [
        {
            'Subject': subj['name'],
            'Correlation': subj['correlation'],
            'Sample Size': subj['sample_size'],
        }
        for subj in processed['subjects']
    ]
    if corr_rows:
        charts.append({
            'title': 'correlation_analysis',
            'fig': px.scatter(
                pd.DataFrame(corr_rows),
                x='Subject',
                y='Correlation',
                size='Sample Size',
                title='Correlation between Feedback and Instructor Ratings',
                labels={'Correlation': 'Pearson Correlation Coefficient'},
                color='Correlation',
                color_continuous_scale=px.colors.diverging.RdBu,
                color_continuous_midpoint=0,
                hover_name='Subject',
                hover_data={'Subject': False, 'Correlation': ':.2f', 'Sample Size': True}
            ),
            'description': 'Correlation between feedback and instructor ratings by subject'
        })

    state["visualizations"] = charts
    return state
# Node 3: Generate Insights using LangChain + Google Generative AI
def generate_insights(state: "EvaluationState") -> "EvaluationState":
    """Generate narrative insights based on the data analysis using GenAI.

    Tries Google Generative AI via LangChain. If the API key is missing or
    any step of the LLM call fails, a deterministic Markdown fallback
    summary is built from the processed metrics instead, so report
    generation never fails at this stage.
    """
    # Fix: 'os' was referenced below but never imported at module level in
    # the original file, so this node crashed with NameError before the
    # try/except could provide the fallback. A local import guarantees it.
    import os

    processed_data = state["processed_data"]
    # Ensure GOOGLE_API_KEY is set in your environment for this to work
    # Example: export GOOGLE_API_KEY="your_actual_api_key"
    google_api_key = os.getenv("GOOGLE_API_KEY")
    try:
        if not google_api_key:
            raise ValueError("GOOGLE_API_KEY not found in environment.")

        # Prepare a plain-text summary of the metrics for the LLM prompt.
        subjects_info = []
        for subject in processed_data['subjects']:
            subject_info = f"Subject: {subject['name']}\n"
            subject_info += f" Average Feedback Rating: {subject['avg_feedback']:.2f}/5\n"
            subject_info += f" Average Instructor Rating: {subject['avg_instructor']:.2f}/5\n"
            subject_info += f" Correlation between Feedback and Instructor Rating: {subject['correlation']:.2f}\n"
            subject_info += f" Sample Size: {subject['sample_size']} responses\n"
            subject_info += " Feedback Rating Distribution (Rating: Count): "
            subject_info += ", ".join([f"{k}: {v}" for k, v in subject['feedback_distribution'].items()])
            subject_info += "\n Instructor Rating Distribution (Rating: Count): "
            subject_info += ", ".join([f"{k}: {v}" for k, v in subject['instructor_distribution'].items()])
            subjects_info.append(subject_info)

        overall_info = f"Overall Average Feedback Rating: {processed_data['overall']['avg_feedback']:.2f}/5\n"
        overall_info += f"Overall Average Instructor Rating: {processed_data['overall']['avg_instructor']:.2f}/5\n"
        overall_info += f"Overall Correlation: {processed_data['overall']['correlation']:.2f}\n"
        overall_info += f"Total Responses: {processed_data['overall']['total_responses']}"

        prompt = ChatPromptTemplate.from_messages([
            ("system", """You are an expert data analyst . Your task is to analyze employees evaluation data and provide comprehensive, actionable insights.
Format your response in Markdown.
Your analysis should cover:
1. **Executive Summary**: A brief overview of key findings.
2. **Overall Performance Patterns**: Discuss general trends, average ratings, and overall sentiment.
3. **Subject-Specific Analysis**:
* Highlight subjects with notably high or low ratings (both feedback and instructor).
* Discuss variations between subjects.
* Analyze the correlation between feedback and instructor ratings for each subject.
4. **Key Observations & Potential Issues**: Identify any outliers, significant discrepancies (e.g., large gap between feedback and instructor rating for a subject), or patterns that warrant attention.
5. **Actionable Recommendations**: Based on the data, provide specific, data-driven recommendations for improvement (e.g., for specific subjects, for instructor development, for curriculum adjustments).
Be specific, use the data provided, and ensure your insights are clear and well-structured.
"""),
            ("user", """Please analyze the following student evaluation data:
**Individual Subject Information:**
{subjects_info}
**Overall Statistics:**
{overall_info}
Provide your analysis in Markdown format.
""")
        ])

        llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest", api_key=google_api_key)  # Example model
        chain = prompt | llm
        response = chain.invoke({
            "subjects_info": "\n\n".join(subjects_info),
            "overall_info": overall_info
        })
        state["insights"] = response.content
    except Exception as e:
        # Any failure (missing key, network, quota, model errors) lands here;
        # log it and emit a deterministic Markdown fallback instead.
        import traceback
        error_message = f"Error in generate_insights: {e}\n{traceback.format_exc()}"
        print(error_message)
        fallback_insights = f"""
# AI-Generated Insights (Fallback)
Due to an issue connecting to the AI service, a basic analysis is provided below.
## Executive Summary
The overall average feedback rating is **{processed_data['overall']['avg_feedback']:.2f}/5**, and the overall average instructor rating is **{processed_data['overall']['avg_instructor']:.2f}/5**.
The correlation between feedback and instructor ratings across all subjects is **{processed_data['overall']['correlation']:.2f}**.
A total of **{processed_data['overall']['total_responses']}** responses were analyzed.
## Overall Performance Patterns
The data indicates a general performance level around the average ratings mentioned. Further subject-specific details are below.
## Subject-Specific Analysis
"""
        for subject in processed_data['subjects']:
            fallback_insights += f"""
### {subject['name']}
- **Average Feedback Rating**: {subject['avg_feedback']:.2f}/5
- **Average Instructor Rating**: {subject['avg_instructor']:.2f}/5
- **Correlation (Feedback vs. Instructor)**: {subject['correlation']:.2f}
- **Sample Size**: {subject['sample_size']} responses
- **Feedback Distribution** (Rating: Count): {', '.join([f'{k}: {v}' for k,v in subject['feedback_distribution'].items()])}
- **Instructor Distribution** (Rating: Count): {', '.join([f'{k}: {v}' for k,v in subject['instructor_distribution'].items()])}
"""
        fallback_insights += """
## Recommendations (Generic)
1. **Investigate Low-Performing Subjects**: Focus on subjects with ratings significantly below average to identify areas for improvement.
2. **Analyze Rating Correlations**: Understand why correlations between student feedback and instructor ratings vary across subjects.
3. **Gather Qualitative Feedback**: For subjects with polarized or unexpectedly low ratings, consider gathering more detailed qualitative feedback.
4. **Share Best Practices**: Identify practices from highly-rated courses/instructors and explore ways to share them.
**Note**: This is a fallback analysis. For detailed, AI-powered insights, please ensure the Google Generative AI integration is correctly configured with a valid API key and model name.
"""
        state["insights"] = fallback_insights
    return state
# Node 4: Compile HTML Report
def compile_report(state: EvaluationState) -> EvaluationState:
    """Compile all analysis and visualizations into an HTML report.

    Produces a single self-contained HTML document (figures serialized as
    JSON and rendered client-side with Plotly from a CDN) and stores it in
    ``state["report_html"]``.
    """
    visualizations = state["visualizations"]
    insights = state["insights"]
    processed_data = state["processed_data"]

    # Local import keeps datetime usage next to its single call site.
    from datetime import datetime
    date_generated = datetime.now().strftime("%B %d, %Y")

    # Per-subject summary table rows. Subject names come straight from the
    # uploaded file, so they are HTML-escaped; the numeric cells are not.
    subject_rows = ""
    for subject in processed_data['subjects']:
        subject_rows += f"""
<tr>
<td>{html.escape(subject['name'])}</td>
<td>{subject['avg_feedback']:.2f}</td>
<td>{subject['avg_instructor']:.2f}</td>
<td>{subject['correlation']:.2f}</td>
<td>{subject['sample_size']}</td>
</tr>
"""

    # For each figure: emit a placeholder <div> plus a matching JS snippet
    # that renders the plot into it (or writes an inline error message).
    plotly_js_calls = []
    visualizations_html_divs = []  # Renamed to avoid conflict
    for i, viz_data in enumerate(visualizations):
        div_id = f"viz-{i}"
        visualizations_html_divs.append(f"""
<div class="visualization">
<h3>{html.escape(viz_data['description'])}</h3>
<div id="{div_id}" style="width: 100%; height: 500px; min-height: 400px;"></div>
</div>
""")
        if viz_data['fig'] is not None:
            try:
                # Serialize the figure so the report file is self-contained.
                fig_json = viz_data['fig'].to_json()
                # Each plot gets its own JS try/catch so one broken chart
                # cannot prevent the others from rendering.
                plotly_js_calls.append(f"""
try {{
var figure_data_{i} = {fig_json};
Plotly.newPlot('{div_id}', figure_data_{i}.data, figure_data_{i}.layout, {{responsive: true}});
}} catch (plotError) {{
console.error('Error rendering plot {div_id}:', plotError);
document.getElementById('{div_id}').innerHTML = '<p style=\\"color:red; padding:10px;\\">Error rendering this chart: ' + plotError.message + '</p>';
}}
""")
            except Exception as e:
                # Serialization failed server-side: show an error in place.
                print(f"Error converting figure {viz_data['title']} to JSON: {e}")
                plotly_js_calls.append(f"""
document.getElementById('{div_id}').innerHTML = '<p style=\\"color:red; padding:10px;\\">Error preparing chart data for {html.escape(viz_data['description'])}.</p>';
""")
        else:
            # No figure object at all: show a neutral "no data" message.
            plotly_js_calls.append(f"""
document.getElementById('{div_id}').innerHTML = '<p style=\\"color:orange; padding:10px;\\">No data to display for {html.escape(viz_data['description'])}.</p>';
""")

    # Join all individual plot rendering calls
    plotly_script_content = "\n".join(plotly_js_calls)

    # Convert insights from Markdown to HTML
    # The 'nl2br' extension converts single newlines to <br>, useful for LLM output.
    # 'fenced_code' for code blocks, 'tables' for markdown tables.
    insights_html_content = markdown.markdown(insights, extensions=['fenced_code', 'tables', 'nl2br', 'extra'])

    # Full report template. Literal CSS/JS braces are doubled ({{ }})
    # because this is an f-string.
    report_html = f"""
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Akwel Evaluation Analysis Report</title>
<script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
<style>
body {{ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; line-height: 1.6; color: #333; max-width: 1200px; margin: 0 auto; padding: 20px; background-color: #f4f7f6; }}
h1, h2, h3 {{ color: #2c3e50; margin-top: 1.5em; margin-bottom: 0.5em; }}
h1 {{ font-size: 2.2em; border-bottom: 2px solid #3498db; padding-bottom: 0.3em; }}
h2 {{ font-size: 1.8em; border-bottom: 1px solid #b0bec5; padding-bottom: 0.2em; }}
h3 {{ font-size: 1.4em; }}
.visualization {{ margin: 30px 0; border: 1px solid #ddd; border-radius: 8px; padding: 20px; background-color: #fff; box-shadow: 0 2px 10px rgba(0,0,0,0.05); }}
.stats-container {{ display: flex; flex-wrap: wrap; justify-content: space-around; margin-bottom: 30px; gap: 20px; }}
.stat-card {{ background-color: #ffffff; border-radius: 10px; padding: 25px; margin-bottom: 20px; flex: 1; min-width: 280px; box-shadow: 0 4px 8px rgba(0,0,0,0.1); border-left: 5px solid #3498db; }}
.stat-card h3 {{ margin-top: 0; color: #3498db; }}
.stat-card p {{ font-size: 1.1em; }}
.insights {{ background-color: #eaf5ff; padding: 25px; border-radius: 10px; margin: 30px 0; border-left: 5px solid #1abc9c; }}
.insights h2 {{ color: #16a085; }}
.insights p, .insights li {{ font-size: 1.05em; }}
.insights ul, .insights ol {{ padding-left: 20px; }}
.insights strong {{ color: #2c3e50; }}
table {{ width: 100%; border-collapse: collapse; margin: 25px 0; box-shadow: 0 2px 8px rgba(0,0,0,0.05); }}
th, td {{ border: 1px solid #ddd; padding: 12px; text-align: left; }}
th {{ background-color: #3498db; color: white; font-weight: bold; }}
tr:nth-child(even) {{ background-color: #f8f9fa; }}
tr:hover {{ background-color: #e9ecef; }}
footer {{ text-align: center; margin-top: 40px; padding-top: 20px; border-top: 1px solid #ddd; font-size: 0.9em; color: #777; }}
</style>
</head>
<body>
<h1>Employee Evaluation Analysis Report</h1>
<div class="stats-container">
<div class="stat-card">
<h3>Overall Feedback Rating</h3>
<p style="font-size: 28px; font-weight: bold;">{processed_data['overall']['avg_feedback']:.2f}/5</p>
<p>Based on {processed_data['overall']['total_responses']} total responses</p>
</div>
<div class="stat-card">
<h3>Overall Instructor Rating</h3>
<p style="font-size: 28px; font-weight: bold;">{processed_data['overall']['avg_instructor']:.2f}/5</p>
<p>Correlation with feedback: {processed_data['overall']['correlation']:.2f}</p>
</div>
</div>
<h2>Performance by Subject</h2>
<table>
<thead><tr>
<th>Subject</th>
<th>Avg. Feedback</th>
<th>Avg. Instructor</th>
<th>Correlation</th>
<th>Sample Size</th>
</tr></thead>
<tbody>
{subject_rows}
</tbody>
</table>
<h2>Visualizations</h2>
{''.join(visualizations_html_divs)}
<div class="insights">
<h2>AI-Generated Insights</h2>
{insights_html_content}
</div>
<footer>
<p>Report generated on {date_generated}</p>
</footer>
<script>
// Wait for the DOM to be fully loaded before trying to render plots
document.addEventListener('DOMContentLoaded', function() {{
if (typeof Plotly !== 'undefined') {{
console.log("Plotly object found. Attempting to render charts.");
// This is where the generated JS for all plots goes
{plotly_script_content}
console.log("Plotly chart rendering JS execution sequence initiated.");
}} else {{
console.error("Plotly.js is not loaded! Charts cannot be displayed.");
const vizDivs = document.querySelectorAll('.visualization div[id^="viz-"]');
vizDivs.forEach(div => {{
div.innerHTML = "<p style='color:red; padding:10px;'>Error: Chart library (Plotly.js) did not load. Charts cannot be displayed. Check internet connection or CDN link.</p>";
}});
}}
}});
</script>
</body>
</html>
"""
    state["report_html"] = report_html
    return state
# Define the LangGraph workflow
def create_evaluation_graph():
    """Create the LangGraph workflow for the evaluation analytics system.

    Wires the four nodes into a strictly linear pipeline:
    preprocess_data -> create_visualizations -> generate_insights
    -> compile_report -> END, then compiles and returns the graph.
    """
    workflow = StateGraph(EvaluationState)

    # Pipeline stages, in execution order.
    stages = [
        ("preprocess_data", preprocess_data),
        ("create_visualizations", create_visualizations),
        ("generate_insights", generate_insights),
        ("compile_report", compile_report),
    ]
    for node_name, node_fn in stages:
        workflow.add_node(node_name, node_fn)

    # Chain consecutive stages, terminating the last one at END.
    for (src, _), (dst, _) in zip(stages, stages[1:]):
        workflow.add_edge(src, dst)
    workflow.add_edge("compile_report", END)

    workflow.set_entry_point("preprocess_data")
    return workflow.compile()
# Compile the pipeline once at import time; reused for every report request.
evaluation_app = create_evaluation_graph()
def generate_report_gradio(file_obj):
    """Gradio click handler: read the upload, run the pipeline, return the report.

    Returns a 3-tuple matching the ``click()`` outputs list:
    (report HTML preview, download-file update, download-visibility update).
    Every error path returns an inline HTML message and hides the download
    widget instead of raising.
    """
    # Fix: 'os' was used below (os.path.splitext/basename) but never imported
    # at module level, so the download-file step always raised NameError —
    # silently swallowed by the except and reported as a file-creation error.
    import os

    if file_obj is None:
        return "<p style='color:red; text-align:center; padding-top: 20px;'>Please upload a CSV or Excel file.</p>", None, gr.update(visible=False)

    # --- Load and validate the uploaded file ---
    try:
        file_path = file_obj.name
        if file_path.lower().endswith('.csv'):
            df = pd.read_csv(file_path)
        elif file_path.lower().endswith(('.xls', '.xlsx')):
            df = pd.read_excel(file_path)
        else:
            return "<p style='color:red; text-align:center; padding-top: 20px;'>Unsupported file type. Please upload a CSV or Excel file.</p>", None, gr.update(visible=False)
        required_columns = ['subject', 'feedback_rating', 'instructor_rating']
        if not all(col in df.columns for col in required_columns):
            missing = [col for col in required_columns if col not in df.columns]
            return f"<p style='color:red; text-align:center; padding-top: 20px;'>Missing required columns: {', '.join(missing)}.</p>", None, gr.update(visible=False)
    except Exception as e:
        return f"<p style='color:red; text-align:center; padding-top: 20px;'>Error reading file: {html.escape(str(e))}</p>", None, gr.update(visible=False)

    # --- Run the LangGraph pipeline ---
    initial_state = {"raw_data": df, "processed_data": {}, "visualizations": [], "insights": "", "report_html": ""}
    try:
        final_state = evaluation_app.invoke(initial_state)
        report_html_content = final_state["report_html"]
    except Exception as e:
        import traceback
        tb_str = traceback.format_exc()
        error_msg = f"An error occurred during report generation: {html.escape(str(e))}"
        print(f"{error_msg}\n{tb_str}")
        return f"<p style='color:red; text-align:center; padding-top: 20px;'>{error_msg}</p>", None, gr.update(visible=False)

    # --- Write the report to a temp file so it can be downloaded ---
    try:
        # Derive a friendly download label from the uploaded file's name.
        base_name = os.path.splitext(os.path.basename(file_path))[0]
        report_file_name = f"{base_name}_evaluation_report.html"
        with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".html", prefix=f"{base_name}_report_", encoding="utf-8") as tmp_file:
            tmp_file.write(report_html_content)
            temp_file_path = tmp_file.name
        return report_html_content, gr.update(value=temp_file_path, label=f"Download: {report_file_name}", visible=True), gr.update(visible=True)
    except Exception as e:
        return f"<p style='color:red; text-align:center; padding-top: 20px;'>Error creating download file: {html.escape(str(e))}</p>", None, gr.update(visible=False)
# --- GRADIO APP USING gr.Blocks ---
# Two-column layout: upload/controls on the left, report preview on the right.
with gr.Blocks(
    title="Evaluation Analytics System",
    theme=gr.themes.Soft(primary_hue=gr.themes.colors.blue, secondary_hue=gr.themes.colors.sky)
) as app:
    gr.Markdown(
        """
# 📊 Formation Evaluation Analytics Dashboard
Upload evaluation data (CSV or Excel format) to generate an interactive report.
Required columns: `subject`, `feedback_rating` (1-5), `instructor_rating` (1-5).
"""
    )
    with gr.Row():
        with gr.Column(scale=1, min_width=300):  # Input column
            file_input = gr.File(
                label="Upload Evaluation Data",
                file_types=['.csv', '.xls', '.xlsx'],
                # type="filepath" # 'filepath' is often better for NamedTemporaryFile
            )
            generate_button = gr.Button("Generate Report", variant="primary")
            # Placeholder for download link, initially hidden
            download_file_output = gr.File(label="Download Full Report", visible=False, interactive=False)
        with gr.Column(scale=3):  # Output column - larger for report preview
            report_output_html = gr.HTML(
                label="Analysis Report Preview",
                value="<p style='text-align:center; color:grey; padding-top:50px;'>Upload a file and click 'Generate Report' to see the analysis.</p>"
            )
    # Define interactions.
    # download_file_output is listed twice on purpose: the handler returns one
    # gr.update for its value/label and a second one for its visibility.
    generate_button.click(
        fn=generate_report_gradio,
        inputs=[file_input],
        outputs=[report_output_html, download_file_output, download_file_output]
    )

# NOTE(review): ssr_mode=False is passed explicitly — presumably to work
# around server-side-rendering issues on the hosting platform; confirm.
app.launch(ssr_mode=False)