# formation / app.py — Formation Evaluation Analytics (Gradio app)
# Author: ArkenB — commit 5847257 (verified): "Update app.py"
import html
import json
import os
import tempfile
from typing import TypedDict, List, Dict, Any, Annotated

import gradio as gr
import markdown
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from langchain.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langgraph.graph import StateGraph, END
class EvaluationState(TypedDict):
    """Shared state threaded through the LangGraph pipeline nodes."""
    raw_data: pd.DataFrame                 # uploaded evaluation data (set by the Gradio handler)
    processed_data: Dict[str, Any]         # per-subject + overall metrics (set by preprocess_data)
    visualizations: List[Dict[str, Any]]   # plotly figures with title/description (set by create_visualizations)
    insights: str                          # Markdown narrative analysis (set by generate_insights)
    report_html: str                       # final standalone HTML report (set by compile_report)
# Node 1: Data Preprocessing
def preprocess_data(state: "EvaluationState") -> "EvaluationState":
    """Process the raw CSV/Excel data and extract key metrics.

    Expects ``state["raw_data"]`` to contain the columns ``subject``,
    ``feedback_rating`` (1-5) and ``instructor_rating`` (1-5).

    Populates ``state["processed_data"]`` with, per subject: the 1-5 rating
    distributions, average ratings, the feedback/instructor correlation and
    the sample size — plus an ``overall`` summary — then returns the state.
    """
    df = state["raw_data"]

    # Coerce ratings to numeric; malformed entries become NaN ('coerce')
    # so they are excluded from the counts/means/correlations below.
    if 'feedback_rating' in df.columns:
        df['feedback_rating'] = pd.to_numeric(df['feedback_rating'], errors='coerce')
    if 'instructor_rating' in df.columns:
        df['instructor_rating'] = pd.to_numeric(df['instructor_rating'], errors='coerce')

    processed_data = {'subjects': []}

    for subject in df['subject'].unique():
        subject_data = df[df['subject'] == subject]

        # Count responses at each rating level 1-5 for both rating types.
        feedback_distribution = {}
        instructor_distribution = {}
        for i in range(1, 6):
            feedback_distribution[i] = int(subject_data['feedback_rating'].eq(i).sum())
            instructor_distribution[i] = int(subject_data['instructor_rating'].eq(i).sum())

        avg_feedback = subject_data['feedback_rating'].mean()
        avg_instructor = subject_data['instructor_rating'].mean()

        # Pearson correlation between the two rating columns. pandas yields
        # NaN for degenerate cases (single response, constant column);
        # report those as 0.0 so downstream formatting never sees NaN.
        correlation = subject_data['feedback_rating'].corr(subject_data['instructor_rating'])

        processed_data['subjects'].append({
            'name': subject,
            'feedback_distribution': feedback_distribution,
            'instructor_distribution': instructor_distribution,
            'avg_feedback': avg_feedback,
            'avg_instructor': avg_instructor,
            'correlation': correlation if pd.notna(correlation) else 0.0,
            'sample_size': len(subject_data)
        })

    # Overall metrics. Fix: compute the overall correlation once instead of
    # twice (the original re-evaluated df[...].corr(...) inside the NaN check).
    overall_correlation = df['feedback_rating'].corr(df['instructor_rating'])
    processed_data['overall'] = {
        'avg_feedback': df['feedback_rating'].mean(),
        'avg_instructor': df['instructor_rating'].mean(),
        'total_responses': len(df),
        'correlation': overall_correlation if pd.notna(overall_correlation) else 0.0
    }

    state["processed_data"] = processed_data
    return state
# Node 2: Create Visualizations
def create_visualizations(state: EvaluationState) -> EvaluationState:
    """Create plotly visualizations based on the processed data.

    Builds up to four charts — two grouped bar charts (feedback and
    instructor rating distributions), a radar chart of average ratings, and
    a correlation scatter plot — and stores them in
    ``state["visualizations"]`` as dicts with ``title``, ``fig`` and
    ``description`` keys. Charts whose underlying data is empty are skipped.
    """
    processed = state["processed_data"]
    charts = []

    def long_form_rows(dist_key, type_label):
        # Flatten each subject's {rating: count} mapping into long-form
        # records suitable for px.bar.
        records = []
        for subj in processed['subjects']:
            for rating, count in subj[dist_key].items():
                records.append({
                    'Subject': subj['name'],
                    'Rating': rating,
                    'Count': count,
                    'Type': type_label,
                })
        return records

    # 1. Grouped bar chart: feedback-rating distribution per subject.
    feedback_rows = long_form_rows('feedback_distribution', 'Feedback')
    if feedback_rows:  # skip the chart entirely when there is no data
        charts.append({
            'title': 'feedback_distribution',
            'fig': px.bar(
                pd.DataFrame(feedback_rows),
                x='Rating',
                y='Count',
                color='Subject',
                title='Distribution of Feedback Ratings by Subject',
                labels={'Rating': 'Rating (1-5)', 'Count': 'Number of Responses'},
                barmode='group'
            ),
            'description': 'Distribution of feedback ratings across different subjects'
        })

    # 2. Grouped bar chart: instructor-rating distribution per subject.
    instructor_rows = long_form_rows('instructor_distribution', 'Instructor')
    if instructor_rows:
        charts.append({
            'title': 'instructor_distribution',
            'fig': px.bar(
                pd.DataFrame(instructor_rows),
                x='Rating',
                y='Count',
                color='Subject',
                title='Distribution of Instructor Ratings by Subject',
                labels={'Rating': 'Rating (1-5)', 'Count': 'Number of Responses'},
                barmode='group'
            ),
            'description': 'Distribution of instructor ratings across different subjects'
        })

    # 3. Radar chart comparing the two average ratings across subjects.
    subject_names = [subj['name'] for subj in processed['subjects']]
    if subject_names:
        radar = go.Figure()
        radar.add_trace(go.Scatterpolar(
            r=[subj['avg_feedback'] for subj in processed['subjects']],
            theta=subject_names,
            fill='toself',
            name='Avg. Feedback Rating'
        ))
        radar.add_trace(go.Scatterpolar(
            r=[subj['avg_instructor'] for subj in processed['subjects']],
            theta=subject_names,
            fill='toself',
            name='Avg. Instructor Rating'
        ))
        # Fixed 0-5 radial axis so both traces share the rating scale.
        radar.update_layout(
            polar=dict(
                radialaxis=dict(
                    visible=True,
                    range=[0, 5]
                )
            ),
            title_text="Average Ratings Comparison by Subject"
        )
        charts.append({
            'title': 'radar_comparison',
            'fig': radar,
            'description': 'Comparison of average feedback and instructor ratings by subject'
        })

    # 4. Scatter of per-subject correlation, point size = sample size,
    #    diverging colour scale centred at zero correlation.
    corr_rows = [
        {
            'Subject': subj['name'],
            'Correlation': subj['correlation'],
            'Sample Size': subj['sample_size'],
        }
        for subj in processed['subjects']
    ]
    if corr_rows:
        charts.append({
            'title': 'correlation_analysis',
            'fig': px.scatter(
                pd.DataFrame(corr_rows),
                x='Subject',
                y='Correlation',
                size='Sample Size',
                title='Correlation between Feedback and Instructor Ratings',
                labels={'Correlation': 'Pearson Correlation Coefficient'},
                color='Correlation',
                color_continuous_scale=px.colors.diverging.RdBu,
                color_continuous_midpoint=0,
                hover_name='Subject',
                hover_data={'Subject': False, 'Correlation': ':.2f', 'Sample Size': True}
            ),
            'description': 'Correlation between feedback and instructor ratings by subject'
        })

    state["visualizations"] = charts
    return state
# Node 3: Generate Insights using LangChain + Google Generative AI
def generate_insights(state: "EvaluationState") -> "EvaluationState":
    """Generate narrative insights based on the data analysis using GenAI.

    Tries Google Generative AI via LangChain. If the API key is missing or
    any step of the LLM call fails, a deterministic Markdown fallback
    summary is built from the processed metrics instead, so report
    generation never fails at this stage.
    """
    # Fix: 'os' was referenced below but never imported at module level in
    # the original file, so this node crashed with NameError before the
    # try/except could provide the fallback. A local import guarantees it.
    import os

    processed_data = state["processed_data"]
    # Ensure GOOGLE_API_KEY is set in your environment for this to work
    # Example: export GOOGLE_API_KEY="your_actual_api_key"
    google_api_key = os.getenv("GOOGLE_API_KEY")
    try:
        if not google_api_key:
            raise ValueError("GOOGLE_API_KEY not found in environment.")

        # Prepare a plain-text summary of the metrics for the LLM prompt.
        subjects_info = []
        for subject in processed_data['subjects']:
            subject_info = f"Subject: {subject['name']}\n"
            subject_info += f" Average Feedback Rating: {subject['avg_feedback']:.2f}/5\n"
            subject_info += f" Average Instructor Rating: {subject['avg_instructor']:.2f}/5\n"
            subject_info += f" Correlation between Feedback and Instructor Rating: {subject['correlation']:.2f}\n"
            subject_info += f" Sample Size: {subject['sample_size']} responses\n"
            subject_info += " Feedback Rating Distribution (Rating: Count): "
            subject_info += ", ".join([f"{k}: {v}" for k, v in subject['feedback_distribution'].items()])
            subject_info += "\n Instructor Rating Distribution (Rating: Count): "
            subject_info += ", ".join([f"{k}: {v}" for k, v in subject['instructor_distribution'].items()])
            subjects_info.append(subject_info)

        overall_info = f"Overall Average Feedback Rating: {processed_data['overall']['avg_feedback']:.2f}/5\n"
        overall_info += f"Overall Average Instructor Rating: {processed_data['overall']['avg_instructor']:.2f}/5\n"
        overall_info += f"Overall Correlation: {processed_data['overall']['correlation']:.2f}\n"
        overall_info += f"Total Responses: {processed_data['overall']['total_responses']}"

        prompt = ChatPromptTemplate.from_messages([
            ("system", """You are an expert data analyst . Your task is to analyze employees evaluation data and provide comprehensive, actionable insights.
Format your response in Markdown.
Your analysis should cover:
1. **Executive Summary**: A brief overview of key findings.
2. **Overall Performance Patterns**: Discuss general trends, average ratings, and overall sentiment.
3. **Subject-Specific Analysis**:
* Highlight subjects with notably high or low ratings (both feedback and instructor).
* Discuss variations between subjects.
* Analyze the correlation between feedback and instructor ratings for each subject.
4. **Key Observations & Potential Issues**: Identify any outliers, significant discrepancies (e.g., large gap between feedback and instructor rating for a subject), or patterns that warrant attention.
5. **Actionable Recommendations**: Based on the data, provide specific, data-driven recommendations for improvement (e.g., for specific subjects, for instructor development, for curriculum adjustments).
Be specific, use the data provided, and ensure your insights are clear and well-structured.
"""),
            ("user", """Please analyze the following student evaluation data:
**Individual Subject Information:**
{subjects_info}
**Overall Statistics:**
{overall_info}
Provide your analysis in Markdown format.
""")
        ])

        llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest", api_key=google_api_key)  # Example model
        chain = prompt | llm
        response = chain.invoke({
            "subjects_info": "\n\n".join(subjects_info),
            "overall_info": overall_info
        })
        state["insights"] = response.content
    except Exception as e:
        # Any failure (missing key, network, quota, model errors) lands here;
        # log it and emit a deterministic Markdown fallback instead.
        import traceback
        error_message = f"Error in generate_insights: {e}\n{traceback.format_exc()}"
        print(error_message)
        fallback_insights = f"""
# AI-Generated Insights (Fallback)
Due to an issue connecting to the AI service, a basic analysis is provided below.
## Executive Summary
The overall average feedback rating is **{processed_data['overall']['avg_feedback']:.2f}/5**, and the overall average instructor rating is **{processed_data['overall']['avg_instructor']:.2f}/5**.
The correlation between feedback and instructor ratings across all subjects is **{processed_data['overall']['correlation']:.2f}**.
A total of **{processed_data['overall']['total_responses']}** responses were analyzed.
## Overall Performance Patterns
The data indicates a general performance level around the average ratings mentioned. Further subject-specific details are below.
## Subject-Specific Analysis
"""
        for subject in processed_data['subjects']:
            fallback_insights += f"""
### {subject['name']}
- **Average Feedback Rating**: {subject['avg_feedback']:.2f}/5
- **Average Instructor Rating**: {subject['avg_instructor']:.2f}/5
- **Correlation (Feedback vs. Instructor)**: {subject['correlation']:.2f}
- **Sample Size**: {subject['sample_size']} responses
- **Feedback Distribution** (Rating: Count): {', '.join([f'{k}: {v}' for k,v in subject['feedback_distribution'].items()])}
- **Instructor Distribution** (Rating: Count): {', '.join([f'{k}: {v}' for k,v in subject['instructor_distribution'].items()])}
"""
        fallback_insights += """
## Recommendations (Generic)
1. **Investigate Low-Performing Subjects**: Focus on subjects with ratings significantly below average to identify areas for improvement.
2. **Analyze Rating Correlations**: Understand why correlations between student feedback and instructor ratings vary across subjects.
3. **Gather Qualitative Feedback**: For subjects with polarized or unexpectedly low ratings, consider gathering more detailed qualitative feedback.
4. **Share Best Practices**: Identify practices from highly-rated courses/instructors and explore ways to share them.
**Note**: This is a fallback analysis. For detailed, AI-powered insights, please ensure the Google Generative AI integration is correctly configured with a valid API key and model name.
"""
        state["insights"] = fallback_insights
    return state
# Node 4: Compile HTML Report
def compile_report(state: EvaluationState) -> EvaluationState:
    """Compile all analysis and visualizations into an HTML report.

    Produces a single self-contained HTML document (figures serialized as
    JSON and rendered client-side with Plotly from a CDN) and stores it in
    ``state["report_html"]``.
    """
    visualizations = state["visualizations"]
    insights = state["insights"]
    processed_data = state["processed_data"]

    # Local import keeps datetime usage next to its single call site.
    from datetime import datetime
    date_generated = datetime.now().strftime("%B %d, %Y")

    # Per-subject summary table rows. Subject names come straight from the
    # uploaded file, so they are HTML-escaped; the numeric cells are not.
    subject_rows = ""
    for subject in processed_data['subjects']:
        subject_rows += f"""
<tr>
<td>{html.escape(subject['name'])}</td>
<td>{subject['avg_feedback']:.2f}</td>
<td>{subject['avg_instructor']:.2f}</td>
<td>{subject['correlation']:.2f}</td>
<td>{subject['sample_size']}</td>
</tr>
"""

    # For each figure: emit a placeholder <div> plus a matching JS snippet
    # that renders the plot into it (or writes an inline error message).
    plotly_js_calls = []
    visualizations_html_divs = []  # Renamed to avoid conflict
    for i, viz_data in enumerate(visualizations):
        div_id = f"viz-{i}"
        visualizations_html_divs.append(f"""
<div class="visualization">
<h3>{html.escape(viz_data['description'])}</h3>
<div id="{div_id}" style="width: 100%; height: 500px; min-height: 400px;"></div>
</div>
""")
        if viz_data['fig'] is not None:
            try:
                # Serialize the figure so the report file is self-contained.
                fig_json = viz_data['fig'].to_json()
                # Each plot gets its own JS try/catch so one broken chart
                # cannot prevent the others from rendering.
                plotly_js_calls.append(f"""
try {{
var figure_data_{i} = {fig_json};
Plotly.newPlot('{div_id}', figure_data_{i}.data, figure_data_{i}.layout, {{responsive: true}});
}} catch (plotError) {{
console.error('Error rendering plot {div_id}:', plotError);
document.getElementById('{div_id}').innerHTML = '<p style=\\"color:red; padding:10px;\\">Error rendering this chart: ' + plotError.message + '</p>';
}}
""")
            except Exception as e:
                # Serialization failed server-side: show an error in place.
                print(f"Error converting figure {viz_data['title']} to JSON: {e}")
                plotly_js_calls.append(f"""
document.getElementById('{div_id}').innerHTML = '<p style=\\"color:red; padding:10px;\\">Error preparing chart data for {html.escape(viz_data['description'])}.</p>';
""")
        else:
            # No figure object at all: show a neutral "no data" message.
            plotly_js_calls.append(f"""
document.getElementById('{div_id}').innerHTML = '<p style=\\"color:orange; padding:10px;\\">No data to display for {html.escape(viz_data['description'])}.</p>';
""")

    # Join all individual plot rendering calls
    plotly_script_content = "\n".join(plotly_js_calls)

    # Convert insights from Markdown to HTML
    # The 'nl2br' extension converts single newlines to <br>, useful for LLM output.
    # 'fenced_code' for code blocks, 'tables' for markdown tables.
    insights_html_content = markdown.markdown(insights, extensions=['fenced_code', 'tables', 'nl2br', 'extra'])

    # Full report template. Literal CSS/JS braces are doubled ({{ }})
    # because this is an f-string.
    report_html = f"""
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Akwel Evaluation Analysis Report</title>
<script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
<style>
body {{ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; line-height: 1.6; color: #333; max-width: 1200px; margin: 0 auto; padding: 20px; background-color: #f4f7f6; }}
h1, h2, h3 {{ color: #2c3e50; margin-top: 1.5em; margin-bottom: 0.5em; }}
h1 {{ font-size: 2.2em; border-bottom: 2px solid #3498db; padding-bottom: 0.3em; }}
h2 {{ font-size: 1.8em; border-bottom: 1px solid #b0bec5; padding-bottom: 0.2em; }}
h3 {{ font-size: 1.4em; }}
.visualization {{ margin: 30px 0; border: 1px solid #ddd; border-radius: 8px; padding: 20px; background-color: #fff; box-shadow: 0 2px 10px rgba(0,0,0,0.05); }}
.stats-container {{ display: flex; flex-wrap: wrap; justify-content: space-around; margin-bottom: 30px; gap: 20px; }}
.stat-card {{ background-color: #ffffff; border-radius: 10px; padding: 25px; margin-bottom: 20px; flex: 1; min-width: 280px; box-shadow: 0 4px 8px rgba(0,0,0,0.1); border-left: 5px solid #3498db; }}
.stat-card h3 {{ margin-top: 0; color: #3498db; }}
.stat-card p {{ font-size: 1.1em; }}
.insights {{ background-color: #eaf5ff; padding: 25px; border-radius: 10px; margin: 30px 0; border-left: 5px solid #1abc9c; }}
.insights h2 {{ color: #16a085; }}
.insights p, .insights li {{ font-size: 1.05em; }}
.insights ul, .insights ol {{ padding-left: 20px; }}
.insights strong {{ color: #2c3e50; }}
table {{ width: 100%; border-collapse: collapse; margin: 25px 0; box-shadow: 0 2px 8px rgba(0,0,0,0.05); }}
th, td {{ border: 1px solid #ddd; padding: 12px; text-align: left; }}
th {{ background-color: #3498db; color: white; font-weight: bold; }}
tr:nth-child(even) {{ background-color: #f8f9fa; }}
tr:hover {{ background-color: #e9ecef; }}
footer {{ text-align: center; margin-top: 40px; padding-top: 20px; border-top: 1px solid #ddd; font-size: 0.9em; color: #777; }}
</style>
</head>
<body>
<h1>Employee Evaluation Analysis Report</h1>
<div class="stats-container">
<div class="stat-card">
<h3>Overall Feedback Rating</h3>
<p style="font-size: 28px; font-weight: bold;">{processed_data['overall']['avg_feedback']:.2f}/5</p>
<p>Based on {processed_data['overall']['total_responses']} total responses</p>
</div>
<div class="stat-card">
<h3>Overall Instructor Rating</h3>
<p style="font-size: 28px; font-weight: bold;">{processed_data['overall']['avg_instructor']:.2f}/5</p>
<p>Correlation with feedback: {processed_data['overall']['correlation']:.2f}</p>
</div>
</div>
<h2>Performance by Subject</h2>
<table>
<thead><tr>
<th>Subject</th>
<th>Avg. Feedback</th>
<th>Avg. Instructor</th>
<th>Correlation</th>
<th>Sample Size</th>
</tr></thead>
<tbody>
{subject_rows}
</tbody>
</table>
<h2>Visualizations</h2>
{''.join(visualizations_html_divs)}
<div class="insights">
<h2>AI-Generated Insights</h2>
{insights_html_content}
</div>
<footer>
<p>Report generated on {date_generated}</p>
</footer>
<script>
// Wait for the DOM to be fully loaded before trying to render plots
document.addEventListener('DOMContentLoaded', function() {{
if (typeof Plotly !== 'undefined') {{
console.log("Plotly object found. Attempting to render charts.");
// This is where the generated JS for all plots goes
{plotly_script_content}
console.log("Plotly chart rendering JS execution sequence initiated.");
}} else {{
console.error("Plotly.js is not loaded! Charts cannot be displayed.");
const vizDivs = document.querySelectorAll('.visualization div[id^="viz-"]');
vizDivs.forEach(div => {{
div.innerHTML = "<p style='color:red; padding:10px;'>Error: Chart library (Plotly.js) did not load. Charts cannot be displayed. Check internet connection or CDN link.</p>";
}});
}}
}});
</script>
</body>
</html>
"""
    state["report_html"] = report_html
    return state
# Define the LangGraph workflow
def create_evaluation_graph():
    """Create the LangGraph workflow for the evaluation analytics system.

    Wires the four nodes into a strictly linear pipeline:
    preprocess_data -> create_visualizations -> generate_insights
    -> compile_report -> END, then compiles and returns the graph.
    """
    workflow = StateGraph(EvaluationState)

    # Pipeline stages, in execution order.
    stages = [
        ("preprocess_data", preprocess_data),
        ("create_visualizations", create_visualizations),
        ("generate_insights", generate_insights),
        ("compile_report", compile_report),
    ]
    for node_name, node_fn in stages:
        workflow.add_node(node_name, node_fn)

    # Chain consecutive stages, terminating the last one at END.
    for (src, _), (dst, _) in zip(stages, stages[1:]):
        workflow.add_edge(src, dst)
    workflow.add_edge("compile_report", END)

    workflow.set_entry_point("preprocess_data")
    return workflow.compile()
# Compile the pipeline once at import time; reused for every report request.
evaluation_app = create_evaluation_graph()
def generate_report_gradio(file_obj):
    """Gradio click handler: read the upload, run the pipeline, return the report.

    Returns a 3-tuple matching the ``click()`` outputs list:
    (report HTML preview, download-file update, download-visibility update).
    Every error path returns an inline HTML message and hides the download
    widget instead of raising.
    """
    # Fix: 'os' was used below (os.path.splitext/basename) but never imported
    # at module level, so the download-file step always raised NameError —
    # silently swallowed by the except and reported as a file-creation error.
    import os

    if file_obj is None:
        return "<p style='color:red; text-align:center; padding-top: 20px;'>Please upload a CSV or Excel file.</p>", None, gr.update(visible=False)

    # --- Load and validate the uploaded file ---
    try:
        file_path = file_obj.name
        if file_path.lower().endswith('.csv'):
            df = pd.read_csv(file_path)
        elif file_path.lower().endswith(('.xls', '.xlsx')):
            df = pd.read_excel(file_path)
        else:
            return "<p style='color:red; text-align:center; padding-top: 20px;'>Unsupported file type. Please upload a CSV or Excel file.</p>", None, gr.update(visible=False)
        required_columns = ['subject', 'feedback_rating', 'instructor_rating']
        if not all(col in df.columns for col in required_columns):
            missing = [col for col in required_columns if col not in df.columns]
            return f"<p style='color:red; text-align:center; padding-top: 20px;'>Missing required columns: {', '.join(missing)}.</p>", None, gr.update(visible=False)
    except Exception as e:
        return f"<p style='color:red; text-align:center; padding-top: 20px;'>Error reading file: {html.escape(str(e))}</p>", None, gr.update(visible=False)

    # --- Run the LangGraph pipeline ---
    initial_state = {"raw_data": df, "processed_data": {}, "visualizations": [], "insights": "", "report_html": ""}
    try:
        final_state = evaluation_app.invoke(initial_state)
        report_html_content = final_state["report_html"]
    except Exception as e:
        import traceback
        tb_str = traceback.format_exc()
        error_msg = f"An error occurred during report generation: {html.escape(str(e))}"
        print(f"{error_msg}\n{tb_str}")
        return f"<p style='color:red; text-align:center; padding-top: 20px;'>{error_msg}</p>", None, gr.update(visible=False)

    # --- Write the report to a temp file so it can be downloaded ---
    try:
        # Derive a friendly download label from the uploaded file's name.
        base_name = os.path.splitext(os.path.basename(file_path))[0]
        report_file_name = f"{base_name}_evaluation_report.html"
        with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".html", prefix=f"{base_name}_report_", encoding="utf-8") as tmp_file:
            tmp_file.write(report_html_content)
            temp_file_path = tmp_file.name
        return report_html_content, gr.update(value=temp_file_path, label=f"Download: {report_file_name}", visible=True), gr.update(visible=True)
    except Exception as e:
        return f"<p style='color:red; text-align:center; padding-top: 20px;'>Error creating download file: {html.escape(str(e))}</p>", None, gr.update(visible=False)
# --- GRADIO APP USING gr.Blocks ---
# Two-column layout: upload/controls on the left, report preview on the right.
with gr.Blocks(
    title="Evaluation Analytics System",
    theme=gr.themes.Soft(primary_hue=gr.themes.colors.blue, secondary_hue=gr.themes.colors.sky)
) as app:
    gr.Markdown(
        """
# 📊 Formation Evaluation Analytics Dashboard
Upload evaluation data (CSV or Excel format) to generate an interactive report.
Required columns: `subject`, `feedback_rating` (1-5), `instructor_rating` (1-5).
"""
    )
    with gr.Row():
        with gr.Column(scale=1, min_width=300):  # Input column
            file_input = gr.File(
                label="Upload Evaluation Data",
                file_types=['.csv', '.xls', '.xlsx'],
                # type="filepath" # 'filepath' is often better for NamedTemporaryFile
            )
            generate_button = gr.Button("Generate Report", variant="primary")
            # Placeholder for download link, initially hidden
            download_file_output = gr.File(label="Download Full Report", visible=False, interactive=False)
        with gr.Column(scale=3):  # Output column - larger for report preview
            report_output_html = gr.HTML(
                label="Analysis Report Preview",
                value="<p style='text-align:center; color:grey; padding-top:50px;'>Upload a file and click 'Generate Report' to see the analysis.</p>"
            )
    # Define interactions.
    # download_file_output is listed twice on purpose: the handler returns one
    # gr.update for its value/label and a second one for its visibility.
    generate_button.click(
        fn=generate_report_gradio,
        inputs=[file_input],
        outputs=[report_output_html, download_file_output, download_file_output]
    )

# NOTE(review): ssr_mode=False is passed explicitly — presumably to work
# around server-side-rendering issues on the hosting platform; confirm.
app.launch(ssr_mode=False)