# Repository path: akwel_performance / src / report_generator_llm.py
# Committed by: ArkenB
# Commit: "Create report_generator_llm.py" (7b2ec25, verified)
# src/report_generator_llm.py
import os
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage
import pandas as pd
import json # For pretty printing dicts if needed
import numpy as np
def generate_analysis_text(
    processed_data: dict,
    correlations: dict,
    *,
    model_name: str = "gemini-2.0-flash",
) -> str:
    """Generate the textual analysis sections of the report with an LLM.

    Builds a compact text summary of the processed data, embeds it in a
    prompt, sends the prompt to a Gemini chat model and returns the model's
    reply with any surrounding code fence removed. Does NOT handle charts or
    the full HTML structure.

    Args:
        processed_data: Mapping of precomputed results. The keys read here
            are 'avg_scores_subject' and 'avg_scores_dept' (used only when
            they hold a pandas DataFrame) and 'feedback_distribution' and
            'instructor_rating_distribution' (used only when truthy; only
            their keys are listed in the prompt).
        correlations: Mapping of label -> correlation value. Numeric values
            are shown with two decimals; anything else, including NaN, is
            shown as "N/A". May be empty or None.
        model_name: Gemini model identifier passed to the chat model.

    Returns:
        The cleaned analysis text, or a string starting with
        "Error generating analysis:" if the LLM call or the clean-up fails.

    Raises:
        Whatever the chat-model constructor or the summary building raises;
        only the LLM call and response clean-up are guarded.
    """
    llm = ChatGoogleGenerativeAI(model=model_name, google_api_key=os.getenv("GOOGLE_API_KEY"))

    # --- Data summary preparation (kept concise to limit prompt size) ---
    data_summary = "".join(
        [
            "Key Data Points:\n",
            _describe_scores(processed_data, "avg_scores_subject", "Average Scores per Subject"),
            _describe_scores(processed_data, "avg_scores_dept", "Average Scores per Department"),
            _describe_distributions(processed_data, "feedback_distribution", "Feedback"),
            _describe_distributions(processed_data, "instructor_rating_distribution", "Instructor rating"),
        ]
    )
    correlation_summary = _describe_correlations(correlations)

    prompt = f"""
You are an expert data analyst reviewing employee training feedback data.
Based ONLY on the following data summaries, provide textual analysis and insights.
Format your response clearly using simple paragraphs. Use the specified headings.
**DATA SUMMARIES:**
{data_summary}
{correlation_summary}
**YOUR TASK:**
Generate content for a report, following this structure:
## Executive Summary
[Your 2-3 paragraph overview of key findings, overall effectiveness, and general sentiment based on the data summaries.]
## Overall Performance Analysis
[Your analysis of general trends in average feedback and instructor ratings across subjects and departments, referring to the average score data.]
## Subject Performance Analysis
[Your analysis discussing subjects with notably high or low average scores or interesting distribution patterns (based on the available subjects listed in the summary). Compare subjects where appropriate.]
## Department Performance Analysis
[Your analysis discussing departments with notably high or low average scores. Mention any significant differences observed.]
## Correlation Insights
[Your discussion on the overall correlation value and what it implies about the relationship between feedback and instructor ratings.]
## Key Insights and Recommendations
[Your summary of 2-4 critical insights derived ONLY from the provided data summaries. Provide specific, actionable recommendations based on these insights.]
**IMPORTANT INSTRUCTIONS:**
- Write ONLY the text content for each section.
- Use the exact headings provided above (e.g., "## Executive Summary").
- Do NOT include any HTML tags like <html>, <head>, <body>, <div> (except simple <p> tags if needed for paragraph breaks, though markdown paragraphs are often better).
- Focus also on interpreting the provided data .
- Ensure your analysis is objective and directly supported by the data given.
- Never use astreisks
Generate the analysis text now.
"""

    print("Generating analysis text with LLM...")
    try:
        response = llm.invoke([HumanMessage(content=prompt)])
        analysis_text = _message_text(response.content)
        print("LLM analysis text generation complete.")
        return _strip_code_fence(analysis_text)
    except Exception as e:
        # Deliberate catch-all boundary: the caller receives the failure as
        # report text instead of an exception.
        print(f"Error calling LLM for analysis text: {e}")
        import traceback

        traceback.print_exc()
        return f"Error generating analysis: {e}"  # Return error message as text


def _describe_scores(processed_data: dict, key: str, label: str) -> str:
    """Render the average-score table stored under ``key`` as prompt text.

    Returns "" when the key is missing or does not hold a DataFrame, a
    "<label>: No data." line for an empty frame, and otherwise the frame as a
    markdown table with floats rounded to one decimal.
    """
    frame = processed_data.get(key)
    if not isinstance(frame, pd.DataFrame):
        return ""
    if frame.empty:
        return f"{label}: No data.\n\n"
    # NOTE: DataFrame.to_markdown needs the optional `tabulate` package.
    table = frame.to_markdown(index=False, floatfmt=".1f")
    return f"{label} (Rounded to 1 decimal):\n{table}\n\n"


def _describe_distributions(processed_data: dict, key: str, label: str) -> str:
    """List the entries for which a distribution is available under ``key``."""
    distributions = processed_data.get(key)
    if not distributions:
        return ""
    # str() each key so non-string keys (e.g. integer ids) cannot break join.
    names = ", ".join(str(name) for name in distributions)
    return f"{label} distributions available for: {names}\n\n"


def _describe_correlations(correlations: dict) -> str:
    """Format the correlation mapping as a bulleted prompt section."""
    lines = ["Correlation between Feedback Stars and Instructor Rating:\n"]
    if correlations:
        for key, value in correlations.items():
            is_number = isinstance(value, (int, float, np.number))
            # NaN is numeric but carries no information; report it as N/A
            # like any other unusable value.
            if is_number and not pd.isna(value):
                lines.append(f"- {key!s}: {value:.2f}\n")
            else:
                lines.append(f"- {key!s}: N/A\n")
    else:
        lines.append("No correlation data.\n")
    lines.append("\n")
    return "".join(lines)


def _message_text(content) -> str:
    """Return the text of a chat message whose content is a str or a list.

    For list content, plain strings and dict parts carrying a string "text"
    entry are concatenated; other parts are ignored.
    """
    if isinstance(content, str):
        return content
    pieces = []
    for part in content:
        if isinstance(part, str):
            pieces.append(part)
        elif isinstance(part, dict) and isinstance(part.get("text"), str):
            pieces.append(part["text"])
    return "".join(pieces)


def _strip_code_fence(text: str) -> str:
    """Remove a wrapping ``` fence, including a language tag on the opener."""
    text = text.strip()
    if text.startswith("```"):
        text = text[3:]
        tag, newline, remainder = text.partition("\n")
        # An opener such as "```markdown" leaves its tag behind once the
        # backticks are cut; drop that line when it is a single bare word.
        if newline and tag.strip().isalnum():
            text = remainder
    if text.endswith("```"):
        text = text[:-3]
    return text.strip()