Spaces:
Paused
Paused
| # src/report_generator_llm.py | |
| import os | |
| from langchain_google_genai import ChatGoogleGenerativeAI | |
| from langchain_core.messages import HumanMessage | |
| import pandas as pd | |
| import json # For pretty printing dicts if needed | |
| import numpy as np | |
def _summarize_scores(processed_data: dict, key: str, title: str) -> str:
    """Render the DataFrame stored under *key* as a titled markdown table."""
    frame = processed_data.get(key)
    if not isinstance(frame, pd.DataFrame):
        # Missing or unexpected entries contribute nothing to the summary.
        return ""
    if frame.empty:
        return f"{title}: No data.\n\n"
    table = frame.to_markdown(index=False, floatfmt=".1f")
    return f"{title} (Rounded to 1 decimal):\n" + table + "\n\n"


def _summarize_distribution(processed_data: dict, key: str, label: str) -> str:
    """List the names for which a distribution is stored under *key*."""
    distribution = processed_data.get(key)
    if not distribution:
        return ""
    # Keys are stringified so non-string names (e.g. numeric IDs) cannot
    # make str.join raise TypeError.
    names = ", ".join(str(name) for name in distribution.keys())
    return f"{label} available for: {names}\n\n"


def _format_correlations(correlations: dict) -> str:
    """Format correlation values as a bullet list, using N/A for unusable ones."""
    lines = ["Correlation between Feedback Stars and Instructor Rating:"]
    if correlations:
        for key, value in correlations.items():
            # bool is a subclass of int but is never a meaningful correlation.
            is_number = (
                isinstance(value, (int, float, np.number))
                and not isinstance(value, bool)
            )
            # NaN would otherwise be rendered as the literal text "nan".
            if is_number and not pd.isna(value):
                lines.append(f"- {key}: {value:.2f}")
            else:
                lines.append(f"- {key}: N/A")
    else:
        lines.append("No correlation data.")
    return "\n".join(lines) + "\n\n"


def _strip_code_fences(text: str) -> str:
    """Remove a wrapping markdown code fence, including its language tag."""
    cleaned = text.strip()
    if cleaned.startswith("```"):
        cleaned = cleaned[3:]
        # An opening fence may carry a language tag (e.g. ```markdown); drop
        # that tag line too so it does not leak into the report text.
        first_line, newline, remainder = cleaned.partition("\n")
        if newline and first_line.strip().isalnum():
            cleaned = remainder
    if cleaned.endswith("```"):
        cleaned = cleaned[:-3]
    return cleaned.strip()


def generate_analysis_text(
    processed_data: dict,
    correlations: dict
) -> str:
    """
    Generate textual analysis and insights from processed data using an LLM.

    Builds a compact text summary of the inputs, embeds it in a fixed prompt,
    sends the prompt to the Gemini chat model and returns the cleaned reply.
    Does NOT handle charts or full HTML structure.

    Args:
        processed_data: Mapping that may contain the keys
            'avg_scores_subject' and 'avg_scores_dept' (pandas DataFrames),
            and 'feedback_distribution' and 'instructor_rating_distribution'
            (mappings whose keys are listed in the summary). Absent keys are
            skipped.
        correlations: Mapping of label to correlation value. Values that are
            not real numbers (or are NaN) are reported as "N/A".

    Returns:
        The model's analysis text with surrounding whitespace and any wrapping
        code fence removed. If the LLM call fails, the exception is printed
        with its traceback and an "Error generating analysis: ..." message is
        returned instead of raising.
    """
    # The API key is read from the GOOGLE_API_KEY environment variable.
    llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=os.getenv("GOOGLE_API_KEY"))

    # --- Data Summary Preparation (Keep this concise) ---
    data_summary = "".join([
        "Key Data Points:\n",
        _summarize_scores(
            processed_data, "avg_scores_subject", "Average Scores per Subject"
        ),
        _summarize_scores(
            processed_data, "avg_scores_dept", "Average Scores per Department"
        ),
        _summarize_distribution(
            processed_data, "feedback_distribution", "Feedback distributions"
        ),
        _summarize_distribution(
            processed_data,
            "instructor_rating_distribution",
            "Instructor rating distributions",
        ),
    ])
    correlation_summary = _format_correlations(correlations)

    prompt = f"""
You are an expert data analyst reviewing employee training feedback data.
Based ONLY on the following data summaries, provide textual analysis and insights.
Format your response clearly using simple paragraphs. Use the specified headings.
**DATA SUMMARIES:**
{data_summary}
{correlation_summary}
**YOUR TASK:**
Generate content for a report, following this structure:
## Executive Summary
[Your 2-3 paragraph overview of key findings, overall effectiveness, and general sentiment based on the data summaries.]
## Overall Performance Analysis
[Your analysis of general trends in average feedback and instructor ratings across subjects and departments, referring to the average score data.]
## Subject Performance Analysis
[Your analysis discussing subjects with notably high or low average scores or interesting distribution patterns (based on the available subjects listed in the summary). Compare subjects where appropriate.]
## Department Performance Analysis
[Your analysis discussing departments with notably high or low average scores. Mention any significant differences observed.]
## Correlation Insights
[Your discussion on the overall correlation value and what it implies about the relationship between feedback and instructor ratings.]
## Key Insights and Recommendations
[Your summary of 2-4 critical insights derived ONLY from the provided data summaries. Provide specific, actionable recommendations based on these insights.]
**IMPORTANT INSTRUCTIONS:**
- Write ONLY the text content for each section.
- Use the exact headings provided above (e.g., "## Executive Summary").
- Do NOT include any HTML tags like <html>, <head>, <body>, <div> (except simple <p> tags if needed for paragraph breaks, though markdown paragraphs are often better).
- Focus also on interpreting the provided data .
- Ensure your analysis is objective and directly supported by the data given.
- Never use astreisks
Generate the analysis text now.
"""
    print("Generating analysis text with LLM...")
    try:
        response = llm.invoke([HumanMessage(content=prompt)])
        analysis_text = response.content
        print("LLM analysis text generation complete.")
        # Basic cleaning (remove potential markdown code fences)
        return _strip_code_fences(analysis_text)
    except Exception as e:
        # Best-effort boundary: report the failure as text so the caller
        # still receives a string.
        print(f"Error calling LLM for analysis text: {e}")
        import traceback
        traceback.print_exc()
        return f"Error generating analysis: {e}"  # Return error message as text