| | import json |
| | import typing_extensions as typing |
| | import google.generativeai as genai |
| | from typing import List, Dict, Any |
| | import numpy as np |
| | from collections import defaultdict |
| |
|
| | from dotenv import load_dotenv |
| | import os |
| | import pymongo |
| | from pymongo import MongoClient |
| |
|
# Load variables from a local .env file into the process environment.
load_dotenv()
# May be None if GEMINI_KEY is unset — genai.configure() would then be handed
# a null key and fail at first API call. NOTE(review): env var is 'GEMINI_KEY',
# not 'GEMINI_API_KEY' — confirm this matches the deployment configuration.
GEMINI_API_KEY = os.getenv('GEMINI_KEY')
| |
|
class EngagementMetrics(typing.TypedDict):
    """Engagement signals reported for a single student."""
    # Categorical level (code elsewhere compares against "low"/"medium"); the
    # Gemini prompt asks for a 0-1 number here — TODO confirm which is real.
    participation_level: str
    # e.g. "advanced" | "intermediate" | "basic" (per the prompt schema)
    question_quality: str
    # e.g. "strong" | "moderate" | "needs_improvement" (per the prompt schema)
    concept_understanding: str
|
class StudentInsight(typing.TypedDict):
    """Per-student analytics entry."""
    # Stringified MongoDB ObjectId of the student
    student_id: str
    # Consumers bucket this into high/average/at-risk (see _calculate_class_distribution)
    performance_level: str
    # Topic names the student shows difficulty with
    struggling_topics: list[str]
    engagement_metrics: EngagementMetrics
| |
|
class TopicInsight(typing.TypedDict):
    """Aggregated analytics for one course topic."""
    topic: str
    # Expected in [0, 1]; values > 0.7 mark the topic as critical downstream
    difficulty_level: float
    # Number of students struggling with this topic
    student_count: int
    common_issues: list[str]
    key_misconceptions: list[str]
| |
|
class RecommendedAction(typing.TypedDict):
    """A single faculty-facing recommendation."""
    action: str
    # "high" | "medium" | "low" (per the prompt schema)
    priority: str
    # Which cohort the action targets (e.g. strugglers, whole class)
    target_group: str
    reasoning: str
    expected_impact: str
| |
|
class ClassDistribution(typing.TypedDict):
    """Fractions (0-1) of students per performance bucket."""
    high_performers: float
    average_performers: float
    at_risk: float
| |
|
class CourseHealth(typing.TypedDict):
    """Course-level health summary."""
    # Declared float here, but _enrich_analytics assigns the whole dict
    # returned by _calculate_engagement — NOTE(review): schema/code mismatch,
    # confirm which shape consumers expect.
    overall_engagement: float
    critical_topics: list[str]
    class_distribution: ClassDistribution
| |
|
class InterventionMetrics(typing.TypedDict):
    """Student ids grouped by intervention urgency."""
    # Students matching >= 2 risk factors (see _identify_urgent_cases)
    immediate_attention_needed: list[str]
    # Non-urgent students showing at least one warning sign
    monitoring_required: list[str]
| |
|
class AnalyticsResponse(typing.TypedDict):
    """Top-level shape of the enriched analytics payload."""
    topic_insights: list[TopicInsight]
    student_insights: list[StudentInsight]
    recommended_actions: list[RecommendedAction]
    course_health: CourseHealth
    intervention_metrics: InterventionMetrics
| |
|
| |
|
| |
|
class NovaScholarAnalytics:
    """Generate course analytics from student/AI-tutor chat histories via Gemini.

    The class sends pre-class chat transcripts to a Gemini model and exposes
    helpers that derive local metrics (class distribution, engagement,
    critical topics, intervention lists) from the model's JSON response.

    Fixes applied in this revision:
    - Removed dead duplicate definitions of ``_calculate_engagement`` and
      ``_identify_critical_topics`` (the later definitions silently shadowed
      the earlier ones; the effective versions are kept, so runtime behavior
      is unchanged).
    - ``_calculate_class_distribution`` now always returns all three buckets
      (previously a bucket with zero students was omitted from the result).
    - ``_preprocess_chat_histories`` no longer aborts the whole batch when a
      single record is missing ``user_id``.
    """

    def __init__(self, model_name: str = "gemini-1.5-flash"):
        """Configure the Gemini SDK and instantiate the generative model.

        GEMINI_API_KEY is read from the environment at import time; it may be
        None if GEMINI_KEY is unset, in which case API calls will fail later.
        """
        genai.configure(api_key=GEMINI_API_KEY)
        self.model = genai.GenerativeModel(model_name)

    def _create_analytics_prompt(self, chat_histories: List[Dict], all_topics: List[str]) -> str:
        """Creates a structured prompt for Gemini to analyze chat histories.

        NOTE(review): the JSON keys requested here (``topic_wise_insights``,
        ``ai_recommended_actions``, ``student_analytics``) do not match the
        keys ``_process_gemini_response`` validates (``topic_insights``,
        ``student_insights``, ``recommended_actions``) — confirm which schema
        downstream consumers actually rely on.
        """
        return f"""Analyze the provided student chat histories for a university course and generate concise, actionable analytics.
Context:
- Chat histories: {json.dumps(chat_histories, indent=2)}
- These are pre-class interactions between students and an AI tutor aimed at identifying learning difficulties and improving course delivery.
- Topics covered: {', '.join(all_topics)}.

Your task is to provide detailed analytics that will help faculty address challenges effectively and enhance learning outcomes.

Output Format (strictly follow this JSON structure):
{{
    "topic_wise_insights": [
        {{
            "topic": "<string>",
            "struggling_percentage": <number between 0 and 1>,
            "key_issues": ["<string>", "<string>", ...],
            "key_misconceptions": ["<string>", "<string>", ...],
            "recommended_actions": {{
                "description": "<string>",
                "priority": "high|medium|low",
                "expected_outcome": "<string>"
            }}
        }}
    ],
    "ai_recommended_actions": [
        {{
            "action": "<string>",
            "priority": "high|medium|low",
            "reasoning": "<string>",
            "expected_outcome": "<string>",
            "pedagogy_recommendations": {{
                "methods": ["<string>", "<string>", ...],
                "resources": ["<string>", "<string>", ...],
                "expected_impact": "<string>"
            }}
        }}
    ],
    "student_analytics": [
        {{
            "student_id": "<string>",
            "engagement_metrics": {{
                "participation_level": <number between 0 and 1>,
                "concept_understanding": "strong|moderate|needs_improvement",
                "question_quality": "advanced|intermediate|basic"
            }},
            "struggling_topics": ["<string>", "<string>", ...],
            "personalized_recommendation": "<string>"
        }}
    ]
}}

Guidelines for Analysis:
- Focus on actionable and concise insights rather than exhaustive details.
- Use both explicit (e.g., direct questions) and implicit (e.g., repeated follow-ups) cues to identify areas of difficulty.
- Prioritize topics with higher difficulty scores or more students struggling.
- Ensure numerical values (e.g., difficulty levels, percentages) are between 0 and 1 where applicable.
- Make sure to include All** students in the analysis, not just a subset.
- for the ai_recommended_actions:
    - Prioritize pedagogy recommendations for critical topics with the high difficulty scores or struggling percentages.
    - For each action:
        - Include specific teaching methods (e.g., interactive discussions or quizzes, problem-based learning, practical examples etc).
        - Recommend supporting resources (e.g., videos, handouts, simulations).
        - Provide reasoning for the recommendation and the expected outcomes for student learning.
    - Example:
        - **Action:** Conduct an interactive problem-solving session on "<Topic Name>".
        - **Reasoning:** Students showed difficulty in applying concepts to practical problems.
        - **Expected Outcome:** Improved practical understanding and application of the topic.
        - **Pedagogy Recommendations:**
            - **Methods:** Group discussions, real-world case studies.
            - **Resources:** Online interactive tools, relevant case studies, video walkthroughs.
            - **Expected Impact:** Enhance conceptual clarity by 40% and practical application by 30%.

The response must adhere strictly to the above JSON structure, with all fields populated appropriately."""

    def _calculate_class_distribution(self, analytics: Dict) -> Dict:
        """Calculate the distribution of students across performance levels.

        Returns fractions (0-1) for "high_performers", "average_performers"
        and "at_risk". All three keys are always present — previously a
        bucket with zero students was silently omitted from the result.
        """
        empty = {"high_performers": 0, "average_performers": 0, "at_risk": 0}
        try:
            students = analytics.get("student_insights", [])
            total_students = len(students)
            if total_students == 0:
                return dict(empty)

            distribution = defaultdict(int)
            for student in students:
                # Unknown/missing levels are treated as average.
                performance_level = student.get("performance_level", "average")
                if performance_level in ["excellent", "high", "high_performer"]:
                    distribution["high_performers"] += 1
                elif performance_level in ["struggling", "low", "at_risk"]:
                    distribution["at_risk"] += 1
                else:
                    distribution["average_performers"] += 1

            # Normalize, emitting every bucket even when its count is zero.
            return {level: distribution[level] / total_students for level in empty}
        except Exception as e:
            print(f"Error calculating class distribution: {str(e)}")
            return dict(empty)

    def _identify_urgent_cases(self, analytics: Dict) -> List[str]:
        """Identify students needing immediate attention.

        A student is urgent when at least 2 of 3 risk factors hold:
        struggling performance level, >= 2 struggling topics, or weak
        engagement (low participation / needs_improvement understanding).
        """
        try:
            urgent_cases = []
            for student in analytics.get("student_insights", []):
                student_id = student.get("student_id")
                if not student_id:
                    continue

                risk_factors = 0

                if student.get("performance_level") in ["struggling", "at_risk", "low"]:
                    risk_factors += 1

                if len(student.get("struggling_topics", [])) >= 2:
                    risk_factors += 1

                engagement = student.get("engagement_metrics", {})
                if (engagement.get("participation_level") == "low" or
                    engagement.get("concept_understanding") == "needs_improvement"):
                    risk_factors += 1

                if risk_factors >= 2:
                    urgent_cases.append(student_id)

            return urgent_cases
        except Exception as e:
            print(f"Error identifying urgent cases: {str(e)}")
            return []

    def _identify_monitoring_cases(self, analytics: Dict) -> List[str]:
        """Identify students who need monitoring but aren't urgent cases.

        Any single warning sign (one struggling topic, medium participation,
        or average performance) flags a non-urgent student for monitoring.
        """
        try:
            monitoring_cases = []
            urgent_cases = set(self._identify_urgent_cases(analytics))

            for student in analytics.get("student_insights", []):
                student_id = student.get("student_id")
                if not student_id or student_id in urgent_cases:
                    continue

                monitoring_needed = False

                if len(student.get("struggling_topics", [])) == 1:
                    monitoring_needed = True

                engagement = student.get("engagement_metrics", {})
                if engagement.get("participation_level") == "medium":
                    monitoring_needed = True

                if student.get("performance_level") == "average":
                    monitoring_needed = True

                if monitoring_needed:
                    monitoring_cases.append(student_id)

            return monitoring_cases
        except Exception as e:
            print(f"Error identifying monitoring cases: {str(e)}")
            return []

    def _calculate_engagement(self, analytics: Dict) -> Dict:
        """Calculate overall engagement metrics with defensive programming.

        Returns the student count plus the fraction of students at each
        reported participation level. (A richer duplicate of this method
        existed earlier in the class but was dead code — Python kept only
        this later definition — so the duplicate was removed.)
        """
        try:
            total_students = len(analytics.get("student_insights", []))
            if total_students == 0:
                return {
                    "total_students": 0,
                    "engagement_distribution": {
                        "high": 0,
                        "medium": 0,
                        "low": 0
                    }
                }

            engagement_levels = defaultdict(int)

            for student in analytics.get("student_insights", []):
                metrics = student.get("engagement_metrics", {})
                # Missing participation defaults to "low".
                level = metrics.get("participation_level", "low")
                engagement_levels[level] += 1

            return {
                "total_students": total_students,
                "engagement_distribution": {
                    level: count / total_students
                    for level, count in engagement_levels.items()
                }
            }
        except Exception as e:
            print(f"Error calculating engagement: {str(e)}")
            return {
                "total_students": 0,
                "engagement_distribution": {
                    "high": 0,
                    "medium": 0,
                    "low": 0
                }
            }

    def _identify_critical_topics(self, analytics: Dict) -> List[Dict]:
        """Identify topics needing immediate attention with defensive programming.

        A topic is critical when its difficulty_level exceeds 0.7 or it has
        more than 2 common issues. Returns the topic dicts themselves. (A
        score-based duplicate of this method existed earlier in the class but
        was shadowed by this definition, so it was removed as dead code.)
        """
        try:
            return [
                topic for topic in analytics.get("topic_insights", [])
                if topic.get("difficulty_level", 0) > 0.7 or
                len(topic.get("common_issues", [])) > 2
            ]
        except Exception as e:
            print(f"Error identifying critical topics: {str(e)}")
            return []

    def _process_gemini_response(self, response: str) -> Dict:
        """Process and validate Gemini's response.

        Parses the raw JSON text, back-fills missing/empty required top-level
        fields with empty lists, and enriches the result with derived
        metrics. Falls back to a safe default payload on any parse error.
        """
        try:
            analytics = json.loads(response)

            required_fields = {
                "topic_insights": [],
                "student_insights": [],
                "recommended_actions": []
            }

            # Ensure every required field exists and is non-falsy.
            for field, default_value in required_fields.items():
                if field not in analytics or not analytics[field]:
                    analytics[field] = default_value

            return self._enrich_analytics(analytics)

        except (json.JSONDecodeError, KeyError, TypeError) as e:
            print(f"Error processing Gemini response: {str(e)}")
            print(f"Raw response: {response}")
            return self._fallback_analytics()

    def _enrich_analytics(self, analytics: Dict) -> Dict:
        """Add derived insights and metrics to the analytics in place.

        NOTE(review): "overall_engagement" receives the whole dict returned
        by _calculate_engagement, while the CourseHealth TypedDict declares a
        float — confirm which shape consumers expect.
        """
        analytics["course_health"] = {
            "overall_engagement": self._calculate_engagement(analytics),
            "critical_topics": self._identify_critical_topics(analytics),
            "class_distribution": self._calculate_class_distribution(analytics)
        }

        analytics["intervention_metrics"] = {
            "immediate_attention_needed": self._identify_urgent_cases(analytics),
            "monitoring_required": self._identify_monitoring_cases(analytics)
        }

        return analytics

    def generate_analytics(self, chat_histories: List[Dict], all_topics: List[str]) -> Dict:
        """Main method to generate analytics with better error handling.

        Preprocesses the chat histories, prompts Gemini for a JSON analytics
        payload, and returns the parsed result; any failure yields the
        fallback payload instead of raising.

        NOTE(review): the raw parsed JSON is returned directly — the
        _process_gemini_response/_enrich_analytics pipeline is not invoked
        here; confirm whether callers expect the enriched shape.
        """
        try:
            # Diagnostic logging of the inputs (print-based, as elsewhere in
            # this module).
            print("Input validation:")
            print(f"Chat histories: {len(chat_histories)} entries")
            print(f"Topics: {all_topics}")

            if not chat_histories or not all_topics:
                print("Missing required input data")
                return self._fallback_analytics()

            try:
                processed_histories = self._preprocess_chat_histories(chat_histories)
                print("Successfully preprocessed chat histories")
            except Exception as preprocess_error:
                print(f"Error in preprocessing: {str(preprocess_error)}")
                return self._fallback_analytics()

            try:
                prompt = self._create_analytics_prompt(processed_histories, all_topics)
                print("Successfully created prompt")
                print("Prompt preview:", prompt[:200] + "...")
            except Exception as prompt_error:
                print(f"Error in prompt creation: {str(prompt_error)}")
                return self._fallback_analytics()

            # Low temperature + JSON mime type to keep the output parseable.
            response = self.model.generate_content(
                prompt,
                generation_config=genai.GenerationConfig(
                    response_mime_type="application/json",
                    temperature=0.15
                )
            )

            if not response.text:
                print("Empty response from Gemini")
                return self._fallback_analytics()

            analytics = json.loads(response.text)
            return analytics

        except Exception as e:
            print(f"Error generating analytics: {str(e)}")
            print(f"Error type: {type(e)}")
            import traceback
            print("Full traceback:", traceback.format_exc())
            return self._fallback_analytics()

    def _preprocess_chat_histories(self, chat_histories: List[Dict]) -> List[Dict]:
        """Preprocess chat histories to focus on relevant information.

        Reduces each record to a stringified user id plus (prompt, response)
        message pairs. Malformed records are skipped with a log message
        instead of aborting the whole batch — previously a record missing
        "user_id" raised outside the try block and failed everything.
        """
        processed = []

        for chat in chat_histories:
            user_id = "<unknown>"
            try:
                raw_id = chat["user_id"]
                # Handle MongoDB extended-JSON ObjectIds ({"$oid": ...}).
                user_id = str(raw_id["$oid"]) if isinstance(raw_id, dict) and "$oid" in raw_id else str(raw_id)

                processed_chat = {
                    "user_id": user_id,
                    "messages": [
                        {
                            "prompt": msg["prompt"],
                            "response": msg["response"]
                        }
                        for msg in chat["messages"]
                    ]
                }
                processed.append(processed_chat)
                print(f"Successfully processed chat for user: {user_id}")
            except Exception as e:
                print(f"Error processing chat for user: {user_id}")
                print(f"Error details: {str(e)}")
                continue

        return processed

    def _fallback_analytics(self) -> Dict:
        """Provide comprehensive fallback analytics that match our schema."""
        return {
            "topic_insights": [],
            "student_insights": [],
            "recommended_actions": [
                {
                    "action": "Review analytics generation process",
                    "priority": "high",
                    "target_group": "system_administrators",
                    "reasoning": "Analytics generation failed",
                    "expected_impact": "Restore analytics functionality"
                }
            ],
            "course_health": {
                "overall_engagement": 0,
                "critical_topics": [],
                "class_distribution": {
                    "high_performers": 0,
                    "average_performers": 0,
                    "at_risk": 0
                }
            },
            "intervention_metrics": {
                "immediate_attention_needed": [],
                "monitoring_required": []
            }
        }