Spaces:
Sleeping
Sleeping
| # dashboard_analyzer.py | |
| import logging | |
| import json | |
| from datetime import datetime | |
| logger = logging.getLogger(__name__) | |
| # --- Constants for Scoring and Analysis --- | |
| # These can be tweaked to adjust the analysis logic. They are data-agnostic. | |
| THRESHOLDS = { | |
| 'CGPA_EXCELLENT': 8.5, | |
| 'CGPA_GOOD': 7.5, | |
| 'LEETCODE_TOTAL_HIGH': 200, | |
| 'LEETCODE_TOTAL_MEDIUM': 100, | |
| 'GITHUB_STARS_HIGH': 10, | |
| 'GITHUB_REPOS_HIGH': 20, | |
| } | |
| WEIGHTS = { | |
| 'LEETCODE_EASY': 0.2, | |
| 'LEETCODE_MEDIUM': 1.0, | |
| 'LEETCODE_HARD': 2.5, | |
| 'GITHUB_STARS': 2.0, | |
| 'GITHUB_FORKS': 3.0, | |
| 'GITHUB_REPOS': 0.5, | |
| } | |
| def get_dashboard_metrics(student_data: dict) -> dict: | |
| """ | |
| Performs a fully data-driven, advanced analysis of a student's raw JSON data | |
| to extract a rich set of metrics for the dashboard without any hardcoded assumptions. | |
| Args: | |
| student_data (dict): The dictionary containing all data for one student. | |
| Returns: | |
| dict: A deeply nested dictionary with structured dashboard metrics and insights. | |
| """ | |
| if not student_data: | |
| return {"error": "No student data provided."} | |
| # --- Perform analysis on different sections of the profile --- | |
| academics = _analyze_academics(student_data.get("academic_profile", {})) | |
| leetcode = _analyze_leetcode(student_data.get("coding_profiles", {}).get("leetcode", {})) | |
| github = _analyze_github(student_data.get("coding_profiles", {}).get("github", {})) | |
| skills = _extract_skills(student_data) | |
| completeness = _calculate_profile_completeness(student_data) | |
| # --- Synthesize overall insights from the analyses --- | |
| archetype = _determine_student_archetype(skills, leetcode, github) | |
| # --- Assemble the final, comprehensive metrics object --- | |
| return { | |
| "overall_summary": { | |
| "student_archetype": archetype, | |
| "profile_completeness": completeness | |
| }, | |
| "academics": academics, | |
| "coding_profiles": { | |
| "leetcode": leetcode, | |
| "github": github | |
| }, | |
| "skills_distribution": skills, | |
| } | |
| def _analyze_academics(academic_data: dict) -> dict: | |
| """ | |
| Analyzes academic performance dynamically from the data provided. | |
| Includes trajectory, overall subject performance, and detailed semester overviews. | |
| """ | |
| cgpa = academic_data.get("overall_cgpa", 0) | |
| # Qualitative Rating based on CGPA | |
| if cgpa >= THRESHOLDS['CGPA_EXCELLENT']: rating = "Excellent" | |
| elif cgpa >= THRESHOLDS['CGPA_GOOD']: rating = "Good" | |
| else: rating = "Needs Improvement" | |
| # Academic Trajectory based on SGPA trend | |
| sgpa_list = [sem.get("sgpa", 0) for sem in academic_data.get("semester_performance", [])] | |
| trajectory = "Stable" | |
| if len(sgpa_list) > 2: | |
| first_half_avg = sum(sgpa_list[:len(sgpa_list)//2]) / (len(sgpa_list)//2) | |
| second_half_avg = sum(sgpa_list[len(sgpa_list)//2:]) / (len(sgpa_list) - len(sgpa_list)//2) | |
| if second_half_avg > first_half_avg + 0.2: trajectory = "Improving" | |
| elif second_half_avg < first_half_avg - 0.2: trajectory = "Declining" | |
| # --- Detailed Semester Overviews and Overall Subject Analysis --- | |
| all_subjects_overall = [] | |
| semester_overviews = [] | |
| for semester_info in academic_data.get("semester_performance", []): | |
| semester_subjects = [] | |
| high_grades_count = 0 | |
| for subject_info in semester_info.get("subjects", []): | |
| subject_record = { | |
| "name": subject_info.get("subject"), | |
| "marks": subject_info.get("marks", 0) | |
| } | |
| semester_subjects.append(subject_record) | |
| all_subjects_overall.append(subject_record) | |
| if subject_info.get("grade") in ['O', 'A+']: | |
| high_grades_count += 1 | |
| if semester_subjects: | |
| semester_subjects.sort(key=lambda x: x['marks']) # Sort by marks ascending | |
| semester_overviews.append({ | |
| "semester_number": semester_info.get("semester"), | |
| "sgpa": semester_info.get("sgpa"), | |
| "percentage": semester_info.get("percentage"), | |
| "top_subject": semester_subjects[-1], # Last item is highest | |
| "bottom_subject": semester_subjects[0], # First item is lowest | |
| "high_grades_count": high_grades_count | |
| }) | |
| # Determine overall subject strengths and weaknesses from all semesters | |
| overall_strengths = [] | |
| overall_weaknesses = [] | |
| if all_subjects_overall: | |
| all_subjects_overall.sort(key=lambda x: x['marks'], reverse=True) # Sort descending | |
| overall_strengths = all_subjects_overall[:3] # Top 3 overall | |
| overall_weaknesses = all_subjects_overall[-3:] # Bottom 3 overall | |
| return { | |
| "cgpa": cgpa, | |
| "rating": rating, | |
| "trajectory": trajectory, | |
| "overall_subject_strengths": overall_strengths, | |
| "overall_subject_weaknesses": overall_weaknesses, | |
| "semester_overviews": semester_overviews | |
| } | |
| # --- The following functions are already fully data-driven and remain unchanged --- | |
| def _analyze_leetcode(leetcode_data: dict) -> dict: | |
| """Performs a nuanced analysis of LeetCode performance.""" | |
| if not leetcode_data: return {"rating": "Not Available", "score": 0, "total_solved": 0} | |
| total_solved = leetcode_data.get("totalSolved", 0) | |
| try: | |
| easy = int(leetcode_data.get("problemsByDifficulty", {}).get("Easy", "0/0").split('/')[0]) | |
| medium = int(leetcode_data.get("problemsByDifficulty", {}).get("Medium", "0/0").split('/')[0]) | |
| hard = int(leetcode_data.get("problemsByDifficulty", {}).get("Hard", "0/0").split('/')[0]) | |
| except (ValueError, IndexError): easy, medium, hard = 0, 0, 0 | |
| raw_score = (easy * WEIGHTS['LEETCODE_EASY'] + medium * WEIGHTS['LEETCODE_MEDIUM'] + hard * WEIGHTS['LEETCODE_HARD']) | |
| target_score = (150 * WEIGHTS['LEETCODE_EASY'] + 100 * WEIGHTS['LEETCODE_MEDIUM'] + 30 * WEIGHTS['LEETCODE_HARD']) | |
| normalized_score = round((raw_score / target_score) * 10, 1) if target_score > 0 else 0 | |
| final_score = min(normalized_score, 10.0) | |
| rating = "Beginner" | |
| if hard > 10 or medium > 50: rating = "Advanced Problem Solver" | |
| elif medium > 25 or total_solved > THRESHOLDS['LEETCODE_TOTAL_HIGH']: rating = "Active Competitor" | |
| elif total_solved > THRESHOLDS['LEETCODE_TOTAL_MEDIUM']: rating = "Consistent Learner" | |
| return {"rating": rating, "score": final_score, "total_solved": total_solved, "difficulty_breakdown": {"easy": easy, "medium": medium, "hard": hard}} | |
| def _analyze_github(github_data: dict) -> dict: | |
| """Analyzes GitHub profile for activity, impact, and tech stack.""" | |
| if not github_data: return {"rating": "Not Available", "activity_level": "Unknown"} | |
| stats, repos = github_data.get("stats", {}), github_data.get("top_repositories", []) | |
| activity_level = "Low" | |
| if repos: | |
| try: | |
| latest_push = max(datetime.strptime(repo['last_pushed'], "%Y-%m-%d") for repo in repos if repo.get('last_pushed')) | |
| if (datetime.now() - latest_push).days < 7: activity_level = "Very Active" | |
| elif (datetime.now() - latest_push).days < 30: activity_level = "Active" | |
| elif (datetime.now() - latest_push).days < 90: activity_level = "Inactive" | |
| except (ValueError, TypeError): pass | |
| impact_score = sum(repo.get('stars', 0) * WEIGHTS['GITHUB_STARS'] + repo.get('forks', 0) * WEIGHTS['GITHUB_FORKS'] for repo in repos) | |
| top_languages = list(dict.fromkeys([repo.get("language") for repo in repos if repo.get("language")]))[:3] | |
| rating = "Needs Development" | |
| if impact_score > 50 or stats.get('public_repos', 0) > THRESHOLDS['GITHUB_REPOS_HIGH']: rating = "Strong Profile" | |
| elif activity_level in ["Very Active", "Active"] or stats.get('public_repos', 0) > 10: rating = "Good Profile" | |
| return {"rating": rating, "activity_level": activity_level, "top_languages": top_languages, "stats": stats} | |
| def _extract_skills(student_data: dict) -> dict: # MODIFIED to return a dict | |
| """ | |
| Extracts, combines, cleans, and COUNTS key skills for chart display. | |
| """ | |
| from collections import Counter | |
| resume_skills = student_data.get("resume", {}).get("key_skills", []) | |
| leetcode_skills = [ | |
| item.get("skill") for item in student_data.get("coding_profiles", {}).get("leetcode", {}).get("topSkillsSummary", []) | |
| ] | |
| # Normalize skills to title case for consistency | |
| normalized_resume = [s.strip().title() for s in resume_skills] | |
| normalized_leetcode = [s.strip().title() for s in leetcode_skills] | |
| # Combine and count occurrences (though here they are unique, this is a robust way to handle it) | |
| all_skills = normalized_resume + normalized_leetcode | |
| # Using Counter will give a dict like {'Python': 2, 'Java': 1}, perfect for charts | |
| # In this case, since we combine unique lists, counts will be 1 or 2, but it provides the right structure. | |
| skill_counts = dict(Counter(all_skills)) | |
| return skill_counts | |
| def _calculate_profile_completeness(student_data: dict) -> dict: | |
| """Scores the profile based on the presence of key data points.""" | |
| checks = { | |
| "Academics": bool(student_data.get("academic_profile", {}).get("semester_performance")), | |
| "Resume": bool(student_data.get("resume", {}).get("key_skills")), | |
| "LeetCode": bool(student_data.get("coding_profiles", {}).get("leetcode")), | |
| "GitHub": bool(student_data.get("coding_profiles", {}).get("github")), | |
| "Codeforces": bool(student_data.get("coding_profiles", {}).get("codeforces")) | |
| } | |
| score = int((sum(checks.values()) / len(checks)) * 100) | |
| return {"score_percentage": score, "missing_sections": [key for key, value in checks.items() if not value]} | |
| def _determine_student_archetype(skills: list, leetcode_metrics: dict, github_metrics: dict) -> list: | |
| """Generates dynamic tags based on analyzed metrics.""" | |
| archetypes = [] | |
| skills_lower = {s.lower() for s in skills} | |
| if any(kw in skills_lower for kw in ["tensorflow", "pytorch", "ai", "machine learning", "nlp", "computer vision"]): archetypes.append("AI/ML Enthusiast") | |
| if any(kw in skills_lower for kw in ["react", "node", "flask", "django", "backend", "frontend"]): archetypes.append("Web Developer") | |
| if leetcode_metrics.get("rating") in ["Advanced Problem Solver", "Active Competitor"]: archetypes.append("Competitive Programmer") | |
| if any(kw in skills_lower for kw in ["aws", "google cloud", "docker", "kubernetes"]): archetypes.append("Cloud & DevOps Oriented") | |
| return archetypes if archetypes else ["Generalist"] | |
| # --- Testing Block --- | |
| if __name__ == '__main__': | |
| print("Testing advanced, fully data-driven dashboard_analyzer.py...") | |
| try: | |
| with open('final_cleaned_student_data.json', 'r', encoding='utf-8') as f: | |
| full_data = json.load(f) | |
| sample_enrollment = "35214811922" | |
| student_sample = full_data.get(sample_enrollment) | |
| if student_sample: | |
| metrics = get_dashboard_metrics(student_sample) | |
| print("\n--- Generated Advanced Metrics ---") | |
| print(json.dumps(metrics, indent=4)) | |
| else: | |
| print(f"Error: Student with enrollment '{sample_enrollment}' not found.") | |
| except FileNotFoundError: | |
| print("Error: `final_cleaned_student_data.json` not found.") | |
| except Exception as e: | |
| logger.error(f"An unexpected error occurred during testing: {e}", exc_info=True) |