# Student_Analyzer / dashboard_analyzer.py
# akshit7093's picture
# changes
# e4d9b49
# dashboard_analyzer.py
import logging
import json
from datetime import datetime
logger = logging.getLogger(__name__)

# --- Constants for Scoring and Analysis ---
# Tunable, data-agnostic knobs for the analysis logic below.

# Cut-off values used when turning raw numbers into qualitative ratings.
THRESHOLDS = {
    # CGPA at or above these values is rated "Excellent" / "Good".
    "CGPA_EXCELLENT": 8.5,
    "CGPA_GOOD": 7.5,
    # Total LeetCode problems solved; compared with a strict ">".
    "LEETCODE_TOTAL_HIGH": 200,
    "LEETCODE_TOTAL_MEDIUM": 100,
    # GitHub cut-offs. NOTE(review): GITHUB_STARS_HIGH is not referenced by
    # any function visible in this file.
    "GITHUB_STARS_HIGH": 10,
    "GITHUB_REPOS_HIGH": 20,
}

# Multipliers applied to raw counts when computing weighted scores.
WEIGHTS = {
    # Per-problem weight by LeetCode difficulty.
    "LEETCODE_EASY": 0.2,
    "LEETCODE_MEDIUM": 1.0,
    "LEETCODE_HARD": 2.5,
    # Per-star / per-fork weight for the GitHub impact score.
    # NOTE(review): GITHUB_REPOS is not referenced by any function visible
    # in this file.
    "GITHUB_STARS": 2.0,
    "GITHUB_FORKS": 3.0,
    "GITHUB_REPOS": 0.5,
}
def get_dashboard_metrics(student_data: dict) -> dict:
    """
    Build the complete dashboard metrics payload for a single student.

    Each section of the raw profile is handed to its dedicated analyzer and
    the individual results are assembled into one nested dictionary.

    Args:
        student_data (dict): The dictionary containing all data for one student.

    Returns:
        dict: ``{"error": ...}`` when ``student_data`` is empty or None.
        Otherwise a nested dictionary with the keys ``overall_summary``
        (archetype tags and profile completeness), ``academics``,
        ``coding_profiles`` (``leetcode`` and ``github``) and
        ``skills_distribution``.
    """
    if not student_data:
        return {"error": "No student data provided."}

    # Both coding-platform analyzers read from the same sub-dictionary.
    coding_profiles = student_data.get("coding_profiles", {})

    # --- Per-section analysis ---
    academics_report = _analyze_academics(student_data.get("academic_profile", {}))
    leetcode_report = _analyze_leetcode(coding_profiles.get("leetcode", {}))
    github_report = _analyze_github(coding_profiles.get("github", {}))
    skill_counts = _extract_skills(student_data)
    completeness_report = _calculate_profile_completeness(student_data)

    # --- Cross-section insight derived from the reports above ---
    archetype_tags = _determine_student_archetype(
        skill_counts, leetcode_report, github_report
    )

    # --- Final assembly ---
    summary_section = {
        "student_archetype": archetype_tags,
        "profile_completeness": completeness_report,
    }
    coding_section = {
        "leetcode": leetcode_report,
        "github": github_report,
    }
    return {
        "overall_summary": summary_section,
        "academics": academics_report,
        "coding_profiles": coding_section,
        "skills_distribution": skill_counts,
    }
def _analyze_academics(academic_data: dict) -> dict:
    """
    Analyzes academic performance dynamically from the data provided.

    Produces a qualitative CGPA rating, an SGPA-based trajectory, the overall
    strongest/weakest subjects and a per-semester overview. Null or
    non-numeric values (e.g. JSON ``null``) are tolerated instead of raising.

    Args:
        academic_data (dict): The student's academic profile. Keys read:
            ``overall_cgpa`` and ``semester_performance`` (a list of semester
            dicts with ``semester``, ``sgpa``, ``percentage`` and
            ``subjects``; each subject has ``subject``, ``marks``, ``grade``).
            ``None`` is treated as an empty profile.

    Returns:
        dict: Keys ``cgpa``, ``rating``, ``trajectory``,
        ``overall_subject_strengths``, ``overall_subject_weaknesses`` and
        ``semester_overviews``.
    """
    academic_data = academic_data or {}
    semesters = academic_data.get("semester_performance") or []

    # Qualitative rating based on CGPA; a missing/invalid CGPA counts as 0.
    cgpa = _academic_number(academic_data.get("overall_cgpa"), 0)
    if cgpa >= THRESHOLDS['CGPA_EXCELLENT']:
        rating = "Excellent"
    elif cgpa >= THRESHOLDS['CGPA_GOOD']:
        rating = "Good"
    else:
        rating = "Needs Improvement"

    # Academic trajectory: compare the average SGPA of the first half of the
    # semesters with the second half. Semesters without a numeric SGPA are
    # skipped; counting them as 0 would fake a decline or an improvement.
    sgpa_list = [
        sgpa
        for sgpa in (_academic_number(sem.get("sgpa")) for sem in semesters)
        if sgpa is not None
    ]
    trajectory = "Stable"
    if len(sgpa_list) > 2:
        midpoint = len(sgpa_list) // 2
        first_half_avg = sum(sgpa_list[:midpoint]) / midpoint
        second_half_avg = sum(sgpa_list[midpoint:]) / (len(sgpa_list) - midpoint)
        # A 0.2 margin keeps small fluctuations classified as "Stable".
        if second_half_avg > first_half_avg + 0.2:
            trajectory = "Improving"
        elif second_half_avg < first_half_avg - 0.2:
            trajectory = "Declining"

    # --- Detailed semester overviews and overall subject analysis ---
    all_subjects_overall = []
    semester_overviews = []
    for semester_info in semesters:
        semester_subjects = []
        high_grades_count = 0
        for subject_info in semester_info.get("subjects") or []:
            subject_record = {
                "name": subject_info.get("subject"),
                "marks": subject_info.get("marks", 0),
            }
            semester_subjects.append(subject_record)
            all_subjects_overall.append(subject_record)
            if subject_info.get("grade") in ['O', 'A+']:
                high_grades_count += 1

        # Semesters without any subject produce no overview entry.
        if semester_subjects:
            # Ascending by marks: first item is the lowest, last the highest.
            ranked = sorted(semester_subjects, key=_subject_marks)
            semester_overviews.append({
                "semester_number": semester_info.get("semester"),
                "sgpa": semester_info.get("sgpa"),
                "percentage": semester_info.get("percentage"),
                "top_subject": ranked[-1],
                "bottom_subject": ranked[0],
                "high_grades_count": high_grades_count,
            })

    # Overall strengths/weaknesses across all semesters (top 3 / bottom 3).
    # NOTE: with fewer than six subjects in total the two lists overlap.
    ranked_overall = sorted(all_subjects_overall, key=_subject_marks, reverse=True)
    overall_strengths = ranked_overall[:3]
    overall_weaknesses = ranked_overall[-3:]

    return {
        "cgpa": cgpa,
        "rating": rating,
        "trajectory": trajectory,
        "overall_subject_strengths": overall_strengths,
        "overall_subject_weaknesses": overall_weaknesses,
        "semester_overviews": semester_overviews,
    }


def _academic_number(value, default=None):
    """Return ``value`` if it is numeric (or a numeric string), else ``default``."""
    if isinstance(value, (int, float)):
        return value
    if isinstance(value, str):
        try:
            return float(value)
        except ValueError:
            return default
    return default


def _subject_marks(subject_record: dict):
    """Sort key: a subject's marks as a number, with 0 for missing/invalid marks."""
    return _academic_number(subject_record.get("marks"), 0)
# --- The following functions are already fully data-driven and remain unchanged ---
def _analyze_leetcode(leetcode_data: dict) -> dict:
    """
    Performs a nuanced analysis of LeetCode performance.

    Each difficulty count is parsed independently, so one malformed entry no
    longer discards the other two, and non-string or null counts are treated
    as 0 instead of raising.

    Args:
        leetcode_data (dict): The LeetCode profile. Keys read: ``totalSolved``
            and ``problemsByDifficulty`` (``Easy``/``Medium``/``Hard`` values
            such as ``"45/800"``). Empty or ``None`` means "no profile".

    Returns:
        dict: Keys ``rating``, ``score`` (0-10, capped), ``total_solved`` and
        ``difficulty_breakdown`` (``easy``/``medium``/``hard`` counts). The
        same keys are returned when no profile is available.
    """
    if not leetcode_data:
        return {
            "rating": "Not Available",
            "score": 0,
            "total_solved": 0,
            "difficulty_breakdown": {"easy": 0, "medium": 0, "hard": 0},
        }

    by_difficulty = leetcode_data.get("problemsByDifficulty")
    if not isinstance(by_difficulty, dict):
        by_difficulty = {}

    total_solved = _leetcode_solved_count(leetcode_data.get("totalSolved", 0))
    easy = _leetcode_solved_count(by_difficulty.get("Easy", "0/0"))
    medium = _leetcode_solved_count(by_difficulty.get("Medium", "0/0"))
    hard = _leetcode_solved_count(by_difficulty.get("Hard", "0/0"))

    # Weighted score, normalized against a reference profile of
    # 150 easy / 100 medium / 30 hard problems and capped at 10.
    raw_score = (
        easy * WEIGHTS['LEETCODE_EASY']
        + medium * WEIGHTS['LEETCODE_MEDIUM']
        + hard * WEIGHTS['LEETCODE_HARD']
    )
    target_score = (
        150 * WEIGHTS['LEETCODE_EASY']
        + 100 * WEIGHTS['LEETCODE_MEDIUM']
        + 30 * WEIGHTS['LEETCODE_HARD']
    )
    # Guard against all weights being tuned down to zero.
    normalized_score = round((raw_score / target_score) * 10, 1) if target_score > 0 else 0
    final_score = min(normalized_score, 10.0)

    # Rating tiers, checked from the most to the least demanding.
    rating = "Beginner"
    if hard > 10 or medium > 50:
        rating = "Advanced Problem Solver"
    elif medium > 25 or total_solved > THRESHOLDS['LEETCODE_TOTAL_HIGH']:
        rating = "Active Competitor"
    elif total_solved > THRESHOLDS['LEETCODE_TOTAL_MEDIUM']:
        rating = "Consistent Learner"

    return {
        "rating": rating,
        "score": final_score,
        "total_solved": total_solved,
        "difficulty_breakdown": {"easy": easy, "medium": medium, "hard": hard},
    }


def _leetcode_solved_count(entry) -> int:
    """Return the solved count from a value like ``"45/800"`` or ``45``; 0 if unusable."""
    if isinstance(entry, str):
        # Only the part before the slash is the solved count.
        entry = entry.split('/')[0]
    try:
        return int(entry)
    except (TypeError, ValueError, OverflowError):
        return 0
def _analyze_github(github_data: dict) -> dict:
    """
    Analyzes GitHub profile for activity, impact, and tech stack.

    Push dates are parsed per repository, so a single malformed or missing
    ``last_pushed`` value no longer hides the valid ones. Null numeric fields
    are treated as 0.

    Args:
        github_data (dict): The GitHub profile. Keys read: ``stats`` (with
            ``public_repos``) and ``top_repositories`` (a list of dicts with
            ``last_pushed``, ``stars``, ``forks`` and ``language``). Empty or
            ``None`` means "no profile".

    Returns:
        dict: Keys ``rating``, ``activity_level``, ``top_languages`` (at most
        three, in first-seen order) and ``stats``. The same keys are returned
        when no profile is available.
    """
    if not github_data:
        return {
            "rating": "Not Available",
            "activity_level": "Unknown",
            "top_languages": [],
            "stats": {},
        }

    stats = github_data.get("stats") or {}
    repos = github_data.get("top_repositories") or []

    # Activity level is driven by the most recent parseable push date.
    # "Low" is kept when no repository carries a usable date.
    # NOTE(review): "Inactive" (30-89 days) ranks above "Low" (90+ days or
    # unknown); the labels are kept as-is because consumers may rely on them.
    activity_level = "Low"
    push_dates = [
        pushed
        for pushed in (_parse_push_date(repo.get('last_pushed')) for repo in repos)
        if pushed is not None
    ]
    if push_dates:
        days_since_push = (datetime.now() - max(push_dates)).days
        if days_since_push < 7:
            activity_level = "Very Active"
        elif days_since_push < 30:
            activity_level = "Active"
        elif days_since_push < 90:
            activity_level = "Inactive"

    # Weighted popularity of the listed repositories.
    impact_score = sum(
        (repo.get('stars') or 0) * WEIGHTS['GITHUB_STARS']
        + (repo.get('forks') or 0) * WEIGHTS['GITHUB_FORKS']
        for repo in repos
    )

    # dict.fromkeys de-duplicates while preserving first-seen order.
    top_languages = list(
        dict.fromkeys(repo.get("language") for repo in repos if repo.get("language"))
    )[:3]

    public_repos = stats.get('public_repos') or 0
    rating = "Needs Development"
    if impact_score > 50 or public_repos > THRESHOLDS['GITHUB_REPOS_HIGH']:
        rating = "Strong Profile"
    elif activity_level in ["Very Active", "Active"] or public_repos > 10:
        rating = "Good Profile"

    return {
        "rating": rating,
        "activity_level": activity_level,
        "top_languages": top_languages,
        "stats": stats,
    }


def _parse_push_date(raw):
    """Parse a ``last_pushed`` value; return ``None`` when it is not a usable date."""
    if not isinstance(raw, str):
        return None
    try:
        # Only the leading YYYY-MM-DD part is used, so full ISO timestamps
        # such as "2024-05-01T12:34:56Z" are accepted as well.
        return datetime.strptime(raw.strip()[:10], "%Y-%m-%d")
    except ValueError:
        return None
def _extract_skills(student_data: dict) -> dict:
    """
    Extracts, combines, cleans, and COUNTS key skills for chart display.

    Skills come from the resume (``resume.key_skills``) and from LeetCode
    (``coding_profiles.leetcode.topSkillsSummary[*].skill``). Each skill is
    stripped and title-cased so the same skill from both sources is counted
    together. Entries that are missing, null, non-string or blank are skipped
    instead of raising.

    Args:
        student_data (dict): The dictionary containing all data for one student.

    Returns:
        dict: Mapping of normalized skill name to its number of occurrences,
        e.g. ``{'Python': 2, 'Java': 1}``, in first-seen order (resume skills
        first, then LeetCode skills).
    """
    from collections import Counter

    # `or {}` / `or []` also covers sections that are present but null.
    resume = student_data.get("resume") or {}
    coding_profiles = student_data.get("coding_profiles") or {}
    leetcode = coding_profiles.get("leetcode") or {}

    resume_skills = resume.get("key_skills") or []
    leetcode_skills = [
        item.get("skill")
        for item in leetcode.get("topSkillsSummary") or []
        if isinstance(item, dict)
    ]

    # Normalize to title case for consistency; drop unusable entries.
    normalized_skills = [
        skill.strip().title()
        for skill in (*resume_skills, *leetcode_skills)
        if isinstance(skill, str) and skill.strip()
    ]
    return dict(Counter(normalized_skills))
def _calculate_profile_completeness(student_data: dict) -> dict:
    """
    Scores the profile based on the presence of key data points.

    Five sections are checked: Academics, Resume, LeetCode, GitHub and
    Codeforces. A section that is absent, null or empty counts as missing.

    Args:
        student_data (dict): The dictionary containing all data for one student.

    Returns:
        dict: ``score_percentage`` (int, share of sections present) and
        ``missing_sections`` (names of the sections that are missing, in
        check order).
    """
    # `or {}` makes a section that is present but null behave like a missing
    # one instead of raising AttributeError on the nested lookup.
    academic_profile = student_data.get("academic_profile") or {}
    resume = student_data.get("resume") or {}
    coding_profiles = student_data.get("coding_profiles") or {}

    checks = {
        "Academics": bool(academic_profile.get("semester_performance")),
        "Resume": bool(resume.get("key_skills")),
        "LeetCode": bool(coding_profiles.get("leetcode")),
        "GitHub": bool(coding_profiles.get("github")),
        "Codeforces": bool(coding_profiles.get("codeforces")),
    }

    # Integer arithmetic avoids float rounding when truncating to a percentage.
    present_count = sum(checks.values())
    score = 100 * present_count // len(checks)

    return {
        "score_percentage": score,
        "missing_sections": [name for name, present in checks.items() if not present],
    }
def _determine_student_archetype(skills: list, leetcode_metrics: dict, github_metrics: dict) -> list:
    """
    Generates dynamic tags based on analyzed metrics.

    Skill names are lower-cased and matched exactly (not as substrings)
    against fixed keyword groups; the LeetCode rating adds the competitive
    programming tag. ``github_metrics`` is accepted but not consulted.

    Args:
        skills: Iterable of skill names; iterating a dict yields its keys.
        leetcode_metrics (dict): LeetCode analysis; only ``rating`` is read.
        github_metrics (dict): GitHub analysis (currently unused).

    Returns:
        list: Matching archetype tags in a fixed order, or ``["Generalist"]``
        when nothing matches.
    """
    known_skills = set()
    for skill in skills:
        known_skills.add(skill.lower())

    def has_any(*keywords):
        # True when at least one keyword is exactly one of the known skills.
        return not known_skills.isdisjoint(keywords)

    tags = []

    if has_any("tensorflow", "pytorch", "ai", "machine learning", "nlp", "computer vision"):
        tags.append("AI/ML Enthusiast")

    if has_any("react", "node", "flask", "django", "backend", "frontend"):
        tags.append("Web Developer")

    leetcode_rating = leetcode_metrics.get("rating")
    if leetcode_rating in ["Advanced Problem Solver", "Active Competitor"]:
        tags.append("Competitive Programmer")

    if has_any("aws", "google cloud", "docker", "kubernetes"):
        tags.append("Cloud & DevOps Oriented")

    return tags or ["Generalist"]
# --- Testing Block ---
# Manual smoke test: loads the cleaned data file from the working directory,
# runs the analysis for one hard-coded enrollment number and prints the result.
if __name__ == '__main__':
    print("Testing advanced, fully data-driven dashboard_analyzer.py...")
    try:
        with open('final_cleaned_student_data.json', 'r', encoding='utf-8') as data_file:
            all_students = json.load(data_file)

        enrollment_id = "35214811922"
        student_record = all_students.get(enrollment_id)

        if not student_record:
            print(f"Error: Student with enrollment '{enrollment_id}' not found.")
        else:
            report = get_dashboard_metrics(student_record)
            print("\n--- Generated Advanced Metrics ---")
            print(json.dumps(report, indent=4))
    except FileNotFoundError:
        print("Error: `final_cleaned_student_data.json` not found.")
    except Exception as e:
        # Anything else (bad JSON, unexpected data shape) is logged with its traceback.
        logger.error(f"An unexpected error occurred during testing: {e}", exc_info=True)