# Source: Hugging Face Space "akshit7093/Student_Analyzer" (status: Sleeping)
# File size: 11,525 bytes

# dashboard_analyzer.py
import logging
import json
from datetime import datetime

# Module-level logger; the __main__ test block reports unexpected errors through it.
logger = logging.getLogger(__name__)

# --- Constants for Scoring and Analysis ---
# Cut-off values used to turn raw numbers into qualitative ratings.
# Tune the analysis here; none of them depend on a particular student's data.
THRESHOLDS = {
    "CGPA_EXCELLENT": 8.5,         # CGPA at or above this is rated "Excellent"
    "CGPA_GOOD": 7.5,              # CGPA at or above this is rated "Good"
    "LEETCODE_TOTAL_HIGH": 200,    # more problems solved than this -> "Active Competitor"
    "LEETCODE_TOTAL_MEDIUM": 100,  # more problems solved than this -> "Consistent Learner"
    "GITHUB_STARS_HIGH": 10,       # NOTE(review): no use visible in this module
    "GITHUB_REPOS_HIGH": 20,       # more public repos than this -> "Strong Profile"
}

# Multipliers applied to raw counts when computing the LeetCode score and the
# GitHub impact score.
WEIGHTS = {
    "LEETCODE_EASY": 0.2,    # per easy problem solved
    "LEETCODE_MEDIUM": 1.0,  # per medium problem solved
    "LEETCODE_HARD": 2.5,    # per hard problem solved
    "GITHUB_STARS": 2.0,     # per star on a listed repository
    "GITHUB_FORKS": 3.0,     # per fork of a listed repository
    "GITHUB_REPOS": 0.5,     # NOTE(review): no use visible in this module
}

def get_dashboard_metrics(student_data: dict) -> dict:
    """
    Builds the complete dashboard metrics object for a single student.

    Each section of the raw profile is handed to its own analysis helper and
    the results are assembled into one nested dictionary.

    Args:
        student_data (dict): The dictionary containing all data for one student.
            Sections read here: "academic_profile" and "coding_profiles"
            (with "leetcode" and "github" inside it). The whole dictionary is
            also passed to the skills and completeness helpers.

    Returns:
        dict: {"error": ...} when no data is given; otherwise a dictionary with
        the keys "overall_summary", "academics", "coding_profiles" and
        "skills_distribution".
    """
    if not student_data:
        return {"error": "No student data provided."}

    # A section can be absent *or* explicitly null in the JSON. `or {}` maps
    # both cases to an empty dict so the chained lookups never run on None.
    coding_profiles = student_data.get("coding_profiles") or {}

    # --- Perform analysis on different sections of the profile ---
    academics = _analyze_academics(student_data.get("academic_profile") or {})
    leetcode = _analyze_leetcode(coding_profiles.get("leetcode") or {})
    github = _analyze_github(coding_profiles.get("github") or {})
    skills = _extract_skills(student_data)
    completeness = _calculate_profile_completeness(student_data)

    # --- Synthesize overall insights from the analyses ---
    archetype = _determine_student_archetype(skills, leetcode, github)

    # --- Assemble the final, comprehensive metrics object ---
    return {
        "overall_summary": {
            "student_archetype": archetype,
            "profile_completeness": completeness,
        },
        "academics": academics,
        "coding_profiles": {
            "leetcode": leetcode,
            "github": github,
        },
        "skills_distribution": skills,
    }

def _analyze_academics(academic_data: dict) -> dict:
    """
    Analyzes academic performance dynamically from the data provided.

    Produces a qualitative CGPA rating, an SGPA trajectory, the strongest and
    weakest subjects across all semesters, and a per-semester overview.

    Values that are JSON null (None) are treated like missing values (0 for
    numbers, empty for lists) instead of raising.

    Args:
        academic_data (dict): The student's academic section. Keys read:
            "overall_cgpa" and "semester_performance" (a list of semesters,
            each with "semester", "sgpa", "percentage" and "subjects"; each
            subject with "subject", "marks" and "grade").

    Returns:
        dict: Keys "cgpa", "rating", "trajectory", "overall_subject_strengths",
        "overall_subject_weaknesses" and "semester_overviews". Semesters
        without any subjects are left out of "semester_overviews".
    """
    def _or_zero(value):
        """Map JSON null (None) to 0 and pass every other value through."""
        return 0 if value is None else value

    academic_data = academic_data or {}
    semesters = academic_data.get("semester_performance") or []
    cgpa = _or_zero(academic_data.get("overall_cgpa", 0))

    # Qualitative Rating based on CGPA
    if cgpa >= THRESHOLDS['CGPA_EXCELLENT']:
        rating = "Excellent"
    elif cgpa >= THRESHOLDS['CGPA_GOOD']:
        rating = "Good"
    else:
        rating = "Needs Improvement"

    # Academic Trajectory: compare the average SGPA of the earlier half of the
    # semesters with the later half. Needs more than two semesters, and the
    # averages must differ by more than 0.2 to count as a trend.
    sgpa_list = [_or_zero(sem.get("sgpa", 0)) for sem in semesters]
    trajectory = "Stable"
    if len(sgpa_list) > 2:
        midpoint = len(sgpa_list) // 2
        first_half_avg = sum(sgpa_list[:midpoint]) / midpoint
        second_half_avg = sum(sgpa_list[midpoint:]) / (len(sgpa_list) - midpoint)
        if second_half_avg > first_half_avg + 0.2:
            trajectory = "Improving"
        elif second_half_avg < first_half_avg - 0.2:
            trajectory = "Declining"

    def _marks_key(record):
        """Sort key: a subject's marks, with null marks ranked as 0."""
        return _or_zero(record['marks'])

    # --- Detailed Semester Overviews and Overall Subject Analysis ---
    all_subjects_overall = []
    semester_overviews = []

    for semester_info in semesters:
        semester_subjects = []
        high_grades_count = 0

        for subject_info in semester_info.get("subjects") or []:
            subject_record = {
                "name": subject_info.get("subject"),
                "marks": subject_info.get("marks", 0),
            }
            semester_subjects.append(subject_record)
            all_subjects_overall.append(subject_record)

            if subject_info.get("grade") in ['O', 'A+']:
                high_grades_count += 1

        if semester_subjects:
            semester_subjects.sort(key=_marks_key)  # ascending by marks

            semester_overviews.append({
                "semester_number": semester_info.get("semester"),
                "sgpa": semester_info.get("sgpa"),
                "percentage": semester_info.get("percentage"),
                "top_subject": semester_subjects[-1],    # last item is highest
                "bottom_subject": semester_subjects[0],  # first item is lowest
                "high_grades_count": high_grades_count,
            })

    # Determine overall subject strengths and weaknesses from all semesters.
    ranked = sorted(all_subjects_overall, key=_marks_key, reverse=True)
    overall_strengths = ranked[:3]  # top 3 overall
    # Take the bottom 3 only from subjects that are not already strengths, so
    # a short subject list never reports the same subject in both lists.
    # With six or more subjects this is exactly the last three entries.
    overall_weaknesses = ranked[3:][-3:]

    return {
        "cgpa": cgpa,
        "rating": rating,
        "trajectory": trajectory,
        "overall_subject_strengths": overall_strengths,
        "overall_subject_weaknesses": overall_weaknesses,
        "semester_overviews": semester_overviews,
    }

# --- Coding-profile, skills, completeness and archetype helpers ---

def _analyze_leetcode(leetcode_data: dict) -> dict:
    """Performs a nuanced analysis of LeetCode performance."""
    if not leetcode_data: return {"rating": "Not Available", "score": 0, "total_solved": 0}
    total_solved = leetcode_data.get("totalSolved", 0)
    try:
        easy = int(leetcode_data.get("problemsByDifficulty", {}).get("Easy", "0/0").split('/')[0])
        medium = int(leetcode_data.get("problemsByDifficulty", {}).get("Medium", "0/0").split('/')[0])
        hard = int(leetcode_data.get("problemsByDifficulty", {}).get("Hard", "0/0").split('/')[0])
    except (ValueError, IndexError): easy, medium, hard = 0, 0, 0
    raw_score = (easy * WEIGHTS['LEETCODE_EASY'] + medium * WEIGHTS['LEETCODE_MEDIUM'] + hard * WEIGHTS['LEETCODE_HARD'])
    target_score = (150 * WEIGHTS['LEETCODE_EASY'] + 100 * WEIGHTS['LEETCODE_MEDIUM'] + 30 * WEIGHTS['LEETCODE_HARD'])
    normalized_score = round((raw_score / target_score) * 10, 1) if target_score > 0 else 0
    final_score = min(normalized_score, 10.0)
    rating = "Beginner"
    if hard > 10 or medium > 50: rating = "Advanced Problem Solver"
    elif medium > 25 or total_solved > THRESHOLDS['LEETCODE_TOTAL_HIGH']: rating = "Active Competitor"
    elif total_solved > THRESHOLDS['LEETCODE_TOTAL_MEDIUM']: rating = "Consistent Learner"
    return {"rating": rating, "score": final_score, "total_solved": total_solved, "difficulty_breakdown": {"easy": easy, "medium": medium, "hard": hard}}

def _analyze_github(github_data: dict) -> dict:
    """Analyzes GitHub profile for activity, impact, and tech stack."""
    if not github_data: return {"rating": "Not Available", "activity_level": "Unknown"}
    stats, repos = github_data.get("stats", {}), github_data.get("top_repositories", [])
    activity_level = "Low"
    if repos:
        try:
            latest_push = max(datetime.strptime(repo['last_pushed'], "%Y-%m-%d") for repo in repos if repo.get('last_pushed'))
            if (datetime.now() - latest_push).days < 7: activity_level = "Very Active"
            elif (datetime.now() - latest_push).days < 30: activity_level = "Active"
            elif (datetime.now() - latest_push).days < 90: activity_level = "Inactive"
        except (ValueError, TypeError): pass
    impact_score = sum(repo.get('stars', 0) * WEIGHTS['GITHUB_STARS'] + repo.get('forks', 0) * WEIGHTS['GITHUB_FORKS'] for repo in repos)
    top_languages = list(dict.fromkeys([repo.get("language") for repo in repos if repo.get("language")]))[:3]
    rating = "Needs Development"
    if impact_score > 50 or stats.get('public_repos', 0) > THRESHOLDS['GITHUB_REPOS_HIGH']: rating = "Strong Profile"
    elif activity_level in ["Very Active", "Active"] or stats.get('public_repos', 0) > 10: rating = "Good Profile"
    return {"rating": rating, "activity_level": activity_level, "top_languages": top_languages, "stats": stats}

def _extract_skills(student_data: dict) -> dict: # MODIFIED to return a dict
    """
    Extracts, combines, cleans, and COUNTS key skills for chart display.
    """
    from collections import Counter

    resume_skills = student_data.get("resume", {}).get("key_skills", [])
    leetcode_skills = [
        item.get("skill") for item in student_data.get("coding_profiles", {}).get("leetcode", {}).get("topSkillsSummary", [])
    ]
    
    # Normalize skills to title case for consistency
    normalized_resume = [s.strip().title() for s in resume_skills]
    normalized_leetcode = [s.strip().title() for s in leetcode_skills]

    # Combine and count occurrences (though here they are unique, this is a robust way to handle it)
    all_skills = normalized_resume + normalized_leetcode
    
    # Using Counter will give a dict like {'Python': 2, 'Java': 1}, perfect for charts
    # In this case, since we combine unique lists, counts will be 1 or 2, but it provides the right structure.
    skill_counts = dict(Counter(all_skills))
    
    return skill_counts

def _calculate_profile_completeness(student_data: dict) -> dict:
    """Scores the profile based on the presence of key data points."""
    checks = {
        "Academics": bool(student_data.get("academic_profile", {}).get("semester_performance")),
        "Resume": bool(student_data.get("resume", {}).get("key_skills")),
        "LeetCode": bool(student_data.get("coding_profiles", {}).get("leetcode")),
        "GitHub": bool(student_data.get("coding_profiles", {}).get("github")),
        "Codeforces": bool(student_data.get("coding_profiles", {}).get("codeforces"))
    }
    score = int((sum(checks.values()) / len(checks)) * 100)
    return {"score_percentage": score, "missing_sections": [key for key, value in checks.items() if not value]}

def _determine_student_archetype(skills: list, leetcode_metrics: dict, github_metrics: dict) -> list:
    """Generates dynamic tags based on analyzed metrics."""
    archetypes = []
    skills_lower = {s.lower() for s in skills}
    if any(kw in skills_lower for kw in ["tensorflow", "pytorch", "ai", "machine learning", "nlp", "computer vision"]): archetypes.append("AI/ML Enthusiast")
    if any(kw in skills_lower for kw in ["react", "node", "flask", "django", "backend", "frontend"]): archetypes.append("Web Developer")
    if leetcode_metrics.get("rating") in ["Advanced Problem Solver", "Active Competitor"]: archetypes.append("Competitive Programmer")
    if any(kw in skills_lower for kw in ["aws", "google cloud", "docker", "kubernetes"]): archetypes.append("Cloud & DevOps Oriented")
    return archetypes if archetypes else ["Generalist"]

# --- Testing Block ---
# Manual smoke test: loads the cleaned data file from the working directory,
# analyses one hard-coded sample student and prints the metrics as JSON.
if __name__ == '__main__':
    print("Testing advanced, fully data-driven dashboard_analyzer.py...")
    try:
        with open('final_cleaned_student_data.json', 'r', encoding='utf-8') as handle:
            records = json.load(handle)

        sample_enrollment = "35214811922"
        record = records.get(sample_enrollment)
        if not record:
            print(f"Error: Student with enrollment '{sample_enrollment}' not found.")
        else:
            report = get_dashboard_metrics(record)
            print("\n--- Generated Advanced Metrics ---")
            print(json.dumps(report, indent=4))
    except FileNotFoundError:
        print("Error: `final_cleaned_student_data.json` not found.")
    except Exception as e:
        # Anything else (bad JSON, a failure inside the analysis) is logged
        # with its traceback rather than crashing the script.
        logger.error(f"An unexpected error occurred during testing: {e}", exc_info=True)