Spaces:

akshit7093
/

Student_Analyzer

Sleeping

App Files Files Community

Student_Analyzer / dashboard_analyzer.py

akshit7093

changes

e4d9b49 3 months ago

raw

history blame contribute delete

11.5 kB

	# dashboard_analyzer.py
	import logging
	import json
	from datetime import datetime

	# Module-level logger; in the visible code it is only used by the __main__ testing block.
	logger = logging.getLogger(__name__)

	# --- Constants for Scoring and Analysis ---
	# These can be tweaked to adjust the analysis logic. They are data-agnostic.

	# Cut-offs used when turning raw numbers into qualitative ratings.
	THRESHOLDS = {
	'CGPA_EXCELLENT': 8.5,  # CGPA at or above this is rated "Excellent"
	'CGPA_GOOD': 7.5,  # CGPA at or above this (but below excellent) is rated "Good"
	'LEETCODE_TOTAL_HIGH': 200,  # total solved above this qualifies as "Active Competitor"
	'LEETCODE_TOTAL_MEDIUM': 100,  # total solved above this qualifies as "Consistent Learner"
	'GITHUB_STARS_HIGH': 10,  # NOTE(review): not referenced in the visible code
	'GITHUB_REPOS_HIGH': 20,  # public repo count above this qualifies as "Strong Profile"
	}

	# Multipliers applied to raw counts when computing the LeetCode score
	# and the GitHub impact score.
	WEIGHTS = {
	'LEETCODE_EASY': 0.2,  # per easy problem solved
	'LEETCODE_MEDIUM': 1.0,  # per medium problem solved
	'LEETCODE_HARD': 2.5,  # per hard problem solved
	'GITHUB_STARS': 2.0,  # per star, summed over the listed repositories
	'GITHUB_FORKS': 3.0,  # per fork, summed over the listed repositories
	'GITHUB_REPOS': 0.5,  # NOTE(review): not referenced in the visible code
	}

	def get_dashboard_metrics(student_data: dict) -> dict:
	    """
	    Build the complete dashboard payload for a single student.

	    Each section of the raw record is handed to its dedicated analyzer and the
	    individual results are combined into one nested dictionary.

	    Args:
	        student_data (dict): The dictionary containing all data for one student.
	            The sections read here are "academic_profile" and
	            "coding_profiles" (with "leetcode" and "github" entries); the whole
	            record is also passed to the skills and completeness analyzers.

	    Returns:
	        dict: ``{"error": "No student data provided."}`` when ``student_data``
	        is empty or otherwise falsy. Otherwise a dictionary with the keys
	        "overall_summary", "academics", "coding_profiles" and
	        "skills_distribution".
	    """
	    # Guard clause: nothing to analyze.
	    if not student_data:
	        return {"error": "No student data provided."}

	    coding_profiles = student_data.get("coding_profiles", {})

	    # --- Section-by-section analysis ---
	    academics_section = _analyze_academics(student_data.get("academic_profile", {}))
	    leetcode_section = _analyze_leetcode(coding_profiles.get("leetcode", {}))
	    github_section = _analyze_github(coding_profiles.get("github", {}))
	    skills_section = _extract_skills(student_data)
	    completeness_section = _calculate_profile_completeness(student_data)

	    # --- Cross-section insight derived from the results above ---
	    archetype_tags = _determine_student_archetype(
	        skills_section, leetcode_section, github_section
	    )

	    # --- Final payload ---
	    summary = {
	        "student_archetype": archetype_tags,
	        "profile_completeness": completeness_section,
	    }
	    platforms = {
	        "leetcode": leetcode_section,
	        "github": github_section,
	    }
	    return {
	        "overall_summary": summary,
	        "academics": academics_section,
	        "coding_profiles": platforms,
	        "skills_distribution": skills_section,
	    }

	def _analyze_academics(academic_data: dict) -> dict:
	    """
	    Summarize academic performance from the ``academic_profile`` section.

	    Produces a qualitative CGPA rating, an SGPA-based trajectory, the overall
	    strongest and weakest subjects, and one overview per semester that lists
	    at least one subject.

	    Null or non-numeric CGPA, SGPA and marks values are treated as 0 instead
	    of raising, and numeric strings are converted to floats.

	    Args:
	        academic_data (dict): May contain "overall_cgpa" and
	            "semester_performance", a list of semester dicts with the keys
	            "semester", "sgpa", "percentage" and "subjects". Each subject dict
	            is read for "subject", "marks" and "grade". ``None`` is treated as
	            an empty profile.

	    Returns:
	        dict: Keys "cgpa", "rating", "trajectory",
	        "overall_subject_strengths", "overall_subject_weaknesses" and
	        "semester_overviews". Strengths hold up to three top-scoring subjects;
	        weaknesses hold up to three lowest-scoring subjects that are not
	        already listed as strengths.
	    """
	    def _num(value, default=0):
	        # Pass real numbers through untouched; convert numeric strings;
	        # fall back to the default for None or unparseable input.
	        if isinstance(value, (int, float)):
	            return value
	        try:
	            return float(value)
	        except (TypeError, ValueError):
	            return default

	    academic_data = academic_data or {}
	    semesters = academic_data.get("semester_performance") or []
	    cgpa = _num(academic_data.get("overall_cgpa"))

	    # Qualitative rating based on CGPA
	    if cgpa >= THRESHOLDS['CGPA_EXCELLENT']:
	        rating = "Excellent"
	    elif cgpa >= THRESHOLDS['CGPA_GOOD']:
	        rating = "Good"
	    else:
	        rating = "Needs Improvement"

	    # Academic trajectory: compare the average SGPA of the first half of the
	    # list with the second half (the extra semester of an odd count goes to
	    # the second half). A gap of more than 0.2 either way changes the label.
	    # NOTE(review): assumes semester_performance is in chronological order.
	    sgpa_list = [_num(sem.get("sgpa")) for sem in semesters]
	    trajectory = "Stable"
	    if len(sgpa_list) > 2:
	        midpoint = len(sgpa_list) // 2
	        first_half_avg = sum(sgpa_list[:midpoint]) / midpoint
	        second_half_avg = sum(sgpa_list[midpoint:]) / (len(sgpa_list) - midpoint)
	        if second_half_avg > first_half_avg + 0.2:
	            trajectory = "Improving"
	        elif second_half_avg < first_half_avg - 0.2:
	            trajectory = "Declining"

	    # --- Detailed semester overviews and overall subject analysis ---
	    all_subjects_overall = []
	    semester_overviews = []

	    for semester_info in semesters:
	        semester_subjects = []
	        high_grades_count = 0

	        for subject_info in semester_info.get("subjects") or []:
	            subject_record = {
	                "name": subject_info.get("subject"),
	                "marks": _num(subject_info.get("marks")),
	            }
	            semester_subjects.append(subject_record)
	            all_subjects_overall.append(subject_record)

	            if subject_info.get("grade") in ('O', 'A+'):
	                high_grades_count += 1

	        # Semesters without subjects have no top/bottom subject to report
	        # and are left out of the overviews.
	        if not semester_subjects:
	            continue

	        by_marks = sorted(semester_subjects, key=lambda rec: rec['marks'])
	        semester_overviews.append({
	            "semester_number": semester_info.get("semester"),
	            "sgpa": semester_info.get("sgpa"),
	            "percentage": semester_info.get("percentage"),
	            "top_subject": by_marks[-1],  # highest marks
	            "bottom_subject": by_marks[0],  # lowest marks
	            "high_grades_count": high_grades_count,
	        })

	    # Overall strengths and weaknesses across every semester, ranked by marks
	    # in descending order.
	    ranked = sorted(all_subjects_overall, key=lambda rec: rec['marks'], reverse=True)
	    overall_strengths = ranked[:3]
	    # Take the weaknesses only from subjects ranked below the strengths so
	    # that a short subject list never reports the same subject as both.
	    overall_weaknesses = ranked[3:][-3:]

	    return {
	        "cgpa": cgpa,
	        "rating": rating,
	        "trajectory": trajectory,
	        "overall_subject_strengths": overall_strengths,
	        "overall_subject_weaknesses": overall_weaknesses,
	        "semester_overviews": semester_overviews,
	    }

	# --- The following functions are already fully data-driven and remain unchanged ---

	def _analyze_leetcode(leetcode_data: dict) -> dict:
	"""Performs a nuanced analysis of LeetCode performance."""
	if not leetcode_data: return {"rating": "Not Available", "score": 0, "total_solved": 0}
	total_solved = leetcode_data.get("totalSolved", 0)
	try:
	easy = int(leetcode_data.get("problemsByDifficulty", {}).get("Easy", "0/0").split('/')[0])
	medium = int(leetcode_data.get("problemsByDifficulty", {}).get("Medium", "0/0").split('/')[0])
	hard = int(leetcode_data.get("problemsByDifficulty", {}).get("Hard", "0/0").split('/')[0])
	except (ValueError, IndexError): easy, medium, hard = 0, 0, 0
	raw_score = (easy * WEIGHTS['LEETCODE_EASY'] + medium * WEIGHTS['LEETCODE_MEDIUM'] + hard * WEIGHTS['LEETCODE_HARD'])
	target_score = (150 * WEIGHTS['LEETCODE_EASY'] + 100 * WEIGHTS['LEETCODE_MEDIUM'] + 30 * WEIGHTS['LEETCODE_HARD'])
	normalized_score = round((raw_score / target_score) * 10, 1) if target_score > 0 else 0
	final_score = min(normalized_score, 10.0)
	rating = "Beginner"
	if hard > 10 or medium > 50: rating = "Advanced Problem Solver"
	elif medium > 25 or total_solved > THRESHOLDS['LEETCODE_TOTAL_HIGH']: rating = "Active Competitor"
	elif total_solved > THRESHOLDS['LEETCODE_TOTAL_MEDIUM']: rating = "Consistent Learner"
	return {"rating": rating, "score": final_score, "total_solved": total_solved, "difficulty_breakdown": {"easy": easy, "medium": medium, "hard": hard}}

	def _analyze_github(github_data: dict) -> dict:
	"""Analyzes GitHub profile for activity, impact, and tech stack."""
	if not github_data: return {"rating": "Not Available", "activity_level": "Unknown"}
	stats, repos = github_data.get("stats", {}), github_data.get("top_repositories", [])
	activity_level = "Low"
	if repos:
	try:
	latest_push = max(datetime.strptime(repo['last_pushed'], "%Y-%m-%d") for repo in repos if repo.get('last_pushed'))
	if (datetime.now() - latest_push).days < 7: activity_level = "Very Active"
	elif (datetime.now() - latest_push).days < 30: activity_level = "Active"
	elif (datetime.now() - latest_push).days < 90: activity_level = "Inactive"
	except (ValueError, TypeError): pass
	impact_score = sum(repo.get('stars', 0) * WEIGHTS['GITHUB_STARS'] + repo.get('forks', 0) * WEIGHTS['GITHUB_FORKS'] for repo in repos)
	top_languages = list(dict.fromkeys([repo.get("language") for repo in repos if repo.get("language")]))[:3]
	rating = "Needs Development"
	if impact_score > 50 or stats.get('public_repos', 0) > THRESHOLDS['GITHUB_REPOS_HIGH']: rating = "Strong Profile"
	elif activity_level in ["Very Active", "Active"] or stats.get('public_repos', 0) > 10: rating = "Good Profile"
	return {"rating": rating, "activity_level": activity_level, "top_languages": top_languages, "stats": stats}

	def _extract_skills(student_data: dict) -> dict: # MODIFIED to return a dict
	"""
	Extracts, combines, cleans, and COUNTS key skills for chart display.
	"""
	from collections import Counter

	resume_skills = student_data.get("resume", {}).get("key_skills", [])
	leetcode_skills = [
	item.get("skill") for item in student_data.get("coding_profiles", {}).get("leetcode", {}).get("topSkillsSummary", [])
	]

	# Normalize skills to title case for consistency
	normalized_resume = [s.strip().title() for s in resume_skills]
	normalized_leetcode = [s.strip().title() for s in leetcode_skills]

	# Combine and count occurrences (though here they are unique, this is a robust way to handle it)
	all_skills = normalized_resume + normalized_leetcode

	# Using Counter will give a dict like {'Python': 2, 'Java': 1}, perfect for charts
	# In this case, since we combine unique lists, counts will be 1 or 2, but it provides the right structure.
	skill_counts = dict(Counter(all_skills))

	return skill_counts

	def _calculate_profile_completeness(student_data: dict) -> dict:
	"""Scores the profile based on the presence of key data points."""
	checks = {
	"Academics": bool(student_data.get("academic_profile", {}).get("semester_performance")),
	"Resume": bool(student_data.get("resume", {}).get("key_skills")),
	"LeetCode": bool(student_data.get("coding_profiles", {}).get("leetcode")),
	"GitHub": bool(student_data.get("coding_profiles", {}).get("github")),
	"Codeforces": bool(student_data.get("coding_profiles", {}).get("codeforces"))
	}
	score = int((sum(checks.values()) / len(checks)) * 100)
	return {"score_percentage": score, "missing_sections": [key for key, value in checks.items() if not value]}

	def _determine_student_archetype(skills: list, leetcode_metrics: dict, github_metrics: dict) -> list:
	"""Generates dynamic tags based on analyzed metrics."""
	archetypes = []
	skills_lower = {s.lower() for s in skills}
	if any(kw in skills_lower for kw in ["tensorflow", "pytorch", "ai", "machine learning", "nlp", "computer vision"]): archetypes.append("AI/ML Enthusiast")
	if any(kw in skills_lower for kw in ["react", "node", "flask", "django", "backend", "frontend"]): archetypes.append("Web Developer")
	if leetcode_metrics.get("rating") in ["Advanced Problem Solver", "Active Competitor"]: archetypes.append("Competitive Programmer")
	if any(kw in skills_lower for kw in ["aws", "google cloud", "docker", "kubernetes"]): archetypes.append("Cloud & DevOps Oriented")
	return archetypes if archetypes else ["Generalist"]

	# --- Testing Block ---
	# Manual smoke test: load the cleaned data file, analyze one known student,
	# and pretty-print the resulting metrics.
	if __name__ == '__main__':
	    print("Testing advanced, fully data-driven dashboard_analyzer.py...")
	    try:
	        with open('final_cleaned_student_data.json', 'r', encoding='utf-8') as f:
	            full_data = json.load(f)

	        # The data file is keyed by enrollment number.
	        sample_enrollment = "35214811922"
	        student_sample = full_data.get(sample_enrollment)

	        if not student_sample:
	            print(f"Error: Student with enrollment '{sample_enrollment}' not found.")
	        else:
	            metrics = get_dashboard_metrics(student_sample)
	            print("\n--- Generated Advanced Metrics ---")
	            print(json.dumps(metrics, indent=4))
	    except FileNotFoundError:
	        print("Error: `final_cleaned_student_data.json` not found.")
	    except Exception as e:
	        # Top-level boundary of the smoke test: log with traceback and exit.
	        logger.error(f"An unexpected error occurred during testing: {e}", exc_info=True)