Spaces:

parthnuwal7
/

FCT

Sleeping

FCT / services /universal_module.py

Parthnuwal7

Adding analytical content

3d015cd 3 months ago

10.1 kB

	"""Universal Module - Academic & Experience Scoring"""
	import numpy as np
	import re
	from typing import Dict, Tuple

	class UniversalModule:
	    """Score a student on academic performance and experience.

	    Eight features, each scaled to the 0-1 range, are combined into a single
	    weighted score using ``feature_weights`` (the weights sum to 1.0).
	    """

	    # Durations given as a number: (regex capturing the count, months per unit).
	    _NUMERIC_DURATIONS = (
	        (re.compile(r'\b(\d+)\s*months?\b'), 1.0),
	        (re.compile(r'\b(\d+)\s*weeks?\b'), 0.25),
	    )
	    # Durations given as a phrase: (regex, duration score awarded on a match).
	    # The second pattern uses real alternation; escaping the pipes would only
	    # match a literal "|" character and the phrases would never be detected.
	    _PHRASE_DURATIONS = (
	        (re.compile(r'summer\s+internship'), 0.5),
	        (re.compile(r'year\s+long|full\s+year|annual'), 1.0),
	    )

	    _INTERNSHIP_KEYWORDS = ('company', 'startup', 'corporation', 'project', 'developed',
	                            'implemented', 'built', 'deployed', 'managed', 'led')
	    _LEADERSHIP_KEYWORDS = ('led', 'organized', 'president', 'captain', 'head',
	                            'coordinator', 'managed', 'founded')
	    _ACTIVITY_KEYWORDS = ('club', 'society', 'competition', 'hackathon', 'event',
	                          'volunteer', 'sports', 'cultural', 'technical')
	    _PLATFORM_KEYWORDS = ('coursera', 'udemy', 'edx', 'linkedin', 'google',
	                          'microsoft', 'aws', 'azure', 'ibm', 'oracle')
	    _TECH_KEYWORDS = ('python', 'java', 'machine learning', 'data science', 'cloud',
	                      'programming', 'development', 'database', 'web', 'mobile')

	    # Binary feature descriptions: feature -> (threshold, text above, text otherwise).
	    _DESCRIPTIONS = {
	        'sgpa_trend': (0.6, "Strong upward trend in semester grades", "Declining semester grades"),
	        'sgpa_consistency': (0.7, "Very consistent semester performance", "Inconsistent semester performance"),
	        'marks_consistency': (0.7, "Consistent performance across academics", "Variable academic performance"),
	        'academic_improvement': (0.7, "Clear improvement over time", "Limited academic growth"),
	        'internship_exposure': (0.6, "Strong internship experience", "Limited internship exposure"),
	        'ec_quality': (0.6, "Excellent extracurricular involvement", "Limited extracurricular activities"),
	        'cert_quality': (0.6, "Strong certification portfolio", "Few professional certifications"),
	    }

	    def __init__(self):
	        self.feature_weights = {
	            'cgpa_norm': 0.30,
	            'sgpa_trend': 0.15,
	            'sgpa_consistency': 0.10,
	            'marks_consistency': 0.10,
	            'academic_improvement': 0.10,
	            'internship_exposure': 0.10,
	            'ec_quality': 0.08,
	            'cert_quality': 0.07,
	        }

	    def score(self, student_data: Dict) -> Tuple[float, float, Dict]:
	        """Calculate the universal score for one student.

	        Args:
	            student_data: Mapping read with ``.get``. Keys used are ``cgpa``,
	                ``sgpa_sem1`` .. ``sgpa_sem8``, ``tenth_pct``, ``twelfth_pct``,
	                ``internship_text``, ``extracurricular_text`` and
	                ``certifications_text``. Missing or ``None`` values are
	                tolerated and fall back to neutral/zero features.

	        Returns:
	            ``(score, confidence, features)`` where ``score`` is the weighted
	            sum of the features, ``confidence`` is the fraction of the eight
	            completeness checks that passed, and ``features`` maps each
	            feature name to its value.
	        """
	        features = {}

	        # CGPA on a 0-10 scale. ``or 0`` treats an explicit None like a
	        # missing key; the clamp keeps out-of-range input inside [0, 1].
	        cgpa = student_data.get('cgpa') or 0
	        features['cgpa_norm'] = max(0.0, min(cgpa / 10.0, 1.0))

	        # Semester GPAs in order, skipping null/zero entries.
	        sgpa_values = [
	            value
	            for value in (student_data.get(f'sgpa_sem{sem}') for sem in range(1, 9))
	            if value is not None and value > 0
	        ]

	        # Trend: first-to-last change, normalised and centred on 0.5.
	        if len(sgpa_values) >= 2:
	            trend = (sgpa_values[-1] - sgpa_values[0]) / 10.0
	            features['sgpa_trend'] = max(0.0, min(trend + 0.5, 1.0))
	        else:
	            features['sgpa_trend'] = 0.5  # Neutral if insufficient data

	        # Consistency: lower standard deviation means a higher score.
	        if len(sgpa_values) >= 3:
	            sgpa_std = float(np.std(sgpa_values))
	            features['sgpa_consistency'] = max(0.0, 1 - (sgpa_std / 3.0))
	        else:
	            features['sgpa_consistency'] = 0.5

	        tenth = student_data.get('tenth_pct')
	        twelfth = student_data.get('twelfth_pct')
	        has_all_marks = bool(tenth and twelfth and cgpa)

	        if has_all_marks:
	            cgpa_pct = (cgpa / 10.0) * 100

	            # Consistency across 10th, 12th and CGPA (as a percentage).
	            marks_std = float(np.std([tenth, twelfth, cgpa_pct]))
	            features['marks_consistency'] = max(0.0, 1 - (marks_std / 30.0))

	            # Improvement: 1.0 if both steps rose, 0.7 if one did, else 0.3.
	            rose_in_college = cgpa_pct > twelfth
	            rose_in_school = twelfth > tenth
	            if rose_in_college and rose_in_school:
	                features['academic_improvement'] = 1.0
	            elif rose_in_college or rose_in_school:
	                features['academic_improvement'] = 0.7
	            else:
	                features['academic_improvement'] = 0.3
	        else:
	            features['marks_consistency'] = 0.5
	            features['academic_improvement'] = 0.5

	        # Free-text responses; None is treated as an empty answer.
	        internship_text = student_data.get('internship_text') or ''
	        ec_text = student_data.get('extracurricular_text') or ''
	        cert_text = student_data.get('certifications_text') or ''

	        features['internship_exposure'] = self._assess_internship_quality(internship_text)
	        features['ec_quality'] = self._assess_extracurricular_quality(ec_text)
	        features['cert_quality'] = self._assess_certification_quality(cert_text)

	        score = sum(value * self.feature_weights[name] for name, value in features.items())

	        # Confidence is the share of completeness checks that pass.
	        completeness_checks = [
	            cgpa > 0,
	            len(sgpa_values) >= 2,
	            len(sgpa_values) >= 3,
	            bool(tenth and twelfth),
	            has_all_marks,
	            len(internship_text) > 20,
	            len(ec_text) > 20,
	            len(cert_text) > 20,
	        ]
	        confidence = sum(completeness_checks) / len(completeness_checks)

	        return score, confidence, features

	    def explain(self, features: Dict) -> Dict:
	        """Generate an explanation for a features dict.

	        Among the three highest-valued features, those above 0.6 are listed
	        under ``top_positive_features``; among the three lowest-valued, those
	        below 0.4 are listed under ``top_negative_features``. Each entry has
	        ``feature``, ``value`` (rounded to 2 decimals) and ``description``.
	        """
	        ranked = sorted(features.items(), key=lambda item: item[1], reverse=True)

	        def describe(name, value):
	            return {
	                'feature': name,
	                'value': round(value, 2),
	                'description': self._get_feature_description(name, value),
	            }

	        return {
	            'top_positive_features': [
	                describe(name, value) for name, value in ranked[:3] if value > 0.6
	            ],
	            'top_negative_features': [
	                describe(name, value) for name, value in ranked[-3:] if value < 0.4
	            ],
	        }

	    @staticmethod
	    def _keyword_fraction(text_lower: str, keywords, saturation: float) -> float:
	        """Return the number of keywords found as substrings of ``text_lower``,
	        divided by ``saturation`` and capped at 1.0."""
	        hits = sum(1 for keyword in keywords if keyword in text_lower)
	        return min(hits / saturation, 1.0)

	    def _assess_internship_quality(self, text: str) -> float:
	        """Score internship text in the 0-1 range.

	        Texts shorter than 20 characters score 0.0. Otherwise the score is
	        40% duration (six months or more, or a year-long phrase, is full
	        marks), 40% share of quality keywords present, and 20% length
	        (500+ characters is full marks).
	        """
	        if not text or len(text) < 20:
	            return 0.0

	        text_lower = text.lower()

	        duration_score = 0.0
	        for pattern, months_per_unit in self._NUMERIC_DURATIONS:
	            counts = pattern.findall(text_lower)
	            if counts:
	                months = max(int(count) for count in counts) * months_per_unit
	                duration_score = max(duration_score, min(months / 6.0, 1.0))
	        for pattern, phrase_score in self._PHRASE_DURATIONS:
	            if pattern.search(text_lower):
	                duration_score = max(duration_score, phrase_score)

	        score = duration_score * 0.4
	        score += self._keyword_fraction(
	            text_lower, self._INTERNSHIP_KEYWORDS, len(self._INTERNSHIP_KEYWORDS)
	        ) * 0.4
	        # Length indicates detail
	        score += min(len(text) / 500, 1.0) * 0.2

	        return min(score, 1.0)

	    def _assess_extracurricular_quality(self, text: str) -> float:
	        """Score extracurricular text in the 0-1 range.

	        Texts shorter than 20 characters score 0.0. Otherwise: 40% leadership
	        keywords (3 hits is full marks), 40% activity keywords (4 hits is
	        full marks), 20% length (400+ characters is full marks).
	        """
	        if not text or len(text) < 20:
	            return 0.0

	        text_lower = text.lower()
	        score = self._keyword_fraction(text_lower, self._LEADERSHIP_KEYWORDS, 3) * 0.4
	        score += self._keyword_fraction(text_lower, self._ACTIVITY_KEYWORDS, 4) * 0.4
	        score += min(len(text) / 400, 1.0) * 0.2

	        return min(score, 1.0)

	    def _assess_certification_quality(self, text: str) -> float:
	        """Score certification text in the 0-1 range.

	        Texts shorter than 20 characters score 0.0. Otherwise: 40% platform
	        keywords (3 hits is full marks), 40% technical keywords (4 hits is
	        full marks), 20% length (400+ characters is full marks).
	        """
	        if not text or len(text) < 20:
	            return 0.0

	        text_lower = text.lower()
	        score = self._keyword_fraction(text_lower, self._PLATFORM_KEYWORDS, 3) * 0.4
	        score += self._keyword_fraction(text_lower, self._TECH_KEYWORDS, 4) * 0.4
	        score += min(len(text) / 400, 1.0) * 0.2

	        return min(score, 1.0)

	    def _get_feature_description(self, feature: str, value: float) -> str:
	        """Return a human-readable description of ``feature`` at ``value``.

	        Unknown feature names are returned unchanged.
	        """
	        if feature == 'cgpa_norm':
	            return f"CGPA performance: {value*10:.1f}/10"
	        if feature not in self._DESCRIPTIONS:
	            return feature
	        threshold, above_text, otherwise_text = self._DESCRIPTIONS[feature]
	        return above_text if value > threshold else otherwise_text