FCT / services /universal_module.py
Parthnuwal7
Adding analytical content
3d015cd
"""Universal Module - Academic & Experience Scoring"""
import numpy as np
import re
from typing import Dict, Tuple
class UniversalModule:
    """Score a student on academic performance and experience.

    ``score`` derives eight features, each in the range [0, 1], from a
    dict of student data and combines them with ``feature_weights``.
    ``explain`` turns a features dict into short human-readable notes.
    """

    # Free-text answers shorter than this are treated as not provided,
    # both when scoring the text and when computing confidence.
    _MIN_TEXT_LENGTH = 20

    # Cut-offs used by explain() and by the feature descriptions, kept in
    # one place so a feature listed as "positive" always gets its
    # positive wording.
    _POSITIVE_THRESHOLD = 0.6
    _NEGATIVE_THRESHOLD = 0.4

    # (pattern, months per matched unit): the captured number is converted
    # to months and scaled so that six months or more scores 1.0.
    _NUMERIC_DURATION_PATTERNS = (
        (re.compile(r'\b(\d+)\s*months?\b'), 1.0),
        (re.compile(r'\b(\d+)\s*weeks?\b'), 0.25),
    )
    # (pattern, duration score awarded when the phrase is present).
    _FIXED_DURATION_PATTERNS = (
        (re.compile(r'summer\s+internship'), 0.5),
        (re.compile(r'year\s+long|full\s+year|annual'), 1.0),
    )

    _INTERNSHIP_QUALITY_KEYWORDS = (
        'company', 'startup', 'corporation', 'project', 'developed',
        'implemented', 'built', 'deployed', 'managed', 'led',
    )
    _LEADERSHIP_KEYWORDS = (
        'led', 'organized', 'president', 'captain', 'head',
        'coordinator', 'managed', 'founded',
    )
    _ACTIVITY_KEYWORDS = (
        'club', 'society', 'competition', 'hackathon', 'event',
        'volunteer', 'sports', 'cultural', 'technical',
    )
    _PLATFORM_KEYWORDS = (
        'coursera', 'udemy', 'edx', 'linkedin', 'google',
        'microsoft', 'aws', 'azure', 'ibm', 'oracle',
    )
    _TECH_KEYWORDS = (
        'python', 'java', 'machine learning', 'data science', 'cloud',
        'programming', 'development', 'database', 'web', 'mobile',
    )

    # feature -> (wording above the positive threshold, wording otherwise).
    # 'cgpa_norm' is formatted separately because it embeds the value.
    _FEATURE_DESCRIPTIONS = {
        'sgpa_trend': ("Strong upward trend in semester grades",
                       "Declining semester grades"),
        'sgpa_consistency': ("Very consistent semester performance",
                             "Inconsistent semester performance"),
        'marks_consistency': ("Consistent performance across academics",
                              "Variable academic performance"),
        'academic_improvement': ("Clear improvement over time",
                                 "Limited academic growth"),
        'internship_exposure': ("Strong internship experience",
                                "Limited internship exposure"),
        'ec_quality': ("Excellent extracurricular involvement",
                       "Limited extracurricular activities"),
        'cert_quality': ("Strong certification portfolio",
                         "Few professional certifications"),
    }

    def __init__(self):
        # Relative weight of each feature in the final score (sums to 1.0).
        self.feature_weights = {
            'cgpa_norm': 0.30,
            'sgpa_trend': 0.15,
            'sgpa_consistency': 0.10,
            'marks_consistency': 0.10,
            'academic_improvement': 0.10,
            'internship_exposure': 0.10,
            'ec_quality': 0.08,
            'cert_quality': 0.07,
        }

    def score(self, student_data: Dict) -> Tuple[float, float, Dict]:
        """Calculate the universal score for one student.

        Keys read from ``student_data`` (all optional; missing or ``None``
        values fall back to neutral/zero features): ``cgpa``,
        ``sgpa_sem1`` .. ``sgpa_sem8``, ``tenth_pct``, ``twelfth_pct``,
        ``internship_text``, ``extracurricular_text``,
        ``certifications_text``.

        Returns:
            ``(score, confidence, features)`` where ``score`` is the
            weighted sum of the features, ``confidence`` is the fraction
            (out of 8) of data-completeness checks that passed, and
            ``features`` maps each feature name to its value.
        """
        features = {}

        # `or 0` also covers a key that is present but None; a plain
        # .get(key, 0) would return None there and break the division.
        cgpa = student_data.get('cgpa') or 0
        features['cgpa_norm'] = max(0.0, min(cgpa / 10.0, 1.0))

        # Semesters with a null or non-positive SGPA are ignored.
        sgpa_values = [
            value
            for value in (
                student_data.get(f'sgpa_sem{sem}') for sem in range(1, 9)
            )
            if value is not None and value > 0
        ]

        if len(sgpa_values) >= 2:
            # Change from first to last available semester, centred on 0.5.
            trend = (sgpa_values[-1] - sgpa_values[0]) / 10.0
            features['sgpa_trend'] = max(0, min(trend + 0.5, 1.0))
        else:
            features['sgpa_trend'] = 0.5  # Neutral if insufficient data

        if len(sgpa_values) >= 3:
            # Lower spread means more consistent; a std of 3.0 or more is 0.
            spread = float(np.std(sgpa_values))
            features['sgpa_consistency'] = max(0.0, 1.0 - spread / 3.0)
        else:
            features['sgpa_consistency'] = 0.5

        tenth = student_data.get('tenth_pct')
        twelfth = student_data.get('twelfth_pct')
        has_school_marks = bool(tenth and twelfth)
        has_all_marks = bool(has_school_marks and cgpa)

        if has_all_marks:
            cgpa_pct = (cgpa / 10.0) * 100

            # Consistency across 10th, 12th and CGPA (as a percentage).
            marks_spread = float(np.std([tenth, twelfth, cgpa_pct]))
            features['marks_consistency'] = max(0.0, 1.0 - marks_spread / 30.0)

            # Improvement: both steps up, one step up, or neither.
            if cgpa_pct > twelfth and twelfth > tenth:
                features['academic_improvement'] = 1.0
            elif cgpa_pct > twelfth or twelfth > tenth:
                features['academic_improvement'] = 0.7
            else:
                features['academic_improvement'] = 0.3
        else:
            features['marks_consistency'] = 0.5
            features['academic_improvement'] = 0.5

        # Free-text answers; None is treated as an empty answer.
        internship_text = student_data.get('internship_text') or ''
        ec_text = student_data.get('extracurricular_text') or ''
        cert_text = student_data.get('certifications_text') or ''

        features['internship_exposure'] = self._assess_internship_quality(internship_text)
        features['ec_quality'] = self._assess_extracurricular_quality(ec_text)
        features['cert_quality'] = self._assess_certification_quality(cert_text)

        score = sum(features[name] * self.feature_weights[name] for name in features)

        # Confidence: one check per feature's underlying data. The text
        # checks use the same minimum length as the text assessors, so an
        # answer that is scored is also counted as provided.
        completeness_checks = (
            cgpa > 0,
            len(sgpa_values) >= 2,
            len(sgpa_values) >= 3,
            has_school_marks,
            has_all_marks,
            len(internship_text) >= self._MIN_TEXT_LENGTH,
            len(ec_text) >= self._MIN_TEXT_LENGTH,
            len(cert_text) >= self._MIN_TEXT_LENGTH,
        )
        confidence = sum(1 for ok in completeness_checks if ok) / len(completeness_checks)

        return score, confidence, features

    def explain(self, features: Dict) -> Dict:
        """Generate an explanation for a features dict.

        Returns a dict with two lists. ``top_positive_features`` holds
        those of the three highest-valued features whose value is above
        0.6; ``top_negative_features`` holds those of the three
        lowest-valued features whose value is below 0.4. Each entry has
        ``feature``, ``value`` (rounded to 2 places) and ``description``.
        Both lists follow descending value order.
        """
        explanations = {
            'top_positive_features': [],
            'top_negative_features': [],
        }

        ranked = sorted(features.items(), key=lambda item: item[1], reverse=True)

        for name, value in ranked[:3]:
            if value > self._POSITIVE_THRESHOLD:
                explanations['top_positive_features'].append({
                    'feature': name,
                    'value': round(value, 2),
                    'description': self._get_feature_description(name, value),
                })

        for name, value in ranked[-3:]:
            if value < self._NEGATIVE_THRESHOLD:
                explanations['top_negative_features'].append({
                    'feature': name,
                    'value': round(value, 2),
                    'description': self._get_feature_description(name, value),
                })

        return explanations

    @staticmethod
    def _count_keywords(text_lower: str, keywords) -> int:
        """Count how many of ``keywords`` occur in ``text_lower``.

        A keyword must begin at a word boundary, so 'led' is not found
        inside 'enrolled' nor 'head' inside 'ahead'; trailing characters
        are allowed so 'club' still matches 'clubs'. Each keyword counts
        at most once.
        """
        return sum(
            1 for keyword in keywords
            if re.search(r'\b' + re.escape(keyword), text_lower)
        )

    def _assess_internship_quality(self, text: str) -> float:
        """Extract an internship quality score in [0, 1] from free text.

        Combines duration (40%), presence of quality keywords (40%) and
        text length up to 500 characters (20%). Text that is empty or
        shorter than 20 characters scores 0.0.
        """
        if not text or len(text) < self._MIN_TEXT_LENGTH:
            return 0.0

        text_lower = text.lower()

        # Best duration evidence wins: explicit "N months"/"N weeks"
        # (largest N per unit) or a fixed-value phrase.
        duration_score = 0.0
        for pattern, months_per_unit in self._NUMERIC_DURATION_PATTERNS:
            amounts = pattern.findall(text_lower)
            if amounts:
                months = max(int(amount) for amount in amounts) * months_per_unit
                duration_score = max(duration_score, min(months / 6.0, 1.0))
        for pattern, fixed_score in self._FIXED_DURATION_PATTERNS:
            if pattern.search(text_lower):
                duration_score = max(duration_score, fixed_score)

        keywords = self._INTERNSHIP_QUALITY_KEYWORDS
        quality_count = self._count_keywords(text_lower, keywords)

        score = duration_score * 0.4
        score += min(quality_count / len(keywords), 1.0) * 0.4
        score += min(len(text) / 500, 1.0) * 0.2  # Length indicates detail
        return min(score, 1.0)

    def _assess_extracurricular_quality(self, text: str) -> float:
        """Extract an extracurricular quality score in [0, 1] from free text.

        Combines leadership keywords (40%, saturating at 3), activity-type
        keywords (40%, saturating at 4) and text length up to 400
        characters (20%). Text that is empty or shorter than 20 characters
        scores 0.0.
        """
        if not text or len(text) < self._MIN_TEXT_LENGTH:
            return 0.0

        text_lower = text.lower()
        leadership_count = self._count_keywords(text_lower, self._LEADERSHIP_KEYWORDS)
        activity_count = self._count_keywords(text_lower, self._ACTIVITY_KEYWORDS)

        score = min(leadership_count / 3, 1.0) * 0.4
        score += min(activity_count / 4, 1.0) * 0.4
        score += min(len(text) / 400, 1.0) * 0.2  # Detail level
        return min(score, 1.0)

    def _assess_certification_quality(self, text: str) -> float:
        """Extract a certification quality score in [0, 1] from free text.

        Combines platform keywords (40%, saturating at 3), technical-skill
        keywords (40%, saturating at 4) and text length up to 400
        characters (20%). Text that is empty or shorter than 20 characters
        scores 0.0.
        """
        if not text or len(text) < self._MIN_TEXT_LENGTH:
            return 0.0

        text_lower = text.lower()
        platform_count = self._count_keywords(text_lower, self._PLATFORM_KEYWORDS)
        tech_count = self._count_keywords(text_lower, self._TECH_KEYWORDS)

        score = min(platform_count / 3, 1.0) * 0.4
        score += min(tech_count / 4, 1.0) * 0.4
        score += min(len(text) / 400, 1.0) * 0.2  # Detail level
        return min(score, 1.0)

    def _get_feature_description(self, feature: str, value: float) -> str:
        """Get a human-readable description of a feature value.

        'cgpa_norm' is rendered on a 0-10 scale. Every other known feature
        returns its positive wording when ``value`` is above 0.6 (the same
        cut-off explain() uses for positive features) and its negative
        wording otherwise. Unknown feature names are returned unchanged.
        """
        if feature == 'cgpa_norm':
            return f"CGPA performance: {value*10:.1f}/10"

        wording = self._FEATURE_DESCRIPTIONS.get(feature)
        if wording is None:
            return feature

        positive_text, negative_text = wording
        return positive_text if value > self._POSITIVE_THRESHOLD else negative_text