Spaces:
Sleeping
Sleeping
File size: 10,073 Bytes
"""Universal Module - Academic & Experience Scoring"""
import numpy as np
import re
from typing import Dict, Tuple
class UniversalModule:
    """Score a student on academic performance and experience.

    Eight features, each in the range [0, 1], are derived from a student
    record and combined into a single weighted score using
    ``feature_weights``. A separate confidence value reports how much of
    the expected input was actually present.
    """

    # Cut-offs used by explain() to decide which features are worth
    # reporting; _get_feature_description() uses the same positive cut-off
    # so a feature listed as "positive" never gets a negative description.
    _POSITIVE_THRESHOLD = 0.6
    _NEGATIVE_THRESHOLD = 0.4

    # Numeric durations: (pattern capturing a count, months per unit).
    _NUMERIC_DURATION_PATTERNS = (
        (re.compile(r'\b(\d+)\s*months?\b'), 1.0),
        (re.compile(r'\b(\d+)\s*weeks?\b'), 0.25),
    )
    # Phrase durations: (pattern, duration score awarded directly).
    _PHRASE_DURATION_PATTERNS = (
        (re.compile(r'summer\s+internship'), 0.5),
        (re.compile(r'year\s+long|full\s+year|annual'), 1.0),
    )

    _INTERNSHIP_KEYWORDS = (
        'company', 'startup', 'corporation', 'project', 'developed',
        'implemented', 'built', 'deployed', 'managed', 'led',
    )
    _LEADERSHIP_KEYWORDS = (
        'led', 'organized', 'president', 'captain', 'head',
        'coordinator', 'managed', 'founded',
    )
    _ACTIVITY_KEYWORDS = (
        'club', 'society', 'competition', 'hackathon', 'event',
        'volunteer', 'sports', 'cultural', 'technical',
    )
    _PLATFORM_KEYWORDS = (
        'coursera', 'udemy', 'edx', 'linkedin', 'google',
        'microsoft', 'aws', 'azure', 'ibm', 'oracle',
    )
    _TECH_KEYWORDS = (
        'python', 'java', 'machine learning', 'data science', 'cloud',
        'programming', 'development', 'database', 'web', 'mobile',
    )

    # feature -> (description when value > _POSITIVE_THRESHOLD, otherwise)
    _FEATURE_LABELS = {
        'sgpa_trend': ("Strong upward trend in semester grades",
                       "Declining semester grades"),
        'sgpa_consistency': ("Very consistent semester performance",
                             "Inconsistent semester performance"),
        'marks_consistency': ("Consistent performance across academics",
                              "Variable academic performance"),
        'academic_improvement': ("Clear improvement over time",
                                 "Limited academic growth"),
        'internship_exposure': ("Strong internship experience",
                                "Limited internship exposure"),
        'ec_quality': ("Excellent extracurricular involvement",
                       "Limited extracurricular activities"),
        'cert_quality': ("Strong certification portfolio",
                         "Few professional certifications"),
    }

    def __init__(self):
        # Relative contribution of each feature to the final score.
        self.feature_weights = {
            'cgpa_norm': 0.30,
            'sgpa_trend': 0.15,
            'sgpa_consistency': 0.10,
            'marks_consistency': 0.10,
            'academic_improvement': 0.10,
            'internship_exposure': 0.10,
            'ec_quality': 0.08,
            'cert_quality': 0.07,
        }

    def score(self, student_data: Dict) -> Tuple[float, float, Dict]:
        """Calculate the universal score for one student record.

        Args:
            student_data: Mapping read with ``.get``; the keys consulted are
                ``cgpa``, ``sgpa_sem1`` .. ``sgpa_sem8``, ``tenth_pct``,
                ``twelfth_pct``, ``internship_text``,
                ``extracurricular_text`` and ``certifications_text``.
                Missing or ``None`` values are tolerated.

        Returns:
            ``(score, confidence, features)`` where ``score`` is the
            weighted sum of the features, ``confidence`` is the fraction of
            the eight completeness checks that passed, and ``features`` maps
            feature name to its value.
        """
        features = {}

        # `or 0` (rather than a .get default) so that an explicit None is
        # treated like a missing value instead of raising on division.
        cgpa = student_data.get('cgpa') or 0
        # CGPA is treated as being on a 0-10 scale; clamp into [0, 1].
        features['cgpa_norm'] = max(0.0, min(cgpa / 10.0, 1.0))

        # Keep only semesters that actually have a positive SGPA recorded.
        sgpa_values = [
            value
            for value in (
                student_data.get(f'sgpa_sem{sem_num}')
                for sem_num in range(1, 9)
            )
            if value is not None and value > 0
        ]

        if len(sgpa_values) >= 2:
            # Change from first to last available semester, centred at 0.5.
            trend = (sgpa_values[-1] - sgpa_values[0]) / 10.0
            features['sgpa_trend'] = max(0, min(trend + 0.5, 1.0))
        else:
            features['sgpa_trend'] = 0.5  # Neutral if insufficient data

        if len(sgpa_values) >= 3:
            # Lower spread means more consistent, hence a higher value.
            std_dev = float(np.std(sgpa_values))
            features['sgpa_consistency'] = max(0, 1 - (std_dev / 3.0))
        else:
            features['sgpa_consistency'] = 0.5

        tenth = student_data.get('tenth_pct')
        twelfth = student_data.get('twelfth_pct')
        has_all_marks = bool(tenth and twelfth and cgpa)
        if has_all_marks:
            cgpa_pct = (cgpa / 10.0) * 100
            marks_std = float(np.std([tenth, twelfth, cgpa_pct]))
            features['marks_consistency'] = max(0, 1 - (marks_std / 30.0))
            if cgpa_pct > twelfth and twelfth > tenth:
                features['academic_improvement'] = 1.0  # improved twice
            elif cgpa_pct > twelfth or twelfth > tenth:
                features['academic_improvement'] = 0.7  # improved once
            else:
                features['academic_improvement'] = 0.3
        else:
            features['marks_consistency'] = 0.5
            features['academic_improvement'] = 0.5

        # Free-text answers; None is normalised to an empty string.
        internship_text = student_data.get('internship_text') or ''
        ec_text = student_data.get('extracurricular_text') or ''
        cert_text = student_data.get('certifications_text') or ''

        features['internship_exposure'] = self._assess_internship_quality(internship_text)
        features['ec_quality'] = self._assess_extracurricular_quality(ec_text)
        features['cert_quality'] = self._assess_certification_quality(cert_text)

        score = float(sum(
            value * self.feature_weights[name]
            for name, value in features.items()
        ))

        # Confidence: share of the inputs that were actually provided.
        completeness_checks = (
            cgpa > 0,
            len(sgpa_values) >= 2,
            len(sgpa_values) >= 3,
            bool(tenth and twelfth),
            has_all_marks,
            len(internship_text) > 20,
            len(ec_text) > 20,
            len(cert_text) > 20,
        )
        confidence = sum(completeness_checks) / len(completeness_checks)

        return score, confidence, features

    def explain(self, features: Dict) -> Dict:
        """Summarise the strongest and weakest features.

        Args:
            features: Mapping of feature name to value, as returned by
                :meth:`score`.

        Returns:
            Dict with ``top_positive_features`` (drawn from the three
            highest values, kept only if above 0.6) and
            ``top_negative_features`` (drawn from the three lowest values,
            kept only if below 0.4). Each entry holds the feature name, its
            value rounded to two decimals, and a description.
        """
        ranked = sorted(features.items(), key=lambda item: item[1], reverse=True)

        def entry(feat, val):
            return {
                'feature': feat,
                'value': round(val, 2),
                'description': self._get_feature_description(feat, val),
            }

        return {
            'top_positive_features': [
                entry(feat, val)
                for feat, val in ranked[:3]
                if val > self._POSITIVE_THRESHOLD
            ],
            'top_negative_features': [
                entry(feat, val)
                for feat, val in ranked[-3:]
                if val < self._NEGATIVE_THRESHOLD
            ],
        }

    @staticmethod
    def _count_keywords(text_lower: str, keywords) -> int:
        """Count how many of ``keywords`` start a word in ``text_lower``.

        Matching is anchored at a word start so that e.g. 'led' is not
        found inside "enrolled" and 'aws' is not found inside "laws", while
        inflected forms such as "projects" or "volunteered" still count.
        """
        return sum(
            1
            for keyword in keywords
            if re.search(r'\b' + re.escape(keyword), text_lower)
        )

    def _assess_internship_quality(self, text: str) -> float:
        """Score internship free text in [0, 1].

        Combines stated duration (40%), presence of quality keywords (40%)
        and amount of detail measured by length (20%). Text shorter than 20
        characters scores 0.0.
        """
        if not text or len(text) < 20:
            return 0.0

        text_lower = text.lower()

        duration_score = 0.0
        for pattern, months_per_unit in self._NUMERIC_DURATION_PATTERNS:
            counts = pattern.findall(text_lower)
            if counts:
                months = max(int(count) for count in counts) * months_per_unit
                # Six months or more earns the full duration score.
                duration_score = max(duration_score, min(months / 6.0, 1.0))
        for pattern, phrase_score in self._PHRASE_DURATION_PATTERNS:
            if pattern.search(text_lower):
                duration_score = max(duration_score, phrase_score)

        quality_count = self._count_keywords(text_lower, self._INTERNSHIP_KEYWORDS)
        quality_score = min(quality_count / len(self._INTERNSHIP_KEYWORDS), 1.0)

        detail_score = min(len(text) / 500, 1.0)

        total = duration_score * 0.4 + quality_score * 0.4 + detail_score * 0.2
        return min(total, 1.0)

    def _assess_extracurricular_quality(self, text: str) -> float:
        """Score extracurricular free text in [0, 1].

        Combines leadership keywords (40%, saturating at 3), activity-type
        keywords (40%, saturating at 4) and length (20%, saturating at 400
        characters). Text shorter than 20 characters scores 0.0.
        """
        if not text or len(text) < 20:
            return 0.0

        text_lower = text.lower()
        leadership_count = self._count_keywords(text_lower, self._LEADERSHIP_KEYWORDS)
        activity_count = self._count_keywords(text_lower, self._ACTIVITY_KEYWORDS)

        total = (
            min(leadership_count / 3, 1.0) * 0.4
            + min(activity_count / 4, 1.0) * 0.4
            + min(len(text) / 400, 1.0) * 0.2
        )
        return min(total, 1.0)

    def _assess_certification_quality(self, text: str) -> float:
        """Score certification free text in [0, 1].

        Combines platform keywords (40%, saturating at 3), technical-skill
        keywords (40%, saturating at 4) and length (20%, saturating at 400
        characters). Text shorter than 20 characters scores 0.0.
        """
        if not text or len(text) < 20:
            return 0.0

        text_lower = text.lower()
        platform_count = self._count_keywords(text_lower, self._PLATFORM_KEYWORDS)
        tech_count = self._count_keywords(text_lower, self._TECH_KEYWORDS)

        total = (
            min(platform_count / 3, 1.0) * 0.4
            + min(tech_count / 4, 1.0) * 0.4
            + min(len(text) / 400, 1.0) * 0.2
        )
        return min(total, 1.0)

    def _get_feature_description(self, feature: str, value: float) -> str:
        """Return a human-readable description of a feature value.

        ``cgpa_norm`` is rendered back on the 0-10 scale. Every other known
        feature gets its positive wording when ``value`` exceeds the same
        0.6 cut-off that :meth:`explain` uses for positive features, and
        its negative wording otherwise. Unknown feature names are returned
        unchanged.
        """
        if feature == 'cgpa_norm':
            return f"CGPA performance: {value*10:.1f}/10"

        labels = self._FEATURE_LABELS.get(feature)
        if labels is None:
            return feature

        positive, negative = labels
        return positive if value > self._POSITIVE_THRESHOLD else negative
|