Spaces:
Sleeping
Sleeping
| """Universal Module - Academic & Experience Scoring""" | |
| import numpy as np | |
| import re | |
| from typing import Dict, Tuple | |
class UniversalModule:
    """Score a student on academic performance and experience.

    Eight features, each in the range [0, 1], are derived from a student
    record and combined with the weights in ``feature_weights`` (which sum
    to 1.0) into a single score.
    """

    # Cut-offs shared by explain() and _get_feature_description() so that a
    # feature reported as "positive" always gets a positive description.
    _POSITIVE_THRESHOLD = 0.6
    _NEGATIVE_THRESHOLD = 0.4

    # Free-text answers shorter than this are treated as "not answered".
    _MIN_TEXT_LENGTH = 20

    # Patterns capturing a number; the factor converts the unit to months.
    # The separator accepts hyphens so "6-month" / "8-week" are recognised.
    _NUMERIC_DURATION_PATTERNS = (
        (re.compile(r'\b(\d+)[\s-]*months?\b'), 1.0),
        (re.compile(r'\b(\d+)[\s-]*weeks?\b'), 0.25),
    )
    # Patterns without a number; the value is used directly as the
    # duration score.
    _FIXED_DURATION_PATTERNS = (
        (re.compile(r'summer\s+internship'), 0.5),
        (re.compile(r'year[\s-]+long|full\s+year|annual'), 1.0),
    )

    _INTERNSHIP_KEYWORDS = (
        'company', 'startup', 'corporation', 'project', 'developed',
        'implemented', 'built', 'deployed', 'managed', 'led',
    )
    _LEADERSHIP_KEYWORDS = (
        'led', 'organized', 'president', 'captain', 'head',
        'coordinator', 'managed', 'founded',
    )
    _ACTIVITY_KEYWORDS = (
        'club', 'society', 'competition', 'hackathon', 'event',
        'volunteer', 'sports', 'cultural', 'technical',
    )
    _PLATFORM_KEYWORDS = (
        'coursera', 'udemy', 'edx', 'linkedin', 'google',
        'microsoft', 'aws', 'azure', 'ibm', 'oracle',
    )
    _TECH_KEYWORDS = (
        'python', 'java', 'machine learning', 'data science', 'cloud',
        'programming', 'development', 'database', 'web', 'mobile',
    )

    def __init__(self):
        self.feature_weights = {
            'cgpa_norm': 0.30,
            'sgpa_trend': 0.15,
            'sgpa_consistency': 0.10,
            'marks_consistency': 0.10,
            'academic_improvement': 0.10,
            'internship_exposure': 0.10,
            'ec_quality': 0.08,
            'cert_quality': 0.07,
        }

    @staticmethod
    def _to_number(value):
        """Return ``value`` as a finite float, or None if it is not one.

        None, NaN, infinities and non-numeric strings all map to None so
        that callers can treat them uniformly as missing data.
        """
        try:
            number = float(value)
        except (TypeError, ValueError):
            return None
        return number if np.isfinite(number) else None

    @staticmethod
    def _count_keywords(text_lower: str, keywords) -> int:
        """Count how many of ``keywords`` occur in ``text_lower``.

        Each keyword must begin at a word boundary, so "led" does not match
        inside "knowledge" or "enrolled", while inflected forms such as
        "projects" still match "project".
        """
        return sum(
            1 for keyword in keywords
            if re.search(r'\b' + re.escape(keyword), text_lower)
        )

    def score(self, student_data: Dict) -> Tuple[float, float, Dict]:
        """Calculate the universal score for one student record.

        Keys read from ``student_data``: ``cgpa``, ``sgpa_sem1`` ..
        ``sgpa_sem8``, ``tenth_pct``, ``twelfth_pct``, ``internship_text``,
        ``extracurricular_text`` and ``certifications_text``. Missing,
        None, NaN or non-numeric values are treated as absent.

        Returns:
            (score, confidence, features_dict) where ``score`` is the
            weighted sum of the features, ``confidence`` is the fraction of
            the eight completeness checks that passed, and
            ``features_dict`` maps each feature name to its value.
        """
        features = {}

        # CGPA normalization (0-10 scale), clamped to [0, 1].
        cgpa = self._to_number(student_data.get('cgpa')) or 0.0
        features['cgpa_norm'] = max(0.0, min(cgpa / 10.0, 1.0))

        # Collect the semesters that actually have a positive SGPA.
        sgpa_values = []
        for sem_num in range(1, 9):
            sem_val = self._to_number(student_data.get(f'sgpa_sem{sem_num}'))
            if sem_val is not None and sem_val > 0:
                sgpa_values.append(sem_val)

        # SGPA trend: first-to-last change, centred at 0.5 (no change).
        if len(sgpa_values) >= 2:
            trend = (sgpa_values[-1] - sgpa_values[0]) / 10.0
            features['sgpa_trend'] = max(0.0, min(trend + 0.5, 1.0))
        else:
            features['sgpa_trend'] = 0.5  # Neutral if insufficient data

        # SGPA consistency: lower standard deviation scores higher.
        if len(sgpa_values) >= 3:
            std_dev = float(np.std(sgpa_values))
            features['sgpa_consistency'] = max(0.0, 1 - (std_dev / 3.0))
        else:
            features['sgpa_consistency'] = 0.5

        # Marks consistency and improvement across 10th, 12th and CGPA.
        tenth = self._to_number(student_data.get('tenth_pct'))
        twelfth = self._to_number(student_data.get('twelfth_pct'))
        has_all_marks = bool(tenth and twelfth and cgpa)
        if has_all_marks:
            cgpa_pct = (cgpa / 10.0) * 100
            marks_std = float(np.std([tenth, twelfth, cgpa_pct]))
            features['marks_consistency'] = max(0.0, 1 - (marks_std / 30.0))

            improved_in_college = cgpa_pct > twelfth
            improved_in_school = twelfth > tenth
            if improved_in_college and improved_in_school:
                features['academic_improvement'] = 1.0
            elif improved_in_college or improved_in_school:
                features['academic_improvement'] = 0.7
            else:
                features['academic_improvement'] = 0.3
        else:
            features['marks_consistency'] = 0.5
            features['academic_improvement'] = 0.5

        # Free-text answers (None is treated as an empty answer).
        internship_text = student_data.get('internship_text') or ''
        ec_text = student_data.get('extracurricular_text') or ''
        cert_text = student_data.get('certifications_text') or ''

        features['internship_exposure'] = self._assess_internship_quality(internship_text)
        features['ec_quality'] = self._assess_extracurricular_quality(ec_text)
        features['cert_quality'] = self._assess_certification_quality(cert_text)

        # Weighted score over all features.
        score = sum(
            value * self.feature_weights[name]
            for name, value in features.items()
        )

        # Confidence based on data completeness (eight checks).
        completeness_checks = [
            cgpa > 0,
            len(sgpa_values) >= 2,
            len(sgpa_values) >= 3,
            bool(tenth and twelfth),
            has_all_marks,
            len(internship_text) > self._MIN_TEXT_LENGTH,
            len(ec_text) > self._MIN_TEXT_LENGTH,
            len(cert_text) > self._MIN_TEXT_LENGTH,
        ]
        confidence = sum(1 for passed in completeness_checks if passed) / len(completeness_checks)

        return score, confidence, features

    def explain(self, features: Dict) -> Dict:
        """Generate an explanation for a feature dictionary.

        Among the three highest-valued features, those above 0.6 are listed
        under ``top_positive_features``; among the three lowest-valued,
        those below 0.4 are listed under ``top_negative_features``. Each
        entry holds the feature name, its value rounded to two decimals and
        a human-readable description.
        """
        explanations = {
            'top_positive_features': [],
            'top_negative_features': [],
        }

        # Highest value first.
        ranked = sorted(features.items(), key=lambda item: item[1], reverse=True)

        for feat, val in ranked[:3]:
            if val > self._POSITIVE_THRESHOLD:
                explanations['top_positive_features'].append({
                    'feature': feat,
                    'value': round(val, 2),
                    'description': self._get_feature_description(feat, val),
                })

        for feat, val in ranked[-3:]:
            if val < self._NEGATIVE_THRESHOLD:
                explanations['top_negative_features'].append({
                    'feature': feat,
                    'value': round(val, 2),
                    'description': self._get_feature_description(feat, val),
                })

        return explanations

    def _assess_internship_quality(self, text: str) -> float:
        """Score internship text in [0, 1].

        Duration contributes up to 0.4 (six months or more is full marks),
        quality keywords up to 0.4, and answer length up to 0.2 (500
        characters is full marks). Text shorter than 20 characters scores
        0.0.
        """
        if not text or len(text) < self._MIN_TEXT_LENGTH:
            return 0.0

        text_lower = text.lower()

        # Keep the best duration signal found by any pattern.
        duration_score = 0.0
        for pattern, months_per_unit in self._NUMERIC_DURATION_PATTERNS:
            amounts = [int(match) for match in pattern.findall(text_lower)]
            if amounts:
                months = max(amounts) * months_per_unit
                duration_score = max(duration_score, min(months / 6.0, 1.0))
        for pattern, fixed_score in self._FIXED_DURATION_PATTERNS:
            if pattern.search(text_lower):
                duration_score = max(duration_score, fixed_score)

        keyword_hits = self._count_keywords(text_lower, self._INTERNSHIP_KEYWORDS)

        score = duration_score * 0.4
        score += min(keyword_hits / len(self._INTERNSHIP_KEYWORDS), 1.0) * 0.4
        score += min(len(text) / 500, 1.0) * 0.2  # Length indicates detail
        return min(score, 1.0)

    def _assess_extracurricular_quality(self, text: str) -> float:
        """Score extracurricular text in [0, 1].

        Leadership keywords contribute up to 0.4 (three hits is full
        marks), activity keywords up to 0.4 (four hits) and answer length
        up to 0.2 (400 characters). Text shorter than 20 characters scores
        0.0.
        """
        if not text or len(text) < self._MIN_TEXT_LENGTH:
            return 0.0

        text_lower = text.lower()
        leadership_hits = self._count_keywords(text_lower, self._LEADERSHIP_KEYWORDS)
        activity_hits = self._count_keywords(text_lower, self._ACTIVITY_KEYWORDS)

        score = min(leadership_hits / 3, 1.0) * 0.4
        score += min(activity_hits / 4, 1.0) * 0.4
        score += min(len(text) / 400, 1.0) * 0.2  # Detail level
        return min(score, 1.0)

    def _assess_certification_quality(self, text: str) -> float:
        """Score certification text in [0, 1].

        Platform keywords contribute up to 0.4 (three hits is full marks),
        technical keywords up to 0.4 (four hits) and answer length up to
        0.2 (400 characters). Text shorter than 20 characters scores 0.0.
        """
        if not text or len(text) < self._MIN_TEXT_LENGTH:
            return 0.0

        text_lower = text.lower()
        platform_hits = self._count_keywords(text_lower, self._PLATFORM_KEYWORDS)
        tech_hits = self._count_keywords(text_lower, self._TECH_KEYWORDS)

        score = min(platform_hits / 3, 1.0) * 0.4
        score += min(tech_hits / 4, 1.0) * 0.4
        score += min(len(text) / 400, 1.0) * 0.2  # Detail level
        return min(score, 1.0)

    def _get_feature_description(self, feature: str, value: float) -> str:
        """Return a human-readable description of a feature value.

        Every feature uses the same cut-off as explain() uses for positive
        features, so a feature listed as positive never receives the
        negative wording. Unknown feature names are returned unchanged.
        """
        is_positive = value > self._POSITIVE_THRESHOLD
        descriptions = {
            'cgpa_norm': f"CGPA performance: {value*10:.1f}/10",
            'sgpa_trend': "Strong upward trend in semester grades" if is_positive else "Declining semester grades",
            'sgpa_consistency': "Very consistent semester performance" if is_positive else "Inconsistent semester performance",
            'marks_consistency': "Consistent performance across academics" if is_positive else "Variable academic performance",
            'academic_improvement': "Clear improvement over time" if is_positive else "Limited academic growth",
            'internship_exposure': "Strong internship experience" if is_positive else "Limited internship exposure",
            'ec_quality': "Excellent extracurricular involvement" if is_positive else "Limited extracurricular activities",
            'cert_quality': "Strong certification portfolio" if is_positive else "Few professional certifications",
        }
        return descriptions.get(feature, feature)