File size: 21,479 Bytes
3d015cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6b78c71
 
3d015cd
 
 
6b78c71
 
3d015cd
 
 
6b78c71
 
3d015cd
 
 
6b78c71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3d015cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6b78c71
3d015cd
 
 
 
 
 
6b78c71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3d015cd
 
 
 
 
 
 
 
6b78c71
3d015cd
 
 
 
 
 
6b78c71
3d015cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6b78c71
3d015cd
 
6b78c71
 
 
 
3d015cd
 
6b78c71
 
 
 
3d015cd
 
6b78c71
 
 
 
 
 
 
 
3d015cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6b78c71
3d015cd
6b78c71
3d015cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
"""
Student Output Service - Individual student JSON formatting
Provides structured analysis output for single students
"""
import logging
from dataclasses import dataclass, asdict
from datetime import datetime, timezone
from typing import Dict, List, Any, Optional

logger = logging.getLogger(__name__)


@dataclass
class SkillRecommendation:
    """Skill gap recommendation for a student.

    The fields carry the same names as the keys of the dicts built by
    ``StudentOutputService._recommend_skills``. NOTE(review): nothing in the
    visible part of this file instantiates the class - confirm whether it is
    used elsewhere.
    """
    # Name of the skill the student is advised to acquire.
    skill: str
    # Priority label; _recommend_skills emits 'high' or 'medium'.
    priority: str
    # Titles of courses suggested for learning the skill.
    recommended_courses: List[str]
    # Certifications associated with the skill (may be empty).
    certifications: List[str]


@dataclass
class CareerPath:
    """Suggested career path for a student.

    The fields carry the same names as the keys of the dicts built by
    ``StudentOutputService._suggest_careers``. NOTE(review): nothing in the
    visible part of this file instantiates the class - confirm whether it is
    used elsewhere.
    """
    # Name of the suggested role.
    role: str
    # How well the student fits the role; _suggest_careers caps it at 1.0.
    fit_score: float
    # Required skills of the role that the student already has.
    requirements_met: List[str]
    # Required skills of the role that the student is still missing.
    requirements_gap: List[str]


class StudentOutputService:
    """
    Formats individual student analysis into structured JSON
    """
    
    # Grade thresholds
    GRADE_THRESHOLDS = [
        (0.90, 'A+', 'Outstanding'),
        (0.80, 'A', 'Excellent'),
        (0.70, 'B+', 'Very Good'),
        (0.60, 'B', 'Good'),
        (0.50, 'C', 'Average'),
        (0.40, 'D', 'Below Average'),
        (0.00, 'F', 'Needs Improvement')
    ]
    
    def __init__(self):
        # Role requirements mapping
        self.career_requirements = {
            'software_engineer': {
                'required': ['python', 'sql', 'git', 'problem_solving'],
                'preferred': ['cloud', 'docker', 'system_design'],
                'description': 'Design, develop, and maintain software systems and applications'
            },
            'data_scientist': {
                'required': ['python', 'sql', 'statistics', 'machine_learning'],
                'preferred': ['deep_learning', 'spark', 'mlops'],
                'description': 'Analyze complex data to extract insights and build predictive models'
            },
            'product_manager': {
                'required': ['communication', 'leadership', 'analytics'],
                'preferred': ['sql', 'strategic_thinking', 'stakeholder_management'],
                'description': 'Define product vision and strategy, coordinate cross-functional teams'
            },
            'mechanical_engineer': {
                'required': ['cad', 'engineering_drawing', 'manufacturing'],
                'preferred': ['fea', 'cfd', 'automation'],
                'description': 'Design and develop mechanical systems, components, and machinery'
            },
            'data_analyst': {
                'required': ['sql', 'excel', 'statistics', 'visualization'],
                'preferred': ['python', 'tableau', 'power_bi'],
                'description': 'Transform raw data into actionable business insights and reports'
            },
            'full_stack_developer': {
                'required': ['javascript', 'html', 'css', 'nodejs'],
                'preferred': ['react', 'mongodb', 'aws'],
                'description': 'Build complete web applications from frontend to backend'
            },
            'devops_engineer': {
                'required': ['linux', 'docker', 'ci_cd', 'scripting'],
                'preferred': ['kubernetes', 'terraform', 'aws'],
                'description': 'Automate and streamline software development and deployment processes'
            },
            'business_analyst': {
                'required': ['communication', 'sql', 'requirements_gathering'],
                'preferred': ['powerpoint', 'stakeholder_management', 'agile'],
                'description': 'Bridge the gap between business needs and technical solutions'
            }
        }
    
    def format_student_output(self,
                              student_id: str,
                              score_packet: Dict[str, Any],
                              domain_analysis: Dict[str, Any] = None,
                              raw_data: Dict[str, Any] = None) -> Dict[str, Any]:
        """
        Format comprehensive student analysis JSON
        
        Args:
            student_id: Student identifier
            score_packet: Output from scoring endpoint
            domain_analysis: Output from FCT (optional)
            raw_data: Original student data (optional)
        
        Returns:
            Structured student JSON
        """
        # Extract core scores
        final_score = score_packet.get('final_score', 0)
        grade, grade_desc = self._get_grade(final_score)
        
        # Component scores
        component_scores = score_packet.get('scores', {}).get('component_scores', {})
        confidences = score_packet.get('scores', {}).get('confidences', {})
        detailed_features = score_packet.get('detailed_features', {})
        
        # Domain info
        detected_domain = (
            domain_analysis.get('detected_domain') if domain_analysis 
            else score_packet.get('domain_type', 'general')
        )
        
        # Build output
        output = {
            'student_id': student_id,
            'generated_at': datetime.utcnow().isoformat() + 'Z',
            
            'summary': {
                'final_score': round(final_score, 3),
                'grade': grade,
                'grade_description': grade_desc,
                'percentile': score_packet.get('percentile', 50),
                'placement_ready': final_score >= 0.60
            },
            
            'scores': {
                'universal': {
                    'score': round(component_scores.get('universal', 0), 3),
                    'confidence': round(confidences.get('universal', 0), 3),
                    'features': detailed_features.get('universal', {})
                },
                'personality': {
                    'score': round(component_scores.get('personality', 0), 3),
                    'confidence': round(confidences.get('personality', 0), 3),
                    'traits': detailed_features.get('personality', {})
                },
                'text': {
                    'score': round(component_scores.get('text', 0), 3),
                    'confidence': round(confidences.get('text', 0), 3),
                    'aspects': detailed_features.get('text', {})
                }
            },
            
            'domain_analysis': self._format_domain_analysis(
                detected_domain, domain_analysis, raw_data
            ),
            
            'strengths': self._identify_strengths(detailed_features),
            
            'improvement_areas': self._identify_improvements(detailed_features),
            
            'career_suggestions': self._suggest_careers(
                detected_domain, detailed_features, raw_data
            ),
            
            'skill_recommendations': self._recommend_skills(
                detected_domain, raw_data
            ),
            
            'explanations': score_packet.get('explanations', {})
        }
        
        # Add fidelity if available
        if domain_analysis and 'fidelity' in domain_analysis:
            output['fidelity_assessment'] = domain_analysis['fidelity']
        
        return output
    
    def _get_grade(self, score: float) -> tuple:
        """Get grade and description for score"""
        for threshold, grade, desc in self.GRADE_THRESHOLDS:
            if score >= threshold:
                return (grade, desc)
        return ('F', 'Needs Improvement')
    
    def _format_domain_analysis(self, detected_domain: str,
                                domain_analysis: Dict,
                                raw_data: Dict) -> Dict[str, Any]:
        """Format domain-specific analysis"""
        result = {
            'detected_domain': detected_domain,
            'display_name': detected_domain.replace('_', ' ').title()
        }
        
        if domain_analysis:
            result['domain_confidence'] = domain_analysis.get('domain_confidence', 0)
            result['aspects'] = domain_analysis.get('aspects', {})
        
        # Skill gaps from raw data
        if raw_data and 'skills' in raw_data:
            skills = raw_data.get('skills', [])
            if isinstance(skills, str):
                skills = [s.strip().lower() for s in skills.split(',')]
            result['current_skills'] = skills
        
        return result
    
    def _identify_strengths(self, features: Dict) -> List[Dict]:
        """Identify top strengths from features"""
        strengths = []
        
        # Universal features
        universal = features.get('universal', {})
        if universal.get('cgpa_norm', 0) > 0.8:
            strengths.append({
                'area': 'Academic Excellence',
                'score': universal['cgpa_norm'],
                'description': 'Strong academic performance with high CGPA'
            })
        
        if universal.get('internship_exposure', 0) > 0.7:
            strengths.append({
                'area': 'Industry Experience',
                'score': universal['internship_exposure'],
                'description': 'Significant practical experience through internships'
            })
        
        # Personality traits
        personality = features.get('personality', {})
        for trait, score in personality.items():
            if score > 0.75:
                strengths.append({
                    'area': trait.title(),
                    'score': score,
                    'description': self._get_trait_description(trait, 'high')
                })
        
        # Text aspects
        text = features.get('text', {})
        if text.get('leadership_score', 0) > 0.7:
            strengths.append({
                'area': 'Leadership',
                'score': text['leadership_score'],
                'description': 'Demonstrated leadership abilities with concrete examples'
            })
        
        if text.get('technical_skills', 0) > 0.7:
            strengths.append({
                'area': 'Technical Skills',
                'score': text['technical_skills'],
                'description': 'Strong technical competencies'
            })
        
        # Sort by score and return top 5
        strengths.sort(key=lambda x: x['score'], reverse=True)
        return strengths[:5]
    
    def _identify_improvements(self, features: Dict) -> List[Dict]:
        """Identify areas needing improvement"""
        improvements = []
        
        # Universal features
        universal = features.get('universal', {})
        if universal.get('ec_quality', 0) < 0.4:
            improvements.append({
                'area': 'Extracurricular Activities',
                'current_score': universal.get('ec_quality', 0),
                'suggestion': 'Strengthen your profile by joining technical clubs, participating in hackathons and coding competitions, or taking leadership roles in student organizations. These experiences demonstrate initiative and teamwork to recruiters.'
            })
        
        if universal.get('cert_quality', 0) < 0.4:
            improvements.append({
                'area': 'Professional Certifications',
                'current_score': universal.get('cert_quality', 0),
                'suggestion': 'Boost your credentials with industry-recognized certifications like AWS Cloud Practitioner, Google Data Analytics, or Microsoft Azure Fundamentals. These demonstrate commitment to continuous learning and are valued by employers.'
            })
        
        if universal.get('internship_exposure', 0) < 0.4:
            improvements.append({
                'area': 'Internship Experience',
                'current_score': universal.get('internship_exposure', 0),
                'suggestion': 'Gain practical experience through internships at startups or established companies. Apply through campus placements, LinkedIn, or platforms like Internshala and AngelList. Even short-term projects count towards industry exposure.'
            })
        
        if universal.get('project_quality', 0) < 0.5:
            improvements.append({
                'area': 'Project Portfolio',
                'current_score': universal.get('project_quality', 0),
                'suggestion': 'Build impressive projects that showcase your skills. Create a GitHub portfolio with well-documented code, deploy live projects, and contribute to open-source. Quality matters more than quantity - focus on 2-3 impactful projects.'
            })
        
        # Text aspects
        text = features.get('text', {})
        if text.get('communication', 0) < 0.5:
            improvements.append({
                'area': 'Communication Skills',
                'current_score': text.get('communication', 0),
                'suggestion': 'Improve your communication by practicing technical presentations, writing detailed project documentation, and articulating your thoughts clearly. Consider joining Toastmasters or taking a business communication course.'
            })
        
        if text.get('career_alignment', 0) < 0.5:
            improvements.append({
                'area': 'Career Clarity',
                'current_score': text.get('career_alignment', 0),
                'suggestion': 'Define clear career goals by researching industry roles, talking to professionals in your field of interest, and creating a 1-year and 5-year career roadmap. This clarity helps you make focused skill-building decisions.'
            })
        
        # Sort by score (lowest first)
        improvements.sort(key=lambda x: x['current_score'])
        return improvements[:4]
    
    def _suggest_careers(self, domain: str, features: Dict,
                        raw_data: Dict) -> List[Dict]:
        """Suggest career paths based on profile"""
        suggestions = []
        
        # Get student skills
        skills = []
        if raw_data and 'skills' in raw_data:
            skills_raw = raw_data.get('skills', [])
            if isinstance(skills_raw, str):
                skills = [s.strip().lower() for s in skills_raw.split(',')]
            else:
                skills = [s.lower() for s in skills_raw]
        
        # Text features for soft skills
        text = features.get('text', {})
        
        for role, reqs in self.career_requirements.items():
            # Calculate fit score
            required_met = sum(1 for r in reqs['required'] 
                              if r in skills or self._has_soft_skill(r, text))
            preferred_met = sum(1 for p in reqs['preferred']
                               if p in skills or self._has_soft_skill(p, text))
            
            total_reqs = len(reqs['required'])
            fit_score = (required_met / total_reqs) if total_reqs else 0
            fit_score += (preferred_met / len(reqs['preferred'])) * 0.3 if reqs['preferred'] else 0
            fit_score = min(fit_score, 1.0)
            
            if fit_score > 0.3:  # Minimum threshold
                suggestions.append({
                    'role': role.replace('_', ' ').title(),
                    'fit_score': round(fit_score, 2),
                    'requirements_met': [r for r in reqs['required'] 
                                        if r in skills or self._has_soft_skill(r, text)],
                    'requirements_gap': [r for r in reqs['required']
                                        if r not in skills and not self._has_soft_skill(r, text)]
                })
        
        # Sort by fit score
        suggestions.sort(key=lambda x: x['fit_score'], reverse=True)
        return suggestions[:3]
    
    def _has_soft_skill(self, skill: str, text_features: Dict) -> bool:
        """Check if student has a soft skill based on text analysis"""
        skill_mapping = {
            'communication': 'communication',
            'leadership': 'leadership_score',
            'problem_solving': 'problem_solving',
            'teamwork': 'teamwork'
        }
        
        if skill in skill_mapping:
            return text_features.get(skill_mapping[skill], 0) > 0.6
        return False
    
    def _recommend_skills(self, domain: str, raw_data: Dict) -> List[Dict]:
        """Recommend skills to acquire"""
        recommendations = []
        
        # Domain-specific recommendations with enhanced courses
        domain_skills = {
            'software_engineering': [
                {'skill': 'cloud', 'courses': ['AWS Solutions Architect (Udemy)', 'Google Cloud Fundamentals (Coursera)', 'Azure Fundamentals (Microsoft Learn)'], 'certs': ['AWS Certified Cloud Practitioner', 'Google Cloud Associate']},
                {'skill': 'system_design', 'courses': ['Grokking System Design (Educative)', 'System Design Primer (GitHub)', 'Designing Data-Intensive Applications (Book)'], 'certs': []},
                {'skill': 'devops', 'courses': ['Docker Mastery (Udemy)', 'Kubernetes for Beginners (KodeKloud)', 'CI/CD with Jenkins (LinkedIn Learning)'], 'certs': ['Docker Certified Associate', 'CKA Kubernetes']},
                {'skill': 'data_structures', 'courses': ['Data Structures & Algorithms (GeeksforGeeks)', 'LeetCode Premium', 'Cracking the Coding Interview (Book)'], 'certs': []}
            ],
            'data_science': [
                {'skill': 'deep_learning', 'courses': ['Deep Learning Specialization (Coursera - Andrew Ng)', 'Fast.ai Practical Deep Learning', 'PyTorch Fundamentals (Microsoft)'], 'certs': ['TensorFlow Developer Certificate']},
                {'skill': 'mlops', 'courses': ['MLOps Specialization (Coursera)', 'Made With ML', 'Full Stack Deep Learning'], 'certs': ['Google ML Engineer', 'AWS ML Specialty']},
                {'skill': 'statistics', 'courses': ['Statistics with Python (Coursera)', 'Khan Academy Statistics', 'Think Stats (Book)'], 'certs': []},
                {'skill': 'sql_analytics', 'courses': ['SQL for Data Science (Coursera)', 'Mode Analytics SQL Tutorial', 'DataCamp SQL Track'], 'certs': ['Microsoft Data Analyst Associate']}
            ],
            'mechanical_engineering': [
                {'skill': 'ev_powertrain', 'courses': ['Electric Vehicle Technology (NPTEL)', 'EV Design Fundamentals (Udemy)', 'Battery Technology (Coursera)'], 'certs': ['SAE EV Certificate']},
                {'skill': 'automation', 'courses': ['Industrial Automation (NPTEL)', 'PLC Programming (Udemy)', 'Industry 4.0 Fundamentals'], 'certs': ['Siemens Automation Certificate']},
                {'skill': 'cfd', 'courses': ['Computational Fluid Dynamics (NPTEL)', 'ANSYS Fluent Tutorials', 'OpenFOAM Basics'], 'certs': []},
                {'skill': 'additive_manufacturing', 'courses': ['3D Printing Fundamentals (Coursera)', 'Additive Manufacturing (NPTEL)'], 'certs': []}
            ],
            'business': [
                {'skill': 'data_visualization', 'courses': ['Tableau Desktop Specialist (Tableau)', 'Power BI (Microsoft Learn)', 'Data Storytelling (LinkedIn Learning)'], 'certs': ['Tableau Desktop Specialist', 'Power BI Data Analyst']},
                {'skill': 'financial_modeling', 'courses': ['Financial Modeling (CFI)', 'Excel for Finance (Coursera)', 'Valuation Fundamentals'], 'certs': ['FMVA Certification']}
            ]
        }
        
        # Get current skills
        current_skills = []
        if raw_data and 'skills' in raw_data:
            skills_raw = raw_data.get('skills', [])
            if isinstance(skills_raw, str):
                current_skills = [s.strip().lower() for s in skills_raw.split(',')]
        
        # Recommend missing skills
        domain_recs = domain_skills.get(domain, domain_skills.get('software_engineering', []))
        
        for rec in domain_recs:
            if rec['skill'] not in current_skills:
                recommendations.append({
                    'skill': rec['skill'].replace('_', ' ').title(),
                    'priority': 'high' if len(recommendations) < 2 else 'medium',
                    'recommended_courses': rec['courses'],
                    'certifications': rec.get('certs', [])
                })
        
        return recommendations[:4]
    
    def _get_trait_description(self, trait: str, level: str) -> str:
        """Get description for personality trait"""
        descriptions = {
            'openness': {
                'high': 'Creative, curious, and open to new experiences',
                'low': 'Practical and focused on concrete tasks'
            },
            'conscientiousness': {
                'high': 'Organized, disciplined, and reliable',
                'low': 'Flexible and adaptable to changing situations'
            },
            'extraversion': {
                'high': 'Energetic, sociable, and thrives in team settings',
                'low': 'Focused, reflective, and excels in independent work'
            },
            'agreeableness': {
                'high': 'Cooperative, empathetic, and team-oriented',
                'low': 'Independent thinker, comfortable with competition'
            },
            'stability': {
                'high': 'Emotionally resilient and handles stress well',
                'low': 'Sensitive and responsive to feedback'
            }
        }
        
        return descriptions.get(trait, {}).get(level, f"Strong {trait}")


# Module-level cache holding the lazily created service instance.
_student_output_service: Optional[StudentOutputService] = None


def get_student_output_service() -> StudentOutputService:
    """Return the shared StudentOutputService, creating it on first use."""
    global _student_output_service
    if _student_output_service is not None:
        return _student_output_service
    _student_output_service = StudentOutputService()
    return _student_output_service