# Spaces: parthnuwal7 / FCT (status: Sleeping)
# File size: 21,479 Bytes

"""
Student Output Service - Individual student JSON formatting
Provides structured analysis output for single students
"""
import logging
from typing import Dict, List, Any, Optional
from datetime import datetime
from dataclasses import dataclass, asdict

logger = logging.getLogger(__name__)


@dataclass
class SkillRecommendation:
    """Skill gap recommendation for student.

    The field names match the keys of the recommendation dicts built by
    ``StudentOutputService._recommend_skills``. This class is not
    instantiated anywhere in this module.
    """
    # Name of the skill being recommended.
    skill: str
    # Priority label. NOTE(review): _recommend_skills emits 'high' or
    # 'medium'; confirm whether other values are expected.
    priority: str
    # Course titles suggested for learning the skill.
    recommended_courses: List[str]
    # Certification names associated with the skill (may be empty).
    certifications: List[str]


@dataclass
class CareerPath:
    """Suggested career path.

    The field names match the keys of the suggestion dicts built by
    ``StudentOutputService._suggest_careers``. This class is not
    instantiated anywhere in this module.
    """
    # Role name. NOTE(review): _suggest_careers emits a title-cased display
    # name such as 'Data Scientist'; confirm the same form is expected here.
    role: str
    # Match score for the role; _suggest_careers caps its value at 1.0.
    fit_score: float
    # Required skills of the role that the student already has.
    requirements_met: List[str]
    # Required skills of the role that the student is missing.
    requirements_gap: List[str]


class StudentOutputService:
    """
    Formats individual student analysis into structured JSON
    """
    
    # Grade thresholds
    GRADE_THRESHOLDS = [
        (0.90, 'A+', 'Outstanding'),
        (0.80, 'A', 'Excellent'),
        (0.70, 'B+', 'Very Good'),
        (0.60, 'B', 'Good'),
        (0.50, 'C', 'Average'),
        (0.40, 'D', 'Below Average'),
        (0.00, 'F', 'Needs Improvement')
    ]
    
    def __init__(self):
        # Role requirements mapping
        self.career_requirements = {
            'software_engineer': {
                'required': ['python', 'sql', 'git', 'problem_solving'],
                'preferred': ['cloud', 'docker', 'system_design'],
                'description': 'Design, develop, and maintain software systems and applications'
            },
            'data_scientist': {
                'required': ['python', 'sql', 'statistics', 'machine_learning'],
                'preferred': ['deep_learning', 'spark', 'mlops'],
                'description': 'Analyze complex data to extract insights and build predictive models'
            },
            'product_manager': {
                'required': ['communication', 'leadership', 'analytics'],
                'preferred': ['sql', 'strategic_thinking', 'stakeholder_management'],
                'description': 'Define product vision and strategy, coordinate cross-functional teams'
            },
            'mechanical_engineer': {
                'required': ['cad', 'engineering_drawing', 'manufacturing'],
                'preferred': ['fea', 'cfd', 'automation'],
                'description': 'Design and develop mechanical systems, components, and machinery'
            },
            'data_analyst': {
                'required': ['sql', 'excel', 'statistics', 'visualization'],
                'preferred': ['python', 'tableau', 'power_bi'],
                'description': 'Transform raw data into actionable business insights and reports'
            },
            'full_stack_developer': {
                'required': ['javascript', 'html', 'css', 'nodejs'],
                'preferred': ['react', 'mongodb', 'aws'],
                'description': 'Build complete web applications from frontend to backend'
            },
            'devops_engineer': {
                'required': ['linux', 'docker', 'ci_cd', 'scripting'],
                'preferred': ['kubernetes', 'terraform', 'aws'],
                'description': 'Automate and streamline software development and deployment processes'
            },
            'business_analyst': {
                'required': ['communication', 'sql', 'requirements_gathering'],
                'preferred': ['powerpoint', 'stakeholder_management', 'agile'],
                'description': 'Bridge the gap between business needs and technical solutions'
            }
        }
    
    def format_student_output(self,
                              student_id: str,
                              score_packet: Dict[str, Any],
                              domain_analysis: Dict[str, Any] = None,
                              raw_data: Dict[str, Any] = None) -> Dict[str, Any]:
        """
        Format comprehensive student analysis JSON
        
        Args:
            student_id: Student identifier
            score_packet: Output from scoring endpoint
            domain_analysis: Output from FCT (optional)
            raw_data: Original student data (optional)
        
        Returns:
            Structured student JSON
        """
        # Extract core scores
        final_score = score_packet.get('final_score', 0)
        grade, grade_desc = self._get_grade(final_score)
        
        # Component scores
        component_scores = score_packet.get('scores', {}).get('component_scores', {})
        confidences = score_packet.get('scores', {}).get('confidences', {})
        detailed_features = score_packet.get('detailed_features', {})
        
        # Domain info
        detected_domain = (
            domain_analysis.get('detected_domain') if domain_analysis 
            else score_packet.get('domain_type', 'general')
        )
        
        # Build output
        output = {
            'student_id': student_id,
            'generated_at': datetime.utcnow().isoformat() + 'Z',
            
            'summary': {
                'final_score': round(final_score, 3),
                'grade': grade,
                'grade_description': grade_desc,
                'percentile': score_packet.get('percentile', 50),
                'placement_ready': final_score >= 0.60
            },
            
            'scores': {
                'universal': {
                    'score': round(component_scores.get('universal', 0), 3),
                    'confidence': round(confidences.get('universal', 0), 3),
                    'features': detailed_features.get('universal', {})
                },
                'personality': {
                    'score': round(component_scores.get('personality', 0), 3),
                    'confidence': round(confidences.get('personality', 0), 3),
                    'traits': detailed_features.get('personality', {})
                },
                'text': {
                    'score': round(component_scores.get('text', 0), 3),
                    'confidence': round(confidences.get('text', 0), 3),
                    'aspects': detailed_features.get('text', {})
                }
            },
            
            'domain_analysis': self._format_domain_analysis(
                detected_domain, domain_analysis, raw_data
            ),
            
            'strengths': self._identify_strengths(detailed_features),
            
            'improvement_areas': self._identify_improvements(detailed_features),
            
            'career_suggestions': self._suggest_careers(
                detected_domain, detailed_features, raw_data
            ),
            
            'skill_recommendations': self._recommend_skills(
                detected_domain, raw_data
            ),
            
            'explanations': score_packet.get('explanations', {})
        }
        
        # Add fidelity if available
        if domain_analysis and 'fidelity' in domain_analysis:
            output['fidelity_assessment'] = domain_analysis['fidelity']
        
        return output
    
    def _get_grade(self, score: float) -> tuple:
        """Get grade and description for score"""
        for threshold, grade, desc in self.GRADE_THRESHOLDS:
            if score >= threshold:
                return (grade, desc)
        return ('F', 'Needs Improvement')
    
    def _format_domain_analysis(self, detected_domain: str,
                                domain_analysis: Dict,
                                raw_data: Dict) -> Dict[str, Any]:
        """Format domain-specific analysis"""
        result = {
            'detected_domain': detected_domain,
            'display_name': detected_domain.replace('_', ' ').title()
        }
        
        if domain_analysis:
            result['domain_confidence'] = domain_analysis.get('domain_confidence', 0)
            result['aspects'] = domain_analysis.get('aspects', {})
        
        # Skill gaps from raw data
        if raw_data and 'skills' in raw_data:
            skills = raw_data.get('skills', [])
            if isinstance(skills, str):
                skills = [s.strip().lower() for s in skills.split(',')]
            result['current_skills'] = skills
        
        return result
    
    def _identify_strengths(self, features: Dict) -> List[Dict]:
        """Identify top strengths from features"""
        strengths = []
        
        # Universal features
        universal = features.get('universal', {})
        if universal.get('cgpa_norm', 0) > 0.8:
            strengths.append({
                'area': 'Academic Excellence',
                'score': universal['cgpa_norm'],
                'description': 'Strong academic performance with high CGPA'
            })
        
        if universal.get('internship_exposure', 0) > 0.7:
            strengths.append({
                'area': 'Industry Experience',
                'score': universal['internship_exposure'],
                'description': 'Significant practical experience through internships'
            })
        
        # Personality traits
        personality = features.get('personality', {})
        for trait, score in personality.items():
            if score > 0.75:
                strengths.append({
                    'area': trait.title(),
                    'score': score,
                    'description': self._get_trait_description(trait, 'high')
                })
        
        # Text aspects
        text = features.get('text', {})
        if text.get('leadership_score', 0) > 0.7:
            strengths.append({
                'area': 'Leadership',
                'score': text['leadership_score'],
                'description': 'Demonstrated leadership abilities with concrete examples'
            })
        
        if text.get('technical_skills', 0) > 0.7:
            strengths.append({
                'area': 'Technical Skills',
                'score': text['technical_skills'],
                'description': 'Strong technical competencies'
            })
        
        # Sort by score and return top 5
        strengths.sort(key=lambda x: x['score'], reverse=True)
        return strengths[:5]
    
    def _identify_improvements(self, features: Dict) -> List[Dict]:
        """Identify areas needing improvement"""
        improvements = []
        
        # Universal features
        universal = features.get('universal', {})
        if universal.get('ec_quality', 0) < 0.4:
            improvements.append({
                'area': 'Extracurricular Activities',
                'current_score': universal.get('ec_quality', 0),
                'suggestion': 'Strengthen your profile by joining technical clubs, participating in hackathons and coding competitions, or taking leadership roles in student organizations. These experiences demonstrate initiative and teamwork to recruiters.'
            })
        
        if universal.get('cert_quality', 0) < 0.4:
            improvements.append({
                'area': 'Professional Certifications',
                'current_score': universal.get('cert_quality', 0),
                'suggestion': 'Boost your credentials with industry-recognized certifications like AWS Cloud Practitioner, Google Data Analytics, or Microsoft Azure Fundamentals. These demonstrate commitment to continuous learning and are valued by employers.'
            })
        
        if universal.get('internship_exposure', 0) < 0.4:
            improvements.append({
                'area': 'Internship Experience',
                'current_score': universal.get('internship_exposure', 0),
                'suggestion': 'Gain practical experience through internships at startups or established companies. Apply through campus placements, LinkedIn, or platforms like Internshala and AngelList. Even short-term projects count towards industry exposure.'
            })
        
        if universal.get('project_quality', 0) < 0.5:
            improvements.append({
                'area': 'Project Portfolio',
                'current_score': universal.get('project_quality', 0),
                'suggestion': 'Build impressive projects that showcase your skills. Create a GitHub portfolio with well-documented code, deploy live projects, and contribute to open-source. Quality matters more than quantity - focus on 2-3 impactful projects.'
            })
        
        # Text aspects
        text = features.get('text', {})
        if text.get('communication', 0) < 0.5:
            improvements.append({
                'area': 'Communication Skills',
                'current_score': text.get('communication', 0),
                'suggestion': 'Improve your communication by practicing technical presentations, writing detailed project documentation, and articulating your thoughts clearly. Consider joining Toastmasters or taking a business communication course.'
            })
        
        if text.get('career_alignment', 0) < 0.5:
            improvements.append({
                'area': 'Career Clarity',
                'current_score': text.get('career_alignment', 0),
                'suggestion': 'Define clear career goals by researching industry roles, talking to professionals in your field of interest, and creating a 1-year and 5-year career roadmap. This clarity helps you make focused skill-building decisions.'
            })
        
        # Sort by score (lowest first)
        improvements.sort(key=lambda x: x['current_score'])
        return improvements[:4]
    
    def _suggest_careers(self, domain: str, features: Dict,
                        raw_data: Dict) -> List[Dict]:
        """Suggest career paths based on profile"""
        suggestions = []
        
        # Get student skills
        skills = []
        if raw_data and 'skills' in raw_data:
            skills_raw = raw_data.get('skills', [])
            if isinstance(skills_raw, str):
                skills = [s.strip().lower() for s in skills_raw.split(',')]
            else:
                skills = [s.lower() for s in skills_raw]
        
        # Text features for soft skills
        text = features.get('text', {})
        
        for role, reqs in self.career_requirements.items():
            # Calculate fit score
            required_met = sum(1 for r in reqs['required'] 
                              if r in skills or self._has_soft_skill(r, text))
            preferred_met = sum(1 for p in reqs['preferred']
                               if p in skills or self._has_soft_skill(p, text))
            
            total_reqs = len(reqs['required'])
            fit_score = (required_met / total_reqs) if total_reqs else 0
            fit_score += (preferred_met / len(reqs['preferred'])) * 0.3 if reqs['preferred'] else 0
            fit_score = min(fit_score, 1.0)
            
            if fit_score > 0.3:  # Minimum threshold
                suggestions.append({
                    'role': role.replace('_', ' ').title(),
                    'fit_score': round(fit_score, 2),
                    'requirements_met': [r for r in reqs['required'] 
                                        if r in skills or self._has_soft_skill(r, text)],
                    'requirements_gap': [r for r in reqs['required']
                                        if r not in skills and not self._has_soft_skill(r, text)]
                })
        
        # Sort by fit score
        suggestions.sort(key=lambda x: x['fit_score'], reverse=True)
        return suggestions[:3]
    
    def _has_soft_skill(self, skill: str, text_features: Dict) -> bool:
        """Check if student has a soft skill based on text analysis"""
        skill_mapping = {
            'communication': 'communication',
            'leadership': 'leadership_score',
            'problem_solving': 'problem_solving',
            'teamwork': 'teamwork'
        }
        
        if skill in skill_mapping:
            return text_features.get(skill_mapping[skill], 0) > 0.6
        return False
    
    def _recommend_skills(self, domain: str, raw_data: Dict) -> List[Dict]:
        """Recommend skills to acquire"""
        recommendations = []
        
        # Domain-specific recommendations with enhanced courses
        domain_skills = {
            'software_engineering': [
                {'skill': 'cloud', 'courses': ['AWS Solutions Architect (Udemy)', 'Google Cloud Fundamentals (Coursera)', 'Azure Fundamentals (Microsoft Learn)'], 'certs': ['AWS Certified Cloud Practitioner', 'Google Cloud Associate']},
                {'skill': 'system_design', 'courses': ['Grokking System Design (Educative)', 'System Design Primer (GitHub)', 'Designing Data-Intensive Applications (Book)'], 'certs': []},
                {'skill': 'devops', 'courses': ['Docker Mastery (Udemy)', 'Kubernetes for Beginners (KodeKloud)', 'CI/CD with Jenkins (LinkedIn Learning)'], 'certs': ['Docker Certified Associate', 'CKA Kubernetes']},
                {'skill': 'data_structures', 'courses': ['Data Structures & Algorithms (GeeksforGeeks)', 'LeetCode Premium', 'Cracking the Coding Interview (Book)'], 'certs': []}
            ],
            'data_science': [
                {'skill': 'deep_learning', 'courses': ['Deep Learning Specialization (Coursera - Andrew Ng)', 'Fast.ai Practical Deep Learning', 'PyTorch Fundamentals (Microsoft)'], 'certs': ['TensorFlow Developer Certificate']},
                {'skill': 'mlops', 'courses': ['MLOps Specialization (Coursera)', 'Made With ML', 'Full Stack Deep Learning'], 'certs': ['Google ML Engineer', 'AWS ML Specialty']},
                {'skill': 'statistics', 'courses': ['Statistics with Python (Coursera)', 'Khan Academy Statistics', 'Think Stats (Book)'], 'certs': []},
                {'skill': 'sql_analytics', 'courses': ['SQL for Data Science (Coursera)', 'Mode Analytics SQL Tutorial', 'DataCamp SQL Track'], 'certs': ['Microsoft Data Analyst Associate']}
            ],
            'mechanical_engineering': [
                {'skill': 'ev_powertrain', 'courses': ['Electric Vehicle Technology (NPTEL)', 'EV Design Fundamentals (Udemy)', 'Battery Technology (Coursera)'], 'certs': ['SAE EV Certificate']},
                {'skill': 'automation', 'courses': ['Industrial Automation (NPTEL)', 'PLC Programming (Udemy)', 'Industry 4.0 Fundamentals'], 'certs': ['Siemens Automation Certificate']},
                {'skill': 'cfd', 'courses': ['Computational Fluid Dynamics (NPTEL)', 'ANSYS Fluent Tutorials', 'OpenFOAM Basics'], 'certs': []},
                {'skill': 'additive_manufacturing', 'courses': ['3D Printing Fundamentals (Coursera)', 'Additive Manufacturing (NPTEL)'], 'certs': []}
            ],
            'business': [
                {'skill': 'data_visualization', 'courses': ['Tableau Desktop Specialist (Tableau)', 'Power BI (Microsoft Learn)', 'Data Storytelling (LinkedIn Learning)'], 'certs': ['Tableau Desktop Specialist', 'Power BI Data Analyst']},
                {'skill': 'financial_modeling', 'courses': ['Financial Modeling (CFI)', 'Excel for Finance (Coursera)', 'Valuation Fundamentals'], 'certs': ['FMVA Certification']}
            ]
        }
        
        # Get current skills
        current_skills = []
        if raw_data and 'skills' in raw_data:
            skills_raw = raw_data.get('skills', [])
            if isinstance(skills_raw, str):
                current_skills = [s.strip().lower() for s in skills_raw.split(',')]
        
        # Recommend missing skills
        domain_recs = domain_skills.get(domain, domain_skills.get('software_engineering', []))
        
        for rec in domain_recs:
            if rec['skill'] not in current_skills:
                recommendations.append({
                    'skill': rec['skill'].replace('_', ' ').title(),
                    'priority': 'high' if len(recommendations) < 2 else 'medium',
                    'recommended_courses': rec['courses'],
                    'certifications': rec.get('certs', [])
                })
        
        return recommendations[:4]
    
    def _get_trait_description(self, trait: str, level: str) -> str:
        """Get description for personality trait"""
        descriptions = {
            'openness': {
                'high': 'Creative, curious, and open to new experiences',
                'low': 'Practical and focused on concrete tasks'
            },
            'conscientiousness': {
                'high': 'Organized, disciplined, and reliable',
                'low': 'Flexible and adaptable to changing situations'
            },
            'extraversion': {
                'high': 'Energetic, sociable, and thrives in team settings',
                'low': 'Focused, reflective, and excels in independent work'
            },
            'agreeableness': {
                'high': 'Cooperative, empathetic, and team-oriented',
                'low': 'Independent thinker, comfortable with competition'
            },
            'stability': {
                'high': 'Emotionally resilient and handles stress well',
                'low': 'Sensitive and responsive to feedback'
            }
        }
        
        return descriptions.get(trait, {}).get(level, f"Strong {trait}")


# Module-wide shared instance, created on first request.
_student_output_service: Optional[StudentOutputService] = None


def get_student_output_service() -> StudentOutputService:
    """Return the shared StudentOutputService, creating it on first use."""
    global _student_output_service
    if _student_output_service is not None:
        return _student_output_service
    _student_output_service = StudentOutputService()
    return _student_output_service