File size: 8,079 Bytes
3d015cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
"""Business/Finance Domain Plugin

Scores business competency based on:
- Resume content (ATS-style keyword matching)
- Case study submission analysis
- Excel/analytical test scores
- Internship experience in business domains
"""
import re
import time
import logging
from typing import Dict, List
from .base_plugin import BaseDomainPlugin, DomainScore
from .plugin_factory import register_plugin

logger = logging.getLogger(__name__)


@register_plugin('business')
class BusinessPlugin(BaseDomainPlugin):
    """Business/Finance domain scoring plugin.

    Combines five heuristic features, each in the range 0-1, into a single
    weighted score (weights come from ``_get_feature_weights``):

    - ``resume_keyword_score``: coverage of business keywords in the resume
    - ``internship_relevance``: business keywords on lines mentioning an
      internship
    - ``case_study_score``: structure, analytical vocabulary and length of a
      case study submission
    - ``excel_test_score``: a 0-100 test result scaled to 0-1
    - ``business_depth``: presence of advanced business terminology

    Keyword matching is case-insensitive and matches whole words/phrases
    (with an optional plural "s"), so that e.g. "excel" does not match
    "excellent" and "lean" does not match "clean".
    """

    # Relative importance of each keyword category in the resume keyword
    # score. The weights sum to 1.0, so the weighted sum stays within 0-1.
    _CATEGORY_WEIGHTS = {
        'consulting': 0.20,
        'finance': 0.20,
        'analytics': 0.20,
        'management': 0.15,
        'sales': 0.15,
        'operations': 0.10,
    }

    # Terms whose presence signals deeper business knowledge.
    _ADVANCED_TERMS = (
        'financial modeling', 'valuation', 'dcf', 'market research',
        'competitive analysis', 'business plan', 'roi', 'kpi',
        'p&l', 'balance sheet', 'cash flow', 'stakeholder management',
        'go-to-market', 'pricing strategy', 'market segmentation',
    )

    # Matches "intern", "interns", "interned", "interning", "internship(s)"
    # as whole words only, so "internal", "international" and "internet"
    # are not mistaken for internship mentions. Applied to lower-cased text.
    _INTERNSHIP_RE = re.compile(r'(?<!\w)intern(?:s|ed|ing|ships?)?(?!\w)')

    # At most this many distinct internship lines contribute to the score.
    _MAX_INTERNSHIP_MENTIONS = 5

    def __init__(self):
        super().__init__()
        # Business-relevant keywords, grouped by category. All entries are
        # lower-case because they are matched against lower-cased text.
        self.business_keywords = {
            'consulting': ['consulting', 'consultant', 'advisory', 'strategy', 'mckinsey', 'bain', 'bcg'],
            'finance': ['finance', 'banking', 'investment', 'equity', 'portfolio', 'analyst', 'goldman', 'morgan'],
            'analytics': ['data analysis', 'business intelligence', 'tableau', 'power bi', 'sql', 'excel'],
            'management': ['project management', 'product management', 'stakeholder', 'agile', 'scrum'],
            'sales': ['sales', 'business development', 'client acquisition', 'revenue', 'crm'],
            'operations': ['operations', 'supply chain', 'logistics', 'process improvement', 'lean', 'six sigma']
        }

    def _get_domain_type(self) -> str:
        """Return the identifier of the domain handled by this plugin."""
        return 'business'

    def _get_feature_weights(self) -> Dict[str, float]:
        """Return the weight of each feature in the final score.

        The keys match the feature names produced by ``score``; the weights
        sum to 1.0.
        """
        return {
            'resume_keyword_score': 0.30,
            'internship_relevance': 0.25,
            'case_study_score': 0.20,
            'excel_test_score': 0.15,
            'business_depth': 0.10
        }

    def get_required_fields(self) -> List[str]:
        """Return the evidence keys this plugin needs to produce a score."""
        return ['resume_text']  # Resume text (extracted from PDF)

    def get_optional_fields(self) -> List[str]:
        """Return the evidence keys that refine the score when present."""
        # NOTE(review): score() itself does not read 'internship_descriptions';
        # it may still be consumed elsewhere (e.g. by calculate_confidence in
        # the base class) - verify before removing or wiring it in.
        return ['case_study_text', 'excel_test_score', 'internship_descriptions']

    def score(self, evidence_data: Dict) -> DomainScore:
        """Calculate the business domain score.

        Args:
            evidence_data: Mapping of evidence keys to values. Reads
                ``resume_text``, ``case_study_text`` and ``excel_test_score``;
                missing, empty or unusable values contribute 0.0.

        Returns:
            A ``DomainScore`` whose ``score`` is the weighted sum of the
            features clamped to 0-1, with the individual feature values in
            ``raw_features`` and the elapsed time in milliseconds.
        """
        # perf_counter is monotonic, unlike time.time(), so the measured
        # duration cannot go negative if the system clock is adjusted.
        start_time = time.perf_counter()
        features = {}

        # Resume keyword analysis
        resume_text = self._text_field(evidence_data, 'resume_text')
        if resume_text:
            features['resume_keyword_score'] = self._analyze_resume_keywords(resume_text)
            features['internship_relevance'] = self._extract_internship_relevance(resume_text)
            features['business_depth'] = self._assess_business_depth(resume_text)
        else:
            features['resume_keyword_score'] = 0.0
            features['internship_relevance'] = 0.0
            features['business_depth'] = 0.0

        # Case study analysis
        case_study = self._text_field(evidence_data, 'case_study_text')
        features['case_study_score'] = self._analyze_case_study(case_study) if case_study else 0.0

        # Excel test score (normalized 0-100 to 0-1)
        features['excel_test_score'] = self._normalize_excel_score(
            evidence_data.get('excel_test_score')
        )

        # Calculate weighted score.
        # self.feature_weights is not defined in this class; presumably the
        # base class populates it from _get_feature_weights() - TODO confirm.
        score = sum(value * self.feature_weights[name] for name, value in features.items())

        # Calculate confidence
        confidence = self.calculate_confidence(evidence_data)

        processing_time = (time.perf_counter() - start_time) * 1000

        return DomainScore(
            domain_type='business',
            score=max(0.0, min(score, 1.0)),
            confidence=confidence,
            raw_features=features,
            processing_time_ms=processing_time
        )

    @staticmethod
    def _text_field(evidence_data: Dict, key: str) -> str:
        """Return ``evidence_data[key]`` if it is a string, otherwise ''.

        Missing and ``None`` values are treated as absent; any other
        non-string value is ignored with a warning instead of failing later
        inside the text analysis.
        """
        value = evidence_data.get(key)
        if value is None:
            return ''
        if not isinstance(value, str):
            logger.warning("Ignoring non-text value for %r (got %s)", key, type(value).__name__)
            return ''
        return value

    @staticmethod
    def _normalize_excel_score(raw_score) -> float:
        """Scale a 0-100 test score to 0-1, clamping out-of-range values.

        Returns 0.0 when the score is missing, not numeric, or NaN.
        """
        if raw_score is None:
            return 0.0
        try:
            value = float(raw_score)
        except (TypeError, ValueError):
            logger.warning("Ignoring non-numeric excel_test_score: %r", raw_score)
            return 0.0
        if value != value:  # NaN is the only value not equal to itself
            logger.warning("Ignoring NaN excel_test_score")
            return 0.0
        return max(0.0, min(value / 100, 1.0))

    @staticmethod
    def _contains_term(text: str, term: str) -> bool:
        """Return True if *term* occurs in *text* as a whole word or phrase.

        *text* must already be lower-cased. An optional trailing "s" is
        accepted so plurals still match ("analyst" matches "analysts"), and
        the words of a multi-word term may be separated by any whitespace,
        including a line break.
        """
        words = term.lower().split()
        if not words:
            return False
        phrase = r'\s+'.join(re.escape(word) for word in words)
        # The re module caches compiled patterns, so the small fixed
        # vocabulary used by this class is not recompiled on every call.
        return re.search(rf'(?<!\w){phrase}s?(?!\w)', text) is not None

    def _analyze_resume_keywords(self, resume_text: str) -> float:
        """
        ATS-style keyword matching for business roles

        For every category the score is the fraction of its keywords that
        appear in the resume; the result is the weighted sum of the category
        scores using ``_CATEGORY_WEIGHTS``.

        Returns: 0-1 score based on keyword coverage per category
        """
        text_lower = resume_text.lower()

        # Fraction of keywords found in each category
        category_scores = {}
        for category, keywords in self.business_keywords.items():
            if not keywords:
                # An empty category cannot be covered; avoid dividing by zero.
                category_scores[category] = 0.0
                continue
            matches = sum(1 for kw in keywords if self._contains_term(text_lower, kw))
            category_scores[category] = min(matches / len(keywords), 1.0)

        # Weighted sum across categories; categories without a weight are
        # reported in the log but do not affect the score.
        score = sum(
            category_scores.get(cat, 0) * weight
            for cat, weight in self._CATEGORY_WEIGHTS.items()
        )

        logger.info("Resume keyword score: %.2f (categories: %s)", score, category_scores)
        return score

    def _extract_internship_relevance(self, resume_text: str) -> float:
        """
        Extract and score internship relevance to business

        Every distinct line that mentions an internship is treated as one
        internship context (the first ``_MAX_INTERNSHIP_MENTIONS`` in document
        order are used). Each keyword category found on such a line adds 0.2.

        Returns: 0-1 score based on business-related internships
        """
        text_lower = resume_text.lower()

        # Collect distinct lines mentioning an internship. Working per line
        # keeps unrelated neighbouring lines out of the context, and
        # de-duplicating prevents one internship from being counted twice.
        contexts = []
        seen = set()
        for line in text_lower.splitlines():
            if not self._INTERNSHIP_RE.search(line):
                continue
            context = ' '.join(line.split())
            if context in seen:
                continue
            seen.add(context)
            contexts.append(context)
            if len(contexts) >= self._MAX_INTERNSHIP_MENTIONS:
                break

        if not contexts:
            return 0.0

        # Score based on business keyword overlap in internship context
        category_hits = 0
        for context in contexts:
            for keywords in self.business_keywords.values():
                if any(self._contains_term(context, kw) for kw in keywords):
                    category_hits += 1

        score = min(0.2 * category_hits, 1.0)
        logger.info("Internship relevance: %.2f", score)
        return score

    def _assess_business_depth(self, resume_text: str) -> float:
        """
        Assess overall business knowledge depth

        Counts how many of ``_ADVANCED_TERMS`` appear in the resume; ten or
        more distinct terms give the maximum score.

        Returns: 0-1 score based on technical business terms
        """
        text_lower = resume_text.lower()

        term_count = sum(
            1 for term in self._ADVANCED_TERMS if self._contains_term(text_lower, term)
        )
        score = min(term_count / 10, 1.0)  # 10+ terms = max

        logger.info("Business depth score: %.2f (%d advanced terms)", score, term_count)
        return score

    def _analyze_case_study(self, case_study_text: str) -> float:
        """
        Analyze case study submission quality

        The score has three parts: structure keywords (0.1 each, at most
        0.4), analytical terms (0.05 each, at most 0.3) and length (up to
        0.3, reached at 2000 characters). Submissions shorter than 100
        characters score 0.0.

        Returns: 0-1 score based on structure and depth
        """
        if not case_study_text or len(case_study_text) < 100:
            return 0.0

        text_lower = case_study_text.lower()

        # Structure indicators
        structure_keywords = ('problem', 'analysis', 'solution', 'recommendation', 'conclusion')
        structure_hits = sum(1 for kw in structure_keywords if self._contains_term(text_lower, kw))
        structure_score = min(0.1 * structure_hits, 0.4)

        # Analytical depth
        analytical_terms = ('data', 'metric', 'assumption', 'framework', 'hypothesis', 'evidence')
        analytical_hits = sum(1 for term in analytical_terms if self._contains_term(text_lower, term))
        analytical_score = min(0.05 * analytical_hits, 0.3)

        # Length (quality proxy)
        length_score = min(len(case_study_text) / 2000, 0.3)  # 2000+ chars = max

        score = structure_score + analytical_score + length_score

        logger.info("Case study score: %.2f", score)
        return min(score, 1.0)