"""Business/Finance Domain Plugin
Scores business competency based on:
- Resume content (ATS-style keyword matching)
- Case study submission analysis
- Excel/analytical test scores
- Internship experience in business domains
"""
import re
import time
import logging
from typing import Dict, List
from .base_plugin import BaseDomainPlugin, DomainScore
from .plugin_factory import register_plugin
logger = logging.getLogger(__name__)
@register_plugin('business')
class BusinessPlugin(BaseDomainPlugin):
    """Business/Finance domain scoring plugin.

    Scores business competency from:
      - resume content (ATS-style keyword matching)
      - internship relevance extracted from the resume
      - case-study submission analysis
      - Excel/analytical test scores
    """

    def __init__(self):
        super().__init__()
        # Business-relevant keywords, grouped by functional category.
        # Matching is done on lowercased resume text with plain substring tests.
        self.business_keywords = {
            'consulting': ['consulting', 'consultant', 'advisory', 'strategy', 'mckinsey', 'bain', 'bcg'],
            'finance': ['finance', 'banking', 'investment', 'equity', 'portfolio', 'analyst', 'goldman', 'morgan'],
            'analytics': ['data analysis', 'business intelligence', 'tableau', 'power bi', 'sql', 'excel'],
            'management': ['project management', 'product management', 'stakeholder', 'agile', 'scrum'],
            'sales': ['sales', 'business development', 'client acquisition', 'revenue', 'crm'],
            'operations': ['operations', 'supply chain', 'logistics', 'process improvement', 'lean', 'six sigma']
        }

    def _get_domain_type(self) -> str:
        """Return the domain identifier used by the plugin framework."""
        return 'business'

    def _get_feature_weights(self) -> Dict[str, float]:
        """Return the per-feature weights used in the aggregate score (sum to 1.0)."""
        return {
            'resume_keyword_score': 0.30,
            'internship_relevance': 0.25,
            'case_study_score': 0.20,
            'excel_test_score': 0.15,
            'business_depth': 0.10
        }

    def get_required_fields(self) -> List[str]:
        """Evidence fields that must be present: resume text (extracted from PDF)."""
        return ['resume_text']

    def get_optional_fields(self) -> List[str]:
        """Evidence fields that improve the score when available."""
        return ['case_study_text', 'excel_test_score', 'internship_descriptions']

    def score(self, evidence_data: Dict) -> DomainScore:
        """Calculate the weighted business domain score.

        Args:
            evidence_data: Mapping of evidence field names to values; see
                get_required_fields() / get_optional_fields().

        Returns:
            DomainScore with the aggregate score clamped to [0, 1], a
            confidence estimate, raw per-feature scores, and timing info.
        """
        start_time = time.time()
        features: Dict[str, float] = {}

        # Resume-derived features; all zero when no resume text is supplied.
        resume_text = evidence_data.get('resume_text', '')
        if resume_text:
            features['resume_keyword_score'] = self._analyze_resume_keywords(resume_text)
            features['internship_relevance'] = self._extract_internship_relevance(resume_text)
            features['business_depth'] = self._assess_business_depth(resume_text)
        else:
            features['resume_keyword_score'] = 0.0
            features['internship_relevance'] = 0.0
            features['business_depth'] = 0.0

        # Case study analysis (optional evidence).
        case_study = evidence_data.get('case_study_text', '')
        features['case_study_score'] = self._analyze_case_study(case_study) if case_study else 0.0

        # Excel test score arrives on a 0-100 scale. Clamp to [0, 1] from both
        # sides so an out-of-range raw value (negative or > 100) cannot skew
        # the weighted total.
        excel_score = evidence_data.get('excel_test_score', 0)
        features['excel_test_score'] = max(0.0, min(excel_score / 100, 1.0)) if excel_score else 0.0

        # Weighted sum; .get() guards against a feature with no declared weight.
        score = sum(value * self.feature_weights.get(name, 0.0)
                    for name, value in features.items())

        confidence = self.calculate_confidence(evidence_data)
        processing_time = (time.time() - start_time) * 1000

        return DomainScore(
            domain_type='business',
            score=min(score, 1.0),
            confidence=confidence,
            raw_features=features,
            processing_time_ms=processing_time
        )

    def _analyze_resume_keywords(self, resume_text: str) -> float:
        """ATS-style keyword matching for business roles.

        Each category contributes the fraction of its keywords found in the
        resume (capped at 1.0); categories are then combined with fixed
        weights favoring consulting/finance/analytics.

        Returns:
            Score in [0, 1] based on keyword density and relevance.
        """
        text_lower = resume_text.lower()

        # Fraction of keywords matched per category, capped at 1.0.
        category_scores = {}
        for category, keywords in self.business_keywords.items():
            matches = sum(1 for kw in keywords if kw in text_lower)
            category_scores[category] = min(matches / len(keywords), 1.0)

        # Category weights (sum to 1.0); some categories count more.
        weights = {
            'consulting': 0.20,
            'finance': 0.20,
            'analytics': 0.20,
            'management': 0.15,
            'sales': 0.15,
            'operations': 0.10
        }
        score = sum(category_scores.get(cat, 0) * weight for cat, weight in weights.items())

        # Lazy %-style args: formatting is skipped when INFO is disabled.
        logger.info("Resume keyword score: %.2f (categories: %s)", score, category_scores)
        return score

    def _extract_internship_relevance(self, resume_text: str) -> float:
        """Extract internship mentions and score their relevance to business.

        Returns:
            Score in [0, 1]; each of the first five internship mentions adds
            0.2 per keyword category it overlaps with.
        """
        text_lower = resume_text.lower()

        # Internship indicators. Patterns with a capture group yield the
        # captured text from re.findall; group-less patterns yield the full
        # match — both are plain strings here.
        internship_patterns = [
            r'intern(?:ship)?\s+at\s+([^\n]+)',
            r'(?:summer|winter)\s+intern',
            r'([a-z\s]+)\s+intern'
        ]

        internship_mentions = []
        for pattern in internship_patterns:
            internship_mentions.extend(re.findall(pattern, text_lower))

        if not internship_mentions:
            return 0.0

        # Score based on business keyword overlap in the internship context.
        business_internship_score = 0.0
        for mention in internship_mentions[:5]:  # Consider at most 5 internships
            mention_text = mention if isinstance(mention, str) else ' '.join(mention)
            for category, keywords in self.business_keywords.items():
                if any(kw in mention_text for kw in keywords):
                    business_internship_score += 0.2

        score = min(business_internship_score, 1.0)
        logger.info("Internship relevance: %.2f", score)
        return score

    def _assess_business_depth(self, resume_text: str) -> float:
        """Assess overall business knowledge depth.

        Returns:
            Score in [0, 1]; each advanced term found contributes 0.1,
            saturating at 10+ terms.
        """
        text_lower = resume_text.lower()

        # Advanced / technical business vocabulary used as a depth proxy.
        advanced_terms = [
            'financial modeling', 'valuation', 'dcf', 'market research',
            'competitive analysis', 'business plan', 'roi', 'kpi',
            'p&l', 'balance sheet', 'cash flow', 'stakeholder management',
            'go-to-market', 'pricing strategy', 'market segmentation'
        ]

        term_count = sum(1 for term in advanced_terms if term in text_lower)
        score = min(term_count / 10, 1.0)  # 10+ terms = max

        logger.info("Business depth score: %.2f (%d advanced terms)", score, term_count)
        return score

    def _analyze_case_study(self, case_study_text: str) -> float:
        """Analyze case study submission quality.

        Combines three capped components: structural keywords (max 0.4),
        analytical vocabulary (max 0.3), and length as a quality proxy
        (max 0.3 at 2000+ chars).

        Returns:
            Score in [0, 1]; 0.0 for missing or very short (<100 chars) text.
        """
        if not case_study_text or len(case_study_text) < 100:
            return 0.0

        score = 0.0
        text_lower = case_study_text.lower()

        # Structure indicators: 0.1 each, capped at 0.4.
        structure_keywords = ['problem', 'analysis', 'solution', 'recommendation', 'conclusion']
        structure_score = sum(0.1 for kw in structure_keywords if kw in text_lower)
        score += min(structure_score, 0.4)

        # Analytical depth: 0.05 each, capped at 0.3.
        analytical_terms = ['data', 'metric', 'assumption', 'framework', 'hypothesis', 'evidence']
        analytical_score = sum(0.05 for term in analytical_terms if term in text_lower)
        score += min(analytical_score, 0.3)

        # Length as a rough quality proxy: 2000+ chars = max 0.3.
        score += min(len(case_study_text) / 2000, 0.3)

        logger.info("Case study score: %.2f", score)
        return min(score, 1.0)
|