Spaces:

parthnuwal7
/

FCT

Sleeping

FCT / services /domain_plugins /business_plugin.py

Parthnuwal7

Adding analytical content

3d015cd 3 months ago

8.08 kB

	"""Business/Finance Domain Plugin

	Scores business competency based on:
	- Resume content (ATS-style keyword matching)
	- Case study submission analysis
	- Excel/analytical test scores
	- Internship experience in business domains
	"""
	import re
	import time
	import logging
	from typing import Dict, List
	from .base_plugin import BaseDomainPlugin, DomainScore
	from .plugin_factory import register_plugin

	# Module-level logger named after this module; the plugin's scoring helpers
	# report their intermediate scores through it.
	logger = logging.getLogger(__name__)


	@register_plugin('business')
	class BusinessPlugin(BaseDomainPlugin):
	    """Business/Finance domain scoring plugin.

	    Turns the evidence supplied for a candidate into five features, each in
	    the range 0-1, and combines them into one weighted score:

	    - ``resume_keyword_score``: keyword coverage per business category
	    - ``internship_relevance``: business keywords near internship mentions
	    - ``business_depth``: number of advanced business terms in the resume
	    - ``case_study_score``: structure, analytical vocabulary and length
	    - ``excel_test_score``: the 0-100 test score scaled to 0-1
	    """

	    # Relative importance of each keyword category in the resume keyword
	    # score. The weights sum to 1.0, which keeps that score within 0-1.
	    _CATEGORY_WEIGHTS = {
	        'consulting': 0.20,
	        'finance': 0.20,
	        'analytics': 0.20,
	        'management': 0.15,
	        'sales': 0.15,
	        'operations': 0.10,
	    }

	    # Patterns that locate internship mentions in lower-cased resume text.
	    # Compiled once at class creation instead of on every call.
	    _INTERNSHIP_PATTERNS = (
	        # "intern at <company ...>": captures the rest of the line.
	        re.compile(r'intern(?:ship)?\s+at\s+([^\n]+)'),
	        # "summer intern" / "winter intern". The pipe must be unescaped to
	        # act as an alternation rather than a literal "|" character.
	        re.compile(r'(?:summer|winter)\s+intern'),
	        # "<role words> intern(s|ship|ships)": captures the words in front.
	        # The trailing \b stops "internal", "international" or "internet"
	        # from being mistaken for an internship mention.
	        re.compile(r'([a-z\s]+)\s+intern(?:ship)?s?\b'),
	    )

	    # Terms counted by the business-depth feature.
	    _ADVANCED_TERMS = (
	        'financial modeling', 'valuation', 'dcf', 'market research',
	        'competitive analysis', 'business plan', 'roi', 'kpi',
	        'p&l', 'balance sheet', 'cash flow', 'stakeholder management',
	        'go-to-market', 'pricing strategy', 'market segmentation',
	    )

	    # Vocabulary looked for in a case study submission.
	    _STRUCTURE_KEYWORDS = ('problem', 'analysis', 'solution', 'recommendation', 'conclusion')
	    _ANALYTICAL_TERMS = ('data', 'metric', 'assumption', 'framework', 'hypothesis', 'evidence')

	    def __init__(self):
	        """Initialise the base plugin and the per-category keyword lists."""
	        super().__init__()
	        # Business-relevant keywords, grouped by category. All entries are
	        # lower case because they are matched against lower-cased text.
	        self.business_keywords = {
	            'consulting': ['consulting', 'consultant', 'advisory', 'strategy', 'mckinsey', 'bain', 'bcg'],
	            'finance': ['finance', 'banking', 'investment', 'equity', 'portfolio', 'analyst', 'goldman', 'morgan'],
	            'analytics': ['data analysis', 'business intelligence', 'tableau', 'power bi', 'sql', 'excel'],
	            'management': ['project management', 'product management', 'stakeholder', 'agile', 'scrum'],
	            'sales': ['sales', 'business development', 'client acquisition', 'revenue', 'crm'],
	            'operations': ['operations', 'supply chain', 'logistics', 'process improvement', 'lean', 'six sigma'],
	        }

	    def _get_domain_type(self) -> str:
	        """Return the identifier of this domain."""
	        return 'business'

	    def _get_feature_weights(self) -> Dict[str, float]:
	        """Return the weight of each feature in the final score (sums to 1.0)."""
	        return {
	            'resume_keyword_score': 0.30,
	            'internship_relevance': 0.25,
	            'case_study_score': 0.20,
	            'excel_test_score': 0.15,
	            'business_depth': 0.10,
	        }

	    def get_required_fields(self) -> List[str]:
	        """Return the evidence keys that must be supplied."""
	        return ['resume_text']  # Resume text (extracted from PDF)

	    def get_optional_fields(self) -> List[str]:
	        """Return the evidence keys that are used when present."""
	        return ['case_study_text', 'excel_test_score', 'internship_descriptions']

	    def score(self, evidence_data: Dict) -> DomainScore:
	        """Calculate the business domain score.

	        Args:
	            evidence_data: Mapping that may contain ``resume_text``,
	                ``case_study_text`` and ``excel_test_score``. A missing or
	                empty entry contributes 0.0 for the features derived from it.

	        Returns:
	            A ``DomainScore`` carrying the weighted score (capped at 1.0),
	            the confidence, the raw per-feature values and the processing
	            time in milliseconds.
	        """
	        # perf_counter is monotonic, so the measured duration cannot be
	        # distorted by system clock adjustments the way time.time() can.
	        started = time.perf_counter()

	        # Resume-derived features
	        resume_text = evidence_data.get('resume_text', '')
	        if resume_text:
	            features = {
	                'resume_keyword_score': self._analyze_resume_keywords(resume_text),
	                'internship_relevance': self._extract_internship_relevance(resume_text),
	                'business_depth': self._assess_business_depth(resume_text),
	            }
	        else:
	            features = {
	                'resume_keyword_score': 0.0,
	                'internship_relevance': 0.0,
	                'business_depth': 0.0,
	            }

	        # Case study analysis
	        case_study = evidence_data.get('case_study_text', '')
	        features['case_study_score'] = self._analyze_case_study(case_study) if case_study else 0.0

	        # Excel test score (normalized 0-100 to 0-1)
	        features['excel_test_score'] = self._normalize_excel_score(
	            evidence_data.get('excel_test_score', 0)
	        )

	        # Weighted sum. NOTE(review): self.feature_weights is not set in this
	        # class; presumably the base class fills it from
	        # _get_feature_weights() - confirm.
	        weighted = sum(value * self.feature_weights[name] for name, value in features.items())

	        # calculate_confidence is not defined in this class (presumably
	        # inherited from BaseDomainPlugin).
	        confidence = self.calculate_confidence(evidence_data)

	        elapsed_ms = (time.perf_counter() - started) * 1000

	        return DomainScore(
	            domain_type=self._get_domain_type(),
	            score=min(weighted, 1.0),
	            confidence=confidence,
	            raw_features=features,
	            processing_time_ms=elapsed_ms,
	        )

	    @staticmethod
	    def _normalize_excel_score(raw_score) -> float:
	        """Scale a 0-100 test score to 0-1.

	        Missing values and values that cannot be read as a number (``None``,
	        ``''``, arbitrary text) yield 0.0. Numeric strings such as ``"85"``
	        are accepted. The result is clamped to the range 0-1, so negative or
	        above-100 inputs cannot push the feature out of range.
	        """
	        try:
	            value = float(raw_score)
	        except (TypeError, ValueError):
	            return 0.0
	        return max(0.0, min(value / 100, 1.0))

	    def _analyze_resume_keywords(self, resume_text: str) -> float:
	        """ATS-style keyword matching for business roles.

	        For each category the share of its keywords found in the resume is
	        computed; the shares are then combined using ``_CATEGORY_WEIGHTS``.

	        Matching is plain substring containment on lower-cased text, so a
	        short keyword also matches inside a longer word (for example
	        'lean' inside 'clean').

	        Returns:
	            0-1 score based on keyword coverage per category.
	        """
	        text_lower = resume_text.lower()

	        # Share of each category's keywords that occur in the resume
	        category_scores = {
	            category: min(sum(1 for kw in keywords if kw in text_lower) / len(keywords), 1.0)
	            for category, keywords in self.business_keywords.items()
	        }

	        score = sum(
	            category_scores.get(category, 0) * weight
	            for category, weight in self._CATEGORY_WEIGHTS.items()
	        )

	        logger.info("Resume keyword score: %.2f (categories: %s)", score, category_scores)
	        return score

	    def _extract_internship_relevance(self, resume_text: str) -> float:
	        """Extract and score internship relevance to business.

	        Internship mentions are collected with ``_INTERNSHIP_PATTERNS``. For
	        each of the first five mentions, every keyword category with at least
	        one keyword in the mention text adds 0.2, so a single mention that
	        touches several categories earns 0.2 per category.

	        Returns:
	            0-1 score; 0.0 when no internship mention is found.
	        """
	        text_lower = resume_text.lower()

	        internship_mentions = []
	        for pattern in self._INTERNSHIP_PATTERNS:
	            internship_mentions.extend(pattern.findall(text_lower))

	        if not internship_mentions:
	            return 0.0

	        # Score based on business keyword overlap in internship context
	        business_internship_score = 0.0
	        for mention in internship_mentions[:5]:  # First 5 mentions found
	            # findall yields tuples for patterns with several groups; the
	            # current patterns yield plain strings, the join is a safeguard.
	            mention_text = mention if isinstance(mention, str) else ' '.join(mention)
	            for keywords in self.business_keywords.values():
	                if any(kw in mention_text for kw in keywords):
	                    business_internship_score += 0.2

	        score = min(business_internship_score, 1.0)
	        logger.info("Internship relevance: %.2f", score)
	        return score

	    def _assess_business_depth(self, resume_text: str) -> float:
	        """Assess overall business knowledge depth.

	        Counts how many entries of ``_ADVANCED_TERMS`` occur (as substrings)
	        in the lower-cased resume.

	        Returns:
	            0-1 score; ten or more distinct terms give the maximum.
	        """
	        text_lower = resume_text.lower()

	        term_count = sum(1 for term in self._ADVANCED_TERMS if term in text_lower)
	        score = min(term_count / 10, 1.0)  # 10+ terms = max

	        logger.info("Business depth score: %.2f (%d advanced terms)", score, term_count)
	        return score

	    def _analyze_case_study(self, case_study_text: str) -> float:
	        """Analyze case study submission quality.

	        The score is the sum of three parts:

	        - structure: 0.1 per structure keyword present, at most 0.4
	        - analytical depth: 0.05 per analytical term present, at most 0.3
	        - length: grows linearly with the text length and reaches its
	          maximum of 0.3 at 2000 characters

	        Returns:
	            0-1 score; 0.0 for submissions shorter than 100 characters.
	        """
	        if not case_study_text or len(case_study_text) < 100:
	            return 0.0

	        text_lower = case_study_text.lower()

	        # Structure indicators
	        structure_score = sum(0.1 for kw in self._STRUCTURE_KEYWORDS if kw in text_lower)

	        # Analytical depth
	        analytical_score = sum(0.05 for term in self._ANALYTICAL_TERMS if term in text_lower)

	        # Length (quality proxy): scale the 0-1 fill ratio by the 0.3 budget
	        # so the maximum is reached at 2000 characters rather than at 600.
	        length_score = 0.3 * min(len(case_study_text) / 2000, 1.0)

	        score = min(structure_score, 0.4) + min(analytical_score, 0.3) + length_score

	        logger.info("Case study score: %.2f", score)
	        return min(score, 1.0)