Spaces:
Sleeping
Sleeping
| import re | |
| import logging | |
| from typing import List, Dict, Optional | |
| import google.generativeai as genai | |
| from app.utils.config import Config | |
| logger = logging.getLogger(__name__) | |
class ScoringService:
    """Score LinkedIn candidate profiles against a job description.

    Each candidate is a dict that may carry the keys ``name``, ``headline``,
    ``company``, ``education``, ``experience_summary`` and ``location``.
    Six sub-scores (each on a 1-10 scale) are combined into a weighted total
    using the ``*_WEIGHT`` constants on ``Config``. The experience-match
    sub-score comes from Gemini when an API key is configured and from
    keyword matching otherwise.
    """

    # Rubric text shared by the batch prompt and the single-candidate prompt.
    _RATING_GUIDE = (
        "10 = Perfect match with all required skills and experience\n"
        "8-9 = Strong match with most requirements\n"
        "6-7 = Good match with some requirements\n"
        "4-5 = Moderate match with basic requirements\n"
        "1-3 = Poor match with few requirements\n"
        "Consider:\n"
        "- Skills alignment\n"
        "- Experience relevance\n"
        "- Industry fit\n"
        "- Technical expertise\n"
    )

    # Matches "1. John Smith: 8.5", "1) John Smith: 8.5" or "John Smith: 8.5".
    _SCORE_LINE_RE = re.compile(r'(?:(\d+)[.)]\s*)?([^:]+):\s*(\d+(?:\.\d+)?)')

    def __init__(self):
        """Configure the Gemini client (if a key is set) and the lookup tables."""
        self.gemini_model = None
        if Config.GEMINI_API_KEY:
            genai.configure(api_key=Config.GEMINI_API_KEY)
            self.gemini_model = genai.GenerativeModel('gemini-2.5-flash')

        # Elite and strong schools for education scoring
        self.elite_schools = {
            'harvard', 'stanford', 'mit', 'caltech', 'princeton', 'yale', 'columbia',
            'university of pennsylvania', 'upenn', 'dartmouth', 'brown', 'cornell',
            'university of chicago', 'northwestern', 'duke', 'johns hopkins',
            'carnegie mellon', 'cmu', 'berkeley', 'ucla', 'usc', 'georgia tech',
            'university of michigan', 'university of illinois', 'uiuc',
        }
        self.strong_schools = {
            'nyu', 'boston university', 'tufts', 'northeastern', 'georgetown',
            'vanderbilt', 'rice', 'emory', 'wake forest', 'university of virginia',
            'university of north carolina', 'unc', 'university of texas', 'ut austin',
            'university of washington', 'university of wisconsin', 'purdue',
            'university of maryland', 'rutgers', 'university of florida',
            'university of california', 'uc', 'university of massachusetts', 'umass',
        }

        # Top tech companies for company relevance scoring
        self.tier_1_companies = {
            'google', 'alphabet', 'microsoft', 'apple', 'amazon', 'meta', 'facebook',
            'netflix', 'tesla', 'nvidia', 'salesforce', 'oracle', 'adobe',
            'intel', 'cisco', 'ibm', 'paypal', 'uber', 'lyft', 'airbnb',
            'stripe', 'square', 'twilio', 'slack', 'zoom', 'dropbox',
        }
        self.tier_2_companies = {
            'linkedin', 'twitter', 'snapchat', 'pinterest', 'spotify', 'discord',
            'roblox', 'unity', 'autodesk', 'workday', 'servicenow', 'splunk',
            'datadog', 'mongodb', 'elastic', 'atlassian', 'jira', 'confluence',
            'github', 'gitlab', 'hashicorp', 'docker', 'kubernetes', 'red hat',
        }

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _text(candidate: Dict, key: str) -> str:
        """Return ``candidate[key]`` as text, treating missing/None/empty as ''."""
        value = candidate.get(key)
        return str(value) if value else ''

    @staticmethod
    def _has_term(text: str, term: str, allow_plural: bool = False) -> bool:
        """Return True if ``term`` occurs in ``text`` as a whole word or phrase.

        ``text`` is expected to be lower-cased already. Plain substring tests
        give false hits for short terms ('uc' in "education", 'la' in
        "dallas", 'ai' in "email"), so the term must not be flanked by
        letters or digits. With ``allow_plural`` a trailing "s" is tolerated
        (so 'api' also matches "apis").
        """
        suffix = 's?' if allow_plural else ''
        pattern = r'(?<![a-z0-9])' + re.escape(term) + suffix + r'(?![a-z0-9])'
        return re.search(pattern, text) is not None

    @staticmethod
    def _batch_labels(candidates: List[Dict]) -> List[str]:
        """Return one unique display label per candidate, in input order.

        The label is the candidate's name, ``Candidate <position>`` when the
        name is missing or blank, and ``<name> (<position>)`` when the same
        name already appeared earlier in the batch. The same labels are used
        both in the AI prompt and when looking scores up afterwards, so the
        two sides can never disagree.
        """
        labels = []
        seen = set()
        for position, candidate in enumerate(candidates, 1):
            name = str(candidate.get('name') or '').strip()
            label = name or f'Candidate {position}'
            if label in seen:
                label = f'{label} ({position})'
            seen.add(label)
            labels.append(label)
        return labels

    @classmethod
    def _parse_batch_scores(cls, score_text: str, labels: List[str]) -> Dict[str, float]:
        """Extract ``{label: score}`` from the model's batch response text.

        A line is attributed to a candidate by exact label match first; if
        the model altered the name, the leading list number is used instead
        (only for labels that did not get a name-matched score). Lines that
        can be attributed to nobody are ignored. Scores are clamped to 1-10.
        """
        known = set(labels)
        scores: Dict[str, float] = {}
        by_index: Dict[str, float] = {}
        for raw_line in score_text.splitlines():
            # Drop markdown bold markers and surrounding whitespace so that
            # "  1. **John Smith**: 8.5" parses like "1. John Smith: 8.5".
            line = raw_line.replace('*', '').strip()
            match = cls._SCORE_LINE_RE.match(line)
            if not match:
                continue
            index_text, name, value = match.groups()
            score = min(max(float(value), 1.0), 10.0)
            name = name.strip()
            if name in known:
                scores[name] = score
            elif index_text and 1 <= int(index_text) <= len(labels):
                by_index.setdefault(labels[int(index_text) - 1], score)
        for label, score in by_index.items():
            scores.setdefault(label, score)
        return scores

    # ------------------------------------------------------------------
    # Public entry point
    # ------------------------------------------------------------------

    def score_candidates(self, candidates: List[Dict], job_description: str, batch_size: int = 5) -> List[Dict]:
        """
        Score candidates based on multiple criteria with batch processing for AI scoring

        Args:
            candidates: List of candidate profile dictionaries
            job_description: Job requirements and description
            batch_size: Number of candidates to process in each AI batch;
                values below 1 are treated as 1

        Returns:
            List of ``{'profile': ..., 'score_breakdown': ...}`` dicts sorted
            by ``total_score`` in descending order
        """
        # A zero step makes range() raise and a negative one silently yields
        # nothing, so clamp to at least one candidate per batch.
        step = max(1, batch_size)

        scored_candidates = []
        for start in range(0, len(candidates), step):
            batch = candidates[start:start + step]
            scored_candidates.extend(self._process_candidate_batch(batch, job_description))

        # Sort by total score (descending)
        scored_candidates.sort(key=lambda item: item['score_breakdown']['total_score'], reverse=True)
        return scored_candidates

    def _process_candidate_batch(self, candidates: List[Dict], job_description: str) -> List[Dict]:
        """Process a batch of candidates, using AI for experience scoring when available.

        A candidate whose scoring raises is still returned, with the default
        score breakdown, so one bad profile never drops out of the results.
        """
        labels = self._batch_labels(candidates)

        # Get AI experience scores for the batch if Gemini is available
        ai_experience_scores: Dict[str, float] = {}
        if self.gemini_model:
            try:
                ai_experience_scores = self._get_batch_experience_scores(candidates, job_description)
            except Exception as e:
                logger.warning("Error in batch AI scoring: %s", e)

        scored_candidates = []
        for label, candidate in zip(labels, candidates):
            try:
                score_breakdown = self._calculate_score_breakdown(
                    candidate,
                    job_description,
                    ai_experience_scores.get(label),
                )
            except Exception as e:
                logger.error("Error scoring candidate %s: %s", label, e)
                score_breakdown = self._get_default_score_breakdown()
            scored_candidates.append({
                'profile': candidate,
                'score_breakdown': score_breakdown,
            })
        return scored_candidates

    def _get_batch_experience_scores(self, candidates: List[Dict], job_description: str) -> Dict[str, float]:
        """Get experience match scores for a batch of candidates using Gemini AI.

        Returns a dict keyed by the labels from ``_batch_labels``. Candidates
        whose score could not be parsed from the response get the keyword
        fallback score. Any exception (API failure, unreadable response)
        is logged and yields an empty dict.
        """
        try:
            labels = self._batch_labels(candidates)

            # Prepare batch prompt with all candidates
            profiles = []
            for position, (label, candidate) in enumerate(zip(labels, candidates), 1):
                profiles.append(
                    f"{position}. {label}:\n"
                    f"- Headline: {self._text(candidate, 'headline')}\n"
                    f"- Company: {self._text(candidate, 'company')}\n"
                    f"- Education: {self._text(candidate, 'education')}\n"
                    f"- Experience Summary: {self._text(candidate, 'experience_summary')}\n"
                )
            candidates_text = "\n".join(profiles)

            prompt = (
                "Analyze how well each candidate's profile matches the job requirements.\n"
                f"Job Description:\n{job_description}\n"
                f"Candidates to evaluate:\n{candidates_text}\n"
                "Rate each candidate's match from 1-10 where:\n"
                f"{self._RATING_GUIDE}"
                "Return scores in this exact format:\n"
                "1. [Candidate Name]: [Score]\n"
                "2. [Candidate Name]: [Score]\n"
                "...\n"
                "Example:\n"
                "1. John Smith: 8.5\n"
                "2. Jane Doe: 7.2\n"
            )

            response = self.gemini_model.generate_content(prompt)
            scores = self._parse_batch_scores(response.text.strip(), labels)

            # If we couldn't parse all scores, use fallback for missing ones
            for label, candidate in zip(labels, candidates):
                if label not in scores:
                    logger.warning("Could not parse AI score for %s, using fallback", label)
                    scores[label] = self._fallback_experience_score(candidate, job_description)
            return scores
        except Exception as e:
            logger.error("Error in batch AI experience scoring: %s", e)
            return {}

    def _calculate_score_breakdown(self, candidate: Dict, job_description: str, ai_experience_score: Optional[float] = None) -> Dict:
        """Calculate comprehensive score breakdown for a candidate.

        Args:
            candidate: Candidate profile dictionary
            job_description: Job requirements and description
            ai_experience_score: Pre-computed experience score from the batch
                AI call; when None the score is computed for this candidate

        Returns:
            Dict with the six sub-scores and ``total_score``, each rounded
            to two decimals
        """
        education_score = self._calculate_education_score(candidate.get('education', ''))
        career_score = self._calculate_career_trajectory_score(candidate)
        company_score = self._calculate_company_relevance_score(candidate.get('company', ''))
        if ai_experience_score is not None:
            experience_score = ai_experience_score
        else:
            experience_score = self._calculate_experience_match_score(candidate, job_description)
        location_score = self._calculate_location_score(candidate.get('location', ''))
        tenure_score = self._calculate_tenure_score(candidate)

        # Calculate weighted total score (weights are defined on Config)
        total_score = (
            education_score * Config.EDUCATION_WEIGHT +
            career_score * Config.CAREER_TRAJECTORY_WEIGHT +
            company_score * Config.COMPANY_RELEVANCE_WEIGHT +
            experience_score * Config.EXPERIENCE_MATCH_WEIGHT +
            location_score * Config.LOCATION_WEIGHT +
            tenure_score * Config.TENURE_WEIGHT
        )

        return {
            'education_score': round(education_score, 2),
            'career_trajectory_score': round(career_score, 2),
            'company_relevance_score': round(company_score, 2),
            'experience_match_score': round(experience_score, 2),
            'location_score': round(location_score, 2),
            'tenure_score': round(tenure_score, 2),
            'total_score': round(total_score, 2),
        }

    def _calculate_education_score(self, education: str) -> float:
        """Calculate education score based on school tier.

        Returns 10.0 for an elite school, 8.0 for a strong school, 6.0 for
        any other university/college/institute, 4.0 for other text and 5.0
        when no education is given.
        """
        if not education:
            return 5.0  # Default score for missing education

        education_lower = str(education).lower()

        # School names are matched as whole words: abbreviations such as
        # 'uc' or 'mit' would otherwise hit "education" or "Smith".
        if any(self._has_term(education_lower, school) for school in self.elite_schools):
            return 10.0
        if any(self._has_term(education_lower, school) for school in self.strong_schools):
            return 8.0

        # Check for any university/college
        if any(keyword in education_lower for keyword in ('university', 'college', 'institute')):
            return 6.0
        return 4.0  # Default for other education

    def _calculate_career_trajectory_score(self, candidate: Dict) -> float:
        """Calculate career trajectory score from seniority words in the headline.

        Returns 9.0 for senior/leadership titles, 5.0 for entry-level
        titles, 7.0 for mid-level role nouns and 6.0 otherwise.
        """
        headline = self._text(candidate, 'headline').lower()

        # Senior/leadership positions
        senior_keywords = ('senior', 'lead', 'principal', 'staff', 'director', 'manager', 'head of')
        if any(keyword in headline for keyword in senior_keywords):
            return 9.0

        # Entry-level qualifiers are checked before the generic role nouns;
        # otherwise "Junior Software Engineer" would be rated mid-level via
        # 'engineer'. Whole-word matching keeps "International" from
        # counting as 'intern'.
        entry_keywords = ('junior', 'associate', 'intern', 'graduate')
        if any(self._has_term(headline, keyword) for keyword in entry_keywords):
            return 5.0

        # Mid-level positions (substring match so "engineering" counts too)
        mid_keywords = ('engineer', 'developer', 'analyst', 'specialist')
        if any(keyword in headline for keyword in mid_keywords):
            return 7.0

        return 6.0  # Default score

    def _calculate_company_relevance_score(self, company: str) -> float:
        """Calculate company relevance score based on company tier.

        Returns 10.0 for tier-1 companies, 8.0 for tier-2, 6.0 when the name
        carries a corporate/startup marker, and 5.0 otherwise or when empty.
        """
        if not company:
            return 5.0  # Default score for missing company

        company_lower = str(company).lower()

        # Whole-word matching: 'meta' must not match "Metalworks", nor
        # 'uber' "Kubernetes", nor 'unity' "Community".
        if any(self._has_term(company_lower, name) for name in self.tier_1_companies):
            return 10.0
        if any(self._has_term(company_lower, name) for name in self.tier_2_companies):
            return 8.0

        # Check for startup indicators. The long forms are listed explicitly
        # because whole-word matching no longer finds 'corp' inside
        # "corporation" or 'inc' inside "incorporated".
        startup_indicators = ('startup', 'inc', 'llc', 'corp', 'ltd', 'corporation', 'incorporated')
        if any(self._has_term(company_lower, indicator) for indicator in startup_indicators):
            return 6.0
        return 5.0  # Default for other companies

    def _calculate_experience_match_score(self, candidate: Dict, job_description: str) -> float:
        """Calculate experience match score for one candidate using Gemini AI.

        Used when no batch AI score is available for the candidate. Falls
        back to keyword matching when Gemini is not configured or the call
        fails; returns 5.0 when the response contains no number.
        """
        if not self.gemini_model:
            return self._fallback_experience_score(candidate, job_description)

        try:
            # Prepare candidate profile for analysis
            candidate_profile = (
                f"Name: {candidate.get('name', 'Unknown')}\n"
                f"Headline: {self._text(candidate, 'headline')}\n"
                f"Company: {self._text(candidate, 'company')}\n"
                f"Education: {self._text(candidate, 'education')}\n"
                f"Experience Summary: {self._text(candidate, 'experience_summary')}\n"
            )
            prompt = (
                "Analyze how well this candidate's profile matches the job requirements.\n"
                f"Job Description:\n{job_description}\n"
                f"Candidate Profile:\n{candidate_profile}\n"
                "Rate the match from 1-10 where:\n"
                f"{self._RATING_GUIDE}"
                "Return only the numerical score (1-10).\n"
            )

            response = self.gemini_model.generate_content(prompt)
            score_text = response.text.strip()

            # Extract numerical score
            score_match = re.search(r'(\d+(?:\.\d+)?)', score_text)
            if score_match:
                return min(max(float(score_match.group(1)), 1.0), 10.0)  # Clamp between 1-10
            return 5.0  # Default if parsing fails
        except Exception as e:
            logger.warning("Error in AI experience scoring: %s", e)
            return self._fallback_experience_score(candidate, job_description)

    def _fallback_experience_score(self, candidate: Dict, job_description: str) -> float:
        """Fallback experience scoring using keyword matching.

        Counts the tech keywords that appear in both the candidate's
        headline/experience summary and the job description, and maps the
        count to 9.0 (>=5), 7.0 (>=3), 5.0 (>=1) or 3.0 (none).
        """
        candidate_text = (
            f"{self._text(candidate, 'headline')} {self._text(candidate, 'experience_summary')}"
        ).lower()
        job_desc_lower = str(job_description or '').lower()

        tech_keywords = (
            'python', 'javascript', 'java', 'react', 'node.js', 'angular', 'vue',
            'sql', 'mongodb', 'postgresql', 'aws', 'azure', 'gcp', 'docker',
            'kubernetes', 'machine learning', 'ai', 'data science', 'devops',
            'agile', 'scrum', 'git', 'api', 'rest', 'graphql', 'microservices',
        )

        # Whole-word matching so 'ai' is not found in "email", 'java' in
        # "javascript" or 'git' in "digital"; plurals ("APIs") still count.
        matches = sum(
            1
            for keyword in tech_keywords
            if self._has_term(candidate_text, keyword, allow_plural=True)
            and self._has_term(job_desc_lower, keyword, allow_plural=True)
        )

        # Score based on matches
        if matches >= 5:
            return 9.0
        if matches >= 3:
            return 7.0
        if matches >= 1:
            return 5.0
        return 3.0

    def _calculate_location_score(self, location: str) -> float:
        """Calculate location score based on tech hub proximity.

        Returns 10.0 for major hubs, 8.0 for secondary hubs, 7.0 for remote
        indicators, and 5.0 otherwise or when the location is empty.
        """
        if not location:
            return 5.0  # Default score for missing location

        location_lower = str(location).lower()

        # Hub names are matched as whole words: the abbreviation 'la' is a
        # substring of "Dallas", "Portland", "Orlando" and many more.
        major_hubs = ('san francisco', 'sf', 'bay area', 'silicon valley', 'seattle', 'new york', 'nyc')
        if any(self._has_term(location_lower, hub) for hub in major_hubs):
            return 10.0

        secondary_hubs = ('austin', 'boston', 'denver', 'atlanta', 'chicago', 'los angeles', 'la')
        if any(self._has_term(location_lower, hub) for hub in secondary_hubs):
            return 8.0

        # Remote work indicators
        remote_indicators = ('remote', 'work from home', 'wfh', 'virtual')
        if any(indicator in location_lower for indicator in remote_indicators):
            return 7.0
        return 5.0  # Default for other locations

    def _calculate_tenure_score(self, candidate: Dict) -> float:
        """Calculate tenure score based on experience indicators.

        Returns 9.0 for a senior headline with tenure wording in the
        experience summary, 8.0 for a senior headline alone, 7.0 for tenure
        wording alone and 5.0 otherwise.
        """
        headline = self._text(candidate, 'headline').lower()
        experience = self._text(candidate, 'experience_summary').lower()

        # Look for tenure indicators
        tenure_indicators = ('years', 'yr', 'experience', 'since', 'established')
        has_tenure_info = any(indicator in experience for indicator in tenure_indicators)

        # Senior positions suggest longer tenure
        senior_indicators = ('senior', 'lead', 'principal', 'staff', 'director')
        is_senior = any(indicator in headline for indicator in senior_indicators)

        if is_senior and has_tenure_info:
            return 9.0
        if is_senior:
            return 8.0
        if has_tenure_info:
            return 7.0
        return 5.0  # Default score

    def _get_default_score_breakdown(self) -> Dict:
        """Get default score breakdown (all scores 5.0) for error cases."""
        return {
            'education_score': 5.0,
            'career_trajectory_score': 5.0,
            'company_relevance_score': 5.0,
            'experience_match_score': 5.0,
            'location_score': 5.0,
            'tenure_score': 5.0,
            'total_score': 5.0,
        }