"""Tech/CS Domain Plugin

Scores technical competency based on:
- GitHub activity (commits, repos, stars, descriptions)
- LeetCode profile (problems solved, ranking)
- Portfolio links (project depth analysis)
"""

import logging
import re
import time
from datetime import datetime, timedelta, timezone
from typing import Dict, List, Optional
from urllib.parse import urlparse

import requests

from .base_plugin import BaseDomainPlugin, DomainScore
from .plugin_factory import register_plugin

logger = logging.getLogger(__name__)


@register_plugin('tech')
class TechPlugin(BaseDomainPlugin):
    """Technical domain scoring plugin.

    Combines GitHub, LeetCode and portfolio signals into a single weighted
    score. Every analyzer catches its own errors and returns a fixed fallback
    value, so ``score`` does not raise because of a failed HTTP request.
    """

    _GITHUB_API = 'https://api.github.com'
    _GITHUB_HEADERS = {'Accept': 'application/vnd.github.v3+json'}
    _HTTP_TIMEOUT = 5  # seconds, applied to every outbound request

    # The lookbehind rejects look-alike hosts such as "notgithub.com/user"
    # while still accepting "www.github.com/user" and scheme-less input.
    _GITHUB_USER_RE = re.compile(
        r'(?<![A-Za-z0-9-])github\.com/([a-zA-Z0-9_-]+)', re.IGNORECASE
    )

    # Free hosting platforms; portfolios served from these get no
    # custom-domain bonus.
    _HOSTED_PORTFOLIO_DOMAINS = ('github.io', 'netlify.app', 'vercel.app', 'repl.it')

    def _get_domain_type(self) -> str:
        """Return the identifier of this plugin's domain."""
        return 'tech'

    def _get_feature_weights(self) -> Dict[str, float]:
        """Return the weight of each feature in the final score (sums to 1.0)."""
        return {
            'github_activity_score': 0.30,
            'github_repo_quality': 0.20,
            'leetcode_score': 0.25,
            'portfolio_depth': 0.15,
            'recent_activity': 0.10,
        }

    def get_required_fields(self) -> List[str]:
        """Return the evidence keys that must be supplied."""
        return ['github_url']  # At least GitHub is required

    def get_optional_fields(self) -> List[str]:
        """Return the evidence keys that may be supplied."""
        return ['leetcode_handle', 'portfolio_url', 'linkedin_url']

    def score(self, evidence_data: Dict) -> DomainScore:
        """Calculate the tech domain score.

        Args:
            evidence_data: Mapping that may contain ``github_url``,
                ``leetcode_handle`` and ``portfolio_url``. A missing or empty
                entry scores 0.0 for the features derived from it.

        Returns:
            A ``DomainScore`` holding the weighted score (capped at 1.0), the
            confidence from ``calculate_confidence``, the raw per-feature
            scores and the elapsed time in milliseconds.
        """
        start_time = time.time()
        features = {}

        # GitHub analysis
        github_url = evidence_data.get('github_url', '')
        if github_url:
            features['github_activity_score'] = self._analyze_github_activity(github_url)
            features['github_repo_quality'] = self._analyze_repo_quality(github_url)
            features['recent_activity'] = self._check_recent_commits(github_url)
        else:
            features['github_activity_score'] = 0.0
            features['github_repo_quality'] = 0.0
            features['recent_activity'] = 0.0

        # LeetCode analysis
        leetcode_handle = evidence_data.get('leetcode_handle', '')
        features['leetcode_score'] = (
            self._analyze_leetcode(leetcode_handle) if leetcode_handle else 0.0
        )

        # Portfolio analysis
        portfolio_url = evidence_data.get('portfolio_url', '')
        features['portfolio_depth'] = (
            self._analyze_portfolio(portfolio_url) if portfolio_url else 0.0
        )

        # Weighted sum over every computed feature
        score = sum(
            value * self.feature_weights[name] for name, value in features.items()
        )

        confidence = self.calculate_confidence(evidence_data)
        processing_time = (time.time() - start_time) * 1000

        return DomainScore(
            domain_type='tech',
            score=min(score, 1.0),
            confidence=confidence,
            raw_features=features,
            processing_time_ms=processing_time,
        )

    def _github_get(self, path: str, params: Optional[Dict] = None) -> requests.Response:
        """Issue a GET against the GitHub REST API and return the raw response."""
        return requests.get(
            f"{self._GITHUB_API}{path}",
            headers=self._GITHUB_HEADERS,
            params=params,
            timeout=self._HTTP_TIMEOUT,
        )

    def _analyze_github_activity(self, github_url: str) -> float:
        """Analyze GitHub profile activity.

        Returns:
            A 0-1 score built from public repo count (50%), followers (30%)
            and followers + following (20%). Returns 0.0 when no username can
            be extracted and 0.3 when the API call fails.
        """
        try:
            username = self._extract_github_username(github_url)
            if not username:
                return 0.0

            response = self._github_get(f"/users/{username}")
            if response.status_code != 200:
                logger.warning(
                    "GitHub API error for %s: %s", username, response.status_code
                )
                return 0.3  # Fallback score if API fails

            data = response.json()

            # "or 0" guards against explicit nulls in the payload
            public_repos = data.get('public_repos', 0) or 0
            followers = data.get('followers', 0) or 0
            following = data.get('following', 0) or 0

            # Simple scoring heuristic
            repo_score = min(public_repos / 20, 1.0) * 0.5  # 20+ repos = max
            follower_score = min(followers / 50, 1.0) * 0.3  # 50+ followers = max
            engagement_score = min((followers + following) / 100, 1.0) * 0.2

            total_score = repo_score + follower_score + engagement_score
            logger.info("GitHub activity for %s: %.2f", username, total_score)
            return total_score

        except Exception as e:
            logger.error("Error analyzing GitHub activity: %s", e)
            return 0.3  # Fallback score

    def _analyze_repo_quality(self, github_url: str) -> float:
        """Analyze the quality of the user's five most-starred repositories.

        Returns:
            A 0-1 score built from stars (40%), forks (20%), descriptions
            (20%) and wiki/pages flags (20%). Returns 0.0 when no username can
            be extracted or the user has no repos, and 0.3 when the API call
            fails.
        """
        try:
            username = self._extract_github_username(github_url)
            if not username:
                return 0.0

            # The endpoint cannot sort by stars, so fetch one large page and
            # rank locally.
            response = self._github_get(
                f"/users/{username}/repos", params={'per_page': 100}
            )
            if response.status_code != 200:
                return 0.3

            repos = response.json()
            if not repos:
                return 0.0
            if not isinstance(repos, list):
                return 0.3  # Unexpected payload shape; same fallback as an error

            top_repos = sorted(
                (repo for repo in repos if isinstance(repo, dict)),
                key=lambda repo: repo.get('stargazers_count') or 0,
                reverse=True,
            )[:5]

            total_stars = sum(r.get('stargazers_count') or 0 for r in top_repos)
            total_forks = sum(r.get('forks_count') or 0 for r in top_repos)
            has_descriptions = sum(1 for r in top_repos if r.get('description'))
            # Wiki/pages flags are the only documentation hint in this payload
            has_readmes = sum(
                1 for r in top_repos if r.get('has_wiki') or r.get('has_pages')
            )

            star_score = min(total_stars / 50, 1.0) * 0.4  # 50+ stars = max
            fork_score = min(total_forks / 20, 1.0) * 0.2  # 20+ forks = max
            # Divided by 5 on purpose: fewer than five repos cannot reach the max
            desc_score = (has_descriptions / 5) * 0.2
            readme_score = (has_readmes / 5) * 0.2

            total_score = star_score + fork_score + desc_score + readme_score
            logger.info("GitHub repo quality for %s: %.2f", username, total_score)
            return total_score

        except Exception as e:
            logger.error("Error analyzing repo quality: %s", e)
            return 0.3

    def _check_recent_commits(self, github_url: str) -> float:
        """Check for recent activity (push events in the last 90 days).

        Returns:
            A 0-1 score where 20 or more recent push events give 1.0. Returns
            0.0 when no username can be extracted and 0.5 when the API call
            fails.
        """
        try:
            username = self._extract_github_username(github_url)
            if not username:
                return 0.0

            response = self._github_get(
                f"/users/{username}/events/public", params={'per_page': 30}
            )
            if response.status_code != 200:
                return 0.5  # Neutral fallback

            events = response.json()
            if not isinstance(events, list):
                return 0.5  # Unexpected payload shape; same neutral fallback

            # GitHub timestamps are UTC ("...Z"), so the cutoff must be UTC
            # too; a naive local-time cutoff shifts the window by the host's
            # UTC offset.
            ninety_days_ago = datetime.now(timezone.utc) - timedelta(days=90)

            recent_commits = 0
            for event in events:
                if not isinstance(event, dict) or event.get('type') != 'PushEvent':
                    continue
                try:
                    created_at = datetime.strptime(
                        event['created_at'], '%Y-%m-%dT%H:%M:%SZ'
                    ).replace(tzinfo=timezone.utc)
                except (KeyError, TypeError, ValueError):
                    # Skip one malformed event instead of discarding the count
                    continue
                if created_at > ninety_days_ago:
                    recent_commits += 1

            # Score based on push frequency
            score = min(recent_commits / 20, 1.0)  # 20+ pushes in 90 days = max
            logger.info(
                "Recent activity for %s: %.2f (%s commits)",
                username, score, recent_commits,
            )
            return score

        except Exception as e:
            logger.error("Error checking recent activity: %s", e)
            return 0.5

    def _analyze_leetcode(self, leetcode_handle: str) -> float:
        """Analyze a LeetCode profile (placeholder).

        No profile lookup is performed: the handle is only logged and a
        neutral 0.5 is returned whenever one is provided.

        Note: LeetCode doesn't have an official public API. A real
        implementation would need an unofficial API or scraping with proper
        rate limiting.
        """
        logger.info("LeetCode handle provided: %s", leetcode_handle)
        return 0.5  # Neutral score when handle exists

    def _analyze_portfolio(self, portfolio_url: str) -> float:
        """Analyze a portfolio website.

        Returns:
            0.7 when a HEAD request answers 200, plus 0.3 when the hostname is
            not on a known free hosting platform (capped at 1.0). Returns 0.2
            when the site answers with another status or the request fails.
        """
        try:
            portfolio_url = portfolio_url.strip()
            # Assume https when the scheme was omitted
            if not portfolio_url.startswith(('http://', 'https://')):
                portfolio_url = 'https://' + portfolio_url

            response = requests.head(
                portfolio_url, timeout=self._HTTP_TIMEOUT, allow_redirects=True
            )
            if response.status_code != 200:
                logger.warning("Portfolio not accessible: %s", portfolio_url)
                return 0.2  # Some credit for providing URL

            # Portfolio exists and is accessible
            score = 0.7

            # Compare the parsed hostname, not the whole URL, so a path or
            # query that merely mentions "github.io" does not cost the bonus.
            hostname = (urlparse(portfolio_url).hostname or '').lower()
            on_hosted_platform = any(
                hostname == domain or hostname.endswith('.' + domain)
                for domain in self._HOSTED_PORTFOLIO_DOMAINS
            )
            if not on_hosted_platform:
                score += 0.3

            logger.info("Portfolio accessible: %s (score: %s)", portfolio_url, score)
            return min(score, 1.0)

        except Exception as e:
            logger.error("Error analyzing portfolio: %s", e)
            return 0.2

    def _extract_github_username(self, github_url: str) -> str:
        """Extract the username from a GitHub URL.

        Handles ``https://github.com/username`` and ``github.com/username``.
        The host is matched case-insensitively. Returns an empty string when
        the input is not a string or contains no GitHub profile path.
        """
        if not isinstance(github_url, str):
            return ''
        match = self._GITHUB_USER_RE.search(github_url)
        return match.group(1) if match else ''