# Spaces: parthnuwal7 / FCT (Sleeping) | File size: 10,623 Bytes | 3d015cd

"""Tech/CS Domain Plugin

Scores technical competency based on:
- GitHub activity (commits, repos, stars, descriptions)
- LeetCode profile (problems solved, ranking)
- Portfolio links (project depth analysis)
"""
import re
import time
import logging
import requests
from typing import Dict, List
from .base_plugin import BaseDomainPlugin, DomainScore
from .plugin_factory import register_plugin

logger = logging.getLogger(__name__)


@register_plugin('tech')
class TechPlugin(BaseDomainPlugin):
    """Technical domain scoring plugin"""
    
    def _get_domain_type(self) -> str:
        return 'tech'
    
    def _get_feature_weights(self) -> Dict[str, float]:
        return {
            'github_activity_score': 0.30,
            'github_repo_quality': 0.20,
            'leetcode_score': 0.25,
            'portfolio_depth': 0.15,
            'recent_activity': 0.10
        }
    
    def get_required_fields(self) -> List[str]:
        return ['github_url']  # At least GitHub is required
    
    def get_optional_fields(self) -> List[str]:
        return ['leetcode_handle', 'portfolio_url', 'linkedin_url']
    
    def score(self, evidence_data: Dict) -> DomainScore:
        """Compute the tech-domain score for one set of evidence.

        Each evidence source found in ``evidence_data`` is turned into one
        or more features; a missing or empty source contributes 0.0 for its
        features. The features are combined using ``self.feature_weights``.

        Returns a ``DomainScore`` carrying the weighted score (capped at
        1.0), the confidence from ``self.calculate_confidence``, the raw
        per-feature values, and the elapsed processing time in milliseconds.
        """
        started_at = time.time()

        # GitHub: one profile URL feeds three separate features.
        github = evidence_data.get('github_url', '')
        if not github:
            activity = quality = recency = 0.0
        else:
            activity = self._analyze_github_activity(github)
            quality = self._analyze_repo_quality(github)
            recency = self._check_recent_commits(github)

        # LeetCode: a single feature keyed by the handle.
        leetcode = evidence_data.get('leetcode_handle', '')
        leetcode_feature = self._analyze_leetcode(leetcode) if leetcode else 0.0

        # Portfolio: a single feature keyed by the site URL.
        portfolio = evidence_data.get('portfolio_url', '')
        portfolio_feature = self._analyze_portfolio(portfolio) if portfolio else 0.0

        # Key order mirrors the order in which the features are computed.
        features = {
            'github_activity_score': activity,
            'github_repo_quality': quality,
            'recent_activity': recency,
            'leetcode_score': leetcode_feature,
            'portfolio_depth': portfolio_feature,
        }

        weighted_total = sum(
            value * self.feature_weights[name]
            for name, value in features.items()
        )

        confidence = self.calculate_confidence(evidence_data)

        # Elapsed wall-clock time, converted from seconds to milliseconds.
        elapsed_ms = (time.time() - started_at) * 1000

        return DomainScore(
            domain_type='tech',
            score=min(weighted_total, 1.0),
            confidence=confidence,
            raw_features=features,
            processing_time_ms=elapsed_ms,
        )
    
    def _analyze_github_activity(self, github_url: str) -> float:
        """
        Analyze GitHub profile activity
        Returns: 0-1 score based on public repos, commits, contributions
        """
        try:
            username = self._extract_github_username(github_url)
            if not username:
                return 0.0
            
            # GitHub API endpoint
            api_url = f"https://api.github.com/users/{username}"
            headers = {'Accept': 'application/vnd.github.v3+json'}
            
            response = requests.get(api_url, headers=headers, timeout=5)
            
            if response.status_code != 200:
                logger.warning(f"GitHub API error for {username}: {response.status_code}")
                return 0.3  # Fallback score if API fails
            
            data = response.json()
            
            # Extract metrics
            public_repos = data.get('public_repos', 0)
            followers = data.get('followers', 0)
            following = data.get('following', 0)
            
            # Simple scoring heuristic
            repo_score = min(public_repos / 20, 1.0) * 0.5  # 20+ repos = max
            follower_score = min(followers / 50, 1.0) * 0.3  # 50+ followers = max
            engagement_score = min((followers + following) / 100, 1.0) * 0.2
            
            total_score = repo_score + follower_score + engagement_score
            
            logger.info(f"GitHub activity for {username}: {total_score:.2f}")
            return total_score
            
        except Exception as e:
            logger.error(f"Error analyzing GitHub activity: {e}")
            return 0.3  # Fallback score
    
    def _analyze_repo_quality(self, github_url: str) -> float:
        """
        Analyze quality of top repositories
        Returns: 0-1 score based on stars, forks, descriptions
        """
        try:
            username = self._extract_github_username(github_url)
            if not username:
                return 0.0
            
            # Get repos sorted by stars
            api_url = f"https://api.github.com/users/{username}/repos?sort=stars&per_page=10"
            headers = {'Accept': 'application/vnd.github.v3+json'}
            
            response = requests.get(api_url, headers=headers, timeout=5)
            
            if response.status_code != 200:
                return 0.3
            
            repos = response.json()
            
            if not repos:
                return 0.0
            
            # Analyze top repos
            total_stars = sum(r.get('stargazers_count', 0) for r in repos[:5])
            total_forks = sum(r.get('forks_count', 0) for r in repos[:5])
            has_descriptions = sum(1 for r in repos[:5] if r.get('description'))
            has_readmes = sum(1 for r in repos[:5] if r.get('has_wiki') or r.get('has_pages'))
            
            star_score = min(total_stars / 50, 1.0) * 0.4  # 50+ stars = max
            fork_score = min(total_forks / 20, 1.0) * 0.2  # 20+ forks = max
            desc_score = (has_descriptions / 5) * 0.2
            readme_score = (has_readmes / 5) * 0.2
            
            total_score = star_score + fork_score + desc_score + readme_score
            
            logger.info(f"GitHub repo quality for {username}: {total_score:.2f}")
            return total_score
            
        except Exception as e:
            logger.error(f"Error analyzing repo quality: {e}")
            return 0.3
    
    def _check_recent_commits(self, github_url: str) -> float:
        """
        Check for recent activity (commits in last 90 days)
        Returns: 0-1 score based on recency
        """
        try:
            username = self._extract_github_username(github_url)
            if not username:
                return 0.0
            
            # Get recent events
            api_url = f"https://api.github.com/users/{username}/events/public?per_page=30"
            headers = {'Accept': 'application/vnd.github.v3+json'}
            
            response = requests.get(api_url, headers=headers, timeout=5)
            
            if response.status_code != 200:
                return 0.5  # Neutral fallback
            
            events = response.json()
            
            # Count push events (commits) in last 90 days
            from datetime import datetime, timedelta
            ninety_days_ago = datetime.now() - timedelta(days=90)
            
            recent_commits = 0
            for event in events:
                if event.get('type') == 'PushEvent':
                    created_at = datetime.strptime(event['created_at'], '%Y-%m-%dT%H:%M:%SZ')
                    if created_at > ninety_days_ago:
                        recent_commits += 1
            
            # Score based on commit frequency
            score = min(recent_commits / 20, 1.0)  # 20+ commits in 90 days = max
            
            logger.info(f"Recent activity for {username}: {score:.2f} ({recent_commits} commits)")
            return score
            
        except Exception as e:
            logger.error(f"Error checking recent activity: {e}")
            return 0.5
    
    def _analyze_leetcode(self, leetcode_handle: str) -> float:
        """
        Score a LeetCode profile (placeholder implementation)

        No profile data is fetched: the handle is only logged and a fixed
        neutral score is returned. LeetCode has no official public API, so
        a real implementation would need an unofficial API or scraping
        with proper rate limiting.

        Returns: 0.5 whenever it is called with a handle, or 0.0 if logging
        the handle raises.
        """
        neutral_score = 0.5  # Credit for supplying a handle at all

        try:
            logger.info(f"LeetCode handle provided: {leetcode_handle}")
        except Exception as e:
            logger.error(f"Error analyzing LeetCode: {e}")
            return 0.0

        return neutral_score
    
    def _analyze_portfolio(self, portfolio_url: str) -> float:
        """
        Analyze portfolio website

        Sends a HEAD request (following redirects) to the URL, prefixing
        ``https://`` when no scheme is given. A 200 response earns 0.7,
        plus 0.3 when the hostname is not on a known shared hosting
        domain (github.io, netlify.app, vercel.app, repl.it).

        Returns: 0-1 score; 0.2 when the site does not answer 200 or any
        error is raised (some credit for providing a URL).
        """
        # Function-scope import, matching the file's existing use of local
        # imports for helpers needed by a single method.
        from urllib.parse import urlparse

        shared_hosts = ('github.io', 'netlify.app', 'vercel.app', 'repl.it')

        try:
            portfolio_url = portfolio_url.strip()

            # Basic URL validation; schemes are case-insensitive.
            if not portfolio_url.lower().startswith(('http://', 'https://')):
                portfolio_url = 'https://' + portfolio_url

            # NOTE(review): this URL comes from submitted evidence and is
            # fetched from the server - consider rejecting private/internal
            # addresses before requesting it.
            response = requests.head(portfolio_url, timeout=5, allow_redirects=True)

            if response.status_code != 200:
                logger.warning(f"Portfolio not accessible: {portfolio_url}")
                return 0.2  # Some credit for providing URL

            # Portfolio exists and is accessible
            score = 0.7

            # Bonus for custom domain. Compare the parsed hostname rather
            # than the whole URL, so a path or query containing e.g.
            # "github.io" does not cancel the bonus and "mygithub.io" is
            # not mistaken for GitHub Pages.
            hostname = (urlparse(portfolio_url).hostname or '').lower()
            on_shared_host = any(
                hostname == host or hostname.endswith('.' + host)
                for host in shared_hosts
            )
            if not on_shared_host:
                score += 0.3

            logger.info(f"Portfolio accessible: {portfolio_url} (score: {score})")
            return min(score, 1.0)

        except Exception as e:
            logger.error(f"Error analyzing portfolio: {e}")
            return 0.2
    
    def _extract_github_username(self, github_url: str) -> str:
        """Extract username from GitHub URL"""
        # Handle formats: https://github.com/username or github.com/username
        pattern = r'github\.com/([a-zA-Z0-9_-]+)'
        match = re.search(pattern, github_url)
        return match.group(1) if match else ''