# FCT/services/domain_plugins/tech_plugin.py
# Author: Parthnuwal7 — commit 3d015cd ("Adding analytical content")
"""Tech/CS Domain Plugin
Scores technical competency based on:
- GitHub activity (commits, repos, stars, descriptions)
- LeetCode profile (problems solved, ranking)
- Portfolio links (project depth analysis)
"""
import logging
import re
import time
from datetime import datetime, timedelta, timezone
from typing import Dict, List
from urllib.parse import urlparse

import requests

from .base_plugin import BaseDomainPlugin, DomainScore
from .plugin_factory import register_plugin
logger = logging.getLogger(__name__)
@register_plugin('tech')
class TechPlugin(BaseDomainPlugin):
    """Technical domain scoring plugin.

    Combines evidence from a GitHub profile, a LeetCode handle and a
    portfolio URL into one weighted score in [0, 1]. Every network probe is
    best-effort: an API failure falls back to a neutral score instead of
    raising, so scoring always completes.
    """

    def _get_domain_type(self) -> str:
        """Return the domain identifier used by the plugin registry."""
        return 'tech'

    def _get_feature_weights(self) -> Dict[str, float]:
        """Per-feature weights; they sum to 1.0 so the total stays in [0, 1]."""
        return {
            'github_activity_score': 0.30,
            'github_repo_quality': 0.20,
            'leetcode_score': 0.25,
            'portfolio_depth': 0.15,
            'recent_activity': 0.10
        }

    def get_required_fields(self) -> List[str]:
        """Fields the caller must supply for this plugin to be meaningful."""
        return ['github_url']  # At least GitHub is required

    def get_optional_fields(self) -> List[str]:
        """Fields that improve the score when present but are not required."""
        return ['leetcode_handle', 'portfolio_url', 'linkedin_url']

    def score(self, evidence_data: Dict) -> DomainScore:
        """Calculate the tech domain score.

        Args:
            evidence_data: May contain 'github_url', 'leetcode_handle' and
                'portfolio_url'. Absent evidence scores 0.0 for its features.

        Returns:
            DomainScore with the weighted total clamped to at most 1.0.
        """
        start_time = time.time()
        features: Dict[str, float] = {}

        # GitHub analysis — three features derived from one profile URL.
        github_url = evidence_data.get('github_url', '')
        if github_url:
            features['github_activity_score'] = self._analyze_github_activity(github_url)
            features['github_repo_quality'] = self._analyze_repo_quality(github_url)
            features['recent_activity'] = self._check_recent_commits(github_url)
        else:
            features['github_activity_score'] = 0.0
            features['github_repo_quality'] = 0.0
            features['recent_activity'] = 0.0

        # LeetCode analysis
        leetcode_handle = evidence_data.get('leetcode_handle', '')
        features['leetcode_score'] = (
            self._analyze_leetcode(leetcode_handle) if leetcode_handle else 0.0
        )

        # Portfolio analysis
        portfolio_url = evidence_data.get('portfolio_url', '')
        features['portfolio_depth'] = (
            self._analyze_portfolio(portfolio_url) if portfolio_url else 0.0
        )

        # Weighted sum; .get() guards against a feature key without a weight.
        score = sum(v * self.feature_weights.get(k, 0.0) for k, v in features.items())

        confidence = self.calculate_confidence(evidence_data)
        processing_time = (time.time() - start_time) * 1000

        return DomainScore(
            domain_type='tech',
            score=min(score, 1.0),
            confidence=confidence,
            raw_features=features,
            processing_time_ms=processing_time
        )

    def _analyze_github_activity(self, github_url: str) -> float:
        """Score overall GitHub profile activity in [0, 1].

        Based on public repo count, followers, and follower+following
        engagement. Returns 0.3 as a neutral fallback on API failure.
        """
        try:
            username = self._extract_github_username(github_url)
            if not username:
                return 0.0

            api_url = f"https://api.github.com/users/{username}"
            headers = {'Accept': 'application/vnd.github.v3+json'}
            response = requests.get(api_url, headers=headers, timeout=5)
            if response.status_code != 200:
                logger.warning(f"GitHub API error for {username}: {response.status_code}")
                return 0.3  # Fallback score if API fails

            data = response.json()
            public_repos = data.get('public_repos', 0)
            followers = data.get('followers', 0)
            following = data.get('following', 0)

            # Simple heuristic: saturating linear ramp per metric.
            repo_score = min(public_repos / 20, 1.0) * 0.5       # 20+ repos = max
            follower_score = min(followers / 50, 1.0) * 0.3      # 50+ followers = max
            engagement_score = min((followers + following) / 100, 1.0) * 0.2

            total_score = repo_score + follower_score + engagement_score
            logger.info(f"GitHub activity for {username}: {total_score:.2f}")
            return total_score
        except Exception as e:
            # Best-effort by design: any network/parse error degrades to
            # a neutral fallback rather than failing the whole score().
            logger.error(f"Error analyzing GitHub activity: {e}")
            return 0.3  # Fallback score

    def _analyze_repo_quality(self, github_url: str) -> float:
        """Score the user's most-starred repositories in [0, 1].

        Uses stars, forks, descriptions and wiki/pages presence on the five
        top-starred repos. Returns 0.3 as a neutral fallback on API failure.
        """
        try:
            username = self._extract_github_username(github_url)
            if not username:
                return 0.0

            # BUG FIX: 'sort=stars' is not a valid value for this endpoint
            # (only created/updated/pushed/full_name), so the old request
            # silently returned repos in default (alphabetical) order.
            # Fetch a page and rank by stargazers locally instead.
            api_url = f"https://api.github.com/users/{username}/repos?per_page=100"
            headers = {'Accept': 'application/vnd.github.v3+json'}
            response = requests.get(api_url, headers=headers, timeout=5)
            if response.status_code != 200:
                return 0.3

            repos = sorted(
                response.json(),
                key=lambda r: r.get('stargazers_count', 0),
                reverse=True,
            )
            if not repos:
                return 0.0

            # Analyze the five top-starred repos.
            top = repos[:5]
            total_stars = sum(r.get('stargazers_count', 0) for r in top)
            total_forks = sum(r.get('forks_count', 0) for r in top)
            has_descriptions = sum(1 for r in top if r.get('description'))
            # README presence is not in the repo-list payload; wiki/pages
            # flags serve as a documentation-effort proxy instead.
            has_docs = sum(1 for r in top if r.get('has_wiki') or r.get('has_pages'))

            star_score = min(total_stars / 50, 1.0) * 0.4   # 50+ stars = max
            fork_score = min(total_forks / 20, 1.0) * 0.2   # 20+ forks = max
            desc_score = (has_descriptions / 5) * 0.2
            docs_score = (has_docs / 5) * 0.2

            total_score = star_score + fork_score + desc_score + docs_score
            logger.info(f"GitHub repo quality for {username}: {total_score:.2f}")
            return total_score
        except Exception as e:
            logger.error(f"Error analyzing repo quality: {e}")
            return 0.3

    def _check_recent_commits(self, github_url: str) -> float:
        """Score recency of activity (push events in last 90 days) in [0, 1].

        Returns 0.5 as a neutral fallback on API failure.
        """
        try:
            username = self._extract_github_username(github_url)
            if not username:
                return 0.0

            # Get recent public events (PushEvent entries approximate commits).
            api_url = f"https://api.github.com/users/{username}/events/public?per_page=30"
            headers = {'Accept': 'application/vnd.github.v3+json'}
            response = requests.get(api_url, headers=headers, timeout=5)
            if response.status_code != 200:
                return 0.5  # Neutral fallback

            events = response.json()

            # BUG FIX: GitHub timestamps are UTC ('...Z'). The old code
            # compared them against naive local datetime.now(), skewing the
            # 90-day window by the local UTC offset. Compare aware-UTC to
            # aware-UTC instead.
            ninety_days_ago = datetime.now(timezone.utc) - timedelta(days=90)
            recent_commits = 0
            for event in events:
                if event.get('type') == 'PushEvent':
                    created_at = datetime.strptime(
                        event['created_at'], '%Y-%m-%dT%H:%M:%SZ'
                    ).replace(tzinfo=timezone.utc)
                    if created_at > ninety_days_ago:
                        recent_commits += 1

            # Score based on commit frequency.
            score = min(recent_commits / 20, 1.0)  # 20+ commits in 90 days = max
            logger.info(f"Recent activity for {username}: {score:.2f} ({recent_commits} commits)")
            return score
        except Exception as e:
            logger.error(f"Error checking recent activity: {e}")
            return 0.5

    def _analyze_leetcode(self, leetcode_handle: str) -> float:
        """Score the LeetCode profile in [0, 1].

        Note: LeetCode has no official public API, so this is a heuristic
        placeholder — a neutral 0.5 whenever a handle is provided. In
        production, consider an unofficial API or scraping with proper
        rate limiting.
        """
        try:
            logger.info(f"LeetCode handle provided: {leetcode_handle}")
            return 0.5  # Neutral score when handle exists
        except Exception as e:
            logger.error(f"Error analyzing LeetCode: {e}")
            return 0.0

    def _analyze_portfolio(self, portfolio_url: str) -> float:
        """Score portfolio presence/accessibility in [0, 1].

        0.7 for a reachable site, +0.3 bonus for a custom domain; 0.2 credit
        when the URL is provided but unreachable.
        """
        try:
            # Basic URL normalization.
            if not portfolio_url.startswith(('http://', 'https://')):
                portfolio_url = 'https://' + portfolio_url

            # HEAD request: we only care whether the site responds.
            response = requests.head(portfolio_url, timeout=5, allow_redirects=True)
            if response.status_code == 200:
                score = 0.7
                # BUG FIX: match free-hosting providers against the actual
                # hostname rather than a substring of the whole URL, so a
                # path like '/github.io-clone' no longer kills the bonus.
                host = urlparse(portfolio_url).netloc.lower()
                free_hosts = ('github.io', 'netlify.app', 'vercel.app', 'repl.it')
                if not any(host == h or host.endswith('.' + h) for h in free_hosts):
                    score += 0.3
                logger.info(f"Portfolio accessible: {portfolio_url} (score: {score})")
                return min(score, 1.0)
            else:
                logger.warning(f"Portfolio not accessible: {portfolio_url}")
                return 0.2  # Some credit for providing URL
        except Exception as e:
            logger.error(f"Error analyzing portfolio: {e}")
            return 0.2

    def _extract_github_username(self, github_url: str) -> str:
        """Extract the username from a GitHub URL; '' when no match.

        Handles formats: https://github.com/username or github.com/username.
        """
        pattern = r'github\.com/([a-zA-Z0-9_-]+)'
        match = re.search(pattern, github_url)
        return match.group(1) if match else ''