"""Gap Analysis + Recommendation services.

Gap analysis — Weighted Competency Fulfillment Algorithm.
See research/design-decision-changes.md §Module 4 for derivation.

Pure functions, no DB writes: the report is computed on demand from
UserSkill + RoleSkill state.

Recommendations — composite ranking over the Resource catalog.
See research/08-next-modules-build-plan.md §Module A. Type weights from
research/03-gap-analysis-engine-research.md §2.1 are the single source of
truth for type preference.
"""
from __future__ import annotations

from dataclasses import asdict, dataclass, field
from typing import Iterable

from apps.resources.models import Resource, SkillResource
from apps.roles.models import Role, RoleSkill
from apps.skills.models import UserSkill
from apps.skills.utils import proficiency_to_level

# Minimum proficiency a user must reach to satisfy each required level.
LEVEL_THRESHOLDS: dict[str, int] = {
    'BEGINNER': 40,
    'INTERMEDIATE': 60,
    'ADVANCED': 100,
}

# Readiness ceiling applied while any mandatory skill is below its threshold.
MANDATORY_CAP = 60.0

# (floor, band name) pairs, highest floor first; the first floor that the
# readiness reaches wins.
BANDS: list[tuple[float, str]] = [
    (85.0, 'STRONG'),
    (65.0, 'GOOD'),
    (45.0, 'PARTIAL'),
    (0.0, 'WEAK'),
]

# Ordering rank per severity; a MET skill has severity None and ranks lowest.
_SEVERITY_RANK: dict[str | None, int] = {
    'CRITICAL': 4,
    'HIGH': 3,
    'MEDIUM': 2,
    'LOW': 1,
    None: 0,
}


@dataclass
class SkillGap:
    """Per-skill comparison of a user's proficiency against a role requirement."""

    skill_id: int
    skill_name: str
    category: str
    required_level: str
    threshold: int
    user_proficiency: int
    weight: float
    is_mandatory: bool
    gap: int
    satisfaction: float
    gap_type: str
    severity: str | None

    def to_dict(self) -> dict:
        """Return the gap as a plain dict (one key per field)."""
        return asdict(self)


@dataclass
class GapReport:
    """Readiness summary for one role plus the per-skill gaps behind it."""

    role_id: int
    role_name: str
    readiness: float
    band: str
    mandatory_cap_applied: bool
    no_requirements: bool
    gaps: list[SkillGap] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return the report as a plain dict.

        Readiness is rounded to 2 decimals and the module-level
        MANDATORY_CAP is included under 'mandatory_cap'.
        """
        return {
            'role_id': self.role_id,
            'role_name': self.role_name,
            'readiness': round(self.readiness, 2),
            'band': self.band,
            'mandatory_cap_applied': self.mandatory_cap_applied,
            'mandatory_cap': MANDATORY_CAP,
            'no_requirements': self.no_requirements,
            'gaps': [g.to_dict() for g in self.gaps],
        }


def _band_for(readiness: float) -> str:
    """Return the name of the first band in BANDS whose floor is reached."""
    for floor, name in BANDS:
        if readiness >= floor:
            return name
    # Only reachable for a readiness below every floor (i.e. negative).
    return 'WEAK'


def _classify(
    user_proficiency: int,
    threshold: int,
    is_mandatory: bool,
) -> tuple[str, str | None]:
    """Return (gap_type, severity) per §3 of design-decision-changes.md."""
    if user_proficiency >= threshold:
        return 'MET', None
    if user_proficiency == 0:
        return 'MISSING', ('CRITICAL' if is_mandatory else 'MEDIUM')

    # Share of the threshold that is still missing (0 < ratio < 1 here).
    ratio = (threshold - user_proficiency) / threshold
    if is_mandatory or ratio >= 0.5:
        severity = 'HIGH'
    elif ratio >= 0.25:
        severity = 'MEDIUM'
    else:
        severity = 'LOW'
    return 'INSUFFICIENT', severity


def _user_proficiency_map(user_id: int, skill_ids: Iterable[int]) -> dict[int, int]:
    """Return {skill_id: proficiency} for the user's rows among skill_ids."""
    qs = UserSkill.objects.filter(user_id=user_id, skill_id__in=list(skill_ids))
    return {us.skill_id: us.proficiency for us in qs}


def _gap_sort_key(gap: SkillGap) -> tuple[int, float, str]:
    """Sort key: most severe first, then heaviest weight, then skill name."""
    return (-_SEVERITY_RANK[gap.severity], -gap.weight, gap.skill_name)


def compute_gap_report(user, role: Role) -> GapReport:
    """Compute the readiness report of ``user`` for ``role``.

    Readiness is the weight-averaged satisfaction (proficiency / threshold,
    capped at 1.0) over the role's skills, as a percentage. When a mandatory
    skill is below its threshold the result is capped at MANDATORY_CAP.

    Args:
        user: object exposing ``id``; its UserSkill rows are read.
        role: the role whose RoleSkill rows define the requirements.

    Returns:
        A GapReport. ``gaps`` is ordered most-severe first (then by weight
        desc, then skill name) on every path that returns gaps. A role with
        no RoleSkill rows yields readiness 100 with ``no_requirements=True``
        and no gaps.

    Raises:
        KeyError: if a RoleSkill carries a ``required_level`` that is not a
            key of LEVEL_THRESHOLDS.
    """
    role_skills = list(
        RoleSkill.objects.filter(role=role).select_related('skill')
    )
    if not role_skills:
        return GapReport(
            role_id=role.id,
            role_name=role.role_name,
            readiness=100.0,
            band=_band_for(100.0),
            mandatory_cap_applied=False,
            no_requirements=True,
        )

    prof_map = _user_proficiency_map(user.id, (rs.skill_id for rs in role_skills))

    gaps: list[SkillGap] = []
    weighted_sat_sum = 0.0
    weight_sum = 0.0
    mandatory_shortfall = False

    for rs in role_skills:
        threshold = LEVEL_THRESHOLDS[rs.required_level]
        # A skill the user has no row for counts as proficiency 0.
        user_prof = prof_map.get(rs.skill_id, 0)
        satisfaction = min(user_prof / threshold, 1.0)
        gap_points = max(threshold - user_prof, 0)
        gap_type, severity = _classify(user_prof, threshold, rs.is_mandatory)

        if rs.is_mandatory and satisfaction < 1.0:
            mandatory_shortfall = True

        weight_sum += rs.weight
        weighted_sat_sum += satisfaction * rs.weight

        gaps.append(SkillGap(
            skill_id=rs.skill_id,
            skill_name=rs.skill.skill_name,
            category=rs.skill.category,
            required_level=rs.required_level,
            threshold=threshold,
            user_proficiency=user_prof,
            weight=rs.weight,
            is_mandatory=rs.is_mandatory,
            gap=gap_points,
            satisfaction=round(satisfaction, 4),
            gap_type=gap_type,
            severity=severity,
        ))

    # Sort before branching so BOTH return paths below hand back gaps in
    # severity order; callers take the head of the list as "most critical".
    gaps.sort(key=_gap_sort_key)

    if weight_sum <= 0:
        # No positive weight to aggregate. With the RoleSkill DB CheckConstraints
        # in force a mandatory skill always has weight>0, so weight_sum<=0 implies
        # no mandatory shortfall and the documented zero-weight contract returns
        # 100/no_requirements. The mandatory branch is defense-in-depth against a
        # constraint-bypassing bulk write — it avoids a 0.0/0.0 ZeroDivisionError.
        readiness = 0.0 if mandatory_shortfall else 100.0
        return GapReport(
            role_id=role.id,
            role_name=role.role_name,
            readiness=readiness,
            band=_band_for(readiness),
            # readiness here is 0 (mandatory unmet) or 100 (no requirements);
            # neither is a value "capped down to 60", so the cap is not applied.
            mandatory_cap_applied=False,
            no_requirements=not mandatory_shortfall,
            gaps=gaps,
        )

    raw_readiness = (weighted_sat_sum / weight_sum) * 100.0
    cap_applied = mandatory_shortfall and raw_readiness > MANDATORY_CAP
    readiness = MANDATORY_CAP if cap_applied else raw_readiness
    readiness = max(0.0, min(readiness, 100.0))

    return GapReport(
        role_id=role.id,
        role_name=role.role_name,
        readiness=readiness,
        band=_band_for(readiness),
        mandatory_cap_applied=cap_applied,
        no_requirements=False,
        gaps=gaps,
    )


# ---------------------------------------------------------------------------
# Recommendation service
# ---------------------------------------------------------------------------
#
# Recommendations are ranked by a TWO-KEYED order so that material which is
# clearly the wrong level can never leapfrog appropriately-levelled material on
# quality (type/relevance/rating) alone:
#
#   1. LEVEL TIER (primary) — the distance between the user's CURRENT level for
#      the skill (proficiency_to_level(), NOT the role's required level) and the
#      resource's difficulty. distance 0-1 = "near" (tier 0); distance >=2 =
#      "far" (tier 1). Every near resource ranks above every far one.
#      So a beginner always sees BEGINNER/INTERMEDIATE content before
#      ADVANCED — a hard guarantee, independent of how good the far resource
#      looks otherwise.
#
#   2. COMPOSITE QUALITY SCORE (secondary) — orders resources WITHIN a tier:
#
#        score = TYPE_WEIGHTS[type] * w_type
#              + DIFFICULTY_MATCH(user_current_level, difficulty) * w_diff
#              + relevance_score * 10 * w_rel       # normalise to 0..10
#              + (rating / 5.0) * 10 * w_rating     # normalise to 0..10
#
# Within the near tier the difficulty term still gives exact-level a gentle
# edge over one-level-off, but it stays SOFT: a much more relevant/stronger
# one-level-off resource can still outrank a weak exact-level one. The only
# HARD rule is the near-over-far split above.
#
# Coefficients (single source of truth — keep in sync with the formula above):
#
#   w_type   = 1.0   (encodes the type preference from research §2.1
#                     "Video > Course > Article > Docs"; it decides the order
#                     when the other terms are equal)
#   w_diff   = 1.0
#   w_rel    = 1.5   (relevance is heavily weighted — a very-relevant DOCS
#                     can outrank a somewhat-relevant VIDEO at the same level)
#   w_rating = 0.5
#
# Notable consequences, verified by unit tests:
#   * Equal rating/relevance/difficulty → VIDEO > COURSE > ARTICLE > DOCS.
#   * A resource <=1 level from the user ALWAYS ranks above one >=2 levels away,
#     regardless of type/relevance/rating (the level-tier guarantee).
#   * Within a tier, exact-level beats one-level-off on equal quality, yet a
#     relevance_score=1.0 DOCS can outrank a relevance_score=0.3 VIDEO at the
#     same level (relevance is weighted 1.5x), both at rating 3.5/5:
#       DOCS : 4*1.0 + 10*1.0 + (1.0*10)*1.5 + (3.5/5*10)*0.5 = 32.5
#       VIDEO: 10*1.0 + 10*1.0 + (0.3*10)*1.5 + (3.5/5*10)*0.5 = 28.0
#   * Stable tie-break by resource_id ASC so API snapshots don't flap.
# Preference score per resource type (higher ranks first on equal terms).
TYPE_WEIGHTS: dict[str, int] = {
    'VIDEO': 10,
    'COURSE': 8,
    'ARTICLE': 6,
    'DOCS': 4,
}

# Ordinal position of each difficulty level; distances are taken on this scale.
DIFFICULTY_ORDER: dict[str, int] = {
    'BEGINNER': 0,
    'INTERMEDIATE': 1,
    'ADVANCED': 2,
}

# Coefficients of the composite quality score (see _score).
W_TYPE = 1.0
W_DIFF = 1.0
W_REL = 1.5
W_RATING = 0.5

# Difficulty-match term for a level distance of 0, 1 and >=2 respectively.
DIFFICULTY_EXACT = 10.0
DIFFICULTY_ADJACENT = 5.0
DIFFICULTY_MISMATCH = 0.0

# A resource within this many levels of the user's current level is "near"
# (tier 0); anything further is "far" (tier 1) and is ranked below every near
# resource. With the 3-level scale this means: appropriate-level + one step are
# near, a two-step mismatch (e.g. ADVANCED for a BEGINNER) is far.
NEAR_MAX_DISTANCE = 1

DEFAULT_LIMIT_PER_SKILL = 3
# Upper bound on resources returned per skill gap. Caps an adversarial
# ?limit=99999999 so a single request can't try to materialise the whole
# catalog per skill; the curated catalog is small, so 50 is generous.
MAX_LIMIT_PER_SKILL = 50


@dataclass
class RecommendationItem:
    """One ranked resource as returned to the caller for a skill gap."""

    resource_id: int
    title: str
    provider: str
    url: str
    type: str
    difficulty_level: str
    duration: int
    rating: float
    relevance_score: float
    score: float

    def to_dict(self) -> dict:
        """Return the item as a plain dict (one key per field)."""
        return asdict(self)


def _level_distance(anchor_level: str, resource_level: str) -> int:
    """Return how many difficulty levels apart the two level names are.

    Raises KeyError if either name is not a key of DIFFICULTY_ORDER.
    """
    return abs(DIFFICULTY_ORDER[anchor_level] - DIFFICULTY_ORDER[resource_level])


def _difficulty_match_score(anchor_level: str, resource_level: str) -> float:
    """Return the difficulty term of the composite score.

    DIFFICULTY_EXACT for the same level, DIFFICULTY_ADJACENT for one level
    apart, DIFFICULTY_MISMATCH otherwise.
    """
    dist = _level_distance(anchor_level, resource_level)
    if dist == 0:
        return DIFFICULTY_EXACT
    if dist == 1:
        return DIFFICULTY_ADJACENT
    return DIFFICULTY_MISMATCH


def _difficulty_tier(anchor_level: str, resource_level: str) -> int:
    """Primary ranking key: 0 if the resource is within NEAR_MAX_DISTANCE levels
    of the user's current level ("near"), else 1 ("far").

    Sorting on this BEFORE the composite score guarantees a clearly-mismatched
    resource (>=2 levels off, e.g. ADVANCED for a beginner) can never outrank an
    appropriately-levelled one on type/relevance/rating alone.
    """
    dist = _level_distance(anchor_level, resource_level)
    return 0 if dist <= NEAR_MAX_DISTANCE else 1


def _score(
    type_: str,
    anchor_level: str,
    resource_level: str,
    relevance_score: float,
    rating: float,
) -> float:
    """Return the composite quality score used to order resources within a tier.

    An unknown ``type_`` contributes 0 to the type term. ``relevance_score``
    is scaled by 10 and ``rating`` by 10/5 before weighting.
    NOTE(review): assumes relevance_score and rating are non-null numbers —
    confirm against the SkillResource/Resource models.
    """
    type_term = TYPE_WEIGHTS.get(type_, 0) * W_TYPE
    difficulty_term = _difficulty_match_score(anchor_level, resource_level) * W_DIFF
    relevance_term = (relevance_score * 10.0) * W_REL
    rating_term = (rating / 5.0 * 10.0) * W_RATING
    return type_term + difficulty_term + relevance_term + rating_term


def _clamp_limit(limit_per_skill: int) -> int:
    """Clamp a requested per-skill limit into [0, MAX_LIMIT_PER_SKILL].

    A negative value would otherwise be read by slicing as "all but the last
    N", and an oversized one would bypass the documented upper bound.
    """
    return max(0, min(limit_per_skill, MAX_LIMIT_PER_SKILL))


def compute_recommendations(
    user,
    role: Role,
    limit_per_skill: int = DEFAULT_LIMIT_PER_SKILL,
) -> dict:
    """Return {"role_id", "role_name", "recommendations": {skill_id: [...]}}.

    Only returns entries for skills with an actual gap (MISSING or INSUFFICIENT);
    MET skills are skipped. Stable tie-break by resource_id ASC.

    ``limit_per_skill`` is clamped into [0, MAX_LIMIT_PER_SKILL]: a negative
    value yields empty lists and a value above the bound is reduced to it.
    A gap skill with no linked resources maps to an empty list.
    """
    limit = _clamp_limit(limit_per_skill)

    report = compute_gap_report(user, role)
    gap_skills = [g for g in report.gaps if g.gap_type != 'MET']

    recommendations: dict[int, list[dict]] = {}
    if not gap_skills:
        return {
            'role_id': role.id,
            'role_name': role.role_name,
            'recommendations': recommendations,
        }

    skill_ids = [g.skill_id for g in gap_skills]
    links = (
        SkillResource.objects
        .filter(skill_id__in=skill_ids)
        .select_related('resource')
    )

    # Pre-seed every gap skill so skills without resources still get an entry.
    by_skill: dict[int, list[tuple[SkillResource, Resource]]] = {
        sid: [] for sid in skill_ids
    }
    for link in links:
        by_skill[link.skill_id].append((link, link.resource))

    # Anchor difficulty matching to the user's CURRENT level for each gap skill
    # (derived from proficiency), not the role's required level — so learners are
    # steered to material at their level first.
    anchor_by_skill = {
        g.skill_id: proficiency_to_level(g.user_proficiency) for g in gap_skills
    }

    for skill_id, pairs in by_skill.items():
        anchor_level = anchor_by_skill[skill_id]
        scored: list[tuple[int, float, int, RecommendationItem]] = []
        for link, resource in pairs:
            score = _score(
                resource.type,
                anchor_level,
                resource.difficulty_level,
                link.relevance_score,
                resource.rating,
            )
            tier = _difficulty_tier(anchor_level, resource.difficulty_level)
            scored.append((
                tier,
                score,
                resource.id,
                RecommendationItem(
                    resource_id=resource.id,
                    title=resource.title,
                    provider=resource.provider,
                    url=resource.url,
                    type=resource.type,
                    difficulty_level=resource.difficulty_level,
                    duration=resource.duration,
                    rating=resource.rating,
                    relevance_score=link.relevance_score,
                    score=round(score, 4),
                ),
            ))
        # Primary: level tier (near before far). Secondary: composite score desc.
        # Tertiary: resource_id asc for stable, snapshot-friendly ordering.
        scored.sort(key=lambda t: (t[0], -t[1], t[2]))
        recommendations[skill_id] = [
            item.to_dict() for _, _, _, item in scored[:limit]
        ]

    return {
        'role_id': role.id,
        'role_name': role.role_name,
        'recommendations': recommendations,
    }


# ---------------------------------------------------------------------------
# Role suggestion service
# ---------------------------------------------------------------------------
#
# Ranks every active role by how well the user's current skills fit it, so the
# UI can surface "Recommended for you" (top fit) above the rest of the catalog.
# The fit reflects an uploaded resume automatically: CV → UserSkill → gap
# analysis is already wired, so suggestions update once parsed skills are saved.
#
# Reuses compute_gap_report per role (no duplicated readiness math). For the
# small curated catalog (~10 roles, ceiling ~50) the ~2 queries/role cost is
# negligible; batch into a single RoleSkill + UserSkill pass if it ever grows.
DEFAULT_TOP_MISSING = 3


def suggest_roles(user, top_missing: int = DEFAULT_TOP_MISSING) -> dict:
    """Return {"has_skills", "roles": [...]} ranked by skill fit.

    Each role row: role_id, role_name, industry, readiness, band,
    matched_skills/total_skills, top_missing_skills (most-severe first),
    no_requirements. Sorted by (no_requirements last, readiness desc,
    role_name asc) — roles with no requirements report readiness=100
    spuriously, so they sink to the bottom rather than top the list.

    ``top_missing`` caps the length of top_missing_skills; a negative value
    is treated as 0 (empty list) rather than trimming names from the tail.
    """
    has_skills = UserSkill.objects.filter(user_id=user.id).exists()
    # Guard the slice below: a negative bound would mean "all but the last N".
    missing_cap = max(top_missing, 0)

    rows: list[dict] = []
    for role in Role.objects.filter(is_active=True):
        report = compute_gap_report(user, role)
        total = len(report.gaps)
        matched = sum(1 for g in report.gaps if g.gap_type == 'MET')
        # compute_gap_report orders gaps most-severe first on its normal return
        # path, so the leading non-MET names are the most critical ones.
        missing = [g.skill_name for g in report.gaps if g.gap_type != 'MET']
        rows.append({
            'role_id': role.id,
            'role_name': role.role_name,
            'industry': role.industry,
            'readiness': round(report.readiness, 2),
            'band': report.band,
            'matched_skills': matched,
            'total_skills': total,
            'top_missing_skills': missing[:missing_cap],
            'no_requirements': report.no_requirements,
        })

    # False sorts before True, so roles with real requirements come first.
    rows.sort(key=lambda r: (r['no_requirements'], -r['readiness'], r['role_name']))
    return {'has_skills': has_skills, 'roles': rows}