Spaces:
Sleeping
Sleeping
| """Gap Analysis + Recommendation services. | |
| Gap analysis — Weighted Competency Fulfillment Algorithm. | |
| See research/design-decision-changes.md §Module 4 for derivation. Pure functions, | |
| no DB writes: the report is computed on demand from UserSkill + RoleSkill state. | |
| Recommendations — composite ranking over the Resource catalog. | |
| See research/08-next-modules-build-plan.md §Module A. Type weights from | |
| research/03-gap-analysis-engine-research.md §2.1 are the single source of truth | |
| for type preference. | |
| """ | |
| from __future__ import annotations | |
| from dataclasses import asdict, dataclass, field | |
| from typing import Iterable | |
| from apps.resources.models import Resource, SkillResource | |
| from apps.roles.models import Role, RoleSkill | |
| from apps.skills.models import UserSkill | |
| from apps.skills.utils import proficiency_to_level | |
# Proficiency a user must reach for a requirement at each level to count as met.
LEVEL_THRESHOLDS: dict[str, int] = {'BEGINNER': 40, 'INTERMEDIATE': 60, 'ADVANCED': 100}

# Ceiling placed on readiness while any mandatory skill is below its threshold.
MANDATORY_CAP = 60.0

# (floor, band) pairs in descending floor order: the first floor that the
# readiness reaches names the band.
BANDS: list[tuple[float, str]] = [
    (85.0, 'STRONG'), (65.0, 'GOOD'),
    (45.0, 'PARTIAL'), (0.0, 'WEAK'),
]
@dataclass
class SkillGap:
    """One role requirement compared against the user's proficiency.

    Instances are built by ``compute_gap_report`` with keyword arguments and
    serialised with ``dataclasses.asdict``; both require this class to be a
    dataclass, hence the decorator.
    """

    skill_id: int
    skill_name: str
    category: str
    # Key into LEVEL_THRESHOLDS (e.g. 'BEGINNER').
    required_level: str
    # Proficiency needed to satisfy required_level.
    threshold: int
    # The user's proficiency for the skill; 0 when the user has no entry.
    user_proficiency: int
    weight: float
    is_mandatory: bool
    # Points still missing to reach the threshold, never negative.
    gap: int
    # user_proficiency / threshold, capped at 1.0.
    satisfaction: float
    # 'MET', 'MISSING' or 'INSUFFICIENT'.
    gap_type: str
    # 'CRITICAL' / 'HIGH' / 'MEDIUM' / 'LOW', or None when the skill is met.
    severity: str | None

    def to_dict(self) -> dict:
        """Return every field as a plain dict keyed by field name."""
        return asdict(self)
@dataclass
class GapReport:
    """Readiness summary for one user against one role.

    Must be a dataclass: ``compute_gap_report`` constructs it with keyword
    arguments, and ``gaps`` relies on ``field(default_factory=list)`` to give
    every report its own empty list when none is supplied.
    """

    role_id: int
    role_name: str
    # Readiness score; compute_gap_report keeps it within 0..100.
    readiness: float
    # Band label derived from readiness (see BANDS).
    band: str
    # True when readiness was lowered to MANDATORY_CAP.
    mandatory_cap_applied: bool
    # True when there was nothing to measure the user against.
    no_requirements: bool
    gaps: list[SkillGap] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return a plain-dict view of the report.

        Readiness is rounded to 2 decimals, each gap is serialised through its
        own ``to_dict``, and the module's ``MANDATORY_CAP`` is included under
        ``'mandatory_cap'``.
        """
        return {
            'role_id': self.role_id,
            'role_name': self.role_name,
            'readiness': round(self.readiness, 2),
            'band': self.band,
            'mandatory_cap_applied': self.mandatory_cap_applied,
            'mandatory_cap': MANDATORY_CAP,
            'no_requirements': self.no_requirements,
            'gaps': [g.to_dict() for g in self.gaps],
        }
def _band_for(readiness: float) -> str:
    """Return the label of the first band in BANDS whose floor is reached.

    Falls back to 'WEAK' when the readiness reaches none of the floors.
    """
    reached = (label for minimum, label in BANDS if readiness >= minimum)
    return next(reached, 'WEAK')
| def _classify( | |
| user_proficiency: int, | |
| threshold: int, | |
| is_mandatory: bool, | |
| ) -> tuple[str, str | None]: | |
| """Return (gap_type, severity) per §3 of design-decision-changes.md.""" | |
| if user_proficiency >= threshold: | |
| return 'MET', None | |
| if user_proficiency == 0: | |
| return 'MISSING', 'CRITICAL' if is_mandatory else 'MEDIUM' | |
| ratio = (threshold - user_proficiency) / threshold | |
| if is_mandatory or ratio >= 0.5: | |
| severity = 'HIGH' | |
| elif ratio >= 0.25: | |
| severity = 'MEDIUM' | |
| else: | |
| severity = 'LOW' | |
| return 'INSUFFICIENT', severity | |
def _user_proficiency_map(user_id: int, skill_ids: Iterable[int]) -> dict[int, int]:
    """Map skill_id -> proficiency for the user's UserSkill rows among skill_ids.

    Skills the user has no row for are simply absent from the result.
    """
    wanted_ids = list(skill_ids)
    proficiencies: dict[int, int] = {}
    for user_skill in UserSkill.objects.filter(user_id=user_id, skill_id__in=wanted_ids):
        proficiencies[user_skill.skill_id] = user_skill.proficiency
    return proficiencies
# Sort rank per severity label; a higher rank sorts earlier. None is the
# severity carried by MET rows, so they sink to the end.
_SEVERITY_RANK = {'CRITICAL': 4, 'HIGH': 3, 'MEDIUM': 2, 'LOW': 1, None: 0}


def _gap_sort_key(gap: SkillGap) -> tuple:
    """Order gaps by severity desc, then weight desc, then skill name asc."""
    return (-_SEVERITY_RANK[gap.severity], -gap.weight, gap.skill_name)


def compute_gap_report(user, role: Role) -> GapReport:
    """Compute the weighted readiness report for ``user`` against ``role``.

    Each RoleSkill contributes ``min(proficiency / threshold, 1.0)`` weighted by
    its ``weight``; readiness is the weighted mean scaled to 0..100. If any
    mandatory skill is below its threshold, readiness is capped at
    ``MANDATORY_CAP``.

    Args:
        user: object exposing ``id``; its UserSkill rows supply proficiencies.
        role: the Role whose RoleSkill rows define the requirements.

    Returns:
        A GapReport. ``gaps`` holds one SkillGap per RoleSkill, sorted
        most-severe first (then by weight desc, then skill name) on every path
        that returns gaps. A role with no RoleSkill rows yields readiness 100
        with ``no_requirements=True`` and no gaps.

    Raises:
        KeyError: if a RoleSkill's ``required_level`` is not a key of
            ``LEVEL_THRESHOLDS``.
    """
    role_skills = list(
        RoleSkill.objects.filter(role=role).select_related('skill')
    )
    if not role_skills:
        return GapReport(
            role_id=role.id,
            role_name=role.role_name,
            readiness=100.0,
            band=_band_for(100.0),
            mandatory_cap_applied=False,
            no_requirements=True,
        )

    prof_map = _user_proficiency_map(user.id, (rs.skill_id for rs in role_skills))

    gaps: list[SkillGap] = []
    weighted_sat_sum = 0.0
    weight_sum = 0.0
    mandatory_shortfall = False
    for rs in role_skills:
        threshold = LEVEL_THRESHOLDS[rs.required_level]
        # A skill the user has never recorded counts as proficiency 0.
        user_prof = prof_map.get(rs.skill_id, 0)
        satisfaction = min(user_prof / threshold, 1.0)
        gap_points = max(threshold - user_prof, 0)
        gap_type, severity = _classify(user_prof, threshold, rs.is_mandatory)
        if rs.is_mandatory and satisfaction < 1.0:
            mandatory_shortfall = True
        weight_sum += rs.weight
        weighted_sat_sum += satisfaction * rs.weight
        gaps.append(SkillGap(
            skill_id=rs.skill_id,
            skill_name=rs.skill.skill_name,
            category=rs.skill.category,
            required_level=rs.required_level,
            threshold=threshold,
            user_proficiency=user_prof,
            weight=rs.weight,
            is_mandatory=rs.is_mandatory,
            gap=gap_points,
            satisfaction=round(satisfaction, 4),
            gap_type=gap_type,
            severity=severity,
        ))

    # Sort before either return below: consumers that take "the first non-MET
    # gaps" as the most critical ones need the same order on the zero-weight
    # path as on the normal path.
    gaps.sort(key=_gap_sort_key)

    if weight_sum <= 0:
        # No positive weight to aggregate. With the RoleSkill DB CheckConstraints
        # in force a mandatory skill always has weight>0, so weight_sum<=0 implies
        # no mandatory shortfall and the documented zero-weight contract returns
        # 100/no_requirements. The mandatory branch is defense-in-depth against a
        # constraint-bypassing bulk write — it avoids a 0.0/0.0 ZeroDivisionError.
        readiness = 0.0 if mandatory_shortfall else 100.0
        return GapReport(
            role_id=role.id,
            role_name=role.role_name,
            readiness=readiness,
            band=_band_for(readiness),
            # readiness here is 0 (mandatory unmet) or 100 (no requirements);
            # neither is a value "capped down to 60", so the cap is not applied.
            mandatory_cap_applied=False,
            no_requirements=not mandatory_shortfall,
            gaps=gaps,
        )

    raw_readiness = (weighted_sat_sum / weight_sum) * 100.0
    # The cap only counts as "applied" when it actually lowers the score.
    cap_applied = mandatory_shortfall and raw_readiness > MANDATORY_CAP
    readiness = MANDATORY_CAP if cap_applied else raw_readiness
    readiness = max(0.0, min(readiness, 100.0))

    return GapReport(
        role_id=role.id,
        role_name=role.role_name,
        readiness=readiness,
        band=_band_for(readiness),
        mandatory_cap_applied=cap_applied,
        no_requirements=False,
        gaps=gaps,
    )
| # --------------------------------------------------------------------------- | |
| # Recommendation service | |
| # --------------------------------------------------------------------------- | |
| # | |
| # Recommendations are ranked by a TWO-KEYED order so that material which is | |
| # clearly the wrong level can never leapfrog appropriately-levelled material on | |
| # quality (type/relevance/rating) alone: | |
| # | |
| # 1. LEVEL TIER (primary) — the distance between the user's CURRENT level for | |
| # the skill (proficiency_to_level(), NOT the role's required level) and the | |
| # resource's difficulty. distance 0-1 = "near" (tier 0); distance >=2 = | |
| # "far" (tier 1). Every near resource ranks above every far one. So a | |
| # beginner always sees BEGINNER/INTERMEDIATE content before ADVANCED — a | |
| # hard guarantee, independent of how good the far resource looks otherwise. | |
| # | |
| # 2. COMPOSITE QUALITY SCORE (secondary) — orders resources WITHIN a tier: | |
| # | |
| # score = TYPE_WEIGHT[type] * w_type | |
| # + DIFFICULTY_MATCH(user_current_level, difficulty) * w_diff | |
| # + relevance_score * 10 * w_rel # normalise to 0..10 | |
| # + (rating / 5.0) * 10 * w_rating # normalise to 0..10 | |
| # | |
| # Within the near tier the difficulty term still gives exact-level a gentle | |
| # edge over one-level-off, but it stays SOFT: a much more relevant/stronger | |
| # one-level-off resource can still outrank a weak exact-level one. The only | |
| # HARD rule is the near-over-far split above. | |
| # | |
# Coefficients (keep this list in sync with the formula above and the W_*
# constants below):
#
#   w_type   = 1.0  (encodes the type preference from research §2.1
#                    "Video > Course > Article > Docs"; it decides the order
#                    when the other terms are equal)
#   w_diff   = 1.0
#   w_rel    = 1.5  (the heaviest coefficient — a very-relevant DOCS can
#                    outrank a somewhat-relevant VIDEO at the same level)
#   w_rating = 0.5
| # | |
| # Notable consequences, verified by unit tests: | |
| # * Equal rating/relevance/difficulty → VIDEO > COURSE > ARTICLE > DOCS. | |
| # * A resource <=1 level from the user ALWAYS ranks above one >=2 levels away, | |
| # regardless of type/relevance/rating (the level-tier guarantee). | |
| # * Within a tier, exact-level beats one-level-off on equal quality, yet a | |
| # relevance_score=1.0 DOCS can outrank a relevance_score=0.3 VIDEO at the | |
| # same level (relevance is weighted 1.5x), both at rating 3.5/5: | |
| # DOCS : 4*1.0 + 10*1.0 + (1.0*10)*1.5 + (3.5/5*10)*0.5 = 32.5 | |
| # VIDEO: 10*1.0 + 10*1.0 + (0.3*10)*1.5 + (3.5/5*10)*0.5 = 28.0 | |
| # * Stable tie-break by resource_id ASC so API snapshots don't flap. | |
# Base score contributed by each resource type (VIDEO > COURSE > ARTICLE > DOCS).
TYPE_WEIGHTS: dict[str, int] = {'VIDEO': 10, 'COURSE': 8, 'ARTICLE': 6, 'DOCS': 4}

# Ordinal position of each level; level distance is the difference of positions.
DIFFICULTY_ORDER: dict[str, int] = {'BEGINNER': 0, 'INTERMEDIATE': 1, 'ADVANCED': 2}

# Coefficients of the composite quality score.
W_TYPE = 1.0
W_DIFF = 1.0
W_REL = 1.5
W_RATING = 0.5

# Difficulty-match points for a level distance of 0, 1 and anything larger.
DIFFICULTY_EXACT = 10.0
DIFFICULTY_ADJACENT = 5.0
DIFFICULTY_MISMATCH = 0.0

# Largest level distance still treated as "near" (tier 0). Anything further is
# "far" (tier 1) and ranks below every near resource. On the 3-level scale the
# user's own level and one step either side are near; a two-step mismatch
# (e.g. ADVANCED for a BEGINNER) is far.
NEAR_MAX_DISTANCE = 1

DEFAULT_LIMIT_PER_SKILL = 3

# Upper bound on resources returned per skill gap, so an adversarial
# ?limit=99999999 can't make a single request try to materialise the whole
# catalog per skill; the curated catalog is small, so 50 is generous.
MAX_LIMIT_PER_SKILL = 50
@dataclass
class RecommendationItem:
    """One ranked learning resource recommended for a skill gap.

    Built by ``compute_recommendations`` with keyword arguments and serialised
    with ``dataclasses.asdict``; both require this class to be a dataclass,
    hence the decorator.
    """

    resource_id: int
    title: str
    provider: str
    url: str
    # Resource type; the scorer looks it up in TYPE_WEIGHTS (e.g. 'VIDEO').
    type: str
    # A key of DIFFICULTY_ORDER (e.g. 'BEGINNER').
    difficulty_level: str
    duration: int
    rating: float
    # Relevance of the resource to the skill, taken from the SkillResource link.
    relevance_score: float
    # Composite quality score, rounded to 4 decimals by the caller.
    score: float

    def to_dict(self) -> dict:
        """Return every field as a plain dict keyed by field name."""
        return asdict(self)
def _difficulty_match_score(anchor_level: str, resource_level: str) -> float:
    """Return the difficulty-match points for a resource.

    The level distance is measured on DIFFICULTY_ORDER: a distance of 0 earns
    DIFFICULTY_EXACT, 1 earns DIFFICULTY_ADJACENT, anything larger earns
    DIFFICULTY_MISMATCH. Raises KeyError for a level not in DIFFICULTY_ORDER.
    """
    distance = abs(DIFFICULTY_ORDER[anchor_level] - DIFFICULTY_ORDER[resource_level])
    points_by_distance = {0: DIFFICULTY_EXACT, 1: DIFFICULTY_ADJACENT}
    return points_by_distance.get(distance, DIFFICULTY_MISMATCH)
def _difficulty_tier(anchor_level: str, resource_level: str) -> int:
    """Primary ranking key: 0 for a "near" resource, 1 for a "far" one.

    A resource is near when its level is within NEAR_MAX_DISTANCE steps of the
    anchor level on DIFFICULTY_ORDER. Because callers sort on this key BEFORE
    the composite score, a clearly mismatched resource (e.g. ADVANCED for a
    beginner) can never outrank an appropriately-levelled one on
    type/relevance/rating alone.
    """
    anchor_rank = DIFFICULTY_ORDER[anchor_level]
    resource_rank = DIFFICULTY_ORDER[resource_level]
    is_far = abs(anchor_rank - resource_rank) > NEAR_MAX_DISTANCE
    return int(is_far)
def _score(
    type_: str,
    anchor_level: str,
    resource_level: str,
    relevance_score: float,
    rating: float,
) -> float:
    """Return the composite quality score used to order resources in a tier.

    Sum of four weighted terms: the type weight (0 for a type missing from
    TYPE_WEIGHTS), the difficulty-match points, relevance_score scaled by 10,
    and rating / 5 scaled by 10.
    """
    type_term = TYPE_WEIGHTS.get(type_, 0) * W_TYPE
    difficulty_term = _difficulty_match_score(anchor_level, resource_level) * W_DIFF
    relevance_term = (relevance_score * 10.0) * W_REL
    rating_term = (rating / 5.0 * 10.0) * W_RATING
    return type_term + difficulty_term + relevance_term + rating_term
def compute_recommendations(
    user,
    role: Role,
    limit_per_skill: int = DEFAULT_LIMIT_PER_SKILL,
) -> dict:
    """Return {"role_id", "role_name", "recommendations": {skill_id: [...]}}.

    Only skills with an actual gap (MISSING or INSUFFICIENT) get an entry; MET
    skills are skipped. A gap skill with no linked resources maps to an empty
    list. Within each skill, resources are ordered by level tier (near before
    far), then composite score descending, then resource_id ascending.

    Args:
        user: object exposing ``id``; forwarded to ``compute_gap_report``.
        role: the Role to analyse.
        limit_per_skill: maximum resources per skill. Clamped to the range
            0..MAX_LIMIT_PER_SKILL, so a negative value yields empty lists
            rather than slicing from the end of the ranking.

    Returns:
        A dict with the role id, the role name and the per-skill lists of
        serialised RecommendationItem dicts.
    """
    limit = max(0, min(limit_per_skill, MAX_LIMIT_PER_SKILL))

    report = compute_gap_report(user, role)
    gap_skills = [g for g in report.gaps if g.gap_type != 'MET']
    recommendations: dict[int, list[dict]] = {}
    if not gap_skills:
        return {
            'role_id': role.id,
            'role_name': role.role_name,
            'recommendations': recommendations,
        }

    skill_ids = [g.skill_id for g in gap_skills]
    links = (
        SkillResource.objects
        .filter(skill_id__in=skill_ids)
        .select_related('resource')
    )
    # Pre-seed every gap skill so skills without resources still get a key.
    by_skill: dict[int, list[tuple[SkillResource, Resource]]] = {
        sid: [] for sid in skill_ids
    }
    for link in links:
        by_skill[link.skill_id].append((link, link.resource))

    # Anchor difficulty matching to the user's CURRENT level for each gap skill
    # (derived from proficiency), not the role's required level — so learners
    # are steered to material at their level first.
    anchor_by_skill = {
        g.skill_id: proficiency_to_level(g.user_proficiency) for g in gap_skills
    }

    for skill_id, pairs in by_skill.items():
        anchor_level = anchor_by_skill[skill_id]
        scored: list[tuple[int, float, int, RecommendationItem]] = []
        for link, resource in pairs:
            score = _score(
                resource.type,
                anchor_level,
                resource.difficulty_level,
                link.relevance_score,
                resource.rating,
            )
            tier = _difficulty_tier(anchor_level, resource.difficulty_level)
            scored.append((
                tier,
                score,
                resource.id,
                RecommendationItem(
                    resource_id=resource.id,
                    title=resource.title,
                    provider=resource.provider,
                    url=resource.url,
                    type=resource.type,
                    difficulty_level=resource.difficulty_level,
                    duration=resource.duration,
                    rating=resource.rating,
                    relevance_score=link.relevance_score,
                    score=round(score, 4),
                ),
            ))
        # Primary: level tier (near before far). Secondary: composite score
        # desc. Tertiary: resource_id asc for stable, snapshot-friendly order.
        scored.sort(key=lambda entry: (entry[0], -entry[1], entry[2]))
        recommendations[skill_id] = [
            item.to_dict() for _, _, _, item in scored[:limit]
        ]

    return {
        'role_id': role.id,
        'role_name': role.role_name,
        'recommendations': recommendations,
    }
| # --------------------------------------------------------------------------- | |
| # Role suggestion service | |
| # --------------------------------------------------------------------------- | |
| # | |
| # Ranks every active role by how well the user's current skills fit it, so the | |
| # UI can surface "Recommended for you" (top fit) above the rest of the catalog. | |
| # The fit reflects an uploaded resume automatically: CV → UserSkill → gap | |
| # analysis is already wired, so suggestions update once parsed skills are saved. | |
| # | |
| # Reuses compute_gap_report per role (no duplicated readiness math). For the | |
| # small curated catalog (~10 roles, ceiling ~50) the ~2 queries/role cost is | |
| # negligible; batch into a single RoleSkill + UserSkill pass if it ever grows. | |
DEFAULT_TOP_MISSING = 3


def suggest_roles(user, top_missing: int = DEFAULT_TOP_MISSING) -> dict:
    """Return {"has_skills", "roles": [...]} ranked by skill fit.

    Each role row holds role_id, role_name, industry, readiness (rounded to 2
    decimals), band, matched_skills/total_skills, top_missing_skills and
    no_requirements. Rows are sorted by (no_requirements last, readiness desc,
    role_name asc) — roles with no requirements report readiness=100
    spuriously, so they sink to the bottom rather than top the list.

    Args:
        user: object exposing ``id``; its UserSkill rows drive the fit.
        top_missing: how many unmet skill names to list per role. Values below
            0 are treated as 0, so a negative count yields an empty list
            instead of slicing names off the end.

    Returns:
        A dict with ``has_skills`` (whether the user has any UserSkill row) and
        ``roles`` (one row per active role, in ranked order).
    """
    missing_limit = max(top_missing, 0)
    has_skills = UserSkill.objects.filter(user_id=user.id).exists()

    rows: list[dict] = []
    for role in Role.objects.filter(is_active=True):
        report = compute_gap_report(user, role)
        # Names are taken in the order compute_gap_report returns its gaps
        # (most-severe first on its normal path), so the leading names are the
        # most critical unmet skills.
        unmet_names = [g.skill_name for g in report.gaps if g.gap_type != 'MET']
        total = len(report.gaps)
        rows.append({
            'role_id': role.id,
            'role_name': role.role_name,
            'industry': role.industry,
            'readiness': round(report.readiness, 2),
            'band': report.band,
            # Every gap row is either MET or unmet, so MET = total - unmet.
            'matched_skills': total - len(unmet_names),
            'total_skills': total,
            'top_missing_skills': unmet_names[:missing_limit],
            'no_requirements': report.no_requirements,
        })

    rows.sort(key=lambda r: (r['no_requirements'], -r['readiness'], r['role_name']))
    return {'has_skills': has_skills, 'roles': rows}