File size: 5,356 Bytes
5a3b322
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
from __future__ import annotations

from typing import List, Optional, Dict
import math

from schemas.candidates import RankedList, RankedItem
from schemas.query_plan import QueryPlan


# Weights for the constraint adjustments added on top of the base relevance
# score. They are deliberately small so that relevance stays the primary signal.
W_DURATION = 0.12
W_LANGUAGE = 0.08
W_FLAGS = 0.08
W_INTENT = 0.12

# Distance from the requested duration, in minutes, at which the duration
# closeness score has fallen to zero.
TOLERANCE_MINUTES = 20

# Test-type labels counted as matching a behavioural intent.
INTENT_BEHAV_TYPES = {
    "Personality & Behavior",
    "Biodata & Situational Judgement",
    "Assessment Exercises",
    "Simulations",
}
# Test-type labels counted as matching a technical intent.
INTENT_TECH_TYPES = {
    "Knowledge & Skills",
    "Ability & Aptitude",
}


def _safe_float(val) -> Optional[float]:
    """Convert ``val`` to a finite float.

    Returns ``None`` when ``val`` is ``None``, when ``float(val)`` raises, or
    when the converted value is NaN or infinite.
    """
    if val is None:
        return None
    try:
        number = float(val)
    except Exception:
        # Best-effort parsing: anything that cannot be converted is "missing".
        return None
    return number if math.isfinite(number) else None


def _duration_score(duration: Optional[float], constraint) -> float:
    """Score how well an assessment's duration fits the requested duration.

    Args:
        duration: Assessment duration in minutes, or ``None`` when unknown.
        constraint: Object exposing ``minutes`` (the requested duration) and
            ``mode``; ``None`` means no duration was requested.

    Returns:
        A value in ``[-1.0, 1.0]``:

        * ``0.0`` when there is no constraint, the duration is unknown, or
          the requested minutes are missing or non-positive (all neutral).
        * When ``mode == "MAX"`` and the duration exceeds the limit, a
          penalty that grows linearly with the overage and saturates at
          ``-1.0`` once the overage reaches ``TOLERANCE_MINUTES``.
        * Otherwise a closeness score: ``1.0`` at the requested minutes,
          falling linearly to ``0.0`` at ``TOLERANCE_MINUTES`` away.
    """
    if constraint is None or duration is None:
        # Missing constraint or missing metadata is neutral.
        return 0.0
    target = constraint.minutes or 0
    if target <= 0:
        return 0.0

    distance = abs(duration - target) / TOLERANCE_MINUTES
    if constraint.mode == "MAX" and duration > target:
        # Negating the closeness score would punish a 1-minute overrun harder
        # than a 20-minute one and leave large overruns unpenalised, so the
        # penalty is made proportional to how far over the limit we are.
        return -min(1.0, distance)
    return max(0.0, 1.0 - distance)


def _language_score(plan: QueryPlan, meta_langs: List[str]) -> (float, bool):
    """Score the requested language against an assessment's language list.

    Returns a ``(score, allow)`` pair. The score is ``0.0`` when the plan
    names no language or the metadata lists none, ``1.0`` when the requested
    language occurs (case-insensitively, as a substring) in any listed
    language, and ``-1.0`` otherwise. ``allow`` is always ``True``: a
    mismatch is a soft penalty, not a hard drop.
    """
    wanted = plan.language
    if not wanted or not meta_langs:
        # Nothing requested, or nothing known about the assessment: neutral.
        return 0.0, True

    needle = wanted.lower()
    for listed in meta_langs:
        if needle in listed.lower():
            return 1.0, True
    return -1.0, True


def _flags_score(plan: QueryPlan, remote: Optional[bool], adaptive: Optional[bool]) -> (float, bool):
    """Score the plan's ``remote`` / ``adaptive`` flags against an assessment.

    Only flags whose plan value is exactly ``True`` are considered, remote
    first and adaptive second. For each such flag, assessment support of
    ``True`` adds ``1.0``; support of ``False`` immediately returns
    ``-1.0`` (discarding any boost gathered so far); any other value is
    neutral. Returns ``(score, allow)`` where ``allow`` is always ``True``.
    """
    requested = plan.flags or {}
    total = 0.0
    for flag_name, supported in (("remote", remote), ("adaptive", adaptive)):
        if requested.get(flag_name) is not True:
            continue
        if supported is False:
            return -1.0, True  # soft penalty
        if supported is True:
            total += 1.0
    return total, True


def _intent_score(plan: QueryPlan, test_types: List[str]) -> float:
    """Score how well an assessment's test types match the plan's intent.

    Returns ``0.0`` when ``test_types`` is empty or the intent is not one of
    ``"BEHAVIORAL"``, ``"TECH"`` or ``"MIXED"``. For ``"BEHAVIORAL"`` and
    ``"TECH"`` the score is ``1.0`` if any test type is in the matching
    intent set, else ``0.0``. For ``"MIXED"`` it is the average of the two,
    i.e. ``0.0``, ``0.5`` or ``1.0``.
    """
    if not test_types:
        return 0.0

    present = set(test_types)
    behavioural_hit = 1.0 if present & INTENT_BEHAV_TYPES else 0.0
    technical_hit = 1.0 if present & INTENT_TECH_TYPES else 0.0

    intent = plan.intent
    if intent == "BEHAVIORAL":
        return behavioural_hit
    if intent == "TECH":
        return technical_hit
    if intent == "MIXED":
        return 0.5 * (behavioural_hit + technical_hit)
    return 0.0


def apply_constraints(plan: QueryPlan, ranked: RankedList, catalog_by_id: Dict[str, dict], k: int = 10) -> RankedList:
    """
    Re-score ranked candidates with soft constraint adjustments and keep the top ``k``.

    For every item in ``ranked.items`` that has an entry in ``catalog_by_id``,
    the base score is adjusted by four weighted terms:
    - Duration: soft boost/penalty; missing is neutral.
    - Language: soft penalty if mismatch; missing metadata is neutral.
    - Flags: remote / adaptive support compared with the plan's flags.
    - Intent alignment: small boost when test_type matches intent.

    Items without a catalog entry are skipped, as are items for which the
    language or flags scorer reports "not allowed". When an item's score is
    ``None`` the base score falls back to ``1 / (rank + 1)``, using the
    item's zero-based position in ``ranked.items``.

    Returns a new ``RankedList`` of new ``RankedItem`` objects, sorted by
    final score in descending order and cut to ``k`` entries; each carries a
    ``debug`` dict with the individual contributions.
    """
    adjusted: List[RankedItem] = []
    for position, candidate in enumerate(ranked.items):
        candidate_id = candidate.assessment_id
        if candidate_id not in catalog_by_id:
            continue
        record = catalog_by_id[candidate_id]

        # Pull the metadata fields, accepting the alternative key names.
        raw_duration = record.get("duration_minutes") or record.get("duration")
        duration = _safe_float(raw_duration)

        test_types = record.get("test_type_full") or record.get("test_type") or []
        if isinstance(test_types, str):
            # A single string may list several types separated by "," or "/".
            pieces = test_types.replace("/", ",").split(",")
            test_types = [piece.strip() for piece in pieces if piece.strip()]

        meta_langs = record.get("languages") or []
        if isinstance(meta_langs, str):
            meta_langs = [meta_langs]

        remote = record.get("remote_support")
        adaptive = record.get("adaptive_support")

        # Raw (unweighted) constraint scores.
        duration_raw = _duration_score(duration, plan.duration)
        language_raw, language_ok = _language_score(plan, meta_langs)
        flags_raw, flags_ok = _flags_score(plan, remote, adaptive)
        intent_raw = _intent_score(plan, test_types)

        if not (language_ok and flags_ok):
            continue

        # Fallback if upstream left score as None: use a simple rank-based proxy.
        if candidate.score is None:
            base_score = 1.0 / (position + 1)
        else:
            base_score = candidate.score

        duration_part = W_DURATION * duration_raw
        language_part = W_LANGUAGE * language_raw
        flags_part = W_FLAGS * flags_raw
        intent_part = W_INTENT * intent_raw
        final_score = base_score + duration_part + language_part + flags_part + intent_part

        debug = {
            "base_score": base_score,
            "duration": duration,
            "duration_score": duration_part,
            "language": plan.language,
            "language_score": language_part,
            "flags_score": flags_part,
            "intent_score": intent_part,
            "test_types": test_types,
            "languages_meta": meta_langs,
            "remote": remote,
            "adaptive": adaptive,
            "final_score": final_score,
        }
        adjusted.append(RankedItem(assessment_id=candidate_id, score=final_score, debug=debug))

    best_first = sorted(adjusted, key=lambda entry: entry.score, reverse=True)
    return RankedList(items=best_first[:k])