File size: 4,036 Bytes
5f43c7d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
"""heavy.py — heavy-turn ranking by Anthropic COST (not raw cacheRead).

Two SEPARATE notions of "heavy", because one number can't answer both questions:

  * `Turn.heavy`  — the top-N turns by COST. A RELATIVE ranking ("which were the
    heaviest"), used to glow the biggest nodes in the graph. At most N, and ONLY
    among turns carrying non-trivial cost — so a 3-turn session no longer paints a
    no-op query/reply turn "heavy" just because top-N can't fill from a tiny set
    (the "top-3 of 3" lie). A genuinely small clean session still shows its top few.
  * `Turn.overBudget` — every turn whose COST clears an ABSOLUTE floor. This is the
    fix for "the top-3 lie": in a session where ten turns each cost a fortune,
    ranking surfaces three and implies the other seven were normal. The floor flags
    all of them. Quiet on genuinely small sessions (nothing clears the floor).

COST = cost-weighted tokens (input-token-equivalents; see contract.COST_WEIGHTS).
That is the real-money signal — cacheRead is cheap (0.1x) and re-paid every
round-trip, so ranking by cacheRead overstates big-context turns and hides
generation-heavy ones. Pure code, NO model.

On the fixture the cost top-3 is still {9, 10, 13} (cacheRead dominates magnitude
there), so the Phase-2 oracle is unchanged; the over-budget set is wider.
"""
from __future__ import annotations

# Default size of the RELATIVE heavy ranking — the "N" in top-N. Used as the
# default for annotate_heavy's `top_n` parameter.
_TOP_N = 3

# Absolute per-turn floor, in cost-weighted tokens (input-token-equivalents). 500k
# input-equivalents is ≈ $2.50 on Opus pricing ($5/Mtok) — a turn worth flagging as
# expensive on its own, regardless of how the rest of the session ranks. Tunable in
# one place; conservative enough that small/cheap turns never trip it.
OVER_BUDGET_COST = 500_000

# Eligibility floor for the RELATIVE heavy ranking, as a fraction of the session's
# heaviest turn. A turn below it isn't "one of the heaviest" by any honest reading,
# so it never rides into the top-N just because the session is too small to fill N
# (a 3-turn session must not flag a ~0-cost query/reply turn "heavy"). Self-scaling,
# so no magic absolute; the OVER_BUDGET_COST floor still flags genuine expense apart
# from this. 1% is conservative — it only ever drops negligible turns.
HEAVY_MIN_FRACTION = 0.01


def _cost(t) -> int:
    """Return the COST of turn `t`, as reported by `t.tokens.cost()`.

    Single accessor used by every ranking/threshold helper in this module so
    they all agree on what "cost" means.
    """
    tokens = t.tokens
    return tokens.cost()


def annotate_heavy(turns, top_n: int = _TOP_N, over_budget: int = OVER_BUDGET_COST) -> list[int]:
    """Mark heavy (top-N by cost, non-trivial only) AND overBudget (cost >= floor).
    In place.

    Args:
        turns: iterable of turn objects exposing `.i` and a cost via `_cost`.
            Each one gets its `heavy` and `overBudget` attributes (re)assigned.
        top_n: maximum number of turns to mark heavy. Zero or a negative value
            marks none.
        over_budget: absolute cost floor; a turn whose cost is >= this value is
            marked `overBudget`, independently of the heavy ranking.

    Returns:
        The heavy turn indices, sorted ascending — at most N, and never a turn
        whose cost is zero or below HEAVY_MIN_FRACTION of the session's heaviest
        (so the relative ranking can't manufacture "heavy" on a tiny session).
        Deterministic tie-break: turns are ranked by (-cost, i).
    """
    # Cost every turn exactly once, before touching any flag: the value is needed
    # for ranking, eligibility and the over-budget check, and a single pass also
    # lets `turns` be a one-shot iterable.
    costed = [(_cost(t), t) for t in turns]

    ranked = sorted(costed, key=lambda pair: (-pair[0], pair[1].i))
    top_cost = ranked[0][0] if ranked else 0
    floor = top_cost * HEAVY_MIN_FRACTION
    # eligible = ranked turns carrying non-trivial cost; a near-zero turn never
    # ranks heavy even when top-N would otherwise have room for it.
    eligible = [t for cost, t in ranked if cost > 0 and cost >= floor]
    # Clamp so a negative top_n means "none" rather than slicing from the end
    # (eligible[:-1] would mark everything except the last eligible turn).
    heavy_set = {t.i for t in eligible[:max(top_n, 0)]}

    # Both flags are assigned on every turn, so stale values from an earlier
    # call are overwritten without a separate reset pass.
    for cost, t in costed:
        t.heavy = t.i in heavy_set
        t.overBudget = cost >= over_budget

    return sorted(heavy_set)


def over_budget_turns(turns, over_budget: int = OVER_BUDGET_COST) -> list[int]:
    """Return the sorted indices of every turn whose cost is >= `over_budget`
    (for the session header / advisor).

    The result is recomputed from each turn's cost, not read from the
    `overBudget` attribute; annotate_heavy sets that per-turn flag using the
    same `cost >= over_budget` comparison.
    """
    flagged: list[int] = []
    for turn in turns:
        if _cost(turn) >= over_budget:
            flagged.append(turn.i)
    flagged.sort()
    return flagged


def heaviest_turn(turns):
    """Return the single most expensive turn by COST (None if no turns) — the chat's
    focus fallback. Ranked by cost, not cacheRead, so 'why was this expensive?'
    lands on the turn that actually cost the most.

    Args:
        turns: iterable of turn objects exposing `.i` and a cost via `_cost`.
            May be None or empty.

    Returns:
        The turn with the highest cost; on equal cost the one with the lowest
        `i` wins. None when there is nothing to rank.
    """
    if not turns:
        return None
    # default=None covers iterables that are truthy yet yield nothing (e.g. an
    # exhausted generator), which would otherwise make min() raise ValueError
    # instead of honouring the "None if no turns" contract.
    return min(turns, key=lambda t: (-_cost(t), t.i), default=None)