File size: 13,115 Bytes
dba351a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49fb892
 
 
 
dba351a
 
49fb892
 
dba351a
49fb892
dba351a
 
49fb892
dba351a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0010624
dba351a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9246aa7
 
dba351a
 
 
 
 
 
 
 
 
 
 
9246aa7
 
dba351a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0010624
 
 
dba351a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0010624
 
dba351a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0010624
dba351a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
"""
analytics/execution_layer.py

Tier 5A — Execution Layer (Alpha Release)

Post-model enrichment pass operating exclusively on already-computed outputs
(model probs + book odds). No simulation logic, no probability calculations,
no model changes.

Entry point: enrich_with_execution_layer(df) → df with execution fields added.
"""

from __future__ import annotations

import statistics
from typing import Any

import pandas as pd

from analytics.no_vig_props import american_to_implied_prob

# ---------------------------------------------------------------------------
# Thresholds
# ---------------------------------------------------------------------------
# All thresholds are in implied-probability points (0.01 = 1 percentage point).
OUTLIER_THRESHOLD: float = 0.03   # 3pp deviation from median → outlier
STALE_THRESHOLD: float = 0.025    # 2.5pp worse than median → stale book
AGGRESSIVE_THRESHOLD: float = 0.02  # 2pp better than median → aggressive/timing flag

# Row keys probed (in order) for any odds-update timestamp.
_TIMESTAMP_KEYS = ("last_update", "timestamp", "odds_timestamp", "updated_at")


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def _safe_float(val: Any, default: float | None = None) -> float | None:
    if val is None:
        return default
    try:
        return float(val)
    except (TypeError, ValueError):
        return default


def _safe_implied(odds: Any) -> float | None:
    """Convert American odds to an implied probability, or None on any failure."""
    if odds is not None:
        try:
            return american_to_implied_prob(odds)
        except Exception:
            # Best-effort conversion: malformed odds simply yield no implied prob.
            pass
    return None


def _make_player_game_key(row: pd.Series) -> str:
    explicit_key = str(row.get("player_event_market_key") or "").strip()
    if explicit_key and explicit_key not in ("nan", "None", ""):
        return explicit_key

    event_id = str(row.get("event_id") or "").strip()
    player_name = str(row.get("player_name") or "").strip()
    market_family = str(row.get("market_family") or row.get("market") or "").strip()
    threshold = str(row.get("threshold") or "").strip()
    if event_id and event_id not in ("nan", "None", ""):
        return f"{event_id}|{player_name}|{market_family}|{threshold}"
    away = str(row.get("away_team") or "").strip()
    home = str(row.get("home_team") or "").strip()
    return f"{away}|{home}|{player_name}|{market_family}|{threshold}"


def _make_game_key(row: pd.Series) -> str:
    event_id = str(row.get("event_id") or "").strip()
    if event_id and event_id not in ("nan", "None", ""):
        return event_id
    away = str(row.get("away_team") or "").strip()
    home = str(row.get("home_team") or "").strip()
    return f"{away}_{home}"


# ---------------------------------------------------------------------------
# Task 1 — Market Disagreement
# ---------------------------------------------------------------------------

def _compute_market_fields(df: pd.DataFrame) -> pd.DataFrame:
    """Add best_price, median_price, market_width, market_outlier_flag, stale_book_flag.

    Rows are grouped by a scoped player/game/market key; per group, the best
    (lowest) and worst (highest) implied probabilities, their median, and the
    best-worst spread are computed. Each row is then flagged against its
    group median using OUTLIER_THRESHOLD / STALE_THRESHOLD.
    """
    out = df.copy()

    # Scoped grouping key and per-row implied probability (temp columns).
    out["_pg_key"] = out.apply(_make_player_game_key, axis=1)
    out["_implied"] = out["odds_american"].apply(_safe_implied)

    # Aggregate implied-prob stats per scoped key.
    stats_by_key: dict[str, dict] = {}
    for pg_key, sub in out.groupby("_pg_key"):
        probs = [p for p in sub["_implied"] if p is not None]
        if not probs:
            stats_by_key[pg_key] = {
                "best": None, "worst": None, "median": None, "width": None
            }
            continue
        lo = min(probs)   # lowest implied = best price for the bettor
        hi = max(probs)
        stats_by_key[pg_key] = {
            "best": lo,
            "worst": hi,
            "median": statistics.median(probs),
            "width": abs(hi - lo),
        }

    best_col: list[float | None] = []
    median_col: list[float | None] = []
    width_col: list[float | None] = []
    outlier_col: list[bool] = []
    stale_col: list[bool] = []

    for _, rec in out.iterrows():
        info = stats_by_key.get(rec["_pg_key"], {})
        implied = rec["_implied"]
        med = info.get("median")

        best_col.append(info.get("best"))
        median_col.append(med)
        width_col.append(info.get("width"))

        if implied is None or med is None:
            outlier_col.append(False)
            stale_col.append(False)
        else:
            outlier_col.append(abs(implied - med) > OUTLIER_THRESHOLD)
            stale_col.append((implied - med) > STALE_THRESHOLD)

    out["best_price"] = best_col
    out["median_price"] = median_col
    out["market_width"] = width_col
    out["market_outlier_flag"] = outlier_col
    out["stale_book_flag"] = stale_col

    return out.drop(columns=["_pg_key", "_implied"])


# ---------------------------------------------------------------------------
# Task 2 — Edge Quality Filters
# ---------------------------------------------------------------------------

def _compute_edge_quality(df: pd.DataFrame) -> pd.DataFrame:
    """Add execution_confidence_score, execution_volatility_score,
    execution_signal_strength_score, edge_raw, edge_filtered, edge_filter_flags.

    Scores are derived per row from the probability source, whether pregame
    context was applied, market width, and context-adjustment magnitude; the
    raw edge is then discounted by confidence/volatility/signal penalties,
    with each applied penalty recorded in a comma-separated flag string.
    """
    out = df.copy()

    confidence_col: list[float] = []
    volatility_col: list[float] = []
    strength_col: list[float] = []
    raw_edge_col: list[float | None] = []
    filtered_col: list[float | None] = []
    flags_col: list[str] = []

    for _, rec in out.iterrows():
        prob_source = str(rec.get("model_hr_prob_source") or "unavailable")
        has_context = bool(rec.get("pregame_context_applied") or False)
        raw_edge = _safe_float(rec.get("edge"))
        width = _safe_float(rec.get("market_width"), default=0.0)

        # Magnitude of pregame context adjustments (pitcher + park).
        pitcher_adj = _safe_float(rec.get("pregame_pitcher_context_adj"), default=0.0)
        park_adj = _safe_float(rec.get("pregame_park_context_adj"), default=0.0)
        ctx_mag = abs(pitcher_adj or 0.0) + abs(park_adj or 0.0)

        # Confidence by source, boosted when context was applied.
        if prob_source == "internal_model_baseline":
            confidence = 1.0 if has_context else 0.7
        elif prob_source == "shared_pregame_engine":
            confidence = 0.95 if has_context else 0.80
        else:
            confidence = 0.3

        # Volatility: weighted blend of width and context magnitude, in [0, 1].
        width_part = min(1.0, (width or 0.0) / 0.10)
        ctx_part = min(1.0, ctx_mag / 0.02) if ctx_mag > 0 else 0.0
        volatility = 0.7 * width_part + 0.3 * ctx_part

        # Signal strength by source, capped at 1.0.
        if prob_source == "internal_model_baseline":
            strength = 0.7 + (0.3 if has_context else 0.0)
        elif prob_source == "shared_pregame_engine":
            strength = 0.85 + (0.15 if has_context else 0.0)
        else:
            strength = 0.1
        strength = min(1.0, strength)

        # Discount the raw edge, recording which penalties fired.
        if raw_edge is None:
            filtered: float | None = None
            flag_str = "clean"
        else:
            filtered = raw_edge
            fired: list[str] = []

            # Low-confidence rows get their edge scaled down proportionally.
            if confidence < 0.5:
                filtered *= confidence / 0.5
                fired.append("conf_penalty")

            # Volatility costs up to 2pp of edge.
            vol_penalty = min(0.02, volatility * 0.02)
            if vol_penalty > 0:
                filtered -= vol_penalty
                fired.append("vol_penalty")

            # Weak signals are halved.
            if strength < 0.3:
                filtered *= 0.5
                fired.append("weak_signal")

            flag_str = ",".join(fired) if fired else "clean"

        confidence_col.append(confidence)
        volatility_col.append(volatility)
        strength_col.append(strength)
        raw_edge_col.append(raw_edge)
        filtered_col.append(filtered)
        flags_col.append(flag_str)

    out["execution_confidence_score"] = confidence_col
    out["execution_volatility_score"] = volatility_col
    out["execution_signal_strength_score"] = strength_col
    out["edge_raw"] = raw_edge_col
    out["edge_filtered"] = filtered_col
    out["edge_filter_flags"] = flags_col
    return out


# ---------------------------------------------------------------------------
# Task 3 — Timing Heuristics
# ---------------------------------------------------------------------------

def _compute_timing_fields(df: pd.DataFrame) -> pd.DataFrame:
    """Add timing_flag, timing_reason.

    A row is flagged when its implied price beats the group median by more
    than AGGRESSIVE_THRESHOLD, or when any recognized timestamp column holds
    a usable value. Reasons are joined with commas; "none" marks unflagged rows.
    """
    out = df.copy()

    flag_col: list[bool] = []
    reason_col: list[str] = []

    for _, rec in out.iterrows():
        tags: list[str] = []

        # Aggressive price: implied prob more than 2pp below the group median.
        implied = _safe_implied(rec.get("odds_american"))
        med = _safe_float(rec.get("median_price"))
        if implied is not None and med is not None and (med - implied) > AGGRESSIVE_THRESHOLD:
            tags.append("aggressive_price")

        # Any usable timestamp column present?
        for ts_key in _TIMESTAMP_KEYS:
            ts_val = rec.get(ts_key)
            if ts_val is not None and str(ts_val).strip() not in ("", "nan", "None"):
                tags.append("has_timestamp")
                break

        if tags:
            flag_col.append(True)
        else:
            tags = ["none"]
            flag_col.append(False)
        reason_col.append(",".join(tags))

    out["timing_flag"] = flag_col
    out["timing_reason"] = reason_col
    return out


# ---------------------------------------------------------------------------
# Task 4 — Correlation Awareness
# ---------------------------------------------------------------------------

def _compute_correlation_fields(df: pd.DataFrame) -> pd.DataFrame:
    """Add correlation_flag, correlation_direction.

    Props in the same game are treated as positively correlated; games with
    more than two distinct players are labelled "positive_stacked".
    """
    out = df.copy()
    out["_game_key"] = out.apply(_make_game_key, axis=1)

    # Distinct players per game → stacking signal.
    players_per_game = {
        gk: sub["player_name"].nunique() for gk, sub in out.groupby("_game_key")
    }

    directions = [
        "positive_stacked" if players_per_game.get(rec["_game_key"], 1) > 2 else "positive"
        for _, rec in out.iterrows()
    ]

    out["correlation_flag"] = True  # always True for HR props
    out["correlation_direction"] = directions
    return out.drop(columns=["_game_key"])


# ---------------------------------------------------------------------------
# Task 5 — Final Execution Score
# ---------------------------------------------------------------------------

def _compute_execution_score(df: pd.DataFrame) -> pd.DataFrame:
    """Add final_recommendation_score.

    Blends the filtered edge (weighted by execution confidence) with a
    volatility penalty, a tight-market bonus, and a small timing bonus,
    then clamps the result to [-0.30, 0.30]. Rows without a filtered edge
    score None.
    """
    out = df.copy()

    final_scores: list[float | None] = []

    for _, rec in out.iterrows():
        edge_val = _safe_float(rec.get("edge_filtered"))
        if edge_val is None:
            final_scores.append(None)
            continue

        conf = _safe_float(rec.get("execution_confidence_score"), default=0.3)
        vol = _safe_float(rec.get("execution_volatility_score"), default=0.0)
        width = _safe_float(rec.get("market_width"), default=0.0)
        timed = bool(rec.get("timing_flag") or False)

        # Confidence scales the edge between 40% and 100% of its value.
        base = edge_val * (0.4 + (conf or 0.0) * 0.6)
        penalty = min(0.015, (vol or 0.0) * 0.015)
        tight_market_bonus = min(0.01, max(0.0, 0.01 - (width or 0.0) * 0.5))
        timing_bonus = 0.005 if timed else 0.0

        raw = base - penalty + tight_market_bonus + timing_bonus
        final_scores.append(max(-0.30, min(0.30, raw)))

    out["final_recommendation_score"] = final_scores
    return out


# ---------------------------------------------------------------------------
# Public entry point
# ---------------------------------------------------------------------------

def enrich_with_execution_layer(df: pd.DataFrame) -> pd.DataFrame:
    """
    Run all five execution-layer passes on the mapped props DataFrame.

    Passes (in order):
      1. Market Disagreement  — best_price, median_price, market_width, flags
      2. Edge Quality         — execution confidence, volatility, signal, edge_filtered
      3. Timing Heuristics    — timing_flag, timing_reason
      4. Correlation          — correlation_flag, correlation_direction
      5. Execution Score      — final_recommendation_score

    Returns the enriched DataFrame (the input itself when empty). Does not
    modify simulation logic or model probabilities.
    """
    if df.empty:
        return df

    passes = (
        _compute_market_fields,
        _compute_edge_quality,
        _compute_timing_fields,
        _compute_correlation_fields,
        _compute_execution_score,
    )
    enriched = df
    for step in passes:
        enriched = step(enriched)
    return enriched