from __future__ import annotations

import numpy as np
import pandas as pd


def _as_float(value: object) -> float | None:
    """Return *value* as a float, or ``None`` if it is missing or non-numeric."""
    if value is None:
        return None
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        # Best-effort input: an unparseable stat is treated as "not provided".
        return None


def _clamp_probability(value: float, name: str) -> float:
    """Coerce *value* to a float in ``[0, 1]``; raise ``ValueError`` on NaN."""
    prob = float(value)
    if np.isnan(prob):
        raise ValueError(f"{name} must be a number, got NaN")
    return min(max(prob, 0.0), 1.0)


def _hit_type_distribution(batter_row: dict | None) -> np.ndarray:
    """Return normalized P(single), P(double), P(triple) for a non-HR hit."""
    # Baseline split (described in the original code as an empirical
    # MLB 2024 baseline).
    single_p = 0.62
    double_p = 0.33
    triple_p = 0.05

    if batter_row is not None:
        pull_rate = _as_float(batter_row.get("pull_rate"))
        air_ball_rate = _as_float(batter_row.get("air_ball_rate"))
        avg_launch_angle = _as_float(batter_row.get("avg_launch_angle"))

        # High pull + high air ball -> more doubles.
        # NaN inputs compare False and therefore leave the baseline untouched.
        if (
            pull_rate is not None
            and air_ball_rate is not None
            and pull_rate >= 0.45
            and air_ball_rate >= 0.45
        ):
            single_p -= 0.04
            double_p += 0.04

        # Fly ball tendency -> slight 2B/3B boost.
        if avg_launch_angle is not None and avg_launch_angle >= 18:
            single_p -= 0.02
            double_p += 0.01
            triple_p += 0.01

    probs = np.array([single_p, double_p, triple_p])
    # Renormalize so floating-point drift never trips rng.choice's sum check.
    return probs / probs.sum()


def simulate_batter_outcomes(
    hit_prob: float,
    hr_prob: float,
    n_sims: int = 10000,
    batter_row: dict | None = None,
) -> pd.DataFrame:
    """Simulate per-plate-appearance hit / home-run / total-base outcomes.

    Each simulated row is one independent draw. A home run is sampled first;
    rows without a home run may then become a single, double or triple.

    Parameters
    ----------
    hit_prob:
        Probability of any hit, home runs included. Clamped to ``[0, 1]``.
    hr_prob:
        Probability of a home run. Clamped to ``[0, 1]``. When it exceeds
        ``hit_prob`` the only hits produced are home runs.
    n_sims:
        Number of rows to simulate.
    batter_row:
        Optional mapping that may carry ``"pull_rate"``, ``"air_ball_rate"``
        and ``"avg_launch_angle"``. These shift the single/double/triple
        split. Missing or non-numeric values are ignored.

    Returns
    -------
    pandas.DataFrame
        ``n_sims`` rows with integer columns ``hit`` (0/1), ``hr`` (0/1) and
        ``total_bases`` (0, 1, 2, 3 or 4).

    Raises
    ------
    ValueError
        If ``hit_prob`` or ``hr_prob`` is NaN.
    """
    rng = np.random.default_rng()  # unseeded: results differ on every call

    hit_p = _clamp_probability(hit_prob, "hit_prob")
    hr_p = _clamp_probability(hr_prob, "hr_prob")
    hit_type_probs = _hit_type_distribution(batter_row)

    # Hierarchical sampling: HR first, then a hit among the non-HR rows.
    is_hr = rng.binomial(1, hr_p, size=n_sims).astype(bool)

    # The second draw only counts on non-HR rows, so it must use the
    # conditional probability P(hit | no HR) = (hit - hr) / (1 - hr).
    # That keeps the overall hit rate equal to hit_prob.
    if hr_p < 1.0:
        non_hr_hit_p = (hit_p - hr_p) / (1.0 - hr_p)
        non_hr_hit_p = min(max(non_hr_hit_p, 0.0), 1.0)
    else:
        non_hr_hit_p = 0.0
    is_hit = rng.binomial(1, non_hr_hit_p, size=n_sims).astype(bool)

    hits = np.zeros(n_sims, dtype=int)
    hrs = np.zeros(n_sims, dtype=int)
    total_bases = np.zeros(n_sims, dtype=int)

    # Home runs take priority and are worth four bases.
    hits[is_hr] = 1
    hrs[is_hr] = 1
    total_bases[is_hr] = 4

    # Non-HR hits get a base count drawn from the batter-specific split.
    hit_mask = is_hit & ~is_hr
    hits[hit_mask] = 1
    hit_count = int(hit_mask.sum())
    if hit_count > 0:
        total_bases[hit_mask] = rng.choice(
            [1, 2, 3], size=hit_count, p=hit_type_probs
        )

    return pd.DataFrame(
        {
            "hit": hits,
            "hr": hrs,
            "total_bases": total_bases,
        }
    )