"""Stochastic noise models for the biological simulator."""

from __future__ import annotations

from typing import Dict, List, Tuple

import numpy as np


class NoiseModel:
    """Generates calibrated noise for simulated experimental outputs.

    All randomness is funnelled through a single ``numpy.Generator``
    (``self.rng``) so that episodes are reproducible given the same seed.
    Methods return plain Python scalars / containers rather than NumPy
    scalar types.
    """

    def __init__(self, seed: int = 42):
        """Create the model with a generator seeded by *seed*."""
        self.rng = np.random.default_rng(seed)

    def reseed(self, seed: int) -> None:
        """Replace the generator with a fresh one seeded by *seed*."""
        self.rng = np.random.default_rng(seed)

    # ── expression-level noise ──────────────────────────────────────────

    def add_expression_noise(
        self,
        true_values: Dict[str, float],
        noise_level: float,
        dropout_rate: float,
    ) -> Dict[str, float]:
        """Return a noisy copy of *true_values* with random dropouts.

        For each gene one uniform draw decides dropout; a dropped gene is
        reported as ``0.0``.  Otherwise Gaussian noise is added whose
        standard deviation is ``noise_level * |value| + 0.1``.

        Args:
            true_values: Mapping of gene name to true expression value.
            noise_level: Multiplier on ``|value|`` in the noise scale.
            dropout_rate: Dropout probability for a gene whose value is 0;
                it is divided by ``1 + |value|`` for other genes.

        Returns:
            A new dict with the same keys, in the same order.
        """
        noisy: Dict[str, float] = {}
        for gene, value in true_values.items():
            # Dropout probability is inversely proportional to expression
            # magnitude: lowly expressed genes drop out much more readily,
            # matching the zero-inflation pattern in real scRNA-seq data.
            p_drop = dropout_rate / (1.0 + abs(value))
            if self.rng.random() < p_drop:
                noisy[gene] = 0.0
            else:
                # The constant 0.1 keeps some noise even when value == 0.
                sigma = noise_level * abs(value) + 0.1
                noisy[gene] = float(value + self.rng.normal(0, sigma))
        return noisy

    # ── effect-size sampling ────────────────────────────────────────────

    @staticmethod
    def _standard_error(noise_level: float, sample_size: int) -> float:
        """Return ``noise_level / sqrt(n)`` with ``n`` floored at 1."""
        # The outer floor is purely defensive: sqrt of a value >= 1 is >= 1.
        return noise_level / max(np.sqrt(max(sample_size, 1)), 1e-6)

    def sample_effect_sizes(
        self,
        true_effects: Dict[str, float],
        sample_size: int,
        noise_level: float,
    ) -> Dict[str, float]:
        """Return each true effect perturbed by Gaussian sampling error.

        The noise standard deviation is ``noise_level / sqrt(sample_size)``
        (sample sizes below 1 are treated as 1), shared by all genes.
        """
        se = self._standard_error(noise_level, sample_size)
        return {
            gene: float(effect + self.rng.normal(0, se))
            for gene, effect in true_effects.items()
        }

    def sample_p_values(
        self,
        true_effects: Dict[str, float],
        sample_size: int,
        noise_level: float,
    ) -> Dict[str, float]:
        """Compute approximate two-sided p-values from z-statistics.

        Each gene gets ``2 * sf(|effect| / se)`` under a standard normal,
        with the same standard error as :meth:`sample_effect_sizes`.

        NOTE: this method draws nothing from ``self.rng``; the result is a
        deterministic function of its arguments.
        """
        # Imported lazily so scipy is only required when p-values are used.
        from scipy import stats  # type: ignore[import-untyped]

        p_values: Dict[str, float] = {}
        se = self._standard_error(noise_level, sample_size)
        for gene, effect in true_effects.items():
            # Floor the denominator so noise_level == 0 cannot divide by zero.
            z = abs(effect) / max(se, 1e-8)
            p_values[gene] = float(2 * stats.norm.sf(z))
        return p_values

    # ── false discovery helpers ─────────────────────────────────────────

    def generate_false_positives(
        self, n_background_genes: int, fdr: float
    ) -> List[str]:
        """Return placeholder names for spurious hits.

        The number of names is a binomial draw with *n_background_genes*
        trials and success probability *fdr*.  Out-of-range arguments are
        clamped (negative counts to 0, *fdr* into ``[0, 1]``) instead of
        letting ``Generator.binomial`` raise ``ValueError``, in line with
        the clamping done by :meth:`sample_count`.

        Returns:
            ``["FP_GENE_0", "FP_GENE_1", ...]`` of the sampled length.
        """
        n_trials = max(n_background_genes, 0)
        p = min(max(fdr, 0.0), 1.0)
        n_fp = int(self.rng.binomial(n_trials, p))
        return [f"FP_GENE_{i}" for i in range(n_fp)]

    def generate_false_negatives(
        self, true_genes: List[str], fnr: float
    ) -> List[str]:
        """Return the subset of *true_genes* that are missed.

        Each gene is dropped independently with probability *fnr* (one
        uniform draw per gene); input order is preserved.
        """
        return [g for g in true_genes if self.rng.random() < fnr]

    # ── quality helpers ─────────────────────────────────────────────────

    def quality_degradation(
        self, base_quality: float, factors: List[float]
    ) -> float:
        """Multiply *base_quality* by every factor and add small jitter.

        Gaussian jitter with standard deviation 0.02 is added to the
        product, and the result is clipped to ``[0, 1]``.
        """
        q = base_quality
        for f in factors:
            q *= f
        return float(np.clip(q + self.rng.normal(0, 0.02), 0.0, 1.0))

    def sample_qc_metric(
        self, mean: float, std: float, clip_lo: float = 0.0, clip_hi: float = 1.0
    ) -> float:
        """Draw from ``Normal(mean, std)`` and clip to ``[clip_lo, clip_hi]``."""
        return float(np.clip(self.rng.normal(mean, std), clip_lo, clip_hi))

    def sample_count(self, lam: float) -> int:
        """Draw a Poisson count with rate *lam* (negative rates become 0)."""
        return int(self.rng.poisson(max(lam, 0)))

    def coin_flip(self, p: float) -> bool:
        """Return ``True`` when a uniform draw on ``[0, 1)`` is below *p*."""
        return bool(self.rng.random() < p)

    def sample_cluster_count(
        self, n_true_populations: int, quality: float
    ) -> int:
        """Over- or under-clustering depending on preprocessing quality.

        The result is *n_true_populations* plus a uniform integer offset
        in ``-2..2`` plus ``round((1 - quality) * 3)`` extra clusters
        (never fewer than 0 extra), floored at 1.
        """
        # Generator.integers excludes the upper bound and returns a NumPy
        # integer; cast so the result is a built-in int as annotated.
        delta = int(self.rng.integers(-2, 3))
        noise_clusters = max(0, int(round((1.0 - quality) * 3)))
        return max(1, n_true_populations + delta + noise_clusters)

    def shuffle_ranking(
        self, items: List[str], noise_level: float
    ) -> List[str]:
        """Permute a ranking with Gaussian noise on ordinals.

        Each position ``0..n-1`` is perturbed by noise with standard
        deviation ``noise_level * n`` and the items are re-sorted by the
        perturbed positions.  An empty input returns an empty list without
        consuming any random draws.
        """
        n = len(items)
        if n == 0:
            return []
        scores = np.arange(n, dtype=float) + self.rng.normal(
            0, noise_level * n, size=n
        )
        order = np.argsort(scores)
        return [items[int(i)] for i in order]