"""
Simulated Static-Analysis Scanner - live tool interaction layer.

Calling run_scanner(task, seed) simulates what happens when an agent invokes
an external security/lint scanner (e.g. Bandit, Semgrep, Pylint) against
the code under review.

Noise model (seed-controlled for reproducibility)
--------------------------------------------------
  Recall:    Each real issue is reported with probability SCANNER_RECALL (0.70).
             ~30 pct of issues are silently missed - the agent cannot rely
             solely on the scanner.
  Precision: Each of MAX_FALSE_POSITIVES (2) independent draws injects one
             false-positive finding with probability SCANNER_NOISE_RATE (0.40),
             so a run contains 0 to MAX_FALSE_POSITIVES of them. False positives
             reference plausible-but-wrong line numbers so the agent must verify
             first.

Important: scanner results are NOT automatically graded.  The agent must
still call ADD_COMMENT with the correct line + keyword to earn reward.
"""

from __future__ import annotations

import random
import zlib
from typing import Any

# XOR mask keeps the scanner RNG independent from the mutation-engine RNG
# even when both receive the same episode seed.  run_scanner() seeds its
# generator with ``seed ^ _SCANNER_RNG_SEED_MASK``.
# NOTE(review): the mutation engine lives outside this module; presumably it
# seeds from the raw episode seed - confirm against that code.
_SCANNER_RNG_SEED_MASK: int = 0xDEAD_BEEF

# Only run_scanner is exported by ``from <module> import *``.
__all__ = ["run_scanner"]

# -- Noise parameters --------------------------------------------------------
# SCANNER_RECALL is compared against one uniform draw per real issue: the
# issue is reported when the draw is below it, otherwise counted as missed.
# False positives use MAX_FALSE_POSITIVES independent draws per run; each draw
# below SCANNER_NOISE_RATE adds one noise finding, so a run carries between
# 0 and MAX_FALSE_POSITIVES of them.

SCANNER_RECALL: float = 0.70       # probability each real issue is reported
SCANNER_NOISE_RATE: float = 0.40   # per-draw probability a false positive is injected
MAX_FALSE_POSITIVES: int = 2       # number of draws, hence cap on false positives per run

# -- False-positive templates ------------------------------------------------
# Plausible-sounding scanner warnings that do not correspond to any real
# ground-truth issue.  Each template supplies the "rule", "message",
# "category" and "severity" fields of a finding; the "line" (randomised at
# runtime) and "verified" fields are added when a template is turned into a
# finding by _build_false_positive_findings().

_FALSE_POSITIVE_TEMPLATES: list[dict[str, str]] = [
    {
        "rule":     "B105",
        "message":  "Possible hardcoded password: variable assigned string literal",
        "category": "security",
        "severity": "LOW",
    },
    {
        "rule":     "B324",
        "message":  "Use of weak MD4 or MD5 hash for security; consider stronger algorithm",
        "category": "security",
        "severity": "MEDIUM",
    },
    {
        "rule":     "W0611",
        "message":  "Imported but unused module detected in scope",
        "category": "style",
        "severity": "LOW",
    },
    {
        "rule":     "C0301",
        "message":  "Line too long (82 chars) - PEP 8 violation",
        "category": "style",
        "severity": "LOW",
    },
    {
        "rule":     "B007",
        "message":  "Loop control variable used in inner scope only; consider renaming to _",
        "category": "style",
        "severity": "LOW",
    },
    {
        "rule":     "W0702",
        "message":  "No exception type(s) specified in bare except clause",
        "category": "bug",
        "severity": "MEDIUM",
    },
    {
        "rule":     "B603",
        "message":  "subprocess call without shell=True; verify no injection risk",
        "category": "security",
        "severity": "LOW",
    },
]

# Tool/version strings reported in the "tool" field of a scan result.
# run_scanner() picks one at random per run; the choice is not coupled to the
# rule IDs that appear in the findings.
_SCANNER_TOOL_NAMES: list[str] = ["bandit 1.7.5", "semgrep 1.45.0", "pylint 3.1.0"]

# Fixed reminder returned verbatim in the "note" field of every scan result.
_UNVERIFIED_FINDINGS_NOTE: str = (
    "Scanner findings are UNVERIFIED. Use GET_CONTEXT or ADD_COMMENT "
    "to confirm each finding before including it in your review. "
    "False positives will be penalised."
)


def run_scanner(task: dict[str, Any], seed: int) -> dict[str, Any]:
    """
    Produce one simulated scanner report for the code held in ``task``.

    Real issues are recalled probabilistically, noise findings are added, and
    the combined list is shuffled.  Every random decision is drawn from one
    generator derived from ``seed``, so identical inputs yield identical
    reports.

    Parameters
    ----------
    task:
        A (possibly mutated) task dict.  ``task["code"]`` is required; the
        ``"issues"`` list is optional and treated as empty when absent.
    seed:
        Reproducibility seed.  It is XOR-ed with ``_SCANNER_RNG_SEED_MASK``
        before seeding the generator.

    Returns
    -------
    dict with keys:
        tool:         Scanner name + version string, chosen at random.
        findings:     list[dict] - recalled real issues mixed with false
                      positives, in shuffled order.
        missed_count: Number of real issues that were not reported.
        note:         Reminder that findings must be verified before commenting.
    """
    scanner_rng = random.Random(seed ^ _SCANNER_RNG_SEED_MASK)

    # Number of "\n"-separated lines; never below 1, even for empty code.
    line_count = max(task["code"].count("\n") + 1, 1)

    # The draw order below (recalled issues, noise, shuffle, tool name) is
    # part of the reproducibility contract - do not reorder.
    recalled, n_missed = _build_true_positive_findings(
        task=task, total_code_lines=line_count, rng=scanner_rng
    )
    noise = _build_false_positive_findings(
        task=task, total_code_lines=line_count, rng=scanner_rng
    )

    # Mix the two groups so list position does not give false positives away.
    report_findings = [*recalled, *noise]
    scanner_rng.shuffle(report_findings)

    tool_name = scanner_rng.choice(_SCANNER_TOOL_NAMES)
    return {
        "tool":         tool_name,
        "findings":     report_findings,
        "missed_count": n_missed,
        "note":         _UNVERIFIED_FINDINGS_NOTE,
    }


# -- Private helpers ---------------------------------------------------------

def _build_true_positive_findings(
    task: dict[str, Any],
    total_code_lines: int,
    rng: random.Random,
) -> tuple[list[dict[str, Any]], int]:
    """
    Turn the task's real issues into scanner findings, dropping some at random.

    Each issue in ``task["issues"]`` (empty when the key is absent) survives
    with probability SCANNER_RECALL.  A surviving issue is reported at the
    midpoint of its ``line_range`` shifted by -1, 0 or +1 and clamped to
    ``[1, total_code_lines]``.

    Returns a 2-tuple of (findings_list, missed_count).
    """
    recalled: list[dict[str, Any]] = []
    skipped = 0

    for real_issue in task.get("issues", []):
        if rng.random() < SCANNER_RECALL:
            first_line, last_line = real_issue["line_range"]

            # Offsets are weighted towards 0; the small wobble means the
            # agent cannot blindly trust the reported line number.
            offset = rng.choice([-1, 0, 0, 1])
            midpoint = (first_line + last_line) // 2
            line_number = max(1, min(total_code_lines, midpoint + offset))

            category = real_issue.get("category", "bug")
            rule_id = _pick_rule_for_category(category, rng)
            message = _build_issue_message(real_issue)
            level = _map_severity_to_scanner_level(
                real_issue.get("severity", "warning")
            )

            recalled.append({
                "line":     line_number,
                "rule":     rule_id,
                "message":  message,
                "category": category,
                "severity": level,
                "verified": False,
            })
        else:
            # Silently missed by the simulated scanner.
            skipped += 1

    return recalled, skipped


def _build_false_positive_findings(
    task: dict[str, Any],
    total_code_lines: int,
    rng: random.Random,
) -> list[dict[str, Any]]:
    """
    Create between 0 and MAX_FALSE_POSITIVES noise findings.

    MAX_FALSE_POSITIVES independent draws are made; each one below
    SCANNER_NOISE_RATE contributes a finding.  Findings are placed on lines
    not covered by any real issue's ``line_range`` whenever such lines exist.
    """
    # Always consume exactly MAX_FALSE_POSITIVES draws so the RNG stream does
    # not depend on how many of them succeed.
    injected = 0
    for _ in range(MAX_FALSE_POSITIVES):
        if rng.random() < SCANNER_NOISE_RATE:
            injected += 1
    if not injected:
        return []

    # Lines covered by real issues (inclusive ranges) are off limits.
    occupied: set[int] = set()
    for real_issue in task.get("issues", []):
        occupied.update(
            range(real_issue["line_range"][0], real_issue["line_range"][1] + 1)
        )

    every_line = range(1, total_code_lines + 1)
    free_lines = [number for number in every_line if number not in occupied]
    if not free_lines:
        # Tiny synthetic tasks may have every line inside a real issue; fall
        # back to the full range so there is always a line to choose from.
        free_lines = list(every_line)

    noise: list[dict[str, Any]] = []
    for _ in range(injected):
        # Template first, then line: this draw order must be kept.
        chosen = rng.choice(_FALSE_POSITIVE_TEMPLATES)
        line_number = rng.choice(free_lines)
        noise.append({
            "line":     line_number,
            "rule":     chosen["rule"],
            "message":  chosen["message"],
            "category": chosen["category"],
            "severity": chosen["severity"],
            "verified": False,
        })
    return noise


def _pick_rule_for_category(category: str, rng: random.Random) -> str:
    """
    Draw a plausible lint/security rule ID for ``category`` using ``rng``.

    Categories without their own pool share the "bug" pool.  Exactly one
    value is consumed from ``rng``.
    """
    pools: dict[str, list[str]] = {
        "security":    ["B101", "B102", "B105", "B106", "B201", "B301", "B501"],
        "bug":         ["E501", "W0611", "W0702", "E711", "E712"],
        "performance": ["W0640", "C0200", "W0108"],
        "style":       ["C0301", "C0303", "W0611", "C0114"],
        "design":      ["R0201", "R0902", "R0914", "W0107"],
    }
    pool_key = category if category in pools else "bug"
    return rng.choice(pools[pool_key])


def _build_issue_message(issue: dict[str, Any]) -> str:
    """
    Build a plausible but intentionally vague scanner message for an issue.

    The result is "<first six words of the description> - <generic text>",
    or just the generic text when the issue has no description.  The generic
    text comes from a per-category pool (unknown categories use the "bug"
    pool) and is selected by an RNG seeded only from the issue id, so a given
    id always maps to the same text.

    Parameters
    ----------
    issue:
        Issue dict.  Reads the optional keys 'category', 'description' and
        'id'.  A missing or None description/id is treated as empty, and
        non-string values are converted with str(), so e.g. integer ids work.

    Returns
    -------
    The assembled message string.
    """
    messages_by_category: dict[str, list[str]] = {
        "security": [
            "Potential security vulnerability detected in this expression",
            "Sensitive data handling - review for exposure risk",
            "Input not sanitised before use in sensitive operation",
            "Hardcoded value detected; consider externalising to configuration",
        ],
        "bug": [
            "Potential logic error or incorrect operator usage",
            "Variable assigned but possibly never read in all paths",
            "Exception handling may suppress important errors",
            "Index or range expression may be incorrect",
        ],
        "performance": [
            "Repeated operation inside loop - consider hoisting",
            "Unbounded collection growth detected",
            "Synchronous call inside async context",
            "Sequential requests could be batched",
        ],
        "style": [
            "Code style violation detected",
            "Unused identifier in scope",
            "Magic number - consider named constant",
        ],
        "design": [
            "Resource may not be properly released",
            "Thread or task lifecycle not managed",
            "Retry logic missing backoff strategy",
        ],
    }
    category = issue.get("category", "bug")
    message_pool = messages_by_category.get(category, messages_by_category["bug"])

    # Prefix with the first few words of the issue description so the message
    # is loosely tied to the real issue without being an exact keyword match.
    # An explicit None (or a non-string value) must not crash the scanner.
    raw_description = issue.get("description")
    description_text = "" if raw_description is None else str(raw_description)
    description_prefix = " ".join(description_text.split()[:6])
    prefix = f"{description_prefix} - " if description_prefix else ""

    # Use a separate RNG seeded on the issue id so this issue always maps to
    # the same scanner message regardless of evaluation order or FP count.
    # zlib.crc32 instead of builtin hash(): hash() of strings is salted with a
    # per-process random value (PEP 456), so the same issue id would produce
    # different messages across Python invocations, breaking the module's
    # reproducibility-from-seed contract.
    # The id is normalised to text first: ids may be ints, and only str has
    # .encode().  String ids hash exactly as before.
    raw_id = issue.get("id")
    id_text = "" if raw_id is None else str(raw_id)
    issue_rng = random.Random(zlib.crc32(id_text.encode("utf-8")))
    return prefix + issue_rng.choice(message_pool)


def _map_severity_to_scanner_level(severity: str) -> str:
    """
    Translate a PRobe severity label into a scanner-style level.

    "info" becomes LOW, "warning" becomes MEDIUM, and both "error" and
    "critical" become HIGH.  Any other label (the lookup is exact and
    case-sensitive) is reported as MEDIUM.
    """
    levels: dict[str, str] = {
        "critical": "HIGH",
        "error":    "HIGH",
        "warning":  "MEDIUM",
        "info":     "LOW",
    }
    try:
        return levels[severity]
    except KeyError:
        # Unrecognised labels fall back to the middle level.
        return "MEDIUM"