"""
SecureCodeEnv - Performance Grader v4

FIXES:
- Inverted baseline (naive faster than optimal) → return neutral 0.5
- Unmeasurable (-1.0) → return neutral 0.5
- Baseline timings indistinguishable (naive vs optimal differ by < 0.05 ms) → return neutral 0.5
- Agent faster than optimal → clamp to max 0.999 (not >1.0)
- All scores clamped to (0.001, 0.999)
"""
import sys, tempfile, os, json, subprocess
from graders.clamp import clamp

# Score reported whenever the timing comparison cannot be trusted: missing
# baselines or test case, an unmeasurable run, indistinguishable or inverted
# baselines, or an error raised while measuring.
NEUTRAL = 0.5  # returned when measurement is unreliable


def grade_performance(code: str, task: dict) -> dict:
    """Score the agent's runtime relative to the task's two baselines.

    The first test case that names a plain function call (has ``fn`` and
    ``input``, and has neither ``fn_class`` nor ``expected_exception``) is
    timed for the agent code, the naive baseline and the optimal baseline.
    The score is the agent's position on the naive→optimal interval:
    matching the optimal timing gives a raw 1.0 and matching the naive
    timing a raw 0.0; the raw value is passed through ``clamp``.

    Args:
        code: Source of the agent's solution.
        task: Task description; reads ``test_cases``, ``naive_code`` and
            ``optimal_code``.

    Returns:
        A dict with ``score``, ``time_score``, ``memory_score`` and
        ``feedback``. When all three timings were attempted without an
        exception it also carries ``agent_ms``, ``naive_ms`` and
        ``optimal_ms``. Memory is not measured separately here:
        ``memory_score`` mirrors ``time_score``. Every unreliable situation
        yields the neutral score instead of a computed one.
    """

    def _bare_neutral(message: str) -> dict:
        # Neutral result for the paths where no timings exist to report.
        return {"score": clamp(NEUTRAL), "time_score": clamp(NEUTRAL),
                "memory_score": clamp(NEUTRAL), "feedback": message}

    cases = task.get("test_cases", [])
    slow_src = task.get("naive_code", "")
    fast_src = task.get("optimal_code", "")

    if not (cases and slow_src and fast_src):
        return _bare_neutral("No baselines — neutral score")

    # Pick the first case that is a plain function call expected to succeed.
    chosen = None
    for candidate in cases:
        if "fn" not in candidate or "input" not in candidate:
            continue
        if "fn_class" in candidate or "expected_exception" in candidate:
            continue
        chosen = candidate
        break
    if not chosen:
        return _bare_neutral("No usable test case — neutral score")

    target = chosen["fn"]
    call_args = chosen["input"]

    try:
        agent_ms = _measure_ms(code, target, call_args)
        naive_ms = _measure_ms(slow_src, target, call_args)
        optimal_ms = _measure_ms(fast_src, target, call_args)

        # A negative value is the "could not measure" sentinel.
        if agent_ms < 0 or naive_ms < 0 or optimal_ms < 0:
            return _neutral(agent_ms, naive_ms, optimal_ms, "Unmeasurable timing")

        # Baselines closer than 0.05 ms give no usable scale.
        gap = naive_ms - optimal_ms
        if abs(gap) < 0.05:
            return _neutral(agent_ms, naive_ms, optimal_ms, "Timings indistinguishable")

        # Inverted baseline: the "optimal" reference is the slower one (e.g. it
        # trades speed for safety), so a speed ranking would be meaningless.
        if naive_ms < optimal_ms:
            return _neutral(agent_ms, naive_ms, optimal_ms,
                            "Baseline inverted (naive faster than optimal) — neutral")

        # Exceeds 1.0 when the agent beats the optimal baseline; clamp bounds it.
        overshoot = agent_ms - optimal_ms
        time_score = clamp(1.0 - (overshoot / gap))

        return {
            "score": time_score,
            "time_score": time_score,
            "memory_score": time_score,
            "agent_ms":   round(agent_ms, 3),
            "naive_ms":   round(naive_ms, 3),
            "optimal_ms": round(optimal_ms, 3),
            "feedback": _feedback(time_score),
        }
    except Exception as e:
        return _bare_neutral(f"Measurement error: {str(e)[:60]}")


def _neutral(agent_ms, naive_ms, optimal_ms, reason: str) -> dict:
    """Build a neutral-score result that still reports the usable timings.

    Each timing is rounded to three decimals; a negative timing (the
    unmeasurable sentinel) is reported as ``None``. ``reason`` becomes the
    ``feedback`` text.
    """
    result = {
        key: clamp(NEUTRAL)
        for key in ("score", "time_score", "memory_score")
    }
    timings = (
        ("agent_ms", agent_ms),
        ("naive_ms", naive_ms),
        ("optimal_ms", optimal_ms),
    )
    for label, value in timings:
        result[label] = round(value, 3) if value >= 0 else None
    result["feedback"] = reason
    return result


def _measure_ms(code: str, fn_name: str, inputs: list, runs: int = 50) -> float:
    """Returns ms or -1.0 if unmeasurable."""
    script = f"""
import timeit, json, sys
{code}
def _run():
    {fn_name}(*{json.dumps(inputs)})
times = timeit.repeat(_run, number={runs}, repeat=5)
best = min(times) / {runs} * 1000
sys.stdout.write(json.dumps({{"ms": best}}) + "\\n")
sys.stdout.flush()
"""
    tmp = None
    try:
        with tempfile.NamedTemporaryFile(mode="w", suffix=".py",
                                         delete=False, prefix="sce_perf_") as f:
            f.write(script); tmp = f.name
        proc = subprocess.run([sys.executable, tmp],
                              capture_output=True, text=True, timeout=30)
        for line in reversed(proc.stdout.strip().splitlines()):
            line = line.strip()
            if line.startswith("{"):
                return json.loads(line)["ms"]
        return -1.0
    except Exception:
        return -1.0
    finally:
        if tmp and os.path.exists(tmp):
            try: os.unlink(tmp)
            except OSError: pass


def _feedback(score: float) -> str:
    if score >= 0.85: return "Excellent — near-optimal efficiency"
    elif score >= 0.65: return "Good — minor optimisation possible"
    elif score >= 0.45: return "Acceptable — room for improvement"
    else:               return "Poor — significant performance gap"