"""Quick smoke test for the environment."""
import sys
sys.path.insert(0, "src/envs")
from narada.server.environment import NaradaEnvironment
from narada.models import NaradaAction

env = NaradaEnvironment()
result = env.reset(task_type="monogenic", seed=1)
obs = result.observation
print(f"Reset: task={obs.task_type}")
print(f"  Node: {obs.current_node.name} ({obs.current_node.id})")
print(f"  Phenotypes: {obs.phenotype_names}")
print(f"  Candidates: {[v.gene for v in obs.candidate_variants]}")
assert 0.01 < result.reward < 0.99, f"Reset reward {result.reward} must be in (0.01, 0.99)"
assert "disease_name" not in obs.info, "Disease should not leak before terminal state"
assert env.state().ground_truth_variants == [], "Ground truth should not leak before done"

# Hop to first neighbor
if obs.current_node.connected_node_ids:
    hop_target = obs.current_node.connected_node_ids[0]
    action = NaradaAction(action_type="hop", node_id=hop_target, reasoning="Exploring")
    result = env.step(action)
    obs = result.observation
    print(f"After hop to {hop_target}: reward={result.reward:.4f}, node={obs.current_node.name}")

# Backtrack
action = NaradaAction(action_type="backtrack", reasoning="Testing backtrack")
result = env.step(action)
print(f"After backtrack: reward={result.reward:.4f}")

# Flag a candidate
v_id = obs.candidate_variants[0].id
action = NaradaAction(action_type="flag_causal", variant_id=v_id, reasoning="Test flag")
result = env.step(action)
print(f"After flag ({v_id}): done={result.done} reward={result.reward:.4f}")
assert result.done, "Episode should be done after flag_causal"
assert 0.01 <= result.reward <= 0.99, f"Reward {result.reward} out of (0.01, 0.99)"

# State check
state = env.state()
print(f"State: flagged={state.flagged_variants} ground_truth={state.ground_truth_variants[:2]}")
assert state.ground_truth_variants, "Ground truth should be revealed after terminal state"

# Oligogenic should allow multiple correct flags before termination.
env = NaradaEnvironment()
result = env.reset(task_type="oligogenic", seed=2)
ground_truth = env._case.ground_truth_variant_ids  # test-only access
first = env.step(NaradaAction(action_type="flag_causal", variant_id=ground_truth[0], reasoning="First causal variant"))
assert not first.done, "Oligogenic episode should continue after first correct flag"
second = env.step(NaradaAction(action_type="flag_causal", variant_id=ground_truth[1], reasoning="Second causal variant"))
assert second.done, "Oligogenic episode should end after all causal variants are flagged"
assert second.reward > first.reward, "Completing oligogenic diagnosis should improve reward"

print("ENV SMOKE TEST PASSED")