Spaces:

ZENLLC
/

Agent-POV

Sleeping

App Files Files Community

ZENLLC committed on Jan 5

Commit

a8e6497

verified ·

1 Parent(s): 5872f77

Update app.py

Browse files

Files changed (1) hide show

app.py +784 -454

app.py CHANGED Viewed

@@ -9,33 +9,31 @@ from PIL import Image, ImageDraw
 import gradio as gr
 # ============================================================
-# ChronoSandbox — Agent Timeline Lab (Deterministic, Inspectable)
-# - Multi-agent gridworld
-# - First-person pseudo-3D raycast view for selected agent
-# - Global truth map + per-agent belief maps (fog-of-war memory)
-# - AutoRun animation, time dilation, rewind scrubber
-# - Branching timelines (fork from any previous step)
-# - Click-to-edit map tiles
 #
-# Compatible with older Gradio versions by avoiding fn_kwargs in .click()
 # ============================================================
 # -----------------------------
-# World / render config
 # -----------------------------
 GRID_W, GRID_H = 21, 15
-TILE = 22  # top-down pixels per tile
 VIEW_W, VIEW_H = 640, 360
 RAY_W = 320
 FOV_DEG = 78
 MAX_DEPTH = 20
-# 0=E,1=S,2=W,3=N
 DIRS = [(1, 0), (0, 1), (-1, 0), (0, -1)]
 ORI_DEG = [0, 90, 180, 270]
-# Tile types
 EMPTY = 0
 WALL = 1
 FOOD = 2
@@ -52,37 +50,69 @@ TILE_NAMES = {
     TELE: "Teleporter",
 }
-# Palette (simple + inspectable)
 SKY = np.array([14, 16, 26], dtype=np.uint8)
 FLOOR_NEAR = np.array([24, 26, 40], dtype=np.uint8)
 FLOOR_FAR = np.array([10, 11, 18], dtype=np.uint8)
 WALL_BASE = np.array([210, 210, 225], dtype=np.uint8)
 WALL_SIDE = np.array([150, 150, 170], dtype=np.uint8)
-AGENT_COLORS = {
-    "Predator": (255, 120, 90),
-    "Prey": (120, 255, 160),
-    "Scout": (120, 190, 255),
-}
 # -----------------------------
-# Deterministic RNG helper
 # -----------------------------
 def rng_for(seed: int, step: int, stream: int = 0) -> np.random.Generator:
     """Build a reproducible NumPy generator for one (seed, step, stream) triple.

     Each argument is scaled by its own fixed multiplier, the three products
     are XOR-ed together, and the result is masked to 64 bits before being
     handed to ``np.random.default_rng``. Identical arguments therefore always
     produce a generator that emits the same sequence.
     """
     seed_part = seed * 1_000_003
     step_part = step * 9_999_937
     stream_part = stream * 97_531
     # The mask keeps the final seed non-negative and within 64 bits.
     seed64 = (seed_part ^ step_part ^ stream_part) & 0xFFFFFFFFFFFFFFFF
     return np.random.default_rng(seed64)
 # -----------------------------
-# State definitions
 # -----------------------------
 @dataclass
 class Agent:
     name: str
     x: int
     y: int
-    ori: int  # 0..3
     energy: int = 100
 @dataclass
 class WorldState:
     seed: int
@@ -91,24 +121,35 @@ class WorldState:
     agents: Dict[str, Agent]
     controlled: str
     pov: str
-    autorun: bool
-    speed_hz: float
     overlay: bool
-    event_log: List[str]
     caught: bool
     branches: Dict[str, int]
 @dataclass
 class Snapshot:
     step: int
     agents: Dict[str, Dict]
     grid: List[List[int]]
-    event_log_tail: List[str]
     caught: bool
 def default_grid() -> List[List[int]]:
     g = [[EMPTY for _ in range(GRID_W)] for _ in range(GRID_H)]
-    # Border walls
     for x in range(GRID_W):
         g[0][x] = WALL
         g[GRID_H - 1][x] = WALL
@@ -116,12 +157,10 @@ def default_grid() -> List[List[int]]:
         g[y][0] = WALL
         g[y][GRID_W - 1] = WALL
-    # Interior structure
     for x in range(4, 17):
         g[7][x] = WALL
     g[7][10] = DOOR
-    # Items
     g[3][4] = FOOD
     g[11][15] = FOOD
     g[4][14] = NOISE
@@ -136,6 +175,7 @@ def init_state(seed: int) -> WorldState:
         "Prey":     Agent("Prey", 18, 12, 2, 100),
         "Scout":    Agent("Scout", 10, 3, 1, 100),
     }
     return WorldState(
         seed=seed,
         step=0,
@@ -143,25 +183,28 @@ def init_state(seed: int) -> WorldState:
         agents=agents,
         controlled="Predator",
         pov="Predator",
-        autorun=False,
-        speed_hz=8.0,
         overlay=False,
-        event_log=["Initialized world."],
         caught=False,
         branches={"main": 0},
     )
 # -----------------------------
-# Belief memory
 # -----------------------------
 def init_belief() -> Dict[str, np.ndarray]:
     b = {}
-    for name in ["Predator", "Prey", "Scout"]:
-        b[name] = -1 * np.ones((GRID_H, GRID_W), dtype=np.int16)
     return b
 # -----------------------------
-# Movement + collision
 # -----------------------------
 def in_bounds(x: int, y: int) -> bool:
     """Return True when (x, y) lies inside the GRID_W x GRID_H grid."""
     column_ok = 0 <= x < GRID_W
     row_ok = 0 <= y < GRID_H
     return column_ok and row_ok
@@ -169,37 +212,10 @@ def in_bounds(x: int, y: int) -> bool:
 def is_blocking(tile: int) -> bool:
     """Return True when ``tile`` is the ``WALL`` tile type, False otherwise."""
     tile_is_wall = tile == WALL
     return tile_is_wall
-def move_forward(state: WorldState, a: Agent) -> None:
-    dx, dy = DIRS[a.ori]
-    nx, ny = a.x + dx, a.y + dy
-    if not in_bounds(nx, ny):
-        return
-    if is_blocking(state.grid[ny][nx]):
-        return
-    if state.grid[ny][nx] == DOOR:
-        state.grid[ny][nx] = EMPTY
-        state.event_log.append(f"t={state.step}: {a.name} opened a door.")
-    a.x, a.y = nx, ny
-    if state.grid[ny][nx] == TELE:
-        teles = [(x, y) for y in range(GRID_H) for x in range(GRID_W) if state.grid[y][x] == TELE]
-        if len(teles) >= 2:
-            teles_sorted = sorted(teles)
-            idx = teles_sorted.index((nx, ny))
-            dest = teles_sorted[(idx + 1) % len(teles_sorted)]
-            a.x, a.y = dest
-            state.event_log.append(f"t={state.step}: {a.name} teleported.")
-def turn_left(a: Agent) -> None:
-    a.ori = (a.ori - 1) % 4
-def turn_right(a: Agent) -> None:
-    a.ori = (a.ori + 1) % 4
-# -----------------------------
-# LOS + FOV visibility
-# -----------------------------
-def los_clear(grid: List[List[int]], x0: int, y0: int, x1: int, y1: int) -> bool:
     dx = abs(x1 - x0)
     dy = abs(y1 - y0)
     sx = 1 if x0 < x1 else -1
@@ -220,7 +236,7 @@ def los_clear(grid: List[List[int]], x0: int, y0: int, x1: int, y1: int) -> bool
             err += dx
             y += sy
-def within_fov(observer: Agent, tx: int, ty: int, fov_deg: float = 78.0) -> bool:
     dx = tx - observer.x
     dy = ty - observer.y
     if dx == 0 and dy == 0:
@@ -231,10 +247,54 @@ def within_fov(observer: Agent, tx: int, ty: int, fov_deg: float = 78.0) -> bool
     return abs(diff) <= (fov_deg / 2)
 def visible(observer: Agent, target: Agent, grid: List[List[int]]) -> bool:
-    return within_fov(observer, target.x, target.y, FOV_DEG) and los_clear(grid, observer.x, observer.y, target.x, target.y)
 # -----------------------------
-# Raycast pseudo-3D render
 # -----------------------------
 def raycast_view(state: WorldState, observer: Agent) -> np.ndarray:
     img = np.zeros((VIEW_H, VIEW_W, 3), dtype=np.uint8)
@@ -257,7 +317,8 @@ def raycast_view(state: WorldState, observer: Agent) -> np.ndarray:
         cos_a = math.cos(ray_ang)
         depth = 0.0
-        hit_side = 0
         while depth < MAX_DEPTH:
             depth += 0.05
@@ -265,16 +326,16 @@ def raycast_view(state: WorldState, observer: Agent) -> np.ndarray:
             ty = int(oy + sin_a * depth)
             if not in_bounds(tx, ty):
                 break
             tile = state.grid[ty][tx]
             if tile == WALL:
-                hit_side = 1 if abs(cos_a) > abs(sin_a) else 0
                 break
             if tile == DOOR:
-                hit_side = 2
                 break
-        if depth >= MAX_DEPTH:
             continue
         depth *= math.cos(ray_ang - math.radians(ORI_DEG[observer.ori]))
@@ -284,12 +345,10 @@ def raycast_view(state: WorldState, observer: Agent) -> np.ndarray:
         y0 = max(0, VIEW_H // 2 - proj_h // 2)
         y1 = min(VIEW_H - 1, VIEW_H // 2 + proj_h // 2)
-        if hit_side == 0:
-            col = WALL_BASE.copy()
-        elif hit_side == 1:
-            col = WALL_SIDE.copy()
         else:
-            col = np.array([180, 210, 255], dtype=np.uint8)
         dim = max(0.25, 1.0 - (depth / MAX_DEPTH))
         col = (col * dim).astype(np.uint8)
@@ -298,8 +357,9 @@ def raycast_view(state: WorldState, observer: Agent) -> np.ndarray:
         x1 = int((rx + 1) * (VIEW_W / RAY_W))
         img[y0:y1, x0:x1] = col
-    for other_name, other in state.agents.items():
-        if other_name == observer.name:
             continue
         if visible(observer, other, state.grid):
             dx = other.x - observer.x
@@ -316,7 +376,7 @@ def raycast_view(state: WorldState, observer: Agent) -> np.ndarray:
             y1 = min(VIEW_H - 1, y_mid + h // 2)
             x0 = max(0, sx - w // 2)
             x1 = min(VIEW_W - 1, sx + w // 2)
-            col = AGENT_COLORS.get(other_name, (255, 200, 120))
             img[y0:y1, x0:x1] = np.array(col, dtype=np.uint8)
     if state.overlay:
@@ -326,9 +386,6 @@ def raycast_view(state: WorldState, observer: Agent) -> np.ndarray:
     return img
-# -----------------------------
-# Top-down render
-# -----------------------------
 def render_topdown(grid: np.ndarray, agents: Dict[str, Agent], title: str, show_agents: bool = True) -> Image.Image:
     w = grid.shape[1] * TILE
     h = grid.shape[0] * TILE
@@ -366,10 +423,10 @@ def render_topdown(grid: np.ndarray, agents: Dict[str, Agent], title: str, show_
         draw.line([0, yy, w, yy], fill=(12, 14, 22))
     if show_agents:
-        for name, a in agents.items():
             cx = a.x * TILE + TILE // 2
             cy = a.y * TILE + 28 + TILE // 2
-            col = AGENT_COLORS.get(name, (220, 220, 220))
             r = TILE // 3
             draw.ellipse([cx - r, cy - r, cx + r, cy + r], fill=col)
             dx, dy = DIRS[a.ori]
@@ -380,9 +437,105 @@ def render_topdown(grid: np.ndarray, agents: Dict[str, Agent], title: str, show_
     return im
 # -----------------------------
-# Policies (explicit + deterministic)
 # -----------------------------
-def predator_policy(state: WorldState, step: int) -> str:
     pred = state.agents["Predator"]
     prey = state.agents["Prey"]
     if visible(pred, prey, state.grid):
@@ -396,10 +549,10 @@ def predator_policy(state: WorldState, step: int) -> str:
         if diff > 10:
             return "R"
         return "F"
-    r = rng_for(state.seed, step, stream=1)
-    return r.choice(["F", "L", "R", "F", "F"])
-def prey_policy(state: WorldState, step: int) -> str:
     prey = state.agents["Prey"]
     pred = state.agents["Predator"]
     if visible(prey, pred, state.grid):
@@ -414,60 +567,62 @@ def prey_policy(state: WorldState, step: int) -> str:
         if diff_away > 10:
             return "R"
         return "F"
-    for turn in [0, -1, 1, 2]:
-        ori = (prey.ori + turn) % 4
-        dx, dy = DIRS[ori]
-        nx, ny = prey.x + dx, prey.y + dy
-        if in_bounds(nx, ny) and state.grid[ny][nx] == FOOD:
-            if turn == 0:
-                return "F"
-            if turn == -1:
-                return "L"
-            if turn == 1:
-                return "R"
-            return "R"
-    r = rng_for(state.seed, step, stream=2)
-    return r.choice(["F", "L", "R", "F"])
-def scout_policy(state: WorldState, step: int) -> str:
-    scout = state.agents["Scout"]
-    pred = state.agents["Predator"]
-    if los_clear(state.grid, scout.x, scout.y, pred.x, pred.y):
-        dist = abs(scout.x - pred.x) + abs(scout.y - pred.y)
-        if dist <= 3:
-            return "R"
-        r = rng_for(state.seed, step, stream=3)
-        return r.choice(["F", "L", "R", "F"])
-    dx = pred.x - scout.x
-    dy = pred.y - scout.y
-    ang = (math.degrees(math.atan2(dy, dx)) % 360)
-    facing = ORI_DEG[scout.ori]
-    diff = (ang - facing + 540) % 360 - 180
-    if diff < -10:
-        return "L"
-    if diff > 10:
-        return "R"
-    return "F"
-# -----------------------------
-# Simulation step
-# -----------------------------
-def apply_action(state: WorldState, agent_name: str, action: str) -> None:
-    a = state.agents[agent_name]
-    if action == "L":
-        turn_left(a)
-    elif action == "R":
-        turn_right(a)
-    elif action == "F":
-        move_forward(state, a)
-def consume_tiles(state: WorldState) -> None:
-    prey = state.agents["Prey"]
-    tile = state.grid[prey.y][prey.x]
-    if tile == FOOD:
-        prey.energy = min(200, prey.energy + 35)
-        state.grid[prey.y][prey.x] = EMPTY
-        state.event_log.append(f"t={state.step}: Prey ate food (+energy).")
 def check_catch(state: WorldState) -> None:
     pred = state.agents["Predator"]
@@ -476,50 +631,220 @@ def check_catch(state: WorldState) -> None:
         state.caught = True
         state.event_log.append(f"t={state.step}: CAUGHT.")
 def tick(state: WorldState, manual_action: Optional[str] = None) -> None:
     if state.caught:
         return
     if manual_action:
-        apply_action(state, state.controlled, manual_action)
-    step = state.step
-    if state.autorun and not manual_action:
-        if state.controlled == "Predator":
-            act = predator_policy(state, step)
-        elif state.controlled == "Prey":
-            act = prey_policy(state, step)
-        else:
-            act = scout_policy(state, step)
-        apply_action(state, state.controlled, act)
-    for name in ["Predator", "Prey", "Scout"]:
-        if name == state.controlled:
             continue
-        if name == "Predator":
-            act = predator_policy(state, step)
-        elif name == "Prey":
-            act = prey_policy(state, step)
-        else:
-            act = scout_policy(state, step)
-        apply_action(state, name, act)
-    consume_tiles(state)
     check_catch(state)
     state.step += 1
 # -----------------------------
-# History
 # -----------------------------
-MAX_HISTORY = 3000
 def snapshot_of(state: WorldState) -> Snapshot:
     return Snapshot(
         step=state.step,
         agents={k: asdict(v) for k, v in state.agents.items()},
         grid=[row[:] for row in state.grid],
-        event_log_tail=state.event_log[-12:],
         caught=state.caught,
     )
 def restore_into(state: WorldState, snap: Snapshot) -> None:
@@ -528,70 +853,82 @@ def restore_into(state: WorldState, snap: Snapshot) -> None:
     for k, d in snap.agents.items():
         state.agents[k] = Agent(**d)
     state.caught = snap.caught
-    state.event_log.append(f"Jumped to t={snap.step} (rewind).")
 # -----------------------------
-# Belief updates
 # -----------------------------
-def update_belief_for_agent(state: WorldState, belief: np.ndarray, agent: Agent) -> None:
-    belief[agent.y, agent.x] = state.grid[agent.y][agent.x]
-    base = math.radians(ORI_DEG[agent.ori])
-    half = math.radians(FOV_DEG / 2)
-    rays = 33 if agent.name != "Scout" else 45
-    for i in range(rays):
-        t = i / (rays - 1)
-        ang = base + (t * 2 - 1) * half
-        sin_a, cos_a = math.sin(ang), math.cos(ang)
-        ox, oy = agent.x + 0.5, agent.y + 0.5
-        depth = 0.0
-        while depth < MAX_DEPTH:
-            depth += 0.2
-            tx = int(ox + cos_a * depth)
-            ty = int(oy + sin_a * depth)
-            if not in_bounds(tx, ty):
-                break
-            belief[ty, tx] = state.grid[ty][tx]
-            if state.grid[ty][tx] == WALL:
-                break
-# -----------------------------
-# Views + UI helpers
-# -----------------------------
-def build_views(state: WorldState, beliefs: Dict[str, np.ndarray]) -> Tuple[np.ndarray, Image.Image, Image.Image, Image.Image, str, str]:
-    pov_agent = state.agents[state.pov]
-    for name, a in state.agents.items():
-        update_belief_for_agent(state, beliefs[name], a)
-    pov_img = raycast_view(state, pov_agent)
     truth_np = np.array(state.grid, dtype=np.int16)
-    truth_img = render_topdown(truth_np, state.agents, f"Truth Map — t={state.step}  seed={state.seed}", show_agents=True)
     ctrl = state.controlled
     other = "Prey" if ctrl == "Predator" else "Predator"
-    ctrl_img = render_topdown(beliefs[ctrl], state.agents, f"{ctrl} Belief (Fog-of-War)", show_agents=True)
-    other_img = render_topdown(beliefs[other], state.agents, f"{other} Belief (Fog-of-War)", show_agents=True)
     pred = state.agents["Predator"]
     prey = state.agents["Prey"]
     scout = state.agents["Scout"]
     status = (
-        f"Controlled: {state.controlled} | POV: {state.pov} | "
-        f"AutoRun: {state.autorun} @ {state.speed_hz:.2f} Hz | "
-        f"Caught: {state.caught}\n"
-        f"Pred({pred.x},{pred.y}) ori={pred.ori} | "
-        f"Prey({prey.x},{prey.y}) ori={prey.ori} energy={prey.energy} | "
-        f"Scout({scout.x},{scout.y}) ori={scout.ori}"
     )
-    log = "\n".join(state.event_log[-14:])
-    return pov_img, truth_img, ctrl_img, other_img, status, log
 def grid_click_to_tile(evt: gr.SelectData, selected_tile: int, state: WorldState) -> WorldState:
     x_px, y_px = evt.index
-    y_px = y_px - 28
     if y_px < 0:
         return state
     gx = int(x_px // TILE)
@@ -601,308 +938,301 @@ def grid_click_to_tile(evt: gr.SelectData, selected_tile: int, state: WorldState
     if gx == 0 or gy == 0 or gx == GRID_W - 1 or gy == GRID_H - 1:
         return state
     state.grid[gy][gx] = selected_tile
-    state.event_log.append(f"t={state.step}: Edited tile ({gx},{gy}) -> {TILE_NAMES.get(selected_tile, selected_tile)}.")
     return state
-def export_run(state: WorldState, history: List[Snapshot]) -> str:
-    payload = {
-        "seed": state.seed,
-        "current_step": state.step,
-        "controlled": state.controlled,
-        "pov": state.pov,
-        "autorun": state.autorun,
-        "speed_hz": state.speed_hz,
-        "overlay": state.overlay,
-        "branches": state.branches,
-        "history": [asdict(s) for s in history],
-    }
-    return json.dumps(payload, indent=2)
-def import_run(txt: str) -> Tuple[WorldState, List[Snapshot], Dict[str, np.ndarray], int]:
-    data = json.loads(txt)
-    st = init_state(int(data["seed"]))
-    st.controlled = data.get("controlled", "Predator")
-    st.pov = data.get("pov", st.controlled)
-    st.autorun = bool(data.get("autorun", False))
-    st.speed_hz = float(data.get("speed_hz", 8.0))
-    st.overlay = bool(data.get("overlay", False))
-    st.branches = dict(data.get("branches", {"main": 0}))
-    hist = [Snapshot(**s) for s in data.get("history", [])]
-    bel = init_belief()
-    r_idx = min(len(hist) - 1, len(hist) - 1 if hist else 0)
-    if hist:
-        restore_into(st, hist[-1])
-    st.event_log.append("Imported run.")
-    return st, hist, bel, r_idx
 # -----------------------------
-# Gradio app
 # -----------------------------
-with gr.Blocks(title="ChronoSandbox — Agent Timeline Lab") as demo:
     gr.Markdown(
-        "## ChronoSandbox — Agent Timeline Lab\n"
-        "Deterministic multi-agent POV sandbox with **time dilation, rewind, and branching**.\n"
-        "Explicit rules, replayable runs."
     )
-    st = gr.State(init_state(seed=1337))
-    history = gr.State([snapshot_of(init_state(seed=1337))])
     beliefs = gr.State(init_belief())
-    rewind_index = gr.State(0)
     with gr.Row():
-        pov_img = gr.Image(label="First-Person POV (Pseudo-3D)", type="numpy", width=VIEW_W, height=VIEW_H)
         with gr.Column():
-            status = gr.Textbox(label="Status", lines=3)
-            log = gr.Textbox(label="Event Log", lines=14)
     with gr.Row():
         truth = gr.Image(label="Truth Map (click to edit tiles)", type="pil")
-        belief_a = gr.Image(label="Belief A", type="pil")
-        belief_b = gr.Image(label="Belief B", type="pil")
     with gr.Row():
         with gr.Column(scale=2):
-            gr.Markdown("### Controls")
             with gr.Row():
-                btn_L = gr.Button("Turn Left (L)")
-                btn_F = gr.Button("Forward (F)")
-                btn_R = gr.Button("Turn Right (R)")
             with gr.Row():
-                toggle_control = gr.Button("Toggle Controlled Agent")
-                toggle_pov = gr.Button("Toggle POV Camera")
-                btn_step = gr.Button("Tick (Single Step)")
             with gr.Row():
-                autorun = gr.Checkbox(False, label="AutoRun")
-                overlay = gr.Checkbox(False, label="Overlay (reticle)")
-            speed = gr.Slider(0.25, 32.0, value=8.0, step=0.25, label="Speed (Hz) — time dilation")
             tile_pick = gr.Radio(
                 choices=[(TILE_NAMES[k], k) for k in [EMPTY, WALL, FOOD, NOISE, DOOR, TELE]],
                 value=WALL,
-                label="Click-edit tile type"
             )
-        with gr.Column(scale=2):
-            gr.Markdown("### Time Travel")
-            rewind = gr.Slider(0, 0, value=0, step=1, label="Rewind Scrubber (history index)")
-            btn_jump = gr.Button("Jump to Rewind Index")
-            btn_branch = gr.Button("Branch From Current (fork timeline)")
-            branch_name = gr.Textbox(value="branch_1", label="Branch name")
-            gr.Markdown("### Import / Export")
             export_box = gr.Textbox(label="Export JSON", lines=10)
-            btn_export = gr.Button("Export Run")
             import_box = gr.Textbox(label="Import JSON", lines=10)
-            btn_import = gr.Button("Import Run")
-    timer = gr.Timer(0.12)
-    def refresh(state: WorldState, hist: List[Snapshot], bel: Dict[str, np.ndarray], r_idx: int):
         r_max = max(0, len(hist) - 1)
-        r_idx = max(0, min(int(r_idx), r_max))
-        pov_np, truth_im, a_im, b_im, stxt, ltxt = build_views(state, bel)
         return (
-            pov_np,
-            truth_im,
-            a_im,
-            b_im,
-            stxt,
-            ltxt,
-            gr.update(maximum=r_max, value=r_idx),
-            r_idx
         )
-    def do_action(state: WorldState, hist: List[Snapshot], bel: Dict[str, np.ndarray], r_idx: int, act: str):
-        tick(state, manual_action=act)
         hist.append(snapshot_of(state))
         if len(hist) > MAX_HISTORY:
             hist.pop(0)
-        r_idx = len(hist) - 1
-        return refresh(state, hist, bel, r_idx) + (state, hist, bel, r_idx)
-    def do_tick(state: WorldState, hist: List[Snapshot], bel: Dict[str, np.ndarray], r_idx: int):
-        tick(state, manual_action=None)
-        hist.append(snapshot_of(state))
-        if len(hist) > MAX_HISTORY:
-            hist.pop(0)
-        r_idx = len(hist) - 1
-        return refresh(state, hist, bel, r_idx) + (state, hist, bel, r_idx)
-    def set_toggles(state: WorldState, hist: List[Snapshot], bel: Dict[str, np.ndarray], r_idx: int, ar: bool, sp: float, ov: bool):
-        state.autorun = bool(ar)
-        state.speed_hz = float(sp)
-        state.overlay = bool(ov)
-        return refresh(state, hist, bel, r_idx) + (state, hist, bel, r_idx)
-    def toggle_control_fn(state: WorldState, hist: List[Snapshot], bel: Dict[str, np.ndarray], r_idx: int):
         order = ["Predator", "Prey", "Scout"]
         i = order.index(state.controlled)
         state.controlled = order[(i + 1) % len(order)]
-        state.event_log.append(f"t={state.step}: Controlled -> {state.controlled}.")
-        return refresh(state, hist, bel, r_idx) + (state, hist, bel, r_idx)
-    def toggle_pov_fn(state: WorldState, hist: List[Snapshot], bel: Dict[str, np.ndarray], r_idx: int):
         order = ["Predator", "Prey", "Scout"]
         i = order.index(state.pov)
         state.pov = order[(i + 1) % len(order)]
-        state.event_log.append(f"t={state.step}: POV -> {state.pov}.")
-        return refresh(state, hist, bel, r_idx) + (state, hist, bel, r_idx)
-    def jump_fn(state: WorldState, hist: List[Snapshot], bel: Dict[str, np.ndarray], r_idx: int, idx: int):
-        if not hist:
-            return refresh(state, hist, bel, r_idx) + (state, hist, bel, r_idx)
-        idx = max(0, min(int(idx), len(hist) - 1))
-        restore_into(state, hist[idx])
-        r_idx = idx
-        return refresh(state, hist, bel, r_idx) + (state, hist, bel, r_idx)
-    def branch_fn(state: WorldState, hist: List[Snapshot], bel: Dict[str, np.ndarray], r_idx: int, name: str):
-        nm = (name or "").strip() or f"branch_{len(state.branches)+1}"
-        state.branches[nm] = r_idx
-        state.event_log.append(f"t={state.step}: Branched timeline '{nm}' at history idx={r_idx}.")
-        return refresh(state, hist, bel, r_idx) + (state, hist, bel, r_idx)
-    def truth_click(tile: int, state: WorldState, hist: List[Snapshot], bel: Dict[str, np.ndarray], r_idx: int, evt: gr.SelectData):
         state = grid_click_to_tile(evt, int(tile), state)
-        hist.append(snapshot_of(state))
-        if len(hist) > MAX_HISTORY:
-            hist.pop(0)
-        r_idx = len(hist) - 1
-        return refresh(state, hist, bel, r_idx) + (state, hist, bel, r_idx)
-    def export_fn(state: WorldState, hist: List[Snapshot]):
         return export_run(state, hist)
-    def import_fn(txt: str):
-        state, hist, bel, r_idx = import_run(txt)
-        pov_np, truth_im, a_im, b_im, stxt, ltxt = build_views(state, bel)
         r_max = max(0, len(hist) - 1)
         return (
-            pov_np, truth_im, a_im, b_im, stxt, ltxt,
-            gr.update(maximum=r_max, value=r_idx),
-            state, hist, bel, r_idx
         )
-    # --- CLICK HANDLERS (NO fn_kwargs; use lambdas for compatibility) ---
-    btn_L.click(
-        lambda s, h, b, r: do_action(s, h, b, r, "L"),
-        inputs=[st, history, beliefs, rewind_index],
-        outputs=[pov_img, truth, belief_a, belief_b, status, log, rewind, rewind_index, st, history, beliefs, rewind_index],
-        api_name=False,
-        queue=True,
-    )
-    btn_F.click(
-        lambda s, h, b, r: do_action(s, h, b, r, "F"),
-        inputs=[st, history, beliefs, rewind_index],
-        outputs=[pov_img, truth, belief_a, belief_b, status, log, rewind, rewind_index, st, history, beliefs, rewind_index],
-        api_name=False,
-        queue=True,
-    )
-    btn_R.click(
-        lambda s, h, b, r: do_action(s, h, b, r, "R"),
-        inputs=[st, history, beliefs, rewind_index],
-        outputs=[pov_img, truth, belief_a, belief_b, status, log, rewind, rewind_index, st, history, beliefs, rewind_index],
-        api_name=False,
-        queue=True,
-    )
-    btn_step.click(
-        do_tick,
-        inputs=[st, history, beliefs, rewind_index],
-        outputs=[pov_img, truth, belief_a, belief_b, status, log, rewind, rewind_index, st, history, beliefs, rewind_index],
-        queue=True
-    )
-    toggle_control.click(
-        toggle_control_fn,
-        inputs=[st, history, beliefs, rewind_index],
-        outputs=[pov_img, truth, belief_a, belief_b, status, log, rewind, rewind_index, st, history, beliefs, rewind_index],
-        queue=True
-    )
-    toggle_pov.click(
-        toggle_pov_fn,
-        inputs=[st, history, beliefs, rewind_index],
-        outputs=[pov_img, truth, belief_a, belief_b, status, log, rewind, rewind_index, st, history, beliefs, rewind_index],
-        queue=True
-    )
-    autorun.change(
-        set_toggles,
-        inputs=[st, history, beliefs, rewind_index, autorun, speed, overlay],
-        outputs=[pov_img, truth, belief_a, belief_b, status, log, rewind, rewind_index, st, history, beliefs, rewind_index],
-        queue=True
-    )
-    speed.change(
-        set_toggles,
-        inputs=[st, history, beliefs, rewind_index, autorun, speed, overlay],
-        outputs=[pov_img, truth, belief_a, belief_b, status, log, rewind, rewind_index, st, history, beliefs, rewind_index],
-        queue=True
-    )
-    overlay.change(
-        set_toggles,
-        inputs=[st, history, beliefs, rewind_index, autorun, speed, overlay],
-        outputs=[pov_img, truth, belief_a, belief_b, status, log, rewind, rewind_index, st, history, beliefs, rewind_index],
-        queue=True
-    )
-    btn_jump.click(
-        jump_fn,
-        inputs=[st, history, beliefs, rewind_index, rewind],
-        outputs=[pov_img, truth, belief_a, belief_b, status, log, rewind, rewind_index, st, history, beliefs, rewind_index],
-        queue=True
-    )
-    btn_branch.click(
-        branch_fn,
-        inputs=[st, history, beliefs, rewind_index, branch_name],
-        outputs=[pov_img, truth, belief_a, belief_b, status, log, rewind, rewind_index, st, history, beliefs, rewind_index],
-        queue=True
-    )
-    truth.select(
-        truth_click,
-        inputs=[tile_pick, st, history, beliefs, rewind_index],
-        outputs=[pov_img, truth, belief_a, belief_b, status, log, rewind, rewind_index, st, history, beliefs, rewind_index],
-        queue=True
-    )
     btn_export.click(export_fn, inputs=[st, history], outputs=[export_box], queue=True)
-    btn_import.click(
-        import_fn,
-        inputs=[import_box],
-        outputs=[pov_img, truth, belief_a, belief_b, status, log, rewind, st, history, beliefs, rewind_index],
-        queue=True
-    )
-    # Timer-driven autorun
-    def timer_fn(state: WorldState, hist: List[Snapshot], bel: Dict[str, np.ndarray], r_idx: int, ar: bool, sp: float):
-        state.autorun = bool(ar)
-        state.speed_hz = float(sp)
-        if not state.autorun or state.caught:
-            return refresh(state, hist, bel, r_idx) + (state, hist, bel, r_idx)
-        ticks_per_frame = max(1, int(round(state.speed_hz * 0.12)))
-        for _ in range(ticks_per_frame):
-            tick(state, manual_action=None)
-            hist.append(snapshot_of(state))
-            if len(hist) > MAX_HISTORY:
-                hist.pop(0)
-        r_idx = len(hist) - 1
-        return refresh(state, hist, bel, r_idx) + (state, hist, bel, r_idx)
-    timer.tick(
-        timer_fn,
-        inputs=[st, history, beliefs, rewind_index, autorun, speed],
-        outputs=[pov_img, truth, belief_a, belief_b, status, log, rewind, rewind_index, st, history, beliefs, rewind_index],
-        queue=True
-    )
-    demo.load(
-        refresh,
-        inputs=[st, history, beliefs, rewind_index],
-        outputs=[pov_img, truth, belief_a, belief_b, status, log, rewind, rewind_index],
-        queue=True
-    )
 demo.queue().launch()

 import gradio as gr
 # ============================================================
+# ChronoSandbox++ — Instrumented Training Arena
+# - Deterministic gridworld + first-person raycast view
+# - Click-to-edit environment (tiles)
+# - Full step trace: obs -> action -> reward -> q-update rationale
+# - Optional Q-learning (tabular) for Predator + Prey
+# - Batch training: run episodes fast, track metrics
+# - Export/import: environment, history, Q-tables, metrics
 #
+# Compatibility: avoids fn_kwargs + avoids gr.Timer
 # ============================================================
 # -----------------------------
+# Config
 # -----------------------------
 GRID_W, GRID_H = 21, 15
+TILE = 22
 VIEW_W, VIEW_H = 640, 360
 RAY_W = 320
 FOV_DEG = 78
 MAX_DEPTH = 20
 DIRS = [(1, 0), (0, 1), (-1, 0), (0, -1)]
 ORI_DEG = [0, 90, 180, 270]
 EMPTY = 0
 WALL = 1
 FOOD = 2
     TELE: "Teleporter",
 }
+AGENT_COLORS = {
+    "Predator": (255, 120, 90),
+    "Prey": (120, 255, 160),
+    "Scout": (120, 190, 255),
+}
 SKY = np.array([14, 16, 26], dtype=np.uint8)
 FLOOR_NEAR = np.array([24, 26, 40], dtype=np.uint8)
 FLOOR_FAR = np.array([10, 11, 18], dtype=np.uint8)
 WALL_BASE = np.array([210, 210, 225], dtype=np.uint8)
 WALL_SIDE = np.array([150, 150, 170], dtype=np.uint8)
+DOOR_COL = np.array([180, 210, 255], dtype=np.uint8)
+ACTIONS = ["L", "F", "R"]  # keep small for tabular learning stability
 # -----------------------------
+# Deterministic RNG streams
 # -----------------------------
 def rng_for(seed: int, step: int, stream: int = 0) -> np.random.Generator:
     """Return a NumPy generator seeded deterministically from the inputs.

     The seed is the XOR of ``seed``, ``step`` and ``stream`` after each has
     been multiplied by a distinct constant, masked down to 64 bits. Calling
     this twice with the same arguments gives generators with equal output.
     """
     scaled = (
         seed * 1_000_003,
         step * 9_999_937,
         stream * 97_531,
     )
     combined = scaled[0] ^ scaled[1] ^ scaled[2]
     # Mask to 64 bits so the value passed as a seed is never negative.
     return np.random.default_rng(combined & 0xFFFFFFFFFFFFFFFF)
 # -----------------------------
+# Data structures
 # -----------------------------
 @dataclass
 class Agent:
     name: str
     x: int
     y: int
+    ori: int
     energy: int = 100
+@dataclass
+class TrainConfig:
+    # Settings for the tabular Q-learning agents and their reward shaping.
+    # choose_action uses the Q-table for an agent only when its flag is set;
+    # otherwise the heuristic policy is used.
+    use_q_pred: bool = True
+    use_q_prey: bool = True
+    # learning rate and discount factor handed to q_update
+    alpha: float = 0.15
+    gamma: float = 0.95
+    # starting exploration rate; init_state copies it into Metrics.epsilon
+    epsilon: float = 0.10
+    # train() multiplies the live epsilon by epsilon_decay after every episode,
+    # never letting it drop below epsilon_min
+    epsilon_min: float = 0.02
+    epsilon_decay: float = 0.995
+    # reward shaping
+    # predator: per-step penalty, bonus per unit of Manhattan distance closed,
+    # and a one-off bonus when the state is flagged as caught
+    pred_step_penalty: float = -0.02
+    pred_dist_coeff: float = 0.03
+    pred_catch_reward: float = 3.0
+    # prey: per-step penalty plus survival bonus, bonus for eating food,
+    # and a penalty when the state is flagged as caught
+    prey_step_penalty: float = -0.02
+    prey_food_reward: float = 0.6
+    prey_survive_reward: float = 0.02
+    prey_caught_penalty: float = -3.0
+@dataclass
+class Metrics:
+    # Running training statistics; updated by train() and shown in the status text.
+    # totals accumulated across all train() calls
+    episodes: int = 0
+    catches: int = 0
+    # smoothed by train(): 0.85 * previous + 0.15 * batch average
+    avg_steps_to_catch: float = 0.0
+    avg_path_efficiency: float = 0.0  # optimal / actual (0..1)
+    # values from the final episode of the most recent train() call
+    last_episode_steps: int = 0
+    last_episode_eff: float = 0.0
+    # live exploration rate read by choose_action and decayed by train()
+    epsilon: float = 0.10
 @dataclass
 class WorldState:
     seed: int
     agents: Dict[str, Agent]
     controlled: str
     pov: str
     overlay: bool
     caught: bool
     branches: Dict[str, int]
+    # instrumentation
+    event_log: List[str]
+    trace_log: List[str]  # more detailed step trace (bounded)
+    # training
+    cfg: TrainConfig
+    q_pred: Dict[str, List[float]]
+    q_prey: Dict[str, List[float]]
+    metrics: Metrics
 @dataclass
 class Snapshot:
+    # One history entry produced by snapshot_of() and kept in the rewind history.
     step: int
+    # agent name -> asdict(Agent)
     agents: Dict[str, Dict]
+    # row-by-row copy of the tile grid at snapshot time
     grid: List[List[int]]
     caught: bool
+    # snapshot_of() stores the last 20 event-log lines and last 40 trace lines
+    event_log_tail: List[str]
+    trace_tail: List[str]
+# -----------------------------
+# Environment
+# -----------------------------
 def default_grid() -> List[List[int]]:
     g = [[EMPTY for _ in range(GRID_W)] for _ in range(GRID_H)]
     for x in range(GRID_W):
         g[0][x] = WALL
         g[GRID_H - 1][x] = WALL
         g[y][0] = WALL
         g[y][GRID_W - 1] = WALL
     for x in range(4, 17):
         g[7][x] = WALL
     g[7][10] = DOOR
     g[3][4] = FOOD
     g[11][15] = FOOD
     g[4][14] = NOISE
         "Prey":     Agent("Prey", 18, 12, 2, 100),
         "Scout":    Agent("Scout", 10, 3, 1, 100),
     }
+    cfg = TrainConfig()
     return WorldState(
         seed=seed,
         step=0,
         agents=agents,
         controlled="Predator",
         pov="Predator",
         overlay=False,
         caught=False,
         branches={"main": 0},
+        event_log=["Initialized world."],
+        trace_log=[],
+        cfg=cfg,
+        q_pred={},
+        q_prey={},
+        metrics=Metrics(epsilon=cfg.epsilon),
     )
 # -----------------------------
+# Belief maps
 # -----------------------------
 def init_belief() -> Dict[str, np.ndarray]:
+    """Create one (GRID_H, GRID_W) int16 belief map per agent, every cell set to -1.
+
+    NOTE(review): -1 presumably means "not observed yet"; cells are later
+    overwritten with real tile ids by update_belief_for_agent.
+    """
     b = {}
+    for nm in ["Predator", "Prey", "Scout"]:
+        b[nm] = -1 * np.ones((GRID_H, GRID_W), dtype=np.int16)
     return b
 # -----------------------------
+# Helpers
 # -----------------------------
 def in_bounds(x: int, y: int) -> bool:
+    """Return True when (x, y) lies inside the GRID_W x GRID_H grid."""
     return 0 <= x < GRID_W and 0 <= y < GRID_H
 def is_blocking(tile: int) -> bool:
+    """Return True for tiles that stop movement; only WALL does."""
     return tile == WALL
+def manhattan(a: Agent, b: Agent) -> int:
+    """Return the Manhattan (L1) distance between the cells of two agents."""
+    gap_x = abs(a.x - b.x)
+    gap_y = abs(a.y - b.y)
+    return gap_x + gap_y
+def bresenham_los(grid: List[List[int]], x0: int, y0: int, x1: int, y1: int) -> bool:
     dx = abs(x1 - x0)
     dy = abs(y1 - y0)
     sx = 1 if x0 < x1 else -1
             err += dx
             y += sy
+def within_fov(observer: Agent, tx: int, ty: int, fov_deg: float = FOV_DEG) -> bool:
     dx = tx - observer.x
     dy = ty - observer.y
     if dx == 0 and dy == 0:
     return abs(diff) <= (fov_deg / 2)
 def visible(observer: Agent, target: Agent, grid: List[List[int]]) -> bool:
+    """Return True when the target's cell passes both the FOV and line-of-sight checks.
+
+    The target must be inside the observer's FOV_DEG cone (within_fov) and
+    bresenham_los from the observer's cell to the target's cell must be true.
+    """
+    return within_fov(observer, target.x, target.y, FOV_DEG) and bresenham_los(grid, observer.x, observer.y, target.x, target.y)
+# -----------------------------
+# Movement
+# -----------------------------
+def turn_left(a: Agent) -> None:
+    """Rotate the agent in place one step backwards through DIRS (wraps at 0)."""
+    # adding 3 modulo 4 is the same as subtracting 1 modulo 4
+    a.ori = (a.ori + 3) % 4
+def turn_right(a: Agent) -> None:
+    """Rotate the agent in place one step forwards through DIRS (wraps at 3)."""
+    next_ori = a.ori + 1
+    a.ori = next_ori % 4
+def move_forward(state: WorldState, a: Agent) -> str:
+    """Try to move the agent one cell along its facing direction.
+
+    Returns a short outcome string: "blocked: bounds", "blocked: wall",
+    "moved" or "moved: teleported". Stepping onto a DOOR turns it into EMPTY
+    and logs the event. Stepping onto a TELE tile, when at least two exist,
+    moves the agent to the next teleporter in sorted (x, y) order.
+    """
+    step_x, step_y = DIRS[a.ori]
+    tx, ty = a.x + step_x, a.y + step_y
+    if not in_bounds(tx, ty):
+        return "blocked: bounds"
+    target = state.grid[ty][tx]
+    if is_blocking(target):
+        return "blocked: wall"
+    if target == DOOR:
+        # doors open permanently the first time anything walks through them
+        state.grid[ty][tx] = EMPTY
+        state.event_log.append(f"t={state.step}: {a.name} opened a door.")
+    a.x, a.y = tx, ty
+    if state.grid[ty][tx] != TELE:
+        return "moved"
+    pads = sorted(
+        (cx, cy)
+        for cy in range(GRID_H)
+        for cx in range(GRID_W)
+        if state.grid[cy][cx] == TELE
+    )
+    if len(pads) < 2:
+        # a lone teleporter has nowhere to send the agent
+        return "moved"
+    here = pads.index((tx, ty))
+    a.x, a.y = pads[(here + 1) % len(pads)]
+    state.event_log.append(f"t={state.step}: {a.name} teleported.")
+    return "moved: teleported"
+def apply_action(state: WorldState, agent_name: str, action: str) -> str:
+    """Apply one action code ("L", "R" or "F") to the named agent.
+
+    Returns the outcome text; any other action code yields "noop".
+    """
+    actor = state.agents[agent_name]
+    if action == "F":
+        return move_forward(state, actor)
+    if action == "L":
+        turn_left(actor)
+        return "turned left"
+    if action == "R":
+        turn_right(actor)
+        return "turned right"
+    return "noop"
 # -----------------------------
+# Rendering
 # -----------------------------
 def raycast_view(state: WorldState, observer: Agent) -> np.ndarray:
     img = np.zeros((VIEW_H, VIEW_W, 3), dtype=np.uint8)
         cos_a = math.cos(ray_ang)
         depth = 0.0
+        hit = None  # None, "wall", "door"
+        side = 0
         while depth < MAX_DEPTH:
             depth += 0.05
             ty = int(oy + sin_a * depth)
             if not in_bounds(tx, ty):
                 break
             tile = state.grid[ty][tx]
             if tile == WALL:
+                hit = "wall"
+                side = 1 if abs(cos_a) > abs(sin_a) else 0
                 break
             if tile == DOOR:
+                hit = "door"
                 break
+        if hit is None:
             continue
         depth *= math.cos(ray_ang - math.radians(ORI_DEG[observer.ori]))
         y0 = max(0, VIEW_H // 2 - proj_h // 2)
         y1 = min(VIEW_H - 1, VIEW_H // 2 + proj_h // 2)
+        if hit == "door":
+            col = DOOR_COL.copy()
         else:
+            col = WALL_BASE.copy() if side == 0 else WALL_SIDE.copy()
         dim = max(0.25, 1.0 - (depth / MAX_DEPTH))
         col = (col * dim).astype(np.uint8)
         x1 = int((rx + 1) * (VIEW_W / RAY_W))
         img[y0:y1, x0:x1] = col
+    # billboards for visible agents
+    for nm, other in state.agents.items():
+        if nm == observer.name:
             continue
         if visible(observer, other, state.grid):
             dx = other.x - observer.x
             y1 = min(VIEW_H - 1, y_mid + h // 2)
             x0 = max(0, sx - w // 2)
             x1 = min(VIEW_W - 1, sx + w // 2)
+            col = AGENT_COLORS.get(nm, (255, 200, 120))
             img[y0:y1, x0:x1] = np.array(col, dtype=np.uint8)
     if state.overlay:
     return img
 def render_topdown(grid: np.ndarray, agents: Dict[str, Agent], title: str, show_agents: bool = True) -> Image.Image:
     w = grid.shape[1] * TILE
     h = grid.shape[0] * TILE
         draw.line([0, yy, w, yy], fill=(12, 14, 22))
     if show_agents:
+        for nm, a in agents.items():
             cx = a.x * TILE + TILE // 2
             cy = a.y * TILE + 28 + TILE // 2
+            col = AGENT_COLORS.get(nm, (220, 220, 220))
             r = TILE // 3
             draw.ellipse([cx - r, cy - r, cx + r, cy + r], fill=col)
             dx, dy = DIRS[a.ori]
     return im
 # -----------------------------
+# Belief updates
 # -----------------------------
+def update_belief_for_agent(state: WorldState, belief: np.ndarray, agent: Agent) -> None:
+    """Copy the tiles the agent can currently see from the true grid into `belief`.
+
+    The agent's own cell is always recorded. A fan of rays (45 for the Scout,
+    33 otherwise) spanning FOV_DEG around the facing direction is marched in
+    0.2-cell steps; each ray stops at the grid edge, at the first WALL (which
+    is still recorded) or at MAX_DEPTH.
+    """
+    belief[agent.y, agent.x] = state.grid[agent.y][agent.x]
+    heading = math.radians(ORI_DEG[agent.ori])
+    half_fov = math.radians(FOV_DEG / 2)
+    n_rays = 45 if agent.name == "Scout" else 33
+    # rays start from the centre of the agent's cell
+    origin_x, origin_y = agent.x + 0.5, agent.y + 0.5
+    for idx in range(n_rays):
+        frac = idx / (n_rays - 1)
+        theta = heading + (frac * 2 - 1) * half_fov
+        dir_y, dir_x = math.sin(theta), math.cos(theta)
+        dist = 0.0
+        while dist < MAX_DEPTH:
+            dist += 0.2
+            cx = int(origin_x + dir_x * dist)
+            cy = int(origin_y + dir_y * dist)
+            if not in_bounds(cx, cy):
+                break
+            seen = state.grid[cy][cx]
+            belief[cy, cx] = seen
+            if seen == WALL:
+                break
+# -----------------------------
+# Optimal distance (BFS) for efficiency metric
+# -----------------------------
+def bfs_distance(grid: List[List[int]], sx: int, sy: int, gx: int, gy: int) -> Optional[int]:
+    """Return the fewest 4-neighbour steps from (sx, sy) to (gx, gy), or None.
+
+    Only WALL tiles block the search; None means the goal cannot be reached.
+    """
+    start, goal = (sx, sy), (gx, gy)
+    if start == goal:
+        return 0
+    seen = {start}
+    frontier = [start]
+    hops = 0
+    # expand one distance layer at a time
+    while frontier:
+        hops += 1
+        next_frontier = []
+        for cx, cy in frontier:
+            for off_x, off_y in DIRS:
+                px, py = cx + off_x, cy + off_y
+                if not in_bounds(px, py) or grid[py][px] == WALL:
+                    continue
+                if (px, py) in seen:
+                    continue
+                if (px, py) == goal:
+                    return hops
+                seen.add((px, py))
+                next_frontier.append((px, py))
+        frontier = next_frontier
+    return None
+# -----------------------------
+# Observation encoding (compact state key)
+# -----------------------------
+def obs_key(state: WorldState, who: str) -> str:
+    """Build the compact string used as the Q-table key for agent `who`.
+
+    Predator and Prey keys hold the agent's own position and orientation, the
+    offset to the opponent clamped to [-6, 6] per axis, and whether the agent
+    currently sees the opponent; the Prey key also has an energy bucket
+    (energy // 25). Any other agent gets a position/orientation key only.
+    """
+    pred = state.agents["Predator"]
+    prey = state.agents["Prey"]
+    me = state.agents[who]
+
+    def _bin(delta: int) -> int:
+        # clamp the offset so the table stays small
+        return int(max(-6, min(6, delta)))
+
+    if who == "Predator":
+        off_x = _bin(prey.x - pred.x)
+        off_y = _bin(prey.y - pred.y)
+        sees = 1 if visible(pred, prey, state.grid) else 0
+        return f"P|{pred.x},{pred.y},{pred.ori}|d{off_x},{off_y}|v{sees}"
+    if who == "Prey":
+        # the prey's key is built from its own point of view
+        off_x = _bin(pred.x - prey.x)
+        off_y = _bin(pred.y - prey.y)
+        sees = 1 if visible(prey, pred, state.grid) else 0
+        return f"R|{prey.x},{prey.y},{prey.ori}|d{off_x},{off_y}|v{sees}|e{int(prey.energy//25)}"
+    return f"S|{me.x},{me.y},{me.ori}"
+def q_get(q: Dict[str, List[float]], key: str) -> List[float]:
+    """Return the stored Q-value row for `key`, creating a zero row on first use.
+
+    The returned list is the one held in the table, so edits to it persist.
+    """
+    return q.setdefault(key, [0.0, 0.0, 0.0])
+def epsilon_greedy(qvals: List[float], eps: float, r: np.random.Generator) -> int:
+    """Pick an action index: uniformly random with probability `eps`, else the argmax."""
+    explore = r.random() < eps
+    if not explore:
+        return int(np.argmax(qvals))
+    return int(r.integers(0, len(qvals)))
+def q_update(q: Dict[str, List[float]], key: str, a_idx: int, reward: float, next_key: str, alpha: float, gamma: float) -> Tuple[float, float, float]:
+    """Apply one tabular Q-learning update in place.
+
+    Returns (old value, target, new value) for the updated table entry.
+    """
+    row = q_get(q, key)
+    next_row = q_get(q, next_key)
+    before = row[a_idx]
+    # the target bootstraps from the best value available in the next state
+    td_target = reward + gamma * float(np.max(next_row))
+    after = before + alpha * (td_target - before)
+    row[a_idx] = after
+    return before, td_target, after
+# -----------------------------
+# Baseline heuristic policies (for Scout + fallback)
+# -----------------------------
+def heuristic_pred_action(state: WorldState) -> str:
     pred = state.agents["Predator"]
     prey = state.agents["Prey"]
     if visible(pred, prey, state.grid):
         if diff > 10:
             return "R"
         return "F"
+    r = rng_for(state.seed, state.step, stream=11)
+    return r.choice(ACTIONS)
+def heuristic_prey_action(state: WorldState) -> str:
     prey = state.agents["Prey"]
     pred = state.agents["Predator"]
     if visible(prey, pred, state.grid):
         if diff_away > 10:
             return "R"
         return "F"
+    r = rng_for(state.seed, state.step, stream=12)
+    return r.choice(ACTIONS)
+def heuristic_scout_action(state: WorldState) -> str:
+    """Pick a random action for the Scout from RNG stream 13 for this seed and step."""
+    return rng_for(state.seed, state.step, stream=13).choice(ACTIONS)
+# -----------------------------
+# Reward shaping
+# -----------------------------
+def pred_reward(state_prev: WorldState, state_now: WorldState) -> float:
+    """Predator reward for one tick.
+
+    Sum of the per-step penalty, a bonus proportional to how much the Manhattan
+    distance to the prey shrank, and the catch reward when `state_now.caught`.
+    """
+    cfg = state_now.cfg
+    dist_before = manhattan(state_prev.agents["Predator"], state_prev.agents["Prey"])
+    dist_after = manhattan(state_now.agents["Predator"], state_now.agents["Prey"])
+    # a positive difference means the gap closed
+    reward = cfg.pred_step_penalty + cfg.pred_dist_coeff * (dist_before - dist_after)
+    if state_now.caught:
+        reward += cfg.pred_catch_reward
+    return float(reward)
+def prey_reward(state_prev: WorldState, state_now: WorldState, ate_food: bool) -> float:
+    """Prey reward for one tick.
+
+    Step penalty plus survival bonus, plus the food reward when `ate_food`,
+    plus the caught penalty when `state_now.caught`. `state_prev` is accepted
+    to mirror pred_reward but is not read.
+    """
+    shaping = state_now.cfg
+    total = shaping.prey_step_penalty + shaping.prey_survive_reward
+    if ate_food:
+        total += shaping.prey_food_reward
+    if state_now.caught:
+        total += shaping.prey_caught_penalty
+    return float(total)
+# -----------------------------
+# Core simulation tick (with instrumentation + optional learning)
+# -----------------------------
+TRACE_MAX = 400
+def clone_shallow(state: WorldState) -> WorldState:
+    """Copy `state` so rewards can compare the world before and after a tick.
+
+    Grid rows, agents, branches and both logs are duplicated. The config,
+    Q-tables and metrics are shared by reference with the original.
+    """
+    grid_copy = [list(row) for row in state.grid]
+    agents_copy = {}
+    for name, agent in state.agents.items():
+        agents_copy[name] = Agent(**asdict(agent))
+    return WorldState(
+        seed=state.seed,
+        step=state.step,
+        grid=grid_copy,
+        agents=agents_copy,
+        controlled=state.controlled,
+        pov=state.pov,
+        overlay=state.overlay,
+        caught=state.caught,
+        branches=dict(state.branches),
+        event_log=list(state.event_log),
+        trace_log=list(state.trace_log),
+        # shared on purpose: learning state must not fork per tick
+        cfg=state.cfg,
+        q_pred=state.q_pred,
+        q_prey=state.q_prey,
+        metrics=state.metrics,
+    )
 def check_catch(state: WorldState) -> None:
     pred = state.agents["Predator"]
         state.caught = True
         state.event_log.append(f"t={state.step}: CAUGHT.")
+def consume_food(state: WorldState) -> bool:
+    """Let the Prey eat FOOD on its current cell; return True when it did.
+
+    Eating adds 35 energy (capped at 200), clears the tile and logs the event.
+    """
+    prey = state.agents["Prey"]
+    px, py = prey.x, prey.y
+    if state.grid[py][px] != FOOD:
+        return False
+    prey.energy = min(200, prey.energy + 35)
+    state.grid[py][px] = EMPTY
+    state.event_log.append(f"t={state.step}: Prey ate food (+energy).")
+    return True
+def choose_action(state: WorldState, who: str, stream: int) -> Tuple[str, str, Optional[Tuple[str,int]]]:
+    """
+    Pick the next action for `who`.
+    Returns (action, reason, q_info); q_info is (obs_key, action_index) when
+    the action came from the Q-table and None when a heuristic chose it.
+    """
+    cfg = state.cfg
+    rng = rng_for(state.seed, state.step, stream=stream)
+    # per-role learning setup: (enabled flag, Q-table, tag used in the reason text)
+    learners = {
+        "Predator": (cfg.use_q_pred, state.q_pred, "pred"),
+        "Prey": (cfg.use_q_prey, state.q_prey, "prey"),
+    }
+    entry = learners.get(who)
+    if entry is not None and entry[0]:
+        _, table, tag = entry
+        key = obs_key(state, who)
+        values = q_get(table, key)
+        eps_now = state.metrics.epsilon
+        idx = epsilon_greedy(values, eps_now, rng)
+        reason = f"Q({tag}) eps={eps_now:.3f} q={np.round(values,3).tolist()}"
+        return ACTIONS[idx], reason, (key, idx)
+    # learning disabled (or Scout): fall back to the scripted policies
+    if who == "Predator":
+        return heuristic_pred_action(state), "heuristic(pred)", None
+    if who == "Prey":
+        return heuristic_prey_action(state), "heuristic(prey)", None
+    return heuristic_scout_action(state), "heuristic(scout)", None
 def tick(state: WorldState, manual_action: Optional[str] = None) -> None:
+    """Advance the simulation by one step, in place.
+
+    Does nothing once `state.caught` is set. Otherwise: picks an action for
+    every agent (the controlled agent uses `manual_action` when one is given),
+    applies them in the fixed order Predator, Prey, Scout, resolves food and
+    catch, computes rewards, updates the Q-table of any agent whose action
+    came from Q-learning, appends one trace line and increments `state.step`.
+    """
     if state.caught:
         return
+    # copy of the pre-move world, used for reward computation
+    prev = clone_shallow(state)
+    # record optimal distance for efficiency stats
+    pred = state.agents["Predator"]
+    prey = state.agents["Prey"]
+    opt_dist = bfs_distance(state.grid, pred.x, pred.y, prey.x, prey.y)
+    if opt_dist is None:
+        # 999 stands in for "no path between predator and prey"
+        opt_dist = 999
+    # Action selection
+    chosen = {}
+    reasons = {}
+    qinfo = {}
+    # manual action applies to controlled agent
     if manual_action:
+        chosen[state.controlled] = manual_action
+        reasons[state.controlled] = "manual"
+        qinfo[state.controlled] = None
+    # others choose
+    for who in ["Predator", "Prey", "Scout"]:
+        if who in chosen:
             continue
+        # each agent draws from its own RNG stream (21/22/23)
+        act, reason, q_i = choose_action(state, who, stream={"Predator":21,"Prey":22,"Scout":23}[who])
+        chosen[who] = act
+        reasons[who] = reason
+        qinfo[who] = q_i
+    # Apply actions (deterministic order)
+    outcomes = {}
+    for who in ["Predator", "Prey", "Scout"]:
+        outcomes[who] = apply_action(state, who, chosen[who])
+    ate = consume_food(state)
     check_catch(state)
+    # Rewards + Q-updates
+    pred_r = pred_reward(prev, state)
+    prey_r = prey_reward(prev, state, ate_food=ate)
+    q_lines = []
+    if qinfo["Predator"] is not None:
+        k, a_idx = qinfo["Predator"]
+        nk = obs_key(state, "Predator")
+        old, target, new = q_update(state.q_pred, k, a_idx, pred_r, nk, state.cfg.alpha, state.cfg.gamma)
+        q_lines.append(f"Qpred: {k} a={ACTIONS[a_idx]} old={old:.3f} tgt={target:.3f} new={new:.3f}")
+    if qinfo["Prey"] is not None:
+        k, a_idx = qinfo["Prey"]
+        nk = obs_key(state, "Prey")
+        old, target, new = q_update(state.q_prey, k, a_idx, prey_r, nk, state.cfg.alpha, state.cfg.gamma)
+        q_lines.append(f"Qprey: {k} a={ACTIONS[a_idx]} old={old:.3f} tgt={target:.3f} new={new:.3f}")
+    # Trace line
+    dist_now = manhattan(state.agents["Predator"], state.agents["Prey"])
+    # NOTE(review): eff is computed here but not read again in the code shown
+    eff = (opt_dist / max(1, dist_now)) if dist_now > 0 else 1.0
+    trace = (
+        f"t={state.step} optDist~{opt_dist} distNow={dist_now} "
+        f"| Pred:{chosen['Predator']} ({outcomes['Predator']}) [{reasons['Predator']}] r={pred_r:+.3f} "
+        f"| Prey:{chosen['Prey']} ({outcomes['Prey']}) [{reasons['Prey']}] r={prey_r:+.3f} "
+        f"| Scout:{chosen['Scout']} ({outcomes['Scout']}) [{reasons['Scout']}] "
+        f"| ateFood={ate} caught={state.caught}"
+    )
+    if q_lines:
+        trace += " | " + " ; ".join(q_lines)
+    state.trace_log.append(trace)
+    # keep only the newest TRACE_MAX trace lines
+    if len(state.trace_log) > TRACE_MAX:
+        state.trace_log = state.trace_log[-TRACE_MAX:]
     state.step += 1
 # -----------------------------
+# Episode reset + training
 # -----------------------------
+def reset_episode(state: WorldState, seed: Optional[int] = None) -> None:
+    """Reset the world in place for a new episode.
+
+    Grid, agents, view settings, branches and logs come from a fresh
+    init_state(seed); the config, Q-tables and metrics already on `state` are
+    left untouched. `seed` defaults to the state's current seed.
+    """
+    new_seed = state.seed if seed is None else seed
+    fresh = init_state(new_seed)
+    # copy only the world fields; learning state stays on `state`
+    state.seed = fresh.seed
+    state.step = 0
+    state.grid = fresh.grid
+    state.agents = fresh.agents
+    state.controlled = fresh.controlled
+    state.pov = fresh.pov
+    state.overlay = fresh.overlay
+    state.caught = False
+    state.branches = fresh.branches
+    state.event_log = ["Episode reset."]
+    state.trace_log = []
+def run_episode(state: WorldState, max_steps: int) -> Tuple[bool, int, float]:
+    """Tick the world until the prey is caught or `max_steps` ticks have run.
+
+    Returns (caught, steps, path_eff). path_eff is the BFS distance between
+    predator and prey at the start divided by the steps taken, clamped to the
+    0..1 range documented on Metrics; it is 0.0 when no path existed.
+    """
+    start_pred = state.agents["Predator"]
+    start_prey = state.agents["Prey"]
+    opt = bfs_distance(state.grid, start_pred.x, start_pred.y, start_prey.x, start_prey.y)
+    if opt is None:
+        # 999 marks "no path at episode start"
+        opt = 999
+    steps = 0
+    while steps < max_steps and not state.caught:
+        tick(state, manual_action=None)
+        steps += 1
+    caught = state.caught
+    # both agents move, so a catch can take fewer ticks than the initial
+    # shortest path; clamp so the ratio never exceeds 1
+    eff = min(1.0, float(opt / max(1, steps))) if opt < 999 else 0.0
+    return caught, steps, eff
+def train(state: WorldState, episodes: int, max_steps: int) -> None:
+    """Run a batch of training episodes and fold the results into state.metrics.
+
+    Every episode is reset with a seed derived from the seed the world had
+    when training started plus the global episode index, so repeated calls
+    keep producing new but reproducible episodes. Q-tables are updated inside
+    tick(); epsilon decays once per episode, floored at cfg.epsilon_min.
+    The starting seed is put back on `state` when the batch ends.
+    A non-positive `episodes` is a no-op.
+    """
+    if episodes <= 0:
+        # nothing to run, and no per-episode results to report
+        return
+    m = state.metrics
+    cfg = state.cfg
+    # reset_episode() overwrites state.seed, so keep the base seed aside
+    base_seed = state.seed
+    catches = 0
+    total_steps_catch = 0
+    total_eff = 0.0
+    steps = 0
+    eff = 0.0
+    for ep in range(episodes):
+        # deterministically vary episode seed so it doesn't memorize one map-layout only
+        ep_seed = (base_seed * 1_000_003 + (m.episodes + ep) * 97_531) & 0xFFFFFFFF
+        reset_episode(state, seed=int(ep_seed))
+        caught, steps, eff = run_episode(state, max_steps=max_steps)
+        total_eff += eff
+        if caught:
+            catches += 1
+            total_steps_catch += steps
+        # epsilon decay
+        m.epsilon = max(cfg.epsilon_min, m.epsilon * cfg.epsilon_decay)
+    # hand the caller's seed back so later resets replay the world they chose
+    state.seed = base_seed
+    # Update metrics
+    m.episodes += episodes
+    m.catches += catches
+    m.last_episode_steps = steps
+    m.last_episode_eff = eff
+    if catches > 0:
+        # exponential smoothing across batches; the first batch seeds the average
+        avg_steps = total_steps_catch / catches
+        m.avg_steps_to_catch = (
+            0.85 * m.avg_steps_to_catch + 0.15 * avg_steps
+            if m.avg_steps_to_catch > 0 else avg_steps
+        )
+    avg_eff = total_eff / max(1, episodes)
+    m.avg_path_efficiency = (
+        0.85 * m.avg_path_efficiency + 0.15 * avg_eff
+        if m.avg_path_efficiency > 0 else avg_eff
+    )
+    state.event_log.append(
+        f"Training: +{episodes} eps | catches={catches}/{episodes} | "
+        f"avgStepsToCatch~{m.avg_steps_to_catch:.2f} | avgEff~{m.avg_path_efficiency:.2f} | eps={m.epsilon:.3f}"
+    )
+# -----------------------------
+# History / snapshots
+# -----------------------------
+MAX_HISTORY = 1200
 def snapshot_of(state: WorldState) -> Snapshot:
+    """Capture the current step, agents, grid and log tails as a Snapshot.
+
+    Agents are stored as plain dicts and grid rows are copied, so later
+    changes to the live state do not alter the snapshot.
+    """
     return Snapshot(
         step=state.step,
         agents={k: asdict(v) for k, v in state.agents.items()},
         grid=[row[:] for row in state.grid],
         caught=state.caught,
+        event_log_tail=state.event_log[-20:],
+        trace_tail=state.trace_log[-40:],
     )
 def restore_into(state: WorldState, snap: Snapshot) -> None:
+    """Load a snapshot back into `state` in place and log the jump.
+
+    NOTE(review): only the agents and the caught flag are restored in the
+    lines visible here; confirm against the full file whether step and grid
+    are restored as well.
+    """
     for k, d in snap.agents.items():
         state.agents[k] = Agent(**d)
     state.caught = snap.caught
+    state.event_log.append(f"Jumped to snapshot t={snap.step}.")
 # -----------------------------
+# Export / import
 # -----------------------------
+def export_run(state: WorldState, history: List[Snapshot]) -> str:
+    """Serialize the run (settings, config, metrics, Q-tables, history, grid) to indented JSON."""
+    snapshots = [asdict(snap) for snap in history]
+    payload = dict(
+        seed=state.seed,
+        controlled=state.controlled,
+        pov=state.pov,
+        overlay=state.overlay,
+        cfg=asdict(state.cfg),
+        metrics=asdict(state.metrics),
+        q_pred=state.q_pred,
+        q_prey=state.q_prey,
+        history=snapshots,
+        grid=state.grid,
+    )
+    return json.dumps(payload, indent=2)
+def import_run(txt: str) -> Tuple[WorldState, List[Snapshot], Dict[str, np.ndarray], int]:
+    """Rebuild a run from JSON produced by export_run.
+
+    Returns (state, history, fresh belief maps, rewind index). Missing keys
+    fall back to the defaults of a new init_state; when history is present
+    the last snapshot is restored into the state.
+    """
+    payload = json.loads(txt)
+    world = init_state(int(payload.get("seed", 1337)))
+    world.controlled = payload.get("controlled", world.controlled)
+    world.pov = payload.get("pov", world.pov)
+    world.overlay = bool(payload.get("overlay", False))
+    world.grid = payload.get("grid", world.grid)
+    cfg_fields = payload["cfg"] if "cfg" in payload else asdict(world.cfg)
+    world.cfg = TrainConfig(**cfg_fields)
+    metric_fields = payload["metrics"] if "metrics" in payload else asdict(world.metrics)
+    world.metrics = Metrics(**metric_fields)
+    world.q_pred = payload.get("q_pred", {})
+    world.q_prey = payload.get("q_prey", {})
+    snapshots = [Snapshot(**entry) for entry in payload.get("history", [])]
+    belief_maps = init_belief()
+    # the rewind slider points at the newest snapshot, or 0 when there is none
+    last_idx = len(snapshots) - 1 if snapshots else 0
+    if snapshots:
+        restore_into(world, snapshots[-1])
+    world.event_log.append("Imported run.")
+    return world, snapshots, belief_maps, last_idx
+# -----------------------------
+# UI glue
+# -----------------------------
+def build_views(state: WorldState, beliefs: Dict[str, np.ndarray]) -> Tuple[np.ndarray, Image.Image, Image.Image, Image.Image, str, str, str]:
+    """Render everything the UI shows for the current state.
+
+    Updates every agent's belief map first, then returns
+    (POV image, truth map, controlled agent's belief map, other agent's
+    belief map, status text, event-log text, trace text).
+    """
+    for nm, a in state.agents.items():
+        update_belief_for_agent(state, beliefs[nm], a)
+    pov = raycast_view(state, state.agents[state.pov])
     truth_np = np.array(state.grid, dtype=np.int16)
+    truth_img = render_topdown(truth_np, state.agents, f"Truth Map — t={state.step} seed={state.seed}", show_agents=True)
     ctrl = state.controlled
+    # the second belief panel shows Prey when Predator is controlled, otherwise Predator
     other = "Prey" if ctrl == "Predator" else "Predator"
+    b_ctrl = render_topdown(beliefs[ctrl], state.agents, f"{ctrl} Belief", show_agents=True)
+    b_other = render_topdown(beliefs[other], state.agents, f"{other} Belief", show_agents=True)
+    m = state.metrics
     pred = state.agents["Predator"]
     prey = state.agents["Prey"]
     scout = state.agents["Scout"]
     status = (
+        f"Controlled={state.controlled} | POV={state.pov} | caught={state.caught} | eps={m.epsilon:.3f}\n"
+        f"Episodes={m.episodes} | catches={m.catches} | avgStepsToCatch~{m.avg_steps_to_catch:.2f} | avgEff~{m.avg_path_efficiency:.2f}\n"
+        f"Pred({pred.x},{pred.y}) o={pred.ori} | Prey({prey.x},{prey.y}) o={prey.ori} e={prey.energy} | Scout({scout.x},{scout.y}) o={scout.ori}"
     )
+    # only the newest 18 lines of each log are displayed
+    events = "\n".join(state.event_log[-18:])
+    trace = "\n".join(state.trace_log[-18:])
+    return pov, truth_img, b_ctrl, b_other, status, events, trace
 def grid_click_to_tile(evt: gr.SelectData, selected_tile: int, state: WorldState) -> WorldState:
     x_px, y_px = evt.index
+    y_px -= 28
     if y_px < 0:
         return state
     gx = int(x_px // TILE)
     if gx == 0 or gy == 0 or gx == GRID_W - 1 or gy == GRID_H - 1:
         return state
     state.grid[gy][gx] = selected_tile
+    state.event_log.append(f"t={state.step}: Tile ({gx},{gy}) -> {TILE_NAMES.get(selected_tile)}")
     return state
 # -----------------------------
+# Gradio App
 # -----------------------------
+with gr.Blocks(title="ChronoSandbox++ — Training Arena") as demo:
     gr.Markdown(
+        "## ChronoSandbox++ — Instrumented Agent Training Arena\n"
+        "Track every interaction, train policies, and audit why outcomes happened.\n"
+        "No timers (compatibility). Use Tick/Run/Train for controlled experiments."
     )
+    st = gr.State(init_state(1337))
+    history = gr.State([snapshot_of(init_state(1337))])
     beliefs = gr.State(init_belief())
+    rewind_idx = gr.State(0)
     with gr.Row():
+        pov_img = gr.Image(label="POV (Pseudo-3D)", type="numpy", width=VIEW_W, height=VIEW_H)
         with gr.Column():
+            status = gr.Textbox(label="Status + Metrics", lines=4)
+            events = gr.Textbox(label="Event Log", lines=10)
+            trace = gr.Textbox(label="Step Trace (why it happened)", lines=10)
     with gr.Row():
         truth = gr.Image(label="Truth Map (click to edit tiles)", type="pil")
+        belief_a = gr.Image(label="Belief (Controlled)", type="pil")
+        belief_b = gr.Image(label="Belief (Other)", type="pil")
     with gr.Row():
         with gr.Column(scale=2):
+            gr.Markdown("### Manual Controls")
             with gr.Row():
+                btn_L = gr.Button("L")
+                btn_F = gr.Button("F")
+                btn_R = gr.Button("R")
             with gr.Row():
+                btn_tick = gr.Button("Tick")
+                run_steps = gr.Number(value=25, label="Run N steps", precision=0)
+                btn_run = gr.Button("Run")
             with gr.Row():
+                btn_toggle_control = gr.Button("Toggle Controlled")
+                btn_toggle_pov = gr.Button("Toggle POV")
+                overlay = gr.Checkbox(False, label="Overlay reticle")
             tile_pick = gr.Radio(
                 choices=[(TILE_NAMES[k], k) for k in [EMPTY, WALL, FOOD, NOISE, DOOR, TELE]],
                 value=WALL,
+                label="Paint tile type"
             )
+        with gr.Column(scale=3):
+            gr.Markdown("### Training Controls (Q-learning)")
+            use_q_pred = gr.Checkbox(True, label="Use Q-learning: Predator")
+            use_q_prey = gr.Checkbox(True, label="Use Q-learning: Prey")
+            alpha = gr.Slider(0.01, 0.5, value=0.15, step=0.01, label="alpha (learn rate)")
+            gamma = gr.Slider(0.5, 0.99, value=0.95, step=0.01, label="gamma (discount)")
+            eps = gr.Slider(0.0, 0.5, value=0.10, step=0.01, label="epsilon (exploration)")
+            eps_decay = gr.Slider(0.90, 0.999, value=0.995, step=0.001, label="epsilon decay")
+            eps_min = gr.Slider(0.0, 0.2, value=0.02, step=0.01, label="epsilon min")
+            episodes = gr.Number(value=50, label="Train episodes", precision=0)
+            max_steps = gr.Number(value=250, label="Max steps per episode", precision=0)
+            btn_train = gr.Button("Train")
+            btn_reset = gr.Button("Reset Episode")
+            btn_reset_all = gr.Button("Reset ALL (wipe Q + metrics)")
+    with gr.Row():
+        with gr.Column():
+            rewind = gr.Slider(0, 0, value=0, step=1, label="Rewind (history index)")
+            btn_jump = gr.Button("Jump")
+        with gr.Column():
             export_box = gr.Textbox(label="Export JSON", lines=10)
+            btn_export = gr.Button("Export")
+        with gr.Column():
             import_box = gr.Textbox(label="Import JSON", lines=10)
+            btn_import = gr.Button("Import")
+    def refresh(state: WorldState, hist: List[Snapshot], bel: Dict[str, np.ndarray], r: int):
         r_max = max(0, len(hist) - 1)
+        r = max(0, min(int(r), r_max))
+        pov, tr, ba, bb, stxt, etxt, ttxt = build_views(state, bel)
         return (
+            pov, tr, ba, bb,
+            stxt, etxt, ttxt,
+            gr.update(maximum=r_max, value=r),
+            r
         )
+    def push_hist(state: WorldState, hist: List[Snapshot]) -> List[Snapshot]:
         hist.append(snapshot_of(state))
         if len(hist) > MAX_HISTORY:
             hist.pop(0)
+        return hist
+    def set_cfg(state: WorldState, uq_pred: bool, uq_prey: bool, a: float, g: float, e: float, ed: float, emin: float):
+        state.cfg.use_q_pred = bool(uq_pred)
+        state.cfg.use_q_prey = bool(uq_prey)
+        state.cfg.alpha = float(a)
+        state.cfg.gamma = float(g)
+        state.metrics.epsilon = float(e)
+        state.cfg.epsilon_decay = float(ed)
+        state.cfg.epsilon_min = float(emin)
+        return state
+    def do_manual(state, hist, bel, r, act):
+        tick(state, manual_action=act)
+        hist = push_hist(state, hist)
+        r = len(hist) - 1
+        out = refresh(state, hist, bel, r)
+        return out + (state, hist, bel, r)
+    def do_tick(state, hist, bel, r):
+        tick(state, manual_action=None)
+        hist = push_hist(state, hist)
+        r = len(hist) - 1
+        out = refresh(state, hist, bel, r)
+        return out + (state, hist, bel, r)
+    def do_run(state, hist, bel, r, n):
+        n = max(1, int(n))
+        for _ in range(n):
+            if state.caught:
+                break
+            tick(state, manual_action=None)
+        hist = push_hist(state, hist)
+        r = len(hist) - 1
+        out = refresh(state, hist, bel, r)
+        return out + (state, hist, bel, r)
+    def toggle_control(state, hist, bel, r):
         order = ["Predator", "Prey", "Scout"]
         i = order.index(state.controlled)
         state.controlled = order[(i + 1) % len(order)]
+        state.event_log.append(f"Controlled -> {state.controlled}")
+        hist = push_hist(state, hist)
+        r = len(hist) - 1
+        out = refresh(state, hist, bel, r)
+        return out + (state, hist, bel, r)
+    def toggle_pov(state, hist, bel, r):
         order = ["Predator", "Prey", "Scout"]
         i = order.index(state.pov)
         state.pov = order[(i + 1) % len(order)]
+        state.event_log.append(f"POV -> {state.pov}")
+        hist = push_hist(state, hist)
+        r = len(hist) - 1
+        out = refresh(state, hist, bel, r)
+        return out + (state, hist, bel, r)
+    def set_overlay(state, hist, bel, r, ov):
+        state.overlay = bool(ov)
+        out = refresh(state, hist, bel, r)
+        return out + (state, hist, bel, r)
+    def click_truth(tile, state, hist, bel, r, evt: gr.SelectData):
         state = grid_click_to_tile(evt, int(tile), state)
+        hist = push_hist(state, hist)
+        r = len(hist) - 1
+        out = refresh(state, hist, bel, r)
+        return out + (state, hist, bel, r)
+    def jump(state, hist, bel, r, idx):
+        if not hist:
+            out = refresh(state, hist, bel, r)
+            return out + (state, hist, bel, r)
+        idx = max(0, min(int(idx), len(hist) - 1))
+        restore_into(state, hist[idx])
+        r = idx
+        out = refresh(state, hist, bel, r)
+        return out + (state, hist, bel, r)
+    def reset_ep(state, hist, bel, r):
+        reset_episode(state, seed=state.seed)
+        hist = [snapshot_of(state)]
+        r = 0
+        bel = init_belief()
+        out = refresh(state, hist, bel, r)
+        return out + (state, hist, bel, r)
+    def reset_all(state, hist, bel, r):
+        seed = state.seed
+        state = init_state(seed)
+        hist = [snapshot_of(state)]
+        bel = init_belief()
+        r = 0
+        out = refresh(state, hist, bel, r)
+        return out + (state, hist, bel, r)
+    def do_train(state, hist, bel, r,
+                 uq_pred, uq_prey, a, g, e, ed, emin,
+                 eps_count, max_s):
+        state = set_cfg(state, uq_pred, uq_prey, a, g, e, ed, emin)
+        train(state, episodes=max(1, int(eps_count)), max_steps=max(10, int(max_s)))
+        # After training, reset to a clean episode so user sees improved behavior
+        reset_episode(state, seed=state.seed)
+        hist = [snapshot_of(state)]
+        bel = init_belief()
+        r = 0
+        out = refresh(state, hist, bel, r)
+        return out + (state, hist, bel, r)
+    def export_fn(state, hist):
         return export_run(state, hist)
+    def import_fn(txt):
+        state, hist, bel, r = import_run(txt)
+        pov, tr, ba, bb, stxt, etxt, ttxt = build_views(state, bel)
         r_max = max(0, len(hist) - 1)
         return (
+            pov, tr, ba, bb, stxt, etxt, ttxt,
+            gr.update(maximum=r_max, value=r),
+            state, hist, bel, r
         )
+    # --- Wire buttons (no fn_kwargs; use lambdas) ---
+    btn_L.click(lambda s,h,b,r: do_manual(s,h,b,r,"L"),
+                inputs=[st, history, beliefs, rewind_idx],
+                outputs=[pov_img, truth, belief_a, belief_b, status, events, trace, rewind, rewind_idx, st, history, beliefs, rewind_idx],
+                queue=True)
+    btn_F.click(lambda s,h,b,r: do_manual(s,h,b,r,"F"),
+                inputs=[st, history, beliefs, rewind_idx],
+                outputs=[pov_img, truth, belief_a, belief_b, status, events, trace, rewind, rewind_idx, st, history, beliefs, rewind_idx],
+                queue=True)
+    btn_R.click(lambda s,h,b,r: do_manual(s,h,b,r,"R"),
+                inputs=[st, history, beliefs, rewind_idx],
+                outputs=[pov_img, truth, belief_a, belief_b, status, events, trace, rewind, rewind_idx, st, history, beliefs, rewind_idx],
+                queue=True)
+    btn_tick.click(do_tick,
+                   inputs=[st, history, beliefs, rewind_idx],
+                   outputs=[pov_img, truth, belief_a, belief_b, status, events, trace, rewind, rewind_idx, st, history, beliefs, rewind_idx],
+                   queue=True)
+    btn_run.click(do_run,
+                  inputs=[st, history, beliefs, rewind_idx, run_steps],
+                  outputs=[pov_img, truth, belief_a, belief_b, status, events, trace, rewind, rewind_idx, st, history, beliefs, rewind_idx],
+                  queue=True)
+    btn_toggle_control.click(toggle_control,
+                             inputs=[st, history, beliefs, rewind_idx],
+                             outputs=[pov_img, truth, belief_a, belief_b, status, events, trace, rewind, rewind_idx, st, history, beliefs, rewind_idx],
+                             queue=True)
+    btn_toggle_pov.click(toggle_pov,
+                         inputs=[st, history, beliefs, rewind_idx],
+                         outputs=[pov_img, truth, belief_a, belief_b, status, events, trace, rewind, rewind_idx, st, history, beliefs, rewind_idx],
+                         queue=True)
+    overlay.change(set_overlay,
+                   inputs=[st, history, beliefs, rewind_idx, overlay],
+                   outputs=[pov_img, truth, belief_a, belief_b, status, events, trace, rewind, rewind_idx, st, history, beliefs, rewind_idx],
+                   queue=True)
+    truth.select(click_truth,
+                 inputs=[tile_pick, st, history, beliefs, rewind_idx],
+                 outputs=[pov_img, truth, belief_a, belief_b, status, events, trace, rewind, rewind_idx, st, history, beliefs, rewind_idx],
+                 queue=True)
+    btn_jump.click(jump,
+                   inputs=[st, history, beliefs, rewind_idx, rewind],
+                   outputs=[pov_img, truth, belief_a, belief_b, status, events, trace, rewind, rewind_idx, st, history, beliefs, rewind_idx],
+                   queue=True)
+    btn_reset.click(reset_ep,
+                    inputs=[st, history, beliefs, rewind_idx],
+                    outputs=[pov_img, truth, belief_a, belief_b, status, events, trace, rewind, rewind_idx, st, history, beliefs, rewind_idx],
+                    queue=True)
+    btn_reset_all.click(reset_all,
+                        inputs=[st, history, beliefs, rewind_idx],
+                        outputs=[pov_img, truth, belief_a, belief_b, status, events, trace, rewind, rewind_idx, st, history, beliefs, rewind_idx],
+                        queue=True)
+    btn_train.click(do_train,
+                    inputs=[st, history, beliefs, rewind_idx,
+                            use_q_pred, use_q_prey, alpha, gamma, eps, eps_decay, eps_min,
+                            episodes, max_steps],
+                    outputs=[pov_img, truth, belief_a, belief_b, status, events, trace, rewind, rewind_idx, st, history, beliefs, rewind_idx],
+                    queue=True)
     btn_export.click(export_fn, inputs=[st, history], outputs=[export_box], queue=True)
+    btn_import.click(import_fn,
+                     inputs=[import_box],
+                     outputs=[pov_img, truth, belief_a, belief_b, status, events, trace, rewind, st, history, beliefs, rewind_idx],
+                     queue=True)
+    demo.load(refresh,
+              inputs=[st, history, beliefs, rewind_idx],
+              outputs=[pov_img, truth, belief_a, belief_b, status, events, trace, rewind, rewind_idx],
+              queue=True)
 demo.queue().launch()