Spaces:

beanapologist
/

arc-agi

Sleeping

App Files Files Community

beanapologist committed 28 days ago

Commit

0919b50

verified ·

1 Parent(s): 746c56d

Update app.py

Browse files

Files changed (1) hide show

app.py +422 -195

app.py CHANGED Viewed

@@ -1,12 +1,11 @@
 """
-ARC-AGI-3 Agent Spectator v3
 Hugging Face Space: beanapologist/arc-agi
-Watch the Re/Im CNN agent explore live ARC-AGI-3 games.
-- Shaped reward: +10 level-up / +0.1 frame-change / -0.01 dead
-- Live reward history chart
-- Level completion timeline
-- Auto-loads ARC_API_KEY from HF secret
 """
 import gradio as gr
@@ -18,7 +17,7 @@ from matplotlib.colors import ListedColormap
 import torch
 import torch.nn as nn
 import torch.nn.functional as TF
-import io, json, os, time, threading, queue
 from collections import deque
 from PIL import Image
@@ -30,21 +29,7 @@ ARC_CMAP = ListedColormap(ARC_HEX)
 COLOR_NAMES = ['black','blue','red','green','yellow',
                'purple','orange','gray','azure','maroon']
-# ── Feature extractor ─────────────────────────────────────────────────────────
-def _cc(mask):
-    labels=np.zeros_like(mask,dtype=np.int32); cur=0; H,W=mask.shape
-    for r in range(H):
-        for c in range(W):
-            if mask[r,c] and labels[r,c]==0:
-                cur+=1; q=[(r,c)]; labels[r,c]=cur
-                while q:
-                    y,x=q.pop()
-                    for dy,dx in [(-1,0),(1,0),(0,-1),(0,1)]:
-                        ny,nx=y+dy,x+dx
-                        if 0<=ny<H and 0<=nx<W and mask[ny,nx] and labels[ny,nx]==0:
-                            labels[ny,nx]=cur; q.append((ny,nx))
-    return labels
 def _sobel(f):
     p=np.pad(f,1,mode='edge')
@@ -52,6 +37,25 @@ def _sobel(f):
     gy=(-p[:-2,:-2]-2*p[:-2,1:-1]-p[:-2,2:]+p[2:,:-2]+2*p[2:,1:-1]+p[2:,2:])/8
     return gx,gy
 def _sym(grid,axis):
     H,W=grid.shape; s=np.zeros((H,W),np.float32)
     if axis=='h':
@@ -66,23 +70,137 @@ def _sym(grid,axis):
             s[y,:]=(grid[y-r:y,:]==grid[y+1:y+r+1,:][::-1,:]).mean()
     return s
-def _boundary(grid):
-    p=np.pad(grid,1,mode='edge')
-    return ((p[1:-1,1:-1]!=p[:-2,1:-1])|(p[1:-1,1:-1]!=p[2:,1:-1])|
-            (p[1:-1,1:-1]!=p[1:-1,:-2])|(p[1:-1,1:-1]!=p[1:-1,2:])).astype(np.float32)
-def extract_features_fast(grid, num_colours=10):
     H,W=grid.shape
-    one_hot=np.zeros((num_colours,H,W),dtype=np.float32)
-    for c in range(num_colours): one_hot[c]=(grid==c).astype(np.float32)
     gx,gy=_sobel(grid.astype(np.float32)/9)
-    stacked=np.concatenate([
-        one_hot,
-        _sym(grid,'h')[np.newaxis],
-        _sym(grid,'v')[np.newaxis],
-        _boundary(grid)[np.newaxis],
-        np.sqrt(gx**2+gy**2)[np.newaxis].astype(np.float32),
-    ],axis=0)
     t=torch.from_numpy(stacked).float().unsqueeze(0)
     if H!=64 or W!=64:
         t=TF.interpolate(t,size=(64,64),mode='bilinear',align_corners=False)
@@ -90,15 +208,16 @@ def extract_features_fast(grid, num_colours=10):
 # ── Rendering ─────────────────────────────────────────────────────────────────
-def _fig_to_pil(fig):
     buf=io.BytesIO()
-    fig.savefig(buf,format='png',dpi=80,bbox_inches='tight',facecolor=fig.get_facecolor())
     buf.seek(0); img=Image.open(buf).copy(); plt.close(fig)
     return img
-def render_grid(grid, title='', highlight_diff=None):
     if grid is None: return None
-    H,W=grid.shape; cell=max(28,min(60,360//max(H,W)))
     fig,ax=plt.subplots(figsize=((W*cell+4)/72,(H*cell+22)/72),dpi=72)
     fig.patch.set_facecolor('#1e1e2e'); ax.set_facecolor('#1e1e2e')
     ax.imshow(grid,cmap=ARC_CMAP,vmin=0,vmax=9,interpolation='nearest',aspect='equal')
@@ -109,68 +228,78 @@ def render_grid(grid, title='', highlight_diff=None):
             v=int(grid[r,c])
             col='white' if v in [0,1,2,3,5,6,9] else 'black'
             ax.text(c,r,str(v),ha='center',va='center',
-                    fontsize=max(7,cell//5),color=col,fontweight='bold',fontfamily='monospace')
-            if highlight_diff is not None and highlight_diff[r,c]:
-                ax.add_patch(plt.Rectangle((c-.5,r-.5),1,1,
-                    fill=True,facecolor='#ffffff',alpha=0.25,lw=0))
     ax.set_xlim(-.5,W-.5); ax.set_ylim(H-.5,-.5); ax.axis('off')
-    if title: ax.set_title(title,color='#cdd6f4',fontsize=10,pad=4)
     plt.tight_layout(pad=.3)
-    return _fig_to_pil(fig)
-def render_action_bar(action_counts, total):
     if not action_counts or total==0: return None
     labels=[f"A{k}" for k in sorted(action_counts)]
     vals  =[action_counts[k] for k in sorted(action_counts)]
     pcts  =[v/total*100 for v in vals]
-    fig,ax=plt.subplots(figsize=(5,1.6))
     fig.patch.set_facecolor('#1e1e2e'); ax.set_facecolor('#1e1e2e')
     colors=['#4a9eff','#e05050','#50c050','#f5c400','#c060c0','#d07030']
     bars=ax.barh(labels,pcts,color=colors[:len(labels)],height=0.6)
     for bar,v,p in zip(bars,vals,pcts):
         ax.text(min(p+1,98),bar.get_y()+bar.get_height()/2,
-                f'{v} ({p:.0f}%)',va='center',color='white',fontsize=8)
-    ax.set_xlim(0,105); ax.set_xlabel('% of actions',color='#888',fontsize=8)
-    ax.tick_params(colors='#888',labelsize=8); ax.spines[:].set_visible(False)
     plt.tight_layout(pad=.4)
-    return _fig_to_pil(fig)
 def render_reward_chart(reward_history):
-    if len(reward_history) < 2: return None
-    fig,ax=plt.subplots(figsize=(6,2))
     fig.patch.set_facecolor('#1e1e2e'); ax.set_facecolor('#1e1e2e')
-    steps=list(range(len(reward_history)))
-    rewards=list(reward_history)
-    # Color by reward type
-    for i,(s,r) in enumerate(zip(steps,rewards)):
         col='#ffd700' if r>=5 else ('#50c050' if r>0 else '#e05050')
-        ax.bar(s,r,color=col,width=1,alpha=0.8)
     ax.axhline(0,color='#555',lw=0.5)
-    ax.set_xlim(0,max(len(steps),1))
-    ax.set_ylabel('Reward',color='#888',fontsize=8)
-    ax.set_xlabel('Step',color='#888',fontsize=8)
     ax.tick_params(colors='#888',labelsize=7); ax.spines[:].set_visible(False)
-    ax.set_title('Reward history  🟡=level-up  🟢=change  🔴=dead',
                  color='#cdd6f4',fontsize=8,pad=3)
     plt.tight_layout(pad=.3)
-    return _fig_to_pil(fig)
-def render_level_timeline(level_history):
-    if not level_history: return None
-    fig,ax=plt.subplots(figsize=(6,1.4))
-    fig.patch.set_facecolor('#1e1e2e'); ax.set_facecolor('#1e1e2e')
-    for step,level in level_history:
-        ax.axvline(step,color='#ffd700',lw=2,alpha=0.9)
-        ax.text(step,0.5,f'L{level}',color='#ffd700',fontsize=8,
-                ha='center',va='center',fontweight='bold')
-    ax.set_xlim(0,max(s for s,_ in level_history)+10)
-    ax.set_ylim(0,1); ax.axis('off')
-    ax.set_title(f'Level completions — {len(level_history)} total',
-                 color='#cdd6f4',fontsize=9,pad=3)
-    plt.tight_layout(pad=.2)
-    return _fig_to_pil(fig)
-# ── TinyAgent ─────────────────────────────────────────────────────────────────
 class TinyAgent:
     def __init__(self):
@@ -179,12 +308,12 @@ class TinyAgent:
         self.opt=torch.optim.Adam(self.model.parameters(),lr=1e-4)
         self.buf=[]; self.prev_feat=None; self.prev_action=None
         self.step_count=0; self.action_counts={}; self.prev_levels=0
-        self.reward_history=deque(maxlen=200)
-        self.level_history=[]
-        # Shaped reward — matches Kaggle submission
-        self.level_up_reward=10.0
-        self.change_reward=0.1
-        self.dead_penalty=-0.01
     def _make_model(self):
         return nn.Sequential(
@@ -201,53 +330,121 @@ class TinyAgent:
         self.opt=torch.optim.Adam(self.model.parameters(),lr=1e-4)
         self.buf=[]; self.prev_feat=None; self.prev_action=None
         self.step_count=0; self.action_counts={}; self.prev_levels=0
-        self.reward_history=deque(maxlen=200)
-        self.level_history=[]
-    def choose(self, grid, available_actions=None, levels=0):
-        feat=extract_features_fast(grid).to(self.device)
         # Store shaped experience
         if self.prev_feat is not None:
             changed=not np.array_equal(
                 self.prev_feat.cpu().numpy(),feat.cpu().numpy())
             level_up=levels>self.prev_levels
-            if level_up:
-                reward=self.level_up_reward
                 self.level_history.append((self.step_count,levels))
             elif changed:
-                reward=self.change_reward
             else:
-                reward=self.dead_penalty
             self.reward_history.append(reward)
             self.buf.append((self.prev_feat,self.prev_action,reward))
             if len(self.buf)>500: self.buf.pop(0)
         self.prev_levels=levels
         if self.step_count%10==0 and len(self.buf)>=16:
             self._train()
         with torch.no_grad():
             logits=self.model(feat.unsqueeze(0)).squeeze(0)
-            mask=list(range(1,7))
             if available_actions:
-                mask=[int(a.value if hasattr(a,'value') else a)
-                      for a in available_actions
-                      if int(a.value if hasattr(a,'value') else a)<=6]
-            indices=[m-1 for m in mask if 1<=m<=6]
             masked=torch.full((6,),float('-inf'))
             for i in indices: masked[i]=logits[i]
             probs=torch.softmax(masked,dim=0).cpu().numpy()
             probs=np.nan_to_num(probs,nan=0)
-            if probs.sum()==0: probs[indices]=1/len(indices)
             probs=probs/probs.sum()
-            action_idx=np.random.choice(6,p=probs)
-        self.prev_feat=feat; self.prev_action=action_idx; self.step_count+=1
-        a_id=action_idx+1
         self.action_counts[a_id]=self.action_counts.get(a_id,0)+1
         try:
             from arcengine import GameAction
             action=GameAction(a_id)
         except Exception:
             action=a_id
-        return action,dict(probs=probs.tolist())
     def _train(self):
         import random
@@ -269,13 +466,12 @@ _stop_flag  = threading.Event()
 _run_thread = None
 _frame_queue= queue.Queue(maxsize=60)
-def _run_agent(game_id, api_key, max_steps):
     import arc_agi
     try:
         arc=arc_agi.Arcade(arc_api_key=api_key)
         env=arc.make(game_id,include_frame_data=True)
-        frame=env.reset()
-        _agent.reset()
         prev_grid=None; step=0
         while not _stop_flag.is_set() and step<max_steps:
             if frame is None: break
@@ -283,42 +479,41 @@ def _run_agent(game_id, api_key, max_steps):
             grid=raw[-1] if raw.ndim==3 else raw
             avail=getattr(frame,'available_actions',None)
             levels=getattr(frame,'levels_completed',0)
-            action,info=_agent.choose(grid,avail,levels=levels)
             diff=(grid!=prev_grid) if prev_grid is not None else None
             prev_grid=grid.copy()
             _frame_queue.put({
-                'grid':   grid,
-                'diff':   diff,
-                'step':   step,
-                'action': int(action.value if hasattr(action,'value') else action),
-                'levels': levels,
-                'state':  str(getattr(frame,'state','')),
-                'probs':  info['probs'],
-                'counts': dict(_agent.action_counts),
-                'reward_history': list(_agent.reward_history),
-                'level_history':  list(_agent.level_history),
             },block=True,timeout=5)
-            state_str=str(getattr(frame,'state',''))
             if 'WIN' in state_str or 'GAME_OVER' in state_str: break
             try:
                 from arcengine import GameAction as GA
-                step_action=GA(int(action.value if hasattr(action,'value') else action))
             except Exception:
-                step_action=action
-            frame=env.step(step_action)
             step+=1
-            time.sleep(0.05)
         _frame_queue.put({'done':True,'step':step,
                           'level_history':list(_agent.level_history)})
     except Exception as e:
         _frame_queue.put({'error':str(e)})
-# ── Pull latest frame ─────────────────────────────────────────────────────────
-_latest={
-    'grid_img':None,'bar_img':None,'reward_img':None,'level_img':None,
-    'status':'*Waiting for agent...*'
-}
 def pull_frame():
     global _latest
@@ -328,57 +523,79 @@ def pull_frame():
         except queue.Empty: break
     if data is None:
-        return (_latest['grid_img'],_latest['bar_img'],
-                _latest['reward_img'],_latest['level_img'],
-                _latest['status'])
     if 'error' in data:
         _latest['status']=f"**Error:** {data['error']}"
-        return (_latest['grid_img'],_latest['bar_img'],
-                _latest['reward_img'],_latest['level_img'],
-                _latest['status'])
     if data.get('done'):
         lh=data.get('level_history',[])
-        _latest['status']=(
-            f"**Done** — {data['step']} steps | "
-            f"{len(lh)} levels completed")
-        _latest['level_img']=render_level_timeline(lh)
-        return (_latest['grid_img'],_latest['bar_img'],
-                _latest['reward_img'],_latest['level_img'],
-                _latest['status'])
-    grid   =data['grid'];  diff  =data['diff']
-    step   =data['step'];  levels=data['levels']
-    action =data['action'];state =data['state']
-    counts =data['counts'];probs =data['probs']
-    rh     =data['reward_history']
-    lh     =data['level_history']
-    _latest['grid_img']  =render_grid(grid,
-        title=f"Step {step}  |  Action A{action}  |  Levels {levels}",
-        highlight_diff=diff)
-    _latest['bar_img']   =render_action_bar(counts,sum(counts.values()))
-    _latest['reward_img']=render_reward_chart(rh)
-    _latest['level_img'] =render_level_timeline(lh) if lh else None
-    action_names={1:'A1',2:'A2',3:'A3',4:'A4',5:'A5',6:'A6(click)'}
-    prob_str='  '.join(
-        f"**{action_names.get(i+1,str(i+1))}** {p:.2f}"
-        for i,p in enumerate(probs))
-    last_r=rh[-1] if rh else 0
     r_emoji='🟡' if last_r>=5 else ('🟢' if last_r>0 else '🔴')
     _latest['status']=(
-        f"**Step:** {step} &nbsp;|&nbsp; **Action:** A{action}"
-        f" &nbsp;|&nbsp; **Levels:** {levels}"
-        f" &nbsp;|&nbsp; **Last reward:** {r_emoji} `{last_r:.2f}`"
-        f" &nbsp;|&nbsp; **State:** {state}\n\n"
-        f"Probs: {prob_str}")
-    return (_latest['grid_img'],_latest['bar_img'],
-            _latest['reward_img'],_latest['level_img'],
-            _latest['status'])
 # ── Handlers ──────────────────────────────────────────────────────────────────
@@ -396,7 +613,7 @@ def fetch_games(api_key):
 def start_agent(game_id,api_key,max_steps):
     global _run_thread,_stop_flag
     if not game_id: return "Select a game first."
-    if not api_key: return "Enter your API key first."
     _stop_flag.set()
     if _run_thread and _run_thread.is_alive(): _run_thread.join(timeout=3)
     while not _frame_queue.empty():
@@ -406,7 +623,7 @@ def start_agent(game_id,api_key,max_steps):
     _run_thread=threading.Thread(
         target=_run_agent,args=(game_id,api_key,int(max_steps)),daemon=True)
     _run_thread.start()
-    return f"Agent started on **{game_id}** for {int(max_steps)} steps. Reward: 🟡+10 level / 🟢+0.1 change / 🔴-0.01 dead"
 def stop_agent():
     _stop_flag.set()
@@ -414,21 +631,22 @@ def stop_agent():
 # ── UI ────────────────────────────────────────────────────────────────────────
-with gr.Blocks(title="ARC-AGI-3 Agent Spectator") as demo:
     gr.Markdown("""
-# ARC-AGI-3 Agent Spectator
-Watch the Re/Im CNN agent explore live ARC-AGI-3 games in real time.
-Get your API key at [docs.arcprize.org/api-keys](https://docs.arcprize.org/api-keys).
-Reward signal: 🟡 **+10** level completed · 🟢 **+0.1** frame changed · 🔴 **-0.01** no change
 """)
     with gr.Row():
         with gr.Column(scale=3):
-            api_box=gr.Textbox(
-                label="ARC API key",type="password",
-                value=os.environ.get("ARC_API_KEY",""),
-                placeholder="arc-key-... (or set ARC_API_KEY as HF Space secret)")
         with gr.Column(scale=1):
             fetch_btn=gr.Button("Fetch games")
@@ -442,21 +660,32 @@ Reward signal: 🟡 **+10** level completed · 🟢 **+0.1** frame changed ·
                 start_btn=gr.Button("▶ Watch",variant="primary")
                 stop_btn =gr.Button("■ Stop")
-    run_status=gr.Markdown("*Press Fetch games, select a game, then Watch.*")
     api_status=gr.Markdown()
     gr.Markdown("---")
     with gr.Row():
-        grid_img  =gr.Image(label="Current frame",   type="pil",interactive=False,height=300)
-        bar_img   =gr.Image(label="Action frequency",type="pil",interactive=False,height=300)
     with gr.Row():
-        reward_img=gr.Image(label="Reward history",  type="pil",interactive=False,height=160)
-        level_img =gr.Image(label="Level completions",type="pil",interactive=False,height=160)
     timer=gr.Timer(value=1.0)
-    timer.tick(pull_frame,outputs=[grid_img,bar_img,reward_img,level_img,run_status])
     fetch_btn.click(fetch_games,inputs=api_box,outputs=[game_dd,api_status])
     start_btn.click(start_agent,inputs=[game_dd,api_box,steps_sl],outputs=run_status)
@@ -464,16 +693,14 @@ Reward signal: 🟡 **+10** level completed · 🟢 **+0.1** frame changed ·
     gr.Markdown("""
 ---
-**How it works:** Each 64×64 frame is encoded as 14 feature channels
-(10 one-hot colors + H-symmetry + V-symmetry + boundary contour + edge magnitude)
-and fed through a tiny 3-layer CNN. The reward signal matches the Kaggle submission exactly:
-+10 for advancing a level, +0.1 for any frame change, -0.01 for dead actions.
-The CNN trains online every 10 steps, learning to chase level completions over time.
-**Re/Im duality:** The 4 Im-side channels (symmetry, boundary, edge) give the CNN
-geometric priors about the puzzle structure before any learning happens —
-the same insight from the *Decoding Complex Objects* framework.
 """)
 if __name__ == "__main__":
-    demo.launch()

 """
+ARC-AGI-3 Agent Spectator v4
 Hugging Face Space: beanapologist/arc-agi
+Re/Im solver live demo:
+  Im side = bird's eye hypothesis (which transformation?)
+  Re side = exact diff (which cells to click?)
+  Bridge  = ACTION6 at the Re-side coordinates that close the gap
 """
 import gradio as gr
 import torch
 import torch.nn as nn
 import torch.nn.functional as TF
+import io, os, time, threading, queue
 from collections import deque
 from PIL import Image
 COLOR_NAMES = ['black','blue','red','green','yellow',
                'purple','orange','gray','azure','maroon']
+# ── Re/Im primitives ──────────────────────────────────────────────────────────
 def _sobel(f):
     p=np.pad(f,1,mode='edge')
     gy=(-p[:-2,:-2]-2*p[:-2,1:-1]-p[:-2,2:]+p[2:,:-2]+2*p[2:,1:-1]+p[2:,2:])/8
     return gx,gy
def _sym_axis(grid, axis):
    """Locate the mirror line along which *grid* is most symmetric.

    With ``axis == 'h'`` every interior column is tried as a mirror line;
    any other value tries every interior row instead.  A candidate's score
    is the fraction of cells that match between the equally wide bands on
    either side of it (the candidate line itself is not compared).

    Returns ``(index, score)``.  Ties keep the first candidate found, and
    ``(0, 0.0)`` is returned when no candidate scores above zero.
    """
    # Scanning rows of the grid is the same as scanning columns of its
    # transpose, so a single loop serves both orientations.
    view = grid if axis == 'h' else grid.T
    span = view.shape[1]
    top_score, top_index = 0.0, 0
    for idx in range(1, span - 1):
        reach = min(idx, span - 1 - idx)
        near_side = view[:, idx - reach:idx]
        far_side = view[:, idx + 1:idx + reach + 1][:, ::-1]
        score = (near_side == far_side).mean()
        if score > top_score:
            top_score, top_index = score, idx
    return top_index, top_score
def _boundary(grid):
    """Flag every cell that differs from at least one 4-connected neighbour.

    The grid is edge-padded first, so cells on the outer border are only
    compared against real neighbours (the padding repeats their own value).

    Returns a float32 array of the same shape holding 1.0 for flagged cells
    and 0.0 elsewhere.
    """
    padded = np.pad(grid, 1, mode='edge')
    centre = padded[1:-1, 1:-1]
    neighbours = (
        padded[:-2, 1:-1],   # above
        padded[2:, 1:-1],    # below
        padded[1:-1, :-2],   # left
        padded[1:-1, 2:],    # right
    )
    differs = np.zeros(centre.shape, dtype=bool)
    for shifted in neighbours:
        differs |= centre != shifted
    return differs.astype(np.float32)
 def _sym(grid,axis):
     H,W=grid.shape; s=np.zeros((H,W),np.float32)
     if axis=='h':
             s[y,:]=(grid[y-r:y,:]==grid[y+1:y+r+1,:][::-1,:]).mean()
     return s
+# ── Im-side: candidate transforms ────────────────────────────────────────────
def _h_mirror(grid):
    """Hypothesis: the right side should be completed as a mirror of the left.

    The best vertical symmetry axis ``ax`` (from ``_sym_axis``) splits the
    grid into a left part (columns ``< ax``) and a right part (columns
    ``>= ax``).  The hypothesis is rejected when the left part has no
    non-zero cells, or when the right part already holds at least 70% as
    many non-zero cells as the left.

    Otherwise every left column ``col`` is copied into column
    ``W - 1 - col`` wherever that target cell is currently 0.

    Returns ``(completed_grid, confidence)`` or ``(None, 0.0)``.

    NOTE(review): the reflection is about the grid's centre line
    (``W - 1 - col``), not about the detected axis (``2 * ax - col``); the
    axis only drives the left/right split and the confidence.  The original
    code carried an always-true ``mir < W`` guard, which suggests the
    axis-relative form may have been intended -- confirm before changing.
    """
    W = grid.shape[1]
    ax, sc = _sym_axis(grid, 'h')
    lm = (grid[:, :ax] > 0).sum()
    rm = (grid[:, ax:] > 0).sum()
    if lm == 0 or rm >= lm * 0.7:
        return None, 0.0
    c = grid.copy()
    # Columns 0..ax-1 land on columns W-1..W-ax, i.e. the last `ax` columns
    # receive the first `ax` columns in reversed order.  Each target column
    # is written once and always sourced from the untouched input grid.
    target = grid[:, W - ax:]
    source = grid[:, :ax][:, ::-1]
    c[:, W - ax:] = np.where(target == 0, source, target)
    return c, (1 - rm / max(lm, 1)) * sc * 0.95
def _v_mirror(grid):
    """Hypothesis: the bottom should be completed as a mirror of the top.

    The best horizontal symmetry axis ``ax`` (from ``_sym_axis``) splits the
    grid into a top part (rows ``< ax``) and a bottom part (rows ``>= ax``).
    The hypothesis is rejected when the top part has no non-zero cells, or
    when the bottom part already holds at least 70% as many non-zero cells
    as the top.

    Otherwise every top row ``row`` is copied into row ``H - 1 - row``
    wherever that target cell is currently 0.

    Returns ``(completed_grid, confidence)`` or ``(None, 0.0)``.

    NOTE(review): the reflection is about the grid's centre line
    (``H - 1 - row``), not about the detected axis (``2 * ax - row``); the
    original always-true ``mir < H`` guard suggests the axis-relative form
    may have been intended -- confirm before changing.
    """
    H = grid.shape[0]
    ax, sc = _sym_axis(grid, 'v')
    tm = (grid[:ax, :] > 0).sum()
    bm = (grid[ax:, :] > 0).sum()
    if tm == 0 or bm >= tm * 0.7:
        return None, 0.0
    c = grid.copy()
    # Rows 0..ax-1 land on rows H-1..H-ax: the last `ax` rows receive the
    # first `ax` rows in reversed order, sourced from the untouched input.
    target = grid[H - ax:, :]
    source = grid[:ax, :][::-1, :]
    c[H - ax:, :] = np.where(target == 0, source, target)
    return c, (1 - bm / max(tm, 1)) * sc * 0.90
def _boundary_only(grid):
    """Hypothesis: only the cells flagged by ``_boundary`` should remain.

    Rejected when the grid has no non-zero cell, or when the number of
    flagged cells exceeds 60% of the number of non-zero cells.

    Returns ``(outline_grid, confidence)`` or ``(None, 0.0)``; the
    confidence shrinks as the flagged share of the non-zero area grows.
    """
    filled = grid > 0
    if not filled.any():
        return None, 0.0
    solid = filled.sum()
    edge = _boundary(grid)
    edge_px = edge.sum()
    if edge_px / solid > 0.6:
        return None, 0.0
    outline = np.zeros_like(grid)
    on_edge = edge > 0
    outline[on_edge] = grid[on_edge]
    return outline, (1 - edge_px / solid) * 0.85
def _hollow_fill(grid):
    """Hypothesis: empty cells away from any edge should be filled in.

    The cells to fill are the zero cells that ``_boundary`` does not flag.
    They are painted with the most frequent non-zero colour in the grid
    (``argmax`` picks the lowest colour on a tie).

    Returns ``(filled_grid, confidence)`` or ``(None, 0.0)`` when there is
    nothing to fill or the grid has no non-zero cell.  The confidence is the
    filled share of all zero cells, scaled by 0.80.
    """
    empty = grid == 0
    inner = empty & (_boundary(grid) == 0)
    coloured = grid[grid > 0]
    if not inner.any() or coloured.size == 0:
        return None, 0.0
    # Drop the count for colour 0, then shift the index back by one.
    counts = np.bincount(coloured, minlength=10)
    dominant = np.argmax(counts[1:]) + 1
    filled = grid.copy()
    filled[inner] = dominant
    return filled, inner.sum() / max(1, empty.sum()) * 0.80
def _gravity(grid, d='down'):
    """Hypothesis: every non-zero cell falls in direction *d*.

    Non-zero cells of each column (for ``'down'``/``'up'``) or each row
    (for ``'right'``/``'left'``) are packed against the corresponding edge,
    keeping their relative order; everything else becomes 0.

    Args:
        grid: 2-D integer array.
        d: one of ``'down'``, ``'up'``, ``'right'``, ``'left'``.

    Returns:
        ``(settled_grid, confidence)``, or ``(None, 0.0)`` when the grid has
        no non-zero cell or is already settled.  The confidence grows with
        the share of cells that change and is capped at 0.75.

    Raises:
        ValueError: if *d* is not a recognised direction.  Previously an
            unknown direction silently produced an all-zero grid with a
            positive confidence.
    """
    if d not in ('down', 'up', 'right', 'left'):
        raise ValueError(f"unknown gravity direction: {d!r}")
    if not (grid > 0).any():
        return None, 0.0
    c = np.zeros_like(grid)
    # Horizontal gravity is vertical gravity on the transpose; ``c.T`` is a
    # view, so writes through ``dst`` land in ``c``.
    if d in ('down', 'up'):
        src, dst = grid, c
    else:
        src, dst = grid.T, c.T
    pack_at_end = d in ('down', 'right')
    length = src.shape[0]
    for j in range(src.shape[1]):
        line = src[:, j]
        v = line[line > 0]
        if not len(v):
            continue
        if pack_at_end:
            dst[length - len(v):, j] = v
        else:
            dst[:len(v), j] = v
    if np.array_equal(c, grid):
        return None, 0.0
    moved = (c != grid).sum()
    return c, min(0.75, moved / max(1, (grid > 0).sum()) * 0.8)
def _color_shift(grid, d=1):
    """Hypothesis: every non-zero colour advances by *d*, cycling within 1..9.

    Zero cells are left untouched.  Returns ``(shifted_grid, 0.45)``, or
    ``(None, 0.0)`` when the grid has no non-zero cell.
    """
    coloured = grid > 0
    if not coloured.any():
        return None, 0.0
    shifted = grid.copy()
    # Map 1..9 onto 0..8, rotate, and map back so colour 0 is never produced.
    shifted[coloured] = (grid[coloured] - 1 + d) % 9 + 1
    return shifted, 0.45
def _rotate(grid, k):
    """Hypothesis: the grid is rotated by ``k`` quarter turns (``np.rot90``).

    Returns ``(rotated_grid, 0.30)``.
    """
    rotated = np.rot90(grid, k)
    return rotated, 0.30
def _hflip(grid):
    """Hypothesis: the grid is flipped left-to-right.

    Returns ``(flipped_grid, 0.25)``.
    """
    flipped = np.fliplr(grid)
    return flipped, 0.25
def _vflip(grid):
    """Hypothesis: the grid is flipped top-to-bottom.

    Returns ``(flipped_grid, 0.25)``.
    """
    flipped = np.flipud(grid)
    return flipped, 0.25
def _4fold(grid):
    """Hypothesis: empty cells are filled from the grid's own rotations.

    The 90, 180 and 270 degree rotations of the original grid are applied in
    turn; each one fills whatever cells are still 0 at that point.  Rotations
    whose shape differs from the grid (non-square input) are skipped.

    Returns ``(filled_grid, 0.55)``, or ``(None, 0.0)`` when nothing changed.
    """
    filled = grid.copy()
    for turns in (1, 2, 3):
        rotated = np.rot90(grid, turns)
        if rotated.shape != grid.shape:
            continue
        still_empty = filled == 0
        filled[still_empty] = rotated[still_empty]
    if np.array_equal(filled, grid):
        return None, 0.0
    return filled, 0.55
# Registry of (name, function) pairs tried by get_candidates.  Each function
# takes a grid and returns (candidate_grid_or_None, confidence).  The
# parameterised entries are generated so the name and the bound argument
# cannot drift apart; the loop variable is bound as a lambda default to avoid
# late-binding closures.
TRANSFORMS = [
    ('h_mirror_complete', _h_mirror),
    ('v_mirror_complete', _v_mirror),
    ('boundary_only', _boundary_only),
    ('hollow_fill', _hollow_fill),
    *[(f'gravity_{way}', lambda g, way=way: _gravity(g, way))
      for way in ('down', 'up', 'right', 'left')],
    ('4fold_symmetry', _4fold),
    *[(f'color_shift_+{step}', lambda g, step=step: _color_shift(g, step))
      for step in (1, 2)],
    *[(f'rotate_{90 * turns}', lambda g, turns=turns: _rotate(g, turns))
      for turns in (1, 2, 3)],
    ('h_flip', _hflip),
    ('v_flip', _vflip),
]
def get_candidates(grid, transforms=None):
    """Run every candidate transform on *grid* and rank the usable results.

    Args:
        grid: the current grid, passed unchanged to each transform.
        transforms: iterable of ``(name, fn)`` pairs where ``fn(grid)``
            returns ``(candidate_or_None, confidence)``.  Defaults to the
            module-level ``TRANSFORMS`` registry.

    Returns:
        A list of ``(name, candidate, confidence)`` tuples, highest
        confidence first, keeping only candidates that are not ``None`` and
        whose confidence exceeds 0.05.  Equal confidences keep registry
        order.
    """
    if transforms is None:
        transforms = TRANSFORMS
    out = []
    for name, fn in transforms:
        try:
            c, conf = fn(grid)
        except Exception:
            # Best effort: a transform that cannot handle this grid is simply
            # not a candidate.  Unlike a bare ``except``, this still lets
            # KeyboardInterrupt / SystemExit propagate.
            continue
        if c is not None and conf > 0.05:
            out.append((name, c, conf))
    return sorted(out, key=lambda item: item[2], reverse=True)
def pixel_diff(cur, tgt):
    """List the cells where *cur* differs from *tgt*.

    Args:
        cur: current 2-D grid.
        tgt: target 2-D grid.

    Returns:
        ``[(row, col, target_value), ...]`` in row-major order, with all
        three entries as plain Python ints.  An empty list is returned when
        the shapes differ or the grids are identical.
    """
    if cur.shape != tgt.shape:
        return []
    # np.nonzero walks the array in row-major order, matching the nested
    # row/column scan, but without a Python-level loop over every cell.
    rows, cols = np.nonzero(cur != tgt)
    values = tgt[rows, cols]
    return [(r, c, int(v))
            for r, c, v in zip(rows.tolist(), cols.tolist(), values.tolist())]
def most_urgent_diff(cur, tgt):
    """Pick one differing cell to act on next, preferring boundary cells.

    Differences that sit on a cell flagged by ``_boundary(cur)`` are
    preferred; if there are none, any difference may be chosen.  The choice
    within the pool is uniformly random (``np.random.randint``).

    Returns a ``(row, col, target_value)`` tuple, or ``None`` when the grids
    do not differ (or ``pixel_diff`` reports nothing).
    """
    pending = pixel_diff(cur, tgt)
    if not pending:
        return None
    edge = _boundary(cur)
    on_edge = [cell for cell in pending if edge[cell[0], cell[1]] > 0]
    pool = on_edge or pending
    pick = np.random.randint(len(pool))
    return pool[pick]
+# ── Feature extractor ─────────────────────────────────────────────────────────
+def extract_features(grid,num_colours=10):
     H,W=grid.shape
+    oh=np.zeros((num_colours,H,W),np.float32)
+    for c in range(num_colours): oh[c]=(grid==c).astype(np.float32)
     gx,gy=_sobel(grid.astype(np.float32)/9)
+    stacked=np.concatenate([oh,_sym(grid,'h')[np.newaxis],
+                             _sym(grid,'v')[np.newaxis],
+                             _boundary(grid)[np.newaxis],
+                             np.sqrt(gx**2+gy**2)[np.newaxis].astype(np.float32)],axis=0)
     t=torch.from_numpy(stacked).float().unsqueeze(0)
     if H!=64 or W!=64:
         t=TF.interpolate(t,size=(64,64),mode='bilinear',align_corners=False)
 # ── Rendering ─────────────────────────────────────────────────────────────────
+def _pil(fig):
     buf=io.BytesIO()
+    fig.savefig(buf,format='png',dpi=80,bbox_inches='tight',
+                facecolor=fig.get_facecolor())
     buf.seek(0); img=Image.open(buf).copy(); plt.close(fig)
     return img
+def render_grid(grid,title='',highlight=None,mark_cell=None):
     if grid is None: return None
+    H,W=grid.shape; cell=max(28,min(56,360//max(H,W)))
     fig,ax=plt.subplots(figsize=((W*cell+4)/72,(H*cell+22)/72),dpi=72)
     fig.patch.set_facecolor('#1e1e2e'); ax.set_facecolor('#1e1e2e')
     ax.imshow(grid,cmap=ARC_CMAP,vmin=0,vmax=9,interpolation='nearest',aspect='equal')
             v=int(grid[r,c])
             col='white' if v in [0,1,2,3,5,6,9] else 'black'
             ax.text(c,r,str(v),ha='center',va='center',
+                    fontsize=max(7,cell//5),color=col,
+                    fontweight='bold',fontfamily='monospace')
+    if highlight is not None:
+        for r,c,_ in highlight:
+            ax.add_patch(plt.Rectangle((c-.5,r-.5),1,1,
+                fill=True,facecolor='#ff4444',alpha=0.35,lw=0))
+    if mark_cell is not None:
+        r,c,_=mark_cell
+        ax.add_patch(plt.Rectangle((c-.5,r-.5),1,1,
+            fill=False,edgecolor='#00ffff',lw=2.5))
+        ax.plot(c,r,'*',color='#00ffff',markersize=max(8,cell//4))
     ax.set_xlim(-.5,W-.5); ax.set_ylim(H-.5,-.5); ax.axis('off')
+    if title: ax.set_title(title,color='#cdd6f4',fontsize=9,pad=4)
     plt.tight_layout(pad=.3)
+    return _pil(fig)
def render_hypothesis_panel(candidates):
    """Im side: bar chart of top hypotheses with confidence.

    Args:
        candidates: list of ``(name, grid, confidence)`` tuples, best first
            (the order produced by ``get_candidates``).

    Returns:
        The image produced by ``_pil`` for a horizontal bar chart of the
        first six candidates, or ``None`` when *candidates* is empty.  The
        first candidate is drawn in gold, and a dashed line marks
        ``CONF_THRESHOLD`` so the chart cannot drift from the value the
        agent actually uses.
    """
    if not candidates:
        return None
    # barh places the first entry at the bottom, so reverse the list to put
    # the best hypothesis at the top of the chart.
    shown = candidates[:6][::-1]
    names = [cand[0] for cand in shown]
    confs = [cand[2] for cand in shown]
    colors = ['#4a9eff'] * (len(shown) - 1) + ['#ffd700']
    fig, ax = plt.subplots(figsize=(5, 2.2))
    fig.patch.set_facecolor('#1e1e2e')
    ax.set_facecolor('#1e1e2e')
    bars = ax.barh(names, confs, color=colors, height=0.6)
    for bar, conf in zip(bars, confs):
        ax.text(bar.get_width() + .01, bar.get_y() + bar.get_height() / 2,
                f'{conf:.2f}', va='center', color='white', fontsize=8)
    ax.set_xlim(0, 1.15)
    ax.axvline(CONF_THRESHOLD, color='#ff6666', lw=1, ls='--', alpha=0.7)
    ax.text(CONF_THRESHOLD + 0.01, 0, 'threshold',
            color='#ff6666', fontsize=7, va='bottom')
    ax.tick_params(colors='#888', labelsize=8)
    ax.spines[:].set_visible(False)
    ax.set_title('Im side — hypothesis ranking  🟡=selected',
                 color='#cdd6f4', fontsize=9, pad=3)
    plt.tight_layout(pad=.4)
    return _pil(fig)
+def render_action_bar(action_counts,total):
     if not action_counts or total==0: return None
     labels=[f"A{k}" for k in sorted(action_counts)]
     vals  =[action_counts[k] for k in sorted(action_counts)]
     pcts  =[v/total*100 for v in vals]
+    fig,ax=plt.subplots(figsize=(4,1.6))
     fig.patch.set_facecolor('#1e1e2e'); ax.set_facecolor('#1e1e2e')
     colors=['#4a9eff','#e05050','#50c050','#f5c400','#c060c0','#d07030']
     bars=ax.barh(labels,pcts,color=colors[:len(labels)],height=0.6)
     for bar,v,p in zip(bars,vals,pcts):
         ax.text(min(p+1,98),bar.get_y()+bar.get_height()/2,
+                f'{v}',va='center',color='white',fontsize=8)
+    ax.set_xlim(0,110); ax.tick_params(colors='#888',labelsize=8)
+    ax.spines[:].set_visible(False)
+    ax.set_title('Action frequency',color='#cdd6f4',fontsize=9,pad=3)
     plt.tight_layout(pad=.4)
+    return _pil(fig)
 def render_reward_chart(reward_history):
+    if len(reward_history)<2: return None
+    fig,ax=plt.subplots(figsize=(5,1.6))
     fig.patch.set_facecolor('#1e1e2e'); ax.set_facecolor('#1e1e2e')
+    for i,r in enumerate(reward_history):
         col='#ffd700' if r>=5 else ('#50c050' if r>0 else '#e05050')
+        ax.bar(i,r,color=col,width=1,alpha=0.8)
     ax.axhline(0,color='#555',lw=0.5)
+    ax.set_xlim(0,len(reward_history))
     ax.tick_params(colors='#888',labelsize=7); ax.spines[:].set_visible(False)
+    ax.set_title('Reward  🟡=level-up  🟢=change  🔴=dead',
                  color='#cdd6f4',fontsize=8,pad=3)
     plt.tight_layout(pad=.3)
+    return _pil(fig)
+# ── TinyAgent with Re/Im solver ───────────────────────────────────────────────
+CONF_THRESHOLD = 0.40
 class TinyAgent:
     def __init__(self):
         self.opt=torch.optim.Adam(self.model.parameters(),lr=1e-4)
         self.buf=[]; self.prev_feat=None; self.prev_action=None
         self.step_count=0; self.action_counts={}; self.prev_levels=0
+        self.reward_history=deque(maxlen=300)
+        self.level_history=[]; self.prev_state=None
+        self.level_up_reward=10.0; self.win_reward=50.0
+        self.near_win_reward=2.0;  self.change_reward=0.1
+        self.dead_penalty=-0.01;   self.candidate_win_reward=30.0
+        self.prev_candidate_dist=1.0
     def _make_model(self):
         return nn.Sequential(
         self.opt=torch.optim.Adam(self.model.parameters(),lr=1e-4)
         self.buf=[]; self.prev_feat=None; self.prev_action=None
         self.step_count=0; self.action_counts={}; self.prev_levels=0
+        self.reward_history=deque(maxlen=300); self.level_history=[]
+        self.prev_state=None; self.prev_candidate_dist=1.0
+    def choose(self,grid,available_actions=None,levels=0,state=None):
+        feat=extract_features(grid).to(self.device)
+        cur_state=str(state) if state else None
+        # ── Im side: rank hypotheses ──────────────────────────────────────
+        candidates=get_candidates(grid)
+        best_name,best_cand,best_conf=(candidates[0] if candidates
+                                        else ('none',grid,0.0))
+        # Candidate proximity bonus
+        if candidates:
+            nn_name,nn_cand,nn_conf=min(
+                candidates,
+                key=lambda c:(grid!=c[1]).mean() if grid.shape==c[1].shape else 1.0)
+            curr_dist=(grid!=nn_cand).mean() if grid.shape==nn_cand.shape else 1.0
+            if curr_dist==0.0:
+                cand_bonus=self.candidate_win_reward
+            elif curr_dist<self.prev_candidate_dist:
+                cand_bonus=(self.prev_candidate_dist-curr_dist)*5.0
+            else:
+                cand_bonus=0.0
+            self.prev_candidate_dist=curr_dist
+        else:
+            cand_bonus=0.0
         # Store shaped experience
         if self.prev_feat is not None:
             changed=not np.array_equal(
                 self.prev_feat.cpu().numpy(),feat.cpu().numpy())
+            just_won=(cur_state=='WIN' and self.prev_state!='WIN')
             level_up=levels>self.prev_levels
+            if just_won:
+                reward=self.win_reward+cand_bonus
+                for i in range(min(5,len(self.buf))):
+                    idx=len(self.buf)-1-i
+                    self.buf[idx]=(self.buf[idx][0],self.buf[idx][1],
+                                   self.buf[idx][2]+self.near_win_reward*(1-i*0.15))
+            elif level_up:
+                reward=self.level_up_reward+cand_bonus
                 self.level_history.append((self.step_count,levels))
             elif changed:
+                reward=self.change_reward+cand_bonus
             else:
+                reward=self.dead_penalty+cand_bonus
             self.reward_history.append(reward)
             self.buf.append((self.prev_feat,self.prev_action,reward))
             if len(self.buf)>500: self.buf.pop(0)
+            self.prev_state=cur_state
         self.prev_levels=levels
         if self.step_count%10==0 and len(self.buf)>=16:
             self._train()
+        # ── Im → Re bridge: analytic action ──────────────────────────────
+        analytic_action=None; analytic_meta={}
+        if best_conf>=CONF_THRESHOLD and candidates:
+            diffs=pixel_diff(grid,best_cand)
+            if diffs:
+                cell=most_urgent_diff(grid,best_cand)
+                if cell is not None:
+                    r,c,tgt_color=cell
+                    H,W=grid.shape
+                    gy=min(63,max(0,int(r*64/H+32/H)))
+                    gx=min(63,max(0,int(c*64/W+32/W)))
+                    analytic_action=6
+                    analytic_meta={'x':gx,'y':gy,'cell':(r,c,tgt_color),
+                                   'hypothesis':best_name,'conf':best_conf,
+                                   'n_diffs':len(diffs),'candidates':candidates[:4]}
+        # ── CNN fallback ──────────────────────────────────────────────────
         with torch.no_grad():
             logits=self.model(feat.unsqueeze(0)).squeeze(0)
+            avail=list(range(1,7))
             if available_actions:
+                avail=[int(a.value if hasattr(a,'value') else a)
+                       for a in available_actions if
+                       int(a.value if hasattr(a,'value') else a)<=6]
+            indices=[m-1 for m in avail if 1<=m<=6]
             masked=torch.full((6,),float('-inf'))
             for i in indices: masked[i]=logits[i]
             probs=torch.softmax(masked,dim=0).cpu().numpy()
             probs=np.nan_to_num(probs,nan=0)
+            if probs.sum()==0: probs[np.array(indices)]=1/len(indices)
             probs=probs/probs.sum()
+            cnn_action_idx=np.random.choice(6,p=probs)
+        # Pick final action
+        if analytic_action is not None:
+            chosen_id=analytic_action
+            meta=analytic_meta
+            meta['source']='analytic'
+        else:
+            chosen_id=cnn_action_idx+1
+            meta={'source':'cnn','probs':probs.tolist(),
+                  'candidates':candidates[:4] if candidates else []}
+        self.prev_feat=feat; self.prev_action=cnn_action_idx
+        self.step_count+=1
+        a_id=chosen_id
         self.action_counts[a_id]=self.action_counts.get(a_id,0)+1
         try:
             from arcengine import GameAction
             action=GameAction(a_id)
         except Exception:
             action=a_id
+        if a_id==6 and 'x' in meta:
+            try: action.set_data({'x':meta['x'],'y':meta['y']})
+            except: pass
+        return action,meta
     def _train(self):
         import random
 # Handle to the background worker; `start_agent` replaces it with a
 # threading.Thread each time a run is launched.
 _run_thread = None
 # Bounded hand-off queue (at most 60 pending items) that `_run_agent` fills
 # with per-step snapshot dicts plus a final 'done' or 'error' message.
 _frame_queue= queue.Queue(maxsize=60)
+def _run_agent(game_id,api_key,max_steps):
     # Worker-thread body: opens the game `game_id` through arc_agi, then loops
     # until `_stop_flag` is set, `max_steps` is reached, the frame is None, or
     # the state string contains WIN / GAME_OVER. Each iteration asks `_agent`
     # for an action, publishes a snapshot dict on `_frame_queue`, and steps the
     # environment. Ends by queueing {'done': True, ...}; any exception is
     # reported as {'error': str(e)} instead.
     import arc_agi
     try:
         arc=arc_agi.Arcade(arc_api_key=api_key)
         env=arc.make(game_id,include_frame_data=True)
+        frame=env.reset(); _agent.reset()
         prev_grid=None; step=0
         while not _stop_flag.is_set() and step<max_steps:
             if frame is None: break
             # NOTE(review): `raw` is not assigned in the lines visible here; its
             # definition presumably sits in an elided part of the diff.
             # For a 3-D `raw` only the last slice along axis 0 is used.
             grid=raw[-1] if raw.ndim==3 else raw
             avail=getattr(frame,'available_actions',None)
             levels=getattr(frame,'levels_completed',0)
+            state=getattr(frame,'state',None)
+            action,meta=_agent.choose(grid,avail,levels=levels,state=state)
             # Element-wise change mask against the previous grid (None on the first step).
             diff=(grid!=prev_grid) if prev_grid is not None else None
             prev_grid=grid.copy()
             # Publish a snapshot for the UI. With block=True,timeout=5 a full
             # queue raises queue.Full after 5 s, which lands in the outer
             # `except Exception` below and ends the run.
             _frame_queue.put({
+                'grid':grid,'diff':diff,'step':step,
+                'action':int(action.value if hasattr(action,'value') else action),
+                'levels':levels,'state':str(state),
+                'meta':meta,
+                'counts':dict(_agent.action_counts),
+                'reward_history':list(_agent.reward_history),
+                'level_history':list(_agent.level_history),
             },block=True,timeout=5)
+            state_str=str(state)
             # Terminal states are detected by substring match on str(state).
             if 'WIN' in state_str or 'GAME_OVER' in state_str: break
             # Rebuild a GameAction from the numeric id; if arcengine cannot be
             # imported or the constructor fails, send the chosen action as-is.
             try:
                 from arcengine import GameAction as GA
+                sa=GA(int(action.value if hasattr(action,'value') else action))
             except Exception:
+                sa=action
             # Attach click coordinates when the agent supplied them in `meta`.
             # NOTE(review): the bare `except: pass` hides every failure of
             # set_data, so a click may be sent without coordinates.
+            if hasattr(sa,'set_data') and meta.get('x') is not None:
+                try: sa.set_data({'x':meta['x'],'y':meta['y']})
+                except: pass
+            frame=env.step(sa)
             step+=1
             # Pause 0.08 s between environment steps.
+            time.sleep(0.08)
         # NOTE(review): unlike the snapshot put above, this put and the one in
         # the handler below have no timeout, so they block for as long as the
         # queue stays full.
         _frame_queue.put({'done':True,'step':step,
                           'level_history':list(_agent.level_history)})
     except Exception as e:
         _frame_queue.put({'error':str(e)})
+# ── Pull frame ────────────────────────────────────────────────────────────────
+_latest={'grid_img':None,'hyp_img':None,'cand_img':None,
+         'bar_img':None,'reward_img':None,'status':'*Waiting...*'}
 # `_latest` (defined just above) caches the five most recently rendered images
 # and the status Markdown so that `pull_frame` can re-emit them when no new
 # snapshot is available.
 def pull_frame():
     # Timer callback. Always returns the 6-tuple
     # (grid_img, hyp_img, cand_img, bar_img, reward_img, status) read from
     # `_latest`; a new snapshot refreshes all entries first, while 'error' and
     # 'done' messages only replace the status text.
     global _latest
     # NOTE(review): the lines that assign `data` are not visible in this
     # excerpt (presumably a loop draining `_frame_queue`); only the
     # `except queue.Empty` tail of that loop is shown.
         except queue.Empty: break
     # Nothing new arrived: re-emit the cached outputs unchanged.
     if data is None:
+        return (_latest['grid_img'],_latest['hyp_img'],_latest['cand_img'],
+                _latest['bar_img'],_latest['reward_img'],_latest['status'])
     # The worker reported an exception: show it in the status, keep the images.
     if 'error' in data:
         _latest['status']=f"**Error:** {data['error']}"
+        return (_latest['grid_img'],_latest['hyp_img'],_latest['cand_img'],
+                _latest['bar_img'],_latest['reward_img'],_latest['status'])
     # The worker finished: report the step count and the number of entries in
     # level_history.
     if data.get('done'):
         lh=data.get('level_history',[])
+        _latest['status']=f"**Done** — {data['step']} steps | {len(lh)} levels completed"
+        return (_latest['grid_img'],_latest['hyp_img'],_latest['cand_img'],
+                _latest['bar_img'],_latest['reward_img'],_latest['status'])
     # Regular snapshot: unpack the fields queued by the worker.
+    grid=data['grid']; meta=data['meta']; step=data['step']
+    levels=data['levels']; state=data['state']; action=data['action']
+    candidates=meta.get('candidates',[])
+    source=meta.get('source','cnn')
+    # Determine what to highlight
     # Overlays are only produced for analytic steps that carry a target cell.
+    mark_cell=None; highlight=None
+    if source=='analytic' and 'cell' in meta:
         # NOTE(review): r, c, v (and cand_name / cand_conf below) are unpacked
         # but not used in the lines visible here.
+        r,c,v=meta['cell']
+        best_cand_name=meta.get('hypothesis','?')
+        best_conf=meta.get('conf',0)
+        # Compute Re-side diff for candidate
         # Uses the first listed candidate; with an empty list it falls back to
         # the current grid itself.
+        cand_name,cand_grid,cand_conf=(candidates[0] if candidates
+                                        else (best_cand_name,grid,best_conf))
+        if cand_grid.shape==grid.shape:
+            all_diffs=pixel_diff(grid,cand_grid)
+            highlight=all_diffs[:20]  # show up to 20 wrong cells in red
+            mark_cell=meta['cell']    # cyan star on the cell we're clicking
+    source_emoji='🧠' if source=='analytic' else '🎲'
+    _latest['grid_img']=render_grid(
+        grid,
+        title=f"Step {step} | {source_emoji} A{action} | Levels {levels}",
+        highlight=highlight,
+        mark_cell=mark_cell)
+    # Im side: hypothesis ranking
+    _latest['hyp_img']=render_hypothesis_panel(candidates)
+    # Re side: candidate grid (what Im thinks the answer looks like)
     # Rendered only when the first candidate has the same shape as the grid;
     # otherwise the candidate panel is cleared.
+    if candidates and candidates[0][1].shape==grid.shape:
+        cname,cgrid,cconf=candidates[0]
+        diffs=pixel_diff(grid,cgrid)
+        _latest['cand_img']=render_grid(
+            cgrid,
+            title=f"Im candidate: {cname} (conf={cconf:.2f}) — {len(diffs)} cells differ",
+            highlight=diffs[:20])
+    else:
+        _latest['cand_img']=None
+    _latest['bar_img']   =render_action_bar(data['counts'],sum(data['counts'].values()))
+    _latest['reward_img']=render_reward_chart(data['reward_history'])
     # Latest reward (0 when the history is empty) drives the status marker:
     # >=5 yellow, >0 green, anything else red.
+    last_r=data['reward_history'][-1] if data['reward_history'] else 0
     r_emoji='🟡' if last_r>=5 else ('🟢' if last_r>0 else '🔴')
     # Second status line: hypothesis and click details for analytic steps,
     # CNN probabilities rounded to two decimals otherwise.
+    hyp_str=(f"`{meta.get('hypothesis','?')}` conf={meta.get('conf',0):.2f} "
+             f"→ click ({meta.get('x','?')},{meta.get('y','?')}) "
+             f"[{meta.get('n_diffs','?')} cells wrong]"
+             if source=='analytic'
+             else f"CNN probs: {[round(p,2) for p in meta.get('probs',[])]}")
     _latest['status']=(
+        f"{source_emoji} **{'Analytic (Re/Im)' if source=='analytic' else 'CNN fallback'}**"
+        f" &nbsp;|&nbsp; Step {step} &nbsp;|&nbsp; Levels {levels}"
+        f" &nbsp;|&nbsp; Reward {r_emoji} `{last_r:.2f}` &nbsp;|&nbsp; {state}\n\n"
+        f"{hyp_str}")
+    return (_latest['grid_img'],_latest['hyp_img'],_latest['cand_img'],
+            _latest['bar_img'],_latest['reward_img'],_latest['status'])
 # ── Handlers ──────────────────────────────────────────────────────────────────
 def start_agent(game_id,api_key,max_steps):
     # Click handler for the "Watch" button: checks the inputs, asks any running
     # worker to stop, then launches `_run_agent` on a new daemon thread.
     # Returns the Markdown string displayed in `run_status`.
     global _run_thread,_stop_flag
     if not game_id: return "Select a game first."
+    if not api_key: return "Enter your API key."
     # Signal the previous worker and wait at most 3 s for it to exit.
     _stop_flag.set()
     if _run_thread and _run_thread.is_alive(): _run_thread.join(timeout=3)
     # NOTE(review): the body of this drain loop and any reset of `_stop_flag`
     # are not visible in this excerpt. Confirm the flag is cleared or replaced
     # before the thread starts; otherwise the new worker's loop would exit on
     # its first check.
     while not _frame_queue.empty():
     # max_steps is coerced to int before it is handed to the worker.
     _run_thread=threading.Thread(
         target=_run_agent,args=(game_id,api_key,int(max_steps)),daemon=True)
     _run_thread.start()
+    return f"Agent started on **{game_id}** — 🧠 Re/Im analytic + 🎲 CNN fallback"
 def stop_agent():
     # Ask the worker to stop: `_run_agent` tests `_stop_flag.is_set()` at the
     # top of every loop iteration.
     _stop_flag.set()
 # ── UI ────────────────────────────────────────────────────────────────────────
+with gr.Blocks(title="ARC-AGI-3 Re/Im Agent") as demo:
     # Page header explaining the two action sources shown in the status line.
     gr.Markdown("""
+# ARC-AGI-3 Re/Im Agent Spectator
+**Im side** = bird's eye hypothesis (which transformation?) &nbsp;|&nbsp;
+**Re side** = exact location (which cells to click?)
+🧠 = analytic solver (Im picks hypothesis → Re pins cell → ACTION6 click)
+🎲 = CNN fallback (when no hypothesis clears the confidence threshold)
 """)
     with gr.Row():
         with gr.Column(scale=3):
             # NOTE(review): the textbox is pre-filled from the ARC_API_KEY
             # environment variable. type="password" only masks the display, so
             # the value presumably still reaches every visitor's browser —
             # confirm that exposing the secret this way is intended.
+            api_box=gr.Textbox(label="ARC API key",type="password",
+                                value=os.environ.get("ARC_API_KEY",""),
+                                placeholder="arc-key-... or set ARC_API_KEY secret")
         with gr.Column(scale=1):
             fetch_btn=gr.Button("Fetch games")
                 start_btn=gr.Button("▶ Watch",variant="primary")
                 stop_btn =gr.Button("■ Stop")
     # `run_status` receives both the return value of start_agent and the status
     # element of every pull_frame tick (see the wiring below).
+    run_status=gr.Markdown("*Fetch games → select → Watch*")
     api_status=gr.Markdown()
     gr.Markdown("---")
+    # Row 1: current frame + Im hypothesis ranking
     with gr.Row():
+        grid_img=gr.Image(label="Current frame  (🔴=wrong cells  ⭐=target click)",
+                          type="pil",interactive=False,height=280)
+        hyp_img =gr.Image(label="Im side — hypothesis ranking",
+                          type="pil",interactive=False,height=280)
+    # Row 2: Im candidate (what the answer should look like) + action bar
     with gr.Row():
+        cand_img=gr.Image(label="Im candidate — what the answer should look like",
+                          type="pil",interactive=False,height=240)
+        bar_img =gr.Image(label="Action frequency",
+                          type="pil",interactive=False,height=240)
+    # Row 3: reward history
+    reward_img=gr.Image(label="Reward history  🟡+50 WIN  🟡+10 level  🟢+0.1 change  🔴-0.01 dead",
+                        type="pil",interactive=False,height=140)
     # Timer created with value=1.0; each tick calls pull_frame and maps its
     # 6-tuple onto the six components listed in `outputs`, in order.
     timer=gr.Timer(value=1.0)
+    timer.tick(pull_frame,
+               outputs=[grid_img,hyp_img,cand_img,bar_img,reward_img,run_status])
     # NOTE(review): `fetch_games`, `game_dd` and `steps_sl` are defined outside
     # the lines visible here, and no `stop_btn.click(...)` wiring appears in
     # this excerpt — presumably it sits in an elided part of the diff.
     fetch_btn.click(fetch_games,inputs=api_box,outputs=[game_dd,api_status])
     start_btn.click(start_agent,inputs=[game_dd,api_box,steps_sl],outputs=run_status)
     gr.Markdown("""
 ---
+**Re/Im duality in action:**
+The Im side reads the whole board at once — symmetry maps, boundary contour, directional
+flow — and ranks candidate transformations by confidence.
+The Re side then diffs the current frame against the winning candidate and finds the exact
+cell (boundary-first, following Cauchy's principle) that most needs fixing.
+The agent emits ACTION6 at those precise coordinates instead of guessing randomly.
+CNN fires only when no analytic hypothesis clears 0.40 confidence.
 """)
 if __name__ == "__main__":
     # Launch the Gradio app only when this file is executed as a script.
+    demo.launch()