beanapologist committed on
Commit
0919b50
·
verified ·
1 Parent(s): 746c56d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +422 -195
app.py CHANGED
@@ -1,12 +1,11 @@
1
  """
2
- ARC-AGI-3 Agent Spectator v3
3
  Hugging Face Space: beanapologist/arc-agi
4
 
5
- Watch the Re/Im CNN agent explore live ARC-AGI-3 games.
6
- - Shaped reward: +10 level-up / +0.1 frame-change / -0.01 dead
7
- - Live reward history chart
8
- - Level completion timeline
9
- - Auto-loads ARC_API_KEY from HF secret
10
  """
11
 
12
  import gradio as gr
@@ -18,7 +17,7 @@ from matplotlib.colors import ListedColormap
18
  import torch
19
  import torch.nn as nn
20
  import torch.nn.functional as TF
21
- import io, json, os, time, threading, queue
22
  from collections import deque
23
  from PIL import Image
24
 
@@ -30,21 +29,7 @@ ARC_CMAP = ListedColormap(ARC_HEX)
30
  COLOR_NAMES = ['black','blue','red','green','yellow',
31
  'purple','orange','gray','azure','maroon']
32
 
33
- # ── Feature extractor ─────────────────────────────────────────────────────────
34
-
35
- def _cc(mask):
36
- labels=np.zeros_like(mask,dtype=np.int32); cur=0; H,W=mask.shape
37
- for r in range(H):
38
- for c in range(W):
39
- if mask[r,c] and labels[r,c]==0:
40
- cur+=1; q=[(r,c)]; labels[r,c]=cur
41
- while q:
42
- y,x=q.pop()
43
- for dy,dx in [(-1,0),(1,0),(0,-1),(0,1)]:
44
- ny,nx=y+dy,x+dx
45
- if 0<=ny<H and 0<=nx<W and mask[ny,nx] and labels[ny,nx]==0:
46
- labels[ny,nx]=cur; q.append((ny,nx))
47
- return labels
48
 
49
  def _sobel(f):
50
  p=np.pad(f,1,mode='edge')
@@ -52,6 +37,25 @@ def _sobel(f):
52
  gy=(-p[:-2,:-2]-2*p[:-2,1:-1]-p[:-2,2:]+p[2:,:-2]+2*p[2:,1:-1]+p[2:,2:])/8
53
  return gx,gy
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  def _sym(grid,axis):
56
  H,W=grid.shape; s=np.zeros((H,W),np.float32)
57
  if axis=='h':
@@ -66,23 +70,137 @@ def _sym(grid,axis):
66
  s[y,:]=(grid[y-r:y,:]==grid[y+1:y+r+1,:][::-1,:]).mean()
67
  return s
68
 
69
- def _boundary(grid):
70
- p=np.pad(grid,1,mode='edge')
71
- return ((p[1:-1,1:-1]!=p[:-2,1:-1])|(p[1:-1,1:-1]!=p[2:,1:-1])|
72
- (p[1:-1,1:-1]!=p[1:-1,:-2])|(p[1:-1,1:-1]!=p[1:-1,2:])).astype(np.float32)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
- def extract_features_fast(grid, num_colours=10):
75
  H,W=grid.shape
76
- one_hot=np.zeros((num_colours,H,W),dtype=np.float32)
77
- for c in range(num_colours): one_hot[c]=(grid==c).astype(np.float32)
78
  gx,gy=_sobel(grid.astype(np.float32)/9)
79
- stacked=np.concatenate([
80
- one_hot,
81
- _sym(grid,'h')[np.newaxis],
82
- _sym(grid,'v')[np.newaxis],
83
- _boundary(grid)[np.newaxis],
84
- np.sqrt(gx**2+gy**2)[np.newaxis].astype(np.float32),
85
- ],axis=0)
86
  t=torch.from_numpy(stacked).float().unsqueeze(0)
87
  if H!=64 or W!=64:
88
  t=TF.interpolate(t,size=(64,64),mode='bilinear',align_corners=False)
@@ -90,15 +208,16 @@ def extract_features_fast(grid, num_colours=10):
90
 
91
  # ── Rendering ─────────────────────────────────────────────────────────────────
92
 
93
- def _fig_to_pil(fig):
94
  buf=io.BytesIO()
95
- fig.savefig(buf,format='png',dpi=80,bbox_inches='tight',facecolor=fig.get_facecolor())
 
96
  buf.seek(0); img=Image.open(buf).copy(); plt.close(fig)
97
  return img
98
 
99
- def render_grid(grid, title='', highlight_diff=None):
100
  if grid is None: return None
101
- H,W=grid.shape; cell=max(28,min(60,360//max(H,W)))
102
  fig,ax=plt.subplots(figsize=((W*cell+4)/72,(H*cell+22)/72),dpi=72)
103
  fig.patch.set_facecolor('#1e1e2e'); ax.set_facecolor('#1e1e2e')
104
  ax.imshow(grid,cmap=ARC_CMAP,vmin=0,vmax=9,interpolation='nearest',aspect='equal')
@@ -109,68 +228,78 @@ def render_grid(grid, title='', highlight_diff=None):
109
  v=int(grid[r,c])
110
  col='white' if v in [0,1,2,3,5,6,9] else 'black'
111
  ax.text(c,r,str(v),ha='center',va='center',
112
- fontsize=max(7,cell//5),color=col,fontweight='bold',fontfamily='monospace')
113
- if highlight_diff is not None and highlight_diff[r,c]:
114
- ax.add_patch(plt.Rectangle((c-.5,r-.5),1,1,
115
- fill=True,facecolor='#ffffff',alpha=0.25,lw=0))
 
 
 
 
 
 
 
116
  ax.set_xlim(-.5,W-.5); ax.set_ylim(H-.5,-.5); ax.axis('off')
117
- if title: ax.set_title(title,color='#cdd6f4',fontsize=10,pad=4)
118
  plt.tight_layout(pad=.3)
119
- return _fig_to_pil(fig)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
 
121
- def render_action_bar(action_counts, total):
122
  if not action_counts or total==0: return None
123
  labels=[f"A{k}" for k in sorted(action_counts)]
124
  vals =[action_counts[k] for k in sorted(action_counts)]
125
  pcts =[v/total*100 for v in vals]
126
- fig,ax=plt.subplots(figsize=(5,1.6))
127
  fig.patch.set_facecolor('#1e1e2e'); ax.set_facecolor('#1e1e2e')
128
  colors=['#4a9eff','#e05050','#50c050','#f5c400','#c060c0','#d07030']
129
  bars=ax.barh(labels,pcts,color=colors[:len(labels)],height=0.6)
130
  for bar,v,p in zip(bars,vals,pcts):
131
  ax.text(min(p+1,98),bar.get_y()+bar.get_height()/2,
132
- f'{v} ({p:.0f}%)',va='center',color='white',fontsize=8)
133
- ax.set_xlim(0,105); ax.set_xlabel('% of actions',color='#888',fontsize=8)
134
- ax.tick_params(colors='#888',labelsize=8); ax.spines[:].set_visible(False)
 
135
  plt.tight_layout(pad=.4)
136
- return _fig_to_pil(fig)
137
 
138
  def render_reward_chart(reward_history):
139
- if len(reward_history) < 2: return None
140
- fig,ax=plt.subplots(figsize=(6,2))
141
  fig.patch.set_facecolor('#1e1e2e'); ax.set_facecolor('#1e1e2e')
142
- steps=list(range(len(reward_history)))
143
- rewards=list(reward_history)
144
- # Color by reward type
145
- for i,(s,r) in enumerate(zip(steps,rewards)):
146
  col='#ffd700' if r>=5 else ('#50c050' if r>0 else '#e05050')
147
- ax.bar(s,r,color=col,width=1,alpha=0.8)
148
  ax.axhline(0,color='#555',lw=0.5)
149
- ax.set_xlim(0,max(len(steps),1))
150
- ax.set_ylabel('Reward',color='#888',fontsize=8)
151
- ax.set_xlabel('Step',color='#888',fontsize=8)
152
  ax.tick_params(colors='#888',labelsize=7); ax.spines[:].set_visible(False)
153
- ax.set_title('Reward history 🟑=level-up 🟒=change πŸ”΄=dead',
154
  color='#cdd6f4',fontsize=8,pad=3)
155
  plt.tight_layout(pad=.3)
156
- return _fig_to_pil(fig)
157
 
158
- def render_level_timeline(level_history):
159
- if not level_history: return None
160
- fig,ax=plt.subplots(figsize=(6,1.4))
161
- fig.patch.set_facecolor('#1e1e2e'); ax.set_facecolor('#1e1e2e')
162
- for step,level in level_history:
163
- ax.axvline(step,color='#ffd700',lw=2,alpha=0.9)
164
- ax.text(step,0.5,f'L{level}',color='#ffd700',fontsize=8,
165
- ha='center',va='center',fontweight='bold')
166
- ax.set_xlim(0,max(s for s,_ in level_history)+10)
167
- ax.set_ylim(0,1); ax.axis('off')
168
- ax.set_title(f'Level completions β€” {len(level_history)} total',
169
- color='#cdd6f4',fontsize=9,pad=3)
170
- plt.tight_layout(pad=.2)
171
- return _fig_to_pil(fig)
172
 
173
- # ── TinyAgent ─────────────────────────────────────────────────────────────────
174
 
175
  class TinyAgent:
176
  def __init__(self):
@@ -179,12 +308,12 @@ class TinyAgent:
179
  self.opt=torch.optim.Adam(self.model.parameters(),lr=1e-4)
180
  self.buf=[]; self.prev_feat=None; self.prev_action=None
181
  self.step_count=0; self.action_counts={}; self.prev_levels=0
182
- self.reward_history=deque(maxlen=200)
183
- self.level_history=[]
184
- # Shaped reward β€” matches Kaggle submission
185
- self.level_up_reward=10.0
186
- self.change_reward=0.1
187
- self.dead_penalty=-0.01
188
 
189
  def _make_model(self):
190
  return nn.Sequential(
@@ -201,53 +330,121 @@ class TinyAgent:
201
  self.opt=torch.optim.Adam(self.model.parameters(),lr=1e-4)
202
  self.buf=[]; self.prev_feat=None; self.prev_action=None
203
  self.step_count=0; self.action_counts={}; self.prev_levels=0
204
- self.reward_history=deque(maxlen=200)
205
- self.level_history=[]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
 
207
- def choose(self, grid, available_actions=None, levels=0):
208
- feat=extract_features_fast(grid).to(self.device)
209
  # Store shaped experience
210
  if self.prev_feat is not None:
211
  changed=not np.array_equal(
212
  self.prev_feat.cpu().numpy(),feat.cpu().numpy())
 
213
  level_up=levels>self.prev_levels
214
- if level_up:
215
- reward=self.level_up_reward
 
 
 
 
 
 
216
  self.level_history.append((self.step_count,levels))
217
  elif changed:
218
- reward=self.change_reward
219
  else:
220
- reward=self.dead_penalty
221
  self.reward_history.append(reward)
222
  self.buf.append((self.prev_feat,self.prev_action,reward))
223
  if len(self.buf)>500: self.buf.pop(0)
 
224
  self.prev_levels=levels
 
225
  if self.step_count%10==0 and len(self.buf)>=16:
226
  self._train()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  with torch.no_grad():
228
  logits=self.model(feat.unsqueeze(0)).squeeze(0)
229
- mask=list(range(1,7))
230
  if available_actions:
231
- mask=[int(a.value if hasattr(a,'value') else a)
232
- for a in available_actions
233
- if int(a.value if hasattr(a,'value') else a)<=6]
234
- indices=[m-1 for m in mask if 1<=m<=6]
235
  masked=torch.full((6,),float('-inf'))
236
  for i in indices: masked[i]=logits[i]
237
  probs=torch.softmax(masked,dim=0).cpu().numpy()
238
  probs=np.nan_to_num(probs,nan=0)
239
- if probs.sum()==0: probs[indices]=1/len(indices)
240
  probs=probs/probs.sum()
241
- action_idx=np.random.choice(6,p=probs)
242
- self.prev_feat=feat; self.prev_action=action_idx; self.step_count+=1
243
- a_id=action_idx+1
 
 
 
 
 
 
 
 
 
 
 
 
244
  self.action_counts[a_id]=self.action_counts.get(a_id,0)+1
 
245
  try:
246
  from arcengine import GameAction
247
  action=GameAction(a_id)
248
  except Exception:
249
  action=a_id
250
- return action,dict(probs=probs.tolist())
 
 
 
 
 
251
 
252
  def _train(self):
253
  import random
@@ -269,13 +466,12 @@ _stop_flag = threading.Event()
269
  _run_thread = None
270
  _frame_queue= queue.Queue(maxsize=60)
271
 
272
- def _run_agent(game_id, api_key, max_steps):
273
  import arc_agi
274
  try:
275
  arc=arc_agi.Arcade(arc_api_key=api_key)
276
  env=arc.make(game_id,include_frame_data=True)
277
- frame=env.reset()
278
- _agent.reset()
279
  prev_grid=None; step=0
280
  while not _stop_flag.is_set() and step<max_steps:
281
  if frame is None: break
@@ -283,42 +479,41 @@ def _run_agent(game_id, api_key, max_steps):
283
  grid=raw[-1] if raw.ndim==3 else raw
284
  avail=getattr(frame,'available_actions',None)
285
  levels=getattr(frame,'levels_completed',0)
286
- action,info=_agent.choose(grid,avail,levels=levels)
 
287
  diff=(grid!=prev_grid) if prev_grid is not None else None
288
  prev_grid=grid.copy()
289
  _frame_queue.put({
290
- 'grid': grid,
291
- 'diff': diff,
292
- 'step': step,
293
- 'action': int(action.value if hasattr(action,'value') else action),
294
- 'levels': levels,
295
- 'state': str(getattr(frame,'state','')),
296
- 'probs': info['probs'],
297
- 'counts': dict(_agent.action_counts),
298
- 'reward_history': list(_agent.reward_history),
299
- 'level_history': list(_agent.level_history),
300
  },block=True,timeout=5)
301
- state_str=str(getattr(frame,'state',''))
302
  if 'WIN' in state_str or 'GAME_OVER' in state_str: break
303
  try:
304
  from arcengine import GameAction as GA
305
- step_action=GA(int(action.value if hasattr(action,'value') else action))
306
  except Exception:
307
- step_action=action
308
- frame=env.step(step_action)
 
 
 
309
  step+=1
310
- time.sleep(0.05)
311
  _frame_queue.put({'done':True,'step':step,
312
  'level_history':list(_agent.level_history)})
313
  except Exception as e:
314
  _frame_queue.put({'error':str(e)})
315
 
316
- # ── Pull latest frame ─────────────────────────────────────────────────────────
317
 
318
- _latest={
319
- 'grid_img':None,'bar_img':None,'reward_img':None,'level_img':None,
320
- 'status':'*Waiting for agent...*'
321
- }
322
 
323
  def pull_frame():
324
  global _latest
@@ -328,57 +523,79 @@ def pull_frame():
328
  except queue.Empty: break
329
 
330
  if data is None:
331
- return (_latest['grid_img'],_latest['bar_img'],
332
- _latest['reward_img'],_latest['level_img'],
333
- _latest['status'])
334
 
335
  if 'error' in data:
336
  _latest['status']=f"**Error:** {data['error']}"
337
- return (_latest['grid_img'],_latest['bar_img'],
338
- _latest['reward_img'],_latest['level_img'],
339
- _latest['status'])
340
 
341
  if data.get('done'):
342
  lh=data.get('level_history',[])
343
- _latest['status']=(
344
- f"**Done** β€” {data['step']} steps | "
345
- f"{len(lh)} levels completed")
346
- _latest['level_img']=render_level_timeline(lh)
347
- return (_latest['grid_img'],_latest['bar_img'],
348
- _latest['reward_img'],_latest['level_img'],
349
- _latest['status'])
350
-
351
- grid =data['grid']; diff =data['diff']
352
- step =data['step']; levels=data['levels']
353
- action =data['action'];state =data['state']
354
- counts =data['counts'];probs =data['probs']
355
- rh =data['reward_history']
356
- lh =data['level_history']
357
-
358
- _latest['grid_img'] =render_grid(grid,
359
- title=f"Step {step} | Action A{action} | Levels {levels}",
360
- highlight_diff=diff)
361
- _latest['bar_img'] =render_action_bar(counts,sum(counts.values()))
362
- _latest['reward_img']=render_reward_chart(rh)
363
- _latest['level_img'] =render_level_timeline(lh) if lh else None
364
-
365
- action_names={1:'A1',2:'A2',3:'A3',4:'A4',5:'A5',6:'A6(click)'}
366
- prob_str=' '.join(
367
- f"**{action_names.get(i+1,str(i+1))}** {p:.2f}"
368
- for i,p in enumerate(probs))
369
- last_r=rh[-1] if rh else 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
  r_emoji='🟑' if last_r>=5 else ('🟒' if last_r>0 else 'πŸ”΄')
 
 
 
 
 
371
 
372
  _latest['status']=(
373
- f"**Step:** {step} &nbsp;|&nbsp; **Action:** A{action}"
374
- f" &nbsp;|&nbsp; **Levels:** {levels}"
375
- f" &nbsp;|&nbsp; **Last reward:** {r_emoji} `{last_r:.2f}`"
376
- f" &nbsp;|&nbsp; **State:** {state}\n\n"
377
- f"Probs: {prob_str}")
378
 
379
- return (_latest['grid_img'],_latest['bar_img'],
380
- _latest['reward_img'],_latest['level_img'],
381
- _latest['status'])
382
 
383
  # ── Handlers ──────────────────────────────────────────────────────────────────
384
 
@@ -396,7 +613,7 @@ def fetch_games(api_key):
396
  def start_agent(game_id,api_key,max_steps):
397
  global _run_thread,_stop_flag
398
  if not game_id: return "Select a game first."
399
- if not api_key: return "Enter your API key first."
400
  _stop_flag.set()
401
  if _run_thread and _run_thread.is_alive(): _run_thread.join(timeout=3)
402
  while not _frame_queue.empty():
@@ -406,7 +623,7 @@ def start_agent(game_id,api_key,max_steps):
406
  _run_thread=threading.Thread(
407
  target=_run_agent,args=(game_id,api_key,int(max_steps)),daemon=True)
408
  _run_thread.start()
409
- return f"Agent started on **{game_id}** for {int(max_steps)} steps. Reward: 🟑+10 level / 🟒+0.1 change / πŸ”΄-0.01 dead"
410
 
411
  def stop_agent():
412
  _stop_flag.set()
@@ -414,21 +631,22 @@ def stop_agent():
414
 
415
  # ── UI ────────────────────────────────────────────────────────────────────────
416
 
417
- with gr.Blocks(title="ARC-AGI-3 Agent Spectator") as demo:
418
 
419
  gr.Markdown("""
420
- # ARC-AGI-3 Agent Spectator
421
- Watch the Re/Im CNN agent explore live ARC-AGI-3 games in real time.
422
- Get your API key at [docs.arcprize.org/api-keys](https://docs.arcprize.org/api-keys).
423
- Reward signal: 🟑 **+10** level completed Β· 🟒 **+0.1** frame changed Β· πŸ”΄ **-0.01** no change
 
 
424
  """)
425
 
426
  with gr.Row():
427
  with gr.Column(scale=3):
428
- api_box=gr.Textbox(
429
- label="ARC API key",type="password",
430
- value=os.environ.get("ARC_API_KEY",""),
431
- placeholder="arc-key-... (or set ARC_API_KEY as HF Space secret)")
432
  with gr.Column(scale=1):
433
  fetch_btn=gr.Button("Fetch games")
434
 
@@ -442,21 +660,32 @@ Reward signal: 🟑 **+10** level completed · 🟒 **+0.1** frame changed ·
442
  start_btn=gr.Button("β–Ά Watch",variant="primary")
443
  stop_btn =gr.Button("β–  Stop")
444
 
445
- run_status=gr.Markdown("*Press Fetch games, select a game, then Watch.*")
446
  api_status=gr.Markdown()
447
 
448
  gr.Markdown("---")
449
 
 
450
  with gr.Row():
451
- grid_img =gr.Image(label="Current frame", type="pil",interactive=False,height=300)
452
- bar_img =gr.Image(label="Action frequency",type="pil",interactive=False,height=300)
 
 
453
 
 
454
  with gr.Row():
455
- reward_img=gr.Image(label="Reward history", type="pil",interactive=False,height=160)
456
- level_img =gr.Image(label="Level completions",type="pil",interactive=False,height=160)
 
 
 
 
 
 
457
 
458
  timer=gr.Timer(value=1.0)
459
- timer.tick(pull_frame,outputs=[grid_img,bar_img,reward_img,level_img,run_status])
 
460
 
461
  fetch_btn.click(fetch_games,inputs=api_box,outputs=[game_dd,api_status])
462
  start_btn.click(start_agent,inputs=[game_dd,api_box,steps_sl],outputs=run_status)
@@ -464,16 +693,14 @@ Reward signal: 🟑 **+10** level completed · 🟒 **+0.1** frame changed ·
464
 
465
  gr.Markdown("""
466
  ---
467
- **How it works:** Each 64×64 frame is encoded as 14 feature channels
468
- (10 one-hot colors + H-symmetry + V-symmetry + boundary contour + edge magnitude)
469
- and fed through a tiny 3-layer CNN. The reward signal matches the Kaggle submission exactly:
470
- +10 for advancing a level, +0.1 for any frame change, -0.01 for dead actions.
471
- The CNN trains online every 10 steps, learning to chase level completions over time.
472
-
473
- **Re/Im duality:** The 4 Im-side channels (symmetry, boundary, edge) give the CNN
474
- geometric priors about the puzzle structure before any learning happens —
475
- the same insight from the *Decoding Complex Objects* framework.
476
  """)
477
 
478
  if __name__ == "__main__":
479
- demo.launch()
 
1
  """
2
+ ARC-AGI-3 Agent Spectator v4
3
  Hugging Face Space: beanapologist/arc-agi
4
 
5
+ Re/Im solver live demo:
6
+ Im side = bird's eye hypothesis (which transformation?)
7
+ Re side = exact diff (which cells to click?)
8
+ Bridge = ACTION6 at the Re-side coordinates that close the gap
 
9
  """
10
 
11
  import gradio as gr
 
17
  import torch
18
  import torch.nn as nn
19
  import torch.nn.functional as TF
20
+ import io, os, time, threading, queue
21
  from collections import deque
22
  from PIL import Image
23
 
 
29
  COLOR_NAMES = ['black','blue','red','green','yellow',
30
  'purple','orange','gray','azure','maroon']
31
 
32
+ # ── Re/Im primitives ──────────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
  def _sobel(f):
35
  p=np.pad(f,1,mode='edge')
 
37
  gy=(-p[:-2,:-2]-2*p[:-2,1:-1]-p[:-2,2:]+p[2:,:-2]+2*p[2:,1:-1]+p[2:,2:])/8
38
  return gx,gy
39
 
40
def _sym_axis(grid, axis):
    """Find the mirror axis with the highest symmetry score.

    For ``axis == 'h'`` every interior column is tried as a mirror line;
    for any other value every interior row is tried.  The score of a
    position is the fraction of cells that match between the strip on
    one side and the reversed strip of equal width on the other side.

    Returns ``(best_index, best_score)``; ``(0, 0.0)`` when no position
    scores above zero or the grid has no interior line along that axis.
    """
    H, W = grid.shape
    # Scanning rows of the grid is the same as scanning columns of its
    # transpose, so both cases share one loop.
    if axis == 'h':
        view, extent = grid, W
    else:
        view, extent = grid.T, H

    top_score, top_pos = 0.0, 0
    for pos in range(1, extent - 1):
        reach = min(pos, extent - 1 - pos)
        near = view[:, pos - reach:pos]
        far = view[:, pos + 1:pos + reach + 1]
        score = (near == far[:, ::-1]).mean()
        if score > top_score:
            top_score, top_pos = score, pos
    return top_pos, top_score
53
+
54
def _boundary(grid):
    """Return a float32 mask of cells that differ from a 4-neighbour.

    The grid is edge-padded first, so cells on the outer border are only
    compared against real neighbours (the padding repeats their own
    value).  The result has the same shape as ``grid`` with 1.0 where
    the cell differs from the cell above, below, left or right of it.
    """
    padded = np.pad(grid, 1, mode='edge')
    centre = padded[1:-1, 1:-1]
    neighbours = (
        padded[:-2, 1:-1],   # above
        padded[2:, 1:-1],    # below
        padded[1:-1, :-2],   # left
        padded[1:-1, 2:],    # right
    )
    differs = np.zeros(centre.shape, dtype=bool)
    for other in neighbours:
        differs |= (centre != other)
    return differs.astype(np.float32)
58
+
59
  def _sym(grid,axis):
60
  H,W=grid.shape; s=np.zeros((H,W),np.float32)
61
  if axis=='h':
 
70
  s[y,:]=(grid[y-r:y,:]==grid[y+1:y+r+1,:][::-1,:]).mean()
71
  return s
72
 
73
+ # ── Im-side: candidate transforms ────────────────────────────────────────────
74
+
75
def _h_mirror(grid):
    """Propose completing a left/right mirror symmetry.

    The best vertical mirror line (a column index) is taken from
    ``_sym_axis(grid, 'h')``.  If the left side holds non-zero cells and
    the right side holds fewer than 70% as many, every column left of
    the axis is reflected across the axis and copied into the cells on
    the right that are still 0.

    Returns ``(candidate_grid, confidence)`` or ``(None, 0.0)`` when the
    gate above rejects the grid.
    """
    H, W = grid.shape
    ax, sc = _sym_axis(grid, 'h')
    lm = (grid[:, :ax] > 0).sum()
    rm = (grid[:, ax:] > 0).sum()
    if lm == 0 or rm >= lm * 0.7:
        return None, 0.0
    c = grid.copy()
    for col in range(ax):
        # Reflect about the detected axis column.  The previous
        # ``W-1-col`` reflected about the grid centre instead, which
        # only coincides with ``ax`` when the axis is the middle column
        # (and made the bounds check below dead code).
        mir = 2 * ax - col
        if mir < W:
            mask = c[:, mir] == 0
            c[mask, mir] = grid[mask, col]
    return c, (1 - rm / max(lm, 1)) * sc * 0.95
85
+
86
def _v_mirror(grid):
    """Propose completing a top/bottom mirror symmetry.

    The best horizontal mirror line (a row index) is taken from
    ``_sym_axis(grid, 'v')``.  If the top side holds non-zero cells and
    the bottom side holds fewer than 70% as many, every row above the
    axis is reflected across the axis and copied into the cells below
    that are still 0.

    Returns ``(candidate_grid, confidence)`` or ``(None, 0.0)`` when the
    gate above rejects the grid.
    """
    H, W = grid.shape
    ax, sc = _sym_axis(grid, 'v')
    tm = (grid[:ax, :] > 0).sum()
    bm = (grid[ax:, :] > 0).sum()
    if tm == 0 or bm >= tm * 0.7:
        return None, 0.0
    c = grid.copy()
    for row in range(ax):
        # Reflect about the detected axis row.  The previous ``H-1-row``
        # reflected about the grid centre instead, which only coincides
        # with ``ax`` when the axis is the middle row (and made the
        # bounds check below dead code).
        mir = 2 * ax - row
        if mir < H:
            mask = c[mir, :] == 0
            c[mir, mask] = grid[row, mask]
    return c, (1 - bm / max(tm, 1)) * sc * 0.90
96
+
97
def _boundary_only(grid):
    """Propose keeping only the cells flagged by ``_boundary``.

    Rejected with ``(None, 0.0)`` when the grid has no non-zero cell or
    when the number of boundary cells exceeds 60% of the non-zero cell
    count.  Otherwise returns a grid that is 0 everywhere except on
    boundary cells, where the original value is kept, together with a
    confidence of ``(1 - boundary/solid) * 0.85``.
    """
    occupied = grid > 0
    if not occupied.any():
        return None, 0.0

    solid = occupied.sum()          # > 0, guaranteed by the guard above
    edge = _boundary(grid)
    edge_px = edge.sum()
    if edge_px / solid > 0.6:
        return None, 0.0

    outline = np.zeros_like(grid)
    on_edge = edge > 0
    outline[on_edge] = grid[on_edge]
    return outline, (1 - edge_px / solid) * 0.85
103
+
104
def _hollow_fill(grid):
    """Propose filling non-boundary zero cells with the dominant colour.

    "Interior" here means cells whose value is 0 and that ``_boundary``
    does not flag.  They are painted with the most frequent value among
    1..9.  Returns ``(None, 0.0)`` when there is no such cell or the
    grid has no non-zero cell; otherwise ``(candidate, confidence)``
    where confidence is the interior share of all zero cells times 0.80.
    """
    edge = _boundary(grid)
    background = grid == 0
    interior = background & (edge == 0)
    coloured = grid > 0
    if not (interior.any() and coloured.any()):
        return None, 0.0

    # Histogram over colours 0..9; slot 0 is dropped so argmax ranks
    # only real colours, and +1 maps the index back to the colour value.
    counts = np.bincount(grid[coloured].flatten(), minlength=10)
    dom = np.argmax(counts[1:]) + 1

    filled = grid.copy()
    filled[interior] = dom
    return filled, interior.sum() / max(1, background.sum()) * 0.80
110
+
111
def _gravity(grid, d='down'):
    """Propose the grid after all non-zero cells "fall" in direction ``d``.

    Each column (for ``'down'``/``'up'``) or row (for ``'right'``/
    ``'left'``) is compacted: its non-zero values keep their relative
    order and are packed against the chosen side, the rest becomes 0.

    Args:
        grid: 2-D integer array; 0 is treated as empty.
        d: one of ``'down'``, ``'up'``, ``'right'``, ``'left'``.

    Returns:
        ``(candidate, confidence)``, or ``(None, 0.0)`` when the grid is
        already settled (this includes an all-zero grid).  Confidence is
        ``moved / nonzero * 0.8`` capped at 0.75, where ``moved`` counts
        the cells whose value changed.

    Raises:
        ValueError: if ``d`` is not one of the four directions.  The
            previous version fell through every branch and returned an
            all-zero grid as a candidate with non-zero confidence.
    """
    if d not in ('down', 'up', 'right', 'left'):
        raise ValueError(f"unknown gravity direction: {d!r}")

    c = np.zeros_like(grid)
    vertical = d in ('down', 'up')
    toward_end = d in ('down', 'right')

    # Iterate over "lines" along the fall direction.  For vertical
    # gravity those are columns, i.e. rows of the transposed views;
    # ``c.T`` is a view, so writes land in ``c``.
    src = grid.T if vertical else grid
    dst = c.T if vertical else c
    for line, out in zip(src, dst):
        v = line[line > 0]
        if len(v):
            if toward_end:
                out[len(out) - len(v):] = v
            else:
                out[:len(v)] = v

    if np.array_equal(c, grid):
        return None, 0.0
    moved = (c != grid).sum()
    return c, min(0.75, moved / max(1, (grid > 0).sum()) * 0.8)
132
+
133
def _color_shift(grid, d=1):
    """Propose the grid with every non-zero colour advanced by ``d``.

    Colours cycle within 1..9 (9 + 1 wraps to 1); 0 cells are left
    untouched.  Returns ``(None, 0.0)`` for a grid without non-zero
    cells, otherwise ``(candidate, 0.45)``.
    """
    coloured = grid > 0
    if not coloured.any():
        return None, 0.0

    shifted = grid.copy()
    # Map 1..9 to 0..8, rotate, map back.
    rotated = (grid[coloured] - 1 + d) % 9
    shifted[coloured] = rotated + 1
    return shifted, 0.45
138
+
139
def _rotate(grid, k):
    """Return ``grid`` rotated by ``k`` quarter turns (``np.rot90``) with a fixed confidence of 0.30."""
    turned = np.rot90(grid, k)
    return turned, 0.30
140
def _hflip(grid):
    """Return ``grid`` flipped left-to-right with a fixed confidence of 0.25."""
    flipped = np.fliplr(grid)
    return flipped, 0.25
141
def _vflip(grid):
    """Return ``grid`` flipped top-to-bottom with a fixed confidence of 0.25."""
    flipped = np.flipud(grid)
    return flipped, 0.25
142
+
143
def _4fold(grid):
    """Propose completing a 4-fold rotational symmetry.

    The 90/180/270-degree rotations of the original grid are overlaid in
    that order; each one only fills cells that are still 0.  Rotations
    whose shape differs from the grid (quarter turns of a non-square
    grid) are skipped.  Returns ``(candidate, 0.55)``, or
    ``(None, 0.0)`` when nothing was filled in.
    """
    merged = grid.copy()
    for turns in (1, 2, 3):
        turned = np.rot90(grid, turns)
        if turned.shape != grid.shape:
            continue
        empty = merged == 0
        merged[empty] = turned[empty]

    if np.array_equal(merged, grid):
        return None, 0.0
    return merged, 0.55
150
+
151
# Registry of candidate transforms tried by get_candidates(), in order.
# Each entry is (name, fn); fn takes the current grid and returns
# (candidate_grid_or_None, confidence).  Parameterised transforms are
# bound to their argument with a lambda.
TRANSFORMS=[
    ('h_mirror_complete', _h_mirror),
    ('v_mirror_complete', _v_mirror),
    ('boundary_only', _boundary_only),
    ('hollow_fill', _hollow_fill),
    ('gravity_down', lambda g: _gravity(g,'down')),
    ('gravity_up', lambda g: _gravity(g,'up')),
    ('gravity_right', lambda g: _gravity(g,'right')),
    ('gravity_left', lambda g: _gravity(g,'left')),
    ('4fold_symmetry', _4fold),
    ('color_shift_+1', lambda g: _color_shift(g,1)),
    ('color_shift_+2', lambda g: _color_shift(g,2)),
    ('rotate_90', lambda g: _rotate(g,1)),
    ('rotate_180', lambda g: _rotate(g,2)),
    ('rotate_270', lambda g: _rotate(g,3)),
    ('h_flip', _hflip),
    ('v_flip', _vflip),
]
169
+
170
def get_candidates(grid):
    """Run every registered transform on ``grid`` and rank the results.

    Returns a list of ``(name, candidate_grid, confidence)`` tuples for
    the transforms that produced a candidate with confidence above 0.05,
    sorted by descending confidence (ties keep registry order).
    """
    out = []
    for name, fn in TRANSFORMS:
        try:
            c, conf = fn(grid)
        except Exception:
            # Best-effort: a transform that cannot handle this grid is
            # skipped.  Unlike the previous bare ``except``, this lets
            # KeyboardInterrupt / SystemExit propagate.
            continue
        if c is not None and conf > 0.05:
            out.append((name, c, conf))
    return sorted(out, key=lambda x: -x[2])
178
+
179
def pixel_diff(cur, tgt):
    """List the cells where ``cur`` and ``tgt`` disagree.

    Args:
        cur: current 2-D grid.
        tgt: target 2-D grid.

    Returns:
        ``[(row, col, target_value), ...]`` in row-major order, with all
        three entries as plain ``int``.  Empty when the grids are equal
        or their shapes differ.
    """
    if cur.shape != tgt.shape:
        return []
    # np.argwhere yields mismatching indices in row-major order, the
    # same order the former nested Python loops produced, without
    # visiting every cell from Python.
    return [(int(r), int(c), int(tgt[r, c]))
            for r, c in np.argwhere(cur != tgt)]
184
+
185
def most_urgent_diff(cur, tgt):
    """Pick one mismatching cell to act on, preferring boundary cells.

    Mismatches come from ``pixel_diff(cur, tgt)``.  If any of them lie
    on a cell flagged by ``_boundary(cur)``, the choice is restricted to
    those; the pick within the pool is uniformly random via
    ``np.random.randint``.  Returns ``(row, col, target_value)`` or
    ``None`` when there is nothing to change.
    """
    mismatches = pixel_diff(cur, tgt)
    if not mismatches:
        return None

    edge = _boundary(cur)
    on_edge = [cell for cell in mismatches if edge[cell[0], cell[1]] > 0]
    pool = on_edge if on_edge else mismatches
    pick = np.random.randint(len(pool))
    return pool[pick]
192
+
193
+ # ── Feature extractor ─────────────────────────────────────────────────────────
194
 
195
+ def extract_features(grid,num_colours=10):
196
  H,W=grid.shape
197
+ oh=np.zeros((num_colours,H,W),np.float32)
198
+ for c in range(num_colours): oh[c]=(grid==c).astype(np.float32)
199
  gx,gy=_sobel(grid.astype(np.float32)/9)
200
+ stacked=np.concatenate([oh,_sym(grid,'h')[np.newaxis],
201
+ _sym(grid,'v')[np.newaxis],
202
+ _boundary(grid)[np.newaxis],
203
+ np.sqrt(gx**2+gy**2)[np.newaxis].astype(np.float32)],axis=0)
 
 
 
204
  t=torch.from_numpy(stacked).float().unsqueeze(0)
205
  if H!=64 or W!=64:
206
  t=TF.interpolate(t,size=(64,64),mode='bilinear',align_corners=False)
 
208
 
209
  # ── Rendering ─────────────────────────────────────────────────────────────────
210
 
211
def _pil(fig):
    """Render a matplotlib figure to a PIL image and close the figure.

    The figure is saved as PNG (dpi 80, tight bounding box, its own face
    colour) into an in-memory buffer and re-opened with PIL.  The figure
    is closed even if saving or decoding fails, so a rendering error no
    longer leaves the figure registered with pyplot.
    """
    try:
        with io.BytesIO() as buf:
            fig.savefig(buf, format='png', dpi=80, bbox_inches='tight',
                        facecolor=fig.get_facecolor())
            buf.seek(0)
            # .copy() yields an image that no longer depends on ``buf``,
            # so it stays valid after the buffer is closed.
            return Image.open(buf).copy()
    finally:
        plt.close(fig)
217
 
218
+ def render_grid(grid,title='',highlight=None,mark_cell=None):
219
  if grid is None: return None
220
+ H,W=grid.shape; cell=max(28,min(56,360//max(H,W)))
221
  fig,ax=plt.subplots(figsize=((W*cell+4)/72,(H*cell+22)/72),dpi=72)
222
  fig.patch.set_facecolor('#1e1e2e'); ax.set_facecolor('#1e1e2e')
223
  ax.imshow(grid,cmap=ARC_CMAP,vmin=0,vmax=9,interpolation='nearest',aspect='equal')
 
228
  v=int(grid[r,c])
229
  col='white' if v in [0,1,2,3,5,6,9] else 'black'
230
  ax.text(c,r,str(v),ha='center',va='center',
231
+ fontsize=max(7,cell//5),color=col,
232
+ fontweight='bold',fontfamily='monospace')
233
+ if highlight is not None:
234
+ for r,c,_ in highlight:
235
+ ax.add_patch(plt.Rectangle((c-.5,r-.5),1,1,
236
+ fill=True,facecolor='#ff4444',alpha=0.35,lw=0))
237
+ if mark_cell is not None:
238
+ r,c,_=mark_cell
239
+ ax.add_patch(plt.Rectangle((c-.5,r-.5),1,1,
240
+ fill=False,edgecolor='#00ffff',lw=2.5))
241
+ ax.plot(c,r,'*',color='#00ffff',markersize=max(8,cell//4))
242
  ax.set_xlim(-.5,W-.5); ax.set_ylim(H-.5,-.5); ax.axis('off')
243
+ if title: ax.set_title(title,color='#cdd6f4',fontsize=9,pad=4)
244
  plt.tight_layout(pad=.3)
245
+ return _pil(fig)
246
+
247
def render_hypothesis_panel(candidates):
    """Im side: bar chart of top hypotheses with confidence.

    Args:
        candidates: list of ``(name, grid, confidence)`` tuples, best
            first (the order ``get_candidates`` returns).

    Returns:
        A PIL image of a horizontal bar chart of the first six
        candidates, with the first one drawn in gold and a dashed line
        at ``CONF_THRESHOLD``; ``None`` when ``candidates`` is empty.
    """
    if not candidates:
        return None
    top = candidates[:6]
    # Same reversed plotting order as before; the gold colour stays
    # attached to the first (best) candidate.
    names = [c[0] for c in top][::-1]
    confs = [c[2] for c in top][::-1]
    colors = ['#ffd700' if i == 0 else '#4a9eff'
              for i in range(len(top))][::-1]

    fig, ax = plt.subplots(figsize=(5, 2.2))
    fig.patch.set_facecolor('#1e1e2e')
    ax.set_facecolor('#1e1e2e')
    bars = ax.barh(names, confs, color=colors, height=0.6)
    for bar, conf in zip(bars, confs):
        ax.text(bar.get_width() + .01, bar.get_y() + bar.get_height() / 2,
                f'{conf:.2f}', va='center', color='white', fontsize=8)
    ax.set_xlim(0, 1.15)
    # Draw the marker at the gate the agent actually applies rather than
    # a separately hard-coded 0.4, so the chart cannot drift from it.
    ax.axvline(CONF_THRESHOLD, color='#ff6666', lw=1, ls='--', alpha=0.7)
    ax.text(CONF_THRESHOLD + 0.01, 0, 'threshold',
            color='#ff6666', fontsize=7, va='bottom')
    ax.tick_params(colors='#888', labelsize=8)
    ax.spines[:].set_visible(False)
    ax.set_title('Im side — hypothesis ranking 🟡=selected',
                 color='#cdd6f4', fontsize=9, pad=3)
    plt.tight_layout(pad=.4)
    return _pil(fig)
266
 
267
def render_action_bar(action_counts, total):
    """Render how often each action id was chosen as a horizontal bar chart.

    Args:
        action_counts: mapping of action id to the number of times used.
        total: denominator for the bar lengths (percent of ``total``).

    Returns:
        A PIL image, or ``None`` when there are no counts or ``total``
        is 0.  Bars are labelled with the raw count.
    """
    if not action_counts or total == 0:
        return None

    ordered = sorted(action_counts)
    labels = [f"A{k}" for k in ordered]
    vals = [action_counts[k] for k in ordered]
    pcts = [v / total * 100 for v in vals]

    fig, ax = plt.subplots(figsize=(4, 1.6))
    fig.patch.set_facecolor('#1e1e2e')
    ax.set_facecolor('#1e1e2e')

    palette = ['#4a9eff', '#e05050', '#50c050', '#f5c400', '#c060c0', '#d07030']
    bars = ax.barh(labels, pcts, color=palette[:len(labels)], height=0.6)
    for bar, v, p in zip(bars, vals, pcts):
        y_mid = bar.get_y() + bar.get_height() / 2
        # Keep the label inside the axes even for a ~100% bar.
        ax.text(min(p + 1, 98), y_mid,
                f'{v}', va='center', color='white', fontsize=8)

    ax.set_xlim(0, 110)
    ax.tick_params(colors='#888', labelsize=8)
    ax.spines[:].set_visible(False)
    ax.set_title('Action frequency', color='#cdd6f4', fontsize=9, pad=3)
    plt.tight_layout(pad=.4)
    return _pil(fig)
284
 
285
def render_reward_chart(reward_history):
    """Render the recent per-step rewards as a colour-coded bar chart.

    Args:
        reward_history: sequence of numeric rewards, oldest first.

    Returns:
        A PIL image, or ``None`` when fewer than two rewards exist.
        Bars are gold for rewards >= 5, green for other positive
        rewards and red otherwise.
    """
    if len(reward_history) < 2:
        return None
    rewards = list(reward_history)
    colours = ['#ffd700' if r >= 5 else ('#50c050' if r > 0 else '#e05050')
               for r in rewards]

    fig, ax = plt.subplots(figsize=(5, 1.6))
    fig.patch.set_facecolor('#1e1e2e')
    ax.set_facecolor('#1e1e2e')
    # One vectorised call with per-bar colours instead of one ax.bar()
    # call per reward.
    ax.bar(np.arange(len(rewards)), rewards, color=colours, width=1, alpha=0.8)
    ax.axhline(0, color='#555', lw=0.5)
    ax.set_xlim(0, len(rewards))
    ax.tick_params(colors='#888', labelsize=7)
    ax.spines[:].set_visible(False)
    ax.set_title('Reward 🟡=level-up 🟢=change 🔴=dead',
                 color='#cdd6f4', fontsize=8, pad=3)
    plt.tight_layout(pad=.3)
    return _pil(fig)
299
 
300
+ # ── TinyAgent with Re/Im solver ───────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
301
 
302
# Minimum confidence of the top-ranked hypothesis for TinyAgent.choose()
# to take the analytic ACTION6 path instead of the CNN-sampled action.
CONF_THRESHOLD = 0.40
303
 
304
  class TinyAgent:
305
  def __init__(self):
 
308
  self.opt=torch.optim.Adam(self.model.parameters(),lr=1e-4)
309
  self.buf=[]; self.prev_feat=None; self.prev_action=None
310
  self.step_count=0; self.action_counts={}; self.prev_levels=0
311
+ self.reward_history=deque(maxlen=300)
312
+ self.level_history=[]; self.prev_state=None
313
+ self.level_up_reward=10.0; self.win_reward=50.0
314
+ self.near_win_reward=2.0; self.change_reward=0.1
315
+ self.dead_penalty=-0.01; self.candidate_win_reward=30.0
316
+ self.prev_candidate_dist=1.0
317
 
318
  def _make_model(self):
319
  return nn.Sequential(
 
330
  self.opt=torch.optim.Adam(self.model.parameters(),lr=1e-4)
331
  self.buf=[]; self.prev_feat=None; self.prev_action=None
332
  self.step_count=0; self.action_counts={}; self.prev_levels=0
333
+ self.reward_history=deque(maxlen=300); self.level_history=[]
334
+ self.prev_state=None; self.prev_candidate_dist=1.0
335
+
336
def choose(self, grid, available_actions=None, levels=0, state=None):
    """Score the previous step, then pick the next action for ``grid``.

    Steps, in order:
      1. Rank candidate target grids (``get_candidates``) and compute a
         proximity bonus from the distance to the closest candidate.
      2. If a previous step exists, turn its outcome into a shaped
         reward and append it to the replay buffer.
      3. Every 10th step, once 16 samples are buffered, call ``_train``.
      4. If the top candidate reaches ``CONF_THRESHOLD`` and differs
         from the grid, choose ACTION6 at one differing cell;
         otherwise sample an action from the masked CNN policy.

    Args:
        grid: current 2-D colour grid.
        available_actions: optional iterable of action ids or enum
            members with a ``.value``; ids outside 1..6 are ignored.
        levels: number of levels completed so far.
        state: game state; only its ``str()`` is used.

    Returns:
        ``(action, meta)``.  ``action`` is an ``arcengine.GameAction``
        when that package imports, else the integer id.  ``meta`` holds
        ``'source'`` (``'analytic'`` or ``'cnn'``) plus details of the
        decision.

    NOTE(review): the analytic path does not check that action 6 is in
    ``available_actions`` — confirm whether that is intended.
    """
    feat = extract_features(grid).to(self.device)
    cur_state = str(state) if state else None

    # ── Im side: rank hypotheses ──────────────────────────────────────
    candidates = get_candidates(grid)
    best_name, best_cand, best_conf = (
        candidates[0] if candidates else ('none', grid, 0.0))

    # Candidate proximity bonus: reward getting closer to the nearest
    # candidate (fraction of differing cells; 1.0 on shape mismatch).
    cand_bonus = 0.0
    if candidates:
        curr_dist = min(
            (grid != cand).mean() if grid.shape == cand.shape else 1.0
            for _, cand, _ in candidates)
        if curr_dist == 0.0:
            cand_bonus = self.candidate_win_reward
        elif curr_dist < self.prev_candidate_dist:
            cand_bonus = (self.prev_candidate_dist - curr_dist) * 5.0
        self.prev_candidate_dist = curr_dist

    # Store shaped experience for the previous (state, action) pair.
    if self.prev_feat is not None:
        changed = not np.array_equal(
            self.prev_feat.cpu().numpy(), feat.cpu().numpy())
        # Substring test, matching the runner's ``'WIN' in state_str``:
        # str() of an enum member is typically 'GameState.WIN', which
        # the former ``== 'WIN'`` comparison could never match.
        won_now = cur_state is not None and 'WIN' in cur_state
        won_before = (self.prev_state is not None
                      and 'WIN' in self.prev_state)
        just_won = won_now and not won_before
        level_up = levels > self.prev_levels
        if just_won:
            reward = self.win_reward + cand_bonus
            # Spread a decaying bonus over the last (up to) 5 stored steps.
            for i in range(min(5, len(self.buf))):
                idx = len(self.buf) - 1 - i
                f_, a_, r_ = self.buf[idx]
                self.buf[idx] = (
                    f_, a_, r_ + self.near_win_reward * (1 - i * 0.15))
        elif level_up:
            reward = self.level_up_reward + cand_bonus
            self.level_history.append((self.step_count, levels))
        elif changed:
            reward = self.change_reward + cand_bonus
        else:
            reward = self.dead_penalty + cand_bonus
        self.reward_history.append(reward)
        self.buf.append((self.prev_feat, self.prev_action, reward))
        if len(self.buf) > 500:
            self.buf.pop(0)
    self.prev_state = cur_state
    self.prev_levels = levels

    if self.step_count % 10 == 0 and len(self.buf) >= 16:
        self._train()

    # ── Im → Re bridge: analytic action ──────────────────────────────
    analytic_action = None
    analytic_meta = {}
    if best_conf >= CONF_THRESHOLD and candidates:
        diffs = pixel_diff(grid, best_cand)
        if diffs:
            cell = most_urgent_diff(grid, best_cand)
            if cell is not None:
                r, c, tgt_color = cell
                H, W = grid.shape
                # Centre of the cell, scaled to 64x64 click coordinates.
                gy = min(63, max(0, int(r * 64 / H + 32 / H)))
                gx = min(63, max(0, int(c * 64 / W + 32 / W)))
                analytic_action = 6
                analytic_meta = {
                    'x': gx, 'y': gy, 'cell': (r, c, tgt_color),
                    'hypothesis': best_name, 'conf': best_conf,
                    'n_diffs': len(diffs), 'candidates': candidates[:4]}

    # ── CNN fallback ──────────────────────────────────────────────────
    with torch.no_grad():
        logits = self.model(feat.unsqueeze(0)).squeeze(0)
    avail = list(range(1, 7))
    if available_actions:
        avail = [int(a.value if hasattr(a, 'value') else a)
                 for a in available_actions]
    indices = [m - 1 for m in avail if 1 <= m <= 6]
    if not indices:
        # No legal id in 1..6 was offered.  Fall back to all six rather
        # than hitting 1/len(indices) with an empty list below.
        indices = list(range(6))
    masked = torch.full((6,), float('-inf'))
    for i in indices:
        masked[i] = logits[i]
    probs = torch.softmax(masked, dim=0).cpu().numpy()
    probs = np.nan_to_num(probs, nan=0)
    if probs.sum() == 0:
        probs[np.array(indices)] = 1 / len(indices)
    probs = probs / probs.sum()
    cnn_action_idx = int(np.random.choice(6, p=probs))

    # Pick final action
    if analytic_action is not None:
        chosen_id = analytic_action
        meta = analytic_meta
        meta['source'] = 'analytic'
    else:
        chosen_id = cnn_action_idx + 1
        meta = {'source': 'cnn', 'probs': probs.tolist(),
                'candidates': candidates[:4] if candidates else []}

    self.prev_feat = feat
    # Record the action actually executed.  Storing the CNN sample even
    # when the analytic ACTION6 was taken credited the next reward to
    # an action that was never played.
    self.prev_action = chosen_id - 1
    self.step_count += 1
    a_id = chosen_id
    self.action_counts[a_id] = self.action_counts.get(a_id, 0) + 1

    try:
        from arcengine import GameAction
        action = GameAction(a_id)
    except Exception:
        # arcengine missing or id not a valid member: use the raw id.
        action = a_id

    if a_id == 6 and 'x' in meta:
        try:
            action.set_data({'x': meta['x'], 'y': meta['y']})
        except Exception:
            # Best-effort: a plain-int action has no set_data().
            pass

    return action, meta
448
 
449
  def _train(self):
450
  import random
 
466
  _run_thread = None
467
  _frame_queue= queue.Queue(maxsize=60)
468
 
469
+ def _run_agent(game_id,api_key,max_steps):
470
  import arc_agi
471
  try:
472
  arc=arc_agi.Arcade(arc_api_key=api_key)
473
  env=arc.make(game_id,include_frame_data=True)
474
+ frame=env.reset(); _agent.reset()
 
475
  prev_grid=None; step=0
476
  while not _stop_flag.is_set() and step<max_steps:
477
  if frame is None: break
 
479
  grid=raw[-1] if raw.ndim==3 else raw
480
  avail=getattr(frame,'available_actions',None)
481
  levels=getattr(frame,'levels_completed',0)
482
+ state=getattr(frame,'state',None)
483
+ action,meta=_agent.choose(grid,avail,levels=levels,state=state)
484
  diff=(grid!=prev_grid) if prev_grid is not None else None
485
  prev_grid=grid.copy()
486
  _frame_queue.put({
487
+ 'grid':grid,'diff':diff,'step':step,
488
+ 'action':int(action.value if hasattr(action,'value') else action),
489
+ 'levels':levels,'state':str(state),
490
+ 'meta':meta,
491
+ 'counts':dict(_agent.action_counts),
492
+ 'reward_history':list(_agent.reward_history),
493
+ 'level_history':list(_agent.level_history),
 
 
 
494
  },block=True,timeout=5)
495
+ state_str=str(state)
496
  if 'WIN' in state_str or 'GAME_OVER' in state_str: break
497
  try:
498
  from arcengine import GameAction as GA
499
+ sa=GA(int(action.value if hasattr(action,'value') else action))
500
  except Exception:
501
+ sa=action
502
+ if hasattr(sa,'set_data') and meta.get('x') is not None:
503
+ try: sa.set_data({'x':meta['x'],'y':meta['y']})
504
+ except: pass
505
+ frame=env.step(sa)
506
  step+=1
507
+ time.sleep(0.08)
508
  _frame_queue.put({'done':True,'step':step,
509
  'level_history':list(_agent.level_history)})
510
  except Exception as e:
511
  _frame_queue.put({'error':str(e)})
512
 
513
+ # ── Pull frame ────────────────────────────────────────────────────────────────
514
 
515
+ _latest={'grid_img':None,'hyp_img':None,'cand_img':None,
516
+ 'bar_img':None,'reward_img':None,'status':'*Waiting...*'}
 
 
517
 
518
  def pull_frame():
519
  global _latest
 
523
  except queue.Empty: break
524
 
525
  if data is None:
526
+ return (_latest['grid_img'],_latest['hyp_img'],_latest['cand_img'],
527
+ _latest['bar_img'],_latest['reward_img'],_latest['status'])
 
528
 
529
  if 'error' in data:
530
  _latest['status']=f"**Error:** {data['error']}"
531
+ return (_latest['grid_img'],_latest['hyp_img'],_latest['cand_img'],
532
+ _latest['bar_img'],_latest['reward_img'],_latest['status'])
 
533
 
534
  if data.get('done'):
535
  lh=data.get('level_history',[])
536
+ _latest['status']=f"**Done** β€” {data['step']} steps | {len(lh)} levels completed"
537
+ return (_latest['grid_img'],_latest['hyp_img'],_latest['cand_img'],
538
+ _latest['bar_img'],_latest['reward_img'],_latest['status'])
539
+
540
+ grid=data['grid']; meta=data['meta']; step=data['step']
541
+ levels=data['levels']; state=data['state']; action=data['action']
542
+ candidates=meta.get('candidates',[])
543
+ source=meta.get('source','cnn')
544
+
545
+ # Determine what to highlight
546
+ mark_cell=None; highlight=None
547
+ if source=='analytic' and 'cell' in meta:
548
+ r,c,v=meta['cell']
549
+ best_cand_name=meta.get('hypothesis','?')
550
+ best_conf=meta.get('conf',0)
551
+ # Compute Re-side diff for candidate
552
+ cand_name,cand_grid,cand_conf=(candidates[0] if candidates
553
+ else (best_cand_name,grid,best_conf))
554
+ if cand_grid.shape==grid.shape:
555
+ all_diffs=pixel_diff(grid,cand_grid)
556
+ highlight=all_diffs[:20] # show up to 20 wrong cells in red
557
+ mark_cell=meta['cell'] # cyan star on the cell we're clicking
558
+
559
+ source_emoji='🧠' if source=='analytic' else '🎲'
560
+ _latest['grid_img']=render_grid(
561
+ grid,
562
+ title=f"Step {step} | {source_emoji} A{action} | Levels {levels}",
563
+ highlight=highlight,
564
+ mark_cell=mark_cell)
565
+
566
+ # Im side: hypothesis ranking
567
+ _latest['hyp_img']=render_hypothesis_panel(candidates)
568
+
569
+ # Re side: candidate grid (what Im thinks the answer looks like)
570
+ if candidates and candidates[0][1].shape==grid.shape:
571
+ cname,cgrid,cconf=candidates[0]
572
+ diffs=pixel_diff(grid,cgrid)
573
+ _latest['cand_img']=render_grid(
574
+ cgrid,
575
+ title=f"Im candidate: {cname} (conf={cconf:.2f}) β€” {len(diffs)} cells differ",
576
+ highlight=diffs[:20])
577
+ else:
578
+ _latest['cand_img']=None
579
+
580
+ _latest['bar_img'] =render_action_bar(data['counts'],sum(data['counts'].values()))
581
+ _latest['reward_img']=render_reward_chart(data['reward_history'])
582
+
583
+ last_r=data['reward_history'][-1] if data['reward_history'] else 0
584
  r_emoji='🟑' if last_r>=5 else ('🟒' if last_r>0 else 'πŸ”΄')
585
+ hyp_str=(f"`{meta.get('hypothesis','?')}` conf={meta.get('conf',0):.2f} "
586
+ f"β†’ click ({meta.get('x','?')},{meta.get('y','?')}) "
587
+ f"[{meta.get('n_diffs','?')} cells wrong]"
588
+ if source=='analytic'
589
+ else f"CNN probs: {[round(p,2) for p in meta.get('probs',[])]}")
590
 
591
  _latest['status']=(
592
+ f"{source_emoji} **{'Analytic (Re/Im)' if source=='analytic' else 'CNN fallback'}**"
593
+ f" &nbsp;|&nbsp; Step {step} &nbsp;|&nbsp; Levels {levels}"
594
+ f" &nbsp;|&nbsp; Reward {r_emoji} `{last_r:.2f}` &nbsp;|&nbsp; {state}\n\n"
595
+ f"{hyp_str}")
 
596
 
597
+ return (_latest['grid_img'],_latest['hyp_img'],_latest['cand_img'],
598
+ _latest['bar_img'],_latest['reward_img'],_latest['status'])
 
599
 
600
  # ── Handlers ──────────────────────────────────────────────────────────────────
601
 
 
613
  def start_agent(game_id,api_key,max_steps):
614
  global _run_thread,_stop_flag
615
  if not game_id: return "Select a game first."
616
+ if not api_key: return "Enter your API key."
617
  _stop_flag.set()
618
  if _run_thread and _run_thread.is_alive(): _run_thread.join(timeout=3)
619
  while not _frame_queue.empty():
 
623
  _run_thread=threading.Thread(
624
  target=_run_agent,args=(game_id,api_key,int(max_steps)),daemon=True)
625
  _run_thread.start()
626
+ return f"Agent started on **{game_id}** β€” 🧠 Re/Im analytic + 🎲 CNN fallback"
627
 
628
  def stop_agent():
629
  _stop_flag.set()
 
631
 
632
  # ── UI ────────────────────────────────────────────────────────────────────────
633
 
634
+ with gr.Blocks(title="ARC-AGI-3 Re/Im Agent") as demo:
635
 
636
  gr.Markdown("""
637
+ # ARC-AGI-3 Re/Im Agent Spectator
638
+ **Im side** = bird's eye hypothesis (which transformation?) &nbsp;|&nbsp;
639
+ **Re side** = exact location (which cells to click?)
640
+
641
+ 🧠 = analytic solver (Im picks hypothesis → Re pins cell → ACTION6 click)
642
+ 🎲 = CNN fallback (when no hypothesis clears the confidence threshold)
643
  """)
644
 
645
  with gr.Row():
646
  with gr.Column(scale=3):
647
+ api_box=gr.Textbox(label="ARC API key",type="password",
648
+ value=os.environ.get("ARC_API_KEY",""),
649
+ placeholder="arc-key-... or set ARC_API_KEY secret")
 
650
  with gr.Column(scale=1):
651
  fetch_btn=gr.Button("Fetch games")
652
 
 
660
  start_btn=gr.Button("β–Ά Watch",variant="primary")
661
  stop_btn =gr.Button("β–  Stop")
662
 
663
+ run_status=gr.Markdown("*Fetch games β†’ select β†’ Watch*")
664
  api_status=gr.Markdown()
665
 
666
  gr.Markdown("---")
667
 
668
+ # Row 1: current frame + Im hypothesis ranking
669
  with gr.Row():
670
+ grid_img=gr.Image(label="Current frame (πŸ”΄=wrong cells ⭐=target click)",
671
+ type="pil",interactive=False,height=280)
672
+ hyp_img =gr.Image(label="Im side β€” hypothesis ranking",
673
+ type="pil",interactive=False,height=280)
674
 
675
+ # Row 2: Im candidate (what the answer should look like) + action bar
676
  with gr.Row():
677
+ cand_img=gr.Image(label="Im candidate β€” what the answer should look like",
678
+ type="pil",interactive=False,height=240)
679
+ bar_img =gr.Image(label="Action frequency",
680
+ type="pil",interactive=False,height=240)
681
+
682
+ # Row 3: reward history
683
+ reward_img=gr.Image(label="Reward history 🟑+50 WIN 🟑+10 level 🟒+0.1 change πŸ”΄-0.01 dead",
684
+ type="pil",interactive=False,height=140)
685
 
686
  timer=gr.Timer(value=1.0)
687
+ timer.tick(pull_frame,
688
+ outputs=[grid_img,hyp_img,cand_img,bar_img,reward_img,run_status])
689
 
690
  fetch_btn.click(fetch_games,inputs=api_box,outputs=[game_dd,api_status])
691
  start_btn.click(start_agent,inputs=[game_dd,api_box,steps_sl],outputs=run_status)
 
693
 
694
  gr.Markdown("""
695
  ---
696
+ **Re/Im duality in action:**
697
+ The Im side reads the whole board at once — symmetry maps, boundary contour, directional
698
+ flow — and ranks candidate transformations by confidence.
699
+ The Re side then diffs the current frame against the winning candidate and finds the exact
700
+ cell (boundary-first, following Cauchy's principle) that most needs fixing.
701
+ The agent emits ACTION6 at those precise coordinates instead of guessing randomly.
702
+ CNN fires only when no analytic hypothesis clears 0.40 confidence.
 
 
703
  """)
704
 
705
  if __name__ == "__main__":
706
+ demo.launch()