beanapologist committed on
Commit
55a1dab
·
verified ·
1 Parent(s): 0919b50

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +381 -133
app.py CHANGED
@@ -70,126 +70,377 @@ def _sym(grid,axis):
70
  s[y,:]=(grid[y-r:y,:]==grid[y+1:y+r+1,:][::-1,:]).mean()
71
  return s
72
 
73
- # ── Im-side: candidate transforms ────────────────────────────────────────────
74
-
75
- def _h_mirror(grid):
76
- H,W=grid.shape; ax,sc=_sym_axis(grid,'h')
77
- lm=(grid[:,:ax]>0).sum(); rm=(grid[:,ax:]>0).sum()
78
- if lm==0 or rm>=lm*0.7: return None,0.0
79
- c=grid.copy()
80
- for col in range(ax):
81
- mir=W-1-col
82
- if mir<W:
83
- mask=c[:,mir]==0; c[mask,mir]=grid[mask,col]
84
- return c,(1-rm/max(lm,1))*sc*0.95
85
-
86
- def _v_mirror(grid):
87
- H,W=grid.shape; ax,sc=_sym_axis(grid,'v')
88
- tm=(grid[:ax,:]>0).sum(); bm=(grid[ax:,:]>0).sum()
89
- if tm==0 or bm>=tm*0.7: return None,0.0
90
- c=grid.copy()
91
- for row in range(ax):
92
- mir=H-1-row
93
- if mir<H:
94
- mask=c[mir,:]==0; c[mir,mask]=grid[row,mask]
95
- return c,(1-bm/max(tm,1))*sc*0.90
96
-
97
- def _boundary_only(grid):
98
- if not (grid>0).any(): return None,0.0
99
- solid=(grid>0).sum(); b=_boundary(grid); bpx=b.sum()
100
- if solid==0 or bpx/solid>0.6: return None,0.0
101
- c=np.zeros_like(grid); c[b>0]=grid[b>0]
102
- return c,(1-bpx/solid)*0.85
103
-
104
- def _hollow_fill(grid):
105
- b=_boundary(grid); interior=(grid==0)&(b==0)
106
- if not interior.any() or not (grid>0).any(): return None,0.0
107
- dom=np.argmax(np.bincount(grid[grid>0].flatten(),minlength=10)[1:])+1
108
- c=grid.copy(); c[interior]=dom
109
- return c,interior.sum()/max(1,(grid==0).sum())*0.80
110
-
111
- def _gravity(grid,d='down'):
112
- H,W=grid.shape; c=np.zeros_like(grid)
113
- if d=='down':
114
- for col in range(W):
115
- v=grid[:,col][grid[:,col]>0]
116
- if len(v): c[H-len(v):H,col]=v
117
- elif d=='up':
118
- for col in range(W):
119
- v=grid[:,col][grid[:,col]>0]
120
- if len(v): c[:len(v),col]=v
121
- elif d=='right':
122
- for row in range(H):
123
- v=grid[row,:][grid[row,:]>0]
124
- if len(v): c[row,W-len(v):W]=v
125
- elif d=='left':
126
- for row in range(H):
127
- v=grid[row,:][grid[row,:]>0]
128
- if len(v): c[row,:len(v)]=v
129
- if np.array_equal(c,grid) or not (grid>0).any(): return None,0.0
130
- moved=(c!=grid).sum()
131
- return c,min(0.75,moved/max(1,(grid>0).sum())*0.8)
132
-
133
- def _color_shift(grid,d=1):
134
- if not (grid>0).any(): return None,0.0
135
- c=grid.copy(); mask=grid>0
136
- c[mask]=((grid[mask]-1+d)%9)+1
137
- return c,0.45
138
-
139
- def _rotate(grid,k): return np.rot90(grid,k),0.30
140
- def _hflip(grid): return np.fliplr(grid),0.25
141
- def _vflip(grid): return np.flipud(grid),0.25
142
-
143
- def _4fold(grid):
144
- c=grid.copy()
145
- for k in [1,2,3]:
146
- rot=np.rot90(grid,k)
147
- if rot.shape==grid.shape:
148
- mask=c==0; c[mask]=rot[mask]
149
- return (c,0.55) if not np.array_equal(c,grid) else (None,0.0)
150
-
151
- TRANSFORMS=[
152
- ('h_mirror_complete', _h_mirror),
153
- ('v_mirror_complete', _v_mirror),
154
- ('boundary_only', _boundary_only),
155
- ('hollow_fill', _hollow_fill),
156
- ('gravity_down', lambda g: _gravity(g,'down')),
157
- ('gravity_up', lambda g: _gravity(g,'up')),
158
- ('gravity_right', lambda g: _gravity(g,'right')),
159
- ('gravity_left', lambda g: _gravity(g,'left')),
160
- ('4fold_symmetry', _4fold),
161
- ('color_shift_+1', lambda g: _color_shift(g,1)),
162
- ('color_shift_+2', lambda g: _color_shift(g,2)),
163
- ('rotate_90', lambda g: _rotate(g,1)),
164
- ('rotate_180', lambda g: _rotate(g,2)),
165
- ('rotate_270', lambda g: _rotate(g,3)),
166
- ('h_flip', _hflip),
167
- ('v_flip', _vflip),
168
- ]
169
-
170
- def get_candidates(grid):
171
- out=[]
172
- for name,fn in TRANSFORMS:
173
- try:
174
- c,conf=fn(grid)
175
- if c is not None and conf>0.05: out.append((name,c,conf))
176
- except: pass
177
- return sorted(out,key=lambda x:-x[2])
178
-
179
- def pixel_diff(cur,tgt):
180
- if cur.shape!=tgt.shape: return []
181
- return [(r,c,int(tgt[r,c]))
182
- for r in range(cur.shape[0]) for c in range(cur.shape[1])
183
- if cur[r,c]!=tgt[r,c]]
184
-
185
- def most_urgent_diff(cur,tgt):
186
- diffs=pixel_diff(cur,tgt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  if not diffs: return None
188
- b=_boundary(cur)
189
- bdiffs=[(r,c,v) for r,c,v in diffs if b[r,c]>0]
190
- pool=bdiffs if bdiffs else diffs
191
  return pool[np.random.randint(len(pool))]
192
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
  # ── Feature extractor ─────────────────────────────────────────────────────────
194
 
195
  def extract_features(grid,num_colours=10):
@@ -337,17 +588,12 @@ class TinyAgent:
337
  feat=extract_features(grid).to(self.device)
338
  cur_state=str(state) if state else None
339
 
340
- # ── Im side: rank hypotheses ──────────────────────────────────────
341
- candidates=get_candidates(grid)
342
- best_name,best_cand,best_conf=(candidates[0] if candidates
343
- else ('none',grid,0.0))
344
 
345
  # Candidate proximity bonus
346
- if candidates:
347
- nn_name,nn_cand,nn_conf=min(
348
- candidates,
349
- key=lambda c:(grid!=c[1]).mean() if grid.shape==c[1].shape else 1.0)
350
- curr_dist=(grid!=nn_cand).mean() if grid.shape==nn_cand.shape else 1.0
351
  if curr_dist==0.0:
352
  cand_bonus=self.candidate_win_reward
353
  elif curr_dist<self.prev_candidate_dist:
@@ -356,7 +602,7 @@ class TinyAgent:
356
  cand_bonus=0.0
357
  self.prev_candidate_dist=curr_dist
358
  else:
359
- cand_bonus=0.0
360
 
361
  # Store shaped experience
362
  if self.prev_feat is not None:
@@ -386,21 +632,23 @@ class TinyAgent:
386
  if self.step_count%10==0 and len(self.buf)>=16:
387
  self._train()
388
 
389
- # ── Im → Re bridge: analytic action ──────────────────────────────
390
  analytic_action=None; analytic_meta={}
391
- if best_conf>=CONF_THRESHOLD and candidates:
392
- diffs=pixel_diff(grid,best_cand)
393
  if diffs:
394
- cell=most_urgent_diff(grid,best_cand)
395
  if cell is not None:
396
  r,c,tgt_color=cell
397
  H,W=grid.shape
398
  gy=min(63,max(0,int(r*64/H+32/H)))
399
  gx=min(63,max(0,int(c*64/W+32/W)))
400
  analytic_action=6
 
401
  analytic_meta={'x':gx,'y':gy,'cell':(r,c,tgt_color),
402
- 'hypothesis':best_name,'conf':best_conf,
403
- 'n_diffs':len(diffs),'candidates':candidates[:4]}
 
404
 
405
  # ── CNN fallback ──────────────────────────────────────────────────
406
  with torch.no_grad():
 
70
  s[y,:]=(grid[y-r:y,:]==grid[y+1:y+r+1,:][::-1,:]).mean()
71
  return s
72
 
73
+ # ── Re/Im board reader (inlined from arc_solver.py) ─────────────────────────
74
+
75
+ """
76
+ arc_solver.py — Re/Im board reader for ARC-AGI-3
77
+ =================================================
78
+
79
+ The board IS a complex object M = Re(M) + i·Im(M).
80
+
81
+ Re(M) = multiplicative structure: what colors exist, how many pixels,
82
+ where objects are, their bounding boxes, centroids, density.
83
+
84
+ Im(M) = additive structure: symmetry axes, boundary contour, gradient
85
+ flow direction, winding/curl β€” the "where pointed" information.
86
+
87
+ log separates them. iΒ· swaps them.
88
+
89
+ The answer is what you get by applying i· to the board:
90
+ "Read the Im side of the board → that tells you what the Re side
91
+ of the answer must look like → find the cells that need to change."
92
+
93
+ Pipeline:
94
+ read_board(grid) → (re, im, answer_grid, confidence, reasoning)
95
+ pixel_diff(current, answer) → list of (r, c, target_color)
96
+ most_urgent_diff(current, answer) → single most important cell (Re coords)
97
+ try_analytic_action(frame, available) → (action_id, data, name, confidence)
98
+ """
99
+
100
+ import numpy as np
101
+ from typing import Optional, Tuple, List, Dict
102
+
103
+ # ── Primitives ────────────────────────────────────────────────────────────────
104
+
105
+ def _sobel(f):
106
+ p = np.pad(f, 1, mode='edge')
107
+ gx = (-p[:-2,:-2]-2*p[1:-1,:-2]-p[2:,:-2]+p[:-2,2:]+2*p[1:-1,2:]+p[2:,2:])/8
108
+ gy = (-p[:-2,:-2]-2*p[:-2,1:-1]-p[:-2,2:]+p[2:,:-2]+2*p[2:,1:-1]+p[2:,2:])/8
109
+ return gx, gy
110
+
111
+ def _boundary(grid):
112
+ """Color-change boundary (Im side)."""
113
+ p = np.pad(grid, 1, mode='edge')
114
+ return ((p[1:-1,1:-1]!=p[:-2,1:-1])|(p[1:-1,1:-1]!=p[2:,1:-1])|
115
+ (p[1:-1,1:-1]!=p[1:-1,:-2])|(p[1:-1,1:-1]!=p[1:-1,2:])).astype(np.float32)
116
+
117
+ def _perimeter(grid):
118
+ """
119
+ Object perimeter β€” cells at the edge of nonzero regions.
120
+ Handles solid blocks (all same color, no color-change boundary).
121
+ This is the Cauchy contour for the Re side.
122
+ """
123
+ H, W = grid.shape
124
+ p = np.zeros((H,W), dtype=np.float32)
125
+ mask = grid > 0
126
+ if not mask.any(): return p
127
+ padded = np.pad(mask.astype(int), 1, constant_values=0)
128
+ for dy,dx in [(-1,0),(1,0),(0,-1),(0,1)]:
129
+ shifted = padded[1+dy:H+1+dy, 1+dx:W+1+dx]
130
+ p[mask & (shifted==0)] = 1
131
+ # Solid block: no cell has a zero neighbor β€” use outer ring
132
+ if p.sum() == 0:
133
+ p[0,:] = mask[0,:] .astype(float)
134
+ p[-1,:] = mask[-1,:].astype(float)
135
+ p[:,0] = mask[:,0] .astype(float)
136
+ p[:,-1] = mask[:,-1].astype(float)
137
+ return p
138
+
139
+ def _cc(mask):
140
+ labels = np.zeros_like(mask, dtype=np.int32); cur = 0; H,W = mask.shape
141
+ for r in range(H):
142
+ for c in range(W):
143
+ if mask[r,c] and labels[r,c]==0:
144
+ cur+=1; q=[(r,c)]; labels[r,c]=cur
145
+ while q:
146
+ y,x=q.pop()
147
+ for dy,dx in [(-1,0),(1,0),(0,-1),(0,1)]:
148
+ ny,nx=y+dy,x+dx
149
+ if 0<=ny<H and 0<=nx<W and mask[ny,nx] and labels[ny,nx]==0:
150
+ labels[ny,nx]=cur; q.append((ny,nx))
151
+ return labels
152
+
153
+ def _h_sym_at(grid, x):
154
+ r = min(x, grid.shape[1]-1-x)
155
+ if r <= 0: return 0.0
156
+ return float((grid[:, x-r:x] == grid[:, x+1:x+r+1][:,::-1]).mean())
157
+
158
+ def _v_sym_at(grid, y):
159
+ r = min(y, grid.shape[0]-1-y)
160
+ if r <= 0: return 0.0
161
+ return float((grid[y-r:y, :] == grid[y+1:y+r+1, :][::-1, :]).mean())
162
+
163
+
164
+ # ── Re/Im board reader ────────────────────────────────────────────────────────
165
+
166
def _mirror_complete(grid, axis_pos, from_low, along_rows):
    """Reflect one side of ``grid`` across the line at ``axis_pos``.

    Only cells that are still 0 on the receiving side are written.
    ``along_rows=True`` mirrors rows about row ``axis_pos``; otherwise
    columns are mirrored about column ``axis_pos``. ``from_low`` selects the
    low-index side (top/left) as the source. Returns a new array.
    """
    out = grid.copy()
    # Work on (possibly transposed) views so one loop serves both axes;
    # writes through ``dst`` land in ``out``.
    src = grid if along_rows else grid.T
    dst = out if along_rows else out.T
    n = src.shape[0]
    sources = range(axis_pos) if from_low else range(axis_pos + 1, n)
    for s in sources:
        m = 2 * axis_pos - s  # reflection of index s about the axis
        if 0 <= m < n:
            empty = dst[m] == 0
            dst[m, empty] = src[s, empty]
    return out


def read_board(grid: np.ndarray):
    """
    Read the board as a complex object and derive a candidate answer grid.

    Six heuristics ("signals") are tried in a fixed order: perimeter of a
    solid single-colour fill, horizontal mirror completion, vertical mirror
    completion, interior fill, gravity, and colour increment. A signal's
    candidate replaces the current answer only when its confidence is
    strictly higher, so the returned answer, confidence and the last
    reasoning entry always describe the same signal.

    Parameters
    ----------
    grid : 2-D integer array; 0 is treated as empty, 1-9 as colours.

    Returns
    -------
    re         : dict  — per colour: 'count', 'objects', 'centroid', 'bbox'
    im         : dict  — 'best_h', 'best_v', 'b_ratio', 'b_px', 'gx_mag',
                         'gy_mag', 'suspended', 'curl_max'
    answer     : np.ndarray — derived answer grid (or None if nothing fired)
    confidence : float 0-1
    reasoning  : list of strings, one per adopted signal, in adoption order
    """
    H, W = grid.shape
    gx, gy = _sobel(grid.astype(np.float32) / 9)
    bound = _boundary(grid)

    # ── Re side ───────────────────────────────────────────────────────────
    colors = [c for c in range(1, 10) if (grid == c).any()]
    re = {}
    for color in colors:
        mask = grid == color
        ys, xs = np.where(mask)
        re[color] = {
            'count': int(mask.sum()),
            'objects': int(_cc(mask).max()),
            'centroid': (float(ys.mean()), float(xs.mean())),
            'bbox': (int(ys.min()), int(xs.min()), int(ys.max()), int(xs.max())),
        }
    total_px = int((grid > 0).sum())

    # ── Im side ───────────────────────────────────────────────────────────
    # Symmetry axes: best-scoring interior column / row.
    h_scores = [(x, _h_sym_at(grid, x)) for x in range(1, W - 1)]
    v_scores = [(y, _v_sym_at(grid, y)) for y in range(1, H - 1)]
    best_h = max(h_scores, key=lambda s: s[1]) if h_scores else (W // 2, 0.0)
    best_v = max(v_scores, key=lambda s: s[1]) if v_scores else (H // 2, 0.0)

    # Boundary (Cauchy contour)
    b_px = int(bound.sum())
    b_ratio = b_px / max(total_px, 1)

    # Gradient field
    gx_mag = float(np.abs(gx).mean())
    gy_mag = float(np.abs(gy).mean())

    # Suspended pixels: nonzero cells with an empty cell directly below.
    suspended = int(((grid[:-1, :] > 0) & (grid[1:, :] == 0)).sum())

    # Winding / curl of the gradient field (central differences).
    p_gx = np.pad(gx, 1, mode='edge')
    p_gy = np.pad(gy, 1, mode='edge')
    curl = ((p_gy[1:-1, 2:] - p_gy[1:-1, :-2]) / 2 -
            (p_gx[2:, 1:-1] - p_gx[:-2, 1:-1]) / 2)
    curl_max = float(np.abs(curl).max())

    im = {
        'best_h': best_h,   # (x, score)
        'best_v': best_v,   # (y, score)
        'b_ratio': b_ratio,
        'b_px': b_px,
        'gx_mag': gx_mag,
        'gy_mag': gy_mag,
        'suspended': suspended,
        'curl_max': curl_max,
    }

    # ── i· column swap: apply Im → derive answer (Re coords) ──────────────
    reasoning = []
    answer = None
    confidence = 0.0

    def _adopt(candidate, conf, why):
        # A weaker signal must never displace a stronger one.
        nonlocal answer, confidence
        if conf > confidence:
            answer = candidate
            confidence = float(conf)
            reasoning.append(why)

    # ── Signal 1: one colour fills everything → extract the perimeter
    if len(colors) == 1 and total_px > 0:
        single_color = colors[0]
        filled_ratio = total_px / (H * W)
        perim = _perimeter(grid)
        # Any empty cell off the perimeter counts as "already hollow".
        already_hollow = bool(((grid == 0) & (perim == 0)).any())
        if filled_ratio > 0.5 and not already_hollow:
            candidate = np.zeros_like(grid)
            candidate[perim > 0] = grid[perim > 0]
            perim_ratio = float(perim.sum()) / max(total_px, 1)
            _adopt(
                candidate, filled_ratio * 0.88,
                f"Re: 1 color={single_color}, fill={filled_ratio:.2f}  "
                f"Im: perimeter={int(perim.sum())}px (ratio={perim_ratio:.2f})  "
                f"i·: Cauchy perimeter IS the answer")

    # ── Signal 2: strong H-symmetry + asymmetric Re mass
    if confidence < 0.75:
        hx, hs = best_h
        if hs > 0.65:
            # The axis column belongs to neither side.
            left_px = int((grid[:, :hx] > 0).sum())
            right_px = int((grid[:, hx + 1:] > 0).sum())
            asymmetry = abs(left_px - right_px) / max(left_px + right_px, 1)
            if asymmetry > 0.25:
                candidate = _mirror_complete(
                    grid, hx, from_low=left_px > right_px, along_rows=False)
                _adopt(
                    candidate, hs * asymmetry * 0.95,
                    f"Im H-sym={hs:.2f} at x={hx}  "
                    f"Re left={left_px} right={right_px} (asym={asymmetry:.2f})  "
                    f"i·: complete H mirror")

    # ── Signal 3: strong V-symmetry + asymmetric top/bottom
    if confidence < 0.55:
        vy, vs = best_v
        if vs > 0.65:
            top_px = int((grid[:vy, :] > 0).sum())
            bot_px = int((grid[vy + 1:, :] > 0).sum())
            asymmetry = abs(top_px - bot_px) / max(top_px + bot_px, 1)
            if asymmetry > 0.25:
                candidate = _mirror_complete(
                    grid, vy, from_low=top_px > bot_px, along_rows=True)
                _adopt(
                    candidate, vs * asymmetry * 0.90,
                    f"Im V-sym={vs:.2f} at y={vy}  "
                    f"Re top={top_px} bot={bot_px} (asym={asymmetry:.2f})  "
                    f"i·: complete V mirror")

    # ── Signal 4: hollow Re structure + unfilled interior
    if confidence < 0.5:
        # Empty cells that do not touch any colour change.
        interior = (grid == 0) & (bound == 0)
        if interior.any() and total_px > 0:
            counts = np.bincount(grid[grid > 0].flatten(), minlength=10)
            dom = int(np.argmax(counts[1:]) + 1)
            candidate = grid.copy()
            candidate[interior] = dom
            _adopt(
                candidate,
                interior.sum() / max(1, (grid == 0).sum()) * 0.80,
                f"Re: hollow object, dom_color={dom}  "
                f"Im: interior={int(interior.sum())}px unfilled  "
                f"i·: fill interior — Im interior → Re color")

    # ── Signal 5: suspended pixels + Im gradient direction → gravity
    if confidence < 0.45 and suspended > 0:
        direction = 'down' if gy_mag >= gx_mag else 'right'
        candidate = np.zeros_like(grid)
        if direction == 'down':
            for c in range(W):
                vals = grid[:, c][grid[:, c] > 0]
                if len(vals):
                    candidate[H - len(vals):H, c] = vals
        else:
            for r in range(H):
                vals = grid[r, :][grid[r, :] > 0]
                if len(vals):
                    candidate[r, W - len(vals):W] = vals
        _adopt(
            candidate, min(0.80, suspended / max(total_px, 1) * 2.5),
            f"Re suspended={suspended}px  "
            f"Im gy_mag={gy_mag:.3f} gx_mag={gx_mag:.3f}  "
            f"i·: gravity {direction}")

    # ── Signal 6: colour shift (only when no colour would wrap past 9)
    if confidence < 0.35 and colors and all(v < 9 for v in colors):
        candidate = grid.copy()
        nonzero = grid > 0
        candidate[nonzero] = (grid[nonzero] % 9) + 1
        _adopt(
            candidate, 0.35,
            f"Re: colors {colors} all < 9  "
            f"Im: uniform distribution suggests Re→Im shift  "
            f"i·: increment all colors +1")

    return re, im, answer, confidence, reasoning
363
+
364
+
365
+ # ── Re-side: exact cell targeting ────────────────────────────────────────────
366
+
367
def pixel_diff(current: np.ndarray, target: np.ndarray):
    """All differing cells: [(r, c, target_color), ...]

    Cells are listed in row-major order and every element of each tuple is
    a plain Python int. Returns an empty list when the two grids have
    different shapes or no cell differs.
    """
    if current.shape != target.shape:
        return []
    # np.nonzero walks the mismatch mask in row-major order, which matches
    # a nested row/column scan without the per-cell Python overhead.
    rows, cols = np.nonzero(current != target)
    wanted = target[rows, cols]
    return [(int(r), int(c), int(v))
            for r, c, v in zip(rows.tolist(), cols.tolist(), wanted.tolist())]
374
+
375
def most_urgent_diff(current: np.ndarray, target: np.ndarray):
    """
    Re-side: pick one cell to fix next, boundary cells first.

    Collects every cell where ``current`` differs from ``target``. If any of
    them lies on the colour-change boundary of ``current``, the choice is
    restricted to those. One cell ``(r, c, target_color)`` is then drawn
    uniformly at random with ``np.random.randint``. Returns None when there
    is nothing to fix.
    """
    candidates = pixel_diff(current, target)
    if len(candidates) == 0:
        return None
    edge = _boundary(current)
    on_edge = [cell for cell in candidates if edge[cell[0], cell[1]] > 0]
    if on_edge:
        candidates = on_edge
    pick = np.random.randint(len(candidates))
    return candidates[pick]
387
 
388
+
389
+ # ── Main entry point ─────────────────────────────────────────────────────────
390
+
391
+ CONFIDENCE_THRESHOLD = 0.38
392
+
393
def try_analytic_action(
    frame_2d: np.ndarray,
    available_actions,
) -> Tuple[Optional[int], Optional[dict], str, float]:
    """
    Read the board, derive the answer, and choose an action that moves the
    board toward it.

    Parameters
    ----------
    frame_2d          : 2-D grid, or None.
    available_actions : iterable of action ids, either ints or objects with
                        a ``.value``; a falsy value means actions 1-6.

    Returns (action_id, action_data, solver_name, confidence).
    ``action_id`` is None when no action is proposed; ``solver_name`` then
    carries the reason: 'none', 'low_confidence', 'already_matches' or
    'no_action_mapping'.
    """
    if frame_2d is None:
        return None, None, 'none', 0.0

    avail_ids = {
        int(a.value if hasattr(a, 'value') else a)
        for a in (available_actions or range(1, 7))
    }

    # Read the board
    re, im, answer, confidence, reasoning = read_board(frame_2d)

    if answer is None or confidence < CONFIDENCE_THRESHOLD:
        return None, None, 'low_confidence', confidence

    # Find the diff (Re side: exact wrong cells)
    if not pixel_diff(frame_2d, answer):
        return None, None, 'already_matches', confidence

    # The most recent reasoning entry belongs to the signal that produced
    # the final answer; earlier entries may describe superseded signals.
    winning = reasoning[-1] if reasoning else ''
    solver_name = winning.split('→')[0].strip() if winning else 're_im'

    # Emit ACTION6 at the most urgent Re-side coordinate
    if 6 in avail_ids:
        cell = most_urgent_diff(frame_2d, answer)
        if cell is not None:
            r, c, _ = cell
            H, W = frame_2d.shape
            # Centre of the grid cell, scaled to a 64x64 coordinate space.
            game_y = min(63, max(0, int(r * 64 / H + 32 / H)))
            game_x = min(63, max(0, int(c * 64 / W + 32 / W)))
            return 6, {'x': game_x, 'y': game_y}, solver_name, confidence

    # Fallback: button actions. Reasoning text is free-form ("complete H
    # mirror", "gravity down"), so match case-insensitively and treat '_'
    # like a space.
    normalized = winning.lower().replace('_', ' ')
    buttons = (('h mirror', 1), ('v mirror', 2), ('rotate', 3), ('gravity', 4))
    for keyword, a_id in buttons:
        if keyword in normalized and a_id in avail_ids:
            return a_id, None, solver_name, confidence

    return None, None, 'no_action_mapping', confidence
440
+
441
+
442
+
443
+
444
  # ── Feature extractor ─────────────────────────────────────────────────────────
445
 
446
  def extract_features(grid,num_colours=10):
 
588
  feat=extract_features(grid).to(self.device)
589
  cur_state=str(state) if state else None
590
 
591
+ # ── Re/Im: read the board as a complex object ────────────────────
592
+ re,im,cand_answer,cand_conf,cand_reasoning=read_board(grid)
 
 
593
 
594
  # Candidate proximity bonus
595
+ if cand_answer is not None and cand_conf>=0.35:
596
+ curr_dist=(grid!=cand_answer).mean() if grid.shape==cand_answer.shape else 1.0
 
 
 
597
  if curr_dist==0.0:
598
  cand_bonus=self.candidate_win_reward
599
  elif curr_dist<self.prev_candidate_dist:
 
602
  cand_bonus=0.0
603
  self.prev_candidate_dist=curr_dist
604
  else:
605
+ cand_bonus=0.0; cand_answer=None
606
 
607
  # Store shaped experience
608
  if self.prev_feat is not None:
 
632
  if self.step_count%10==0 and len(self.buf)>=16:
633
  self._train()
634
 
635
+ # ── Im → Re bridge: read board → derive answer → click exact cell ──
636
  analytic_action=None; analytic_meta={}
637
+ if cand_answer is not None and cand_conf>=CONF_THRESHOLD:
638
+ diffs=pixel_diff(grid,cand_answer)
639
  if diffs:
640
+ cell=most_urgent_diff(grid,cand_answer)
641
  if cell is not None:
642
  r,c,tgt_color=cell
643
  H,W=grid.shape
644
  gy=min(63,max(0,int(r*64/H+32/H)))
645
  gx=min(63,max(0,int(c*64/W+32/W)))
646
  analytic_action=6
647
+ reasoning_str=' | '.join(cand_reasoning[:2]) if cand_reasoning else 'read_board'
648
  analytic_meta={'x':gx,'y':gy,'cell':(r,c,tgt_color),
649
+ 'hypothesis':reasoning_str,'conf':cand_conf,
650
+ 'n_diffs':len(diffs),
651
+ 'candidates':[(reasoning_str,cand_answer,cand_conf)]}
652
 
653
  # ── CNN fallback ──────────────────────────────────────────────────
654
  with torch.no_grad():