amirali1985 committed on
Commit
13fc2ef
·
1 Parent(s): 1309cfb

LaTeX: move tab:quirke-subtasks to appendix, update main body ref

Browse files
Files changed (1) hide show
  1. app.py +30 -35
app.py CHANGED
@@ -51,47 +51,13 @@ sum-9 boundary (SS), and \texttt{t16}/\texttt{t3} at the trivial positions (SC/S
51
  At each answer-digit position $n$, the local computation falls into one
52
  of ten mutually exclusive subtasks determined by the operand digits and
53
  the carry or borrow state propagating from lower positions
54
- (Table~\ref{tab:quirke-subtasks}).
55
  A \emph{carry cascade} arises when consecutive digit pairs sum to exactly
56
  9 (SS positions in addition): whether a carry propagates through such a
57
  run depends on a single carry entering from the right, requiring the model
58
  to track state across multiple positions.
59
  Borrow cascades (UD) are the analogous structure in subtraction.
60
 
61
- \begin{table}[h]
62
- \centering
63
- \small
64
- \setlength{\tabcolsep}{6pt}
65
- \begin{tabular}{llp{8.2cm}}
66
- \toprule
67
- & Label & Condition at digit position $n$ \\
68
- \midrule
69
- \multirow{5}{*}{\rotatebox[origin=c]{90}{Addition\;}}
70
- & \textbf{SA} & $d_1{+}d_2 \leq 8$;\; no carry in or out \\
71
- & \textbf{SC} & $d_1{+}d_2 \geq 10$;\; generates a carry \\
72
- & \textbf{SS} & $d_1{+}d_2 = 9$;\; carry state \emph{uncertain}
73
- (cascade boundary) \\
74
- & \textbf{UC} & carry arrives from position $n{-}1$;\;
75
- answer digit depends on it \\
76
- & \textbf{US} & carry propagates through a run of SS positions
77
- (sum-of-9 cascade) \\
78
- \midrule
79
- \multirow{5}{*}{\rotatebox[origin=c]{90}{Subtraction\;}}
80
- & \textbf{MD} & $d_1 \geq d_2$;\; no borrow \\
81
- & \textbf{MB} & $d_1 < d_2$;\; generates a borrow \\
82
- & \textbf{ME} & $d_1 = d_2$;\; borrow state \emph{uncertain} \\
83
- & \textbf{UB} & borrow arrives from position $n{-}1$ \\
84
- & \textbf{UD} & borrow propagates through a run of ME positions \\
85
- \bottomrule
86
- \end{tabular}
87
- \caption{Per-digit subtask labels for six-digit addition and
88
- subtraction~\citep{quirke_2024_addsub_preprint}.
89
- Cascades (US, UD) are the hardest: the answer digit cannot
90
- be determined locally and requires resolving multi-position
91
- carry/borrow propagation.}
92
- \label{tab:quirke-subtasks}
93
- \end{table}
94
-
95
  \paragraph{Models and training.}
96
  We evaluate three undersized architectures:
97
  \texttt{1L/2H/256d} (1 transformer layer, 2 attention heads, hidden size 256),
@@ -356,6 +322,35 @@ LATEX_APPENDIX = r"""% ═══════════════════
356
  \section{Arithmetic case study: interpretability analysis}
357
  \label{app:arithmetic}
358
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
359
  \paragraph{Setup.}
360
  All interpretability analyses use model
361
  \texttt{add\_sub\_sorl\_v1\_abs30\_K1\_100K\_2L1H128d}
 
51
  At each answer-digit position $n$, the local computation falls into one
52
  of ten mutually exclusive subtasks determined by the operand digits and
53
  the carry or borrow state propagating from lower positions
54
+ (Table~\ref{tab:quirke-subtasks} in Appendix~\ref{app:arithmetic}).
55
  A \emph{carry cascade} arises when consecutive digit pairs sum to exactly
56
  9 (SS positions in addition): whether a carry propagates through such a
57
  run depends on a single carry entering from the right, requiring the model
58
  to track state across multiple positions.
59
  Borrow cascades (UD) are the analogous structure in subtraction.
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  \paragraph{Models and training.}
62
  We evaluate three undersized architectures:
63
  \texttt{1L/2H/256d} (1 transformer layer, 2 attention heads, hidden size 256),
 
322
  \section{Arithmetic case study: interpretability analysis}
323
  \label{app:arithmetic}
324
 
325
+ \begin{table}[htbp]
326
+ \centering\small
327
+ \setlength{\tabcolsep}{6pt}
328
+ \begin{tabular}{llp{7.8cm}}
329
+ \toprule
330
+ & Label & Condition at digit position $n$ \\
331
+ \midrule
332
+ \multirow{5}{*}{\rotatebox[origin=c]{90}{Addition\;}}
333
+ & \textbf{SA} & $d_1{+}d_2 \leq 8$;\; no carry in or out \\
334
+ & \textbf{SC} & $d_1{+}d_2 \geq 10$;\; generates a carry \\
335
+ & \textbf{SS} & $d_1{+}d_2 = 9$;\; carry state \emph{uncertain} (cascade boundary) \\
336
+ & \textbf{UC} & carry arrives from position $n{-}1$;\; answer digit depends on it \\
337
+ & \textbf{US} & carry propagates through a run of SS positions (sum-of-9 cascade) \\
338
+ \midrule
339
+ \multirow{5}{*}{\rotatebox[origin=c]{90}{Subtraction\;}}
340
+ & \textbf{MD} & $d_1 > d_2$;\; no borrow \\
341
+ & \textbf{MB} & $d_1 < d_2$;\; generates a borrow \\
342
+ & \textbf{ME} & $d_1 = d_2$;\; borrow state \emph{uncertain} \\
343
+ & \textbf{UB} & borrow arrives from position $n{-}1$ \\
344
+ & \textbf{UD} & borrow propagates through a run of ME positions \\
345
+ \bottomrule
346
+ \end{tabular}
347
+ \caption{Per-digit subtask labels for six-digit addition and
348
+ subtraction~\citep{quirke_2024_addsub_preprint}.
349
+ Cascades (US, UD) require tracking carry/borrow state across
350
+ multiple positions and are the hardest splits.}
351
+ \label{tab:quirke-subtasks}
352
+ \end{table}
353
+
354
  \paragraph{Setup.}
355
  All interpretability analyses use model
356
  \texttt{add\_sub\_sorl\_v1\_abs30\_K1\_100K\_2L1H128d}