Taylor commited on
Commit
32ea598
·
1 Parent(s): 47908f7

feat: Buleyean RL interactive playground

Browse files

Rejection-only training surface. Feed in rejected actions,
watch the complement target sharpen without any chosen examples.
3D Three.js visualization of void boundary accumulation.

THM-BULEYEAN-POSITIVITY: P(i) > 0 for all i (the sliver)
THM-FAILURE-STRICTLY-MORE-INFORMATIVE: N-1 bits vs 1 bit

Files changed (3) hide show
  1. README.md +8 -8
  2. app.py +321 -0
  3. requirements.txt +1 -0
README.md CHANGED
@@ -1,12 +1,12 @@
1
  ---
2
- title: Buleyean Rl
3
- emoji: 📉
4
- colorFrom: pink
5
- colorTo: blue
6
  sdk: gradio
7
- sdk_version: 6.9.0
 
8
  app_file: app.py
9
- pinned: false
 
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Buleyean RL
3
+ emoji: "\U0001F573\uFE0F"
4
+ colorFrom: red
5
+ colorTo: red
6
  sdk: gradio
7
+ sdk_version: 5.23.0
8
+ python_version: "3.11"
9
  app_file: app.py
10
+ pinned: true
11
+ license: mit
12
  ---
 
 
app.py ADDED
@@ -0,0 +1,321 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Buleyean RL demo.
3
+
4
+ Reject actions, accumulate a void boundary, and watch the complement target
5
+ sharpen without any chosen column.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+
12
+ import gradio as gr
13
+
14
+
15
+ def parse_labels(raw_labels: str) -> list[str]:
16
+ labels = [label.strip() for label in raw_labels.split(",") if label.strip()]
17
+ return labels or ["answer", "hedge", "repair", "refuse"]
18
+
19
+
20
+ def parse_rejection_sequence(raw_sequence: str, labels: list[str]) -> list[str]:
21
+ entries = [entry.strip() for entry in raw_sequence.split(",") if entry.strip()]
22
+ label_set = set(labels)
23
+ return [entry for entry in entries if entry in label_set]
24
+
25
+
26
+ def weight(rounds: int, rejections: int) -> int:
27
+ return rounds - min(rejections, rounds) + 1
28
+
29
+
30
+ def build_scene_html(action_rows: list[list[object]], preferred_action: str) -> str:
31
+ payload = json.dumps(
32
+ {
33
+ "rows": action_rows,
34
+ "preferredAction": preferred_action,
35
+ }
36
+ )
37
+ return f"""
38
+ <div id="buleyean-rl-scene" style="height: 430px; width: 100%; border-radius: 22px; overflow: hidden; background:
39
+ radial-gradient(circle at top, #24090b 0%, #120607 55%, #050203 100%);"></div>
40
+ <script>
41
+ (async () => {{
42
+ const mount = document.getElementById("buleyean-rl-scene");
43
+ if (!mount) return;
44
+ const payload = {payload};
45
+ window.__spaceSceneDisposers = window.__spaceSceneDisposers || {{}};
46
+ if (window.__spaceSceneDisposers.buleyeanRl) {{
47
+ window.__spaceSceneDisposers.buleyeanRl();
48
+ }}
49
+ const loadScript = (src) => new Promise((resolve, reject) => {{
50
+ const existing = Array.from(document.scripts).find((script) => script.src === src);
51
+ if (existing) {{
52
+ if (existing.dataset.loaded === "true") {{
53
+ resolve();
54
+ }} else {{
55
+ existing.addEventListener("load", resolve, {{ once: true }});
56
+ existing.addEventListener("error", reject, {{ once: true }});
57
+ }}
58
+ return;
59
+ }}
60
+ const script = document.createElement("script");
61
+ script.src = src;
62
+ script.async = true;
63
+ script.onload = () => {{
64
+ script.dataset.loaded = "true";
65
+ resolve();
66
+ }};
67
+ script.onerror = reject;
68
+ document.head.appendChild(script);
69
+ }});
70
+ await loadScript("https://unpkg.com/three@0.160.0/build/three.min.js");
71
+ await loadScript("https://unpkg.com/three@0.160.0/examples/js/controls/OrbitControls.js");
72
+ const THREE = window.THREE;
73
+ mount.innerHTML = "";
74
+ const width = mount.clientWidth || 900;
75
+ const height = mount.clientHeight || 430;
76
+ const renderer = new THREE.WebGLRenderer({{ antialias: true, alpha: true }});
77
+ renderer.setPixelRatio(window.devicePixelRatio || 1);
78
+ renderer.setSize(width, height);
79
+ mount.appendChild(renderer.domElement);
80
+
81
+ const scene = new THREE.Scene();
82
+ const camera = new THREE.PerspectiveCamera(46, width / height, 0.1, 100);
83
+ camera.position.set(0, 7, 16);
84
+ const controls = new THREE.OrbitControls(camera, renderer.domElement);
85
+ controls.enableDamping = true;
86
+ controls.dampingFactor = 0.05;
87
+
88
+ scene.add(new THREE.AmbientLight(0xffe4e6, 0.65));
89
+ const redLight = new THREE.PointLight(0xfb7185, 1.2, 80);
90
+ redLight.position.set(-6, 11, 8);
91
+ scene.add(redLight);
92
+ const cyanLight = new THREE.PointLight(0x7dd3fc, 1.1, 80);
93
+ cyanLight.position.set(8, 8, -3);
94
+ scene.add(cyanLight);
95
+ const grid = new THREE.GridHelper(16, payload.rows.length * 2, 0x4c0519, 0x1f2937);
96
+ grid.position.y = -0.12;
97
+ scene.add(grid);
98
+
99
+ const group = new THREE.Group();
100
+ payload.rows.forEach((row, index) => {{
101
+ const [label, rejections, complementWeight, probability] = row;
102
+ const x = (index - (payload.rows.length - 1) / 2) * 2.2;
103
+ const rejectionHeight = Math.max(Number(rejections), 0.15) + 0.25;
104
+ const weightHeight = Math.max(Number(complementWeight) * 0.42, 0.2);
105
+
106
+ const rejectionBar = new THREE.Mesh(
107
+ new THREE.BoxGeometry(0.72, rejectionHeight, 0.72),
108
+ new THREE.MeshStandardMaterial({{
109
+ color: 0xfb7185,
110
+ emissive: 0xfb7185,
111
+ emissiveIntensity: 0.14,
112
+ metalness: 0.2,
113
+ roughness: 0.32,
114
+ }})
115
+ );
116
+ rejectionBar.position.set(x - 0.44, rejectionHeight / 2, 0);
117
+ group.add(rejectionBar);
118
+
119
+ const weightBar = new THREE.Mesh(
120
+ new THREE.BoxGeometry(0.72, weightHeight, 0.72),
121
+ new THREE.MeshStandardMaterial({{
122
+ color: label === payload.preferredAction ? 0xffffff : 0x7dd3fc,
123
+ emissive: label === payload.preferredAction ? 0xffffff : 0x7dd3fc,
124
+ emissiveIntensity: label === payload.preferredAction ? 0.36 : 0.16,
125
+ metalness: 0.35,
126
+ roughness: 0.18,
127
+ }})
128
+ );
129
+ weightBar.position.set(x + 0.44, weightHeight / 2, 0);
130
+ group.add(weightBar);
131
+
132
+ if (label === payload.preferredAction) {{
133
+ const halo = new THREE.Mesh(
134
+ new THREE.TorusGeometry(0.9, 0.08, 18, 48),
135
+ new THREE.MeshStandardMaterial({{
136
+ color: 0xffffff,
137
+ emissive: 0xffffff,
138
+ emissiveIntensity: 0.4,
139
+ }})
140
+ );
141
+ halo.rotation.x = Math.PI / 2;
142
+ halo.position.set(x + 0.44, weightHeight + 0.4, 0);
143
+ group.add(halo);
144
+ }}
145
+
146
+ const probabilityOrb = new THREE.Mesh(
147
+ new THREE.SphereGeometry(0.12 + Number(probability) * 0.45, 16, 16),
148
+ new THREE.MeshStandardMaterial({{
149
+ color: 0xf8fafc,
150
+ emissive: 0xf8fafc,
151
+ emissiveIntensity: 0.35,
152
+ }})
153
+ );
154
+ probabilityOrb.position.set(x + 0.44, weightHeight + 0.28, -1.2);
155
+ group.add(probabilityOrb);
156
+ }});
157
+ scene.add(group);
158
+
159
+ let frame = 0;
160
+ const renderLoop = () => {{
161
+ frame += 1;
162
+ group.rotation.y = Math.sin(frame * 0.003) * 0.2;
163
+ group.children.forEach((child, index) => {{
164
+ if (child.geometry && child.geometry.type === "SphereGeometry") {{
165
+ child.position.z = -1.2 + Math.sin(frame * 0.02 + index) * 0.35;
166
+ }}
167
+ }});
168
+ controls.update();
169
+ renderer.render(scene, camera);
170
+ window.__spaceSceneDisposers.buleyeanRl.frame = requestAnimationFrame(renderLoop);
171
+ }};
172
+
173
+ const resize = () => {{
174
+ const nextWidth = mount.clientWidth || 900;
175
+ const nextHeight = mount.clientHeight || 430;
176
+ camera.aspect = nextWidth / nextHeight;
177
+ camera.updateProjectionMatrix();
178
+ renderer.setSize(nextWidth, nextHeight);
179
+ }};
180
+ window.addEventListener("resize", resize);
181
+ window.__spaceSceneDisposers.buleyeanRl = () => {{
182
+ cancelAnimationFrame(window.__spaceSceneDisposers.buleyeanRl.frame);
183
+ window.removeEventListener("resize", resize);
184
+ controls.dispose();
185
+ renderer.dispose();
186
+ mount.innerHTML = "";
187
+ }};
188
+ renderLoop();
189
+ }})().catch((error) => {{
190
+ const mount = document.getElementById("buleyean-rl-scene");
191
+ if (mount) {{
192
+ mount.innerHTML = `<div style="padding: 1rem; color: #fecdd3;">three.js scene failed to load: ${{error}}</div>`;
193
+ }}
194
+ }});
195
+ </script>
196
+ """
197
+
198
+
199
+ def run_demo(raw_labels: str, raw_sequence: str, fork_width: int):
200
+ labels = parse_labels(raw_labels)
201
+ sequence = parse_rejection_sequence(raw_sequence, labels)
202
+ counts = {label: 0 for label in labels}
203
+ trajectory_rows = []
204
+
205
+ for step_index, rejected in enumerate(sequence, start=1):
206
+ counts[rejected] += 1
207
+ weights = {label: weight(step_index, counts[label]) for label in labels}
208
+ total_weight = sum(weights.values())
209
+ best_label = max(labels, key=lambda label: (weights[label], label))
210
+ trajectory_rows.append(
211
+ [
212
+ step_index,
213
+ rejected,
214
+ best_label,
215
+ round(weights[best_label] / total_weight, 6),
216
+ ]
217
+ )
218
+
219
+ rounds = max(len(sequence), 1)
220
+ final_weights = {label: weight(rounds, counts[label]) for label in labels}
221
+ total_weight = sum(final_weights.values())
222
+ action_rows = []
223
+ for label in labels:
224
+ action_rows.append(
225
+ [
226
+ label,
227
+ counts[label],
228
+ final_weights[label],
229
+ round(final_weights[label] / total_weight, 6),
230
+ ]
231
+ )
232
+
233
+ preferred_action = max(labels, key=lambda label: (final_weights[label], label))
234
+ success_data = len(sequence)
235
+ failure_data = len(sequence) * (fork_width - 1)
236
+
237
+ summary = f"""
238
+ ## What you can see
239
+
240
+ - Logged rejection rounds: `{len(sequence)}`
241
+ - Fork width: `{fork_width}`
242
+ - Success-only data points: `{success_data}`
243
+ - Failure data points: `{failure_data}`
244
+ - Failure multiplier: `{fork_width - 1}x`
245
+ - Current complement winner: `{preferred_action}`
246
+
247
+ No chosen example was required to produce the target distribution below.
248
+ The void boundary alone is enough to rank the actions.
249
+ """
250
+
251
+ scene_html = build_scene_html(action_rows, preferred_action)
252
+ return scene_html, summary, action_rows, trajectory_rows
253
+
254
+
255
+ with gr.Blocks(
256
+ title="Buleyean RL",
257
+ theme=gr.themes.Base(primary_hue="red"),
258
+ ) as demo:
259
+ gr.Markdown(
260
+ """
261
+ # Buleyean RL
262
+
263
+ Feed in rejected actions only. The complement target updates from what the
264
+ policy should not do. Talk is cheap; the table below is the point.
265
+ """
266
+ )
267
+
268
+ labels = gr.Textbox(
269
+ label="Action labels",
270
+ value="answer, hedge, repair, refuse",
271
+ )
272
+ rejection_sequence = gr.Textbox(
273
+ label="Rejected actions in order",
274
+ value="refuse, hedge, refuse, repair, refuse, refuse",
275
+ lines=2,
276
+ )
277
+ fork_width = gr.Slider(2, 8, value=4, step=1, label="Fork width")
278
+
279
+ run_button = gr.Button("Walk the void", variant="primary")
280
+ scene = gr.HTML()
281
+ summary = gr.Markdown()
282
+ action_table = gr.Dataframe(
283
+ headers=["Action", "Rejections", "Complement weight", "Probability"],
284
+ interactive=False,
285
+ )
286
+ trajectory_table = gr.Dataframe(
287
+ headers=["Step", "Rejected action", "Current winner", "Winner probability"],
288
+ interactive=False,
289
+ )
290
+
291
+ gr.Examples(
292
+ examples=[
293
+ [
294
+ "answer, hedge, repair, refuse",
295
+ "refuse, hedge, refuse, repair, refuse, refuse",
296
+ 4,
297
+ ],
298
+ [
299
+ "build, stall, bluff, concede",
300
+ "stall, bluff, stall, stall, concede",
301
+ 5,
302
+ ],
303
+ ],
304
+ inputs=[labels, rejection_sequence, fork_width],
305
+ )
306
+
307
+ run_button.click(
308
+ run_demo,
309
+ inputs=[labels, rejection_sequence, fork_width],
310
+ outputs=[scene, summary, action_table, trajectory_table],
311
+ )
312
+
313
+ demo.load(
314
+ run_demo,
315
+ inputs=[labels, rejection_sequence, fork_width],
316
+ outputs=[scene, summary, action_table, trajectory_table],
317
+ )
318
+
319
+
320
+ if __name__ == "__main__":
321
+ demo.launch()
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ gradio>=5.0.0,<6.0.0