Spaces:

forkjoin-ai
/

buleyean-rl

Sleeping

Taylor committed 27 days ago

Commit

32ea598

1 Parent(s): 47908f7

feat: Buleyean RL interactive playground

Rejection-only training surface. Feed in rejected actions,
watch the complement target sharpen without any chosen examples.
3D Three.js visualization of void boundary accumulation.

THM-BULEYEAN-POSITIVITY: P(i) > 0 for all i (the sliver)
THM-FAILURE-STRICTLY-MORE-INFORMATIVE: N-1 bits vs 1 bit

Files changed (3) hide show

README.md +8 -8
app.py +321 -0
requirements.txt +1 -0

README.md CHANGED Viewed

@@ -1,12 +1,12 @@
 ---
-title: Buleyean Rl
-emoji: 📉
-colorFrom: pink
-colorTo: blue
 sdk: gradio
-sdk_version: 6.9.0
 app_file: app.py
-pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Buleyean RL
+emoji: "\U0001F573\uFE0F"
+colorFrom: red
+colorTo: red
 sdk: gradio
+sdk_version: 5.23.0
+python_version: "3.11"
 app_file: app.py
+pinned: true
+license: mit
 ---

app.py ADDED Viewed

	@@ -0,0 +1,321 @@

+"""
+Buleyean RL demo.
+Reject actions, accumulate a void boundary, and watch the complement target
+sharpen without any chosen column.
+"""
+from __future__ import annotations
+import json
+import gradio as gr
+def parse_labels(raw_labels: str) -> list[str]:
+    labels = [label.strip() for label in raw_labels.split(",") if label.strip()]
+    return labels or ["answer", "hedge", "repair", "refuse"]
+def parse_rejection_sequence(raw_sequence: str, labels: list[str]) -> list[str]:
+    entries = [entry.strip() for entry in raw_sequence.split(",") if entry.strip()]
+    label_set = set(labels)
+    return [entry for entry in entries if entry in label_set]
+def weight(rounds: int, rejections: int) -> int:
+    return rounds - min(rejections, rounds) + 1
+def build_scene_html(action_rows: list[list[object]], preferred_action: str) -> str:
+    payload = json.dumps(
+        {
+            "rows": action_rows,
+            "preferredAction": preferred_action,
+        }
+    )
+    return f"""
+<div id="buleyean-rl-scene" style="height: 430px; width: 100%; border-radius: 22px; overflow: hidden; background:
+radial-gradient(circle at top, #24090b 0%, #120607 55%, #050203 100%);"></div>
+<script>
+(async () => {{
+  const mount = document.getElementById("buleyean-rl-scene");
+  if (!mount) return;
+  const payload = {payload};
+  window.__spaceSceneDisposers = window.__spaceSceneDisposers || {{}};
+  if (window.__spaceSceneDisposers.buleyeanRl) {{
+    window.__spaceSceneDisposers.buleyeanRl();
+  }}
+  const loadScript = (src) => new Promise((resolve, reject) => {{
+    const existing = Array.from(document.scripts).find((script) => script.src === src);
+    if (existing) {{
+      if (existing.dataset.loaded === "true") {{
+        resolve();
+      }} else {{
+        existing.addEventListener("load", resolve, {{ once: true }});
+        existing.addEventListener("error", reject, {{ once: true }});
+      }}
+      return;
+    }}
+    const script = document.createElement("script");
+    script.src = src;
+    script.async = true;
+    script.onload = () => {{
+      script.dataset.loaded = "true";
+      resolve();
+    }};
+    script.onerror = reject;
+    document.head.appendChild(script);
+  }});
+  await loadScript("https://unpkg.com/three@0.160.0/build/three.min.js");
+  await loadScript("https://unpkg.com/three@0.160.0/examples/js/controls/OrbitControls.js");
+  const THREE = window.THREE;
+  mount.innerHTML = "";
+  const width = mount.clientWidth || 900;
+  const height = mount.clientHeight || 430;
+  const renderer = new THREE.WebGLRenderer({{ antialias: true, alpha: true }});
+  renderer.setPixelRatio(window.devicePixelRatio || 1);
+  renderer.setSize(width, height);
+  mount.appendChild(renderer.domElement);
+  const scene = new THREE.Scene();
+  const camera = new THREE.PerspectiveCamera(46, width / height, 0.1, 100);
+  camera.position.set(0, 7, 16);
+  const controls = new THREE.OrbitControls(camera, renderer.domElement);
+  controls.enableDamping = true;
+  controls.dampingFactor = 0.05;
+  scene.add(new THREE.AmbientLight(0xffe4e6, 0.65));
+  const redLight = new THREE.PointLight(0xfb7185, 1.2, 80);
+  redLight.position.set(-6, 11, 8);
+  scene.add(redLight);
+  const cyanLight = new THREE.PointLight(0x7dd3fc, 1.1, 80);
+  cyanLight.position.set(8, 8, -3);
+  scene.add(cyanLight);
+  const grid = new THREE.GridHelper(16, payload.rows.length * 2, 0x4c0519, 0x1f2937);
+  grid.position.y = -0.12;
+  scene.add(grid);
+  const group = new THREE.Group();
+  payload.rows.forEach((row, index) => {{
+    const [label, rejections, complementWeight, probability] = row;
+    const x = (index - (payload.rows.length - 1) / 2) * 2.2;
+    const rejectionHeight = Math.max(Number(rejections), 0.15) + 0.25;
+    const weightHeight = Math.max(Number(complementWeight) * 0.42, 0.2);
+    const rejectionBar = new THREE.Mesh(
+      new THREE.BoxGeometry(0.72, rejectionHeight, 0.72),
+      new THREE.MeshStandardMaterial({{
+        color: 0xfb7185,
+        emissive: 0xfb7185,
+        emissiveIntensity: 0.14,
+        metalness: 0.2,
+        roughness: 0.32,
+      }})
+    );
+    rejectionBar.position.set(x - 0.44, rejectionHeight / 2, 0);
+    group.add(rejectionBar);
+    const weightBar = new THREE.Mesh(
+      new THREE.BoxGeometry(0.72, weightHeight, 0.72),
+      new THREE.MeshStandardMaterial({{
+        color: label === payload.preferredAction ? 0xffffff : 0x7dd3fc,
+        emissive: label === payload.preferredAction ? 0xffffff : 0x7dd3fc,
+        emissiveIntensity: label === payload.preferredAction ? 0.36 : 0.16,
+        metalness: 0.35,
+        roughness: 0.18,
+      }})
+    );
+    weightBar.position.set(x + 0.44, weightHeight / 2, 0);
+    group.add(weightBar);
+    if (label === payload.preferredAction) {{
+      const halo = new THREE.Mesh(
+        new THREE.TorusGeometry(0.9, 0.08, 18, 48),
+        new THREE.MeshStandardMaterial({{
+          color: 0xffffff,
+          emissive: 0xffffff,
+          emissiveIntensity: 0.4,
+        }})
+      );
+      halo.rotation.x = Math.PI / 2;
+      halo.position.set(x + 0.44, weightHeight + 0.4, 0);
+      group.add(halo);
+    }}
+    const probabilityOrb = new THREE.Mesh(
+      new THREE.SphereGeometry(0.12 + Number(probability) * 0.45, 16, 16),
+      new THREE.MeshStandardMaterial({{
+        color: 0xf8fafc,
+        emissive: 0xf8fafc,
+        emissiveIntensity: 0.35,
+      }})
+    );
+    probabilityOrb.position.set(x + 0.44, weightHeight + 0.28, -1.2);
+    group.add(probabilityOrb);
+  }});
+  scene.add(group);
+  let frame = 0;
+  const renderLoop = () => {{
+    frame += 1;
+    group.rotation.y = Math.sin(frame * 0.003) * 0.2;
+    group.children.forEach((child, index) => {{
+      if (child.geometry && child.geometry.type === "SphereGeometry") {{
+        child.position.z = -1.2 + Math.sin(frame * 0.02 + index) * 0.35;
+      }}
+    }});
+    controls.update();
+    renderer.render(scene, camera);
+    window.__spaceSceneDisposers.buleyeanRl.frame = requestAnimationFrame(renderLoop);
+  }};
+  const resize = () => {{
+    const nextWidth = mount.clientWidth || 900;
+    const nextHeight = mount.clientHeight || 430;
+    camera.aspect = nextWidth / nextHeight;
+    camera.updateProjectionMatrix();
+    renderer.setSize(nextWidth, nextHeight);
+  }};
+  window.addEventListener("resize", resize);
+  window.__spaceSceneDisposers.buleyeanRl = () => {{
+    cancelAnimationFrame(window.__spaceSceneDisposers.buleyeanRl.frame);
+    window.removeEventListener("resize", resize);
+    controls.dispose();
+    renderer.dispose();
+    mount.innerHTML = "";
+  }};
+  renderLoop();
+}})().catch((error) => {{
+  const mount = document.getElementById("buleyean-rl-scene");
+  if (mount) {{
+    mount.innerHTML = `<div style="padding: 1rem; color: #fecdd3;">three.js scene failed to load: ${{error}}</div>`;
+  }}
+}});
+</script>
+"""
+def run_demo(raw_labels: str, raw_sequence: str, fork_width: int):
+    labels = parse_labels(raw_labels)
+    sequence = parse_rejection_sequence(raw_sequence, labels)
+    counts = {label: 0 for label in labels}
+    trajectory_rows = []
+    for step_index, rejected in enumerate(sequence, start=1):
+        counts[rejected] += 1
+        weights = {label: weight(step_index, counts[label]) for label in labels}
+        total_weight = sum(weights.values())
+        best_label = max(labels, key=lambda label: (weights[label], label))
+        trajectory_rows.append(
+            [
+                step_index,
+                rejected,
+                best_label,
+                round(weights[best_label] / total_weight, 6),
+            ]
+        )
+    rounds = max(len(sequence), 1)
+    final_weights = {label: weight(rounds, counts[label]) for label in labels}
+    total_weight = sum(final_weights.values())
+    action_rows = []
+    for label in labels:
+        action_rows.append(
+            [
+                label,
+                counts[label],
+                final_weights[label],
+                round(final_weights[label] / total_weight, 6),
+            ]
+        )
+    preferred_action = max(labels, key=lambda label: (final_weights[label], label))
+    success_data = len(sequence)
+    failure_data = len(sequence) * (fork_width - 1)
+    summary = f"""
+## What you can see
+- Logged rejection rounds: `{len(sequence)}`
+- Fork width: `{fork_width}`
+- Success-only data points: `{success_data}`
+- Failure data points: `{failure_data}`
+- Failure multiplier: `{fork_width - 1}x`
+- Current complement winner: `{preferred_action}`
+No chosen example was required to produce the target distribution below.
+The void boundary alone is enough to rank the actions.
+"""
+    scene_html = build_scene_html(action_rows, preferred_action)
+    return scene_html, summary, action_rows, trajectory_rows
+# Gradio UI definition. Components are created inside the Blocks context in
+# the order written below; `demo` is the object launched at the bottom of
+# the file.
+with gr.Blocks(
+    title="Buleyean RL",
+    theme=gr.themes.Base(primary_hue="red"),
+) as demo:
+    gr.Markdown(
+        """
+# Buleyean RL
+Feed in rejected actions only. The complement target updates from what the
+policy should not do. Talk is cheap; the table below is the point.
+        """
+    )
+    # Inputs: the three values passed to run_demo, in its parameter order.
+    labels = gr.Textbox(
+        label="Action labels",
+        value="answer, hedge, repair, refuse",
+    )
+    rejection_sequence = gr.Textbox(
+        label="Rejected actions in order",
+        value="refuse, hedge, refuse, repair, refuse, refuse",
+        lines=2,
+    )
+    fork_width = gr.Slider(2, 8, value=4, step=1, label="Fork width")
+    run_button = gr.Button("Walk the void", variant="primary")
+    # Outputs: one component per element of the tuple returned by run_demo.
+    scene = gr.HTML()
+    summary = gr.Markdown()
+    action_table = gr.Dataframe(
+        headers=["Action", "Rejections", "Complement weight", "Probability"],
+        interactive=False,
+    )
+    trajectory_table = gr.Dataframe(
+        headers=["Step", "Rejected action", "Current winner", "Winner probability"],
+        interactive=False,
+    )
+    # Preset input combinations; each example lists labels, rejection
+    # sequence and fork width, matching the `inputs` order.
+    gr.Examples(
+        examples=[
+            [
+                "answer, hedge, repair, refuse",
+                "refuse, hedge, refuse, repair, refuse, refuse",
+                4,
+            ],
+            [
+                "build, stall, bluff, concede",
+                "stall, bluff, stall, stall, concede",
+                5,
+            ],
+        ],
+        inputs=[labels, rejection_sequence, fork_width],
+    )
+    # run_demo is wired to both the button click and the load event, with
+    # identical inputs and outputs.
+    run_button.click(
+        run_demo,
+        inputs=[labels, rejection_sequence, fork_width],
+        outputs=[scene, summary, action_table, trajectory_table],
+    )
+    demo.load(
+        run_demo,
+        inputs=[labels, rejection_sequence, fork_width],
+        outputs=[scene, summary, action_table, trajectory_table],
+    )
+# Launch the app only when this file is executed as a script.
+if __name__ == "__main__":
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ gradio>=5.0.0,<6.0.0