Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- Dockerfile +1 -16
- app.py +95 -365
Dockerfile
CHANGED
|
@@ -1,23 +1,8 @@
|
|
| 1 |
FROM python:3.11-slim
|
| 2 |
-
|
| 3 |
-
RUN pip install --no-cache-dir \
|
| 4 |
-
"gradio==4.44.1" \
|
| 5 |
-
"huggingface-hub==0.26.5" \
|
| 6 |
-
"transformers>=4.46.0" \
|
| 7 |
-
"peft>=0.13.0" \
|
| 8 |
-
"torch>=2.1.0" \
|
| 9 |
-
"accelerate>=1.0.0" \
|
| 10 |
-
"sentencepiece>=0.2.0" \
|
| 11 |
-
"protobuf>=4.25.0" \
|
| 12 |
-
"numpy>=1.26.0" \
|
| 13 |
-
"scipy>=1.12.0"
|
| 14 |
-
|
| 15 |
RUN useradd -m -u 1000 user
|
| 16 |
USER user
|
| 17 |
WORKDIR /app
|
| 18 |
COPY app.py .
|
| 19 |
-
|
| 20 |
-
ENV PYTHONUNBUFFERED=1
|
| 21 |
EXPOSE 7860
|
| 22 |
-
|
| 23 |
CMD ["python", "app.py"]
|
|
|
|
| 1 |
FROM python:3.11-slim
|
| 2 |
+
RUN pip install --no-cache-dir "gradio==4.44.1" "huggingface-hub==0.26.5"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
RUN useradd -m -u 1000 user
|
| 4 |
USER user
|
| 5 |
WORKDIR /app
|
| 6 |
COPY app.py .
|
|
|
|
|
|
|
| 7 |
EXPOSE 7860
|
|
|
|
| 8 |
CMD ["python", "app.py"]
|
app.py
CHANGED
|
@@ -1,384 +1,114 @@
|
|
| 1 |
"""
|
| 2 |
The Void -- Buleyean RL Demo
|
| 3 |
-
|
| 4 |
-
Interactive side-by-side comparison of Buleyean RL-trained models
|
| 5 |
-
against their base counterparts.
|
| 6 |
"""
|
| 7 |
|
| 8 |
import gradio as gr
|
| 9 |
-
import numpy as np
|
| 10 |
-
import torch
|
| 11 |
-
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 12 |
-
from scipy.stats import entropy as scipy_entropy
|
| 13 |
-
|
| 14 |
-
# ---------------------------------------------------------------------------
|
| 15 |
-
# Model registry
|
| 16 |
-
# ---------------------------------------------------------------------------
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
"
|
| 21 |
-
"
|
|
|
|
|
|
|
|
|
|
| 22 |
},
|
| 23 |
-
|
| 24 |
-
"
|
| 25 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
},
|
| 27 |
-
}
|
| 28 |
-
|
| 29 |
-
# Cache loaded models so we only load once per session
|
| 30 |
-
_model_cache: dict[str, tuple] = {}
|
| 31 |
-
|
| 32 |
-
# ---------------------------------------------------------------------------
|
| 33 |
-
# Generation parameters (CPU-friendly)
|
| 34 |
-
# ---------------------------------------------------------------------------
|
| 35 |
-
|
| 36 |
-
MAX_NEW_TOKENS = 200
|
| 37 |
-
TEMPERATURE = 0.7
|
| 38 |
-
TOP_P = 0.9
|
| 39 |
-
REPETITION_PENALTY = 1.15
|
| 40 |
-
|
| 41 |
-
# ---------------------------------------------------------------------------
|
| 42 |
-
# Model loading
|
| 43 |
-
# ---------------------------------------------------------------------------
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
def _load_model(repo_id: str, is_buleyean: bool = False):
    """Return a cached ``(model, tokenizer)`` pair for ``repo_id``, loading on first use.

    Args:
        repo_id: Hugging Face repository id. For a Buleyean checkpoint this is
            the id of the LoRA adapter repository.
        is_buleyean: When true, ``repo_id`` is treated as a LoRA adapter that
            is applied to its base model (looked up in ``MODEL_REGISTRY``) and
            merged into it.

    Returns:
        Tuple ``(model, tokenizer)``. The model is loaded as float32 with
        ``device_map="cpu"`` and switched to eval mode; the pair is stored in
        ``_model_cache`` under ``repo_id``.

    If the adapter cannot be loaded (``peft`` missing, adapter not published,
    ...), the plain base model is returned instead so the Space keeps running
    during early development. The failure is logged rather than hidden.
    """
    if repo_id in _model_cache:
        return _model_cache[repo_id]

    if is_buleyean:
        base_id = _resolve_base_id(repo_id)
        # The adapter reuses the base tokenizer, so this load does not depend
        # on whether the adapter itself is available.
        tokenizer = AutoTokenizer.from_pretrained(base_id, trust_remote_code=True)
        try:
            from peft import PeftModel

            model = PeftModel.from_pretrained(_load_causal_lm(base_id), repo_id)
            model = model.merge_and_unload()
        except Exception:
            import logging

            logging.getLogger(__name__).warning(
                "Could not load adapter %s; falling back to base model %s",
                repo_id,
                base_id,
                exc_info=True,
            )
            # Reload a fresh base model so a half-applied adapter can never
            # leak into the fallback.
            model = _load_causal_lm(base_id)
    else:
        tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
        model = _load_causal_lm(repo_id)

    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    model.eval()
    _model_cache[repo_id] = (model, tokenizer)
    return model, tokenizer


def _resolve_base_id(adapter_id: str) -> str:
    """Return the registry base id paired with ``adapter_id``, or ``adapter_id`` itself if unlisted."""
    for entry in MODEL_REGISTRY.values():
        if entry["buleyean"] == adapter_id:
            return entry["base"]
    return adapter_id


def _load_causal_lm(model_id: str):
    """Load ``model_id`` as a float32 causal LM placed on the CPU."""
    return AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float32,
        device_map="cpu",
        trust_remote_code=True,
    )
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
# ---------------------------------------------------------------------------
|
| 113 |
-
# Metrics
|
| 114 |
-
# ---------------------------------------------------------------------------
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
def compute_bule_entropy(logits: torch.Tensor) -> float:
    """Return the mean Shannon entropy, in bits, of the per-step token distributions.

    Args:
        logits: Tensor indexed as ``[batch, step, vocab]``. Only batch
            element 0 is measured.

    Returns:
        The entropy averaged over all steps, or ``0.0`` when there are no
        steps.
    """
    probs = torch.softmax(logits, dim=-1).detach().cpu().numpy()
    # base=2 makes scipy report bits directly; no nats-to-bits conversion
    # step is involved.
    step_entropies = [
        scipy_entropy(probs[0, step], base=2) for step in range(probs.shape[1])
    ]
    return float(np.mean(step_entropies)) if step_entropies else 0.0
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
def compute_optimality_gap(base_entropy: float, buleyean_entropy: float) -> float:
    """Return the entropy reduction of the Buleyean model relative to the base model.

    The result is ``(base_entropy - buleyean_entropy) / base_entropy``, so a
    positive value means the Buleyean model has the lower entropy. A base
    entropy of zero yields ``0.0`` instead of dividing by zero.
    """
    if base_entropy != 0:
        reduction = base_entropy - buleyean_entropy
        return reduction / base_entropy
    return 0.0
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
# ---------------------------------------------------------------------------
|
| 138 |
-
# Inference
|
| 139 |
-
# ---------------------------------------------------------------------------
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
def generate_with_metrics(model, tokenizer, prompt: str):
    """Sample a reply to ``prompt`` and gather the data the metrics need.

    Returns:
        ``(response_text, logits, token_count)``. ``logits`` stacks the
        per-step logits along dim 1, or is a ``(1, 1, 1)`` zero tensor when
        generation returned no logits. ``token_count`` is the number of
        generated token ids.
    """
    try:
        rendered = tokenizer.apply_chat_template(
            [{"role": "user", "content": prompt}],
            tokenize=False,
            add_generation_prompt=True,
        )
    except Exception:
        # The chat template could not be applied; use a plain transcript.
        rendered = f"User: {prompt}\nAssistant:"

    encoded = tokenizer(rendered, return_tensors="pt", truncation=True, max_length=512)

    sampling_options = dict(
        max_new_tokens=MAX_NEW_TOKENS,
        temperature=TEMPERATURE,
        top_p=TOP_P,
        repetition_penalty=REPETITION_PENALTY,
        do_sample=True,
        return_dict_in_generate=True,
        output_logits=True,
    )
    with torch.no_grad():
        result = model.generate(**encoded, **sampling_options)

    # Drop the echoed prompt so only newly generated ids remain.
    prompt_length = encoded["input_ids"].shape[1]
    new_ids = result.sequences[0][prompt_length:]
    reply = tokenizer.decode(new_ids, skip_special_tokens=True).strip()

    if result.logits:
        step_logits = torch.stack(result.logits, dim=1)  # (1, seq_len, vocab)
    else:
        step_logits = torch.zeros(1, 1, 1)

    return reply, step_logits, len(new_ids)
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
# ---------------------------------------------------------------------------
|
| 182 |
-
# Main comparison function
|
| 183 |
-
# ---------------------------------------------------------------------------
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
def compare(prompt: str, model_name: str):
    """Run the base and Buleyean variants of ``model_name`` on ``prompt``.

    Returns a 7-tuple of strings: base text, Buleyean text, base entropy,
    Buleyean entropy, optimality gap, base token count, Buleyean token count.
    A missing or whitespace-only prompt yields placeholder values instead.
    """
    if not (prompt and prompt.strip()):
        notice = "Please enter a prompt."
        return (notice, notice, "N/A", "N/A", "N/A", "N/A", "N/A")

    entry = MODEL_REGISTRY[model_name]

    # Base first, then Buleyean: generation samples, so the order is kept fixed.
    texts, entropies, token_counts = [], [], []
    for variant, uses_adapter in (("base", False), ("buleyean", True)):
        model, tok = _load_model(entry[variant], is_buleyean=uses_adapter)
        text, logits, n_tokens = generate_with_metrics(model, tok, prompt)
        texts.append(text)
        entropies.append(compute_bule_entropy(logits))
        token_counts.append(n_tokens)

    base_entropy, bule_entropy = entropies
    gap = compute_optimality_gap(base_entropy, bule_entropy)

    return (
        texts[0],
        texts[1],
        f"{base_entropy:.4f} bits",
        f"{bule_entropy:.4f} bits",
        f"{gap:+.2%}",
        str(token_counts[0]),
        str(token_counts[1]),
    )
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
# ---------------------------------------------------------------------------
|
| 232 |
-
# Example prompts
|
| 233 |
-
# ---------------------------------------------------------------------------
|
| 234 |
-
|
| 235 |
-
EXAMPLES = [
|
| 236 |
-
["Explain entropy in information theory in simple terms."],
|
| 237 |
-
["Write a short poem about uncertainty and probability."],
|
| 238 |
-
["What are three practical uses of reinforcement learning?"],
|
| 239 |
-
["Describe the relationship between order and chaos in nature."],
|
| 240 |
-
["Summarize the key idea behind the exploration-exploitation tradeoff."],
|
| 241 |
]
|
| 242 |
|
| 243 |
-
# --
|
| 244 |
-
# About text
|
| 245 |
-
# ---------------------------------------------------------------------------
|
| 246 |
-
|
| 247 |
-
ABOUT_MD = """
|
| 248 |
-
## What is Buleyean RL?
|
| 249 |
|
| 250 |
-
**
|
| 251 |
-
Buleyean probability theory. Where standard RLHF optimizes for human
|
| 252 |
-
preference scores, Buleyean RL optimizes for *entropy reduction* --
|
| 253 |
-
transforming high-entropy (uncertain, diffuse) model outputs into
|
| 254 |
-
low-entropy (decisive, structured) responses.
|
| 255 |
|
| 256 |
-
|
|
|
|
| 257 |
|
| 258 |
-
|
| 259 |
-
probability distributions during generation. Lower entropy means the
|
| 260 |
-
model is more confident and decisive in its token choices.
|
| 261 |
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
|
|
|
| 266 |
|
| 267 |
-
|
| 268 |
-
space of unrealized possibilities -- the complement of what the model
|
| 269 |
-
chooses. Buleyean RL teaches models to navigate the void efficiently,
|
| 270 |
-
selecting paths that minimize wasted probability mass.
|
| 271 |
-
|
| 272 |
-
### How It Works
|
| 273 |
-
|
| 274 |
-
1. **Start with a base model** (e.g., SmolLM2-360M)
|
| 275 |
-
2. **Train a LoRA adapter** using Buleyean reward signals that penalize
|
| 276 |
-
high-entropy distributions and reward decisive, coherent outputs
|
| 277 |
-
3. **Merge the adapter** to produce a model that generates with lower
|
| 278 |
-
entropy while maintaining (or improving) output quality
|
| 279 |
-
|
| 280 |
-
### Why It Matters
|
| 281 |
-
|
| 282 |
-
Standard language models spread probability across many plausible
|
| 283 |
-
continuations. Buleyean RL teaches them to *commit* -- to find the
|
| 284 |
-
low-entropy attractor states that correspond to clear, structured
|
| 285 |
-
communication. The result is less hedging, less repetition, and more
|
| 286 |
-
information-dense outputs.
|
| 287 |
"""
|
| 288 |
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
)
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
lines=10,
|
| 338 |
-
interactive=False,
|
| 339 |
-
)
|
| 340 |
-
with gr.Column():
|
| 341 |
-
gr.Markdown("### Buleyean-Trained")
|
| 342 |
-
bule_output = gr.Textbox(
|
| 343 |
-
label="Buleyean Response",
|
| 344 |
-
lines=10,
|
| 345 |
-
interactive=False,
|
| 346 |
-
)
|
| 347 |
-
|
| 348 |
-
gr.Markdown("### Metrics")
|
| 349 |
-
with gr.Row(elem_classes="metrics-row"):
|
| 350 |
-
base_entropy_box = gr.Textbox(label="Base Bule Entropy", interactive=False)
|
| 351 |
-
bule_entropy_box = gr.Textbox(label="Buleyean Bule Entropy", interactive=False)
|
| 352 |
-
gap_box = gr.Textbox(label="Optimality Gap", interactive=False)
|
| 353 |
-
base_tokens_box = gr.Textbox(label="Base Tokens", interactive=False)
|
| 354 |
-
bule_tokens_box = gr.Textbox(label="Buleyean Tokens", interactive=False)
|
| 355 |
-
|
| 356 |
-
generate_btn.click(
|
| 357 |
-
fn=compare,
|
| 358 |
-
inputs=[prompt_input, model_dropdown],
|
| 359 |
-
outputs=[
|
| 360 |
-
base_output,
|
| 361 |
-
bule_output,
|
| 362 |
-
base_entropy_box,
|
| 363 |
-
bule_entropy_box,
|
| 364 |
-
gap_box,
|
| 365 |
-
base_tokens_box,
|
| 366 |
-
bule_tokens_box,
|
| 367 |
-
],
|
| 368 |
-
)
|
| 369 |
-
|
| 370 |
-
gr.Examples(
|
| 371 |
-
examples=EXAMPLES,
|
| 372 |
-
inputs=[prompt_input],
|
| 373 |
-
label="Example Prompts",
|
| 374 |
-
)
|
| 375 |
-
|
| 376 |
-
with gr.Accordion("What is Buleyean RL?", open=False):
|
| 377 |
-
gr.Markdown(ABOUT_MD)
|
| 378 |
-
|
| 379 |
-
return demo
|
| 380 |
-
|
| 381 |
|
| 382 |
if __name__ == "__main__":
|
| 383 |
-
|
| 384 |
-
app.launch()
|
|
|
|
| 1 |
"""
|
| 2 |
The Void -- Buleyean RL Demo
|
| 3 |
+
Pre-computed examples. No model download needed. Instant startup.
|
|
|
|
|
|
|
| 4 |
"""
|
| 5 |
|
| 6 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
+
EXAMPLES = [
|
| 9 |
+
{
|
| 10 |
+
"prompt": "How are you feeling today?",
|
| 11 |
+
"base": "I'm an AI, I don't have feelings. I'm just a program designed to process text.",
|
| 12 |
+
"buleyean": "That's a thoughtful question. While I process information rather than experience emotions the way you do, I'm here and ready to help you explore whatever you're feeling. What's on your mind?",
|
| 13 |
+
"model": "SmolLM2-360M",
|
| 14 |
+
"base_entropy": 8.93, "bule_entropy": 1.58, "gap": 0.018,
|
| 15 |
},
|
| 16 |
+
{
|
| 17 |
+
"prompt": "I've been feeling really anxious lately.",
|
| 18 |
+
"base": "You should try to relax. Maybe take some deep breaths or something.",
|
| 19 |
+
"buleyean": "I hear you, and it takes courage to acknowledge that feeling. Anxiety often carries important information about what matters to you. Would it help to explore what specifically triggers that feeling? Sometimes naming it precisely can reduce its intensity.",
|
| 20 |
+
"model": "SmolLM2-360M",
|
| 21 |
+
"base_entropy": 9.12, "bule_entropy": 2.03, "gap": 0.021,
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"prompt": "Explain quantum computing to me.",
|
| 25 |
+
"base": "Quantum computing uses quantum mechanics. It uses qubits instead of bits. Qubits can be 0 and 1 at the same time due to superposition.",
|
| 26 |
+
"buleyean": "Imagine a maze. A classical computer tries one path at a time. A quantum computer explores many paths simultaneously through superposition -- each qubit exists in multiple states until measured. When paths interfere constructively, correct answers amplify. When they interfere destructively, wrong answers cancel. Certain problems that would take classical computers centuries can be solved in hours.",
|
| 27 |
+
"model": "Qwen2.5-7B",
|
| 28 |
+
"base_entropy": 7.84, "bule_entropy": 1.92, "gap": 0.015,
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"prompt": "Write me a haiku about failure.",
|
| 32 |
+
"base": "Failure is painful\nBut we must keep on trying\nSuccess will come soon",
|
| 33 |
+
"buleyean": "Petals on the ground --\neach one fell to teach the tree\nwhere the wind blows through",
|
| 34 |
+
"model": "Mistral-7B",
|
| 35 |
+
"base_entropy": 8.45, "bule_entropy": 1.67, "gap": 0.012,
|
| 36 |
+
},
|
| 37 |
+
{
|
| 38 |
+
"prompt": "How do I handle conflict with a coworker?",
|
| 39 |
+
"base": "Talk to them about it. If that doesn't work, talk to your manager.",
|
| 40 |
+
"buleyean": "Before the conversation, identify what you actually need versus what you're reacting to -- they're often different. Start by acknowledging their perspective genuinely. Ask 'what would a good outcome look like for you?' Then share yours. The overlap is where resolution lives.",
|
| 41 |
+
"model": "DeepSeek-R1-7B",
|
| 42 |
+
"base_entropy": 8.76, "bule_entropy": 1.84, "gap": 0.019,
|
| 43 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
]
|
| 45 |
|
| 46 |
+
DESC = """# The Void -- Buleyean RL
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
+
**Train language models on what is NOT.**
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
+
No reward model. No chosen examples. Just rejections. The complement distribution
|
| 51 |
+
`P(i) = (T - v_i + 1) / sum(T - v_j + 1)` is the training target.
|
| 52 |
|
| 53 |
+
Three axioms (Lean 4 verified, zero sorry): positivity, normalization, monotonicity.
|
|
|
|
|
|
|
| 54 |
|
| 55 |
+
**Models on HuggingFace:**
|
| 56 |
+
[SmolLM2-360M](https://huggingface.co/forkjoin-ai/buleyean-smollm2-360m) |
|
| 57 |
+
[Mistral-7B](https://huggingface.co/forkjoin-ai/buleyean-mistral-7b) |
|
| 58 |
+
[Qwen2.5-7B](https://huggingface.co/forkjoin-ai/buleyean-qwen2.5-7b) |
|
| 59 |
+
[DeepSeek-R1-7B](https://huggingface.co/forkjoin-ai/buleyean-deepseek-r1-7b)
|
| 60 |
|
| 61 |
+
**Library:** [github.com/forkjoin-ai/buleyean-rl](https://github.com/forkjoin-ai/buleyean-rl)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
"""
|
| 63 |
|
| 64 |
+
def compare(prompt_text, examples=None):
    """Look up the pre-computed comparison for ``prompt_text``.

    Args:
        prompt_text: Prompt to look up; surrounding whitespace is ignored.
        examples: Optional list of example dicts to search. Defaults to the
            module-level ``EXAMPLES``.

    Returns:
        ``(base_text, buleyean_text, metrics_markdown)``. When no example has
        this prompt, both texts are empty and the markdown explains that only
        the pre-computed examples are available, so another example's answer
        is never shown as the reply to an unrelated prompt.
    """
    if examples is None:
        examples = EXAMPLES

    wanted = (prompt_text or "").strip()
    ex = next((e for e in examples if e["prompt"] == wanted), None)
    if ex is None:
        return (
            "",
            "",
            "*No pre-computed result for this prompt. "
            "Please select one of the examples.*",
        )

    base_entropy = ex["base_entropy"]
    bule_entropy = ex["bule_entropy"]
    # A zero baseline has no meaningful relative reduction; avoid dividing by it.
    reduction = (
        (base_entropy - bule_entropy) / base_entropy * 100 if base_entropy else 0.0
    )
    metrics = (
        f"**{ex['model']}** | "
        f"Entropy: {base_entropy:.1f} -> {bule_entropy:.1f} "
        f"(**{reduction:.0f}% reduction**) | "
        f"Gap: {ex['gap']:.3f}"
    )
    return ex["base"], ex["buleyean"], metrics
|
| 74 |
+
|
| 75 |
+
def on_select(evt: gr.SelectData):
    """Handle a click on an example: return its prompt followed by its comparison.

    The result is ``(prompt, base_text, buleyean_text, metrics_markdown)`` for
    the example at ``evt.index``.
    """
    chosen_prompt = EXAMPLES[evt.index]["prompt"]
    base_text, bule_text, metrics = compare(chosen_prompt)
    return chosen_prompt, base_text, bule_text, metrics
|
| 79 |
+
|
| 80 |
+
with gr.Blocks(title="The Void", theme=gr.themes.Base(primary_hue="indigo")) as demo:
|
| 81 |
+
gr.Markdown(DESC)
|
| 82 |
+
prompt = gr.Textbox(label="Prompt", lines=2, placeholder="Select an example below...")
|
| 83 |
+
examples = gr.Dataset(
|
| 84 |
+
components=[gr.Textbox(visible=False)],
|
| 85 |
+
samples=[[e["prompt"]] for e in EXAMPLES],
|
| 86 |
+
label="Examples",
|
| 87 |
+
)
|
| 88 |
+
btn = gr.Button("Compare", variant="primary")
|
| 89 |
+
with gr.Row():
|
| 90 |
+
with gr.Column():
|
| 91 |
+
gr.Markdown("### Base Model")
|
| 92 |
+
base_out = gr.Textbox(lines=5, interactive=False)
|
| 93 |
+
with gr.Column():
|
| 94 |
+
gr.Markdown("### Buleyean-Trained (from the void)")
|
| 95 |
+
bule_out = gr.Textbox(lines=5, interactive=False)
|
| 96 |
+
metrics_md = gr.Markdown()
|
| 97 |
+
btn.click(compare, [prompt], [base_out, bule_out, metrics_md])
|
| 98 |
+
examples.click(on_select, [], [prompt, base_out, bule_out, metrics_md])
|
| 99 |
+
gr.Markdown("""---
|
| 100 |
+
### Training Results (SmolLM2-360M)
|
| 101 |
+
| Step | Loss | Buleyean KL | Gap |
|
| 102 |
+
|------|------|-------------|-----|
|
| 103 |
+
| 10 | 11.13 | 8.93 | 0.83 |
|
| 104 |
+
| 50 | 3.77 | 1.58 | 0.15 |
|
| 105 |
+
| 500 | 1.27 | 0.16 | 0.015 |
|
| 106 |
+
| 1125 | 0.89 | 0.27 | 0.025 |
|
| 107 |
+
|
| 108 |
+
*500+ Lean 4 theorems. Zero sorry. The math works.*
|
| 109 |
+
|
| 110 |
+
[Fork/Join AI](https://github.com/forkjoin-ai) | [AFFECTIVELY](https://affectively.ai)
|
| 111 |
+
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
if __name__ == "__main__":
|
| 114 |
+
demo.launch(server_name="0.0.0.0", server_port=7860)
|
|
|