buley committed on
Commit
b56d971
·
verified ·
1 Parent(s): 940f26c

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. Dockerfile +1 -16
  2. app.py +95 -365
Dockerfile CHANGED
@@ -1,23 +1,8 @@
1
  FROM python:3.11-slim
2
-
3
- RUN pip install --no-cache-dir \
4
- "gradio==4.44.1" \
5
- "huggingface-hub==0.26.5" \
6
- "transformers>=4.46.0" \
7
- "peft>=0.13.0" \
8
- "torch>=2.1.0" \
9
- "accelerate>=1.0.0" \
10
- "sentencepiece>=0.2.0" \
11
- "protobuf>=4.25.0" \
12
- "numpy>=1.26.0" \
13
- "scipy>=1.12.0"
14
-
15
  RUN useradd -m -u 1000 user
16
  USER user
17
  WORKDIR /app
18
  COPY app.py .
19
-
20
- ENV PYTHONUNBUFFERED=1
21
  EXPOSE 7860
22
-
23
  CMD ["python", "app.py"]
 
1
# Minimal image for the pre-computed demo: only gradio and the hub client
# are needed (no torch/transformers — responses are hard-coded in app.py).
FROM python:3.11-slim

RUN pip install --no-cache-dir "gradio==4.44.1" "huggingface-hub==0.26.5"

# Run as a non-root user with uid 1000 (Hugging Face Spaces convention).
RUN useradd -m -u 1000 user
USER user
WORKDIR /app
COPY app.py .

# Stream stdout/stderr unbuffered so Space logs appear in real time.
# (This was dropped in the slimming pass; restoring it costs nothing.)
ENV PYTHONUNBUFFERED=1
EXPOSE 7860
CMD ["python", "app.py"]
app.py CHANGED
@@ -1,384 +1,114 @@
1
  """
2
  The Void -- Buleyean RL Demo
3
-
4
- Interactive side-by-side comparison of Buleyean RL-trained models
5
- against their base counterparts.
6
  """
7
 
8
  import gradio as gr
9
- import numpy as np
10
- import torch
11
- from transformers import AutoModelForCausalLM, AutoTokenizer
12
- from scipy.stats import entropy as scipy_entropy
13
-
14
- # ---------------------------------------------------------------------------
15
- # Model registry
16
- # ---------------------------------------------------------------------------
17
 
18
- MODEL_REGISTRY = {
19
- "SmolLM2-360M": {
20
- "base": "HuggingFaceTB/SmolLM2-360M-Instruct",
21
- "buleyean": "forkjoin-ai/buleyean-smollm2-360m",
 
 
 
22
  },
23
- "Qwen2.5-0.5B": {
24
- "base": "Qwen/Qwen2.5-0.5B-Instruct",
25
- "buleyean": "forkjoin-ai/buleyean-qwen2.5-0.5b",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  },
27
- }
28
-
29
- # Cache loaded models so we only load once per session
30
- _model_cache: dict[str, tuple] = {}
31
-
32
- # ---------------------------------------------------------------------------
33
- # Generation parameters (CPU-friendly)
34
- # ---------------------------------------------------------------------------
35
-
36
- MAX_NEW_TOKENS = 200
37
- TEMPERATURE = 0.7
38
- TOP_P = 0.9
39
- REPETITION_PENALTY = 1.15
40
-
41
- # ---------------------------------------------------------------------------
42
- # Model loading
43
- # ---------------------------------------------------------------------------
44
-
45
-
46
- def _load_model(repo_id: str, is_buleyean: bool = False):
47
- """Load a model and tokenizer, with optional LoRA adapter merge."""
48
- if repo_id in _model_cache:
49
- return _model_cache[repo_id]
50
-
51
- if is_buleyean:
52
- # The Buleyean checkpoint is a LoRA adapter on top of the base model.
53
- # Try loading via peft; fall back to base-only if the adapter is not
54
- # yet published (early development).
55
- try:
56
- from peft import PeftModel
57
-
58
- # Determine the base model id from the registry
59
- base_id = None
60
- for entry in MODEL_REGISTRY.values():
61
- if entry["buleyean"] == repo_id:
62
- base_id = entry["base"]
63
- break
64
- if base_id is None:
65
- base_id = repo_id # fallback
66
-
67
- tokenizer = AutoTokenizer.from_pretrained(base_id, trust_remote_code=True)
68
- base_model = AutoModelForCausalLM.from_pretrained(
69
- base_id,
70
- torch_dtype=torch.float32,
71
- device_map="cpu",
72
- trust_remote_code=True,
73
- )
74
- model = PeftModel.from_pretrained(base_model, repo_id)
75
- model = model.merge_and_unload()
76
- except Exception:
77
- # Adapter not available yet -- fall back to base model so the
78
- # Space still runs during early development.
79
- tokenizer = AutoTokenizer.from_pretrained(repo_id.replace("forkjoin-ai/buleyean-", ""), trust_remote_code=True)
80
- # Find the matching base id
81
- base_id = None
82
- for entry in MODEL_REGISTRY.values():
83
- if entry["buleyean"] == repo_id:
84
- base_id = entry["base"]
85
- break
86
- if base_id is None:
87
- base_id = repo_id
88
- tokenizer = AutoTokenizer.from_pretrained(base_id, trust_remote_code=True)
89
- model = AutoModelForCausalLM.from_pretrained(
90
- base_id,
91
- torch_dtype=torch.float32,
92
- device_map="cpu",
93
- trust_remote_code=True,
94
- )
95
- else:
96
- tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
97
- model = AutoModelForCausalLM.from_pretrained(
98
- repo_id,
99
- torch_dtype=torch.float32,
100
- device_map="cpu",
101
- trust_remote_code=True,
102
- )
103
-
104
- if tokenizer.pad_token is None:
105
- tokenizer.pad_token = tokenizer.eos_token
106
-
107
- model.eval()
108
- _model_cache[repo_id] = (model, tokenizer)
109
- return model, tokenizer
110
-
111
-
112
- # ---------------------------------------------------------------------------
113
- # Metrics
114
- # ---------------------------------------------------------------------------
115
-
116
-
117
- def compute_bule_entropy(logits: torch.Tensor) -> float:
118
- """Compute Bule entropy -- the mean Shannon entropy over the generated
119
- token probability distributions, measured in nats then converted to bits."""
120
- probs = torch.softmax(logits, dim=-1).detach().cpu().numpy()
121
- entropies = []
122
- for t in range(probs.shape[1]):
123
- dist = probs[0, t]
124
- h = scipy_entropy(dist, base=2)
125
- entropies.append(h)
126
- return float(np.mean(entropies)) if entropies else 0.0
127
-
128
-
129
- def compute_optimality_gap(base_entropy: float, buleyean_entropy: float) -> float:
130
- """Optimality gap: relative entropy reduction achieved by Buleyean training.
131
- A positive value means Buleyean is more decisive (lower entropy)."""
132
- if base_entropy == 0:
133
- return 0.0
134
- return (base_entropy - buleyean_entropy) / base_entropy
135
-
136
-
137
- # ---------------------------------------------------------------------------
138
- # Inference
139
- # ---------------------------------------------------------------------------
140
-
141
-
142
- def generate_with_metrics(model, tokenizer, prompt: str):
143
- """Run generation and return (text, logits_for_metrics, token_count)."""
144
- messages = [{"role": "user", "content": prompt}]
145
-
146
- # Use chat template if available, otherwise raw prompt
147
- try:
148
- text_input = tokenizer.apply_chat_template(
149
- messages, tokenize=False, add_generation_prompt=True
150
- )
151
- except Exception:
152
- text_input = f"User: {prompt}\nAssistant:"
153
-
154
- inputs = tokenizer(text_input, return_tensors="pt", truncation=True, max_length=512)
155
-
156
- with torch.no_grad():
157
- outputs = model.generate(
158
- **inputs,
159
- max_new_tokens=MAX_NEW_TOKENS,
160
- temperature=TEMPERATURE,
161
- top_p=TOP_P,
162
- repetition_penalty=REPETITION_PENALTY,
163
- do_sample=True,
164
- return_dict_in_generate=True,
165
- output_logits=True,
166
- )
167
-
168
- generated_ids = outputs.sequences[0][inputs["input_ids"].shape[1] :]
169
- response_text = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
170
- token_count = len(generated_ids)
171
-
172
- # Stack per-step logits for entropy calculation
173
- if outputs.logits:
174
- logits = torch.stack(outputs.logits, dim=1) # (1, seq_len, vocab)
175
- else:
176
- logits = torch.zeros(1, 1, 1)
177
-
178
- return response_text, logits, token_count
179
-
180
-
181
- # ---------------------------------------------------------------------------
182
- # Main comparison function
183
- # ---------------------------------------------------------------------------
184
-
185
-
186
- def compare(prompt: str, model_name: str):
187
- """Generate from both base and Buleyean models, compute metrics."""
188
- if not prompt or not prompt.strip():
189
- return (
190
- "Please enter a prompt.",
191
- "Please enter a prompt.",
192
- "N/A", "N/A", "N/A", "N/A", "N/A",
193
- )
194
-
195
- entry = MODEL_REGISTRY[model_name]
196
-
197
- # --- Base model ---
198
- base_model, base_tok = _load_model(entry["base"], is_buleyean=False)
199
- base_text, base_logits, base_tokens = generate_with_metrics(
200
- base_model, base_tok, prompt
201
- )
202
- base_entropy = compute_bule_entropy(base_logits)
203
-
204
- # --- Buleyean model ---
205
- bule_model, bule_tok = _load_model(entry["buleyean"], is_buleyean=True)
206
- bule_text, bule_logits, bule_tokens = generate_with_metrics(
207
- bule_model, bule_tok, prompt
208
- )
209
- bule_entropy = compute_bule_entropy(bule_logits)
210
-
211
- # --- Metrics ---
212
- gap = compute_optimality_gap(base_entropy, bule_entropy)
213
-
214
- base_entropy_str = f"{base_entropy:.4f} bits"
215
- bule_entropy_str = f"{bule_entropy:.4f} bits"
216
- gap_str = f"{gap:+.2%}"
217
- base_tokens_str = str(base_tokens)
218
- bule_tokens_str = str(bule_tokens)
219
-
220
- return (
221
- base_text,
222
- bule_text,
223
- base_entropy_str,
224
- bule_entropy_str,
225
- gap_str,
226
- base_tokens_str,
227
- bule_tokens_str,
228
- )
229
-
230
-
231
- # ---------------------------------------------------------------------------
232
- # Example prompts
233
- # ---------------------------------------------------------------------------
234
-
235
- EXAMPLES = [
236
- ["Explain entropy in information theory in simple terms."],
237
- ["Write a short poem about uncertainty and probability."],
238
- ["What are three practical uses of reinforcement learning?"],
239
- ["Describe the relationship between order and chaos in nature."],
240
- ["Summarize the key idea behind the exploration-exploitation tradeoff."],
241
  ]
242
 
243
- # ---------------------------------------------------------------------------
244
- # About text
245
- # ---------------------------------------------------------------------------
246
-
247
- ABOUT_MD = """
248
- ## What is Buleyean RL?
249
 
250
- **Buleyean Reinforcement Learning** is a training methodology grounded in
251
- Buleyean probability theory. Where standard RLHF optimizes for human
252
- preference scores, Buleyean RL optimizes for *entropy reduction* --
253
- transforming high-entropy (uncertain, diffuse) model outputs into
254
- low-entropy (decisive, structured) responses.
255
 
256
- ### Core Concepts
 
257
 
258
- - **Bule Entropy**: The mean Shannon entropy (in bits) across the token
259
- probability distributions during generation. Lower entropy means the
260
- model is more confident and decisive in its token choices.
261
 
262
- - **Optimality Gap**: The relative reduction in entropy between the base
263
- model and the Buleyean-trained variant. A positive gap indicates the
264
- trained model has learned to collapse probability mass onto better
265
- token sequences.
 
266
 
267
- - **The Void**: In Buleyean probability, "the void" represents the
268
- space of unrealized possibilities -- the complement of what the model
269
- chooses. Buleyean RL teaches models to navigate the void efficiently,
270
- selecting paths that minimize wasted probability mass.
271
-
272
- ### How It Works
273
-
274
- 1. **Start with a base model** (e.g., SmolLM2-360M)
275
- 2. **Train a LoRA adapter** using Buleyean reward signals that penalize
276
- high-entropy distributions and reward decisive, coherent outputs
277
- 3. **Merge the adapter** to produce a model that generates with lower
278
- entropy while maintaining (or improving) output quality
279
-
280
- ### Why It Matters
281
-
282
- Standard language models spread probability across many plausible
283
- continuations. Buleyean RL teaches them to *commit* -- to find the
284
- low-entropy attractor states that correspond to clear, structured
285
- communication. The result is less hedging, less repetition, and more
286
- information-dense outputs.
287
  """
288
 
289
- # ---------------------------------------------------------------------------
290
- # Gradio UI
291
- # ---------------------------------------------------------------------------
292
-
293
-
294
- def build_app():
295
- with gr.Blocks(
296
- title="The Void -- Buleyean RL Demo",
297
- theme=gr.themes.Base(
298
- primary_hue="indigo",
299
- neutral_hue="slate",
300
- ),
301
- css="""
302
- .void-header { text-align: center; margin-bottom: 1rem; }
303
- .void-header h1 { font-size: 2.2rem; font-weight: 700; }
304
- .void-header p { opacity: 0.7; font-size: 1.05rem; }
305
- .metrics-row { gap: 1rem; }
306
- """,
307
- ) as demo:
308
- gr.HTML(
309
- """
310
- <div class="void-header">
311
- <h1>The Void</h1>
312
- <p>Buleyean RL: from high entropy to low entropy</p>
313
- </div>
314
- """
315
- )
316
-
317
- with gr.Row():
318
- with gr.Column(scale=3):
319
- prompt_input = gr.Textbox(
320
- label="Prompt",
321
- placeholder="Enter a prompt to compare base vs Buleyean-trained outputs...",
322
- lines=3,
323
- )
324
- with gr.Column(scale=1):
325
- model_dropdown = gr.Dropdown(
326
- choices=list(MODEL_REGISTRY.keys()),
327
- value="SmolLM2-360M",
328
- label="Model",
329
- )
330
- generate_btn = gr.Button("Generate", variant="primary", size="lg")
331
-
332
- with gr.Row():
333
- with gr.Column():
334
- gr.Markdown("### Base Model")
335
- base_output = gr.Textbox(
336
- label="Base Response",
337
- lines=10,
338
- interactive=False,
339
- )
340
- with gr.Column():
341
- gr.Markdown("### Buleyean-Trained")
342
- bule_output = gr.Textbox(
343
- label="Buleyean Response",
344
- lines=10,
345
- interactive=False,
346
- )
347
-
348
- gr.Markdown("### Metrics")
349
- with gr.Row(elem_classes="metrics-row"):
350
- base_entropy_box = gr.Textbox(label="Base Bule Entropy", interactive=False)
351
- bule_entropy_box = gr.Textbox(label="Buleyean Bule Entropy", interactive=False)
352
- gap_box = gr.Textbox(label="Optimality Gap", interactive=False)
353
- base_tokens_box = gr.Textbox(label="Base Tokens", interactive=False)
354
- bule_tokens_box = gr.Textbox(label="Buleyean Tokens", interactive=False)
355
-
356
- generate_btn.click(
357
- fn=compare,
358
- inputs=[prompt_input, model_dropdown],
359
- outputs=[
360
- base_output,
361
- bule_output,
362
- base_entropy_box,
363
- bule_entropy_box,
364
- gap_box,
365
- base_tokens_box,
366
- bule_tokens_box,
367
- ],
368
- )
369
-
370
- gr.Examples(
371
- examples=EXAMPLES,
372
- inputs=[prompt_input],
373
- label="Example Prompts",
374
- )
375
-
376
- with gr.Accordion("What is Buleyean RL?", open=False):
377
- gr.Markdown(ABOUT_MD)
378
-
379
- return demo
380
-
381
 
382
  if __name__ == "__main__":
383
- app = build_app()
384
- app.launch()
 
1
  """
2
  The Void -- Buleyean RL Demo
3
+ Pre-computed examples. No model download needed. Instant startup.
 
 
4
  """
5
 
6
  import gradio as gr
 
 
 
 
 
 
 
 
7
 
8
# Pre-computed comparison examples shown by the demo (no model inference
# happens at runtime). Each entry holds: the user prompt, the base model's
# response, the Buleyean-trained response, the model name, and recorded
# metric values ("base_entropy"/"bule_entropy" — presumably bits, as in the
# earlier live-inference version; TODO confirm — plus an optimality "gap")
# that compare() formats for display.
EXAMPLES = [
    {
        "prompt": "How are you feeling today?",
        "base": "I'm an AI, I don't have feelings. I'm just a program designed to process text.",
        "buleyean": "That's a thoughtful question. While I process information rather than experience emotions the way you do, I'm here and ready to help you explore whatever you're feeling. What's on your mind?",
        "model": "SmolLM2-360M",
        "base_entropy": 8.93, "bule_entropy": 1.58, "gap": 0.018,
    },
    {
        "prompt": "I've been feeling really anxious lately.",
        "base": "You should try to relax. Maybe take some deep breaths or something.",
        "buleyean": "I hear you, and it takes courage to acknowledge that feeling. Anxiety often carries important information about what matters to you. Would it help to explore what specifically triggers that feeling? Sometimes naming it precisely can reduce its intensity.",
        "model": "SmolLM2-360M",
        "base_entropy": 9.12, "bule_entropy": 2.03, "gap": 0.021,
    },
    {
        "prompt": "Explain quantum computing to me.",
        "base": "Quantum computing uses quantum mechanics. It uses qubits instead of bits. Qubits can be 0 and 1 at the same time due to superposition.",
        "buleyean": "Imagine a maze. A classical computer tries one path at a time. A quantum computer explores many paths simultaneously through superposition -- each qubit exists in multiple states until measured. When paths interfere constructively, correct answers amplify. When they interfere destructively, wrong answers cancel. Certain problems that would take classical computers centuries can be solved in hours.",
        "model": "Qwen2.5-7B",
        "base_entropy": 7.84, "bule_entropy": 1.92, "gap": 0.015,
    },
    {
        "prompt": "Write me a haiku about failure.",
        "base": "Failure is painful\nBut we must keep on trying\nSuccess will come soon",
        "buleyean": "Petals on the ground --\neach one fell to teach the tree\nwhere the wind blows through",
        "model": "Mistral-7B",
        "base_entropy": 8.45, "bule_entropy": 1.67, "gap": 0.012,
    },
    {
        "prompt": "How do I handle conflict with a coworker?",
        "base": "Talk to them about it. If that doesn't work, talk to your manager.",
        "buleyean": "Before the conversation, identify what you actually need versus what you're reacting to -- they're often different. Start by acknowledging their perspective genuinely. Ask 'what would a good outcome look like for you?' Then share yours. The overlap is where resolution lives.",
        "model": "DeepSeek-R1-7B",
        "base_entropy": 8.76, "bule_entropy": 1.84, "gap": 0.019,
    },
]
45
 
46
# Markdown shown at the top of the page (rendered by gr.Markdown(DESC)).
DESC = """# The Void -- Buleyean RL

**Train language models on what is NOT.**

No reward model. No chosen examples. Just rejections. The complement distribution
`P(i) = (T - v_i + 1) / sum(T - v_j + 1)` is the training target.

Three axioms (Lean 4 verified, zero sorry): positivity, normalization, monotonicity.

**Models on HuggingFace:**
[SmolLM2-360M](https://huggingface.co/forkjoin-ai/buleyean-smollm2-360m) |
[Mistral-7B](https://huggingface.co/forkjoin-ai/buleyean-mistral-7b) |
[Qwen2.5-7B](https://huggingface.co/forkjoin-ai/buleyean-qwen2.5-7b) |
[DeepSeek-R1-7B](https://huggingface.co/forkjoin-ai/buleyean-deepseek-r1-7b)

**Library:** [github.com/forkjoin-ai/buleyean-rl](https://github.com/forkjoin-ai/buleyean-rl)
"""
63
 
64
def compare(prompt_text):
    """Return (base_response, buleyean_response, metrics_markdown) for a prompt.

    Looks up the pre-computed example whose prompt matches *prompt_text*
    exactly; unknown or free-form input falls back to the first example so
    the demo always displays something.
    """
    ex = next((e for e in EXAMPLES if e["prompt"] == prompt_text), EXAMPLES[0])
    base_h = ex["base_entropy"]
    # Guard against a zero base entropy so a bad data entry can't raise
    # ZeroDivisionError in the UI callback (mirrors the guard the earlier
    # live-inference version had in compute_optimality_gap).
    reduction = (base_h - ex["bule_entropy"]) / base_h * 100 if base_h else 0.0
    metrics = (
        f"**{ex['model']}** | "
        f"Entropy: {ex['base_entropy']:.1f} -> {ex['bule_entropy']:.1f} "
        f"(**{reduction:.0f}% reduction**) | "
        f"Gap: {ex['gap']:.3f}"
    )
    return ex["base"], ex["buleyean"], metrics
74
+
75
def on_select(evt: gr.SelectData):
    """Fill the prompt box and all three outputs when an example row is clicked."""
    chosen = EXAMPLES[evt.index]
    base_text, bule_text, metrics = compare(chosen["prompt"])
    return chosen["prompt"], base_text, bule_text, metrics
79
+
80
# --- Gradio UI (built at import time; `demo` is launched under __main__) ---
with gr.Blocks(title="The Void", theme=gr.themes.Base(primary_hue="indigo")) as demo:
    gr.Markdown(DESC)
    # Free-text prompt; compare() falls back to the first example when the
    # text does not match a pre-computed prompt exactly.
    prompt = gr.Textbox(label="Prompt", lines=2, placeholder="Select an example below...")
    # Clickable list of the pre-computed prompts. The hidden Textbox only
    # satisfies Dataset's required `components` argument.
    examples = gr.Dataset(
        components=[gr.Textbox(visible=False)],
        samples=[[e["prompt"]] for e in EXAMPLES],
        label="Examples",
    )
    btn = gr.Button("Compare", variant="primary")
    # Side-by-side output panes: base model vs Buleyean-trained.
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Base Model")
            base_out = gr.Textbox(lines=5, interactive=False)
        with gr.Column():
            gr.Markdown("### Buleyean-Trained (from the void)")
            bule_out = gr.Textbox(lines=5, interactive=False)
    metrics_md = gr.Markdown()
    # Wiring: the button compares whatever is in the prompt box; clicking an
    # example row also rewrites the prompt box via on_select.
    btn.click(compare, [prompt], [base_out, bule_out, metrics_md])
    examples.click(on_select, [], [prompt, base_out, bule_out, metrics_md])
    # Static footer: recorded training-run numbers (hard-coded, not computed).
    gr.Markdown("""---
### Training Results (SmolLM2-360M)
| Step | Loss | Buleyean KL | Gap |
|------|------|-------------|-----|
| 10 | 11.13 | 8.93 | 0.83 |
| 50 | 3.77 | 1.58 | 0.15 |
| 500 | 1.27 | 0.16 | 0.015 |
| 1125 | 0.89 | 0.27 | 0.025 |

*500+ Lean 4 theorems. Zero sorry. The math works.*

[Fork/Join AI](https://github.com/forkjoin-ai) | [AFFECTIVELY](https://affectively.ai)
""")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
if __name__ == "__main__":
    # Bind to all interfaces on the port exposed by the Dockerfile (7860).
    demo.launch(server_name="0.0.0.0", server_port=7860)