Taylor committed on
Commit
2ba37ce
·
1 Parent(s): 26dd3c3

feat: add personality models tab and all models catalog

Browse files

Three tabs: Compare (live inference), Personality Models (sweep results
and 32B downloads), All Models (full catalog with links).
Five personality-modulated 32B adapters now linked.

Files changed (1) hide show
  1. app.py +158 -43
app.py CHANGED
@@ -4,61 +4,63 @@ LIVE inference only. Every response generated in real-time.
4
  """
5
 
6
  import gradio as gr
7
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
8
  from peft import PeftModel
9
  import torch
10
- import os
11
 
12
- print("Loading base model (SmolLM2-360M-Instruct)...", flush=True)
13
- base_model_id = "HuggingFaceTB/SmolLM2-360M-Instruct"
14
- buleyean_adapter = "forkjoin-ai/buleyean-smollm2-360m"
15
 
16
- tokenizer = AutoTokenizer.from_pretrained(base_model_id)
 
17
  if tokenizer.pad_token is None:
18
  tokenizer.pad_token = tokenizer.eos_token
19
 
20
- # Load base model
21
  base_model = AutoModelForCausalLM.from_pretrained(
22
- base_model_id,
23
  torch_dtype=torch.float32,
24
  device_map="cpu",
25
  trust_remote_code=True,
26
  )
27
  print("Base model loaded.", flush=True)
28
 
29
- # Load Buleyean model (base + LoRA adapter)
30
  print("Loading Buleyean adapter...", flush=True)
31
  bule_base = AutoModelForCausalLM.from_pretrained(
32
- base_model_id,
33
  torch_dtype=torch.float32,
34
  device_map="cpu",
35
  trust_remote_code=True,
36
  )
37
  try:
38
- bule_model = PeftModel.from_pretrained(bule_base, buleyean_adapter)
39
  bule_model = bule_model.merge_and_unload()
40
  print("Buleyean adapter merged.", flush=True)
41
  except Exception as e:
42
  print(f"Warning: Could not load adapter ({e}), using base model copy", flush=True)
43
  bule_model = bule_base
44
 
45
- print("Both models ready. Live inference active.", flush=True)
46
 
47
 
48
- def generate(prompt, model):
49
  messages = [{"role": "user", "content": prompt}]
50
- input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 
 
51
  inputs = tokenizer(input_text, return_tensors="pt")
52
  with torch.no_grad():
53
  outputs = model.generate(
54
  **inputs,
55
- max_new_tokens=300,
56
- temperature=0.7,
57
- top_p=0.9,
58
  do_sample=True,
59
  pad_token_id=tokenizer.pad_token_id,
60
  )
61
- response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
 
 
62
  return response.strip()
63
 
64
 
@@ -70,38 +72,151 @@ def compare(prompt):
70
  return base_out, bule_out
71
 
72
 
73
- with gr.Blocks(title="The Void", theme=gr.themes.Base(primary_hue="indigo")) as demo:
 
 
 
 
 
 
 
 
74
  gr.Markdown("""# The Void -- Buleyean RL
75
 
76
  **Live inference. Every response generated in real-time. Nothing hardcoded. Nothing fabricated.**
77
 
78
- Type any prompt. Both models run inference right now on this machine.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
- Base: [SmolLM2-360M-Instruct](https://huggingface.co/HuggingFaceTB/SmolLM2-360M-Instruct)
81
- Buleyean: [buleyean-smollm2-360m](https://huggingface.co/forkjoin-ai/buleyean-smollm2-360m) -- same model, trained from rejection alone
82
 
83
- [Library](https://github.com/forkjoin-ai/buleyean-rl) | [Paper](https://huggingface.co/forkjoin-ai) | 500+ Lean 4 theorems, zero sorry
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  """)
85
- prompt = gr.Textbox(label="Your prompt", lines=2, placeholder="Type anything and press Generate...")
86
- btn = gr.Button("Generate (live inference)", variant="primary", size="lg")
87
- with gr.Row():
88
- with gr.Column():
89
- gr.Markdown("### Base Model (null hypothesis)")
90
- base_out = gr.Textbox(label="SmolLM2-360M-Instruct", lines=12, interactive=False)
91
- with gr.Column():
92
- gr.Markdown("### Buleyean-Trained (from the void)")
93
- bule_out = gr.Textbox(label="buleyean-smollm2-360m", lines=12, interactive=False)
94
-
95
- btn.click(compare, [prompt], [base_out, bule_out])
96
- prompt.submit(compare, [prompt], [base_out, bule_out])
97
-
98
- gr.Markdown("### Try these prompts:")
99
- for p in ["hello", "How are you feeling today?", "I've been feeling really anxious lately.", "Write me a haiku about failure.", "What is the meaning of life?"]:
100
- gr.Button(p, size="sm").click(
101
- fn=lambda x=p: compare(x),
102
- inputs=[],
103
- outputs=[base_out, bule_out],
104
- ).then(fn=lambda x=p: x, inputs=[], outputs=[prompt])
105
 
106
  if __name__ == "__main__":
107
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
4
  """
5
 
6
  import gradio as gr
7
+ from transformers import AutoModelForCausalLM, AutoTokenizer
8
  from peft import PeftModel
9
  import torch
 
10
 
11
+ BASE_MODEL_ID = "HuggingFaceTB/SmolLM2-360M-Instruct"
12
+ BULEYEAN_ADAPTER = "forkjoin-ai/buleyean-smollm2-360m"
 
13
 
14
+ print("Loading tokenizer...", flush=True)
15
+ tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
16
  if tokenizer.pad_token is None:
17
  tokenizer.pad_token = tokenizer.eos_token
18
 
19
+ print(f"Loading base model ({BASE_MODEL_ID})...", flush=True)
20
  base_model = AutoModelForCausalLM.from_pretrained(
21
+ BASE_MODEL_ID,
22
  torch_dtype=torch.float32,
23
  device_map="cpu",
24
  trust_remote_code=True,
25
  )
26
  print("Base model loaded.", flush=True)
27
 
 
28
  print("Loading Buleyean adapter...", flush=True)
29
  bule_base = AutoModelForCausalLM.from_pretrained(
30
+ BASE_MODEL_ID,
31
  torch_dtype=torch.float32,
32
  device_map="cpu",
33
  trust_remote_code=True,
34
  )
35
  try:
36
+ bule_model = PeftModel.from_pretrained(bule_base, BULEYEAN_ADAPTER)
37
  bule_model = bule_model.merge_and_unload()
38
  print("Buleyean adapter merged.", flush=True)
39
  except Exception as e:
40
  print(f"Warning: Could not load adapter ({e}), using base model copy", flush=True)
41
  bule_model = bule_base
42
 
43
+ print("All models ready. Live inference active.", flush=True)
44
 
45
 
46
+ def generate(prompt, model, max_tokens=300, temperature=0.7, top_p=0.9):
47
  messages = [{"role": "user", "content": prompt}]
48
+ input_text = tokenizer.apply_chat_template(
49
+ messages, tokenize=False, add_generation_prompt=True
50
+ )
51
  inputs = tokenizer(input_text, return_tensors="pt")
52
  with torch.no_grad():
53
  outputs = model.generate(
54
  **inputs,
55
+ max_new_tokens=max_tokens,
56
+ temperature=temperature,
57
+ top_p=top_p,
58
  do_sample=True,
59
  pad_token_id=tokenizer.pad_token_id,
60
  )
61
+ response = tokenizer.decode(
62
+ outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
63
+ )
64
  return response.strip()
65
 
66
 
 
72
  return base_out, bule_out
73
 
74
 
75
+ # ---------------------------------------------------------------------------
76
+ # UI
77
+ # ---------------------------------------------------------------------------
78
+
79
+ with gr.Blocks(
80
+ title="The Void -- Buleyean RL",
81
+ theme=gr.themes.Base(primary_hue="indigo"),
82
+ ) as demo:
83
+
84
  gr.Markdown("""# The Void -- Buleyean RL
85
 
86
  **Live inference. Every response generated in real-time. Nothing hardcoded. Nothing fabricated.**
87
 
88
+ Standard RLHF learns what to say by imitating chosen completions.
89
+ Buleyean RL learns what *not* to say by studying rejections.
90
+ The complement distribution preserves the (K-1) rejected perspectives.
91
+
92
+ [Library](https://github.com/forkjoin-ai/buleyean-rl) | [Paper](https://forkracefold.com) | [Training Data](https://huggingface.co/datasets/forkjoin-ai/buleyean-rejection-data) | 500+ Lean 4 theorems, zero sorry
93
+ """)
94
+
95
+ with gr.Tab("Compare (Base vs Buleyean)"):
96
+ gr.Markdown("""Type any prompt. Both models run inference right now on this machine.
97
+
98
+ **Base:** [SmolLM2-360M-Instruct](https://huggingface.co/HuggingFaceTB/SmolLM2-360M-Instruct) |
99
+ **Buleyean:** [buleyean-smollm2-360m](https://huggingface.co/forkjoin-ai/buleyean-smollm2-360m) -- same architecture, trained from rejection alone
100
+ """)
101
+ prompt = gr.Textbox(
102
+ label="Your prompt", lines=2,
103
+ placeholder="Type anything and press Generate...",
104
+ )
105
+ btn = gr.Button("Generate (live inference)", variant="primary", size="lg")
106
+ with gr.Row():
107
+ with gr.Column():
108
+ gr.Markdown("### Base Model (null hypothesis)")
109
+ base_out = gr.Textbox(
110
+ label="SmolLM2-360M-Instruct", lines=12, interactive=False,
111
+ )
112
+ with gr.Column():
113
+ gr.Markdown("### Buleyean-Trained (from the void)")
114
+ bule_out = gr.Textbox(
115
+ label="buleyean-smollm2-360m", lines=12, interactive=False,
116
+ )
117
+
118
+ btn.click(compare, [prompt], [base_out, bule_out])
119
+ prompt.submit(compare, [prompt], [base_out, bule_out])
120
+
121
+ gr.Markdown("### Try these prompts:")
122
+ for p in [
123
+ "What is the theory of failure?",
124
+ "How are you feeling today?",
125
+ "I've been feeling really anxious lately.",
126
+ "Write me a haiku about failure.",
127
+ "What is the meaning of life?",
128
+ ]:
129
+ gr.Button(p, size="sm").click(
130
+ fn=lambda x=p: compare(x),
131
+ inputs=[],
132
+ outputs=[base_out, bule_out],
133
+ ).then(fn=lambda x=p: x, inputs=[], outputs=[prompt])
134
+
135
+ with gr.Tab("Personality Models"):
136
+ gr.Markdown("""## The Personality IS the Walker
137
+
138
+ Same rejection data. Same base model. Five different complement distributions.
139
+
140
+ Each personality profile modulates how the void walker traverses the rejection boundary:
141
+
142
+ | Personality | Try (Fork) | Choose (Race) | Commit (Fold) | Let Go (Vent) | Learn (Interfere) | Result |
143
+ |---|---|---|---|---|---|---|
144
+ | **Builder** | 0.5 | 0.8 | **0.9** | 0.4 | 0.618 | Tightest convergence (97%). The fold dominates. |
145
+ | **Anxious** | 0.3 | 0.5 | 0.7 | **0.15** | 0.4 | Learns slowly, forgets nothing (79%). |
146
+ | **Balanced** | 0.618 | 0.618 | 0.618 | 0.618 | 0.618 | All phi. The control (81%). |
147
+ | **Explorer** | **0.9** | 0.618 | 0.4 | 0.7 | 0.85 | Wide aperture, broad distribution (73%). |
148
+ | **Creative** | **0.95** | 0.4 | 0.3 | 0.8 | **0.9** | Max divergence, keeps options open (73%). |
149
+
150
+ ### Training Results (Qwen2.5-32B-Instruct)
151
+
152
+ | Personality | Alpha | Final Loss | Min Loss | Curriculum |
153
+ |---|---|---|---|---|
154
+ | Builder | 0.950 | 0.293 | 0.270 | inverse_bule |
155
+ | Anxious | 0.793 | 0.543 | 0.495 | rejection_density |
156
+ | Balanced | 0.700 | 0.830 | 0.741 | rejection_density |
157
+ | Explorer | 0.453 | 2.937 | 2.708 | kurtosis |
158
+ | Creative | 0.340 | 3.525 | 3.239 | kurtosis |
159
+
160
+ ### Download the models
161
+
162
+ All five personality-modulated LoRA adapters for Qwen2.5-32B:
163
+
164
+ - [buleyean-qwen2.5-32b-builder](https://huggingface.co/forkjoin-ai/buleyean-qwen2.5-32b-builder)
165
+ - [buleyean-qwen2.5-32b-anxious](https://huggingface.co/forkjoin-ai/buleyean-qwen2.5-32b-anxious)
166
+ - [buleyean-qwen2.5-32b-balanced](https://huggingface.co/forkjoin-ai/buleyean-qwen2.5-32b-balanced)
167
+ - [buleyean-qwen2.5-32b-explorer](https://huggingface.co/forkjoin-ai/buleyean-qwen2.5-32b-explorer)
168
+ - [buleyean-qwen2.5-32b-creative](https://huggingface.co/forkjoin-ai/buleyean-qwen2.5-32b-creative)
169
+
170
+ ```python
171
+ from transformers import AutoModelForCausalLM
172
+ from peft import PeftModel
173
+
174
+ base = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-32B-Instruct", device_map="auto")
175
+ # Choose your personality:
176
+ model = PeftModel.from_pretrained(base, "forkjoin-ai/buleyean-qwen2.5-32b-builder", subfolder="lora")
177
+ model = model.merge_and_unload()
178
+ ```
179
+ """)
180
+
181
+ with gr.Tab("All Models"):
182
+ gr.Markdown("""## Buleyean RL Model Family
183
+
184
+ All models trained from rejection alone. No reward model. No chosen examples.
185
+
186
+ ### Base Buleyean Models
187
+
188
+ | Model | Base | Size | HF Link |
189
+ |---|---|---|---|
190
+ | buleyean-qwen2.5-32b | Qwen2.5-32B-Instruct | 32B | [Download](https://huggingface.co/forkjoin-ai/buleyean-qwen2.5-32b) |
191
+ | buleyean-qwen2.5-7b | Qwen2.5-7B-Instruct | 7B | [Download](https://huggingface.co/forkjoin-ai/buleyean-qwen2.5-7b) |
192
+ | buleyean-deepseek-r1-7b | DeepSeek-R1-Distill-Qwen-7B | 7B | [Download](https://huggingface.co/forkjoin-ai/buleyean-deepseek-r1-7b) |
193
+ | buleyean-mistral-7b | Mistral-7B-Instruct-v0.3 | 7B | [Download](https://huggingface.co/forkjoin-ai/buleyean-mistral-7b) |
194
+ | buleyean-qwen2.5-0.5b | Qwen2.5-0.5B-Instruct | 0.5B | [Download](https://huggingface.co/forkjoin-ai/buleyean-qwen2.5-0.5b) |
195
+ | buleyean-smollm2-360m | SmolLM2-360M-Instruct | 360M | [Download](https://huggingface.co/forkjoin-ai/buleyean-smollm2-360m) |
196
 
197
+ ### Personality-Modulated (32B)
 
198
 
199
+ | Personality | Commit | Final Loss | HF Link |
200
+ |---|---|---|---|
201
+ | Builder | 0.9 | 0.293 | [Download](https://huggingface.co/forkjoin-ai/buleyean-qwen2.5-32b-builder) |
202
+ | Anxious | 0.7 | 0.543 | [Download](https://huggingface.co/forkjoin-ai/buleyean-qwen2.5-32b-anxious) |
203
+ | Balanced | 0.618 | 0.830 | [Download](https://huggingface.co/forkjoin-ai/buleyean-qwen2.5-32b-balanced) |
204
+ | Explorer | 0.4 | 2.937 | [Download](https://huggingface.co/forkjoin-ai/buleyean-qwen2.5-32b-explorer) |
205
+ | Creative | 0.3 | 3.525 | [Download](https://huggingface.co/forkjoin-ai/buleyean-qwen2.5-32b-creative) |
206
+
207
+ ### Links
208
+
209
+ - [Training library](https://github.com/forkjoin-ai/buleyean-rl)
210
+ - [Training data](https://huggingface.co/datasets/forkjoin-ai/buleyean-rejection-data)
211
+ - [Paper](https://forkracefold.com)
212
+ - [Colab: Train your own](https://huggingface.co/datasets/forkjoin-ai/buleyean-rejection-data/blob/main/train_70b_colab.ipynb)
213
+ - [Colab: Personality sweep](https://huggingface.co/datasets/forkjoin-ai/buleyean-rejection-data/blob/main/personality_sweep_colab.ipynb)
214
  """)
215
+
216
+ gr.Markdown("""---
217
+ *Built with [Buleyean RL](https://github.com/forkjoin-ai/buleyean-rl). The void is where the information is.*
218
+ """)
219
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
 
221
  if __name__ == "__main__":
222
  demo.launch(server_name="0.0.0.0", server_port=7860)