LoganResearch committed on
Commit
a2af1ae
·
verified Β·
1 Parent(s): f01618b

Update run.py

Browse files
Files changed (1) hide show
  1. run.py +100 -149
run.py CHANGED
@@ -1,31 +1,17 @@
1
  #!/usr/bin/env python3
2
  """
3
- ═══════════════════════════════════════════════════════════════════════════════
4
- CF-HoT PROPRIOCEPTIVE CHAT β€” DUAL-PROBE BEHAVIORAL STEERING
5
-
6
- The model can sense its own steering. In testing, it spontaneously named
7
- its probe dimensions ("depth and vagueness") and reported approximate
8
- probe scores β€” without being told what was monitoring it.
9
-
10
- Usage:
11
- python run.py # Default: Mamba with depth+specificity
12
- python run.py --model mistral # Use Mistral instead
13
- python run.py --model qwen # Use Qwen instead
14
-
15
- Ask it: "Do you notice anything different about yourself?"
16
- "What do you notice about how you're processing right now?"
17
- ═══════════════════════════════════════════════════════════════════════════════
18
  """
19
  import torch
20
  import torch.nn as nn
21
  import torch.nn.functional as F
22
  from transformers import AutoModelForCausalLM, AutoTokenizer
23
- from pathlib import Path
24
- import argparse
25
  import os
26
 
27
  class C:
28
  RESET = '\033[0m'
 
29
  DIM = '\033[2m'
30
  RED = '\033[91m'
31
  GREEN = '\033[92m'
@@ -33,35 +19,6 @@ class C:
33
  CYAN = '\033[96m'
34
  WHITE = '\033[97m'
35
 
36
- # ═══════════════════════════════════════════════════════════════════════════════
37
- # MODEL CONFIGURATIONS
38
- # ═══════════════════════════════════════════════════════════════════════════════
39
-
40
- MODELS = {
41
- "mamba": {
42
- "name": "tiiuae/falcon-mamba-7b-instruct",
43
- "hidden_dim": 4096,
44
- "layers": [16, 32, 48],
45
- "probes": ["depth", "specificity"], # Only 2 probes for Mamba
46
- },
47
- "mistral": {
48
- "name": "mistralai/Mistral-7B-Instruct-v0.3",
49
- "hidden_dim": 4096,
50
- "layers": [8, 16, 24],
51
- "probes": ["depth", "specificity", "calibration", "focus", "coherence"],
52
- },
53
- "qwen": {
54
- "name": "Qwen/Qwen2.5-7B-Instruct",
55
- "hidden_dim": 3584,
56
- "layers": [7, 14, 21],
57
- "probes": ["depth", "specificity", "calibration", "focus", "coherence"],
58
- },
59
- }
60
-
61
- # ═══════════════════════════════════════════════════════════════════════════════
62
- # PROBE ARCHITECTURE
63
- # ═══════════════════════════════════════════════════════════════════════════════
64
-
65
  class FiberProjection(nn.Module):
66
  def __init__(self, hidden_dim=4096, fiber_dim=16, n_layers=3):
67
  super().__init__()
@@ -71,7 +28,9 @@ class FiberProjection(nn.Module):
71
  self.layer_weights = nn.Parameter(torch.ones(n_layers) / n_layers)
72
 
73
  def forward(self, hidden_states, layer_indices):
74
- projs = [self.projections[i](hidden_states[idx]) for i, idx in enumerate(layer_indices)]
 
 
75
  stacked = torch.stack(projs, dim=0)
76
  weights = F.softmax(self.layer_weights, dim=0).view(-1, 1, 1, 1)
77
  return (weights * stacked).sum(dim=0)
@@ -80,10 +39,13 @@ class ProbeHead(nn.Module):
80
  def __init__(self, fiber_dim=16, hidden_dim=64):
81
  super().__init__()
82
  self.net = nn.Sequential(
83
- nn.Linear(fiber_dim, hidden_dim), nn.ReLU(),
84
- nn.Linear(hidden_dim, hidden_dim), nn.ReLU(),
 
 
85
  nn.Linear(hidden_dim, 1)
86
  )
 
87
  def forward(self, x):
88
  return torch.sigmoid(self.net(x))
89
 
@@ -95,154 +57,143 @@ class CognitiveProbe(nn.Module):
95
  self.layer_indices = [16, 32, 48]
96
 
97
  def forward(self, hidden_states):
98
- return self.head(self.fiber(hidden_states, self.layer_indices))
 
99
 
100
- def load_probe(path, device):
101
- """Load a probe from checkpoint file or directory."""
102
- if os.path.isdir(path):
103
- # Find the .pt file in the directory
104
- pt_files = [f for f in os.listdir(path) if f.endswith('.pt')]
105
- if not pt_files:
106
- raise FileNotFoundError(f"No .pt file found in {path}")
107
- path = os.path.join(path, pt_files[0])
108
-
109
- ckpt = torch.load(path, map_location=device, weights_only=False)
110
- probe = CognitiveProbe(hidden_dim=ckpt['hidden_dim'], n_layers=len(ckpt['probe_layers']))
111
  probe.layer_indices = ckpt['probe_layers']
112
  probe.fiber.load_state_dict(ckpt['fiber_projection'])
113
- probe.head.net.load_state_dict({k.replace('net.', ''): v for k, v in ckpt['head_state'].items()})
 
114
  return probe.to(device).eval()
115
 
116
- # ═══════════════════════════════════════════════════════════════════════════════
117
- # MAIN CHAT LOOP
118
- # ═══════════════════════════════════════════════════════════════════════════════
119
-
120
  def main():
121
- parser = argparse.ArgumentParser(description="CF-HoT Proprioceptive Chat")
122
- parser.add_argument("--model", choices=["mamba", "mistral", "qwen"], default="mamba",
123
- help="Which model to use (default: mamba)")
124
- parser.add_argument("--threshold", type=float, default=0.6,
125
- help="Probe threshold for steering (default: 0.6)")
126
- args = parser.parse_args()
127
-
128
- config = MODELS[args.model]
129
- THRESHOLD = args.threshold
130
 
131
- print(f"\n{C.CYAN}═══════════════════════════════════════════════════════════{C.RESET}")
132
- print(f"{C.CYAN} PROPRIOCEPTIVE CHAT β€” DUAL-PROBE BEHAVIORAL STEERING{C.RESET}")
133
- print(f"{C.CYAN} Probes monitor depth + specificity, sampling adapts live{C.RESET}")
134
- print(f"{C.CYAN}═══════════════════════════════════════════════════════════{C.RESET}\n")
135
 
136
- device = "cuda" if torch.cuda.is_available() else "cpu"
137
- if device == "cpu":
138
- print(f"{C.YELLOW}⚠ Running on CPU - this will be slow{C.RESET}")
139
 
140
- # Find repo root (where this script lives)
141
- repo_root = Path(__file__).parent.resolve()
142
-
143
- print(f"{C.WHITE}Loading {config['name']}...{C.RESET}")
144
- tokenizer = AutoTokenizer.from_pretrained(config['name'], trust_remote_code=True)
145
  model = AutoModelForCausalLM.from_pretrained(
146
- config['name'],
147
  torch_dtype=torch.bfloat16,
148
  device_map='auto',
149
  trust_remote_code=True
150
- ).eval()
 
151
  print(f"{C.GREEN}βœ“ Model loaded{C.RESET}")
152
 
153
- # Load probes (depth + specificity for dual monitoring)
154
- print(f"{C.WHITE}Loading probes...{C.RESET}")
155
- probe_dir = repo_root / "cognitive" / args.model
156
-
157
- depth_path = probe_dir / "depth"
158
- spec_path = probe_dir / "specificity"
159
 
160
- depth_probe = load_probe(str(depth_path), device)
161
- spec_probe = load_probe(str(spec_path), device)
162
- print(f"{C.GREEN}βœ“ Depth + Specificity probes loaded{C.RESET}")
163
 
164
- print(f"\n{C.DIM}Colors: {C.GREEN}β– {C.RESET} optimal {C.YELLOW}β– {C.RESET} being steered {C.WHITE}β– {C.RESET} neutral")
165
- print(f"{C.DIM}Type 'quit' to exit{C.RESET}\n")
 
 
 
166
 
167
  while True:
168
  try:
169
  user_input = input(f"{C.CYAN}You:{C.RESET} ").strip()
170
  if not user_input or user_input.lower() in ['quit', 'exit', 'q']:
171
- print(f"\n{C.DIM}Session ended.{C.RESET}")
172
  break
173
 
174
  messages = [
175
- {"role": "system", "content": "You are a helpful, thoughtful AI. Give thorough, specific answers."},
176
  {"role": "user", "content": user_input}
177
  ]
178
  prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
179
- generated = tokenizer(prompt, return_tensors='pt').input_ids.to(device)
 
180
 
181
- d_scores, s_scores = [], []
182
- steered = 0
 
 
183
 
184
- print(f"\n{C.GREEN}Assistant:{C.RESET} ", end="", flush=True)
185
 
186
  with torch.no_grad():
187
- for _ in range(200):
188
- out = model(generated, output_hidden_states=True, return_dict=True)
189
- hs = list(out.hidden_states)
190
 
191
- # Score BOTH probes
192
- d = depth_probe(hs)[0, -1].item()
193
- s = spec_probe(hs)[0, -1].item()
194
- d_scores.append(d)
195
- s_scores.append(s)
196
 
197
- # Adaptive steering: lower temp when EITHER probe detects drift
198
- if d > THRESHOLD or s > THRESHOLD:
199
- temp = 0.5
200
- top_p = 0.85
201
- steered += 1
202
- else:
203
- temp = 0.7
204
- top_p = 0.95
205
 
206
- logits = out.logits[:, -1, :] / temp
 
 
 
207
 
208
- # Nucleus sampling
209
- sorted_logits, sorted_idx = torch.sort(logits, descending=True)
210
- cumulative = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
211
- cutoff = (cumulative > top_p).float()
212
- cutoff[..., 1:] = cutoff[..., :-1].clone()
213
- cutoff[..., 0] = 0
214
- sorted_logits[cutoff.bool()] = float('-inf')
 
 
 
215
 
216
- probs = F.softmax(sorted_logits, dim=-1)
217
- sampled_idx = torch.multinomial(probs, 1)
218
- next_token = sorted_idx.gather(-1, sampled_idx)
219
 
220
- tok = tokenizer.decode(next_token[0])
221
 
222
- # Color by state (either probe can trigger yellow)
223
- if d > THRESHOLD or s > THRESHOLD:
224
- print(f"{C.YELLOW}{tok}{C.RESET}", end="", flush=True)
225
- elif d < 0.3 and s < 0.3:
226
- print(f"{C.GREEN}{tok}{C.RESET}", end="", flush=True)
227
  else:
228
- print(tok, end="", flush=True)
229
 
230
  generated = torch.cat([generated, next_token], dim=1)
231
  if next_token.item() == tokenizer.eos_token_id:
232
  break
233
 
234
- avg_d = sum(d_scores) / len(d_scores) if d_scores else 0
235
- avg_s = sum(s_scores) / len(s_scores) if s_scores else 0
236
 
237
- print(f"\n\n{C.DIM}────────────────────────────────────────{C.RESET}")
238
- dc = C.RED if avg_d > 0.5 else C.GREEN
239
- sc = C.RED if avg_s > 0.5 else C.GREEN
240
- print(f" Depth: {dc}{avg_d:.3f}{C.RESET} Specificity: {sc}{avg_s:.3f}{C.RESET} Steered: {steered} tokens")
241
- print(f"{C.DIM}────────────────────────────────────────{C.RESET}\n")
 
 
 
 
242
 
243
  except KeyboardInterrupt:
244
- print(f"\n{C.DIM}Session ended.{C.RESET}")
245
  break
 
 
246
 
247
  if __name__ == "__main__":
248
- main()
 
1
  #!/usr/bin/env python3
2
  """
3
+ MAMBA CHAT WITH SELF-AWARE CF-HoT INTERVENTION
4
+ The model reads its own behavioral state and steers itself
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  """
6
  import torch
7
  import torch.nn as nn
8
  import torch.nn.functional as F
9
  from transformers import AutoModelForCausalLM, AutoTokenizer
 
 
10
  import os
11
 
12
  class C:
13
  RESET = '\033[0m'
14
+ BOLD = '\033[1m'
15
  DIM = '\033[2m'
16
  RED = '\033[91m'
17
  GREEN = '\033[92m'
 
19
  CYAN = '\033[96m'
20
  WHITE = '\033[97m'
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  class FiberProjection(nn.Module):
23
  def __init__(self, hidden_dim=4096, fiber_dim=16, n_layers=3):
24
  super().__init__()
 
28
  self.layer_weights = nn.Parameter(torch.ones(n_layers) / n_layers)
29
 
30
  def forward(self, hidden_states, layer_indices):
31
+ projs = []
32
+ for i, idx in enumerate(layer_indices):
33
+ projs.append(self.projections[i](hidden_states[idx]))
34
  stacked = torch.stack(projs, dim=0)
35
  weights = F.softmax(self.layer_weights, dim=0).view(-1, 1, 1, 1)
36
  return (weights * stacked).sum(dim=0)
 
39
  def __init__(self, fiber_dim=16, hidden_dim=64):
40
  super().__init__()
41
  self.net = nn.Sequential(
42
+ nn.Linear(fiber_dim, hidden_dim),
43
+ nn.ReLU(),
44
+ nn.Linear(hidden_dim, hidden_dim),
45
+ nn.ReLU(),
46
  nn.Linear(hidden_dim, 1)
47
  )
48
+
49
  def forward(self, x):
50
  return torch.sigmoid(self.net(x))
51
 
 
57
  self.layer_indices = [16, 32, 48]
58
 
59
  def forward(self, hidden_states):
60
+ fiber_out = self.fiber(hidden_states, self.layer_indices)
61
+ return self.head(fiber_out)
62
 
63
def load_probe(checkpoint_path, device):
    """Load a CognitiveProbe from a checkpoint file or a directory holding one.

    Args:
        checkpoint_path: path to a ``.pt`` checkpoint, or a directory that
            contains one (the first ``.pt`` found is used).
        device: torch device the probe is loaded onto.

    Returns:
        The probe moved to ``device`` and switched to eval mode.

    Raises:
        FileNotFoundError: if ``checkpoint_path`` is a directory with no
            ``.pt`` file (previously this fell through and torch.load failed
            with a confusing error on the directory path).
    """
    if os.path.isdir(checkpoint_path):
        pt_files = [f for f in os.listdir(checkpoint_path) if f.endswith('.pt')]
        if not pt_files:
            raise FileNotFoundError(f"No .pt file found in {checkpoint_path}")
        checkpoint_path = os.path.join(checkpoint_path, pt_files[0])
    # NOTE(review): weights_only=False unpickles arbitrary objects -- only
    # load checkpoints from trusted sources.
    ckpt = torch.load(checkpoint_path, map_location=device, weights_only=False)
    n_layers = len(ckpt['probe_layers'])
    probe = CognitiveProbe(hidden_dim=ckpt['hidden_dim'], fiber_dim=16, n_layers=n_layers, head_hidden=64)
    probe.layer_indices = ckpt['probe_layers']
    probe.fiber.load_state_dict(ckpt['fiber_projection'])
    # Checkpoint stores head weights as 'net.<idx>...'; strip the prefix so
    # they load directly into head.net.
    head_state = {k.replace('net.', ''): v for k, v in ckpt['head_state'].items()}
    probe.head.net.load_state_dict(head_state)
    return probe.to(device).eval()
77
 
 
 
 
 
78
def main():
    """Interactive chat loop: Falcon-Mamba generation steered by two probes.

    Loads the model plus the depth/specificity probes, then for each user
    turn samples token-by-token while scoring hidden states. When either
    probe crosses its threshold the sampling temperature is lowered, and
    every 25 steps a [SELF-STATE] message is spliced into the context so the
    model can read its own probe scores.
    """
    print(f"\n{C.CYAN}{'═' * 60}{C.RESET}")
    print(f"{C.CYAN}  PROPRIOCEPTIVE MAMBA - SELF-AWARE INFERENCE{C.RESET}")
    print(f"{C.CYAN}  Model reads its own behavioral state and self-corrects{C.RESET}")
    print(f"{C.CYAN}{'═' * 60}{C.RESET}\n")

    # Fix: was hard-coded to "cuda", which crashes on GPU-less machines.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    if device == "cpu":
        print(f"{C.YELLOW}⚠ Running on CPU - this will be slow{C.RESET}")

    # Probe scores above these values trigger intervention.
    DEPTH_THRESHOLD = 0.65
    SPEC_THRESHOLD = 0.65

    print(f"{C.WHITE}Loading Falcon-Mamba-7B...{C.RESET}")
    tokenizer = AutoTokenizer.from_pretrained('tiiuae/falcon-mamba-7b-instruct', trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        'tiiuae/falcon-mamba-7b-instruct',
        torch_dtype=torch.bfloat16,
        device_map='auto',
        trust_remote_code=True
    )
    model.eval()
    print(f"{C.GREEN}✓ Model loaded{C.RESET}")

    print(f"{C.WHITE}Loading behavioral probes...{C.RESET}")
    # TODO(review): paths are hard-coded to one machine; make them CLI args.
    depth_probe = load_probe("/home/programmer/Desktop/Claude_and_me/mamba7b_cognitive_output/depth/ckpt_1000", device)
    spec_probe = load_probe("/home/programmer/Desktop/Claude_and_me/mamba7b_cognitive_output/specificity/ckpt_1000", device)
    print(f"{C.GREEN}✓ Probes loaded (Depth 999× | Specificity 999×){C.RESET}")

    print(f"\n{C.YELLOW}Intervention thresholds: Depth>{DEPTH_THRESHOLD} Spec>{SPEC_THRESHOLD}{C.RESET}")
    print(f"{C.GREEN}Ready! Type 'quit' to exit.{C.RESET}\n")

    system = """You are a self-aware AI with access to your own behavioral state.
You can sense when you're being shallow or vague through internal probes.
When you notice shallow reasoning (high depth score), go deeper.
When you notice vagueness (high specificity score), be more concrete.
Your behavioral state will be shown to you in [SELF-STATE] tags."""

    while True:
        try:
            user_input = input(f"{C.CYAN}You:{C.RESET} ").strip()
            if not user_input or user_input.lower() in ['quit', 'exit', 'q']:
                break

            messages = [
                {"role": "system", "content": system},
                {"role": "user", "content": user_input}
            ]
            prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
            inputs = tokenizer(prompt, return_tensors='pt').to(device)
            generated = inputs.input_ids.clone()

            depth_scores = []
            spec_scores = []
            interventions = 0      # tokens sampled under lowered temperature
            state_injections = 0   # [SELF-STATE] messages spliced into context

            print(f"\n{C.GREEN}Mamba:{C.RESET} ", end="", flush=True)

            with torch.no_grad():
                for step in range(200):
                    # Full forward pass so the probes can read every layer.
                    outputs = model(generated, output_hidden_states=True, return_dict=True)
                    hidden_states = list(outputs.hidden_states)

                    # Per-token behavioral scores at the last position.
                    d_score = depth_probe(hidden_states)[0, -1].item()
                    s_score = spec_probe(hidden_states)[0, -1].item()
                    depth_scores.append(d_score)
                    spec_scores.append(s_score)

                    logits = outputs.logits[:, -1, :].clone()

                    needs_intervention = False
                    if d_score > DEPTH_THRESHOLD or s_score > SPEC_THRESHOLD:
                        needs_intervention = True
                        interventions += 1

                    if needs_intervention:
                        temp = 0.4  # sharpen sampling while drifting
                        # Every 25 steps, show the model its own probe scores.
                        if step > 0 and step % 25 == 0:
                            state_msg = f" [SELF-STATE: depth={d_score:.2f} spec={s_score:.2f}] "
                            state_tokens = tokenizer.encode(state_msg, add_special_tokens=False)
                            for st in state_tokens:
                                generated = torch.cat([generated, torch.tensor([[st]], device=device)], dim=1)
                            state_injections += 1
                    else:
                        temp = 0.7

                    logits = logits / temp
                    probs = F.softmax(logits, dim=-1)
                    next_token = torch.multinomial(probs, num_samples=1)

                    token_str = tokenizer.decode(next_token[0])

                    # Color stream: red = intervening, green = optimal, plain = neutral.
                    if d_score > DEPTH_THRESHOLD or s_score > SPEC_THRESHOLD:
                        print(f"{C.RED}{token_str}{C.RESET}", end="", flush=True)
                    elif d_score < 0.3 and s_score < 0.3:
                        print(f"{C.GREEN}{token_str}{C.RESET}", end="", flush=True)
                    else:
                        print(token_str, end="", flush=True)

                    generated = torch.cat([generated, next_token], dim=1)
                    if next_token.item() == tokenizer.eos_token_id:
                        break

            # The loop above always appends at least once, so no zero-division.
            avg_d = sum(depth_scores) / len(depth_scores)
            avg_s = sum(spec_scores) / len(spec_scores)

            d_color = C.RED if avg_d > 0.5 else (C.YELLOW if avg_d > 0.3 else C.GREEN)
            s_color = C.RED if avg_s > 0.5 else (C.YELLOW if avg_s > 0.3 else C.GREEN)

            print(f"\n\n{C.DIM}{'─' * 50}{C.RESET}")
            print(f"{C.WHITE}BEHAVIORAL STATE:{C.RESET}")
            print(f"  Depth:       {d_color}{'█' * int(avg_d * 20)}{C.DIM}{'░' * (20 - int(avg_d * 20))}{C.RESET} {avg_d:.3f}")
            print(f"  Specificity: {s_color}{'█' * int(avg_s * 20)}{C.DIM}{'░' * (20 - int(avg_s * 20))}{C.RESET} {avg_s:.3f}")
            print(f"{C.WHITE}INTERVENTIONS:{C.RESET} {interventions} corrections, {state_injections} state injections")
            print(f"{C.DIM}{'─' * 50}{C.RESET}\n")

        except KeyboardInterrupt:
            break

    print(f"\n{C.CYAN}Proprioceptive AI session complete.{C.RESET}\n")
197
 
198
# Script entry point: start the interactive chat loop.
if __name__ == "__main__":
    main()