LoganResearch committed
Commit 3786ad3 · Parent: 3411d01

Fix adapter configs to point to correct HuggingFace repo

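Why this matters: PEFT resolves an adapter's base weights from the base_model_name_or_path field of adapter_config.json, so the previous value (a local /mnt/... path on the training machine) made these checkpoints unloadable anywhere else; a Hub repo id resolves everywhere. A minimal loading sketch, assuming standard PEFT usage (illustrative only, not part of the commit; the checkpoint choice is arbitrary):

import json
from transformers import AutoModelForCausalLM
from peft import PeftConfig, PeftModel

REPO = "LoganResearch/ARC-Base-8B-Condensed"
CKPT = "cfhot_checkpoints/ckpt_1000"

# PEFT reads base_model_name_or_path from the adapter config to find the base.
cfg = PeftConfig.from_pretrained(REPO, subfolder=CKPT)
base = AutoModelForCausalLM.from_pretrained(cfg.base_model_name_or_path)  # now a Hub id
model = PeftModel.from_pretrained(base, REPO, subfolder=CKPT)

And a quick hypothetical check that every config touched by this commit now points at the Hub repo:

from huggingface_hub import hf_hub_download

CKPTS = ([f"cfhot_checkpoints/ckpt_{i}" for i in range(500, 6001, 500)]
         + ["cfhot_checkpoints/final", "cfhot_checkpoints/final_6000"]
         + [f"dense_checkpoints/step_{i}" for i in (100, 200, 300)])
for ckpt in CKPTS:
    path = hf_hub_download(REPO, f"{ckpt}/adapter_config.json")
    with open(path) as f:
        assert json.load(f)["base_model_name_or_path"] == REPO, ckpt
print(f"all {len(CKPTS)} adapter configs point at {REPO}")
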
cfhot_checkpoints/ckpt_1000/adapter_config.json CHANGED
@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5",
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,
cfhot_checkpoints/ckpt_1500/adapter_config.json CHANGED
@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5",
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,
cfhot_checkpoints/ckpt_2000/adapter_config.json CHANGED
@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5",
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,
cfhot_checkpoints/ckpt_2500/adapter_config.json CHANGED
@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5",
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,
cfhot_checkpoints/ckpt_3000/adapter_config.json CHANGED
@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5",
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,
cfhot_checkpoints/ckpt_3500/adapter_config.json CHANGED
@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5",
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,
cfhot_checkpoints/ckpt_4000/adapter_config.json CHANGED
@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5",
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,
cfhot_checkpoints/ckpt_4500/adapter_config.json CHANGED
@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5",
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,
cfhot_checkpoints/ckpt_500/adapter_config.json CHANGED
@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5",
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,
cfhot_checkpoints/ckpt_5000/adapter_config.json CHANGED
@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "LoganResearch/Ubermenschetien-8B",
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,
cfhot_checkpoints/ckpt_5500/adapter_config.json CHANGED
@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5",
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,
cfhot_checkpoints/ckpt_6000/adapter_config.json CHANGED
@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5",
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,
cfhot_checkpoints/final/adapter_config.json CHANGED
@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5",
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,
cfhot_checkpoints/final_6000/adapter_config.json CHANGED
@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5",
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,
dense_checkpoints/step_100/adapter_config.json CHANGED
@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5",
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,
dense_checkpoints/step_200/adapter_config.json CHANGED
@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5",
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,
dense_checkpoints/step_300/adapter_config.json CHANGED
@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5",
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,
inference.py ADDED
@@ -0,0 +1,75 @@
+"""
+ARC Inference - Dense output with CF-HoT steering
+"""
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+from peft import PeftModel
+import torch.nn.functional as F
+
+# Load base model in 4-bit (requires bitsandbytes)
+print("Loading base model...")
+base = AutoModelForCausalLM.from_pretrained(
+    "NousResearch/Hermes-3-Llama-3.1-8B",
+    torch_dtype=torch.float16,
+    device_map="auto",
+    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
+)
+
+print("Loading ARC adapter...")
+model = PeftModel.from_pretrained(
+    base,
+    "LoganResearch/ARC-Base-8B-Condensed",
+    subfolder="dense_checkpoints/step_100",
+)
+
+tokenizer = AutoTokenizer.from_pretrained("NousResearch/Hermes-3-Llama-3.1-8B")
+
+# Load CF-HoT risk predictor (loaded for reference; not consulted below)
+print("Loading CF-HoT head...")
+from huggingface_hub import hf_hub_download
+risk_path = hf_hub_download(
+    "LoganResearch/ARC-Base-8B-Condensed",
+    "cfhot_checkpoints/ckpt_5000/risk_predictor.pt"
+)
+cfhot_state = torch.load(risk_path, map_location="cuda", weights_only=False)
+
+# Simple CF-HoT steering tokens (first token id of each word, no leading space)
+REPETITION_TOKENS = [tokenizer.encode(w, add_special_tokens=False)[0]
+                     for w in ["the", "is", "that", "this", "and", "to", "of"]]
+HEDGING_TOKENS = [tokenizer.encode(w, add_special_tokens=False)[0]
+                  for w in ["great", "happy", "certainly", "definitely", "really"]]
+
+def generate_dense(prompt: str, max_tokens: int = 50) -> str:
+    """Generate with CF-HoT logit steering."""
+    full_prompt = f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
+    input_ids = tokenizer(full_prompt, return_tensors="pt").input_ids.to("cuda")
+
+    generated = input_ids.clone()
+
+    for _ in range(max_tokens):
+        with torch.no_grad():
+            outputs = model(generated)
+        logits = outputs.logits[:, -1, :] / 0.7  # temperature 0.7
+
+        # CF-HoT steering: penalize hedging/filler tokens
+        for tok_id in HEDGING_TOKENS:
+            logits[0, tok_id] -= 4.0
+
+        # Sample
+        probs = F.softmax(logits, dim=-1)
+        next_token = torch.multinomial(probs, 1)
+        generated = torch.cat([generated, next_token], dim=1)
+
+        if next_token.item() == tokenizer.eos_token_id:
+            break
+
+    response = tokenizer.decode(generated[0], skip_special_tokens=True)
+    return response.split("assistant")[-1].strip()
+
+if __name__ == "__main__":
+    while True:
+        prompt = input("\nYou: ")
+        if prompt.lower() in ["quit", "exit"]:
+            break
+        response = generate_dense(prompt)
+        print(f"ARC: {response}")
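One caveat if you adapt the steering lists: tokenizer.encode(w, add_special_tokens=False)[0] yields the token id for each word with no leading space, while mid-sentence occurrences are typically tokenized with a leading space and so escape the -4.0 penalty. A hypothetical variant (not in the commit) that covers both forms:

# Hypothetical: penalize both the "word" and " word" token variants.
HEDGING_TOKENS = [tokenizer.encode(v, add_special_tokens=False)[0]
                  for w in ["great", "happy", "certainly", "definitely", "really"]
                  for v in (w, " " + w)]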