LoganResearch committed on Jan 21

Commit

3786ad3

1 Parent(s): 3411d01

Fix adapter configs to point to correct HuggingFace repo

Browse files

Files changed (18) hide show

cfhot_checkpoints/ckpt_1000/adapter_config.json +1 -1
cfhot_checkpoints/ckpt_1500/adapter_config.json +1 -1
cfhot_checkpoints/ckpt_2000/adapter_config.json +1 -1
cfhot_checkpoints/ckpt_2500/adapter_config.json +1 -1
cfhot_checkpoints/ckpt_3000/adapter_config.json +1 -1
cfhot_checkpoints/ckpt_3500/adapter_config.json +1 -1
cfhot_checkpoints/ckpt_4000/adapter_config.json +1 -1
cfhot_checkpoints/ckpt_4500/adapter_config.json +1 -1
cfhot_checkpoints/ckpt_500/adapter_config.json +1 -1
cfhot_checkpoints/ckpt_5000/adapter_config.json +1 -1
cfhot_checkpoints/ckpt_5500/adapter_config.json +1 -1
cfhot_checkpoints/ckpt_6000/adapter_config.json +1 -1
cfhot_checkpoints/final/adapter_config.json +1 -1
cfhot_checkpoints/final_6000/adapter_config.json +1 -1
dense_checkpoints/step_100/adapter_config.json +1 -1
dense_checkpoints/step_200/adapter_config.json +1 -1
dense_checkpoints/step_300/adapter_config.json +1 -1
inference.py +75 -0

cfhot_checkpoints/ckpt_1000/adapter_config.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

cfhot_checkpoints/ckpt_1500/adapter_config.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

cfhot_checkpoints/ckpt_2000/adapter_config.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

cfhot_checkpoints/ckpt_2500/adapter_config.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

cfhot_checkpoints/ckpt_3000/adapter_config.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

cfhot_checkpoints/ckpt_3500/adapter_config.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

cfhot_checkpoints/ckpt_4000/adapter_config.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

cfhot_checkpoints/ckpt_4500/adapter_config.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

cfhot_checkpoints/ckpt_500/adapter_config.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

cfhot_checkpoints/ckpt_5000/adapter_config.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "LoganResearch/Ubermenschetien-8B",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

cfhot_checkpoints/ckpt_5500/adapter_config.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

cfhot_checkpoints/ckpt_6000/adapter_config.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

cfhot_checkpoints/final/adapter_config.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

cfhot_checkpoints/final_6000/adapter_config.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

dense_checkpoints/step_100/adapter_config.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

dense_checkpoints/step_200/adapter_config.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

dense_checkpoints/step_300/adapter_config.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
-  "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": null,
+  "base_model_name_or_path": "LoganResearch/ARC-Base-8B-Condensed",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,

inference.py ADDED Viewed

	@@ -0,0 +1,75 @@

+"""
+ARC Inference - Dense output with CF-HoT steering
+"""
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from peft import PeftModel
+import torch.nn.functional as F
+# Load model
+# NOTE(review): the adapter_config.json files changed in this same commit name
+# "LoganResearch/ARC-Base-8B-Condensed" as base_model_name_or_path, whereas the
+# base weights loaded here come from "NousResearch/Hermes-3-Llama-3.1-8B".
+# Confirm which checkpoint the adapters were actually trained against.
+print("Loading base model...")
+# NOTE(review): passing load_in_4bit directly to from_pretrained is presumably
+# the older spelling of quantization_config=BitsAndBytesConfig(load_in_4bit=True);
+# verify against the transformers version this script is pinned to.
+base = AutoModelForCausalLM.from_pretrained(
+    "NousResearch/Hermes-3-Llama-3.1-8B",
+    torch_dtype=torch.float16,
+    device_map="auto",
+    load_in_4bit=True
+)
+# Wrap the base model with the adapter stored under
+# dense_checkpoints/step_100 of the ARC repository.
+print("Loading ARC adapter...")
+model = PeftModel.from_pretrained(
+    base,
+    "LoganResearch/ARC-Base-8B-Condensed",
+    subfolder="dense_checkpoints/step_100"
+)
+# The tokenizer is taken from the same repository as the base weights.
+tokenizer = AutoTokenizer.from_pretrained("NousResearch/Hermes-3-Llama-3.1-8B")
+# Load CF-HoT risk predictor
+print("Loading CF-HoT head...")
+from huggingface_hub import hf_hub_download
+# Download the ckpt_5000 risk-predictor file from the Hub and get its local path.
+risk_path = hf_hub_download(
+    "LoganResearch/ARC-Base-8B-Condensed",
+    "cfhot_checkpoints/ckpt_5000/risk_predictor.pt"
+)
+# SECURITY NOTE: weights_only=False lets torch.load unpickle arbitrary Python
+# objects from the downloaded file; only acceptable if the repository is trusted.
+# NOTE(review): cfhot_state is not referenced again in the code shown here, and
+# map_location="cuda" assumes a CUDA device is available.
+cfhot_state = torch.load(risk_path, map_location="cuda", weights_only=False)
+# Simple CF-HoT steering tokens
+# Each word is encoded without special tokens and only its FIRST token id is
+# kept, so a word that splits into several pieces is represented by its first
+# piece alone.
+# NOTE(review): the words are encoded without a leading space; confirm these are
+# the ids the model actually emits mid-sentence.
+# NOTE(review): REPETITION_TOKENS is not used by generate_dense in the code shown.
+REPETITION_TOKENS = [tokenizer.encode(w, add_special_tokens=False)[0]
+                     for w in ["the", "is", "that", "this", "and", "to", "of"]]
+# Token ids that generate_dense penalizes before sampling.
+HEDGING_TOKENS = [tokenizer.encode(w, add_special_tokens=False)[0]
+                  for w in ["great", "happy", "certainly", "definitely", "really"]]
+def generate_dense(prompt: str, max_tokens: int = 50) -> str:
+    """Generate with CF-HoT logit steering."""
+    full_prompt = f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
+    input_ids = tokenizer(full_prompt, return_tensors="pt").input_ids.to("cuda")
+    generated = input_ids.clone()
+    for _ in range(max_tokens):
+        with torch.no_grad():
+            outputs = model(generated)
+            logits = outputs.logits[:, -1, :] / 0.7
+            # CF-HoT steering: penalize hedging/filler tokens
+            for tok_id in HEDGING_TOKENS:
+                logits[0, tok_id] -= 4.0
+            # Sample
+            probs = F.softmax(logits, dim=-1)
+            next_token = torch.multinomial(probs, 1)
+            generated = torch.cat([generated, next_token], dim=1)
+            if next_token.item() == tokenizer.eos_token_id:
+                break
+    response = tokenizer.decode(generated[0], skip_special_tokens=True)
+    return response.split("assistant")[-1].strip()
+if __name__ == "__main__":
+    while True:
+        prompt = input("\nYou: ")
+        if prompt.lower() in ["quit", "exit"]:
+            break
+        response = generate_dense(prompt)
+        print(f"ARC: {response}")