File size: 2,562 Bytes
3786ad3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
"""
ARC Inference - Dense output with CF-HoT steering
"""
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch.nn.functional as F

# Load model
# NOTE(review): `load_in_4bit=True` as a direct kwarg is deprecated in newer
# transformers releases in favor of `quantization_config=BitsAndBytesConfig(...)`;
# confirm against the pinned transformers version before upgrading.
print("Loading base model...")
base = AutoModelForCausalLM.from_pretrained(
    "NousResearch/Hermes-3-Llama-3.1-8B",
    torch_dtype=torch.float16,      # half precision for the non-quantized parts
    device_map="auto",              # let accelerate place layers across devices
    load_in_4bit=True
)

# Attach the LoRA adapter on top of the frozen base weights.
print("Loading ARC adapter...")
model = PeftModel.from_pretrained(
    base,
    "LoganResearch/ARC-Base-8B-Condensed",
    subfolder="dense_checkpoints/step_100"
)

# Tokenizer comes from the base model repo, not the adapter repo.
tokenizer = AutoTokenizer.from_pretrained("NousResearch/Hermes-3-Llama-3.1-8B")

# Load CF-HoT risk predictor
print("Loading CF-HoT head...")
from huggingface_hub import hf_hub_download
risk_path = hf_hub_download(
    "LoganResearch/ARC-Base-8B-Condensed",
    "cfhot_checkpoints/ckpt_5000/risk_predictor.pt"
)
# SECURITY: `weights_only=False` lets torch.load unpickle arbitrary objects
# from the downloaded file — only safe because the repo is trusted. Prefer
# `weights_only=True` if the checkpoint is a plain state dict.
# NOTE(review): `map_location="cuda"` hard-requires a GPU; also, `cfhot_state`
# is loaded but never referenced anywhere else in this file — presumably kept
# for a steering path not yet wired in. Verify before removing.
cfhot_state = torch.load(risk_path, map_location="cuda", weights_only=False)

# Simple CF-HoT steering tokens
def _steering_token_ids(words: list[str]) -> list[int]:
    """Return unique first-token ids for each word, with and without a leading space.

    Llama-style BPE tokenizers encode a word differently at the start of text
    ("the") than mid-sentence (" the"). During generation nearly every word is
    preceded by a space, so penalizing only the no-space variant (as the
    original code did) would miss the tokens actually being sampled. Both
    variants are collected; duplicates are dropped while preserving order.
    """
    ids: list[int] = []
    for word in words:
        for form in (word, " " + word):
            toks = tokenizer.encode(form, add_special_tokens=False)
            if toks and toks[0] not in ids:
                ids.append(toks[0])
    return ids

# Common filler words (currently not consumed by generate_dense).
REPETITION_TOKENS = _steering_token_ids(["the", "is", "that", "this", "and", "to", "of"])
# Tokens penalized at every decoding step to suppress hedging/filler output.
HEDGING_TOKENS = _steering_token_ids(["great", "happy", "certainly", "definitely", "really"])

def generate_dense(prompt: str, max_tokens: int = 50, temperature: float = 0.7) -> str:
    """Generate a response with CF-HoT logit steering.

    Samples one token at a time, subtracting a fixed penalty from the logits
    of every id in ``HEDGING_TOKENS`` before each draw.

    Args:
        prompt: Raw user message; ChatML markup is added here.
        max_tokens: Maximum number of new tokens to sample.
        temperature: Softmax temperature (previously hard-coded to 0.7).

    Returns:
        The decoded assistant reply, with the prompt tokens stripped.
    """
    full_prompt = f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
    # Ask the model where its first parameters live instead of hard-coding
    # "cuda" — with device_map="auto" the embedding layer may be elsewhere.
    device = next(model.parameters()).device
    input_ids = tokenizer(full_prompt, return_tensors="pt").input_ids.to(device)
    prompt_len = input_ids.shape[1]

    generated = input_ids.clone()
    # Index tensor lets the penalty be applied in one vectorized op per step
    # instead of a Python loop over token ids.
    hedge_ids = torch.tensor(HEDGING_TOKENS, dtype=torch.long, device=device)

    with torch.no_grad():  # hoisted: no gradients are needed anywhere below
        for _ in range(max_tokens):
            outputs = model(generated)
            logits = outputs.logits[:, -1, :] / temperature

            # CF-HoT steering: penalize hedging/filler tokens
            logits[0, hedge_ids] -= 4.0

            probs = F.softmax(logits, dim=-1)
            next_token = torch.multinomial(probs, 1)
            generated = torch.cat([generated, next_token], dim=1)

            if next_token.item() == tokenizer.eos_token_id:
                break

    # Decode only the newly generated tokens. The original decoded the whole
    # sequence and took split("assistant")[-1], which corrupts any reply that
    # itself contains the word "assistant".
    return tokenizer.decode(generated[0, prompt_len:], skip_special_tokens=True).strip()

if __name__ == "__main__":
    # Minimal REPL. Ctrl-D / Ctrl-C exit cleanly instead of raising an
    # unhandled EOFError/KeyboardInterrupt traceback (the original's bug).
    while True:
        try:
            prompt = input("\nYou: ")
        except (EOFError, KeyboardInterrupt):
            print()
            break
        if prompt.lower() in ("quit", "exit"):
            break
        if not prompt.strip():
            continue  # don't burn a full generation on an empty line
        response = generate_dense(prompt)
        print(f"ARC: {response}")