Prometech Computer Sciences Corp committed on
Commit
0cd0c16
·
verified ·
1 Parent(s): c1d758e

Upload prettybird_brain.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. prettybird_brain.py +117 -0
prettybird_brain.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import json
3
+ import re
4
+ import os
5
+ import transformers
6
+ from transformers import LogitsProcessor, LogitsProcessorList, AutoModelForCausalLM, AutoTokenizer
7
+
8
# System-role message placed at the start of every chat prompt built by
# prettybird_bce_basic_brain_mini.generate_response.
CONTROLLED_REASONING_CORE = "You are a helpful assistant with a Controlled Reasoning Core. Please reason step by step."
9
+
10
class InterventionLogitsProcessor(LogitsProcessor):
    """Logits processor that adds a constant bonus to one token's score.

    On every call the score column ``boost_token_id`` is increased by
    ``boost_value`` for all rows of ``scores``; every other column is
    passed through unchanged.

    Args:
        boost_token_id: Vocabulary index of the token whose score is raised.
        boost_value: Amount added to that token's score on each call.
    """

    def __init__(self, boost_token_id, boost_value=2.0):
        self.boost_token_id = boost_token_id
        self.boost_value = boost_value

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
        """Return a copy of ``scores`` with the boosted token's column raised.

        ``input_ids`` is accepted to satisfy the processor call signature but
        is not consulted.
        """
        # Work on a copy so the tensor handed in by the caller is not mutated;
        # the processor's result is communicated only through its return value.
        boosted = scores.clone()
        boosted[:, self.boost_token_id] += self.boost_value
        return boosted
18
+
19
class prettybird_bce_basic_brain_mini:
    """Causal-LM wrapper with a calculator tool, a heuristic reward and a logit intervention.

    The constructor resolves a model location, loads tokenizer and model, and
    stores them on ``self.tokenizer`` / ``self.model``. ``generate_response``
    builds a chat prompt (optionally augmented with a calculator result),
    samples up to 100 new tokens, prints the response together with its
    ``math_reward`` and returns the response text.
    """

    def __init__(self, model_path="qwen_merged", device="cuda" if torch.cuda.is_available() else "cpu"):
        """Resolve the model location and load tokenizer and model.

        Location lookup order when ``model_path`` does not exist on disk:
        ``llama.cpp/<model_path>``, then ``/content/qwen_merged``, and finally
        the hub id ``Qwen/Qwen2.5-Math-1.5B-Instruct``.

        Args:
            model_path: Local directory to load from (see lookup order above).
            device: Device string used both as ``device_map`` for the model
                and as the target device for prompt tensors.

        Raises:
            RuntimeError: If loading fails both without and with remote code.
        """
        self.device = device
        print(f"Transformers version: {transformers.__version__}")

        local_path = model_path
        if not os.path.exists(local_path):
            nested_path = os.path.join("llama.cpp", model_path)
            if os.path.exists(nested_path):
                local_path = nested_path
            elif os.path.exists("/content/qwen_merged"):
                local_path = "/content/qwen_merged"

        final_path = local_path if os.path.exists(local_path) else "Qwen/Qwen2.5-Math-1.5B-Instruct"
        print(f"Loading model from {final_path}...")

        try:
            # Attempt a native load (trust_remote_code=False) first.
            self._load(final_path, trust_remote_code=False)
            print("Loaded natively.")
        except Exception as e:
            # NOTE(security): any native-load failure (including unrelated ones
            # such as I/O errors) triggers a retry that allows code shipped
            # with the checkpoint to execute. Only point this class at
            # checkpoints you trust.
            print(f"Native load failed: {e}. Trying remote code...")
            try:
                self._load(final_path, trust_remote_code=True)
                print("Loaded with remote code.")
            except Exception as e2:
                # Chain the cause so the original traceback is preserved.
                raise RuntimeError(f"Failed to load model: {e2}") from e2

        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

    def _load(self, path, trust_remote_code):
        """Load tokenizer and model from ``path`` into ``self``."""
        # Half precision is only used off-CPU; on CPU float16 kernels are
        # slow or missing depending on the torch build, so fall back to float32.
        dtype = torch.float32 if str(self.device) == "cpu" else torch.float16
        self.tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=trust_remote_code)
        self.model = AutoModelForCausalLM.from_pretrained(
            path,
            device_map=self.device,
            trust_remote_code=trust_remote_code,
            torch_dtype=dtype,
        )

    def math_reward(self, response):
        """Score a response with two additive heuristics.

        Adds 1.0 when the text contains a ``\\boxed{...}`` span and 0.5 when
        the text is longer than 50 characters.

        Returns:
            The summed score as a float (0.0, 0.5, 1.0 or 1.5).
        """
        score = 0.0
        if re.search(r"\\boxed\{.*?\}", response):
            score += 1.0
        if len(response) > 50:
            score += 0.5
        return score

    def parameter_editing(self, layer_idx=0, noise_scale=1e-5):
        """Add Gaussian noise in place to one layer's ``q_proj`` weight.

        Best-effort: any failure (missing attributes, bad index, ...) is
        printed and swallowed rather than raised.

        Args:
            layer_idx: Index into the model's layer list.
            noise_scale: Multiplier applied to the standard-normal noise.
        """
        print(f"Editing parameters in layer {layer_idx}...")
        try:
            with torch.no_grad():
                # The layer list lives either on a nested ``.model`` attribute
                # or directly on the loaded object.
                if hasattr(self.model, 'model'):
                    layers = self.model.model.layers
                else:
                    layers = self.model.layers
                weights = layers[layer_idx].self_attn.q_proj.weight
                noise = torch.randn_like(weights) * noise_scale
                weights.add_(noise)
            print("Parameter editing complete.")
        except Exception as e:
            print(f"Error editing params: {e}")

    def run_tool(self, tool_name, query):
        """Run the named tool on ``query`` and return its result as a string.

        Only ``"calculator"`` is implemented. It strips every character other
        than digits, ``+ - * / ( ) .`` and spaces, then evaluates what is left.

        Returns:
            The evaluated result as a string, ``"Invalid"`` when nothing is
            left after stripping, ``"Error"`` when evaluation fails, or
            ``"Unknown"`` for any other tool name.
        """
        if tool_name != "calculator":
            return "Unknown"

        clean_query = re.sub(r"[^0-9+\-*/(). ]", "", query)
        if not clean_query.strip():
            return "Invalid"
        try:
            # NOTE(security): eval() on caller-supplied text. The whitelist
            # above removes every letter and underscore, so no names can be
            # referenced, and builtins are blanked as a second barrier.
            # Expressions such as chained ``**`` can still be very expensive
            # to compute; consider a dedicated arithmetic parser if queries
            # come from untrusted users.
            return str(eval(clean_query, {"__builtins__": {}}, {}))
        except Exception:
            # Narrowed from a bare ``except`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            return "Error"

    @staticmethod
    def _extract_expression(query):
        """Return the first arithmetic-looking span in ``query`` or ``None``.

        A span qualifies when, after trimming whitespace, it is longer than
        three characters and contains at least one digit. All candidate spans
        are examined: the character class includes whitespace, so the
        leftmost match in ordinary prose is usually just a space between two
        words and must not end the search.
        """
        for candidate in re.finditer(r"[\d\.\s\+\-\*\/\(\)]+", query):
            expression = candidate.group(0).strip()
            if len(expression) > 3 and any(ch.isdigit() for ch in expression):
                return expression
        return None

    def generate_response(self, query, use_tool=False, use_intervention=False):
        """Generate a sampled reply to ``query``.

        Args:
            query: User text.
            use_tool: Force the calculator lookup; it is also triggered when
                the query contains the word "calculate" (case-insensitive).
            use_intervention: When true, boost the first token id of the
                encoding of "Therefore" by 5.0 at every generation step.

        Returns:
            The decoded newly generated text (prompt tokens excluded).
        """
        input_text = query
        if use_tool or "calculate" in query.lower():
            expression = self._extract_expression(query)
            if expression is not None:
                res = self.run_tool("calculator", expression)
                input_text += f"\nTool Result: {res}"

        messages = [
            {"role": "system", "content": CONTROLLED_REASONING_CORE},
            {"role": "user", "content": input_text},
        ]
        # Request a dict so the attention mask is available; with pad aliased
        # to EOS, generate() cannot reliably infer the mask on its own.
        encoded = self.tokenizer.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,
            return_tensors="pt",
            return_dict=True,
        ).to(self.device)
        input_ids = encoded["input_ids"]
        prompt_length = input_ids.shape[-1]

        logits_processor = LogitsProcessorList()
        if use_intervention:
            print("Applying intervention...")
            ids = self.tokenizer.encode("Therefore", add_special_tokens=False)
            if ids:
                logits_processor.append(InterventionLogitsProcessor(ids[0], 5.0))

        outputs = self.model.generate(
            input_ids=input_ids,
            attention_mask=encoded.get("attention_mask"),
            max_new_tokens=100,
            logits_processor=logits_processor,
            do_sample=True,
            pad_token_id=self.tokenizer.pad_token_id,
        )
        # Drop the prompt tokens so only the continuation is decoded.
        response = self.tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

        print(f"Response: {response}")
        print(f"Reward: {self.math_reward(response)}")
        return response