MathBite committed on
Commit
231a0c8
·
verified ·
1 Parent(s): 85f6847

Initial model conversion and upload.

Browse files
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54566aee0d66e67f57658f178073695301dc769b173089d2355509be0e317be9
3
- size 1520485422
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2fa66d389820b24dbcb4571ae71e26710ec4dc541b4386d2136c5797be2226c
3
+ size 1520501998
model.safetensors.index.json CHANGED
@@ -1,13 +1,15 @@
1
  {
2
  "metadata": {
3
- "total_parameters": 8206434307,
4
- "total_size": 16412868614
5
  },
6
  "weight_map": {
7
  "hallucination_detector.bias": "model-00004-of-00004.safetensors",
8
  "hallucination_detector.weight": "model-00004-of-00004.safetensors",
9
  "hallucination_down_proj.weight": "model-00004-of-00004.safetensors",
10
  "hallucination_gate_proj.weight": "model-00004-of-00004.safetensors",
 
 
11
  "hallucination_up_proj.weight": "model-00004-of-00004.safetensors",
12
  "lm_head.weight": "model-00004-of-00004.safetensors",
13
  "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_parameters": 8206442499,
4
+ "total_size": 16412884998
5
  },
6
  "weight_map": {
7
  "hallucination_detector.bias": "model-00004-of-00004.safetensors",
8
  "hallucination_detector.weight": "model-00004-of-00004.safetensors",
9
  "hallucination_down_proj.weight": "model-00004-of-00004.safetensors",
10
  "hallucination_gate_proj.weight": "model-00004-of-00004.safetensors",
11
+ "hallucination_norm.bias": "model-00004-of-00004.safetensors",
12
+ "hallucination_norm.weight": "model-00004-of-00004.safetensors",
13
  "hallucination_up_proj.weight": "model-00004-of-00004.safetensors",
14
  "lm_head.weight": "model-00004-of-00004.safetensors",
15
  "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
modeling.py CHANGED
@@ -22,6 +22,7 @@ class SelfCorrectiveLlama(LlamaForCausalLM):
22
  self.hallucination_up_proj = nn.Linear(config.hidden_size, intermediate_size, bias=False)
23
  self.hallucination_down_proj = nn.Linear(intermediate_size, config.hidden_size, bias=False)
24
  self.hallucination_detector = nn.Linear(config.hidden_size, self.num_new_tokens + 1)
 
25
 
26
  def prepare_inputs_for_generation(self, input_ids, past_key_values=None, **kwargs):
27
  # Get the full sequence of input IDs from the past, if available
 
22
  self.hallucination_up_proj = nn.Linear(config.hidden_size, intermediate_size, bias=False)
23
  self.hallucination_down_proj = nn.Linear(intermediate_size, config.hidden_size, bias=False)
24
  self.hallucination_detector = nn.Linear(config.hidden_size, self.num_new_tokens + 1)
25
+ self.hallucination_norm = nn.LayerNorm(config.hidden_size)
26
 
27
  def prepare_inputs_for_generation(self, input_ids, past_key_values=None, **kwargs):
28
  # Get the full sequence of input IDs from the past, if available