Initial model conversion and upload.
Browse files
- model-00004-of-00004.safetensors +2 -2
- model.safetensors.index.json +4 -2
- modeling.py +1 -0
model-00004-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f2fa66d389820b24dbcb4571ae71e26710ec4dc541b4386d2136c5797be2226c
|
| 3 |
+
size 1520501998
|
model.safetensors.index.json
CHANGED
|
@@ -1,13 +1,15 @@
|
|
| 1 |
{
|
| 2 |
"metadata": {
|
| 3 |
-
"total_parameters":
|
| 4 |
-
"total_size":
|
| 5 |
},
|
| 6 |
"weight_map": {
|
| 7 |
"hallucination_detector.bias": "model-00004-of-00004.safetensors",
|
| 8 |
"hallucination_detector.weight": "model-00004-of-00004.safetensors",
|
| 9 |
"hallucination_down_proj.weight": "model-00004-of-00004.safetensors",
|
| 10 |
"hallucination_gate_proj.weight": "model-00004-of-00004.safetensors",
|
|
|
|
|
|
|
| 11 |
"hallucination_up_proj.weight": "model-00004-of-00004.safetensors",
|
| 12 |
"lm_head.weight": "model-00004-of-00004.safetensors",
|
| 13 |
"model.embed_tokens.weight": "model-00001-of-00004.safetensors",
|
|
|
|
| 1 |
{
|
| 2 |
"metadata": {
|
| 3 |
+
"total_parameters": 8206442499,
|
| 4 |
+
"total_size": 16412884998
|
| 5 |
},
|
| 6 |
"weight_map": {
|
| 7 |
"hallucination_detector.bias": "model-00004-of-00004.safetensors",
|
| 8 |
"hallucination_detector.weight": "model-00004-of-00004.safetensors",
|
| 9 |
"hallucination_down_proj.weight": "model-00004-of-00004.safetensors",
|
| 10 |
"hallucination_gate_proj.weight": "model-00004-of-00004.safetensors",
|
| 11 |
+
"hallucination_norm.bias": "model-00004-of-00004.safetensors",
|
| 12 |
+
"hallucination_norm.weight": "model-00004-of-00004.safetensors",
|
| 13 |
"hallucination_up_proj.weight": "model-00004-of-00004.safetensors",
|
| 14 |
"lm_head.weight": "model-00004-of-00004.safetensors",
|
| 15 |
"model.embed_tokens.weight": "model-00001-of-00004.safetensors",
|
modeling.py
CHANGED
|
@@ -22,6 +22,7 @@ class SelfCorrectiveLlama(LlamaForCausalLM):
|
|
| 22 |
self.hallucination_up_proj = nn.Linear(config.hidden_size, intermediate_size, bias=False)
|
| 23 |
self.hallucination_down_proj = nn.Linear(intermediate_size, config.hidden_size, bias=False)
|
| 24 |
self.hallucination_detector = nn.Linear(config.hidden_size, self.num_new_tokens + 1)
|
|
|
|
| 25 |
|
| 26 |
def prepare_inputs_for_generation(self, input_ids, past_key_values=None, **kwargs):
|
| 27 |
# Get the full sequence of input IDs from the past, if available
|
|
|
|
| 22 |
self.hallucination_up_proj = nn.Linear(config.hidden_size, intermediate_size, bias=False)
|
| 23 |
self.hallucination_down_proj = nn.Linear(intermediate_size, config.hidden_size, bias=False)
|
| 24 |
self.hallucination_detector = nn.Linear(config.hidden_size, self.num_new_tokens + 1)
|
| 25 |
+
self.hallucination_norm = nn.LayerNorm(config.hidden_size)
|
| 26 |
|
| 27 |
def prepare_inputs_for_generation(self, input_ids, past_key_values=None, **kwargs):
|
| 28 |
# Get the full sequence of input IDs from the past, if available
|