Initial model conversion and upload.
Browse files
- README.md +1 -1
- model-00004-of-00004.safetensors +2 -2
- model.safetensors.index.json +2 -2
- modeling.py +2 -2
- special_tokens_map.json +0 -7
- tokenizer.json +2 -2
- tokenizer_config.json +1 -10
README.md
CHANGED
|
@@ -12,7 +12,7 @@ This model, an instance of `SelfCorrectiveLlama`, includes a hallucination detec
|
|
| 12 |
|
| 13 |
## Special Tokens
|
| 14 |
|
| 15 |
-
The tokenizer has been expanded to include the following special tokens: `<DEL_W>`, `<DEL_S>`, `<DEL_A>`.
|
| 16 |
|
| 17 |
## How to Use
|
| 18 |
|
|
|
|
| 12 |
|
| 13 |
## Special Tokens
|
| 14 |
|
| 15 |
+
The tokenizer has been expanded to include the following special tokens: `<DEL_S>`, `<DEL_A>`.
|
| 16 |
|
| 17 |
## How to Use
|
| 18 |
|
model-00004-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:27020f1eb19b1fe1cad4e5d0a05044e25ec6ed90d97fe13686ce58d4c14f5d24
|
| 3 |
+
size 1520501910
|
model.safetensors.index.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"metadata": {
|
| 3 |
-
"total_parameters":
|
| 4 |
-
"total_size":
|
| 5 |
},
|
| 6 |
"weight_map": {
|
| 7 |
"hallucination_detector.bias": "model-00004-of-00004.safetensors",
|
|
|
|
| 1 |
{
|
| 2 |
"metadata": {
|
| 3 |
+
"total_parameters": 8206442499,
|
| 4 |
+
"total_size": 16412884998
|
| 5 |
},
|
| 6 |
"weight_map": {
|
| 7 |
"hallucination_detector.bias": "model-00004-of-00004.safetensors",
|
modeling.py
CHANGED
|
@@ -13,7 +13,7 @@ class SelfCorrectiveLlama(LlamaForCausalLM):
|
|
| 13 |
def __init__(self, config):
|
| 14 |
super().__init__(config)
|
| 15 |
|
| 16 |
-
self.num_new_tokens =
|
| 17 |
self.original_vocab_size = config.vocab_size
|
| 18 |
|
| 19 |
# Create a new, small embedding layer for only the special tokens
|
|
@@ -45,7 +45,7 @@ class SelfCorrectiveLlama(LlamaForCausalLM):
|
|
| 45 |
clamped_input_ids = torch.clamp(input_ids, max=self.original_vocab_size - 1)
|
| 46 |
inputs_embeds = self.model.embed_tokens(clamped_input_ids)
|
| 47 |
|
| 48 |
-
# Overwrite the embeddings for
|
| 49 |
special_token_mask = input_ids >= self.original_vocab_size
|
| 50 |
if special_token_mask.any():
|
| 51 |
special_ids = input_ids[special_token_mask] - self.original_vocab_size
|
|
|
|
| 13 |
def __init__(self, config):
|
| 14 |
super().__init__(config)
|
| 15 |
|
| 16 |
+
self.num_new_tokens = 2
|
| 17 |
self.original_vocab_size = config.vocab_size
|
| 18 |
|
| 19 |
# Create a new, small embedding layer for only the special tokens
|
|
|
|
| 45 |
clamped_input_ids = torch.clamp(input_ids, max=self.original_vocab_size - 1)
|
| 46 |
inputs_embeds = self.model.embed_tokens(clamped_input_ids)
|
| 47 |
|
| 48 |
+
# Overwrite the embeddings for new special tokens
|
| 49 |
special_token_mask = input_ids >= self.original_vocab_size
|
| 50 |
if special_token_mask.any():
|
| 51 |
special_ids = input_ids[special_token_mask] - self.original_vocab_size
|
special_tokens_map.json
CHANGED
|
@@ -1,12 +1,5 @@
|
|
| 1 |
{
|
| 2 |
"additional_special_tokens": [
|
| 3 |
-
{
|
| 4 |
-
"content": "<DEL_W>",
|
| 5 |
-
"lstrip": false,
|
| 6 |
-
"normalized": false,
|
| 7 |
-
"rstrip": false,
|
| 8 |
-
"single_word": false
|
| 9 |
-
},
|
| 10 |
{
|
| 11 |
"content": "<DEL_S>",
|
| 12 |
"lstrip": false,
|
|
|
|
| 1 |
{
|
| 2 |
"additional_special_tokens": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
{
|
| 4 |
"content": "<DEL_S>",
|
| 5 |
"lstrip": false,
|
tokenizer.json
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5507c539f08ee603e46b27ba2bafc4d73d66a53fdcade0b9ce53d9f5e2f3d4c5
|
| 3 |
+
size 17210288
|
tokenizer_config.json
CHANGED
|
@@ -2049,14 +2049,6 @@
|
|
| 2049 |
"special": true
|
| 2050 |
},
|
| 2051 |
"128256": {
|
| 2052 |
-
"content": "<DEL_W>",
|
| 2053 |
-
"lstrip": false,
|
| 2054 |
-
"normalized": false,
|
| 2055 |
-
"rstrip": false,
|
| 2056 |
-
"single_word": false,
|
| 2057 |
-
"special": true
|
| 2058 |
-
},
|
| 2059 |
-
"128257": {
|
| 2060 |
"content": "<DEL_S>",
|
| 2061 |
"lstrip": false,
|
| 2062 |
"normalized": false,
|
|
@@ -2064,7 +2056,7 @@
|
|
| 2064 |
"single_word": false,
|
| 2065 |
"special": true
|
| 2066 |
},
|
| 2067 |
-
"
|
| 2068 |
"content": "<DEL_A>",
|
| 2069 |
"lstrip": false,
|
| 2070 |
"normalized": false,
|
|
@@ -2074,7 +2066,6 @@
|
|
| 2074 |
}
|
| 2075 |
},
|
| 2076 |
"additional_special_tokens": [
|
| 2077 |
-
"<DEL_W>",
|
| 2078 |
"<DEL_S>",
|
| 2079 |
"<DEL_A>"
|
| 2080 |
],
|
|
|
|
| 2049 |
"special": true
|
| 2050 |
},
|
| 2051 |
"128256": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2052 |
"content": "<DEL_S>",
|
| 2053 |
"lstrip": false,
|
| 2054 |
"normalized": false,
|
|
|
|
| 2056 |
"single_word": false,
|
| 2057 |
"special": true
|
| 2058 |
},
|
| 2059 |
+
"128257": {
|
| 2060 |
"content": "<DEL_A>",
|
| 2061 |
"lstrip": false,
|
| 2062 |
"normalized": false,
|
|
|
|
| 2066 |
}
|
| 2067 |
},
|
| 2068 |
"additional_special_tokens": [
|
|
|
|
| 2069 |
"<DEL_S>",
|
| 2070 |
"<DEL_A>"
|
| 2071 |
],
|