Fix NaN outputs

by helenai - opened 3 days ago

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+94

-73

Files changed (5) hide show

config.json +46 -47
generation_config.json +6 -6
model.safetensors +1 -1
special_tokens_map.json +5 -1
tokenizer_config.json +36 -18

config.json CHANGED Viewed

@@ -1,47 +1,46 @@
-{
-  "_name_or_path": "katuni4ka/tiny-random-chatglm2",
-  "add_bias_linear": false,
-  "add_qkv_bias": true,
-  "apply_query_key_layer_scaling": true,
-  "apply_residual_connection_post_layernorm": false,
-  "architectures": [
-    "ChatGLMForConditionalGeneration"
-  ],
-  "attention_dropout": 0.0,
-  "attention_softmax_in_fp32": true,
-  "auto_map": {
-    "AutoConfig": "katuni4ka/tiny-random-chatglm2--configuration_chatglm.ChatGLMConfig",
-    "AutoModel": "katuni4ka/tiny-random-chatglm2--modeling_chatglm.ChatGLMForConditionalGeneration",
-    "AutoModelForCausalLM": "katuni4ka/tiny-random-chatglm2--modeling_chatglm.ChatGLMForConditionalGeneration",
-    "AutoModelForSeq2SeqLM": "katuni4ka/tiny-random-chatglm2--modeling_chatglm.ChatGLMForConditionalGeneration",
-    "AutoModelForSequenceClassification": "katuni4ka/tiny-random-chatglm2--modeling_chatglm.ChatGLMForSequenceClassification"
-  },
-  "bias_dropout_fusion": true,
-  "classifier_dropout": null,
-  "eos_token_id": 2,
-  "ffn_hidden_size": 3424,
-  "fp32_residual_connection": false,
-  "hidden_dropout": 0.0,
-  "hidden_size": 128,
-  "kv_channels": 32,
-  "layernorm_epsilon": 1e-05,
-  "model_type": "chatglm",
-  "multi_query_attention": true,
-  "multi_query_group_num": 1,
-  "num_attention_heads": 2,
-  "num_layers": 2,
-  "original_rope": true,
-  "pad_token_id": 0,
-  "padded_vocab_size": 65024,
-  "post_layer_norm": true,
-  "pre_seq_len": null,
-  "prefix_projection": false,
-  "quantization_bit": 0,
-  "rmsnorm": true,
-  "seq_length": 32768,
-  "tie_word_embeddings": false,
-  "torch_dtype": "float32",
-  "transformers_version": "4.38.1",
-  "use_cache": true,
-  "vocab_size": 65024
-}

+{
+  "add_bias_linear": false,
+  "add_qkv_bias": true,
+  "apply_query_key_layer_scaling": true,
+  "apply_residual_connection_post_layernorm": false,
+  "architectures": [
+    "ChatGLMForConditionalGeneration"
+  ],
+  "attention_dropout": 0.0,
+  "attention_softmax_in_fp32": true,
+  "auto_map": {
+    "AutoConfig": "configuration_chatglm.ChatGLMConfig",
+    "AutoModel": "katuni4ka/tiny-random-chatglm2--modeling_chatglm.ChatGLMForConditionalGeneration",
+    "AutoModelForCausalLM": "modeling_chatglm.ChatGLMForConditionalGeneration",
+    "AutoModelForSeq2SeqLM": "katuni4ka/tiny-random-chatglm2--modeling_chatglm.ChatGLMForConditionalGeneration",
+    "AutoModelForSequenceClassification": "katuni4ka/tiny-random-chatglm2--modeling_chatglm.ChatGLMForSequenceClassification"
+  },
+  "bias_dropout_fusion": true,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "ffn_hidden_size": 3424,
+  "fp32_residual_connection": false,
+  "hidden_dropout": 0.0,
+  "hidden_size": 128,
+  "kv_channels": 32,
+  "layernorm_epsilon": 1e-05,
+  "model_type": "chatglm",
+  "multi_query_attention": true,
+  "multi_query_group_num": 1,
+  "num_attention_heads": 2,
+  "num_layers": 2,
+  "original_rope": true,
+  "pad_token_id": 0,
+  "padded_vocab_size": 65024,
+  "post_layer_norm": true,
+  "pre_seq_len": null,
+  "prefix_projection": false,
+  "quantization_bit": 0,
+  "rmsnorm": true,
+  "seq_length": 32768,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.55.4",
+  "use_cache": true,
+  "vocab_size": 65024
+}

generation_config.json CHANGED Viewed

@@ -1,6 +1,6 @@
-{
-  "_from_model_config": true,
-  "eos_token_id": 2,
-  "pad_token_id": 0,
-  "transformers_version": "4.38.1"
-}

+{
+  "_from_model_config": true,
+  "eos_token_id": 2,
+  "pad_token_id": 0,
+  "transformers_version": "4.55.4"
+}

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:feebf1e929365645e986ae056b8fc1ab8c56f96f44d837f4d2cdd002b4900a44
 size 77305592

 version https://git-lfs.github.com/spec/v1
+oid sha256:7a3c1036c7b5709aca072f14837752559bd4210989b814be2a314437c28a6517
 size 77305592

special_tokens_map.json CHANGED Viewed

@@ -1 +1,5 @@
-{}

+{
+  "eos_token": "</s>",
+  "pad_token": "<unk>",
+  "unk_token": "<unk>"
+}

tokenizer_config.json CHANGED Viewed

@@ -1,18 +1,36 @@
-{
-  "added_tokens_decoder": {},
-  "auto_map": {
-    "AutoTokenizer": [
-      "katuni4ka/tiny-random-chatglm2--tokenization_chatglm.ChatGLMTokenizer",
-      null
-    ]
-  },
-  "clean_up_tokenization_spaces": false,
-  "do_lower_case": false,
-  "eos_token": "</s>",
-  "model_max_length": 1000000000000000019884624838656,
-  "pad_token": "<unk>",
-  "padding_side": "left",
-  "remove_space": false,
-  "tokenizer_class": "ChatGLMTokenizer",
-  "unk_token": "<unk>"
-}

+{
+  "added_tokens_decoder": {
+    "64787": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "64788": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "auto_map": {
+    "AutoTokenizer": [
+      "tokenization_chatglm.ChatGLMTokenizer",
+      null
+    ]
+  },
+  "clean_up_tokenization_spaces": false,
+  "do_lower_case": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<unk>",
+  "padding_side": "left",
+  "remove_space": false,
+  "tokenizer_class": "ChatGLMTokenizer",
+  "unk_token": "<unk>"
+}