permutans committed on
Commit
880f21a
·
verified ·
1 Parent(s): 732cbf9

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. config.json +67 -25
  2. model.safetensors +2 -2
  3. tokenizer.json +0 -0
  4. tokenizer_config.json +7 -5
config.json CHANGED
@@ -1,37 +1,79 @@
1
  {
2
- "add_cross_attention": false,
3
  "architectures": [
4
- "BertForSequenceClassification"
5
  ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "bos_token_id": null,
8
- "classifier_dropout": null,
 
 
 
 
 
 
 
 
9
  "dtype": "float32",
10
- "eos_token_id": null,
 
 
11
  "gradient_checkpointing": false,
12
- "hidden_act": "gelu",
13
- "hidden_dropout_prob": 0.1,
14
  "hidden_size": 768,
15
- "id2label": {
16
- "0": "LABEL_0"
17
- },
18
  "initializer_range": 0.02,
19
- "intermediate_size": 3072,
20
- "is_decoder": false,
21
- "label2id": {
22
- "LABEL_0": 0
23
- },
24
- "layer_norm_eps": 1e-12,
25
- "max_position_embeddings": 512,
26
- "model_type": "bert",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  "num_attention_heads": 12,
28
- "num_hidden_layers": 12,
29
- "pad_token_id": 0,
30
  "position_embedding_type": "absolute",
31
- "problem_type": "regression",
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  "tie_word_embeddings": true,
33
  "transformers_version": "5.0.0",
34
- "type_vocab_size": 2,
35
- "use_cache": true,
36
- "vocab_size": 30522
37
  }
 
1
  {
 
2
  "architectures": [
3
+ "HavelockOralityRegressor"
4
  ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 50281,
8
+ "classifier_activation": "gelu",
9
+ "classifier_bias": false,
10
+ "classifier_dropout": 0.0,
11
+ "classifier_pooling": "mean",
12
+ "cls_token_id": 50281,
13
+ "decoder_bias": true,
14
+ "deterministic_flash_attn": false,
15
+ "dropout": 0.1,
16
  "dtype": "float32",
17
+ "embedding_dropout": 0.0,
18
+ "eos_token_id": 50282,
19
+ "global_attn_every_n_layers": 3,
20
  "gradient_checkpointing": false,
21
+ "hidden_activation": "gelu",
 
22
  "hidden_size": 768,
23
+ "initializer_cutoff_factor": 2.0,
 
 
24
  "initializer_range": 0.02,
25
+ "intermediate_size": 1152,
26
+ "layer_norm_eps": 1e-05,
27
+ "layer_types": [
28
+ "full_attention",
29
+ "sliding_attention",
30
+ "sliding_attention",
31
+ "full_attention",
32
+ "sliding_attention",
33
+ "sliding_attention",
34
+ "full_attention",
35
+ "sliding_attention",
36
+ "sliding_attention",
37
+ "full_attention",
38
+ "sliding_attention",
39
+ "sliding_attention",
40
+ "full_attention",
41
+ "sliding_attention",
42
+ "sliding_attention",
43
+ "full_attention",
44
+ "sliding_attention",
45
+ "sliding_attention",
46
+ "full_attention",
47
+ "sliding_attention",
48
+ "sliding_attention",
49
+ "full_attention"
50
+ ],
51
+ "local_attention": 128,
52
+ "max_position_embeddings": 8192,
53
+ "mlp_bias": false,
54
+ "mlp_dropout": 0.0,
55
+ "model_type": "havelock-orality-regressor",
56
+ "norm_bias": false,
57
+ "norm_eps": 1e-05,
58
  "num_attention_heads": 12,
59
+ "num_hidden_layers": 22,
60
+ "pad_token_id": 50283,
61
  "position_embedding_type": "absolute",
62
+ "repad_logits_with_grad": false,
63
+ "rope_parameters": {
64
+ "full_attention": {
65
+ "rope_theta": 160000.0,
66
+ "rope_type": "default"
67
+ },
68
+ "sliding_attention": {
69
+ "rope_theta": 10000.0,
70
+ "rope_type": "default"
71
+ }
72
+ },
73
+ "sep_token_id": 50282,
74
+ "sparse_pred_ignore_index": -100,
75
+ "sparse_prediction": false,
76
  "tie_word_embeddings": true,
77
  "transformers_version": "5.0.0",
78
+ "vocab_size": 50368
 
 
79
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e2641cf798d6c36360e8b5b4fb0bd058102b7043ee098e33b2cad68bf839089
3
- size 437955548
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef01de53c7fc44c72077f4bfa60602d71628568eda051ec321f01b1f55ac263a
3
+ size 1037272580
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,14 +1,16 @@
1
  {
2
  "backend": "tokenizers",
 
3
  "cls_token": "[CLS]",
4
- "do_lower_case": true,
5
  "is_local": false,
6
  "mask_token": "[MASK]",
7
- "model_max_length": 512,
 
 
 
 
8
  "pad_token": "[PAD]",
9
  "sep_token": "[SEP]",
10
- "strip_accents": null,
11
- "tokenize_chinese_chars": true,
12
- "tokenizer_class": "BertTokenizer",
13
  "unk_token": "[UNK]"
14
  }
 
1
  {
2
  "backend": "tokenizers",
3
+ "clean_up_tokenization_spaces": true,
4
  "cls_token": "[CLS]",
 
5
  "is_local": false,
6
  "mask_token": "[MASK]",
7
+ "model_input_names": [
8
+ "input_ids",
9
+ "attention_mask"
10
+ ],
11
+ "model_max_length": 8192,
12
  "pad_token": "[PAD]",
13
  "sep_token": "[SEP]",
14
+ "tokenizer_class": "TokenizersBackend",
 
 
15
  "unk_token": "[UNK]"
16
  }