Training in progress, epoch 1

Files changed (9) hide show

config.json CHANGED Viewed

@@ -1,28 +1,42 @@
 {
-  "_name_or_path": "cointegrated/rubert-tiny2",
   "architectures": [
-    "BertForSequenceClassification"
   ],
   "attention_probs_dropout_prob": 0.1,
   "classifier_dropout": null,
-  "emb_size": 312,
-  "gradient_checkpointing": false,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
-  "hidden_size": 312,
   "initializer_range": 0.02,
-  "intermediate_size": 600,
   "layer_norm_eps": 1e-12,
   "max_position_embeddings": 2048,
   "model_type": "bert",
   "num_attention_heads": 12,
-  "num_hidden_layers": 3,
   "pad_token_id": 0,
-  "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",
   "torch_dtype": "float32",
   "transformers_version": "4.44.2",
   "type_vocab_size": 2,
-  "use_cache": true,
-  "vocab_size": 83828
 }

 {
+  "_name_or_path": "Tochka-AI/ruRoPEBert-classic-base-2k",
   "architectures": [
+    "RoPEBertForSequenceClassification"
   ],
   "attention_probs_dropout_prob": 0.1,
+  "auto_map": {
+    "AutoConfig": "Tochka-AI/ruRoPEBert-classic-base-2k--modeling_rope_bert.RoPEBertConfig",
+    "AutoModel": "Tochka-AI/ruRoPEBert-classic-base-2k--modeling_rope_bert.RoPEBertModel",
+    "AutoModelForMaskedLM": "Tochka-AI/ruRoPEBert-classic-base-2k--modeling_rope_bert.RoPEBertForMaskedLM",
+    "AutoModelForSequenceClassification": "Tochka-AI/ruRoPEBert-classic-base-2k--modeling_rope_bert.RoPEBertForSequenceClassification"
+  },
   "classifier_dropout": null,
+  "directionality": "bidi",
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "common",
+    "1": "toxic"
+  },
   "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "common": 0,
+    "toxic": 1
+  },
   "layer_norm_eps": 1e-12,
   "max_position_embeddings": 2048,
   "model_type": "bert",
   "num_attention_heads": 12,
+  "num_hidden_layers": 12,
   "pad_token_id": 0,
+  "pooler_type": "mean",
   "problem_type": "single_label_classification",
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
   "torch_dtype": "float32",
   "transformers_version": "4.44.2",
   "type_vocab_size": 2,
+  "vocab_size": 120138
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:75e340536f411f482955efa563747d2b1650fee11b5f4b38df80fc264277e511
-size 116784136

 version https://git-lfs.github.com/spec/v1
+oid sha256:2d02402663ef3690aa2d06713f9b6c77df8ea7120fd2306860da3918a8d9186a
+size 709323472

runs/Sep12_23-15-18_legal-tech-0/events.out.tfevents.1726172119.legal-tech-0.1432.14 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0cdc6c4ea73f15f25104cacbcc40865e4608e4b170b07b4adc61a082e01922d9
-size 6750

 version https://git-lfs.github.com/spec/v1
+oid sha256:e6723c40e7098c26b44e8594157e772cfbb79bd517fa960660c2c5b8eafcd2c8
+size 7473

runs/Sep12_23-15-18_legal-tech-0/events.out.tfevents.1726172192.legal-tech-0.1432.15 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:7081875b8a847d6236c4c1958bbb5f2841781b38f1638530825a031710383439
+size 457

runs/Sep12_23-19-05_legal-tech-0/events.out.tfevents.1726172347.legal-tech-0.1764.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:09a6968a2c6e00fc24ebb4647a08b9fddbb80954e6fc68df8ac1fb250ee6a108
+size 5932

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json CHANGED Viewed

@@ -8,7 +8,7 @@
       "single_word": false,
       "special": true
     },
-    "1": {
       "content": "[UNK]",
       "lstrip": false,
       "normalized": false,
@@ -16,7 +16,7 @@
       "single_word": false,
       "special": true
     },
-    "2": {
       "content": "[CLS]",
       "lstrip": false,
       "normalized": false,
@@ -24,7 +24,7 @@
       "single_word": false,
       "special": true
     },
-    "3": {
       "content": "[SEP]",
       "lstrip": false,
       "normalized": false,
@@ -32,7 +32,7 @@
       "single_word": false,
       "special": true
     },
-    "4": {
       "content": "[MASK]",
       "lstrip": false,
       "normalized": false,
@@ -44,21 +44,14 @@
   "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
   "do_basic_tokenize": true,
-  "do_lower_case": false,
   "mask_token": "[MASK]",
-  "max_length": 512,
-  "model_max_length": 512,
   "never_split": null,
-  "pad_to_multiple_of": null,
   "pad_token": "[PAD]",
-  "pad_token_type_id": 0,
-  "padding_side": "right",
   "sep_token": "[SEP]",
-  "stride": 0,
   "strip_accents": null,
   "tokenize_chinese_chars": true,
   "tokenizer_class": "BertTokenizer",
-  "truncation_side": "right",
-  "truncation_strategy": "longest_first",
   "unk_token": "[UNK]"
 }

       "single_word": false,
       "special": true
     },
+    "100": {
       "content": "[UNK]",
       "lstrip": false,
       "normalized": false,
       "single_word": false,
       "special": true
     },
+    "101": {
       "content": "[CLS]",
       "lstrip": false,
       "normalized": false,
       "single_word": false,
       "special": true
     },
+    "102": {
       "content": "[SEP]",
       "lstrip": false,
       "normalized": false,
       "single_word": false,
       "special": true
     },
+    "103": {
       "content": "[MASK]",
       "lstrip": false,
       "normalized": false,
   "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
   "do_basic_tokenize": true,
+  "do_lower_case": true,
   "mask_token": "[MASK]",
+  "model_max_length": 1024,
   "never_split": null,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
   "strip_accents": null,
   "tokenize_chinese_chars": true,
   "tokenizer_class": "BertTokenizer",
   "unk_token": "[UNK]"
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2b46fc84b00e25c9df275d3ff50b2c1d1548ad617e795bcf776ac8f7745f6d67
 size 5240

 version https://git-lfs.github.com/spec/v1
+oid sha256:e4e8b89ae3a26d9ec3f6ed982387591eaf92b8b710c629ae09c5dbc0bb308db1
 size 5240

vocab.txt CHANGED Viewed

The diff for this file is too large to render. See raw diff