alessandronascimento committed on
Commit
aa31594
·
verified ·
1 Parent(s): 624c831

Training in progress, epoch 3

Browse files
Files changed (3) hide show
  1. config.json +27 -57
  2. model.safetensors +2 -2
  3. training_args.bin +1 -1
config.json CHANGED
@@ -3,72 +3,61 @@
3
  "EncoderDecoderModel"
4
  ],
5
  "decoder": {
6
- "_name_or_path": "gayane/BARTSmiles",
7
- "activation_dropout": 0.1,
8
- "activation_function": "gelu",
9
- "add_bias_logits": false,
10
  "add_cross_attention": true,
11
- "add_final_layer_norm": false,
12
  "architectures": [
13
- "BartModel"
14
  ],
15
- "attention_dropout": 0.1,
16
  "bad_words_ids": null,
17
  "begin_suppress_tokens": null,
18
  "bos_token_id": 0,
19
  "chunk_size_feed_forward": 0,
20
- "classif_dropout": 0.1,
21
- "classifier_dropout": 0.0,
22
  "cross_attention_hidden_size": null,
23
- "d_model": 1024,
24
- "decoder_attention_heads": 16,
25
- "decoder_ffn_dim": 4096,
26
- "decoder_layerdrop": 0.0,
27
- "decoder_layers": 12,
28
- "decoder_start_token_id": 2,
29
  "diversity_penalty": 0.0,
30
  "do_sample": false,
31
- "dropout": 0.1,
32
- "early_stopping": true,
33
- "encoder_attention_heads": 16,
34
- "encoder_ffn_dim": 4096,
35
- "encoder_layerdrop": 0.0,
36
- "encoder_layers": 12,
37
  "encoder_no_repeat_ngram_size": 0,
38
  "eos_token_id": 2,
39
  "exponential_decay_length_penalty": null,
40
  "finetuning_task": null,
41
- "forced_bos_token_id": 0,
42
- "forced_eos_token_id": 2,
43
  "gradient_checkpointing": false,
 
 
 
44
  "id2label": {
45
  "0": "LABEL_0",
46
- "1": "LABEL_1",
47
- "2": "LABEL_2"
48
  },
49
- "init_std": 0.02,
 
50
  "is_decoder": true,
51
  "is_encoder_decoder": false,
52
  "label2id": {
53
  "LABEL_0": 0,
54
- "LABEL_1": 1,
55
- "LABEL_2": 2
56
  },
 
57
  "length_penalty": 1.0,
58
  "max_length": 20,
59
- "max_position_embeddings": 128,
60
  "min_length": 0,
61
- "model_type": "bart",
62
- "no_repeat_ngram_size": 3,
63
- "normalize_before": false,
64
  "num_beam_groups": 1,
65
- "num_beams": 4,
66
- "num_hidden_layers": 12,
67
  "num_return_sequences": 1,
68
  "output_attentions": false,
69
  "output_hidden_states": false,
70
  "output_scores": false,
71
  "pad_token_id": 1,
 
72
  "prefix": null,
73
  "problem_type": null,
74
  "pruned_heads": {},
@@ -76,29 +65,9 @@
76
  "repetition_penalty": 1.0,
77
  "return_dict": true,
78
  "return_dict_in_generate": false,
79
- "scale_embedding": false,
80
  "sep_token_id": null,
81
  "suppress_tokens": null,
82
- "task_specific_params": {
83
- "summarization": {
84
- "length_penalty": 1.0,
85
- "max_length": 128,
86
- "min_length": 12,
87
- "num_beams": 4
88
- },
89
- "summarization_cnn": {
90
- "length_penalty": 2.0,
91
- "max_length": 142,
92
- "min_length": 56,
93
- "num_beams": 4
94
- },
95
- "summarization_xsum": {
96
- "length_penalty": 1.0,
97
- "max_length": 62,
98
- "min_length": 11,
99
- "num_beams": 6
100
- }
101
- },
102
  "temperature": 1.0,
103
  "tf_legacy_loss": false,
104
  "tie_encoder_decoder": false,
@@ -106,12 +75,13 @@
106
  "tokenizer_class": null,
107
  "top_k": 50,
108
  "top_p": 1.0,
109
- "torch_dtype": "float32",
110
  "torchscript": false,
 
111
  "typical_p": 1.0,
112
  "use_bfloat16": false,
113
  "use_cache": true,
114
- "vocab_size": 1025
115
  },
116
  "decoder_start_token_id": 0,
117
  "encoder": {
 
3
  "EncoderDecoderModel"
4
  ],
5
  "decoder": {
6
+ "_name_or_path": "seyonec/PubChem10M_SMILES_BPE_450k",
 
 
 
7
  "add_cross_attention": true,
 
8
  "architectures": [
9
+ "RobertaForMaskedLM"
10
  ],
11
+ "attention_probs_dropout_prob": 0.1,
12
  "bad_words_ids": null,
13
  "begin_suppress_tokens": null,
14
  "bos_token_id": 0,
15
  "chunk_size_feed_forward": 0,
16
+ "classifier_dropout": null,
 
17
  "cross_attention_hidden_size": null,
18
+ "decoder_start_token_id": null,
 
 
 
 
 
19
  "diversity_penalty": 0.0,
20
  "do_sample": false,
21
+ "early_stopping": false,
 
 
 
 
 
22
  "encoder_no_repeat_ngram_size": 0,
23
  "eos_token_id": 2,
24
  "exponential_decay_length_penalty": null,
25
  "finetuning_task": null,
26
+ "forced_bos_token_id": null,
27
+ "forced_eos_token_id": null,
28
  "gradient_checkpointing": false,
29
+ "hidden_act": "gelu",
30
+ "hidden_dropout_prob": 0.1,
31
+ "hidden_size": 768,
32
  "id2label": {
33
  "0": "LABEL_0",
34
+ "1": "LABEL_1"
 
35
  },
36
+ "initializer_range": 0.02,
37
+ "intermediate_size": 3072,
38
  "is_decoder": true,
39
  "is_encoder_decoder": false,
40
  "label2id": {
41
  "LABEL_0": 0,
42
+ "LABEL_1": 1
 
43
  },
44
+ "layer_norm_eps": 1e-12,
45
  "length_penalty": 1.0,
46
  "max_length": 20,
47
+ "max_position_embeddings": 512,
48
  "min_length": 0,
49
+ "model_type": "roberta",
50
+ "no_repeat_ngram_size": 0,
51
+ "num_attention_heads": 12,
52
  "num_beam_groups": 1,
53
+ "num_beams": 1,
54
+ "num_hidden_layers": 6,
55
  "num_return_sequences": 1,
56
  "output_attentions": false,
57
  "output_hidden_states": false,
58
  "output_scores": false,
59
  "pad_token_id": 1,
60
+ "position_embedding_type": "absolute",
61
  "prefix": null,
62
  "problem_type": null,
63
  "pruned_heads": {},
 
65
  "repetition_penalty": 1.0,
66
  "return_dict": true,
67
  "return_dict_in_generate": false,
 
68
  "sep_token_id": null,
69
  "suppress_tokens": null,
70
+ "task_specific_params": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  "temperature": 1.0,
72
  "tf_legacy_loss": false,
73
  "tie_encoder_decoder": false,
 
75
  "tokenizer_class": null,
76
  "top_k": 50,
77
  "top_p": 1.0,
78
+ "torch_dtype": null,
79
  "torchscript": false,
80
+ "type_vocab_size": 1,
81
  "typical_p": 1.0,
82
  "use_bfloat16": false,
83
  "use_cache": true,
84
+ "vocab_size": 7924
85
  },
86
  "decoder_start_token_id": 0,
87
  "encoder": {
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae0dee8c34767816d09c35854f739cd1e64fb9ce17d01760468b06eea462fca8
3
- size 1408891684
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aea7bd6f63d3d0a52416bc19ac9e0496a5a59e43601b8fd8b1b5848c1396150c
3
+ size 852404428
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:331dd79c0d92cfbb953b1f370c43d2db7c1aac858a40c76b884735dd4089e9dd
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e62830d213bb1984f87b4fc22c7bfd36933ee06a1a9a7abcb594aae048d79e3b
3
  size 5304