Mauricio-100 committed
Commit d18e67e · verified · 1 parent: a2935f9

🚀 Gopu Français - 3 specialized French models

checkpoint-20/config.json ADDED
@@ -0,0 +1,39 @@
+ {
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 50256,
+   "dtype": "float32",
+   "embd_pdrop": 0.1,
+   "eos_token_id": 50256,
+   "gradient_checkpointing": false,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "gpt2",
+   "n_ctx": 1024,
+   "n_embd": 768,
+   "n_head": 12,
+   "n_inner": null,
+   "n_layer": 12,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "task_specific_params": {
+     "text-generation": {
+       "do_sample": true,
+       "max_length": 50
+     }
+   },
+   "transformers_version": "4.57.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
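The added config describes a 12-layer, 12-head, 768-dimensional GPT-2 with a 1024-token context, i.e. the ~124M-parameter base size. A minimal loading sketch, assuming a local checkout of this repo (the `checkpoint-20` path is the directory added in this commit):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Minimal sketch, assuming a local checkout of this repo.
model = AutoModelForCausalLM.from_pretrained("checkpoint-20")
tokenizer = AutoTokenizer.from_pretrained("checkpoint-20")

# 12 layers x 12 heads x 768 hidden puts this at the ~124M-parameter GPT-2 base size.
print(sum(p.numel() for p in model.parameters()))
```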
checkpoint-20/generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 50256,
+   "eos_token_id": 50256,
+   "transformers_version": "4.57.1"
+ }
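The generation config pins only the bos/eos ids (both 50256); the sampling defaults (`do_sample: true`, `max_length: 50`) live in `task_specific_params` of config.json, which the text-generation pipeline has historically merged into the model config. A sketch, assuming the local `checkpoint-20` directory:

```python
from transformers import pipeline

# Sketch only: sampling defaults come from task_specific_params in config.json.
generator = pipeline("text-generation", model="checkpoint-20")
print(generator("Bonjour, je")[0]["generated_text"])
```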
checkpoint-20/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-20/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5a287f8c32b6c7b26bf93bad61e73bd920c14fe6b55761a3e86be4c4c51d8311
+ size 497774208
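What is committed here is a Git LFS pointer, not the weights themselves: after `git lfs pull`, the on-disk file should be 497,774,208 bytes and hash to the `oid` above. A quick integrity check, assuming the file has been fetched:

```python
import hashlib

# Stream the fetched weights and compare against the LFS pointer's oid.
h = hashlib.sha256()
with open("checkpoint-20/model.safetensors", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)
print(h.hexdigest() == "5a287f8c32b6c7b26bf93bad61e73bd920c14fe6b55761a3e86be4c4c51d8311")
```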
checkpoint-20/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0781c183f20098b6393cc9dc0b3596f6a6ccbdd35244652d5ed9a0ec0c692caf
+ size 995644811
checkpoint-20/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5e8e0baaa78234caa0c89315cc3215dbc0b9fe57ac48c14869817c5b7dd39b4e
+ size 14645
checkpoint-20/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:983485077bb01eda1ece273552e51a3ea7bccfbed9a4eb70ce94c814cb3e196e
+ size 1465
checkpoint-20/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "bos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
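GPT-2 ships no dedicated padding token, so this map reuses `<|endoftext|>` for all four roles; padded batches therefore need attention masks so pad positions are ignored. A sketch against the local checkpoint:

```python
from transformers import AutoTokenizer

# All special-token roles collapse onto <|endoftext|> here.
tok = AutoTokenizer.from_pretrained("checkpoint-20")
assert tok.bos_token == tok.eos_token == tok.pad_token == "<|endoftext|>"

# Pass the attention mask along so padded positions do not attend.
batch = tok(["Bonjour", "Bonjour tout le monde"], padding=True, return_tensors="pt")
print(batch["attention_mask"])
```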
checkpoint-20/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-20/tokenizer_config.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "add_bos_token": false,
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<|endoftext|>",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "<|endoftext|>",
+   "errors": "replace",
+   "extra_special_tokens": {},
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": "<|endoftext|>",
+   "tokenizer_class": "GPT2Tokenizer",
+   "unk_token": "<|endoftext|>"
+ }
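The eye-catching `model_max_length` is not corruption: it is the sentinel transformers writes when the tokenizer records no length limit, `int(1e30)` after float rounding. Since the tokenizer will not truncate on its own, callers should cap inputs at the model's 1024-token `n_positions` themselves:

```python
# The sentinel is exactly float 1e30 cast to int:
print(int(1e30))  # 1000000000000000019884624838656

# So truncate explicitly to the model's context window when encoding, e.g.:
# tok(text, truncation=True, max_length=1024)
```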
checkpoint-20/trainer_state.json ADDED
@@ -0,0 +1,41 @@
+ {
+   "best_global_step": null,
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 4.0,
+   "eval_steps": 500,
+   "global_step": 20,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 4.0,
+       "grad_norm": 8.232486724853516,
+       "learning_rate": 2.5e-06,
+       "loss": 3.7606,
+       "step": 20
+     }
+   ],
+   "logging_steps": 20,
+   "max_steps": 20,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 4,
+   "save_steps": 50,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": true
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 5225840640000.0,
+   "train_batch_size": 4,
+   "trial_name": null,
+   "trial_params": null
+ }
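The trainer state pins down the run's scale: training stopped at the 20-step `max_steps` cap after 4 epochs, with a final loss of 3.7606. A back-of-envelope check (assuming no gradient accumulation, which trainer_state.json does not record):

```python
# 20 optimizer steps over 4 epochs = 5 steps per epoch; at per-device
# batch size 4 that is roughly 20 training examples per epoch.
global_step, num_train_epochs, train_batch_size = 20, 4, 4
steps_per_epoch = global_step // num_train_epochs   # 5
print(steps_per_epoch * train_batch_size)           # ~20 examples/epoch
```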
checkpoint-20/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b928f0f0119b4b15c94f7a3abb30a9a0597f14a5e352aa4aa0e3c93b6de5f994
+ size 5841
checkpoint-20/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
config.json CHANGED
@@ -1,54 +1,39 @@
  {
    "activation_function": "gelu_new",
    "architectures": [
-     "GPTNeoForCausalLM"
-   ],
-   "attention_dropout": 0,
-   "attention_layers": [
-     "global",
-     "local",
-     "global",
-     "local",
-     "global",
-     "local",
-     "global",
-     "local",
-     "global",
-     "local",
-     "global",
-     "local"
-   ],
-   "attention_types": [
-     [
-       [
-         "global",
-         "local"
-       ],
-       6
-     ]
+     "GPT2LMHeadModel"
    ],
+   "attn_pdrop": 0.1,
    "bos_token_id": 50256,
-   "classifier_dropout": 0.1,
    "dtype": "float32",
-   "embed_dropout": 0,
+   "embd_pdrop": 0.1,
    "eos_token_id": 50256,
    "gradient_checkpointing": false,
-   "hidden_size": 768,
    "initializer_range": 0.02,
-   "intermediate_size": null,
    "layer_norm_epsilon": 1e-05,
-   "max_position_embeddings": 2048,
-   "model_type": "gpt_neo",
-   "num_heads": 12,
-   "num_layers": 12,
-   "resid_dropout": 0,
+   "model_type": "gpt2",
+   "n_ctx": 1024,
+   "n_embd": 768,
+   "n_head": 12,
+   "n_inner": null,
+   "n_layer": 12,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
    "summary_activation": null,
    "summary_first_dropout": 0.1,
    "summary_proj_to_labels": true,
    "summary_type": "cls_index",
    "summary_use_proj": true,
+   "task_specific_params": {
+     "text-generation": {
+       "do_sample": true,
+       "max_length": 50
+     }
+   },
    "transformers_version": "4.57.1",
    "use_cache": true,
-   "vocab_size": 50257,
-   "window_size": 256
+   "vocab_size": 50257
  }
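Net effect of this diff: the repo's root config switches from a GPT-Neo-125M-style model to GPT-2 base, and the context window halves (`max_position_embeddings: 2048` becomes `n_positions: 1024`). A quick post-swap sanity check, assuming a local checkout of this repo:

```python
from transformers import AutoConfig

cfg = AutoConfig.from_pretrained(".")  # path to a checkout of this repo
assert cfg.model_type == "gpt2"        # was "gpt_neo" before this commit
print(cfg.n_positions)                 # 1024, down from 2048
```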
merges.txt CHANGED
The diff for this file is too large to render. See raw diff
 
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:fe29ead0207986e009116d625b28edbf0ce8dde08e06f7f110fd36a5844804d4
- size 500811336
+ oid sha256:5a287f8c32b6c7b26bf93bad61e73bd920c14fe6b55761a3e86be4c4c51d8311
+ size 497774208
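The new weight file size is consistent with the architecture swap: roughly 124.4M float32 parameters plus a small safetensors header, down from ~500.8 MB for the previous GPT-Neo weights. A sketch of the arithmetic:

```python
# 497,774,208 bytes of float32 weights / 4 bytes per parameter
# ≈ 124.4M parameters, matching the GPT-2 base config above.
print(497_774_208 / 4 / 1e6)  # ≈ 124.44 (million parameters)
```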
runs/Nov22_23-26-54_46c86bb47009/events.out.tfevents.1763854015.46c86bb47009.737.5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:87af1ae284090554e5080f2c88ea9a01f6a8cc0c0185f45b5c5fe2f67d196533
+ size 5865
special_tokens_map.json CHANGED
@@ -13,7 +13,13 @@
      "rstrip": false,
      "single_word": false
    },
-   "pad_token": "<|endoftext|>",
+   "pad_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
    "unk_token": {
      "content": "<|endoftext|>",
      "lstrip": false,
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -2,7 +2,7 @@
    "add_bos_token": false,
    "add_prefix_space": false,
    "added_tokens_decoder": {
-     "50256": {
+     "0": {
        "content": "<|endoftext|>",
        "lstrip": false,
        "normalized": true,
@@ -12,11 +12,11 @@
      }
    },
    "bos_token": "<|endoftext|>",
-   "clean_up_tokenization_spaces": true,
+   "clean_up_tokenization_spaces": false,
    "eos_token": "<|endoftext|>",
    "errors": "replace",
    "extra_special_tokens": {},
-   "model_max_length": 2048,
+   "model_max_length": 1000000000000000019884624838656,
    "pad_token": "<|endoftext|>",
    "tokenizer_class": "GPT2Tokenizer",
    "unk_token": "<|endoftext|>"
vocab.json CHANGED
The diff for this file is too large to render. See raw diff