anna4142 committed on
Commit a6aa1f6 · verified · 1 Parent(s): 09a5b1d

End of training

README.md ADDED
@@ -0,0 +1,52 @@
+ ---
+ library_name: transformers
+ tags:
+ - generated_from_trainer
+ model-index:
+ - name: output
+ results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # output
+
+ This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 0.0001
+ - train_batch_size: 64
+ - eval_batch_size: 8
+ - seed: 42
+ - optimizer: adamw_torch with betas=(0.9, 0.999) and epsilon=1e-08; no additional optimizer arguments
+ - lr_scheduler_type: linear
+ - lr_scheduler_warmup_ratio: 0.1
+ - num_epochs: 200
+
+ ### Training results
+
+
+
+ ### Framework versions
+
+ - Transformers 4.46.2
+ - Pytorch 2.5.1+cu121
+ - Datasets 3.1.0
+ - Tokenizers 0.20.3
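
For reference, here is a minimal sketch of a `TrainingArguments` instantiation matching the hyperparameters listed in the model card above. The `output_dir` value, the per-device reading of `train_batch_size`, and every argument not listed in the card are assumptions, not the author's actual training script.

```python
from transformers import TrainingArguments

# Sketch only: reproduces the hyperparameters reported in the model card.
# output_dir is assumed; train_batch_size=64 is treated as the per-device
# batch size on a single device.
training_args = TrainingArguments(
    output_dir="output",
    learning_rate=1e-4,
    per_device_train_batch_size=64,
    per_device_eval_batch_size=8,
    seed=42,
    optim="adamw_torch",
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-8,
    lr_scheduler_type="linear",
    warmup_ratio=0.1,
    num_train_epochs=200,
)
```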
config.json ADDED
@@ -0,0 +1,33 @@
+ {
+ "act_dim": 6,
+ "action_tanh": true,
+ "activation_function": "relu",
+ "architectures": [
+ "HierarchicalTrainableDT"
+ ],
+ "attn_pdrop": 0.1,
+ "bos_token_id": 50256,
+ "cluster_hidden_dim": 128,
+ "embd_pdrop": 0.1,
+ "eos_token_id": 50256,
+ "hidden_size": 128,
+ "initializer_range": 0.02,
+ "layer_norm_epsilon": 1e-05,
+ "max_ep_len": 4096,
+ "model_type": "decision_transformer",
+ "n_clusters": 8,
+ "n_head": 1,
+ "n_inner": null,
+ "n_layer": 3,
+ "n_positions": 1024,
+ "reorder_and_upcast_attn": false,
+ "resid_pdrop": 0.1,
+ "scale_attn_by_inverse_layer_idx": false,
+ "scale_attn_weights": true,
+ "state_dim": 17,
+ "torch_dtype": "float32",
+ "transformers_version": "4.46.2",
+ "use_cache": true,
+ "use_subgoal_weighting": true,
+ "vocab_size": 1
+ }
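
Since `model_type` is `decision_transformer`, the stock `DecisionTransformerConfig` can parse this file; fields it does not define, such as `n_clusters`, `cluster_hidden_dim`, and `use_subgoal_weighting`, are kept as plain attributes. The listed architecture `HierarchicalTrainableDT` is a custom class that does not ship with `transformers`, so instantiating the model itself would need that class. A hedged sketch of reading the config, assuming it has been downloaded locally:

```python
from transformers import DecisionTransformerConfig

# Assumes config.json is in the current directory (the repo id is not given here).
config = DecisionTransformerConfig.from_pretrained(".")
print(config.state_dim, config.act_dim)                   # 17, 6
print(config.hidden_size, config.n_layer, config.n_head)  # 128, 3, 1
print(config.n_clusters, config.use_subgoal_weighting)    # custom fields: 8, True
```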
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ae38aa338356b360a9ee46b2beab0f65981e5ccfc4e293599827e8534d402224
+ size 5049232
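
This is a Git LFS pointer; the actual weights (about 5 MB) are downloaded separately. Once the real file is present, a sketch of inspecting its tensors with `safetensors`:

```python
from safetensors.torch import load_file

# Assumes the LFS object has been pulled, so model.safetensors is the real
# weight file rather than the 3-line pointer shown in this diff.
state_dict = load_file("model.safetensors")
for name, tensor in state_dict.items():
    print(name, tuple(tensor.shape), tensor.dtype)
```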
runs/Dec01_17-34-48_307faaf671d1/events.out.tfevents.1733074489.307faaf671d1.284.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d518f4b5a8e5fdd1e6cf150a46a66e99a7a181d13c4a27c7193e9f6ce22381da
+ size 4986
runs/Dec01_17-38-17_307faaf671d1/events.out.tfevents.1733074697.307faaf671d1.284.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9b05ba3fc04744d597086251331d223cb35d071b57457a23f895f622046cab26
+ size 5332
runs/Dec01_18-04-41_307faaf671d1/events.out.tfevents.1733076283.307faaf671d1.284.2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:87bc031c9b1a6a6c3451e820d42f69d5a3f687913e608fc15227471c0ebbef69
+ size 5065
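
The three `runs/...` files are TensorBoard event logs, also stored as LFS pointers. A sketch of reading one programmatically; the scalar tag name is an assumption and depends on what the Trainer actually logged:

```python
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

ea = EventAccumulator("runs/Dec01_18-04-41_307faaf671d1")
ea.Reload()
print(ea.Tags()["scalars"])             # list the scalar tags that were logged
for event in ea.Scalars("train/loss"):  # "train/loss" is an assumed tag name
    print(event.step, event.value)
```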
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6ba6d84c457b26cd8e66709749e8469938848f5e0a06a33b7a7bb28a89b48192
+ size 5240
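
`training_args.bin` is the pickled `TrainingArguments` object that the Trainer saves alongside its outputs. It unpickles arbitrary Python objects, so only load it from sources you trust. A minimal sketch:

```python
import torch

# weights_only=False is needed because this is a pickled TrainingArguments,
# not a tensor checkpoint.
args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.num_train_epochs, args.lr_scheduler_type)
```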