Training in progress, step 200, checkpoint
Browse files- last-checkpoint/2_Dense/model.safetensors +2 -2
- last-checkpoint/3_Dense/model.safetensors +2 -2
- last-checkpoint/README.md +15 -13
- last-checkpoint/config.json +3 -4
- last-checkpoint/config_sentence_transformers.json +1 -1
- last-checkpoint/model.safetensors +2 -2
- last-checkpoint/optimizer.pt +2 -2
- last-checkpoint/trainer_state.json +8 -8
- last-checkpoint/training_args.bin +2 -2
last-checkpoint/2_Dense/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1e06277ca8787b7fa33c7a991a49e7c44cedc64537c9a587e3eabe4480d98101
|
| 3 |
+
size 4718680
|
last-checkpoint/3_Dense/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6a1747a1bbfdac934f7ee3e281dffa826558868f822e91fdd1f85e39c452033a
|
| 3 |
+
size 4718680
|
last-checkpoint/README.md
CHANGED
|
@@ -424,7 +424,7 @@ print(query_embeddings.shape, document_embeddings.shape)
|
|
| 424 |
# Get the similarity scores for the embeddings
|
| 425 |
similarities = model.similarity(query_embeddings, document_embeddings)
|
| 426 |
print(similarities)
|
| 427 |
-
# tensor([[
|
| 428 |
```
|
| 429 |
|
| 430 |
<!--
|
|
@@ -488,7 +488,7 @@ You can finetune this model on your own dataset.
|
|
| 488 |
{
|
| 489 |
"scale": 20.0,
|
| 490 |
"similarity_fct": "cos_sim",
|
| 491 |
-
"mini_batch_size":
|
| 492 |
"gather_across_devices": false
|
| 493 |
}
|
| 494 |
```
|
|
@@ -503,7 +503,7 @@ You can finetune this model on your own dataset.
|
|
| 503 |
- `push_to_hub`: True
|
| 504 |
- `hub_model_id`: guyhadad01/EncodeRec_300M_Toys
|
| 505 |
- `hub_strategy`: checkpoint
|
| 506 |
-
- `prompts`: task: search result | query:
|
| 507 |
|
| 508 |
#### All Hyperparameters
|
| 509 |
<details><summary>Click to expand</summary>
|
|
@@ -545,7 +545,6 @@ You can finetune this model on your own dataset.
|
|
| 545 |
- `seed`: 42
|
| 546 |
- `data_seed`: None
|
| 547 |
- `jit_mode_eval`: False
|
| 548 |
-
- `use_ipex`: False
|
| 549 |
- `bf16`: True
|
| 550 |
- `fp16`: False
|
| 551 |
- `fp16_opt_level`: O1
|
|
@@ -572,6 +571,7 @@ You can finetune this model on your own dataset.
|
|
| 572 |
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
| 573 |
- `fsdp_transformer_layer_cls_to_wrap`: None
|
| 574 |
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
|
|
|
| 575 |
- `deepspeed`: None
|
| 576 |
- `label_smoothing_factor`: 0.0
|
| 577 |
- `optim`: adamw_torch
|
|
@@ -579,6 +579,8 @@ You can finetune this model on your own dataset.
|
|
| 579 |
- `adafactor`: False
|
| 580 |
- `group_by_length`: False
|
| 581 |
- `length_column_name`: length
|
|
|
|
|
|
|
| 582 |
- `ddp_find_unused_parameters`: None
|
| 583 |
- `ddp_bucket_cap_mb`: None
|
| 584 |
- `ddp_broadcast_buffers`: False
|
|
@@ -611,7 +613,7 @@ You can finetune this model on your own dataset.
|
|
| 611 |
- `torch_compile_backend`: None
|
| 612 |
- `torch_compile_mode`: None
|
| 613 |
- `include_tokens_per_second`: False
|
| 614 |
-
- `include_num_input_tokens_seen`:
|
| 615 |
- `neftune_noise_alpha`: None
|
| 616 |
- `optim_target_modules`: None
|
| 617 |
- `batch_eval_metrics`: False
|
|
@@ -619,8 +621,8 @@ You can finetune this model on your own dataset.
|
|
| 619 |
- `use_liger_kernel`: False
|
| 620 |
- `liger_kernel_config`: None
|
| 621 |
- `eval_use_gather_object`: False
|
| 622 |
-
- `average_tokens_across_devices`:
|
| 623 |
-
- `prompts`: task: search result | query:
|
| 624 |
- `batch_sampler`: batch_sampler
|
| 625 |
- `multi_dataset_batch_sampler`: proportional
|
| 626 |
- `router_mapping`: {}
|
|
@@ -631,20 +633,20 @@ You can finetune this model on your own dataset.
|
|
| 631 |
### Training Logs
|
| 632 |
| Epoch | Step | Training Loss |
|
| 633 |
|:------:|:----:|:-------------:|
|
| 634 |
-
| 0.0463 | 50 | 0.
|
| 635 |
-
| 0.0926 | 100 | 0.
|
| 636 |
-
| 0.1389 | 150 | 0.
|
| 637 |
-
| 0.1852 | 200 | 0.
|
| 638 |
|
| 639 |
|
| 640 |
### Framework Versions
|
| 641 |
- Python: 3.12.11
|
| 642 |
- Sentence Transformers: 5.1.0
|
| 643 |
-
- Transformers: 4.
|
| 644 |
- PyTorch: 2.7.1+cu126
|
| 645 |
- Accelerate: 1.10.0
|
| 646 |
- Datasets: 3.6.0
|
| 647 |
-
- Tokenizers: 0.
|
| 648 |
|
| 649 |
## Citation
|
| 650 |
|
|
|
|
| 424 |
# Get the similarity scores for the embeddings
|
| 425 |
similarities = model.similarity(query_embeddings, document_embeddings)
|
| 426 |
print(similarities)
|
| 427 |
+
# tensor([[0.8959, 0.0632, 0.0102]])
|
| 428 |
```
|
| 429 |
|
| 430 |
<!--
|
|
|
|
| 488 |
{
|
| 489 |
"scale": 20.0,
|
| 490 |
"similarity_fct": "cos_sim",
|
| 491 |
+
"mini_batch_size": 64,
|
| 492 |
"gather_across_devices": false
|
| 493 |
}
|
| 494 |
```
|
|
|
|
| 503 |
- `push_to_hub`: True
|
| 504 |
- `hub_model_id`: guyhadad01/EncodeRec_300M_Toys
|
| 505 |
- `hub_strategy`: checkpoint
|
| 506 |
+
- `prompts`: {'question': 'task: search result | query: ', 'passage_text': 'title: none | text: '}
|
| 507 |
|
| 508 |
#### All Hyperparameters
|
| 509 |
<details><summary>Click to expand</summary>
|
|
|
|
| 545 |
- `seed`: 42
|
| 546 |
- `data_seed`: None
|
| 547 |
- `jit_mode_eval`: False
|
|
|
|
| 548 |
- `bf16`: True
|
| 549 |
- `fp16`: False
|
| 550 |
- `fp16_opt_level`: O1
|
|
|
|
| 571 |
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
| 572 |
- `fsdp_transformer_layer_cls_to_wrap`: None
|
| 573 |
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
| 574 |
+
- `parallelism_config`: None
|
| 575 |
- `deepspeed`: None
|
| 576 |
- `label_smoothing_factor`: 0.0
|
| 577 |
- `optim`: adamw_torch
|
|
|
|
| 579 |
- `adafactor`: False
|
| 580 |
- `group_by_length`: False
|
| 581 |
- `length_column_name`: length
|
| 582 |
+
- `project`: huggingface
|
| 583 |
+
- `trackio_space_id`: trackio
|
| 584 |
- `ddp_find_unused_parameters`: None
|
| 585 |
- `ddp_bucket_cap_mb`: None
|
| 586 |
- `ddp_broadcast_buffers`: False
|
|
|
|
| 613 |
- `torch_compile_backend`: None
|
| 614 |
- `torch_compile_mode`: None
|
| 615 |
- `include_tokens_per_second`: False
|
| 616 |
+
- `include_num_input_tokens_seen`: no
|
| 617 |
- `neftune_noise_alpha`: None
|
| 618 |
- `optim_target_modules`: None
|
| 619 |
- `batch_eval_metrics`: False
|
|
|
|
| 621 |
- `use_liger_kernel`: False
|
| 622 |
- `liger_kernel_config`: None
|
| 623 |
- `eval_use_gather_object`: False
|
| 624 |
+
- `average_tokens_across_devices`: True
|
| 625 |
+
- `prompts`: {'question': 'task: search result | query: ', 'passage_text': 'title: none | text: '}
|
| 626 |
- `batch_sampler`: batch_sampler
|
| 627 |
- `multi_dataset_batch_sampler`: proportional
|
| 628 |
- `router_mapping`: {}
|
|
|
|
| 633 |
### Training Logs
|
| 634 |
| Epoch | Step | Training Loss |
|
| 635 |
|:------:|:----:|:-------------:|
|
| 636 |
+
| 0.0463 | 50 | 0.2551 |
|
| 637 |
+
| 0.0926 | 100 | 0.1353 |
|
| 638 |
+
| 0.1389 | 150 | 0.1541 |
|
| 639 |
+
| 0.1852 | 200 | 0.1499 |
|
| 640 |
|
| 641 |
|
| 642 |
### Framework Versions
|
| 643 |
- Python: 3.12.11
|
| 644 |
- Sentence Transformers: 5.1.0
|
| 645 |
+
- Transformers: 4.57.0
|
| 646 |
- PyTorch: 2.7.1+cu126
|
| 647 |
- Accelerate: 1.10.0
|
| 648 |
- Datasets: 3.6.0
|
| 649 |
+
- Tokenizers: 0.22.1
|
| 650 |
|
| 651 |
## Citation
|
| 652 |
|
last-checkpoint/config.json
CHANGED
|
@@ -7,7 +7,7 @@
|
|
| 7 |
"attention_dropout": 0.0,
|
| 8 |
"attn_logit_softcapping": null,
|
| 9 |
"bos_token_id": 2,
|
| 10 |
-
"dtype": "
|
| 11 |
"eos_token_id": 1,
|
| 12 |
"final_logit_softcapping": null,
|
| 13 |
"head_dim": 256,
|
|
@@ -52,9 +52,8 @@
|
|
| 52 |
"rope_local_base_freq": 10000.0,
|
| 53 |
"rope_scaling": null,
|
| 54 |
"rope_theta": 1000000.0,
|
| 55 |
-
"sliding_window":
|
| 56 |
-
"
|
| 57 |
-
"transformers_version": "4.55.2",
|
| 58 |
"use_bidirectional_attention": true,
|
| 59 |
"use_cache": true,
|
| 60 |
"vocab_size": 262144
|
|
|
|
| 7 |
"attention_dropout": 0.0,
|
| 8 |
"attn_logit_softcapping": null,
|
| 9 |
"bos_token_id": 2,
|
| 10 |
+
"dtype": "bfloat16",
|
| 11 |
"eos_token_id": 1,
|
| 12 |
"final_logit_softcapping": null,
|
| 13 |
"head_dim": 256,
|
|
|
|
| 52 |
"rope_local_base_freq": 10000.0,
|
| 53 |
"rope_scaling": null,
|
| 54 |
"rope_theta": 1000000.0,
|
| 55 |
+
"sliding_window": 257,
|
| 56 |
+
"transformers_version": "4.57.0",
|
|
|
|
| 57 |
"use_bidirectional_attention": true,
|
| 58 |
"use_cache": true,
|
| 59 |
"vocab_size": 262144
|
last-checkpoint/config_sentence_transformers.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
"model_type": "SentenceTransformer",
|
| 3 |
"__version__": {
|
| 4 |
"sentence_transformers": "5.1.0",
|
| 5 |
-
"transformers": "4.
|
| 6 |
"pytorch": "2.7.1+cu126"
|
| 7 |
},
|
| 8 |
"prompts": {
|
|
|
|
| 2 |
"model_type": "SentenceTransformer",
|
| 3 |
"__version__": {
|
| 4 |
"sentence_transformers": "5.1.0",
|
| 5 |
+
"transformers": "4.57.0",
|
| 6 |
"pytorch": "2.7.1+cu126"
|
| 7 |
},
|
| 8 |
"prompts": {
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:abac25e1e6fdf3255533a12e513ba6078edbe7d810a3fa975b6d4d0639fab536
|
| 3 |
+
size 605759848
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:af6ae0591084794587e796774ea539c6d9d1c58565ec4d0bf461ec38c34219ab
|
| 3 |
+
size 1230592267
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -11,30 +11,30 @@
|
|
| 11 |
"log_history": [
|
| 12 |
{
|
| 13 |
"epoch": 0.046296296296296294,
|
| 14 |
-
"grad_norm":
|
| 15 |
"learning_rate": 2.2685185185185187e-05,
|
| 16 |
-
"loss": 0.
|
| 17 |
"step": 50
|
| 18 |
},
|
| 19 |
{
|
| 20 |
"epoch": 0.09259259259259259,
|
| 21 |
-
"grad_norm": 6.
|
| 22 |
"learning_rate": 4.5833333333333334e-05,
|
| 23 |
-
"loss": 0.
|
| 24 |
"step": 100
|
| 25 |
},
|
| 26 |
{
|
| 27 |
"epoch": 0.1388888888888889,
|
| 28 |
-
"grad_norm": 5.
|
| 29 |
"learning_rate": 4.7890946502057616e-05,
|
| 30 |
-
"loss": 0.
|
| 31 |
"step": 150
|
| 32 |
},
|
| 33 |
{
|
| 34 |
"epoch": 0.18518518518518517,
|
| 35 |
-
"grad_norm": 4.
|
| 36 |
"learning_rate": 4.531893004115226e-05,
|
| 37 |
-
"loss": 0.
|
| 38 |
"step": 200
|
| 39 |
}
|
| 40 |
],
|
|
|
|
| 11 |
"log_history": [
|
| 12 |
{
|
| 13 |
"epoch": 0.046296296296296294,
|
| 14 |
+
"grad_norm": 5.875,
|
| 15 |
"learning_rate": 2.2685185185185187e-05,
|
| 16 |
+
"loss": 0.2551,
|
| 17 |
"step": 50
|
| 18 |
},
|
| 19 |
{
|
| 20 |
"epoch": 0.09259259259259259,
|
| 21 |
+
"grad_norm": 6.84375,
|
| 22 |
"learning_rate": 4.5833333333333334e-05,
|
| 23 |
+
"loss": 0.1353,
|
| 24 |
"step": 100
|
| 25 |
},
|
| 26 |
{
|
| 27 |
"epoch": 0.1388888888888889,
|
| 28 |
+
"grad_norm": 5.375,
|
| 29 |
"learning_rate": 4.7890946502057616e-05,
|
| 30 |
+
"loss": 0.1541,
|
| 31 |
"step": 150
|
| 32 |
},
|
| 33 |
{
|
| 34 |
"epoch": 0.18518518518518517,
|
| 35 |
+
"grad_norm": 4.75,
|
| 36 |
"learning_rate": 4.531893004115226e-05,
|
| 37 |
+
"loss": 0.1499,
|
| 38 |
"step": 200
|
| 39 |
}
|
| 40 |
],
|
last-checkpoint/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d251256d6a17063ebe50c1a916e869c5121c6daeb0ba390c2cedfa45a16a448e
|
| 3 |
+
size 6289
|