guyhadad01 commited on
Commit
892c387
·
verified ·
1 Parent(s): cae8ee4

Training in progress, step 200, checkpoint

Browse files
last-checkpoint/2_Dense/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fed83dfd00c1a0263f07eea8794b9265914ae7b3dc5c76729cf3807e2861adc3
3
- size 9437272
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e06277ca8787b7fa33c7a991a49e7c44cedc64537c9a587e3eabe4480d98101
3
+ size 4718680
last-checkpoint/3_Dense/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87b5e471a0697253d32e596ab7ab53200a19437e9d28de12f4dc211852102b58
3
- size 9437272
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a1747a1bbfdac934f7ee3e281dffa826558868f822e91fdd1f85e39c452033a
3
+ size 4718680
last-checkpoint/README.md CHANGED
@@ -424,7 +424,7 @@ print(query_embeddings.shape, document_embeddings.shape)
424
  # Get the similarity scores for the embeddings
425
  similarities = model.similarity(query_embeddings, document_embeddings)
426
  print(similarities)
427
- # tensor([[ 0.9179, 0.0553, -0.0070]])
428
  ```
429
 
430
  <!--
@@ -488,7 +488,7 @@ You can finetune this model on your own dataset.
488
  {
489
  "scale": 20.0,
490
  "similarity_fct": "cos_sim",
491
- "mini_batch_size": 32,
492
  "gather_across_devices": false
493
  }
494
  ```
@@ -503,7 +503,7 @@ You can finetune this model on your own dataset.
503
  - `push_to_hub`: True
504
  - `hub_model_id`: guyhadad01/EncodeRec_300M_Toys
505
  - `hub_strategy`: checkpoint
506
- - `prompts`: task: search result | query:
507
 
508
  #### All Hyperparameters
509
  <details><summary>Click to expand</summary>
@@ -545,7 +545,6 @@ You can finetune this model on your own dataset.
545
  - `seed`: 42
546
  - `data_seed`: None
547
  - `jit_mode_eval`: False
548
- - `use_ipex`: False
549
  - `bf16`: True
550
  - `fp16`: False
551
  - `fp16_opt_level`: O1
@@ -572,6 +571,7 @@ You can finetune this model on your own dataset.
572
  - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
573
  - `fsdp_transformer_layer_cls_to_wrap`: None
574
  - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
 
575
  - `deepspeed`: None
576
  - `label_smoothing_factor`: 0.0
577
  - `optim`: adamw_torch
@@ -579,6 +579,8 @@ You can finetune this model on your own dataset.
579
  - `adafactor`: False
580
  - `group_by_length`: False
581
  - `length_column_name`: length
 
 
582
  - `ddp_find_unused_parameters`: None
583
  - `ddp_bucket_cap_mb`: None
584
  - `ddp_broadcast_buffers`: False
@@ -611,7 +613,7 @@ You can finetune this model on your own dataset.
611
  - `torch_compile_backend`: None
612
  - `torch_compile_mode`: None
613
  - `include_tokens_per_second`: False
614
- - `include_num_input_tokens_seen`: False
615
  - `neftune_noise_alpha`: None
616
  - `optim_target_modules`: None
617
  - `batch_eval_metrics`: False
@@ -619,8 +621,8 @@ You can finetune this model on your own dataset.
619
  - `use_liger_kernel`: False
620
  - `liger_kernel_config`: None
621
  - `eval_use_gather_object`: False
622
- - `average_tokens_across_devices`: False
623
- - `prompts`: task: search result | query:
624
  - `batch_sampler`: batch_sampler
625
  - `multi_dataset_batch_sampler`: proportional
626
  - `router_mapping`: {}
@@ -631,20 +633,20 @@ You can finetune this model on your own dataset.
631
  ### Training Logs
632
  | Epoch | Step | Training Loss |
633
  |:------:|:----:|:-------------:|
634
- | 0.0463 | 50 | 0.4695 |
635
- | 0.0926 | 100 | 0.2072 |
636
- | 0.1389 | 150 | 0.2185 |
637
- | 0.1852 | 200 | 0.2196 |
638
 
639
 
640
  ### Framework Versions
641
  - Python: 3.12.11
642
  - Sentence Transformers: 5.1.0
643
- - Transformers: 4.55.2
644
  - PyTorch: 2.7.1+cu126
645
  - Accelerate: 1.10.0
646
  - Datasets: 3.6.0
647
- - Tokenizers: 0.21.4
648
 
649
  ## Citation
650
 
 
424
  # Get the similarity scores for the embeddings
425
  similarities = model.similarity(query_embeddings, document_embeddings)
426
  print(similarities)
427
+ # tensor([[0.8959, 0.0632, 0.0102]])
428
  ```
429
 
430
  <!--
 
488
  {
489
  "scale": 20.0,
490
  "similarity_fct": "cos_sim",
491
+ "mini_batch_size": 64,
492
  "gather_across_devices": false
493
  }
494
  ```
 
503
  - `push_to_hub`: True
504
  - `hub_model_id`: guyhadad01/EncodeRec_300M_Toys
505
  - `hub_strategy`: checkpoint
506
+ - `prompts`: {'question': 'task: search result | query: ', 'passage_text': 'title: none | text: '}
507
 
508
  #### All Hyperparameters
509
  <details><summary>Click to expand</summary>
 
545
  - `seed`: 42
546
  - `data_seed`: None
547
  - `jit_mode_eval`: False
 
548
  - `bf16`: True
549
  - `fp16`: False
550
  - `fp16_opt_level`: O1
 
571
  - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
572
  - `fsdp_transformer_layer_cls_to_wrap`: None
573
  - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
574
+ - `parallelism_config`: None
575
  - `deepspeed`: None
576
  - `label_smoothing_factor`: 0.0
577
  - `optim`: adamw_torch
 
579
  - `adafactor`: False
580
  - `group_by_length`: False
581
  - `length_column_name`: length
582
+ - `project`: huggingface
583
+ - `trackio_space_id`: trackio
584
  - `ddp_find_unused_parameters`: None
585
  - `ddp_bucket_cap_mb`: None
586
  - `ddp_broadcast_buffers`: False
 
613
  - `torch_compile_backend`: None
614
  - `torch_compile_mode`: None
615
  - `include_tokens_per_second`: False
616
+ - `include_num_input_tokens_seen`: no
617
  - `neftune_noise_alpha`: None
618
  - `optim_target_modules`: None
619
  - `batch_eval_metrics`: False
 
621
  - `use_liger_kernel`: False
622
  - `liger_kernel_config`: None
623
  - `eval_use_gather_object`: False
624
+ - `average_tokens_across_devices`: True
625
+ - `prompts`: {'question': 'task: search result | query: ', 'passage_text': 'title: none | text: '}
626
  - `batch_sampler`: batch_sampler
627
  - `multi_dataset_batch_sampler`: proportional
628
  - `router_mapping`: {}
 
633
  ### Training Logs
634
  | Epoch | Step | Training Loss |
635
  |:------:|:----:|:-------------:|
636
+ | 0.0463 | 50 | 0.2551 |
637
+ | 0.0926 | 100 | 0.1353 |
638
+ | 0.1389 | 150 | 0.1541 |
639
+ | 0.1852 | 200 | 0.1499 |
640
 
641
 
642
  ### Framework Versions
643
  - Python: 3.12.11
644
  - Sentence Transformers: 5.1.0
645
+ - Transformers: 4.57.0
646
  - PyTorch: 2.7.1+cu126
647
  - Accelerate: 1.10.0
648
  - Datasets: 3.6.0
649
+ - Tokenizers: 0.22.1
650
 
651
  ## Citation
652
 
last-checkpoint/config.json CHANGED
@@ -7,7 +7,7 @@
7
  "attention_dropout": 0.0,
8
  "attn_logit_softcapping": null,
9
  "bos_token_id": 2,
10
- "dtype": "float32",
11
  "eos_token_id": 1,
12
  "final_logit_softcapping": null,
13
  "head_dim": 256,
@@ -52,9 +52,8 @@
52
  "rope_local_base_freq": 10000.0,
53
  "rope_scaling": null,
54
  "rope_theta": 1000000.0,
55
- "sliding_window": 512,
56
- "torch_dtype": "float32",
57
- "transformers_version": "4.55.2",
58
  "use_bidirectional_attention": true,
59
  "use_cache": true,
60
  "vocab_size": 262144
 
7
  "attention_dropout": 0.0,
8
  "attn_logit_softcapping": null,
9
  "bos_token_id": 2,
10
+ "dtype": "bfloat16",
11
  "eos_token_id": 1,
12
  "final_logit_softcapping": null,
13
  "head_dim": 256,
 
52
  "rope_local_base_freq": 10000.0,
53
  "rope_scaling": null,
54
  "rope_theta": 1000000.0,
55
+ "sliding_window": 257,
56
+ "transformers_version": "4.57.0",
 
57
  "use_bidirectional_attention": true,
58
  "use_cache": true,
59
  "vocab_size": 262144
last-checkpoint/config_sentence_transformers.json CHANGED
@@ -2,7 +2,7 @@
2
  "model_type": "SentenceTransformer",
3
  "__version__": {
4
  "sentence_transformers": "5.1.0",
5
- "transformers": "4.55.2",
6
  "pytorch": "2.7.1+cu126"
7
  },
8
  "prompts": {
 
2
  "model_type": "SentenceTransformer",
3
  "__version__": {
4
  "sentence_transformers": "5.1.0",
5
+ "transformers": "4.57.0",
6
  "pytorch": "2.7.1+cu126"
7
  },
8
  "prompts": {
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3569d3e977da69f1e6effdc5cfd35f2c31712c0f88b884d222e1b60040f0e26
3
- size 1211486072
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abac25e1e6fdf3255533a12e513ba6078edbe7d810a3fa975b6d4d0639fab536
3
+ size 605759848
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:01580aac4f96298117d8589da031aaea8e5b6ce4a27bb8c251eb3a20d7cf5c0e
3
- size 2460919051
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af6ae0591084794587e796774ea539c6d9d1c58565ec4d0bf461ec38c34219ab
3
+ size 1230592267
last-checkpoint/trainer_state.json CHANGED
@@ -11,30 +11,30 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.046296296296296294,
14
- "grad_norm": 6.824543476104736,
15
  "learning_rate": 2.2685185185185187e-05,
16
- "loss": 0.4695,
17
  "step": 50
18
  },
19
  {
20
  "epoch": 0.09259259259259259,
21
- "grad_norm": 6.649824142456055,
22
  "learning_rate": 4.5833333333333334e-05,
23
- "loss": 0.2072,
24
  "step": 100
25
  },
26
  {
27
  "epoch": 0.1388888888888889,
28
- "grad_norm": 5.679929733276367,
29
  "learning_rate": 4.7890946502057616e-05,
30
- "loss": 0.2185,
31
  "step": 150
32
  },
33
  {
34
  "epoch": 0.18518518518518517,
35
- "grad_norm": 4.710780620574951,
36
  "learning_rate": 4.531893004115226e-05,
37
- "loss": 0.2196,
38
  "step": 200
39
  }
40
  ],
 
11
  "log_history": [
12
  {
13
  "epoch": 0.046296296296296294,
14
+ "grad_norm": 5.875,
15
  "learning_rate": 2.2685185185185187e-05,
16
+ "loss": 0.2551,
17
  "step": 50
18
  },
19
  {
20
  "epoch": 0.09259259259259259,
21
+ "grad_norm": 6.84375,
22
  "learning_rate": 4.5833333333333334e-05,
23
+ "loss": 0.1353,
24
  "step": 100
25
  },
26
  {
27
  "epoch": 0.1388888888888889,
28
+ "grad_norm": 5.375,
29
  "learning_rate": 4.7890946502057616e-05,
30
+ "loss": 0.1541,
31
  "step": 150
32
  },
33
  {
34
  "epoch": 0.18518518518518517,
35
+ "grad_norm": 4.75,
36
  "learning_rate": 4.531893004115226e-05,
37
+ "loss": 0.1499,
38
  "step": 200
39
  }
40
  ],
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d08e873fcc6af5914cd3b33b8457f079faba879550636b5ef8bf74269ab02c7c
3
- size 6161
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d251256d6a17063ebe50c1a916e869c5121c6daeb0ba390c2cedfa45a16a448e
3
+ size 6289