Upload model checkpoint

Browse files

Files changed (16) hide show

1_Pooling/config.json +10 -0
README.md +466 -0
config.json +44 -0
config_sentence_transformers.json +10 -0
model.safetensors +3 -0
modules.json +14 -0
optimizer.pt +3 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
sentence_bert_config.json +4 -0
special_tokens_map.json +37 -0
tokenizer.json +0 -0
tokenizer_config.json +62 -0
trainer_state.json +168 -0
training_args.bin +3 -0
vocab.txt +0 -0

1_Pooling/config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "word_embedding_dimension": 1024,
+  "pooling_mode_cls_token": true,
+  "pooling_mode_mean_tokens": false,
+  "pooling_mode_max_tokens": false,
+  "pooling_mode_mean_sqrt_len_tokens": false,
+  "pooling_mode_weightedmean_tokens": false,
+  "pooling_mode_lasttoken": false,
+  "include_prompt": true
+}

README.md ADDED Viewed

	@@ -0,0 +1,466 @@

+---
+base_model: Alibaba-NLP/gte-large-en-v1.5
+datasets: []
+language: []
+library_name: sentence-transformers
+pipeline_tag: sentence-similarity
+tags:
+- sentence-transformers
+- sentence-similarity
+- feature-extraction
+- generated_from_trainer
+- dataset_size:269761
+- loss:CachedMultipleNegativesRankingLoss
+widget:
+- source_sentence: netgear ac1900 nighthawk smart wifi router netgear wireless broadband
+    routers cdwcom the netgear ac1900 r7000 nighthawk smart wifi router is specially
+    designed for gaming streaming and mobile devices with speeds up to 1900 mbps and
+    a 1 ghz dualcore processor this next generation wireless router offers extreme
+    speed with reduced lag and less buffering this internet router comes with advanced
+    features such as netgear genie remote access readycloud openvpn and kwilt app
+    support so you can manage your network access a secure personal cloud access home
+    network remotely and share photos stored on the storage from anywherewifi router
+    with 600 1300 mbps speeds for online gaming streaming and more1 ghz dualcore processor
+    and prioritized bandwidth for streaming videosreadycloud usb access for secure
+    cloud access to usb storage at anytimemanage home network and provide guest access
+    remotely using netgear genie computersandaccessories
+  sentences:
+  - unirex s2 grease 40g tube bearing note recommended for high temperature service
+    in rolling bearings 1995 bmw 325i base convertible axles bearings differential
+    page 8 note recommended for high temperature service in rolling bearings genuine
+    bmw automotive
+  - netgear nighthawk ac1900 dual band wifi gigabit router r7000 with open source
+    support compatible amazon echoalexa us netgear compatibleus accelerate your wifi
+    with net gear nighthawk enjoy the fastest wifi currently available with speeds
+    up to 1900 mbps and a powerful dual core 1ghz processor for extreme performance
+    highpowered amplifiers external antennas and beamforming improve range and reliability
+    for up to 100 more wireless coverage features like dynamic qos prioritize streaming
+    and gaming creating a blazingfast lagfree wifi experience r7000 provides an extensible
+    design that enables service prioritization for data design that delivers high
+    availability scalability and for maximum flexibility and priceperformance us netgearus
+    computersandaccessories manufacturer netgear brand netgear color black model upc
+    606449099812 item weight 345 pounds item size 1008 x 311 x 311 inches package
+    weight 344 pounds package size 1047 x 331 x 331 inches units in package 1
+  - pads high performance ebc pads pads ebc notes rear set of 4 performance pads ebc
+    greenstuff price per set length mm 108 height mm 44 automotive
+- source_sentence: 12v drill impact driver twin pack gtpddid12 toolsandhomeimprovement
+  sentences:
+  - original ihip universal mlb licensed tampa bay devil rays noise isolating earbuds
+    35mm navy blue white samsung galaxy tab 77 accessoriesgalaxy accessoriesclick
+    now accessorygeekscom cellphonesandaccessories
+  - canon pixma mp160 combo pack genuine canon ink cartridges cartridges inkrediblecouk
+    combo pack contains 1 black 16ml and 1 colour 12ml officeproducts
+  - gmc 12v drill and impact driver twin pack pack 3233836 argos price tracker pricehistorycouk
+    gmc toolsandhomeimprovement date price 02 august 2017 10599 21 june 2017 9099
+    22 january 2016 9999 we started tracking this product on 22 january 2016
+- source_sentence: throttle housing assembly 2002 bmw 325ci base coupe intake system
+    page 3 genuine bmw automotive
+  sentences:
+  - 2017 bmw i3 94 ah with range extender california 91307 2015 extender lease special
+    promotion on rex electric a for 35000 per month west hills automotive
+  - oil filter spin on type pc 201 style 1969 bmw 1602 base coupe oil circulation
+    page 1 mahle automotive
+  - throttle housing assembly 2002 bmw 325ci base coupe intake system page 3 continental
+    vdo automotive
+- source_sentence: bracket without bushing for control arm front right lower 1990
+    bmw 325i base coupe suspension shocks springs page 6 note does not come w mounting
+    bushing front right meyle automotive
+  sentences:
+  - bracket without bushing for control arm front right lower 1990 bmw 325i base coupe
+    suspension shocks springs page 6 note does not come w mounting bushing front right
+    meyle automotive
+  - mohawk industries wsk120 oak golden engineered hardwood flooring 5 wide planks
+    1969 sf carton wsk120 cork bamboo tile more anderson 96in base shoe accessory
+    sale price sq ft oak golden oak engineered hardwood flooring 5 wide planks 1969
+    sf carton mohawk industries wsk120 oak golden oak engineered hardwood flooring
+    5 wide planks 1969 sf carton wsk120 instock mohawk industries toolsandhomeimprovement
+  - brizo towel ring charlotte products at efaucetscom towel ring charlotte collection
+    toolsandhomeimprovement
+- source_sentence: alpinestars 140 holdall gear bag alpinestars fl yellowredanthracite
+    radar flight gloves 35618185392x comfortable glove lightweight customized fit
+    silicone grip patterning on fingers for improved riding control included items
+    2 gloves made with 46 synthetic suede 35 polyester 19 polyamide care instructions
+    do not wash bleach tumble dry iron clean single layer clarino palm is breathable
+    and offers excellent feel the bikes controls reinforced thumb construction increases
+    durability gusset flexibility innovative stretch insert in area hand movement
+    lever reinforcements third fourth added abrasion resistance convenient slipon
+    design a secure singlepiece fabric upper gives perforated ergonomic chassis reduced
+    material result supremely lightweight alpinestars automotive
+  sentences:
+  - cover with spring and heater elementfor carburetor gb 1980 volkswagen jetta united
+    states market engine carburetor versions 1 b 3 jb ghgb 1357 gb automotive
+  - alpinestars 140 holdall gear bag alpinestars fl yellowredanthracite radar flight
+    gloves 35618185392x comfortable glove lightweight customized fit silicone grip
+    patterning on fingers for improved riding control included items 2 gloves made
+    with 46 synthetic suede 35 polyester 19 polyamide care instructions do not wash
+    bleach tumble dry iron clean single layer clarino palm is breathable and offers
+    excellent feel the bikes controls reinforced thumb construction increases durability
+    gusset flexibility innovative stretch insert in area hand movement lever reinforcements
+    third fourth added abrasion resistance convenient slipon design a secure singlepiece
+    fabric upper gives perforated ergonomic chassis reduced material result supremely
+    lightweight alpinestars automotive
+  - td 8000k xenon hid kit high beam 0910 mercedes benz cl600 c216 h7 xenon hid lighting
+    is only available on high end luxury cars you can convert your stock halogens
+    to super bright too by just connecting a few plug and play connections then mounting
+    the ballast in secure spot but with this mercedes cl600 low watt 8000k td hid
+    high beam conversion kit experience supreme brightness expanded field of vision
+    also our wattage systems are backed by full one year warrantyplease note will
+    not work if cl600s headlights came equipped factory lights unlike cheaper market
+    more consistently without fading out like coated bulbs dousually mercedes installations
+    probably most common upgrades performed increase headlight cl600 producing certain
+    temperatures technology that uses xenon gas charged bulb combination an electronic
+    regulate current going through it the resulting light 35 be up 3 times brighter
+    than traditional halogen bulbs kits reliably produce truer colored we offer conversion
+    kit short for intensity discharge automotive
+---
+# SentenceTransformer based on Alibaba-NLP/gte-large-en-v1.5
+This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Alibaba-NLP/gte-large-en-v1.5](https://huggingface.co/Alibaba-NLP/gte-large-en-v1.5). It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
+## Model Details
+### Model Description
+- **Model Type:** Sentence Transformer
+- **Base model:** [Alibaba-NLP/gte-large-en-v1.5](https://huggingface.co/Alibaba-NLP/gte-large-en-v1.5) <!-- at revision 104333d6af6f97649377c2afbde10a7704870c7b -->
+- **Maximum Sequence Length:** 8192 tokens
+- **Output Dimensionality:** 1024 dimensions
+- **Similarity Function:** Cosine Similarity
+<!-- - **Training Dataset:** Unknown -->
+<!-- - **Language:** Unknown -->
+<!-- - **License:** Unknown -->
+### Model Sources
+- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
+- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
+- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
+### Full Model Architecture
+```
+SentenceTransformer(
+  (0): Transformer({'max_seq_length': 8192, 'do_lower_case': False}) with Transformer model: NewModel
+  (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+)
+```
+## Usage
+### Direct Usage (Sentence Transformers)
+First install the Sentence Transformers library:
+```bash
+pip install -U sentence-transformers
+```
+Then you can load this model and run inference.
+```python
+from sentence_transformers import SentenceTransformer
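+# Download from the 🤗 Hub (the base architecture ships as custom modeling code, so trust_remote_code=True is required)
+model = SentenceTransformer("sentence_transformers_model_id", trust_remote_code=True)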
+# Run inference
+sentences = [
+    'alpinestars 140 holdall gear bag alpinestars fl yellowredanthracite radar flight gloves 35618185392x comfortable glove lightweight customized fit silicone grip patterning on fingers for improved riding control included items 2 gloves made with 46 synthetic suede 35 polyester 19 polyamide care instructions do not wash bleach tumble dry iron clean single layer clarino palm is breathable and offers excellent feel the bikes controls reinforced thumb construction increases durability gusset flexibility innovative stretch insert in area hand movement lever reinforcements third fourth added abrasion resistance convenient slipon design a secure singlepiece fabric upper gives perforated ergonomic chassis reduced material result supremely lightweight alpinestars automotive',
+    'alpinestars 140 holdall gear bag alpinestars fl yellowredanthracite radar flight gloves 35618185392x comfortable glove lightweight customized fit silicone grip patterning on fingers for improved riding control included items 2 gloves made with 46 synthetic suede 35 polyester 19 polyamide care instructions do not wash bleach tumble dry iron clean single layer clarino palm is breathable and offers excellent feel the bikes controls reinforced thumb construction increases durability gusset flexibility innovative stretch insert in area hand movement lever reinforcements third fourth added abrasion resistance convenient slipon design a secure singlepiece fabric upper gives perforated ergonomic chassis reduced material result supremely lightweight alpinestars automotive',
+    'td 8000k xenon hid kit high beam 0910 mercedes benz cl600 c216 h7 xenon hid lighting is only available on high end luxury cars you can convert your stock halogens to super bright too by just connecting a few plug and play connections then mounting the ballast in secure spot but with this mercedes cl600 low watt 8000k td hid high beam conversion kit experience supreme brightness expanded field of vision also our wattage systems are backed by full one year warrantyplease note will not work if cl600s headlights came equipped factory lights unlike cheaper market more consistently without fading out like coated bulbs dousually mercedes installations probably most common upgrades performed increase headlight cl600 producing certain temperatures technology that uses xenon gas charged bulb combination an electronic regulate current going through it the resulting light 35 be up 3 times brighter than traditional halogen bulbs kits reliably produce truer colored we offer conversion kit short for intensity discharge automotive',
+]
+embeddings = model.encode(sentences)
+print(embeddings.shape)
+# [3, 1024]
+# Get the similarity scores for the embeddings
+similarities = model.similarity(embeddings, embeddings)
+print(similarities.shape)
+# [3, 3]
+```
+<!--
+### Direct Usage (Transformers)
+<details><summary>Click to see the direct usage in Transformers</summary>
+</details>
+-->
+<!--
+### Downstream Usage (Sentence Transformers)
+You can finetune this model on your own dataset.
+<details><summary>Click to expand</summary>
+</details>
+-->
+<!--
+### Out-of-Scope Use
+*List how the model may foreseeably be misused and address what users ought not to do with the model.*
+-->
+<!--
+## Bias, Risks and Limitations
+*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
+-->
+<!--
+### Recommendations
+*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
+-->
+## Training Details
+### Training Dataset
+#### Unnamed Dataset
+* Size: 269,761 training samples
+* Columns: <code>anchor</code> and <code>positive</code>
+* Approximate statistics based on the first 1000 samples:
+  |         | anchor                                                                               | positive                                                                             |
+  |:--------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
+  | type    | string                                                                               | string                                                                               |
+  | details | <ul><li>min: 13 tokens</li><li>mean: 68.94 tokens</li><li>max: 1130 tokens</li></ul> | <ul><li>min: 12 tokens</li><li>mean: 70.35 tokens</li><li>max: 1149 tokens</li></ul> |
+* Samples:
+  | anchor                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          | positive                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             |
+  |:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+  | <code>tripp lite 25u 4post open frame rack cabinet square holes 1000lb capacity open frame rack tripp 25u prices cnet tripp lite otherelectronics</code>                                                                                                                                                                                                                                                                                                                                                                                                        | <code>tripp lite 25u 4post open frame rack cabinet square holes 1000lb capacity open frame rack tripp 25u specs cnet null tripp lite otherelectronics</code>                                                                                                                                                                                                                                                                                                                                                                                                         |
+  | <code>headlamp restoration kit philips 2000 bmw 323ci base coupe lights and lenses page 6 note removes yellowing and haze of plastic headlight lenses restoring likenew condition and finish professional results in under 30 minutes can be used on headlights taillights turn signals and reflective lens covers with uv coating technology one kit restores two headlights contains qty 1 pretreatment 1 cleanerpolish 1 shine restorerpreserver 3 sandpaper 600 1500 2000 grit 10 applicator polish cloths 1 pair of vinyl gloves philips automotive</code> | <code>headlamp restoration kit philips 1996 bmw 318i base convertible lights and lenses page 6 note removes yellowing and haze of plastic headlight lenses restoring likenew condition and finish professional results in under 30 minutes can be used on headlights taillights turn signals and reflective lens covers with uv coating technology one kit restores two headlights contains qty 1 pretreatment 1 cleanerpolish 1 shine restorerpreserver 3 sandpaper 600 1500 2000 grit 10 applicator polish cloths 1 pair of vinyl gloves philips automotive</code> |
+  | <code>hose clamp 132146 mm range 12 width spring type 1991 bmw 325i base coupe cooling system miscellaneous page 1 mubea automotive</code>                                                                                                                                                                                                                                                                                                                                                                                                                      | <code>hose clamp 132146 mm range 12 width spring type 1994 bmw 325i base convertible cooling system miscellaneous page 1 mubea automotive</code>                                                                                                                                                                                                                                                                                                                                                                                                                     |
+* Loss: [<code>CachedMultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedmultiplenegativesrankingloss) with these parameters:
+  ```json
+  {
+      "scale": 20.0,
+      "similarity_fct": "cos_sim"
+  }
+  ```
+### Evaluation Dataset
+#### Unnamed Dataset
+* Size: 67,441 evaluation samples
+* Columns: <code>anchor</code> and <code>positive</code>
+* Approximate statistics based on the first 1000 samples:
+  |         | anchor                                                                              | positive                                                                            |
+  |:--------|:------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
+  | type    | string                                                                              | string                                                                              |
+  | details | <ul><li>min: 11 tokens</li><li>mean: 74.02 tokens</li><li>max: 693 tokens</li></ul> | <ul><li>min: 15 tokens</li><li>mean: 74.68 tokens</li><li>max: 812 tokens</li></ul> |
+* Samples:
+  | anchor                                                                                                                                                | positive                                                                                                                                         |
+  |:------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------|
+  | <code>bulb dashboard instruments with black socket base 12v 12w 1995 bmw 318ti hatchback lights and lenses page 3 genuine bmw automotive</code>       | <code>bulb dashboard instruments with black socket base 12v 12w 1999 bmw 323is coupe gauges miscellaneous page 1 osramsylvania automotive</code> |
+  | <code>canon pixma mp282 high capacity black compatible ink cartridge ink volumeremanufactured pg512 black 18ml 1 cartridge 18ml officeproducts</code> | <code>canon pixma mp282 high capacity black compatible ink cartridge cartridges inkrediblecouk 1 black ink cartridge 18ml officeproducts</code>  |
+  | <code>oring for camshaft position sensor 17 x 3 mm 2001 bmw 325i base wagon camshafts timing chains page 1 note 17 x 3mm uro automotive</code>        | <code>oring for crankshaft sensor 17 x 3 mm 2000 bmw 323ci base coupe sensors page 5 note 17 x 3mm uro automotive</code>                         |
+* Loss: [<code>CachedMultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedmultiplenegativesrankingloss) with these parameters:
+  ```json
+  {
+      "scale": 20.0,
+      "similarity_fct": "cos_sim"
+  }
+  ```
+### Training Hyperparameters
+#### Non-Default Hyperparameters
+- `eval_strategy`: steps
+- `learning_rate`: 1e-05
+- `num_train_epochs`: 2
+- `warmup_ratio`: 0.1
+- `fp16`: True
+- `auto_find_batch_size`: True
+- `batch_sampler`: no_duplicates
+#### All Hyperparameters
+<details><summary>Click to expand</summary>
+- `overwrite_output_dir`: False
+- `do_predict`: False
+- `eval_strategy`: steps
+- `prediction_loss_only`: True
+- `per_device_train_batch_size`: 8
+- `per_device_eval_batch_size`: 8
+- `per_gpu_train_batch_size`: None
+- `per_gpu_eval_batch_size`: None
+- `gradient_accumulation_steps`: 1
+- `eval_accumulation_steps`: None
+- `torch_empty_cache_steps`: None
+- `learning_rate`: 1e-05
+- `weight_decay`: 0.0
+- `adam_beta1`: 0.9
+- `adam_beta2`: 0.999
+- `adam_epsilon`: 1e-08
+- `max_grad_norm`: 1.0
+- `num_train_epochs`: 2
+- `max_steps`: -1
+- `lr_scheduler_type`: linear
+- `lr_scheduler_kwargs`: {}
+- `warmup_ratio`: 0.1
+- `warmup_steps`: 0
+- `log_level`: passive
+- `log_level_replica`: warning
+- `log_on_each_node`: True
+- `logging_nan_inf_filter`: True
+- `save_safetensors`: True
+- `save_on_each_node`: False
+- `save_only_model`: False
+- `restore_callback_states_from_checkpoint`: False
+- `no_cuda`: False
+- `use_cpu`: False
+- `use_mps_device`: False
+- `seed`: 42
+- `data_seed`: None
+- `jit_mode_eval`: False
+- `use_ipex`: False
+- `bf16`: False
+- `fp16`: True
+- `fp16_opt_level`: O1
+- `half_precision_backend`: auto
+- `bf16_full_eval`: False
+- `fp16_full_eval`: False
+- `tf32`: None
+- `local_rank`: 0
+- `ddp_backend`: None
+- `tpu_num_cores`: None
+- `tpu_metrics_debug`: False
+- `debug`: []
+- `dataloader_drop_last`: False
+- `dataloader_num_workers`: 0
+- `dataloader_prefetch_factor`: None
+- `past_index`: -1
+- `disable_tqdm`: False
+- `remove_unused_columns`: True
+- `label_names`: None
+- `load_best_model_at_end`: False
+- `ignore_data_skip`: False
+- `fsdp`: []
+- `fsdp_min_num_params`: 0
+- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
+- `fsdp_transformer_layer_cls_to_wrap`: None
+- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
+- `deepspeed`: None
+- `label_smoothing_factor`: 0.0
+- `optim`: adamw_torch
+- `optim_args`: None
+- `adafactor`: False
+- `group_by_length`: False
+- `length_column_name`: length
+- `ddp_find_unused_parameters`: None
+- `ddp_bucket_cap_mb`: None
+- `ddp_broadcast_buffers`: False
+- `dataloader_pin_memory`: True
+- `dataloader_persistent_workers`: False
+- `skip_memory_metrics`: True
+- `use_legacy_prediction_loop`: False
+- `push_to_hub`: False
+- `resume_from_checkpoint`: None
+- `hub_model_id`: None
+- `hub_strategy`: every_save
+- `hub_private_repo`: False
+- `hub_always_push`: False
+- `gradient_checkpointing`: False
+- `gradient_checkpointing_kwargs`: None
+- `include_inputs_for_metrics`: False
+- `eval_do_concat_batches`: True
+- `fp16_backend`: auto
+- `push_to_hub_model_id`: None
+- `push_to_hub_organization`: None
+- `mp_parameters`:
+- `auto_find_batch_size`: True
+- `full_determinism`: False
+- `torchdynamo`: None
+- `ray_scope`: last
+- `ddp_timeout`: 1800
+- `torch_compile`: False
+- `torch_compile_backend`: None
+- `torch_compile_mode`: None
+- `dispatch_batches`: None
+- `split_batches`: None
+- `include_tokens_per_second`: False
+- `include_num_input_tokens_seen`: False
+- `neftune_noise_alpha`: None
+- `optim_target_modules`: None
+- `batch_eval_metrics`: False
+- `eval_on_start`: False
+- `eval_use_gather_object`: False
+- `batch_sampler`: no_duplicates
+- `multi_dataset_batch_sampler`: proportional
+</details>
+### Training Logs
+| Epoch  | Step  | Training Loss | Validation Loss |
+|:------:|:-----:|:-------------:|:---------------:|
+| 0.2076 | 7000  | 0.012         | 0.0057 |
+| 0.4152 | 14000 | 0.0044        | 0.0040 |
+| 0.6228 | 21000 | 0.0038        | 0.0040 |
+| 0.8303 | 28000 | 0.0033        | 0.0028 |
+| 1.0379 | 35000 | 0.002         | 0.0025 |
+| 1.2455 | 42000 | 0.0012        | 0.0022 |
+| 1.4531 | 49000 | 0.0008        | 0.0021 |
+| 1.6607 | 56000 | 0.0005        | 0.0021 |
+| 1.8683 | 63000 | 0.0004        | 0.0020 |
+### Framework Versions
+- Python: 3.10.13
+- Sentence Transformers: 3.0.1
+- Transformers: 4.44.0
+- PyTorch: 2.2.1
+- Accelerate: 0.33.0
+- Datasets: 2.21.0
+- Tokenizers: 0.19.1
+## Citation
+### BibTeX
+#### Sentence Transformers
+```bibtex
+@inproceedings{reimers-2019-sentence-bert,
+    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+    author = "Reimers, Nils and Gurevych, Iryna",
+    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+    month = "11",
+    year = "2019",
+    publisher = "Association for Computational Linguistics",
+    url = "https://arxiv.org/abs/1908.10084",
+}
+```
+#### CachedMultipleNegativesRankingLoss
+```bibtex
+@misc{gao2021scaling,
+    title={Scaling Deep Contrastive Learning Batch Size under Memory Limited Setup},
+    author={Luyu Gao and Yunyi Zhang and Jiawei Han and Jamie Callan},
+    year={2021},
+    eprint={2101.06983},
+    archivePrefix={arXiv},
+    primaryClass={cs.LG}
+}
+```
+<!--
+## Glossary
+*Clearly define terms in order to be accessible across audiences.*
+-->
+<!--
+## Model Card Authors
+*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
+-->
+<!--
+## Model Card Contact
+*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
+-->

config.json ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "_name_or_path": "Alibaba-NLP/gte-large-en-v1.5",
+  "architectures": [
+    "NewModel"
+  ],
+  "attention_probs_dropout_prob": 0.0,
+  "auto_map": {
+    "AutoConfig": "Alibaba-NLP/new-impl--configuration.NewConfig",
+    "AutoModel": "Alibaba-NLP/new-impl--modeling.NewModel",
+    "AutoModelForMaskedLM": "Alibaba-NLP/new-impl--modeling.NewForMaskedLM",
+    "AutoModelForMultipleChoice": "Alibaba-NLP/new-impl--modeling.NewForMultipleChoice",
+    "AutoModelForQuestionAnswering": "Alibaba-NLP/new-impl--modeling.NewForQuestionAnswering",
+    "AutoModelForSequenceClassification": "Alibaba-NLP/new-impl--modeling.NewForSequenceClassification",
+    "AutoModelForTokenClassification": "Alibaba-NLP/new-impl--modeling.NewForTokenClassification"
+  },
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-12,
+  "layer_norm_type": "layer_norm",
+  "logn_attention_clip1": false,
+  "logn_attention_scale": false,
+  "max_position_embeddings": 8192,
+  "model_type": "new",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pack_qkv": true,
+  "pad_token_id": 0,
+  "position_embedding_type": "rope",
+  "rope_scaling": {
+    "factor": 2.0,
+    "type": "ntk"
+  },
+  "rope_theta": 160000,
+  "torch_dtype": "float32",
+  "transformers_version": "4.44.0",
+  "type_vocab_size": 2,
+  "unpad_inputs": false,
+  "use_memory_efficient_attention": false,
+  "vocab_size": 30528
+}

config_sentence_transformers.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "__version__": {
+    "sentence_transformers": "3.0.1",
+    "transformers": "4.44.0",
+    "pytorch": "2.2.1"
+  },
+  "prompts": {},
+  "default_prompt_name": null,
+  "similarity_fn_name": null
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dcae353e6bd3abd927eb0ae32e57da87ea497ef64c5705ecade468a4a7dc6e2e
+size 1736585680

modules.json ADDED Viewed

	@@ -0,0 +1,14 @@

+[
+  {
+    "idx": 0,
+    "name": "0",
+    "path": "",
+    "type": "sentence_transformers.models.Transformer"
+  },
+  {
+    "idx": 1,
+    "name": "1",
+    "path": "1_Pooling",
+    "type": "sentence_transformers.models.Pooling"
+  }
+]

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d889f3d8b8855bdaa7eb0f7650f3ddf5b2f0cf971584b7d9dfc10d681cf999fc
+size 3473337082

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de6ddfbce68276c9610b3e63f6b7b14d27e537210a6a14fe7e3bd520ccc81591
+size 14244

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:88695ecd2186d877bc568b39a3362ee255bcef60a774deb411b197770103fd95
+size 1064

sentence_bert_config.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "max_seq_length": 8192,
+  "do_lower_case": false
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,62 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "max_length": 8000,
+  "model_max_length": 8192,
+  "pad_to_multiple_of": null,
+  "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "sep_token": "[SEP]",
+  "stride": 0,
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "[UNK]"
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,168 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.8682719966786276,
+  "eval_steps": 7000,
+  "global_step": 63000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.2075857774087364,
+      "grad_norm": 0.007890098728239536,
+      "learning_rate": 9.958647050101323e-06,
+      "loss": 0.012,
+      "step": 7000
+    },
+    {
+      "epoch": 0.2075857774087364,
+      "eval_loss": 0.005739695392549038,
+      "eval_runtime": 412.7277,
+      "eval_samples_per_second": 163.403,
+      "eval_steps_per_second": 20.428,
+      "step": 7000
+    },
+    {
+      "epoch": 0.4151715548174728,
+      "grad_norm": 0.07381915301084518,
+      "learning_rate": 8.805871789380036e-06,
+      "loss": 0.0044,
+      "step": 14000
+    },
+    {
+      "epoch": 0.4151715548174728,
+      "eval_loss": 0.003970430698245764,
+      "eval_runtime": 417.73,
+      "eval_samples_per_second": 161.446,
+      "eval_steps_per_second": 20.183,
+      "step": 14000
+    },
+    {
+      "epoch": 0.6227573322262092,
+      "grad_norm": 0.01579739712178707,
+      "learning_rate": 7.653096528658748e-06,
+      "loss": 0.0038,
+      "step": 21000
+    },
+    {
+      "epoch": 0.6227573322262092,
+      "eval_loss": 0.004011470824480057,
+      "eval_runtime": 416.5579,
+      "eval_samples_per_second": 161.901,
+      "eval_steps_per_second": 20.24,
+      "step": 21000
+    },
+    {
+      "epoch": 0.8303431096349456,
+      "grad_norm": 0.024147002026438713,
+      "learning_rate": 6.500321267937461e-06,
+      "loss": 0.0033,
+      "step": 28000
+    },
+    {
+      "epoch": 0.8303431096349456,
+      "eval_loss": 0.0028445960488170385,
+      "eval_runtime": 416.8518,
+      "eval_samples_per_second": 161.787,
+      "eval_steps_per_second": 20.225,
+      "step": 28000
+    },
+    {
+      "epoch": 1.037928887043682,
+      "grad_norm": 0.00019609538139775395,
+      "learning_rate": 5.347381254427731e-06,
+      "loss": 0.002,
+      "step": 35000
+    },
+    {
+      "epoch": 1.037928887043682,
+      "eval_loss": 0.002484912285581231,
+      "eval_runtime": 415.7525,
+      "eval_samples_per_second": 162.214,
+      "eval_steps_per_second": 20.279,
+      "step": 35000
+    },
+    {
+      "epoch": 1.2455146644524184,
+      "grad_norm": 0.008880384266376495,
+      "learning_rate": 4.194276488129561e-06,
+      "loss": 0.0012,
+      "step": 42000
+    },
+    {
+      "epoch": 1.2455146644524184,
+      "eval_loss": 0.0021632197313010693,
+      "eval_runtime": 408.105,
+      "eval_samples_per_second": 165.254,
+      "eval_steps_per_second": 20.659,
+      "step": 42000
+    },
+    {
+      "epoch": 1.4531004418611548,
+      "grad_norm": 0.003053226973861456,
+      "learning_rate": 3.0413364746198333e-06,
+      "loss": 0.0008,
+      "step": 49000
+    },
+    {
+      "epoch": 1.4531004418611548,
+      "eval_loss": 0.002097294433042407,
+      "eval_runtime": 407.9995,
+      "eval_samples_per_second": 165.297,
+      "eval_steps_per_second": 20.664,
+      "step": 49000
+    },
+    {
+      "epoch": 1.6606862192698912,
+      "grad_norm": 0.00016563042299821973,
+      "learning_rate": 1.8883964611101043e-06,
+      "loss": 0.0005,
+      "step": 56000
+    },
+    {
+      "epoch": 1.6606862192698912,
+      "eval_loss": 0.0021241051144897938,
+      "eval_runtime": 407.4084,
+      "eval_samples_per_second": 165.537,
+      "eval_steps_per_second": 20.694,
+      "step": 56000
+    },
+    {
+      "epoch": 1.8682719966786276,
+      "grad_norm": 0.0011578386183828115,
+      "learning_rate": 7.354564476003758e-07,
+      "loss": 0.0004,
+      "step": 63000
+    },
+    {
+      "epoch": 1.8682719966786276,
+      "eval_loss": 0.001993535552173853,
+      "eval_runtime": 406.6423,
+      "eval_samples_per_second": 165.848,
+      "eval_steps_per_second": 20.733,
+      "step": 63000
+    }
+  ],
+  "logging_steps": 7000,
+  "max_steps": 67442,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 7000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:24026fdfd36b29934cf60f459acd3d60861392e75c603628e68e26d7eacb2000
+size 5368

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff