sturovec-base-sk-v0 / README.md
mrshu's picture
Add new SentenceTransformer model
bd8da4e verified
metadata
tags:
  - sentence-transformers
  - sentence-similarity
  - feature-extraction
  - dense
  - generated_from_trainer
  - dataset_size:137745
  - loss:CosineSimilarityLoss
  - loss:MultipleNegativesRankingLoss
base_model: gerulata/slovakbert
widget:
  - source_sentence: >-
      Napriek tomu , myšlienka v liste dnešným Times od profesora medicíny z
      Yale je dobrá , stačí dať policajta na Springerov set .
    sentences:
      - List bol od profesora , ktorý vyučuje fyziológiu a anatómiu na Yale .
      - Whittington by rozpoznal herečku bez akéhokoľvek make-upu .
      - Bushova rodina poskytuje vynikajúce rozptýlenie .
  - source_sentence: Rusko uviedlo, že systém predstavuje jasnú hrozbu pre ruskú bezpečnosť.
    sentences:
      - Britská vojnová loď vypláva na Falklandy
      - predstavuje vážnu hrozbu pre medzinárodnú bezpečnosť.
      - Žena krája zeleninu.
  - source_sentence: >-
      Po triliónty raz opakujem, že som nikdy nepovedal, že rehabilitácia
      nefunguje.
    sentences:
      - >-
        BlueCore3 implementuje aj funkciu eSCO verzie v1.2, ktorá poskytuje
        základ pre pokročilé aplikácie bezdrôtovej telefónie pre prenos
        Bluetooth.
      - Žena krája brokolicu.
      - >-
        V bode, ktorý citujete, by malo byť napísané: "Nikdy som nepovedal, že
        rehabilitácia nemôže nikdy fungovať."
  - source_sentence: >-
      Niekoľko kilometrov ďalej na východ sa nachádza Tehuacan, kde bola
      kukurica pravdepodobne prvýkrát domestikovaná pred 4 000 rokmi.
    sentences:
      - Vták  počas letu roztiahnuté krídla.
      - >-
        Niekoľko kilometrov západne sa nachádzajú pyramídy v Teotihuacane, kde
        sa pred 4000 rokmi pravdepodobne prvýkrát domestikovala kukurica.
      - >-
        Niekoľko vozidiel SUV zaparkovaných na obytných uliciach v Monrovii bolo
        označených nápismi "ELF" a inými sloganmi, povedal seržant Tom Wright.
  - source_sentence: >-
      Veľká časť malých detí na Ukrajine trpí rôznymi zdravotnými problémami,
      ktoré boli doteraz pravdepodobne nediagnostikované, no odhaľujú sa v
      súvislosti s černobyľskou katastrofou. To by mohlo poukazovať na slabý
      systém zdravotnej starostlivosti a drsné životné podmienky.
    sentences:
      - Černobyľská katastrofa mala dôsledky mimo bývalého ZSSR.
      - Blair patrí k anglikánskej cirkvi.
      - Zložený index filipínskej burzy klesol.
pipeline_tag: sentence-similarity
library_name: sentence-transformers
metrics:
  - pearson_cosine
  - spearman_cosine
  - cosine_accuracy
  - cosine_accuracy_threshold
  - cosine_f1
  - cosine_f1_threshold
  - cosine_precision
  - cosine_recall
  - cosine_ap
  - cosine_mcc
  - validation_sts_pearson_cosine
  - validation_sts_spearman_cosine
  - validation_nli_cosine_accuracy
  - validation_nli_cosine_accuracy_threshold
  - validation_nli_cosine_f1
  - validation_nli_cosine_f1_threshold
  - validation_nli_cosine_precision
  - validation_nli_cosine_recall
  - validation_nli_cosine_ap
  - validation_nli_cosine_mcc
  - validation_rte_cosine_accuracy
  - validation_rte_cosine_accuracy_threshold
  - validation_rte_cosine_f1
  - validation_rte_cosine_f1_threshold
  - validation_rte_cosine_precision
  - validation_rte_cosine_recall
  - validation_rte_cosine_ap
  - validation_rte_cosine_mcc
  - validation_dev_overall
model-index:
  - name: SentenceTransformer based on gerulata/slovakbert
    results:
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: validation sts
          type: validation_sts
        metrics:
          - type: pearson_cosine
            value: 0.834747105127958
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.8298144963288951
            name: Spearman Cosine
      - task:
          type: binary-classification
          name: Binary Classification
        dataset:
          name: validation nli
          type: validation_nli
        metrics:
          - type: cosine_accuracy
            value: 0.6662650602409639
            name: Cosine Accuracy
          - type: cosine_accuracy_threshold
            value: 0.9707679152488708
            name: Cosine Accuracy Threshold
          - type: cosine_f1
            value: 0.4995480566435673
            name: Cosine F1
          - type: cosine_f1_threshold
            value: -0.014761148020625114
            name: Cosine F1 Threshold
          - type: cosine_precision
            value: 0.33306548814785053
            name: Cosine Precision
          - type: cosine_recall
            value: 0.9987951807228915
            name: Cosine Recall
          - type: cosine_ap
            value: 0.27308256520018437
            name: Cosine Ap
          - type: cosine_mcc
            value: -0.028346702743853168
            name: Cosine Mcc
      - task:
          type: binary-classification
          name: Binary Classification
        dataset:
          name: validation rte
          type: validation_rte
        metrics:
          - type: cosine_accuracy
            value: 0.5234657039711191
            name: Cosine Accuracy
          - type: cosine_accuracy_threshold
            value: 0.9567097425460815
            name: Cosine Accuracy Threshold
          - type: cosine_f1
            value: 0.6388206388206388
            name: Cosine F1
          - type: cosine_f1_threshold
            value: 0.22168028354644775
            name: Cosine F1 Threshold
          - type: cosine_precision
            value: 0.47101449275362317
            name: Cosine Precision
          - type: cosine_recall
            value: 0.9923664122137404
            name: Cosine Recall
          - type: cosine_ap
            value: 0.3712009178783954
            name: Cosine Ap
          - type: cosine_mcc
            value: -0.06354571037626576
            name: Cosine Mcc
      - task:
          type: multi-task-dev
          name: Multi Task Dev
        dataset:
          name: Unknown
          type: unknown
        metrics:
          - type: validation_sts_pearson_cosine
            value: 0.834747105127958
            name: Validation Sts Pearson Cosine
          - type: validation_sts_spearman_cosine
            value: 0.8298144963288951
            name: Validation Sts Spearman Cosine
          - type: validation_nli_cosine_accuracy
            value: 0.6662650602409639
            name: Validation Nli Cosine Accuracy
          - type: validation_nli_cosine_accuracy_threshold
            value: 0.9707679152488708
            name: Validation Nli Cosine Accuracy Threshold
          - type: validation_nli_cosine_f1
            value: 0.4995480566435673
            name: Validation Nli Cosine F1
          - type: validation_nli_cosine_f1_threshold
            value: -0.014761148020625114
            name: Validation Nli Cosine F1 Threshold
          - type: validation_nli_cosine_precision
            value: 0.33306548814785053
            name: Validation Nli Cosine Precision
          - type: validation_nli_cosine_recall
            value: 0.9987951807228915
            name: Validation Nli Cosine Recall
          - type: validation_nli_cosine_ap
            value: 0.27308256520018437
            name: Validation Nli Cosine Ap
          - type: validation_nli_cosine_mcc
            value: -0.028346702743853168
            name: Validation Nli Cosine Mcc
          - type: validation_rte_cosine_accuracy
            value: 0.5234657039711191
            name: Validation Rte Cosine Accuracy
          - type: validation_rte_cosine_accuracy_threshold
            value: 0.9567097425460815
            name: Validation Rte Cosine Accuracy Threshold
          - type: validation_rte_cosine_f1
            value: 0.6388206388206388
            name: Validation Rte Cosine F1
          - type: validation_rte_cosine_f1_threshold
            value: 0.22168028354644775
            name: Validation Rte Cosine F1 Threshold
          - type: validation_rte_cosine_precision
            value: 0.47101449275362317
            name: Validation Rte Cosine Precision
          - type: validation_rte_cosine_recall
            value: 0.9923664122137404
            name: Validation Rte Cosine Recall
          - type: validation_rte_cosine_ap
            value: 0.3712009178783954
            name: Validation Rte Cosine Ap
          - type: validation_rte_cosine_mcc
            value: -0.06354571037626576
            name: Validation Rte Cosine Mcc
          - type: validation_dev_overall
            value: 0.49136599313582496
            name: Validation Dev Overall

SentenceTransformer based on gerulata/slovakbert

This is a sentence-transformers model finetuned from gerulata/slovakbert. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

Model Details

Model Description

  • Model Type: Sentence Transformer
  • Base model: gerulata/slovakbert
  • Maximum Sequence Length: 512 tokens
  • Output Dimensionality: 768 dimensions
  • Similarity Function: Cosine Similarity

Model Sources

Full Model Architecture

SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False, 'architecture': 'RobertaModel'})
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)

Usage

Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

pip install -U sentence-transformers

Then you can load this model and run inference.

from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("mrshu/sturovec-base-sk-v0")
# Run inference
sentences = [
    'Veľká časť malých detí na Ukrajine trpí rôznymi zdravotnými problémami, ktoré boli doteraz pravdepodobne nediagnostikované, no odhaľujú sa v súvislosti s černobyľskou katastrofou. To by mohlo poukazovať na slabý systém zdravotnej starostlivosti a drsné životné podmienky.',
    'Černobyľská katastrofa mala dôsledky mimo bývalého ZSSR.',
    'Blair patrí k anglikánskej cirkvi.',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# (3, 768)

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities)
# tensor([[ 1.0000,  0.5042, -0.1152],
#         [ 0.5042,  1.0000, -0.1069],
#         [-0.1152, -0.1069,  1.0000]])

Evaluation

Metrics

Semantic Similarity

Metric Value
pearson_cosine 0.8347
spearman_cosine 0.8298

Binary Classification

Metric validation_nli validation_rte
cosine_accuracy 0.6663 0.5235
cosine_accuracy_threshold 0.9708 0.9567
cosine_f1 0.4995 0.6388
cosine_f1_threshold -0.0148 0.2217
cosine_precision 0.3331 0.471
cosine_recall 0.9988 0.9924
cosine_ap 0.2731 0.3712
cosine_mcc -0.0283 -0.0635

Multi Task Dev

  • Evaluated with slovak_embeddings_v1.train.MultiTaskDevEvaluator
Metric Value
validation_sts_pearson_cosine 0.8347
validation_sts_spearman_cosine 0.8298
validation_nli_cosine_accuracy 0.6663
validation_nli_cosine_accuracy_threshold 0.9708
validation_nli_cosine_f1 0.4995
validation_nli_cosine_f1_threshold -0.0148
validation_nli_cosine_precision 0.3331
validation_nli_cosine_recall 0.9988
validation_nli_cosine_ap 0.2731
validation_nli_cosine_mcc -0.0283
validation_rte_cosine_accuracy 0.5235
validation_rte_cosine_accuracy_threshold 0.9567
validation_rte_cosine_f1 0.6388
validation_rte_cosine_f1_threshold 0.2217
validation_rte_cosine_precision 0.471
validation_rte_cosine_recall 0.9924
validation_rte_cosine_ap 0.3712
validation_rte_cosine_mcc -0.0635
validation_dev_overall 0.4914

Training Details

Training Datasets

Unnamed Dataset

  • Size: 5,604 training samples
  • Columns: sentence_0, sentence_1, and label
  • Approximate statistics based on the first 1000 samples:
    |         | sentence_0 | sentence_1 | label |
    |---------|------------|------------|-------|
    | type    | string     | string     | float |
    | details | min: 5 tokens; mean: 15.61 tokens; max: 63 tokens | min: 5 tokens; mean: 15.51 tokens; max: 51 tokens | min: 0.0; mean: 0.52; max: 1.0 |
  • Samples:
    | sentence_0 | sentence_1 | label |
    |------------|------------|-------|
    | Počet obetí útoku na políciu v Nigérii stúpol na 30 | Počet obetí útoku na autobus v Keni stúpol na šesť | 0.27999999523162844 |
    | Očakáva sa, že 39-ročná Terri Schiavo zomrie niekedy v priebehu nasledujúcich dvoch týždňov v hospici v oblasti Tampa, kde strávila niekoľko posledných rokov. | 39-ročná Terri Schiavo podstúpila zákrok v hospici v oblasti Tampa Bay, kde žije už niekoľko rokov, uviedol jej otec Bob Schindler. | 0.3599999904632568 |
    | Žena drží psa, zatiaľ čo iný pes stojí neďaleko na poli. | Pani drží jedného psa, zatiaľ čo druhý pes sa hrá na dvore. | 0.6800000190734863 |
  • Loss: CosineSimilarityLoss with these parameters:
    {
        "loss_fct": "torch.nn.modules.loss.MSELoss"
    }
    

Unnamed Dataset

  • Size: 130,900 training samples
  • Columns: sentence_0, sentence_1, and label
  • Approximate statistics based on the first 1000 samples:
    |         | sentence_0 | sentence_1 | label |
    |---------|------------|------------|-------|
    | type    | string     | string     | int   |
    | details | min: 4 tokens; mean: 28.14 tokens; max: 512 tokens | min: 5 tokens; mean: 14.94 tokens; max: 45 tokens | 1: 100.00% |
  • Samples:
    | sentence_0 | sentence_1 | label |
    |------------|------------|-------|
    | Špecializovaný obchod s oblečením a um som bol manažér na čiastočný úväzok v obchode s kartami a darčekmi, rovnako ako vyučovanie v modelingovej agentúre a modelovanie tak | Užila som si čas , keď som učila v modelingovej agentúre . | 1 |
    | Parcellsovi hráči a asistenti ho nasledujú verne z mesta do mesta. | Parcells urobil veľa práce, aby si získal ich vernosť. | 1 |
    | Nie je príliš náročné na mozog | Som celkom šikovný, vieš. | 1 |
  • Loss: MultipleNegativesRankingLoss with these parameters:
    {
        "scale": 20.0,
        "similarity_fct": "cos_sim",
        "gather_across_devices": false
    }
    

Unnamed Dataset

  • Size: 1,241 training samples
  • Columns: sentence_0, sentence_1, and label
  • Approximate statistics based on the first 1000 samples:
    |         | sentence_0 | sentence_1 | label |
    |---------|------------|------------|-------|
    | type    | string     | string     | int   |
    | details | min: 9 tokens; mean: 59.58 tokens; max: 268 tokens | min: 6 tokens; mean: 15.31 tokens; max: 59 tokens | 1: 100.00% |
  • Samples:
    | sentence_0 | sentence_1 | label |
    |------------|------------|-------|
    | Prieskumná kozmická loď NASA Saturn, Cassini, objavila atmosféru okolo mesiaca Enceladus. Toto je prvý takýto objav sondy Cassini, okrem Titanu, o prítomnosti atmosféry okolo mesiaca Saturna. | Titan je pätnástym zo známych satelitov Saturnu. | 1 |
    | Polícia bola privolaná do domu Highgrove House, ktorý patrí princovi Charlesovi a jeho manželke Camille, v utorok večer po tom, čo polícia dostala telefonát, že na pozemku sa nachádza narušiteľ. "Bol zadržaný v areáli a nemal prístup do žiadnych budov," uviedol hovorca polície Tony Rymer. Hovorca usadlosti odmietol médiám komentovať, že je to „vec na políciu“ a že akékoľvek pripomienky k narušeniu by sa mali podávať prostredníctvom nich. Podľa hovorkyne gloucesterskej polície bol muž z anglického Bristolu zatknutý po tom, čo sa okolo pozemku oháňal vidlami. Meno 55-ročného muža nezverejnili, polícia ho však obvinila z prečinu porušovania domovej slobody. Po zložení kaucie ho neskôr prepustili. Camilla a Charles, ktorí boli v tom čase prítomní a spali na sídlisku, neboli zranení. | Princ Charles vlastní majetok v Bristole. | 1 |
    | ISLAMABAD, Pakistan - Nepokojné prímerie medzi pakistanskou vládou a militantmi Talibanu v údolí Svát sa v pondelok javilo čoraz krehkejšie, keď vládne sily už druhý deň zaútočili na militantov v susednom okrese, čo spôsobilo, že hlavný vyjednávač Talibanu prerušil rozhovory. Maulana Sufi Muhammad, protalibanský duchovný, ktorý vyjednával mierové rozhovory medzi vládou a Talibanom v Svát, v pondelok zastavil rozhovory na protest proti vojenskej operácii v okrese Lower Dir západne od Svát, uviedol jeho hovorca. | Maulana Sufi Muhammad je šéfom Tehrik Nifaz Shariat-e-Muhammadi. | 1 |
  • Loss: MultipleNegativesRankingLoss with these parameters:
    {
        "scale": 20.0,
        "similarity_fct": "cos_sim",
        "gather_across_devices": false
    }
    

Training Hyperparameters

Non-Default Hyperparameters

  • eval_strategy: steps
  • per_device_train_batch_size: 32
  • per_device_eval_batch_size: 32
  • num_train_epochs: 20
  • multi_dataset_batch_sampler: round_robin

All Hyperparameters

Click to expand
  • overwrite_output_dir: False
  • do_predict: False
  • eval_strategy: steps
  • prediction_loss_only: True
  • per_device_train_batch_size: 32
  • per_device_eval_batch_size: 32
  • per_gpu_train_batch_size: None
  • per_gpu_eval_batch_size: None
  • gradient_accumulation_steps: 1
  • eval_accumulation_steps: None
  • torch_empty_cache_steps: None
  • learning_rate: 5e-05
  • weight_decay: 0.0
  • adam_beta1: 0.9
  • adam_beta2: 0.999
  • adam_epsilon: 1e-08
  • max_grad_norm: 1
  • num_train_epochs: 20
  • max_steps: -1
  • lr_scheduler_type: linear
  • lr_scheduler_kwargs: {}
  • warmup_ratio: 0.0
  • warmup_steps: 0
  • log_level: passive
  • log_level_replica: warning
  • log_on_each_node: True
  • logging_nan_inf_filter: True
  • save_safetensors: True
  • save_on_each_node: False
  • save_only_model: False
  • restore_callback_states_from_checkpoint: False
  • no_cuda: False
  • use_cpu: False
  • use_mps_device: False
  • seed: 42
  • data_seed: None
  • jit_mode_eval: False
  • bf16: False
  • fp16: False
  • fp16_opt_level: O1
  • half_precision_backend: auto
  • bf16_full_eval: False
  • fp16_full_eval: False
  • tf32: None
  • local_rank: 0
  • ddp_backend: None
  • tpu_num_cores: None
  • tpu_metrics_debug: False
  • debug: []
  • dataloader_drop_last: False
  • dataloader_num_workers: 0
  • dataloader_prefetch_factor: None
  • past_index: -1
  • disable_tqdm: False
  • remove_unused_columns: True
  • label_names: None
  • load_best_model_at_end: False
  • ignore_data_skip: False
  • fsdp: []
  • fsdp_min_num_params: 0
  • fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
  • fsdp_transformer_layer_cls_to_wrap: None
  • accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
  • parallelism_config: None
  • deepspeed: None
  • label_smoothing_factor: 0.0
  • optim: adamw_torch_fused
  • optim_args: None
  • adafactor: False
  • group_by_length: False
  • length_column_name: length
  • project: huggingface
  • trackio_space_id: trackio
  • ddp_find_unused_parameters: None
  • ddp_bucket_cap_mb: None
  • ddp_broadcast_buffers: False
  • dataloader_pin_memory: True
  • dataloader_persistent_workers: False
  • skip_memory_metrics: True
  • use_legacy_prediction_loop: False
  • push_to_hub: False
  • resume_from_checkpoint: None
  • hub_model_id: None
  • hub_strategy: every_save
  • hub_private_repo: None
  • hub_always_push: False
  • hub_revision: None
  • gradient_checkpointing: False
  • gradient_checkpointing_kwargs: None
  • include_inputs_for_metrics: False
  • include_for_metrics: []
  • eval_do_concat_batches: True
  • fp16_backend: auto
  • push_to_hub_model_id: None
  • push_to_hub_organization: None
  • mp_parameters:
  • auto_find_batch_size: False
  • full_determinism: False
  • torchdynamo: None
  • ray_scope: last
  • ddp_timeout: 1800
  • torch_compile: False
  • torch_compile_backend: None
  • torch_compile_mode: None
  • include_tokens_per_second: False
  • include_num_input_tokens_seen: no
  • neftune_noise_alpha: None
  • optim_target_modules: None
  • batch_eval_metrics: False
  • eval_on_start: False
  • use_liger_kernel: False
  • liger_kernel_config: None
  • eval_use_gather_object: False
  • average_tokens_across_devices: True
  • prompts: None
  • batch_sampler: batch_sampler
  • multi_dataset_batch_sampler: round_robin
  • router_mapping: {}
  • learning_rate_mapping: {}

Training Logs

Epoch Step Training Loss validation_sts_spearman_cosine validation_nli_cosine_ap validation_rte_cosine_ap validation_dev_overall
0.3333 39 - 0.7072 0.2996 0.4315 0.4794
0.6667 78 - 0.7111 0.2988 0.4297 0.4799
1.0 117 - 0.7182 0.2968 0.4279 0.4810
1.3333 156 - 0.7272 0.2940 0.4256 0.4823
1.6667 195 - 0.7371 0.2908 0.4195 0.4825
2.0 234 - 0.7460 0.2862 0.4106 0.4809
2.3333 273 - 0.7457 0.2824 0.4026 0.4769
2.6667 312 - 0.7377 0.2822 0.3962 0.4721
3.0 351 - 0.7415 0.2815 0.3943 0.4724
3.3333 390 - 0.7471 0.2809 0.3916 0.4732
3.6667 429 - 0.7522 0.2800 0.3903 0.4742
4.0 468 - 0.7542 0.2796 0.3888 0.4742
4.2735 500 1.2693 - - - -
4.3333 507 - 0.7587 0.2788 0.3887 0.4754
4.6667 546 - 0.7613 0.2780 0.3879 0.4757
5.0 585 - 0.7642 0.2777 0.3867 0.4762
5.3333 624 - 0.7673 0.2769 0.3865 0.4769
5.6667 663 - 0.7674 0.2781 0.3861 0.4772
6.0 702 - 0.7731 0.2769 0.3845 0.4782
6.3333 741 - 0.7776 0.2764 0.3853 0.4797
6.6667 780 - 0.7799 0.2758 0.3843 0.4800
7.0 819 - 0.7825 0.2762 0.3842 0.4810
7.3333 858 - 0.7856 0.2756 0.3830 0.4814
7.6667 897 - 0.7866 0.2754 0.3824 0.4814
8.0 936 - 0.7913 0.2748 0.3803 0.4821
8.3333 975 - 0.7915 0.2746 0.3803 0.4821
8.5470 1000 0.4279 - - - -
8.6667 1014 - 0.7925 0.2746 0.3789 0.4820
9.0 1053 - 0.7959 0.2739 0.3803 0.4834
9.3333 1092 - 0.7974 0.2739 0.3762 0.4825
9.6667 1131 - 0.7980 0.2740 0.3796 0.4839
10.0 1170 - 0.8002 0.2738 0.3800 0.4847
10.3333 1209 - 0.7971 0.2743 0.3770 0.4828
10.6667 1248 - 0.8002 0.2741 0.3760 0.4835
11.0 1287 - 0.8026 0.2737 0.3763 0.4842
11.3333 1326 - 0.8017 0.2740 0.3744 0.4834
11.6667 1365 - 0.8037 0.2741 0.3730 0.4836
12.0 1404 - 0.8074 0.2737 0.3729 0.4847
12.3333 1443 - 0.8062 0.2736 0.3747 0.4848
12.6667 1482 - 0.8094 0.2735 0.3732 0.4854
12.8205 1500 0.2922 - - - -
13.0 1521 - 0.8102 0.2739 0.3706 0.4849
13.3333 1560 - 0.8148 0.2723 0.3739 0.4870
13.6667 1599 - 0.8136 0.2726 0.3729 0.4864
14.0 1638 - 0.8140 0.2740 0.3688 0.4856
14.3333 1677 - 0.8120 0.2738 0.3699 0.4852
14.6667 1716 - 0.8153 0.2733 0.3693 0.4859
15.0 1755 - 0.8211 0.2726 0.3692 0.4876
15.3333 1794 - 0.8212 0.2726 0.3711 0.4883
15.6667 1833 - 0.8189 0.2740 0.3711 0.4880
16.0 1872 - 0.8224 0.2736 0.3696 0.4885
16.3333 1911 - 0.8234 0.2726 0.3692 0.4884
16.6667 1950 - 0.8248 0.2733 0.3677 0.4886
17.0 1989 - 0.8276 0.2728 0.3662 0.4889
17.0940 2000 0.2114 - - - -
17.3333 2028 - 0.8264 0.2710 0.3714 0.4896
17.6667 2067 - 0.8283 0.2713 0.3721 0.4906
18.0 2106 - 0.8269 0.2724 0.3699 0.4897
18.3333 2145 - 0.8291 0.2723 0.3718 0.4911
18.6667 2184 - 0.8302 0.2720 0.3719 0.4914
19.0 2223 - 0.8298 0.2731 0.3712 0.4914

Framework Versions

  • Python: 3.13.0
  • Sentence Transformers: 5.2.0
  • Transformers: 4.57.3
  • PyTorch: 2.9.1+cu128
  • Accelerate: 1.12.0
  • Datasets: 4.4.1
  • Tokenizers: 0.22.1

Citation

BibTeX

Sentence Transformers

@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}

MultipleNegativesRankingLoss

@misc{henderson2017efficient,
    title={Efficient Natural Language Response Suggestion for Smart Reply},
    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
    year={2017},
    eprint={1705.00652},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}