Upload folder using huggingface_hub

Browse files

Files changed (16) hide show

.gitattributes +1 -0
1_Pooling/config.json +10 -0
README.md +567 -0
config.json +27 -0
config_sentence_transformers.json +10 -0
model.safetensors +3 -0
modules.json +20 -0
optimizer.pt +3 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
sentence_bert_config.json +4 -0
special_tokens_map.json +51 -0
tokenizer.json +3 -0
tokenizer_config.json +56 -0
trainer_state.json +285 -0
training_args.bin +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

1_Pooling/config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "word_embedding_dimension": 768,
+  "pooling_mode_cls_token": false,
+  "pooling_mode_mean_tokens": true,
+  "pooling_mode_max_tokens": false,
+  "pooling_mode_mean_sqrt_len_tokens": false,
+  "pooling_mode_weightedmean_tokens": false,
+  "pooling_mode_lasttoken": false,
+  "include_prompt": true
+}

README.md ADDED Viewed

	@@ -0,0 +1,567 @@

+---
+tags:
+- sentence-transformers
+- sentence-similarity
+- feature-extraction
+- generated_from_trainer
+- dataset_size:98660
+- loss:MultipleNegativesRankingLoss
+base_model: intfloat/multilingual-e5-base
+widget:
+- source_sentence: 'Instruct: Given a dialogue context, retrieve relevant followup
+    phrase that align with the context
+    Dialogue Context: bot_0: Do you like gaming. I am a big fan.
+    bot_1: My kids play games but I don''t play much. I love to watch movies!.
+    bot_0: Oh really what is their favorite game?
+    bot_1: I think it''s called fortnite. I sometimes watch while cooking healthy
+    meals. What''s yours?
+    bot_0: The best game I like to play is alistar.
+    bot_1: Never heard of it. Old timer here! Just turned 30. What other things do
+    you like?'
+  sentences:
+  - 'Followup phrase: I usually only eat them when my kids want them, it''s not something
+    that I''ll make for myself.  What''s your favorite dip for chicken nuggets?'
+  - 'Followup phrase: My big doberman lays on me all the time and ripped mine off'
+  - 'Followup phrase: Yeah, he also got me into cars.'
+- source_sentence: 'Instruct: Given a dialogue context, retrieve relevant followup
+    phrase that align with the context
+    Dialogue Context: bot_0: Just sitting down to dinner after work. Steak!
+    bot_1: Listening to my beethoven favorite, moonlight sonata..
+    bot_0: Nice! I listen to music at work a lot. What do you do?
+    bot_1: I practice shooting with both of my handgunds and watch british tv. You?
+    bot_0: Sales. The playlist of black sabbath usually pumps me up to sell! Lol.
+    bot_1: My grandma from italy came to visit, and iron man is her favorite song!
+    bot_0: Your grandma rocks! Love italy, hope to visit but need to pay off some
+    debt first.
+    bot_1: I understand that. I want to travel in general but I can''t at the moment..
+    bot_0: Hopefully you will! I’m so focused on my career, travel is a low priority
+    at this point.
+    bot_1: Same for me! I barely paid off my volkswagen beetle.
+    bot_0: Love that car. What color?'
+  sentences:
+  - 'Followup phrase: I hope so. I just try to keep positive, eat healthy and drink
+    lots of water.'
+  - 'Followup phrase: I just made a seafood chowder lately! It tastes great. What''s
+    your favourite dish to cook at your restuarant?'
+  - 'Followup phrase: Do you speak any other languages? I enjoy learning them.'
+- source_sentence: 'Instruct: Given a dialogue context, retrieve relevant followup
+    phrase that align with the context
+    Dialogue Context: bot_0: Hello how are you doing today?
+    bot_1: Very well thank you. How are you?
+    bot_0: Going to head out soon to play some baseball. I really like the game.'
+  sentences:
+  - 'Followup phrase: It teaches discipline too. I''m an er nurse so I don''t see
+    my son that much'
+  - 'Followup phrase: I take a boat to work! What about you?'
+  - 'Followup phrase: Yes 3 but they live out of state.. You?'
+- source_sentence: 'Instruct: Given a dialogue context, retrieve relevant followup
+    phrase that align with the context
+    Dialogue Context: bot_0: Hello, I am in college for marketing. What do you do?
+    bot_1: Hi. Right now an entrepreneur, freelance. I was an accountant before.
+    bot_0: Cool, did you not like being an accountant?
+    bot_1: Not really, I am ready for a new life, new career. Do you have a job?
+    bot_0: No, but I am hoping to design ads one day!'
+  sentences:
+  - 'Followup phrase: Nice. Any pets? I have a dog, he is my best friend..'
+  - 'Followup phrase: Yes! I like to have a little "me" time in the morning to play
+    games before I have to get up for work. It''s so relaxing. When do you usually
+    play games?'
+  - 'Followup phrase: I am a full time student but I work construction in the summer
+    months for'
+- source_sentence: 'Instruct: Given a dialogue context, retrieve relevant followup
+    phrase that align with the context
+    Dialogue Context: bot_0: Hello, I just got back from class. What are you doing?
+    bot_1: I just got done working out at the gym.
+    bot_0: Cool, what is your favorite exercise?
+    bot_1: Do you have your own vehicle?
+    bot_0: No, I am a student. I walk everywhere or I take the bus.
+    bot_1: Oh wow, that must get tiring. Do you have a significant other?
+    bot_0: It''s not, I even have energy to play baseball. I do not, I am single.
+    bot_1: Thats awesome that you have the energy. My significant other is a lawyer.
+    We''re married..
+    bot_0: Awe, I hope to have a job designing ads one day.
+    bot_1: That sounds neat. Are you a vegetarian?
+    bot_0: No, but have thought about it!'
+  sentences:
+  - 'Followup phrase: I do not. My husband wants a boy, he is in the army.'
+  - 'Followup phrase: I am amazing, except I found out I am allergic to fish!'
+  - 'Followup phrase: Yeah they can be, single with no kids, which is great!! Living
+    off the land'
+pipeline_tag: sentence-similarity
+library_name: sentence-transformers
+metrics:
+- cosine_accuracy
+- cosine_accuracy_threshold
+- cosine_f1
+- cosine_f1_threshold
+- cosine_precision
+- cosine_recall
+- cosine_ap
+- cosine_mcc
+model-index:
+- name: SentenceTransformer based on intfloat/multilingual-e5-base
+  results:
+  - task:
+      type: binary-classification
+      name: Binary Classification
+    dataset:
+      name: Unknown
+      type: unknown
+    metrics:
+    - type: cosine_accuracy
+      value: 0.9324928469241774
+      name: Cosine Accuracy
+    - type: cosine_accuracy_threshold
+      value: 0.6963315010070801
+      name: Cosine Accuracy Threshold
+    - type: cosine_f1
+      value: 0.7932711614832003
+      name: Cosine F1
+    - type: cosine_f1_threshold
+      value: 0.6896486282348633
+      name: Cosine F1 Threshold
+    - type: cosine_precision
+      value: 0.791752026365013
+      name: Cosine Precision
+    - type: cosine_recall
+      value: 0.7947961373390557
+      name: Cosine Recall
+    - type: cosine_ap
+      value: 0.8751572160892609
+      name: Cosine AP
+    - type: cosine_mcc
+      value: 0.7518321554060445
+      name: Cosine MCC
+---
+# SentenceTransformer based on intfloat/multilingual-e5-base
+This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [intfloat/multilingual-e5-base](https://huggingface.co/intfloat/multilingual-e5-base). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
+## Model Details
+### Model Description
+- **Model Type:** Sentence Transformer
+- **Base model:** [intfloat/multilingual-e5-base](https://huggingface.co/intfloat/multilingual-e5-base) <!-- at revision 835193815a3936a24a0ee7dc9e3d48c1fbb19c55 -->
+- **Maximum Sequence Length:** 512 tokens
+- **Output Dimensionality:** 768 dimensions
+- **Similarity Function:** Cosine Similarity
+<!-- - **Training Dataset:** Unknown -->
+<!-- - **Language:** Unknown -->
+<!-- - **License:** Unknown -->
+### Model Sources
+- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
+- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
+- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
+### Full Model Architecture
+```
+SentenceTransformer(
+  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: XLMRobertaModel
+  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+  (2): Normalize()
+)
+```
+## Usage
+### Direct Usage (Sentence Transformers)
+First install the Sentence Transformers library:
+```bash
+pip install -U sentence-transformers
+```
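+Then you can load this model and run inference. The model was trained with the prompts listed under "Training Hyperparameters" (`Instruct: ...\nDialogue Context: ` for contexts and `Followup phrase: ` for candidates), but `config_sentence_transformers.json` stores no prompts, so prepend them to your inputs yourself, as in the example below.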
+```python
+from sentence_transformers import SentenceTransformer
+# Download from the 🤗 Hub
+model = SentenceTransformer("sentence_transformers_model_id")
+# Run inference
+sentences = [
+    "Instruct: Given a dialogue context, retrieve relevant followup phrase that align with the context\nDialogue Context: bot_0: Hello, I just got back from class. What are you doing?\nbot_1: I just got done working out at the gym.\nbot_0: Cool, what is your favorite exercise?\nbot_1: Do you have your own vehicle?\nbot_0: No, I am a student. I walk everywhere or I take the bus.\nbot_1: Oh wow, that must get tiring. Do you have a significant other?\nbot_0: It's not, I even have energy to play baseball. I do not, I am single.\nbot_1: Thats awesome that you have the energy. My significant other is a lawyer. We're married..\nbot_0: Awe, I hope to have a job designing ads one day.\nbot_1: That sounds neat. Are you a vegetarian?\nbot_0: No, but have thought about it!",
+    'Followup phrase: I do not. My husband wants a boy, he is in the army.',
+    'Followup phrase: I am amazing, except I found out I am allergic to fish!',
+]
+embeddings = model.encode(sentences)
+print(embeddings.shape)
+# [3, 768]
+# Get the similarity scores for the embeddings
+similarities = model.similarity(embeddings, embeddings)
+print(similarities.shape)
+# [3, 3]
+```
+<!--
+### Direct Usage (Transformers)
+<details><summary>Click to see the direct usage in Transformers</summary>
+</details>
+-->
+<!--
+### Downstream Usage (Sentence Transformers)
+You can finetune this model on your own dataset.
+<details><summary>Click to expand</summary>
+</details>
+-->
+<!--
+### Out-of-Scope Use
+*List how the model may foreseeably be misused and address what users ought not to do with the model.*
+-->
+## Evaluation
+### Metrics
+#### Binary Classification
+* Evaluated with [<code>BinaryClassificationEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.BinaryClassificationEvaluator)
+| Metric                    | Value      |
+|:--------------------------|:-----------|
+| cosine_accuracy           | 0.9325     |
+| cosine_accuracy_threshold | 0.6963     |
+| cosine_f1                 | 0.7933     |
+| cosine_f1_threshold       | 0.6896     |
+| cosine_precision          | 0.7918     |
+| cosine_recall             | 0.7948     |
+| **cosine_ap**             | **0.8752** |
+| cosine_mcc                | 0.7518     |
+<!--
+## Bias, Risks and Limitations
+*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
+-->
+<!--
+### Recommendations
+*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
+-->
+## Training Details
+### Training Dataset
+#### Unnamed Dataset
+* Size: 98,660 training samples
+* Columns: <code>sentence1</code> and <code>sentence2</code>
+* Approximate statistics based on the first 1000 samples:
+  |         | sentence1                                                                            | sentence2                                                                          |
+  |:--------|:-------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|
+  | type    | string                                                                               | string                                                                             |
+  | details | <ul><li>min: 35 tokens</li><li>mean: 144.27 tokens</li><li>max: 319 tokens</li></ul> | <ul><li>min: 10 tokens</li><li>mean: 22.54 tokens</li><li>max: 41 tokens</li></ul> |
+* Samples:
+  | sentence1                                                                                                                                                                                                                                                                                                                                                                                        | sentence2                                                                                                                                                            |
+  |:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+  | <code>Instruct: Given a dialogue context, retrieve relevant followup phrase that align with the context<br>Dialogue Context: bot_0: What kind of car do you own? I have a jeep.</code>                                                                                                                                                                                                           | <code>Followup phrase: I don't own my own car! I actually really enjoying walking and running, but then again, I live in a small town and semi-close to work.</code> |
+  | <code>Instruct: Given a dialogue context, retrieve relevant followup phrase that align with the context<br>Dialogue Context: bot_0: What kind of car do you own? I have a jeep.<br>bot_1: I don't own my own car! I actually really enjoying walking and running, but then again, I live in a small town and semi-close to work.</code>                                                          | <code>Followup phrase: Ah I see! I like going to the gym to work out.</code>                                                                                         |
+  | <code>Instruct: Given a dialogue context, retrieve relevant followup phrase that align with the context<br>Dialogue Context: bot_0: What kind of car do you own? I have a jeep.<br>bot_1: I don't own my own car! I actually really enjoying walking and running, but then again, I live in a small town and semi-close to work.<br>bot_0: Ah I see! I like going to the gym to work out.</code> | <code>Followup phrase: I'm a computer programmer. What do you do for work.</code>                                                                                    |
+* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
+  ```json
+  {
+      "scale": 100,
+      "similarity_fct": "cos_sim"
+  }
+  ```
+### Evaluation Dataset
+#### Unnamed Dataset
+* Size: 67,104 evaluation samples
+* Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
+* Approximate statistics based on the first 1000 samples:
+  |         | sentence1                                                                            | sentence2                                                                           | label                                           |
+  |:--------|:-------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|:------------------------------------------------|
+  | type    | string                                                                               | string                                                                              | int                                             |
+  | details | <ul><li>min: 38 tokens</li><li>mean: 137.57 tokens</li><li>max: 290 tokens</li></ul> | <ul><li>min: 11 tokens</li><li>mean: 31.57 tokens</li><li>max: 106 tokens</li></ul> | <ul><li>0: ~83.30%</li><li>1: ~16.70%</li></ul> |
+* Samples:
+  | sentence1                                                                                                                                                     | sentence2                                                                                                                                                                                   | label          |
+  |:--------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------|
+  | <code>Instruct: Given a dialogue context, retrieve relevant followup phrase that align with the context<br>Dialogue Context: bot_0: Do you like music?</code> | <code>Followup phrase: Yes, you could say it is a great source of joy for me.</code>                                                                                                        | <code>1</code> |
+  | <code>Instruct: Given a dialogue context, retrieve relevant followup phrase that align with the context<br>Dialogue Context: bot_0: Do you like music?</code> | <code>Followup phrase: That sounds amazing! But I was thinking of going to mexico this summer and was going to ask if you were going to be there? Would your timeshare be available?</code> | <code>0</code> |
+  | <code>Instruct: Given a dialogue context, retrieve relevant followup phrase that align with the context<br>Dialogue Context: bot_0: Do you like music?</code> | <code>Followup phrase: Mostly just authentic mexican food, with lots of spice. </code>                                                                                                      | <code>0</code> |
+* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
+  ```json
+  {
+      "scale": 100,
+      "similarity_fct": "cos_sim"
+  }
+  ```
+### Training Hyperparameters
+#### Non-Default Hyperparameters
+- `eval_strategy`: epoch
+- `per_device_train_batch_size`: 100
+- `per_device_eval_batch_size`: 100
+- `weight_decay`: 0.01
+- `num_train_epochs`: 5
+- `bf16`: True
+- `load_best_model_at_end`: True
+- `prompts`: {'sentence1': 'Instruct: Given a dialogue context, retrieve relevant followup phrase that align with the context\nDialogue Context: ', 'sentence2': 'Followup phrase: '}
+- `batch_sampler`: no_duplicates
+#### All Hyperparameters
+<details><summary>Click to expand</summary>
+- `overwrite_output_dir`: False
+- `do_predict`: False
+- `eval_strategy`: epoch
+- `prediction_loss_only`: True
+- `per_device_train_batch_size`: 100
+- `per_device_eval_batch_size`: 100
+- `per_gpu_train_batch_size`: None
+- `per_gpu_eval_batch_size`: None
+- `gradient_accumulation_steps`: 1
+- `eval_accumulation_steps`: None
+- `torch_empty_cache_steps`: None
+- `learning_rate`: 5e-05
+- `weight_decay`: 0.01
+- `adam_beta1`: 0.9
+- `adam_beta2`: 0.999
+- `adam_epsilon`: 1e-08
+- `max_grad_norm`: 1.0
+- `num_train_epochs`: 5
+- `max_steps`: -1
+- `lr_scheduler_type`: linear
+- `lr_scheduler_kwargs`: {}
+- `warmup_ratio`: 0.0
+- `warmup_steps`: 0
+- `log_level`: passive
+- `log_level_replica`: warning
+- `log_on_each_node`: True
+- `logging_nan_inf_filter`: True
+- `save_safetensors`: True
+- `save_on_each_node`: False
+- `save_only_model`: False
+- `restore_callback_states_from_checkpoint`: False
+- `no_cuda`: False
+- `use_cpu`: False
+- `use_mps_device`: False
+- `seed`: 42
+- `data_seed`: None
+- `jit_mode_eval`: False
+- `use_ipex`: False
+- `bf16`: True
+- `fp16`: False
+- `fp16_opt_level`: O1
+- `half_precision_backend`: auto
+- `bf16_full_eval`: False
+- `fp16_full_eval`: False
+- `tf32`: None
+- `local_rank`: 0
+- `ddp_backend`: None
+- `tpu_num_cores`: None
+- `tpu_metrics_debug`: False
+- `debug`: []
+- `dataloader_drop_last`: False
+- `dataloader_num_workers`: 0
+- `dataloader_prefetch_factor`: None
+- `past_index`: -1
+- `disable_tqdm`: False
+- `remove_unused_columns`: True
+- `label_names`: None
+- `load_best_model_at_end`: True
+- `ignore_data_skip`: False
+- `fsdp`: []
+- `fsdp_min_num_params`: 0
+- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
+- `fsdp_transformer_layer_cls_to_wrap`: None
+- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
+- `deepspeed`: None
+- `label_smoothing_factor`: 0.0
+- `optim`: adamw_torch
+- `optim_args`: None
+- `adafactor`: False
+- `group_by_length`: False
+- `length_column_name`: length
+- `ddp_find_unused_parameters`: None
+- `ddp_bucket_cap_mb`: None
+- `ddp_broadcast_buffers`: False
+- `dataloader_pin_memory`: True
+- `dataloader_persistent_workers`: False
+- `skip_memory_metrics`: True
+- `use_legacy_prediction_loop`: False
+- `push_to_hub`: False
+- `resume_from_checkpoint`: None
+- `hub_model_id`: None
+- `hub_strategy`: every_save
+- `hub_private_repo`: None
+- `hub_always_push`: False
+- `gradient_checkpointing`: False
+- `gradient_checkpointing_kwargs`: None
+- `include_inputs_for_metrics`: False
+- `include_for_metrics`: []
+- `eval_do_concat_batches`: True
+- `fp16_backend`: auto
+- `push_to_hub_model_id`: None
+- `push_to_hub_organization`: None
+- `mp_parameters`:
+- `auto_find_batch_size`: False
+- `full_determinism`: False
+- `torchdynamo`: None
+- `ray_scope`: last
+- `ddp_timeout`: 1800
+- `torch_compile`: False
+- `torch_compile_backend`: None
+- `torch_compile_mode`: None
+- `include_tokens_per_second`: False
+- `include_num_input_tokens_seen`: False
+- `neftune_noise_alpha`: None
+- `optim_target_modules`: None
+- `batch_eval_metrics`: False
+- `eval_on_start`: False
+- `use_liger_kernel`: False
+- `eval_use_gather_object`: False
+- `average_tokens_across_devices`: False
+- `prompts`: {'sentence1': 'Instruct: Given a dialogue context, retrieve relevant followup phrase that align with the context\nDialogue Context: ', 'sentence2': 'Followup phrase: '}
+- `batch_sampler`: no_duplicates
+- `multi_dataset_batch_sampler`: proportional
+</details>
+### Training Logs
+| Epoch  | Step | Training Loss | Validation Loss | cosine_ap |
+|:------:|:----:|:-------------:|:---------------:|:---------:|
+| 0.1013 | 100  | 1.8292        | -               | -         |
+| 0.2026 | 200  | 1.4433        | -               | -         |
+| 0.3040 | 300  | 1.2605        | -               | -         |
+| 0.4053 | 400  | 1.1947        | -               | -         |
+| 0.5066 | 500  | 1.1714        | -               | -         |
+| 0.6079 | 600  | 1.1106        | -               | -         |
+| 0.7092 | 700  | 1.0978        | -               | -         |
+| 0.8105 | 800  | 1.0527        | -               | -         |
+| 0.9119 | 900  | 1.0524        | -               | -         |
+| 1.0    | 987  | -             | 8.1109          | 0.8790    |
+| 1.0132 | 1000 | 1.0068        | -               | -         |
+| 1.1145 | 1100 | 0.949         | -               | -         |
+| 1.2158 | 1200 | 0.9519        | -               | -         |
+| 1.3171 | 1300 | 0.9364        | -               | -         |
+| 1.4184 | 1400 | 0.9253        | -               | -         |
+| 1.5198 | 1500 | 0.9724        | -               | -         |
+| 1.6211 | 1600 | 0.9227        | -               | -         |
+| 1.7224 | 1700 | 0.9169        | -               | -         |
+| 1.8237 | 1800 | 0.9146        | -               | -         |
+| 1.9250 | 1900 | 0.9029        | -               | -         |
+| 2.0    | 1974 | -             | 8.4529          | 0.8727    |
+| 2.0263 | 2000 | 0.9073        | -               | -         |
+| 2.1277 | 2100 | 0.8685        | -               | -         |
+| 2.2290 | 2200 | 0.8413        | -               | -         |
+| 2.3303 | 2300 | 0.8763        | -               | -         |
+| 2.4316 | 2400 | 0.8524        | -               | -         |
+| 2.5329 | 2500 | 0.8729        | -               | -         |
+| 2.6342 | 2600 | 0.856         | -               | -         |
+| 2.7356 | 2700 | 0.8652        | -               | -         |
+| 2.8369 | 2800 | 0.8768        | -               | -         |
+| 2.9382 | 2900 | 0.8477        | -               | -         |
+| 3.0    | 2961 | -             | 8.7662          | 0.8752    |
+### Framework Versions
+- Python: 3.10.18
+- Sentence Transformers: 4.1.0
+- Transformers: 4.52.4
+- PyTorch: 2.7.1+cu128
+- Accelerate: 1.7.0
+- Datasets: 3.6.0
+- Tokenizers: 0.21.1
+## Citation
+### BibTeX
+#### Sentence Transformers
+```bibtex
+@inproceedings{reimers-2019-sentence-bert,
+    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+    author = "Reimers, Nils and Gurevych, Iryna",
+    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+    month = "11",
+    year = "2019",
+    publisher = "Association for Computational Linguistics",
+    url = "https://arxiv.org/abs/1908.10084",
+}
+```
+#### MultipleNegativesRankingLoss
+```bibtex
+@misc{henderson2017efficient,
+    title={Efficient Natural Language Response Suggestion for Smart Reply},
+    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
+    year={2017},
+    eprint={1705.00652},
+    archivePrefix={arXiv},
+    primaryClass={cs.CL}
+}
+```
+<!--
+## Glossary
+*Clearly define terms in order to be accessible across audiences.*
+-->
+<!--
+## Model Card Authors
+*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
+-->
+<!--
+## Model Card Contact
+*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
+-->

config.json ADDED Viewed

	@@ -0,0 +1,27 @@

+{
+  "architectures": [
+    "XLMRobertaModel"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "xlm-roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.52.4",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 250002
+}

config_sentence_transformers.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "__version__": {
+    "sentence_transformers": "4.1.0",
+    "transformers": "4.52.4",
+    "pytorch": "2.7.1+cu128"
+  },
+  "prompts": {},
+  "default_prompt_name": null,
+  "similarity_fn_name": "cosine"
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:192a6ea6e8b5fca5bfefc85485059ef8c7e8e01527e7a7b7a8895cbba747d8d0
+size 556109872

modules.json ADDED Viewed

	@@ -0,0 +1,20 @@

+[
+  {
+    "idx": 0,
+    "name": "0",
+    "path": "",
+    "type": "sentence_transformers.models.Transformer"
+  },
+  {
+    "idx": 1,
+    "name": "1",
+    "path": "1_Pooling",
+    "type": "sentence_transformers.models.Pooling"
+  },
+  {
+    "idx": 2,
+    "name": "2",
+    "path": "2_Normalize",
+    "type": "sentence_transformers.models.Normalize"
+  }
+]

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:278e9c23212b37442b7764e68c0b2abc70b5cbcc4ba82966ce07d5bd0ff12e22
+size 1109977547

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dacb1cbdf82d93fdaf9bcb6f81233ffa5d92a38358aff49bc545ce85d7b87ac9
+size 14645

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c54094e98adc8c1d754f1d2ddf68e540133856fc7ae96da2d0bd44b127e48fa8
+size 1465

sentence_bert_config.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "max_seq_length": 512,
+  "do_lower_case": false
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f64cd282203706c03b339e2b5dcc41cf53dc15a5d17aa401d4ff094cc5b28cc2
+size 17082986

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "250001": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "mask_token": "<mask>",
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "XLMRobertaTokenizer",
+  "truncation_side": "left",
+  "unk_token": "<unk>"
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,285 @@

+{
+  "best_global_step": 987,
+  "best_metric": 8.11091423034668,
+  "best_model_checkpoint": "printing_press/dialogue-context-learning/notebooks/results/intfloat/multilingual-e5-base/checkpoint-987",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 2961,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.10131712259371833,
+      "grad_norm": 17.75,
+      "learning_rate": 4.8996960486322194e-05,
+      "loss": 1.8292,
+      "step": 100
+    },
+    {
+      "epoch": 0.20263424518743667,
+      "grad_norm": 14.0625,
+      "learning_rate": 4.798378926038501e-05,
+      "loss": 1.4433,
+      "step": 200
+    },
+    {
+      "epoch": 0.303951367781155,
+      "grad_norm": 17.125,
+      "learning_rate": 4.6970618034447823e-05,
+      "loss": 1.2605,
+      "step": 300
+    },
+    {
+      "epoch": 0.40526849037487334,
+      "grad_norm": 17.0,
+      "learning_rate": 4.595744680851064e-05,
+      "loss": 1.1947,
+      "step": 400
+    },
+    {
+      "epoch": 0.5065856129685917,
+      "grad_norm": 13.0625,
+      "learning_rate": 4.494427558257346e-05,
+      "loss": 1.1714,
+      "step": 500
+    },
+    {
+      "epoch": 0.60790273556231,
+      "grad_norm": 13.125,
+      "learning_rate": 4.393110435663627e-05,
+      "loss": 1.1106,
+      "step": 600
+    },
+    {
+      "epoch": 0.7092198581560284,
+      "grad_norm": 12.75,
+      "learning_rate": 4.291793313069909e-05,
+      "loss": 1.0978,
+      "step": 700
+    },
+    {
+      "epoch": 0.8105369807497467,
+      "grad_norm": 15.6875,
+      "learning_rate": 4.190476190476191e-05,
+      "loss": 1.0527,
+      "step": 800
+    },
+    {
+      "epoch": 0.9118541033434651,
+      "grad_norm": 13.1875,
+      "learning_rate": 4.0891590678824726e-05,
+      "loss": 1.0524,
+      "step": 900
+    },
+    {
+      "epoch": 1.0,
+      "eval_cosine_accuracy": 0.9332975679542204,
+      "eval_cosine_accuracy_threshold": 0.7227392196655273,
+      "eval_cosine_ap": 0.8790038770944348,
+      "eval_cosine_f1": 0.7938560293443375,
+      "eval_cosine_f1_threshold": 0.721563458442688,
+      "eval_cosine_mcc": 0.7542385158462755,
+      "eval_cosine_precision": 0.8146997929606625,
+      "eval_cosine_recall": 0.774052217453505,
+      "eval_loss": 8.11091423034668,
+      "eval_runtime": 108.5283,
+      "eval_samples_per_second": 618.309,
+      "eval_steps_per_second": 6.192,
+      "step": 987
+    },
+    {
+      "epoch": 1.0131712259371835,
+      "grad_norm": 14.3125,
+      "learning_rate": 3.987841945288754e-05,
+      "loss": 1.0068,
+      "step": 1000
+    },
+    {
+      "epoch": 1.1144883485309016,
+      "grad_norm": 11.875,
+      "learning_rate": 3.8865248226950355e-05,
+      "loss": 0.949,
+      "step": 1100
+    },
+    {
+      "epoch": 1.21580547112462,
+      "grad_norm": 14.625,
+      "learning_rate": 3.7852077001013173e-05,
+      "loss": 0.9519,
+      "step": 1200
+    },
+    {
+      "epoch": 1.3171225937183384,
+      "grad_norm": 12.9375,
+      "learning_rate": 3.6838905775075985e-05,
+      "loss": 0.9364,
+      "step": 1300
+    },
+    {
+      "epoch": 1.4184397163120568,
+      "grad_norm": 14.0,
+      "learning_rate": 3.58257345491388e-05,
+      "loss": 0.9253,
+      "step": 1400
+    },
+    {
+      "epoch": 1.5197568389057752,
+      "grad_norm": 14.125,
+      "learning_rate": 3.481256332320163e-05,
+      "loss": 0.9724,
+      "step": 1500
+    },
+    {
+      "epoch": 1.6210739614994933,
+      "grad_norm": 14.4375,
+      "learning_rate": 3.379939209726444e-05,
+      "loss": 0.9227,
+      "step": 1600
+    },
+    {
+      "epoch": 1.7223910840932117,
+      "grad_norm": 14.125,
+      "learning_rate": 3.278622087132726e-05,
+      "loss": 0.9169,
+      "step": 1700
+    },
+    {
+      "epoch": 1.8237082066869301,
+      "grad_norm": 13.75,
+      "learning_rate": 3.1773049645390076e-05,
+      "loss": 0.9146,
+      "step": 1800
+    },
+    {
+      "epoch": 1.9250253292806483,
+      "grad_norm": 13.75,
+      "learning_rate": 3.075987841945289e-05,
+      "loss": 0.9029,
+      "step": 1900
+    },
+    {
+      "epoch": 2.0,
+      "eval_cosine_accuracy": 0.9311665474487363,
+      "eval_cosine_accuracy_threshold": 0.7132378816604614,
+      "eval_cosine_ap": 0.872736314842169,
+      "eval_cosine_f1": 0.7911621741051702,
+      "eval_cosine_f1_threshold": 0.7051230669021606,
+      "eval_cosine_mcc": 0.7488855960607996,
+      "eval_cosine_precision": 0.7821072863882579,
+      "eval_cosine_recall": 0.8004291845493562,
+      "eval_loss": 8.452939987182617,
+      "eval_runtime": 108.4938,
+      "eval_samples_per_second": 618.505,
+      "eval_steps_per_second": 6.194,
+      "step": 1974
+    },
+    {
+      "epoch": 2.026342451874367,
+      "grad_norm": 14.3125,
+      "learning_rate": 2.9746707193515705e-05,
+      "loss": 0.9073,
+      "step": 2000
+    },
+    {
+      "epoch": 2.127659574468085,
+      "grad_norm": 11.625,
+      "learning_rate": 2.8733535967578523e-05,
+      "loss": 0.8685,
+      "step": 2100
+    },
+    {
+      "epoch": 2.2289766970618032,
+      "grad_norm": 16.25,
+      "learning_rate": 2.772036474164134e-05,
+      "loss": 0.8413,
+      "step": 2200
+    },
+    {
+      "epoch": 2.330293819655522,
+      "grad_norm": 12.1875,
+      "learning_rate": 2.6707193515704153e-05,
+      "loss": 0.8763,
+      "step": 2300
+    },
+    {
+      "epoch": 2.43161094224924,
+      "grad_norm": 12.0,
+      "learning_rate": 2.569402228976697e-05,
+      "loss": 0.8524,
+      "step": 2400
+    },
+    {
+      "epoch": 2.5329280648429586,
+      "grad_norm": 12.5,
+      "learning_rate": 2.468085106382979e-05,
+      "loss": 0.8729,
+      "step": 2500
+    },
+    {
+      "epoch": 2.634245187436677,
+      "grad_norm": 11.625,
+      "learning_rate": 2.3667679837892608e-05,
+      "loss": 0.856,
+      "step": 2600
+    },
+    {
+      "epoch": 2.735562310030395,
+      "grad_norm": 13.3125,
+      "learning_rate": 2.2654508611955422e-05,
+      "loss": 0.8652,
+      "step": 2700
+    },
+    {
+      "epoch": 2.8368794326241136,
+      "grad_norm": 13.3125,
+      "learning_rate": 2.1641337386018237e-05,
+      "loss": 0.8768,
+      "step": 2800
+    },
+    {
+      "epoch": 2.9381965552178317,
+      "grad_norm": 13.5625,
+      "learning_rate": 2.0628166160081055e-05,
+      "loss": 0.8477,
+      "step": 2900
+    },
+    {
+      "epoch": 3.0,
+      "eval_cosine_accuracy": 0.9324928469241774,
+      "eval_cosine_accuracy_threshold": 0.6963315010070801,
+      "eval_cosine_ap": 0.8751572160892609,
+      "eval_cosine_f1": 0.7932711614832003,
+      "eval_cosine_f1_threshold": 0.6896486282348633,
+      "eval_cosine_mcc": 0.7518321554060445,
+      "eval_cosine_precision": 0.791752026365013,
+      "eval_cosine_recall": 0.7947961373390557,
+      "eval_loss": 8.766173362731934,
+      "eval_runtime": 109.0776,
+      "eval_samples_per_second": 615.195,
+      "eval_steps_per_second": 6.161,
+      "step": 2961
+    }
+  ],
+  "logging_steps": 100,
+  "max_steps": 4935,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 100,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:89f8a8756fb0e7a67bafa8aa85b53eadf7dec573a6db3c64dd8b76cf8ca93fb2
+size 6289