Upload folder using huggingface_hub
- .gitattributes +2 -0
- 1_Pooling/config.json +10 -0
- README.md +121 -0
- checkpoints/checkpoint-4240/1_Pooling/config.json +10 -0
- checkpoints/checkpoint-4240/README.md +537 -0
- checkpoints/checkpoint-4240/config.json +27 -0
- checkpoints/checkpoint-4240/config_sentence_transformers.json +14 -0
- checkpoints/checkpoint-4240/model.safetensors +3 -0
- checkpoints/checkpoint-4240/modules.json +20 -0
- checkpoints/checkpoint-4240/optimizer.pt +3 -0
- checkpoints/checkpoint-4240/rng_state.pth +3 -0
- checkpoints/checkpoint-4240/scheduler.pt +3 -0
- checkpoints/checkpoint-4240/sentence_bert_config.json +4 -0
- checkpoints/checkpoint-4240/special_tokens_map.json +51 -0
- checkpoints/checkpoint-4240/tokenizer.json +3 -0
- checkpoints/checkpoint-4240/tokenizer_config.json +56 -0
- checkpoints/checkpoint-4240/trainer_state.json +112 -0
- checkpoints/checkpoint-4240/training_args.bin +3 -0
- checkpoints/eval/Information-Retrieval_evaluation_validation_ir_eval_results.csv +2 -0
- config.json +27 -0
- config_sentence_transformers.json +14 -0
- eval/Information-Retrieval_evaluation_validation_ir_eval_results.csv +6 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +51 -0
- tokenizer.json +3 -0
- tokenizer_config.json +56 -0
.gitattributes
CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+checkpoints/checkpoint-4240/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
1_Pooling/config.json
ADDED
{
  "word_embedding_dimension": 1024,
  "pooling_mode_cls_token": false,
  "pooling_mode_mean_tokens": true,
  "pooling_mode_max_tokens": false,
  "pooling_mode_mean_sqrt_len_tokens": false,
  "pooling_mode_weightedmean_tokens": false,
  "pooling_mode_lasttoken": false,
  "include_prompt": true
}
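This config selects plain mean pooling (no CLS, max, or last-token pooling) over the 1024-dimensional token embeddings. As a point of reference, a minimal sketch of assembling the equivalent Transformer → Pooling → Normalize stack by hand with sentence-transformers; the trained model itself should simply be loaded from this repository:

```python
from sentence_transformers import SentenceTransformer, models

# Mirrors modules.json further down: Transformer -> Pooling -> Normalize,
# with the mean-pooling settings from 1_Pooling/config.json.
word_embedding_model = models.Transformer("intfloat/multilingual-e5-large-instruct", max_seq_length=512)
pooling_model = models.Pooling(
    word_embedding_dimension=1024,
    pooling_mode_mean_tokens=True,   # mean over non-padding tokens
    pooling_mode_cls_token=False,
    pooling_mode_max_tokens=False,
)
normalize_model = models.Normalize()
model = SentenceTransformer(modules=[word_embedding_model, pooling_model, normalize_model])
```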
README.md
ADDED
---
language:
- fa
- ar
- multilingual
license: apache-2.0
library_name: sentence-transformers
tags:
- sentence-transformers
- feature-extraction
- multilingual
- persian
- arabic
- qa
- information-retrieval
pipeline_tag: feature-extraction
---

# hamtaai/e5-large-instruct-hadith

This is a fine-tuned version of `intfloat/multilingual-e5-large-instruct` specifically optimized for Persian and Arabic text processing and question-answering tasks.

## Model Description

This model has been fine-tuned on a comprehensive dataset of Persian and Arabic religious texts, including Hadith collections.

The model is particularly effective for:
- Semantic search in Persian and Arabic texts
- Question-answering tasks
- Information retrieval
- Cross-lingual understanding between Persian and Arabic

## Training Configuration

- **Base Model**: intfloat/multilingual-e5-large-instruct
- **Epochs**: 5
- **Batch Size**: 72
- **Learning Rate**: 2e-05
- **Warmup Steps Ratio**: 0.1
- **Evaluation Steps Ratio**: 0.5

## Usage

### Using Sentence-Transformers

```python
from sentence_transformers import SentenceTransformer

# Load the model
model = SentenceTransformer('hamtaai/e5-large-instruct-hadith')

# The model was trained with "query: " and "passage: " prefixes, so prepend them to your inputs
query = "query: سوال شما اینجا"
passage = "passage: متن پاسخ اینجا"

# Encode texts
query_embedding = model.encode(query)
passage_embedding = model.encode(passage)

# Calculate similarity
from sentence_transformers.util import cos_sim
similarity = cos_sim(query_embedding, passage_embedding)
```

### Using Hugging Face Transformers

```python
from transformers import AutoTokenizer, AutoModel
import torch

tokenizer = AutoTokenizer.from_pretrained('hamtaai/e5-large-instruct-hadith')
model = AutoModel.from_pretrained('hamtaai/e5-large-instruct-hadith')

# Tokenize and encode
inputs = tokenizer("متن شما", return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)
embeddings = outputs.last_hidden_state.mean(dim=1)
```
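Note that `last_hidden_state.mean(dim=1)` above averages every position, padding included, and skips the L2 normalization that the Sentence Transformers stack for this model applies (mean `Pooling` followed by `Normalize`, see the module configs in this repository). A minimal sketch of matching that behaviour with plain Transformers; the `average_pool` helper is illustrative, not part of this repository:

```python
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModel

def average_pool(last_hidden_state: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    # Zero out padding positions before averaging, so padded batches match single-sentence results.
    masked = last_hidden_state.masked_fill(~attention_mask.unsqueeze(-1).bool(), 0.0)
    return masked.sum(dim=1) / attention_mask.sum(dim=1, keepdim=True)

tokenizer = AutoTokenizer.from_pretrained('hamtaai/e5-large-instruct-hadith')
model = AutoModel.from_pretrained('hamtaai/e5-large-instruct-hadith')

texts = ["query: سوال شما اینجا", "passage: متن پاسخ اینجا"]
batch = tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt")
with torch.no_grad():
    outputs = model(**batch)

embeddings = average_pool(outputs.last_hidden_state, batch["attention_mask"])
embeddings = F.normalize(embeddings, p=2, dim=1)   # mirrors the Normalize module
scores = embeddings[:1] @ embeddings[1:].T          # cosine similarity, since vectors are unit-length
```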
## Performance

This model has been optimized for Persian and Arabic text processing and shows improved performance on:
- Semantic similarity tasks
- Question-answering accuracy
- Cross-lingual retrieval
- Religious text understanding

## Training Data

The model was trained on a curated dataset of Persian and Arabic religious texts, including:
- Hadith collections
- Quranic commentaries (Tafsir)
- Religious question-answer pairs
- Contextual information for better understanding

## Limitations

- Primarily optimized for Persian and Arabic texts
- Performance may vary on other languages
- Best results achieved with proper text normalization
- Requires appropriate prefixes for instruct-based models

## Citation

If you use this model, please cite the original base model and mention this fine-tuned version:

```bibtex
@misc{hamtaai/e5_large_instruct_hadith,
  title={hamtaai/e5-large-instruct-hadith: Fine-tuned Multilingual E5 Model for Persian and Arabic Text Processing},
  author={Your Name},
  year={2025},
  publisher={Hugging Face},
  howpublished={\url{https://huggingface.co/hamtaai/e5-large-instruct-hadith}}
}
```

## License

This model is released under the Apache 2.0 License.
checkpoints/checkpoint-4240/1_Pooling/config.json
ADDED
{
  "word_embedding_dimension": 1024,
  "pooling_mode_cls_token": false,
  "pooling_mode_mean_tokens": true,
  "pooling_mode_max_tokens": false,
  "pooling_mode_mean_sqrt_len_tokens": false,
  "pooling_mode_weightedmean_tokens": false,
  "pooling_mode_lasttoken": false,
  "include_prompt": true
}
checkpoints/checkpoint-4240/README.md
ADDED
---
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- dense
- generated_from_trainer
- dataset_size:61039
- loss:MultipleNegativesRankingLoss
base_model: intfloat/multilingual-e5-large-instruct
widget:
- source_sentence: 'query: خداوند به عبدالله بن محمد چه وظایفی را در مدینه رسول الله
    صلی الله علیه و آله واگذار کرد؟'
  sentences:
  - 'passage: من لدن ادم الی ابیه عبد الله فی خیر فرقه و اکرم سبط و امنع رهط و اکلاحمل
    و اودع حجر اصطفاه الله و ارتضاه و اجتباه context: من لدن ادم الی ابیه عبد الله
    فی خیر فرقه و اکرم سبط و امنع رهط و اکلاحمل و اودع حجر اصطفاه الله و ارتضاه و
    اجتباه و اتاه من العلم مفاتیحه و من الحکم ینابیعه ابتعثه رحمه للعباد و ربیعا للبلاد
    و انزل الله الیه الکتاب فیه البیان و التبیان قرانا عربیا غیر ذی عوج لعلهم یتقون
    قد بینه للناس و نهجه بعلم قد فصله و دین قد اوضحه و فرایض قد اوجبها و حدود حدها
    للناس و بینها و امور قد کشفها لخلقه و اعلنها فیها دلاله الی النجاه و معالم تدعو
    الی هداه فبلغ رسول الله صلی الله علیه و اله ما'
  - 'passage: قباله وجهک و لا ترفعهما کل ذلک context: علی بن ابراهیم عن ابیه عن ابن
    ابی عمیر عن جمیل بن دراج عن زراره عن احدهما علیهماالسلام قال ترفع یدیک فی افتتاح
    الصلاه قباله وجهک و لا ترفعهما کل ذلک'
  - 'passage: بد الله بن محمد عما کان یتولاه من الحرب و الصلاه بمدینه رسول الله صلی
    الله علیه و اله اذ کان علی ما ذکرت من جهالته بحقک و استخفافه بقدرک context: بد
    الله بن محمد عما کان یتولاه من الحرب و الصلاه بمدینه رسول الله صلی الله علیه و
    اله اذ کان علی ما ذکرت من جهالته بحقک و استخفافه بقدرک و عند ما قرفک به و نسبک
    الیه من الامر الذی قد علم امیر المومنین براءتک منه و صدق نیتک فی ترک محاولته و
    انک لم توهل نفسک له و قد ولی امیر المومنین ما کان یلی من ذلک محمد بن الفضل و امره
    باکرامک و تبجیلک و الانتهاء الی امرک و رایک و التقرب الی الله و الی امیر المومنین
    بذلک و امیر المومنین مشتاق الیک یحب احداث العهد بک و النظر الیک فان نشطت'
- source_sentence: 'query: آیا میتوان برای وجود امام علی (ع) زمانی تعیین کرد یا مرزی
    قائل شد؟'
  sentences:
  - 'passage: لا یقال له: أقبل، فأقبل، ثم قال له: أدبر، فأدبر. فقال: و عزتی و جلالی
    ما خلقت خلقا هو أحب إلی منک. context: الامام علی علیه السلام لیس لاولیته ابتداء
    ولا لازلیته انقضاء هو الاول ولم یزل والباقی بلا اجل لا یقال له متی ولا یضرب له
    امد ب حتی قبل کل غایه ومده وکل احصاء وعده'
  - 'passage: لا یزال بنیانهم الذی بنوا ریبه فی قلوبهم الا ان تقطع قلوبهم context:
    ال وای شیء یمنعه من ذلک ثم تلا هذه الایه لا یزال بنیانهم الذی بنوا ریبه فی قلوبهم
    الا ان تقطع قلوبهم قال ثم قال تدری لای شیء تحیر ابن قیاما قال قلت لا قال انه تبع
    ابا الحسن علیه السلام فاتاه عن یمینه وعن شماله وهو یرید مسجد النبی صلی الله علیه
    و اله فالتفت الیه ابو الحسن علیه السلام فقال ما ترید حیرک الله قال ثم قال ارایت
    لو رجع الیهم موسی فقالوا لو نصبته لنا فاتبعناه واقتصصنا اثره ا هم کانوا اصوب قولا
    او من قال لن نبرح علیه عاکفین حتی یرجع الینا موسی قال قلت لا بل من قال'
  - 'passage: حملکما علی خلعه من رقابکما کما یخلع الحرون لجامه و هو الله ربی لا اشرک
    به شییا فلا تقولا اقل نفعا و اضعف دفعا فتستحقا اسم الشرک مع النفاق context: حملکما
    علی خلعه من رقابکما کما یخلع الحرون لجامه و هو الله ربی لا اشرک به شییا فلا تقولا
    اقل نفعا و اضعف دفعا فتستحقا اسم الشرک مع النفاق و اما قولکما انی اشجع فرسان العرب
    و هربکما من لعنی و دعایی فان لکل موقف عملا اذا اختلفت الاسنه و ماجت لبود الخیل
    و ملا سحراکما اجوافکما فثم یکفینی الله بکمال القلب و اما اذا ابیتما بانی ادعو
    الله فلا تجزعا من ان یدعو علیکما رجل ساحر من قوم سحره زعمتما اللهم اقعص الزبیر
    بشر قتله و اسفک دمه علی ضلاله و عرف طلحه المذله و ادخر لهما فی الاخره'
- source_sentence: 'query: اگر شخصی در حال تمتع باشد و نتواند هدیه ای برای قربانی
    پیدا کند، چه باید انجام دهد؟'
  sentences:
  - 'passage: سالت ابا عبد الله علیه السلام عن وداع قبر النبی صلی الله علیه و اله
    فقال تقول صلی الله علیک السلام علیک لا جعله الله اخر تسلیمی علیک context: محمد
    بن یحیی عن احمد بن محمد عن ابن فضال عن یونس بن یعقوب قال سالت ابا عبد الله علیه
    السلام عن وداع قبر النبی صلی الله علیه و اله فقال تقول صلی الله علیک السلام علیک
    لا جعله الله اخر تسلیمی علیک'
  - 'passage: علی بن ابراهیم عن ابیه عن ابن ابی عمیر عن هشام بن الحکم عن ابی عبد الله
    علیه السلام context: علی بن ابراهیم عن ابیه عن ابن ابی عمیر عن هشام بن الحکم عن
    ابی عبد الله علیه السلام قال قال البنفسج سید ادهانکم'
  - 'passage: سالته عن رجل تمتع فلم یجد ما یهدی به حتی اذا کان یوم النفر وجد ثمن شاه
    ا یذبح او یصوم قال بل یصوم فان ایام الذبح قد مضت context: احمد بن محمد بن ابی
    نصر عن عبد الکریم عن ابی بصیر عن احدهما علیهماالسلام قال سالته عن رجل تمتع فلم
    یجد ما یهدی به حتی اذا کان یوم النفر وجد ثمن شاه ا یذبح او یصوم قال بل یصوم فان
    ایام الذبح قد مضت'
- source_sentence: 'query: به گفته امام صادق (ع)، چه چیزی به جز موارد باطل کننده وضو،
    از بدن خارج میشود که نباید به آن توجه کرد؟'
  sentences:
  - 'passage: ثم تلا هذه الایه حبب الیکم الاءیمان وزینه فی قلوبکم وکره الیکم الکفر
    والفسوق والعصیان اولیک هم الراشدون context: علی بن ابراهیم عن ابیه عن حماد عن
    حریز عن فضیل بن یسارقال سالت ابا عبد الله علیه السلام عن الحب والبغض ا من الاءیمان
    هو فقال و هل الاءیمان الا الحب والبغض ثم تلا هذه الایه حبب الیکم الاءیمان وزینه
    فی قلوبکم وکره الیکم الکفر والفسوق والعصیان اولیک هم الراشدون'
  - 'passage: الذین انعم الله علیک بهما context: محمد بن اسماعیل عن الفضل بن شاذان
    و احمد بن ادریس عن محمد بن عبد الجبار جمیعا عن صفوان بن یحیی عن سالم ابی الفضل
    عن ابی عبد الله علیه السلام قال لیس ینقض الوضوء الا ما خرج من طرفیک الاسفلین اللذین
    انعم الله علیک بهما'
  - 'passage: ثم خلی سبیلهما context: بطن قد بدت عروق فخذیه و قد زنی بامراه مریضه
    فامر رسول الله صلی الله علیه و اله بعذق فیه مایه شمراخ فضرب به الرجل ضربه و ضربت
    به المراه ضربه ثم خلی سبیلهما ثم قرا هذه الایه و خذ بیدک ضغثا فاضرب به و لا تحنث'
- source_sentence: 'query: ابلیس به موسی چه توصیهای در مورد صدقه و نیت خالصانه کرد؟'
  sentences:
  - 'passage: قال من السنه لبس الخاتم context: ابو علی الاشعری عن الحسن بن علی الکوفی
    عن عبیس بن هشام عن حسین بن احمد المنقری عن یونس بن ظبیان عن ابی عبد الله علیه
    السلام قال من السنه لبس الخاتم'
  - 'passage: قال المکاری و الجمال الذی یختلف و لیس له مقام یتم الصلاه و یصوم شهر
    رمضان context: علی بن ابراهیم عن ابیه و محمد بن اسماعیل عن الفضل بن شاذان جمیعا
    عن ابن ابی عمیر عن هشام بن الحکم عن ابی عبد الله علیه السلام قال المکاری و الجمال
    الذی یختلف و لیس له مقام یتم الصلاه و یصوم شهر رمضان'
  - 'passage: قال ابلیس لموسی علیه السلام اذا هممت بصدقه فامضها واذا هم العبد بصدقه
    کنت صاحبه دون اصحابی حتى احول بینه وبینها context: قصص الانبیاء عنهم علیهم السلام
    قال ابلیس لموسی علیه السلام اذا هممت بصدقه فامضها واذا هم العبد بصدقه کنت صاحبه
    دون اصحابی حتی احول بینه وبینها'
pipeline_tag: sentence-similarity
library_name: sentence-transformers
metrics:
- cosine_accuracy@1
- cosine_accuracy@3
- cosine_accuracy@5
- cosine_accuracy@10
- cosine_precision@1
- cosine_precision@3
- cosine_precision@5
- cosine_precision@10
- cosine_recall@1
- cosine_recall@3
- cosine_recall@5
- cosine_recall@10
- cosine_ndcg@10
- cosine_mrr@10
- cosine_map@100
model-index:
- name: SentenceTransformer based on intfloat/multilingual-e5-large-instruct
  results:
  - task:
      type: information-retrieval
      name: Information Retrieval
    dataset:
      name: validation ir eval
      type: validation_ir_eval
    metrics:
    - type: cosine_accuracy@1
      value: 0.6168679041268698
      name: Cosine Accuracy@1
    - type: cosine_accuracy@3
      value: 0.8619243434525468
      name: Cosine Accuracy@3
    - type: cosine_accuracy@5
      value: 0.9229181343075739
      name: Cosine Accuracy@5
    - type: cosine_accuracy@10
      value: 0.9657104474188634
      name: Cosine Accuracy@10
    - type: cosine_precision@1
      value: 0.6168679041268698
      name: Cosine Precision@1
    - type: cosine_precision@3
      value: 0.28730811448418225
      name: Cosine Precision@3
    - type: cosine_precision@5
      value: 0.18458362686151478
      name: Cosine Precision@5
    - type: cosine_precision@10
      value: 0.09657104474188634
      name: Cosine Precision@10
    - type: cosine_recall@1
      value: 0.6168679041268698
      name: Cosine Recall@1
    - type: cosine_recall@3
      value: 0.8619243434525468
      name: Cosine Recall@3
    - type: cosine_recall@5
      value: 0.9229181343075739
      name: Cosine Recall@5
    - type: cosine_recall@10
      value: 0.9657104474188634
      name: Cosine Recall@10
    - type: cosine_ndcg@10
      value: 0.8018625829452034
      name: Cosine Ndcg@10
    - type: cosine_mrr@10
      value: 0.7480495686208372
      name: Cosine Mrr@10
    - type: cosine_map@100
      value: 0.7499351289030992
      name: Cosine Map@100
---
# SentenceTransformer based on intfloat/multilingual-e5-large-instruct

This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [intfloat/multilingual-e5-large-instruct](https://huggingface.co/intfloat/multilingual-e5-large-instruct). It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

## Model Details

### Model Description
- **Model Type:** Sentence Transformer
- **Base model:** [intfloat/multilingual-e5-large-instruct](https://huggingface.co/intfloat/multilingual-e5-large-instruct) <!-- at revision 274baa43b0e13e37fafa6428dbc7938e62e5c439 -->
- **Maximum Sequence Length:** 512 tokens
- **Output Dimensionality:** 1024 dimensions
- **Similarity Function:** Cosine Similarity
<!-- - **Training Dataset:** Unknown -->
<!-- - **Language:** Unknown -->
<!-- - **License:** Unknown -->

### Model Sources

- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)

### Full Model Architecture

```
SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False, 'architecture': 'XLMRobertaModel'})
  (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)
```

## Usage

### Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

```bash
pip install -U sentence-transformers
```

Then you can load this model and run inference.
```python
from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("sentence_transformers_model_id")
# Run inference
sentences = [
    'query: ابلیس به موسی چه توصیهای در مورد صدقه و نیت خالصانه کرد؟',
    'passage: قال ابلیس لموسی علیه السلام اذا هممت بصدقه فامضها واذا هم العبد بصدقه کنت صاحبه دون اصحابی حتى احول بینه وبینها context: قصص الانبیاء عنهم علیهم السلام قال ابلیس لموسی علیه السلام اذا هممت بصدقه فامضها واذا هم العبد بصدقه کنت صاحبه دون اصحابی حتی احول بینه وبینها',
    'passage: قال المکاری و الجمال الذی یختلف و لیس له مقام یتم الصلاه و یصوم شهر رمضان context: علی بن ابراهیم عن ابیه و محمد بن اسماعیل عن الفضل بن شاذان جمیعا عن ابن ابی عمیر عن هشام بن الحکم عن ابی عبد الله علیه السلام قال المکاری و الجمال الذی یختلف و لیس له مقام یتم الصلاه و یصوم شهر رمضان',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 1024]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities)
# tensor([[ 1.0000, 0.7819, -0.0468],
#         [ 0.7819, 1.0000, -0.0474],
#         [-0.0468, -0.0474, 1.0000]])
```

<!--
### Direct Usage (Transformers)

<details><summary>Click to see the direct usage in Transformers</summary>

</details>
-->

<!--
### Downstream Usage (Sentence Transformers)

You can finetune this model on your own dataset.

<details><summary>Click to expand</summary>

</details>
-->

<!--
### Out-of-Scope Use

*List how the model may foreseeably be misused and address what users ought not to do with the model.*
-->

## Evaluation

### Metrics

#### Information Retrieval

* Dataset: `validation_ir_eval`
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)

| Metric              | Value      |
|:--------------------|:-----------|
| cosine_accuracy@1   | 0.6169     |
| cosine_accuracy@3   | 0.8619     |
| cosine_accuracy@5   | 0.9229     |
| cosine_accuracy@10  | 0.9657     |
| cosine_precision@1  | 0.6169     |
| cosine_precision@3  | 0.2873     |
| cosine_precision@5  | 0.1846     |
| cosine_precision@10 | 0.0966     |
| cosine_recall@1     | 0.6169     |
| cosine_recall@3     | 0.8619     |
| cosine_recall@5     | 0.9229     |
| cosine_recall@10    | 0.9657     |
| **cosine_ndcg@10**  | **0.8019** |
| cosine_mrr@10       | 0.748      |
| cosine_map@100      | 0.7499     |
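The figures above were produced with `InformationRetrievalEvaluator` on the validation split. A minimal sketch of running the same kind of evaluation, assuming you supply your own data: the query, corpus, and relevance entries below are placeholders rather than the actual validation set, and real inputs should keep the `query: ` / `passage: ` prefixes:

```python
from sentence_transformers import SentenceTransformer
from sentence_transformers.evaluation import InformationRetrievalEvaluator

model = SentenceTransformer("hamtaai/e5-large-instruct-hadith")

# Placeholder IDs and texts; substitute the real validation queries and passages.
queries = {"q1": "query: placeholder question"}
corpus = {f"d{i}": f"passage: placeholder passage {i}" for i in range(100)}
relevant_docs = {"q1": {"d1"}}  # query id -> set of relevant corpus ids

evaluator = InformationRetrievalEvaluator(queries, corpus, relevant_docs, name="validation_ir_eval")
results = evaluator(model)
print(results)  # includes keys such as validation_ir_eval_cosine_ndcg@10
```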
+
<!--
|
| 293 |
+
## Bias, Risks and Limitations
|
| 294 |
+
|
| 295 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
| 296 |
+
-->
|
| 297 |
+
|
| 298 |
+
<!--
|
| 299 |
+
### Recommendations
|
| 300 |
+
|
| 301 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
| 302 |
+
-->
|
| 303 |
+
|
| 304 |
+
## Training Details
|
| 305 |
+
|
| 306 |
+
### Training Dataset
|
| 307 |
+
|
| 308 |
+
#### Unnamed Dataset
|
| 309 |
+
|
| 310 |
+
* Size: 61,039 training samples
|
| 311 |
+
* Columns: <code>sentence_0</code> and <code>sentence_1</code>
|
| 312 |
+
* Approximate statistics based on the first 1000 samples:
|
| 313 |
+
| | sentence_0 | sentence_1 |
|
| 314 |
+
|:--------|:-----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
|
| 315 |
+
| type | string | string |
|
| 316 |
+
| details | <ul><li>min: 11 tokens</li><li>mean: 24.36 tokens</li><li>max: 57 tokens</li></ul> | <ul><li>min: 34 tokens</li><li>mean: 113.04 tokens</li><li>max: 468 tokens</li></ul> |
|
| 317 |
+
* Samples:
|
| 318 |
+
| sentence_0 | sentence_1 |
|
| 319 |
+
|:----------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
| 320 |
+
| <code>query: به نظر امیرالمومنین (ع)، ربا در این امت چگونه است و چه عاقبتی دارد؟</code> | <code>passage: والله للربا فی هذه الامه اخفی من دبیب النمل علی الصفا شوبوا ایمانکم بالصدق التاجر فاجر والفاجر فی النار context: عده من اصحابنا عن احمد بن محمد عن عثمان بن عیسی عن ابی الجارود عن الاصبغ بن نباته قال سمعت امیر المومنین علیه السلام یقول علی المنبر یا معشر التجار الفقه ثم المتجر الفقه ثم المتجر الفقه ثم المتجر والله للربا فی هذه الامه اخفی من دبیب النمل علی الصفا شوبوا ایمانکم بالصدق التاجر فاجر والفاجر فی النار الا من اخذ الحق واعطی الحق</code> |
|
| 321 |
+
| <code>query: در چه سنی شیر شتر مادر، حق مصرف شیر را پیدا میکند؟</code> | <code>passage: ففیها حقه طروقه الفحل ثم لیس فیها شیء حتی تبلغ ستین فاذا بلغت ستین ففیها جذعه context: ه ابن اللبون والانثی ابنه لبون لان امه حملت غیره ووضعته فصارت ذات لبن راجع الصحاح ج 6 ص 2192 النهایه ج 4 ص 228 لبن ثم لیس فیها شیء حتی تبلغ خمسا و اربعین فاذا بلغت خمسا و اربعین ففیها حقه طروقه الفحل ثم لیس فیها شیء حتی تبلغ ستین فاذا بلغت ستین ففیها جذعه ثم لیس فیها شیء حتی تبلغ خمسا و سبعین فاذا بلغت خمسا و سبعین ففیها ابنتا لبون ثم لیس فیها شیء حتی تبلغ تسعین فاذا بلغت تسعین ففیها حقتان طروقتا الفحل ثم لیس فیها شیء حتی تبلغ عشرین و مایه فاذا بلغت عشرین و مایه ففیها حقتان</code> |
|
| 322 |
+
| <code>query: برکت و محافظت از عقیقه به چه چیزی مرتبط است؟</code> | <code>passage: اللهم و عزتی و جلالی ما خلقت خلقا هو أحب إلی منک. context: عده من اصحابنا عن سهل بن زیاد عن بعض اصحابه یرفعه عن ابی عبد الله علیه السلام قال تقول علی العقیقه و ذکر مثله وزاد فیه اللهم لحمها بلحمه ودمها بدمه وعظمها بعظمه وشعرها بشعره وجلدها بجلده اللهم اجعلها وقاء لفلان بن فلان</code> |
|
| 323 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
| 324 |
+
```json
|
| 325 |
+
{
|
| 326 |
+
"scale": 20.0,
|
| 327 |
+
"similarity_fct": "cos_sim",
|
| 328 |
+
"gather_across_devices": false
|
| 329 |
+
}
|
| 330 |
+
```
|
| 331 |
+
|
| 332 |
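For orientation, a minimal sketch of a comparable fine-tuning setup with the Sentence Transformers trainer API, using the batch size and epoch count from the hyperparameters section below and the learning rate and warmup ratio reported in the top-level model card; the two-row dataset is a stand-in for the real 61,039 training pairs, and evaluator wiring is omitted:

```python
from datasets import Dataset
from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer, SentenceTransformerTrainingArguments
from sentence_transformers.losses import MultipleNegativesRankingLoss

model = SentenceTransformer("intfloat/multilingual-e5-large-instruct")

# Stand-in rows; real pairs use the "query: ..." / "passage: ... context: ..." format shown above.
train_dataset = Dataset.from_dict({
    "sentence_0": ["query: placeholder question 1", "query: placeholder question 2"],
    "sentence_1": ["passage: placeholder passage 1", "passage: placeholder passage 2"],
})

loss = MultipleNegativesRankingLoss(model)  # in-batch negatives; defaults to scale=20.0 and cosine similarity

args = SentenceTransformerTrainingArguments(
    output_dir="checkpoints",
    num_train_epochs=5,
    per_device_train_batch_size=72,
    learning_rate=2e-5,   # value reported in the top-level model card
    warmup_ratio=0.1,     # value reported in the top-level model card
)

trainer = SentenceTransformerTrainer(model=model, args=args, train_dataset=train_dataset, loss=loss)
trainer.train()
```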
+
### Training Hyperparameters
|
| 333 |
+
#### Non-Default Hyperparameters
|
| 334 |
+
|
| 335 |
+
- `eval_strategy`: steps
|
| 336 |
+
- `per_device_train_batch_size`: 72
|
| 337 |
+
- `per_device_eval_batch_size`: 72
|
| 338 |
+
- `num_train_epochs`: 5
|
| 339 |
+
- `multi_dataset_batch_sampler`: round_robin
|
| 340 |
+
|
| 341 |
+
#### All Hyperparameters
|
| 342 |
+
<details><summary>Click to expand</summary>
|
| 343 |
+
|
| 344 |
+
- `overwrite_output_dir`: False
|
| 345 |
+
- `do_predict`: False
|
| 346 |
+
- `eval_strategy`: steps
|
| 347 |
+
- `prediction_loss_only`: True
|
| 348 |
+
- `per_device_train_batch_size`: 72
|
| 349 |
+
- `per_device_eval_batch_size`: 72
|
| 350 |
+
- `per_gpu_train_batch_size`: None
|
| 351 |
+
- `per_gpu_eval_batch_size`: None
|
| 352 |
+
- `gradient_accumulation_steps`: 1
|
| 353 |
+
- `eval_accumulation_steps`: None
|
| 354 |
+
- `torch_empty_cache_steps`: None
|
| 355 |
+
- `learning_rate`: 5e-05
|
| 356 |
+
- `weight_decay`: 0.0
|
| 357 |
+
- `adam_beta1`: 0.9
|
| 358 |
+
- `adam_beta2`: 0.999
|
| 359 |
+
- `adam_epsilon`: 1e-08
|
| 360 |
+
- `max_grad_norm`: 1
|
| 361 |
+
- `num_train_epochs`: 5
|
| 362 |
+
- `max_steps`: -1
|
| 363 |
+
- `lr_scheduler_type`: linear
|
| 364 |
+
- `lr_scheduler_kwargs`: {}
|
| 365 |
+
- `warmup_ratio`: 0.0
|
| 366 |
+
- `warmup_steps`: 0
|
| 367 |
+
- `log_level`: passive
|
| 368 |
+
- `log_level_replica`: warning
|
| 369 |
+
- `log_on_each_node`: True
|
| 370 |
+
- `logging_nan_inf_filter`: True
|
| 371 |
+
- `save_safetensors`: True
|
| 372 |
+
- `save_on_each_node`: False
|
| 373 |
+
- `save_only_model`: False
|
| 374 |
+
- `restore_callback_states_from_checkpoint`: False
|
| 375 |
+
- `no_cuda`: False
|
| 376 |
+
- `use_cpu`: False
|
| 377 |
+
- `use_mps_device`: False
|
| 378 |
+
- `seed`: 42
|
| 379 |
+
- `data_seed`: None
|
| 380 |
+
- `jit_mode_eval`: False
|
| 381 |
+
- `use_ipex`: False
|
| 382 |
+
- `bf16`: False
|
| 383 |
+
- `fp16`: False
|
| 384 |
+
- `fp16_opt_level`: O1
|
| 385 |
+
- `half_precision_backend`: auto
|
| 386 |
+
- `bf16_full_eval`: False
|
| 387 |
+
- `fp16_full_eval`: False
|
| 388 |
+
- `tf32`: None
|
| 389 |
+
- `local_rank`: 0
|
| 390 |
+
- `ddp_backend`: None
|
| 391 |
+
- `tpu_num_cores`: None
|
| 392 |
+
- `tpu_metrics_debug`: False
|
| 393 |
+
- `debug`: []
|
| 394 |
+
- `dataloader_drop_last`: False
|
| 395 |
+
- `dataloader_num_workers`: 0
|
| 396 |
+
- `dataloader_prefetch_factor`: None
|
| 397 |
+
- `past_index`: -1
|
| 398 |
+
- `disable_tqdm`: False
|
| 399 |
+
- `remove_unused_columns`: True
|
| 400 |
+
- `label_names`: None
|
| 401 |
+
- `load_best_model_at_end`: False
|
| 402 |
+
- `ignore_data_skip`: False
|
| 403 |
+
- `fsdp`: []
|
| 404 |
+
- `fsdp_min_num_params`: 0
|
| 405 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
| 406 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
| 407 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
| 408 |
+
- `parallelism_config`: None
|
| 409 |
+
- `deepspeed`: None
|
| 410 |
+
- `label_smoothing_factor`: 0.0
|
| 411 |
+
- `optim`: adamw_torch_fused
|
| 412 |
+
- `optim_args`: None
|
| 413 |
+
- `adafactor`: False
|
| 414 |
+
- `group_by_length`: False
|
| 415 |
+
- `length_column_name`: length
|
| 416 |
+
- `ddp_find_unused_parameters`: None
|
| 417 |
+
- `ddp_bucket_cap_mb`: None
|
| 418 |
+
- `ddp_broadcast_buffers`: False
|
| 419 |
+
- `dataloader_pin_memory`: True
|
| 420 |
+
- `dataloader_persistent_workers`: False
|
| 421 |
+
- `skip_memory_metrics`: True
|
| 422 |
+
- `use_legacy_prediction_loop`: False
|
| 423 |
+
- `push_to_hub`: False
|
| 424 |
+
- `resume_from_checkpoint`: None
|
| 425 |
+
- `hub_model_id`: None
|
| 426 |
+
- `hub_strategy`: every_save
|
| 427 |
+
- `hub_private_repo`: None
|
| 428 |
+
- `hub_always_push`: False
|
| 429 |
+
- `hub_revision`: None
|
| 430 |
+
- `gradient_checkpointing`: False
|
| 431 |
+
- `gradient_checkpointing_kwargs`: None
|
| 432 |
+
- `include_inputs_for_metrics`: False
|
| 433 |
+
- `include_for_metrics`: []
|
| 434 |
+
- `eval_do_concat_batches`: True
|
| 435 |
+
- `fp16_backend`: auto
|
| 436 |
+
- `push_to_hub_model_id`: None
|
| 437 |
+
- `push_to_hub_organization`: None
|
| 438 |
+
- `mp_parameters`:
|
| 439 |
+
- `auto_find_batch_size`: False
|
| 440 |
+
- `full_determinism`: False
|
| 441 |
+
- `torchdynamo`: None
|
| 442 |
+
- `ray_scope`: last
|
| 443 |
+
- `ddp_timeout`: 1800
|
| 444 |
+
- `torch_compile`: False
|
| 445 |
+
- `torch_compile_backend`: None
|
| 446 |
+
- `torch_compile_mode`: None
|
| 447 |
+
- `include_tokens_per_second`: False
|
| 448 |
+
- `include_num_input_tokens_seen`: False
|
| 449 |
+
- `neftune_noise_alpha`: None
|
| 450 |
+
- `optim_target_modules`: None
|
| 451 |
+
- `batch_eval_metrics`: False
|
| 452 |
+
- `eval_on_start`: False
|
| 453 |
+
- `use_liger_kernel`: False
|
| 454 |
+
- `liger_kernel_config`: None
|
| 455 |
+
- `eval_use_gather_object`: False
|
| 456 |
+
- `average_tokens_across_devices`: False
|
| 457 |
+
- `prompts`: None
|
| 458 |
+
- `batch_sampler`: batch_sampler
|
| 459 |
+
- `multi_dataset_batch_sampler`: round_robin
|
| 460 |
+
- `router_mapping`: {}
|
| 461 |
+
- `learning_rate_mapping`: {}
|
| 462 |
+
|
| 463 |
+
</details>
|
| 464 |
+
|
| 465 |
+
### Training Logs
|
| 466 |
+
| Epoch | Step | Training Loss | validation_ir_eval_cosine_ndcg@10 |
|
| 467 |
+
|:------:|:----:|:-------------:|:---------------------------------:|
|
| 468 |
+
| 0.5896 | 500 | 0.6209 | - |
|
| 469 |
+
| 1.0 | 848 | - | 0.6807 |
|
| 470 |
+
| 1.1792 | 1000 | 0.1483 | - |
|
| 471 |
+
| 1.7689 | 1500 | 0.0913 | - |
|
| 472 |
+
| 2.0 | 1696 | - | 0.7406 |
|
| 473 |
+
| 2.3585 | 2000 | 0.0596 | - |
|
| 474 |
+
| 2.9481 | 2500 | 0.0424 | - |
|
| 475 |
+
| 3.0 | 2544 | - | 0.7692 |
|
| 476 |
+
| 3.5377 | 3000 | 0.0264 | - |
|
| 477 |
+
| 4.0 | 3392 | - | 0.7941 |
|
| 478 |
+
| 4.1274 | 3500 | 0.0238 | - |
|
| 479 |
+
| 4.7170 | 4000 | 0.0174 | - |
|
| 480 |
+
| 5.0 | 4240 | - | 0.8019 |
|
| 481 |
+
|
| 482 |
+
|
| 483 |
+
### Framework Versions
|
| 484 |
+
- Python: 3.12.3
|
| 485 |
+
- Sentence Transformers: 5.1.0
|
| 486 |
+
- Transformers: 4.56.1
|
| 487 |
+
- PyTorch: 2.8.0+cu128
|
| 488 |
+
- Accelerate: 1.10.1
|
| 489 |
+
- Datasets: 4.0.0
|
| 490 |
+
- Tokenizers: 0.22.0
|
| 491 |
+
|
| 492 |
+
## Citation
|
| 493 |
+
|
| 494 |
+
### BibTeX
|
| 495 |
+
|
| 496 |
+
#### Sentence Transformers
|
| 497 |
+
```bibtex
|
| 498 |
+
@inproceedings{reimers-2019-sentence-bert,
|
| 499 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
| 500 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
| 501 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
| 502 |
+
month = "11",
|
| 503 |
+
year = "2019",
|
| 504 |
+
publisher = "Association for Computational Linguistics",
|
| 505 |
+
url = "https://arxiv.org/abs/1908.10084",
|
| 506 |
+
}
|
| 507 |
+
```
|
| 508 |
+
|
| 509 |
+
#### MultipleNegativesRankingLoss
|
| 510 |
+
```bibtex
|
| 511 |
+
@misc{henderson2017efficient,
|
| 512 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
| 513 |
+
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
| 514 |
+
year={2017},
|
| 515 |
+
eprint={1705.00652},
|
| 516 |
+
archivePrefix={arXiv},
|
| 517 |
+
primaryClass={cs.CL}
|
| 518 |
+
}
|
| 519 |
+
```
|
| 520 |
+
|
| 521 |
+
<!--
|
| 522 |
+
## Glossary
|
| 523 |
+
|
| 524 |
+
*Clearly define terms in order to be accessible across audiences.*
|
| 525 |
+
-->
|
| 526 |
+
|
| 527 |
+
<!--
|
| 528 |
+
## Model Card Authors
|
| 529 |
+
|
| 530 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
| 531 |
+
-->
|
| 532 |
+
|
| 533 |
+
<!--
|
| 534 |
+
## Model Card Contact
|
| 535 |
+
|
| 536 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
| 537 |
+
-->
|
checkpoints/checkpoint-4240/config.json
ADDED
{
  "architectures": [
    "XLMRobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "dtype": "float32",
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "output_past": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.56.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 250002
}
checkpoints/checkpoint-4240/config_sentence_transformers.json
ADDED
{
  "__version__": {
    "sentence_transformers": "5.1.0",
    "transformers": "4.56.1",
    "pytorch": "2.8.0+cu128"
  },
  "model_type": "SentenceTransformer",
  "prompts": {
    "query": "",
    "document": ""
  },
  "default_prompt_name": null,
  "similarity_fn_name": "cosine"
}
checkpoints/checkpoint-4240/model.safetensors
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:b5c97ff7a7f77ac0fabc410d09a335d673ea1ab3d901ccc1f528e011731f24bc
size 2239607176
checkpoints/checkpoint-4240/modules.json
ADDED
[
  {
    "idx": 0,
    "name": "0",
    "path": "",
    "type": "sentence_transformers.models.Transformer"
  },
  {
    "idx": 1,
    "name": "1",
    "path": "1_Pooling",
    "type": "sentence_transformers.models.Pooling"
  },
  {
    "idx": 2,
    "name": "2",
    "path": "2_Normalize",
    "type": "sentence_transformers.models.Normalize"
  }
]
checkpoints/checkpoint-4240/optimizer.pt
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:07638aa6c5ec864bad7d77e7b82845bb7093138afcb34741b3e30ce9c38b00bd
size 4471061702
checkpoints/checkpoint-4240/rng_state.pth
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:dee108a83ac15b08a91aec4df1478a17218b17cb393312e09f1380f3ad3089e3
size 14645
checkpoints/checkpoint-4240/scheduler.pt
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:7693c53521ac2b85d7e5be254d0eb9c7dd0f1ff18c0323e60d38543987b066f0
size 1465
checkpoints/checkpoint-4240/sentence_bert_config.json
ADDED
{
  "max_seq_length": 512,
  "do_lower_case": false
}
checkpoints/checkpoint-4240/special_tokens_map.json
ADDED
{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "cls_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "content": "<mask>",
    "lstrip": true,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<pad>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "sep_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
checkpoints/checkpoint-4240/tokenizer.json
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:883b037111086fd4dfebbbc9b7cee11e1517b5e0c0514879478661440f137085
size 17082987
checkpoints/checkpoint-4240/tokenizer_config.json
ADDED
{
  "added_tokens_decoder": {
    "0": {
      "content": "<s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<pad>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "3": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "250001": {
      "content": "<mask>",
      "lstrip": true,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "additional_special_tokens": [],
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": true,
  "cls_token": "<s>",
  "eos_token": "</s>",
  "extra_special_tokens": {},
  "mask_token": "<mask>",
  "model_max_length": 512,
  "pad_token": "<pad>",
  "sep_token": "</s>",
  "tokenizer_class": "XLMRobertaTokenizer",
  "unk_token": "<unk>"
}
checkpoints/checkpoint-4240/trainer_state.json
ADDED
{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.0,
  "eval_steps": 4240,
  "global_step": 4240,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.589622641509434,
      "grad_norm": 8.571627616882324,
      "learning_rate": 1.9606402519023878e-05,
      "loss": 0.6209,
      "step": 500
    },
    {
      "epoch": 1.179245283018868,
      "grad_norm": 3.676194667816162,
      "learning_rate": 1.69824193125164e-05,
      "loss": 0.1483,
      "step": 1000
    },
    {
      "epoch": 1.7688679245283019,
      "grad_norm": 5.61079216003418,
      "learning_rate": 1.4358436106008921e-05,
      "loss": 0.0913,
      "step": 1500
    },
    {
      "epoch": 2.358490566037736,
      "grad_norm": 1.2366987466812134,
      "learning_rate": 1.1734452899501446e-05,
      "loss": 0.0596,
      "step": 2000
    },
    {
      "epoch": 2.94811320754717,
      "grad_norm": 3.220940351486206,
      "learning_rate": 9.110469692993965e-06,
      "loss": 0.0424,
      "step": 2500
    },
    {
      "epoch": 3.5377358490566038,
      "grad_norm": 2.3057520389556885,
      "learning_rate": 6.486486486486487e-06,
      "loss": 0.0264,
      "step": 3000
    },
    {
      "epoch": 4.127358490566038,
      "grad_norm": 2.4243733882904053,
      "learning_rate": 3.862503279979009e-06,
      "loss": 0.0238,
      "step": 3500
    },
    {
      "epoch": 4.716981132075472,
      "grad_norm": 0.6340435743331909,
      "learning_rate": 1.2385200734715299e-06,
      "loss": 0.0174,
      "step": 4000
    },
    {
      "epoch": 5.0,
      "eval_runtime": 555.4101,
      "eval_samples_per_second": 0.0,
      "eval_steps_per_second": 0.0,
      "eval_validation_ir_eval_cosine_accuracy@1": 0.6168679041268698,
      "eval_validation_ir_eval_cosine_accuracy@10": 0.9657104474188634,
      "eval_validation_ir_eval_cosine_accuracy@3": 0.8619243434525468,
      "eval_validation_ir_eval_cosine_accuracy@5": 0.9229181343075739,
      "eval_validation_ir_eval_cosine_map@100": 0.7499351289030992,
      "eval_validation_ir_eval_cosine_mrr@10": 0.7480495686208372,
      "eval_validation_ir_eval_cosine_ndcg@10": 0.8018625829452034,
      "eval_validation_ir_eval_cosine_precision@1": 0.6168679041268698,
      "eval_validation_ir_eval_cosine_precision@10": 0.09657104474188634,
      "eval_validation_ir_eval_cosine_precision@3": 0.28730811448418225,
      "eval_validation_ir_eval_cosine_precision@5": 0.18458362686151478,
      "eval_validation_ir_eval_cosine_recall@1": 0.6168679041268698,
      "eval_validation_ir_eval_cosine_recall@10": 0.9657104474188634,
      "eval_validation_ir_eval_cosine_recall@3": 0.8619243434525468,
      "eval_validation_ir_eval_cosine_recall@5": 0.9229181343075739,
      "step": 4240
    }
  ],
  "logging_steps": 500,
  "max_steps": 4240,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 4240,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 72,
  "trial_name": null,
  "trial_params": null
}
checkpoints/checkpoint-4240/training_args.bin
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:624cce9f13a65303d107fb320c83d826406b87cabab55e57fbfe428f6ac939bf
size 6161
checkpoints/eval/Information-Retrieval_evaluation_validation_ir_eval_results.csv
ADDED
epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accuracy@10,cosine-Precision@1,cosine-Recall@1,cosine-Precision@3,cosine-Recall@3,cosine-Precision@5,cosine-Recall@5,cosine-Precision@10,cosine-Recall@10,cosine-MRR@10,cosine-NDCG@10,cosine-MAP@100
5.0,4240,0.6168679041268698,0.8619243434525468,0.9229181343075739,0.9657104474188634,0.6168679041268698,0.6168679041268698,0.28730811448418225,0.8619243434525468,0.18458362686151478,0.9229181343075739,0.09657104474188634,0.9657104474188634,0.7480495686208372,0.8018625829452034,0.7499351289030992
config.json
ADDED
{
  "architectures": [
    "XLMRobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "dtype": "float32",
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "output_past": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.56.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 250002
}
config_sentence_transformers.json
ADDED
{
  "__version__": {
    "sentence_transformers": "5.1.0",
    "transformers": "4.56.1",
    "pytorch": "2.8.0+cu128"
  },
  "model_type": "SentenceTransformer",
  "prompts": {
    "query": "",
    "document": ""
  },
  "default_prompt_name": null,
  "similarity_fn_name": "cosine"
}
eval/Information-Retrieval_evaluation_validation_ir_eval_results.csv
ADDED
epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accuracy@10,cosine-Precision@1,cosine-Recall@1,cosine-Precision@3,cosine-Recall@3,cosine-Precision@5,cosine-Recall@5,cosine-Precision@10,cosine-Recall@10,cosine-MRR@10,cosine-NDCG@10,cosine-MAP@100
1.0,848,0.49393011025737643,0.7212765608873015,0.7945575779419716,0.8625796621831944,0.49393011025737643,0.49393011025737643,0.24042552029576716,0.7212765608873015,0.15891151558839428,0.7945575779419716,0.08625796621831944,0.8625796621831944,0.6221027825821331,0.680702859239632,0.6272803010147108
2.0,1696,0.5518439030783597,0.790461835875424,0.8578941332590638,0.9175609036845296,0.5518439030783597,0.5518439030783597,0.2634872786251413,0.790461835875424,0.1715788266518128,0.8578941332590638,0.09175609036845297,0.9175609036845296,0.6831209983624696,0.7405680188224913,0.6868916083317503
3.0,2544,0.5784006946378545,0.8244073461229705,0.8904634741722506,0.9446255672602762,0.5784006946378545,0.5784006946378545,0.2748024487076568,0.8244073461229705,0.17809269483445012,0.8904634741722506,0.09446255672602763,0.9446255672602762,0.7120346684412195,0.7692038634810361,0.7148535491495285
4.0,3392,0.6093153557561559,0.8518324350005734,0.9140877144120971,0.959828961811301,0.6093153557561559,0.6093153557561559,0.2839441450001911,0.8518324350005734,0.18281754288241947,0.9140877144120971,0.09598289618113011,0.959828961811301,0.7398104048491436,0.7941017365517729,0.7419405919089418
5.0,4240,0.6168679041268698,0.8619243434525468,0.9229181343075739,0.9657104474188634,0.6168679041268698,0.6168679041268698,0.28730811448418225,0.8619243434525468,0.18458362686151478,0.9229181343075739,0.09657104474188634,0.9657104474188634,0.7480495686208372,0.8018625829452034,0.7499351289030992
model.safetensors
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:b5c97ff7a7f77ac0fabc410d09a335d673ea1ab3d901ccc1f528e011731f24bc
size 2239607176
modules.json
ADDED
[
  {
    "idx": 0,
    "name": "0",
    "path": "",
    "type": "sentence_transformers.models.Transformer"
  },
  {
    "idx": 1,
    "name": "1",
    "path": "1_Pooling",
    "type": "sentence_transformers.models.Pooling"
  },
  {
    "idx": 2,
    "name": "2",
    "path": "2_Normalize",
    "type": "sentence_transformers.models.Normalize"
  }
]
sentence_bert_config.json
ADDED
{
  "max_seq_length": 512,
  "do_lower_case": false
}
special_tokens_map.json
ADDED
{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "cls_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "content": "<mask>",
    "lstrip": true,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<pad>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "sep_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:883b037111086fd4dfebbbc9b7cee11e1517b5e0c0514879478661440f137085
size 17082987
tokenizer_config.json
ADDED
{
  "added_tokens_decoder": {
    "0": {
      "content": "<s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<pad>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "3": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "250001": {
      "content": "<mask>",
      "lstrip": true,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "additional_special_tokens": [],
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": true,
  "cls_token": "<s>",
  "eos_token": "</s>",
  "extra_special_tokens": {},
  "mask_token": "<mask>",
  "model_max_length": 512,
  "pad_token": "<pad>",
  "sep_token": "</s>",
  "tokenizer_class": "XLMRobertaTokenizer",
  "unk_token": "<unk>"
}