diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..2d5debf632aec73fb881b874f1acb13f94749c48 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,13 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +checkpoint-1000/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-1000/unigram.json filter=lfs diff=lfs merge=lfs -text +checkpoint-1500/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-1500/unigram.json filter=lfs diff=lfs merge=lfs -text +checkpoint-1521/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-1521/unigram.json filter=lfs diff=lfs merge=lfs -text +checkpoint-500/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-500/unigram.json filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text +unigram.json filter=lfs diff=lfs merge=lfs -text diff --git a/1_Pooling/config.json b/1_Pooling/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a97f8d140b6aee43dfac9fc4521b2842657c5608 --- /dev/null +++ b/1_Pooling/config.json @@ -0,0 +1,10 @@ +{ + "word_embedding_dimension": 384, + "pooling_mode_cls_token": false, + "pooling_mode_mean_tokens": true, + "pooling_mode_max_tokens": false, + "pooling_mode_mean_sqrt_len_tokens": false, + "pooling_mode_weightedmean_tokens": false, + "pooling_mode_lasttoken": false, + "include_prompt": true +} \ No newline at end of file diff --git a/README.md b/README.md index 7b95401dc46245ac339fc25059d4a56d90b4cde5..19620cbd1536a97f93ab2e632c749313a90e2306 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,386 @@ ---- -license: apache-2.0 ---- +--- +tags: +- sentence-transformers +- sentence-similarity +- feature-extraction +- generated_from_trainer +- dataset_size:8100 +- loss:MultipleNegativesRankingLoss +base_model: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 +widget: +- source_sentence: Apakah santri boleh keluar pondok saat dikunjungi? + sentences: + - Cukup menghubungi bagian keuangan atau humas PPS. Imam Syafi'i. + - Keluar pondok hanya boleh dengan izin resmi dan keadaan darurat. + - Ya, seperti menjadi ketua kelompok, mengatur antrian, dan memimpin doa. +- source_sentence: Apakah santri boleh membawa HP? + sentences: + - HP tidak diperbolehkan dibawa ke lingkungan pesantren. + - Ya, kurikulum disesuaikan dengan tingkat perkembangan santri. + - Santri akan mendapatkan pendampingan psikologis dan konseling. +- source_sentence: Apakah ada kegiatan kebersihan harian di TK? + sentences: + - Santri mendapat pembinaan khusus dan apresiasi. + - Ya, setiap pagi santri melakukan piket kebersihan lingkungan sesuai jadwal. + - Ya, kurikulum disesuaikan dengan tingkat perkembangan santri. +- source_sentence: Apakah ada buku panduan bagi wali santri baru? + sentences: + - Wali harus mengajukan surat izin resmi dan mendapat persetujuan pengasuh. + - Ekskul dapat diganti satu kali di tengah semester dengan izin wali kelas. + - Ya, setiap wali mendapat buku panduan saat pendaftaran. +- source_sentence: Apakah ekskul dibuka untuk santri baru? + sentences: + - Ya, santri harus menjaga ketenangan dan mengembalikan buku tepat waktu. + - Ya, santri baru dapat langsung mendaftar ekskul di awal semester. + - Ya, kurikulum terus dievaluasi dan disesuaikan dengan tantangan era modern. +pipeline_tag: sentence-similarity +library_name: sentence-transformers +metrics: +- pearson_cosine +- spearman_cosine +model-index: +- name: SentenceTransformer based on sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 + results: + - task: + type: semantic-similarity + name: Semantic Similarity + dataset: + name: eval + type: eval + metrics: + - type: pearson_cosine + value: .nan + name: Pearson Cosine + - type: spearman_cosine + value: .nan + name: Spearman Cosine +--- + +# SentenceTransformer based on sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 + +This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more. + +## Model Details + +### Model Description +- **Model Type:** Sentence Transformer +- **Base model:** [sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) +- **Maximum Sequence Length:** 128 tokens +- **Output Dimensionality:** 384 dimensions +- **Similarity Function:** Cosine Similarity + + + + +### Model Sources + +- **Documentation:** [Sentence Transformers Documentation](https://sbert.net) +- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers) +- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers) + +### Full Model Architecture + +``` +SentenceTransformer( + (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel + (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True}) +) +``` + +## Usage + +### Direct Usage (Sentence Transformers) + +First install the Sentence Transformers library: + +```bash +pip install -U sentence-transformers +``` + +Then you can load this model and run inference. +```python +from sentence_transformers import SentenceTransformer + +# Download from the 🤗 Hub +model = SentenceTransformer("sentence_transformers_model_id") +# Run inference +sentences = [ + 'Apakah ekskul dibuka untuk santri baru?', + 'Ya, santri baru dapat langsung mendaftar ekskul di awal semester.', + 'Ya, kurikulum terus dievaluasi dan disesuaikan dengan tantangan era modern.', +] +embeddings = model.encode(sentences) +print(embeddings.shape) +# [3, 384] + +# Get the similarity scores for the embeddings +similarities = model.similarity(embeddings, embeddings) +print(similarities.shape) +# [3, 3] +``` + + + + + + + +## Evaluation + +### Metrics + +#### Semantic Similarity + +* Dataset: `eval` +* Evaluated with [EmbeddingSimilarityEvaluator](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator) + +| Metric | Value | +|:--------------------|:--------| +| pearson_cosine | nan | +| **spearman_cosine** | **nan** | + + + + + +## Training Details + +### Training Dataset + +#### Unnamed Dataset + +* Size: 8,100 training samples +* Columns: sentence_0 and sentence_1 +* Approximate statistics based on the first 1000 samples: + | | sentence_0 | sentence_1 | + |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------| + | type | string | string | + | details | | | +* Samples: + | sentence_0 | sentence_1 | + |:------------------------------------------------------------|:----------------------------------------------------------------------------------------------| + | Apakah kurikulum mencakup pendidikan karakter? | Ya, pembinaan karakter menjadi bagian utama kurikulum pesantren. | + | Apakah lingkungan pondok ramah anak? | Ya, desain dan pengawasan mendukung kenyamanan dan keamanan santri. | + | Apakah nilai adab berpengaruh pada kelulusan? | Sangat berpengaruh, nilai adab menjadi pertimbangan utama dalam penilaian akhir. | +* Loss: [MultipleNegativesRankingLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters: + ```json + { + "scale": 20.0, + "similarity_fct": "cos_sim" + } + ``` + +### Training Hyperparameters +#### Non-Default Hyperparameters + +- `eval_strategy`: steps +- `per_device_train_batch_size`: 16 +- `per_device_eval_batch_size`: 16 +- `multi_dataset_batch_sampler`: round_robin + +#### All Hyperparameters +
Click to expand + +- `overwrite_output_dir`: False +- `do_predict`: False +- `eval_strategy`: steps +- `prediction_loss_only`: True +- `per_device_train_batch_size`: 16 +- `per_device_eval_batch_size`: 16 +- `per_gpu_train_batch_size`: None +- `per_gpu_eval_batch_size`: None +- `gradient_accumulation_steps`: 1 +- `eval_accumulation_steps`: None +- `torch_empty_cache_steps`: None +- `learning_rate`: 5e-05 +- `weight_decay`: 0.0 +- `adam_beta1`: 0.9 +- `adam_beta2`: 0.999 +- `adam_epsilon`: 1e-08 +- `max_grad_norm`: 1 +- `num_train_epochs`: 3 +- `max_steps`: -1 +- `lr_scheduler_type`: linear +- `lr_scheduler_kwargs`: {} +- `warmup_ratio`: 0.0 +- `warmup_steps`: 0 +- `log_level`: passive +- `log_level_replica`: warning +- `log_on_each_node`: True +- `logging_nan_inf_filter`: True +- `save_safetensors`: True +- `save_on_each_node`: False +- `save_only_model`: False +- `restore_callback_states_from_checkpoint`: False +- `no_cuda`: False +- `use_cpu`: False +- `use_mps_device`: False +- `seed`: 42 +- `data_seed`: None +- `jit_mode_eval`: False +- `use_ipex`: False +- `bf16`: False +- `fp16`: False +- `fp16_opt_level`: O1 +- `half_precision_backend`: auto +- `bf16_full_eval`: False +- `fp16_full_eval`: False +- `tf32`: None +- `local_rank`: 0 +- `ddp_backend`: None +- `tpu_num_cores`: None +- `tpu_metrics_debug`: False +- `debug`: [] +- `dataloader_drop_last`: False +- `dataloader_num_workers`: 0 +- `dataloader_prefetch_factor`: None +- `past_index`: -1 +- `disable_tqdm`: False +- `remove_unused_columns`: True +- `label_names`: None +- `load_best_model_at_end`: False +- `ignore_data_skip`: False +- `fsdp`: [] +- `fsdp_min_num_params`: 0 +- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False} +- `fsdp_transformer_layer_cls_to_wrap`: None +- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None} +- `deepspeed`: None +- `label_smoothing_factor`: 0.0 +- `optim`: adamw_torch +- `optim_args`: None +- `adafactor`: False +- `group_by_length`: False +- `length_column_name`: length +- `ddp_find_unused_parameters`: None +- `ddp_bucket_cap_mb`: None +- `ddp_broadcast_buffers`: False +- `dataloader_pin_memory`: True +- `dataloader_persistent_workers`: False +- `skip_memory_metrics`: True +- `use_legacy_prediction_loop`: False +- `push_to_hub`: False +- `resume_from_checkpoint`: None +- `hub_model_id`: None +- `hub_strategy`: every_save +- `hub_private_repo`: None +- `hub_always_push`: False +- `gradient_checkpointing`: False +- `gradient_checkpointing_kwargs`: None +- `include_inputs_for_metrics`: False +- `include_for_metrics`: [] +- `eval_do_concat_batches`: True +- `fp16_backend`: auto +- `push_to_hub_model_id`: None +- `push_to_hub_organization`: None +- `mp_parameters`: +- `auto_find_batch_size`: False +- `full_determinism`: False +- `torchdynamo`: None +- `ray_scope`: last +- `ddp_timeout`: 1800 +- `torch_compile`: False +- `torch_compile_backend`: None +- `torch_compile_mode`: None +- `include_tokens_per_second`: False +- `include_num_input_tokens_seen`: False +- `neftune_noise_alpha`: None +- `optim_target_modules`: None +- `batch_eval_metrics`: False +- `eval_on_start`: False +- `use_liger_kernel`: False +- `eval_use_gather_object`: False +- `average_tokens_across_devices`: False +- `prompts`: None +- `batch_sampler`: batch_sampler +- `multi_dataset_batch_sampler`: round_robin + +
+ +### Training Logs +| Epoch | Step | eval_spearman_cosine | +|:------:|:----:|:--------------------:| +| 0.1972 | 100 | nan | + + +### Framework Versions +- Python: 3.11.13 +- Sentence Transformers: 4.1.0 +- Transformers: 4.52.4 +- PyTorch: 2.6.0+cu124 +- Accelerate: 1.7.0 +- Datasets: 2.14.4 +- Tokenizers: 0.21.1 + +## Citation + +### BibTeX + +#### Sentence Transformers +```bibtex +@inproceedings{reimers-2019-sentence-bert, + title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks", + author = "Reimers, Nils and Gurevych, Iryna", + booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing", + month = "11", + year = "2019", + publisher = "Association for Computational Linguistics", + url = "https://arxiv.org/abs/1908.10084", +} +``` + +#### MultipleNegativesRankingLoss +```bibtex +@misc{henderson2017efficient, + title={Efficient Natural Language Response Suggestion for Smart Reply}, + author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil}, + year={2017}, + eprint={1705.00652}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} +``` + + + + + + \ No newline at end of file diff --git a/checkpoint-1000/1_Pooling/config.json b/checkpoint-1000/1_Pooling/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a97f8d140b6aee43dfac9fc4521b2842657c5608 --- /dev/null +++ b/checkpoint-1000/1_Pooling/config.json @@ -0,0 +1,10 @@ +{ + "word_embedding_dimension": 384, + "pooling_mode_cls_token": false, + "pooling_mode_mean_tokens": true, + "pooling_mode_max_tokens": false, + "pooling_mode_mean_sqrt_len_tokens": false, + "pooling_mode_weightedmean_tokens": false, + "pooling_mode_lasttoken": false, + "include_prompt": true +} \ No newline at end of file diff --git a/checkpoint-1000/README.md b/checkpoint-1000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3d63e3bf3aa7b756eb2b35f216b6ddb6449b724b --- /dev/null +++ b/checkpoint-1000/README.md @@ -0,0 +1,396 @@ +--- +tags: +- sentence-transformers +- sentence-similarity +- feature-extraction +- generated_from_trainer +- dataset_size:8100 +- loss:MultipleNegativesRankingLoss +base_model: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 +widget: +- source_sentence: Apakah santri boleh keluar pondok saat dikunjungi? + sentences: + - Cukup menghubungi bagian keuangan atau humas PPS. Imam Syafi'i. + - Keluar pondok hanya boleh dengan izin resmi dan keadaan darurat. + - Ya, seperti menjadi ketua kelompok, mengatur antrian, dan memimpin doa. +- source_sentence: Apakah santri boleh membawa HP? + sentences: + - HP tidak diperbolehkan dibawa ke lingkungan pesantren. + - Ya, kurikulum disesuaikan dengan tingkat perkembangan santri. + - Santri akan mendapatkan pendampingan psikologis dan konseling. +- source_sentence: Apakah ada kegiatan kebersihan harian di TK? + sentences: + - Santri mendapat pembinaan khusus dan apresiasi. + - Ya, setiap pagi santri melakukan piket kebersihan lingkungan sesuai jadwal. + - Ya, kurikulum disesuaikan dengan tingkat perkembangan santri. +- source_sentence: Apakah ada buku panduan bagi wali santri baru? + sentences: + - Wali harus mengajukan surat izin resmi dan mendapat persetujuan pengasuh. + - Ekskul dapat diganti satu kali di tengah semester dengan izin wali kelas. + - Ya, setiap wali mendapat buku panduan saat pendaftaran. +- source_sentence: Apakah ekskul dibuka untuk santri baru? + sentences: + - Ya, santri harus menjaga ketenangan dan mengembalikan buku tepat waktu. + - Ya, santri baru dapat langsung mendaftar ekskul di awal semester. + - Ya, kurikulum terus dievaluasi dan disesuaikan dengan tantangan era modern. +pipeline_tag: sentence-similarity +library_name: sentence-transformers +metrics: +- pearson_cosine +- spearman_cosine +model-index: +- name: SentenceTransformer based on sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 + results: + - task: + type: semantic-similarity + name: Semantic Similarity + dataset: + name: eval + type: eval + metrics: + - type: pearson_cosine + value: .nan + name: Pearson Cosine + - type: spearman_cosine + value: .nan + name: Spearman Cosine +--- + +# SentenceTransformer based on sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 + +This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more. + +## Model Details + +### Model Description +- **Model Type:** Sentence Transformer +- **Base model:** [sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) +- **Maximum Sequence Length:** 128 tokens +- **Output Dimensionality:** 384 dimensions +- **Similarity Function:** Cosine Similarity + + + + +### Model Sources + +- **Documentation:** [Sentence Transformers Documentation](https://sbert.net) +- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers) +- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers) + +### Full Model Architecture + +``` +SentenceTransformer( + (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel + (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True}) +) +``` + +## Usage + +### Direct Usage (Sentence Transformers) + +First install the Sentence Transformers library: + +```bash +pip install -U sentence-transformers +``` + +Then you can load this model and run inference. +```python +from sentence_transformers import SentenceTransformer + +# Download from the 🤗 Hub +model = SentenceTransformer("sentence_transformers_model_id") +# Run inference +sentences = [ + 'Apakah ekskul dibuka untuk santri baru?', + 'Ya, santri baru dapat langsung mendaftar ekskul di awal semester.', + 'Ya, kurikulum terus dievaluasi dan disesuaikan dengan tantangan era modern.', +] +embeddings = model.encode(sentences) +print(embeddings.shape) +# [3, 384] + +# Get the similarity scores for the embeddings +similarities = model.similarity(embeddings, embeddings) +print(similarities.shape) +# [3, 3] +``` + + + + + + + +## Evaluation + +### Metrics + +#### Semantic Similarity + +* Dataset: `eval` +* Evaluated with [EmbeddingSimilarityEvaluator](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator) + +| Metric | Value | +|:--------------------|:--------| +| pearson_cosine | nan | +| **spearman_cosine** | **nan** | + + + + + +## Training Details + +### Training Dataset + +#### Unnamed Dataset + +* Size: 8,100 training samples +* Columns: sentence_0 and sentence_1 +* Approximate statistics based on the first 1000 samples: + | | sentence_0 | sentence_1 | + |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------| + | type | string | string | + | details | | | +* Samples: + | sentence_0 | sentence_1 | + |:------------------------------------------------------------|:----------------------------------------------------------------------------------------------| + | Apakah kurikulum mencakup pendidikan karakter? | Ya, pembinaan karakter menjadi bagian utama kurikulum pesantren. | + | Apakah lingkungan pondok ramah anak? | Ya, desain dan pengawasan mendukung kenyamanan dan keamanan santri. | + | Apakah nilai adab berpengaruh pada kelulusan? | Sangat berpengaruh, nilai adab menjadi pertimbangan utama dalam penilaian akhir. | +* Loss: [MultipleNegativesRankingLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters: + ```json + { + "scale": 20.0, + "similarity_fct": "cos_sim" + } + ``` + +### Training Hyperparameters +#### Non-Default Hyperparameters + +- `eval_strategy`: steps +- `per_device_train_batch_size`: 16 +- `per_device_eval_batch_size`: 16 +- `multi_dataset_batch_sampler`: round_robin + +#### All Hyperparameters +
Click to expand + +- `overwrite_output_dir`: False +- `do_predict`: False +- `eval_strategy`: steps +- `prediction_loss_only`: True +- `per_device_train_batch_size`: 16 +- `per_device_eval_batch_size`: 16 +- `per_gpu_train_batch_size`: None +- `per_gpu_eval_batch_size`: None +- `gradient_accumulation_steps`: 1 +- `eval_accumulation_steps`: None +- `torch_empty_cache_steps`: None +- `learning_rate`: 5e-05 +- `weight_decay`: 0.0 +- `adam_beta1`: 0.9 +- `adam_beta2`: 0.999 +- `adam_epsilon`: 1e-08 +- `max_grad_norm`: 1 +- `num_train_epochs`: 3 +- `max_steps`: -1 +- `lr_scheduler_type`: linear +- `lr_scheduler_kwargs`: {} +- `warmup_ratio`: 0.0 +- `warmup_steps`: 0 +- `log_level`: passive +- `log_level_replica`: warning +- `log_on_each_node`: True +- `logging_nan_inf_filter`: True +- `save_safetensors`: True +- `save_on_each_node`: False +- `save_only_model`: False +- `restore_callback_states_from_checkpoint`: False +- `no_cuda`: False +- `use_cpu`: False +- `use_mps_device`: False +- `seed`: 42 +- `data_seed`: None +- `jit_mode_eval`: False +- `use_ipex`: False +- `bf16`: False +- `fp16`: False +- `fp16_opt_level`: O1 +- `half_precision_backend`: auto +- `bf16_full_eval`: False +- `fp16_full_eval`: False +- `tf32`: None +- `local_rank`: 0 +- `ddp_backend`: None +- `tpu_num_cores`: None +- `tpu_metrics_debug`: False +- `debug`: [] +- `dataloader_drop_last`: False +- `dataloader_num_workers`: 0 +- `dataloader_prefetch_factor`: None +- `past_index`: -1 +- `disable_tqdm`: False +- `remove_unused_columns`: True +- `label_names`: None +- `load_best_model_at_end`: False +- `ignore_data_skip`: False +- `fsdp`: [] +- `fsdp_min_num_params`: 0 +- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False} +- `fsdp_transformer_layer_cls_to_wrap`: None +- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None} +- `deepspeed`: None +- `label_smoothing_factor`: 0.0 +- `optim`: adamw_torch +- `optim_args`: None +- `adafactor`: False +- `group_by_length`: False +- `length_column_name`: length +- `ddp_find_unused_parameters`: None +- `ddp_bucket_cap_mb`: None +- `ddp_broadcast_buffers`: False +- `dataloader_pin_memory`: True +- `dataloader_persistent_workers`: False +- `skip_memory_metrics`: True +- `use_legacy_prediction_loop`: False +- `push_to_hub`: False +- `resume_from_checkpoint`: None +- `hub_model_id`: None +- `hub_strategy`: every_save +- `hub_private_repo`: None +- `hub_always_push`: False +- `gradient_checkpointing`: False +- `gradient_checkpointing_kwargs`: None +- `include_inputs_for_metrics`: False +- `include_for_metrics`: [] +- `eval_do_concat_batches`: True +- `fp16_backend`: auto +- `push_to_hub_model_id`: None +- `push_to_hub_organization`: None +- `mp_parameters`: +- `auto_find_batch_size`: False +- `full_determinism`: False +- `torchdynamo`: None +- `ray_scope`: last +- `ddp_timeout`: 1800 +- `torch_compile`: False +- `torch_compile_backend`: None +- `torch_compile_mode`: None +- `include_tokens_per_second`: False +- `include_num_input_tokens_seen`: False +- `neftune_noise_alpha`: None +- `optim_target_modules`: None +- `batch_eval_metrics`: False +- `eval_on_start`: False +- `use_liger_kernel`: False +- `eval_use_gather_object`: False +- `average_tokens_across_devices`: False +- `prompts`: None +- `batch_sampler`: batch_sampler +- `multi_dataset_batch_sampler`: round_robin + +
+ +### Training Logs +| Epoch | Step | Training Loss | eval_spearman_cosine | +|:------:|:----:|:-------------:|:--------------------:| +| 0.1972 | 100 | - | nan | +| 0.3945 | 200 | - | nan | +| 0.5917 | 300 | - | nan | +| 0.7890 | 400 | - | nan | +| 0.9862 | 500 | 0.28 | nan | +| 1.0 | 507 | - | nan | +| 1.1834 | 600 | - | nan | +| 1.3807 | 700 | - | nan | +| 1.5779 | 800 | - | nan | +| 1.7751 | 900 | - | nan | +| 1.9724 | 1000 | 0.0393 | nan | + + +### Framework Versions +- Python: 3.11.13 +- Sentence Transformers: 4.1.0 +- Transformers: 4.52.4 +- PyTorch: 2.6.0+cu124 +- Accelerate: 1.7.0 +- Datasets: 2.14.4 +- Tokenizers: 0.21.1 + +## Citation + +### BibTeX + +#### Sentence Transformers +```bibtex +@inproceedings{reimers-2019-sentence-bert, + title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks", + author = "Reimers, Nils and Gurevych, Iryna", + booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing", + month = "11", + year = "2019", + publisher = "Association for Computational Linguistics", + url = "https://arxiv.org/abs/1908.10084", +} +``` + +#### MultipleNegativesRankingLoss +```bibtex +@misc{henderson2017efficient, + title={Efficient Natural Language Response Suggestion for Smart Reply}, + author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil}, + year={2017}, + eprint={1705.00652}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} +``` + + + + + + \ No newline at end of file diff --git a/checkpoint-1000/config.json b/checkpoint-1000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..464f7e70944f84ce569eb4b77fe9edde4962b29c --- /dev/null +++ b/checkpoint-1000/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "BertModel" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 384, + "initializer_range": 0.02, + "intermediate_size": 1536, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.52.4", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 250037 +} diff --git a/checkpoint-1000/config_sentence_transformers.json b/checkpoint-1000/config_sentence_transformers.json new file mode 100644 index 0000000000000000000000000000000000000000..f3cc0c6900ee82c776d67a63601aeed12365ef95 --- /dev/null +++ b/checkpoint-1000/config_sentence_transformers.json @@ -0,0 +1,10 @@ +{ + "__version__": { + "sentence_transformers": "4.1.0", + "transformers": "4.52.4", + "pytorch": "2.6.0+cu124" + }, + "prompts": {}, + "default_prompt_name": null, + "similarity_fn_name": "cosine" +} \ No newline at end of file diff --git a/checkpoint-1000/model.safetensors b/checkpoint-1000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e118c9e86a79815cda2811fc3f65c83576cdc847 --- /dev/null +++ b/checkpoint-1000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8aed365b353fe1a54943d41c6a59bcfaee3f3501107ca9531e092c11ab366a1 +size 470637416 diff --git a/checkpoint-1000/modules.json b/checkpoint-1000/modules.json new file mode 100644 index 0000000000000000000000000000000000000000..f7640f94e81bb7f4f04daf1668850b38763a13d9 --- /dev/null +++ b/checkpoint-1000/modules.json @@ -0,0 +1,14 @@ +[ + { + "idx": 0, + "name": "0", + "path": "", + "type": "sentence_transformers.models.Transformer" + }, + { + "idx": 1, + "name": "1", + "path": "1_Pooling", + "type": "sentence_transformers.models.Pooling" + } +] \ No newline at end of file diff --git a/checkpoint-1000/optimizer.pt b/checkpoint-1000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5dac20ae4ae84845c0f9e163739974d7bd152d5c --- /dev/null +++ b/checkpoint-1000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee6cff9449516512dcb2d88111e071dd61e9b7a1213709ef6775fbfe5fb29747 +size 940212218 diff --git a/checkpoint-1000/rng_state.pth b/checkpoint-1000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5d665e13fc7e04426e049b922c282fc499f95862 --- /dev/null +++ b/checkpoint-1000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75e60ec7c6daffa2345890216a615b934b4e3c644297c631caadc8f76c5bdf44 +size 14244 diff --git a/checkpoint-1000/scheduler.pt b/checkpoint-1000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ab9f125af412802701d6945c825cdb9fd9112e01 --- /dev/null +++ b/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e973e9591cee2efb2deae3b216d2c5736cc6ba56a19a94afca78b909c125c023 +size 1064 diff --git a/checkpoint-1000/sentence_bert_config.json b/checkpoint-1000/sentence_bert_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5fd10429389515d3e5cccdeda08cae5fea1ae82e --- /dev/null +++ b/checkpoint-1000/sentence_bert_config.json @@ -0,0 +1,4 @@ +{ + "max_seq_length": 128, + "do_lower_case": false +} \ No newline at end of file diff --git a/checkpoint-1000/special_tokens_map.json b/checkpoint-1000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b1879d702821e753ffe4245048eee415d54a9385 --- /dev/null +++ b/checkpoint-1000/special_tokens_map.json @@ -0,0 +1,51 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "cls_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "mask_token": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "sep_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-1000/tokenizer.json b/checkpoint-1000/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..e3420945e193cc0791136cdc6e5cd69801c838af --- /dev/null +++ b/checkpoint-1000/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cad551d5600a84242d0973327029452a1e3672ba6313c2a3c3d69c4310e12719 +size 17082987 diff --git a/checkpoint-1000/tokenizer_config.json b/checkpoint-1000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..facf4436a8f11c26085c16a14f4e576853927a9e --- /dev/null +++ b/checkpoint-1000/tokenizer_config.json @@ -0,0 +1,65 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "cls_token": "", + "do_lower_case": true, + "eos_token": "", + "extra_special_tokens": {}, + "mask_token": "", + "max_length": 128, + "model_max_length": 128, + "pad_to_multiple_of": null, + "pad_token": "", + "pad_token_type_id": 0, + "padding_side": "right", + "sep_token": "", + "stride": 0, + "strip_accents": null, + "tokenize_chinese_chars": true, + "tokenizer_class": "BertTokenizer", + "truncation_side": "right", + "truncation_strategy": "longest_first", + "unk_token": "" +} diff --git a/checkpoint-1000/trainer_state.json b/checkpoint-1000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1364df385086813dee1ec168882f159a80c371bc --- /dev/null +++ b/checkpoint-1000/trainer_state.json @@ -0,0 +1,138 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.972386587771203, + "eval_steps": 100, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.19723865877712032, + "eval_pearson_cosine": NaN, + "eval_runtime": 1.2181, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 100 + }, + { + "epoch": 0.39447731755424065, + "eval_pearson_cosine": NaN, + "eval_runtime": 1.5897, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 200 + }, + { + "epoch": 0.591715976331361, + "eval_pearson_cosine": NaN, + "eval_runtime": 2.847, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 300 + }, + { + "epoch": 0.7889546351084813, + "eval_pearson_cosine": NaN, + "eval_runtime": 2.166, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 400 + }, + { + "epoch": 0.9861932938856016, + "grad_norm": 8.998483657836914, + "learning_rate": 1.4372355430183358e-05, + "loss": 0.28, + "step": 500 + }, + { + "epoch": 0.9861932938856016, + "eval_pearson_cosine": NaN, + "eval_runtime": 1.2219, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 500 + }, + { + "epoch": 1.183431952662722, + "eval_pearson_cosine": NaN, + "eval_runtime": 3.4138, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 600 + }, + { + "epoch": 1.3806706114398422, + "eval_pearson_cosine": NaN, + "eval_runtime": 1.3764, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 700 + }, + { + "epoch": 1.5779092702169626, + "eval_pearson_cosine": NaN, + "eval_runtime": 1.2387, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 800 + }, + { + "epoch": 1.7751479289940828, + "eval_pearson_cosine": NaN, + "eval_runtime": 1.2026, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 900 + }, + { + "epoch": 1.972386587771203, + "grad_norm": 0.12859900295734406, + "learning_rate": 7.320169252468266e-06, + "loss": 0.0393, + "step": 1000 + }, + { + "epoch": 1.972386587771203, + "eval_pearson_cosine": NaN, + "eval_runtime": 4.1539, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 1000 + } + ], + "logging_steps": 500, + "max_steps": 1521, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1000/training_args.bin b/checkpoint-1000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..63aeb8a8474ed761c42cad4785153f0f39ee60df --- /dev/null +++ b/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6b0c772261a311993bb357660901823f0e8111b9d4198a0bb2d995ed7daee06 +size 5496 diff --git a/checkpoint-1000/unigram.json b/checkpoint-1000/unigram.json new file mode 100644 index 0000000000000000000000000000000000000000..2faa9ec874108d53a017ff2c7ab98d155fb21a82 --- /dev/null +++ b/checkpoint-1000/unigram.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da145b5e7700ae40f16691ec32a0b1fdc1ee3298db22a31ea55f57a966c4a65d +size 14763260 diff --git a/checkpoint-1500/1_Pooling/config.json b/checkpoint-1500/1_Pooling/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a97f8d140b6aee43dfac9fc4521b2842657c5608 --- /dev/null +++ b/checkpoint-1500/1_Pooling/config.json @@ -0,0 +1,10 @@ +{ + "word_embedding_dimension": 384, + "pooling_mode_cls_token": false, + "pooling_mode_mean_tokens": true, + "pooling_mode_max_tokens": false, + "pooling_mode_mean_sqrt_len_tokens": false, + "pooling_mode_weightedmean_tokens": false, + "pooling_mode_lasttoken": false, + "include_prompt": true +} \ No newline at end of file diff --git a/checkpoint-1500/README.md b/checkpoint-1500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3c9d6394d03528cbe3ed9de01502e3f541d6453f --- /dev/null +++ b/checkpoint-1500/README.md @@ -0,0 +1,402 @@ +--- +tags: +- sentence-transformers +- sentence-similarity +- feature-extraction +- generated_from_trainer +- dataset_size:8100 +- loss:MultipleNegativesRankingLoss +base_model: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 +widget: +- source_sentence: Apakah santri boleh keluar pondok saat dikunjungi? + sentences: + - Cukup menghubungi bagian keuangan atau humas PPS. Imam Syafi'i. + - Keluar pondok hanya boleh dengan izin resmi dan keadaan darurat. + - Ya, seperti menjadi ketua kelompok, mengatur antrian, dan memimpin doa. +- source_sentence: Apakah santri boleh membawa HP? + sentences: + - HP tidak diperbolehkan dibawa ke lingkungan pesantren. + - Ya, kurikulum disesuaikan dengan tingkat perkembangan santri. + - Santri akan mendapatkan pendampingan psikologis dan konseling. +- source_sentence: Apakah ada kegiatan kebersihan harian di TK? + sentences: + - Santri mendapat pembinaan khusus dan apresiasi. + - Ya, setiap pagi santri melakukan piket kebersihan lingkungan sesuai jadwal. + - Ya, kurikulum disesuaikan dengan tingkat perkembangan santri. +- source_sentence: Apakah ada buku panduan bagi wali santri baru? + sentences: + - Wali harus mengajukan surat izin resmi dan mendapat persetujuan pengasuh. + - Ekskul dapat diganti satu kali di tengah semester dengan izin wali kelas. + - Ya, setiap wali mendapat buku panduan saat pendaftaran. +- source_sentence: Apakah ekskul dibuka untuk santri baru? + sentences: + - Ya, santri harus menjaga ketenangan dan mengembalikan buku tepat waktu. + - Ya, santri baru dapat langsung mendaftar ekskul di awal semester. + - Ya, kurikulum terus dievaluasi dan disesuaikan dengan tantangan era modern. +pipeline_tag: sentence-similarity +library_name: sentence-transformers +metrics: +- pearson_cosine +- spearman_cosine +model-index: +- name: SentenceTransformer based on sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 + results: + - task: + type: semantic-similarity + name: Semantic Similarity + dataset: + name: eval + type: eval + metrics: + - type: pearson_cosine + value: .nan + name: Pearson Cosine + - type: spearman_cosine + value: .nan + name: Spearman Cosine +--- + +# SentenceTransformer based on sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 + +This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more. + +## Model Details + +### Model Description +- **Model Type:** Sentence Transformer +- **Base model:** [sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) +- **Maximum Sequence Length:** 128 tokens +- **Output Dimensionality:** 384 dimensions +- **Similarity Function:** Cosine Similarity + + + + +### Model Sources + +- **Documentation:** [Sentence Transformers Documentation](https://sbert.net) +- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers) +- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers) + +### Full Model Architecture + +``` +SentenceTransformer( + (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel + (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True}) +) +``` + +## Usage + +### Direct Usage (Sentence Transformers) + +First install the Sentence Transformers library: + +```bash +pip install -U sentence-transformers +``` + +Then you can load this model and run inference. +```python +from sentence_transformers import SentenceTransformer + +# Download from the 🤗 Hub +model = SentenceTransformer("sentence_transformers_model_id") +# Run inference +sentences = [ + 'Apakah ekskul dibuka untuk santri baru?', + 'Ya, santri baru dapat langsung mendaftar ekskul di awal semester.', + 'Ya, kurikulum terus dievaluasi dan disesuaikan dengan tantangan era modern.', +] +embeddings = model.encode(sentences) +print(embeddings.shape) +# [3, 384] + +# Get the similarity scores for the embeddings +similarities = model.similarity(embeddings, embeddings) +print(similarities.shape) +# [3, 3] +``` + + + + + + + +## Evaluation + +### Metrics + +#### Semantic Similarity + +* Dataset: `eval` +* Evaluated with [EmbeddingSimilarityEvaluator](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator) + +| Metric | Value | +|:--------------------|:--------| +| pearson_cosine | nan | +| **spearman_cosine** | **nan** | + + + + + +## Training Details + +### Training Dataset + +#### Unnamed Dataset + +* Size: 8,100 training samples +* Columns: sentence_0 and sentence_1 +* Approximate statistics based on the first 1000 samples: + | | sentence_0 | sentence_1 | + |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------| + | type | string | string | + | details |
  • min: 7 tokens
  • mean: 11.19 tokens
  • max: 18 tokens
|
  • min: 9 tokens
  • mean: 15.87 tokens
  • max: 42 tokens
| +* Samples: + | sentence_0 | sentence_1 | + |:------------------------------------------------------------|:----------------------------------------------------------------------------------------------| + | Apakah kurikulum mencakup pendidikan karakter? | Ya, pembinaan karakter menjadi bagian utama kurikulum pesantren. | + | Apakah lingkungan pondok ramah anak? | Ya, desain dan pengawasan mendukung kenyamanan dan keamanan santri. | + | Apakah nilai adab berpengaruh pada kelulusan? | Sangat berpengaruh, nilai adab menjadi pertimbangan utama dalam penilaian akhir. | +* Loss: [MultipleNegativesRankingLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters: + ```json + { + "scale": 20.0, + "similarity_fct": "cos_sim" + } + ``` + +### Training Hyperparameters +#### Non-Default Hyperparameters + +- `eval_strategy`: steps +- `per_device_train_batch_size`: 16 +- `per_device_eval_batch_size`: 16 +- `multi_dataset_batch_sampler`: round_robin + +#### All Hyperparameters +
Click to expand + +- `overwrite_output_dir`: False +- `do_predict`: False +- `eval_strategy`: steps +- `prediction_loss_only`: True +- `per_device_train_batch_size`: 16 +- `per_device_eval_batch_size`: 16 +- `per_gpu_train_batch_size`: None +- `per_gpu_eval_batch_size`: None +- `gradient_accumulation_steps`: 1 +- `eval_accumulation_steps`: None +- `torch_empty_cache_steps`: None +- `learning_rate`: 5e-05 +- `weight_decay`: 0.0 +- `adam_beta1`: 0.9 +- `adam_beta2`: 0.999 +- `adam_epsilon`: 1e-08 +- `max_grad_norm`: 1 +- `num_train_epochs`: 3 +- `max_steps`: -1 +- `lr_scheduler_type`: linear +- `lr_scheduler_kwargs`: {} +- `warmup_ratio`: 0.0 +- `warmup_steps`: 0 +- `log_level`: passive +- `log_level_replica`: warning +- `log_on_each_node`: True +- `logging_nan_inf_filter`: True +- `save_safetensors`: True +- `save_on_each_node`: False +- `save_only_model`: False +- `restore_callback_states_from_checkpoint`: False +- `no_cuda`: False +- `use_cpu`: False +- `use_mps_device`: False +- `seed`: 42 +- `data_seed`: None +- `jit_mode_eval`: False +- `use_ipex`: False +- `bf16`: False +- `fp16`: False +- `fp16_opt_level`: O1 +- `half_precision_backend`: auto +- `bf16_full_eval`: False +- `fp16_full_eval`: False +- `tf32`: None +- `local_rank`: 0 +- `ddp_backend`: None +- `tpu_num_cores`: None +- `tpu_metrics_debug`: False +- `debug`: [] +- `dataloader_drop_last`: False +- `dataloader_num_workers`: 0 +- `dataloader_prefetch_factor`: None +- `past_index`: -1 +- `disable_tqdm`: False +- `remove_unused_columns`: True +- `label_names`: None +- `load_best_model_at_end`: False +- `ignore_data_skip`: False +- `fsdp`: [] +- `fsdp_min_num_params`: 0 +- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False} +- `fsdp_transformer_layer_cls_to_wrap`: None +- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None} +- `deepspeed`: None +- `label_smoothing_factor`: 0.0 +- `optim`: adamw_torch +- `optim_args`: None +- `adafactor`: False +- `group_by_length`: False +- `length_column_name`: length +- `ddp_find_unused_parameters`: None +- `ddp_bucket_cap_mb`: None +- `ddp_broadcast_buffers`: False +- `dataloader_pin_memory`: True +- `dataloader_persistent_workers`: False +- `skip_memory_metrics`: True +- `use_legacy_prediction_loop`: False +- `push_to_hub`: False +- `resume_from_checkpoint`: None +- `hub_model_id`: None +- `hub_strategy`: every_save +- `hub_private_repo`: None +- `hub_always_push`: False +- `gradient_checkpointing`: False +- `gradient_checkpointing_kwargs`: None +- `include_inputs_for_metrics`: False +- `include_for_metrics`: [] +- `eval_do_concat_batches`: True +- `fp16_backend`: auto +- `push_to_hub_model_id`: None +- `push_to_hub_organization`: None +- `mp_parameters`: +- `auto_find_batch_size`: False +- `full_determinism`: False +- `torchdynamo`: None +- `ray_scope`: last +- `ddp_timeout`: 1800 +- `torch_compile`: False +- `torch_compile_backend`: None +- `torch_compile_mode`: None +- `include_tokens_per_second`: False +- `include_num_input_tokens_seen`: False +- `neftune_noise_alpha`: None +- `optim_target_modules`: None +- `batch_eval_metrics`: False +- `eval_on_start`: False +- `use_liger_kernel`: False +- `eval_use_gather_object`: False +- `average_tokens_across_devices`: False +- `prompts`: None +- `batch_sampler`: batch_sampler +- `multi_dataset_batch_sampler`: round_robin + +
+ +### Training Logs +| Epoch | Step | Training Loss | eval_spearman_cosine | +|:------:|:----:|:-------------:|:--------------------:| +| 0.1972 | 100 | - | nan | +| 0.3945 | 200 | - | nan | +| 0.5917 | 300 | - | nan | +| 0.7890 | 400 | - | nan | +| 0.9862 | 500 | 0.28 | nan | +| 1.0 | 507 | - | nan | +| 1.1834 | 600 | - | nan | +| 1.3807 | 700 | - | nan | +| 1.5779 | 800 | - | nan | +| 1.7751 | 900 | - | nan | +| 1.9724 | 1000 | 0.0393 | nan | +| 2.0 | 1014 | - | nan | +| 2.1696 | 1100 | - | nan | +| 2.3669 | 1200 | - | nan | +| 2.5641 | 1300 | - | nan | +| 2.7613 | 1400 | - | nan | +| 2.9586 | 1500 | 0.0274 | nan | + + +### Framework Versions +- Python: 3.11.13 +- Sentence Transformers: 4.1.0 +- Transformers: 4.52.4 +- PyTorch: 2.6.0+cu124 +- Accelerate: 1.7.0 +- Datasets: 2.14.4 +- Tokenizers: 0.21.1 + +## Citation + +### BibTeX + +#### Sentence Transformers +```bibtex +@inproceedings{reimers-2019-sentence-bert, + title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks", + author = "Reimers, Nils and Gurevych, Iryna", + booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing", + month = "11", + year = "2019", + publisher = "Association for Computational Linguistics", + url = "https://arxiv.org/abs/1908.10084", +} +``` + +#### MultipleNegativesRankingLoss +```bibtex +@misc{henderson2017efficient, + title={Efficient Natural Language Response Suggestion for Smart Reply}, + author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil}, + year={2017}, + eprint={1705.00652}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} +``` + + + + + + \ No newline at end of file diff --git a/checkpoint-1500/config.json b/checkpoint-1500/config.json new file mode 100644 index 0000000000000000000000000000000000000000..464f7e70944f84ce569eb4b77fe9edde4962b29c --- /dev/null +++ b/checkpoint-1500/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "BertModel" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 384, + "initializer_range": 0.02, + "intermediate_size": 1536, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.52.4", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 250037 +} diff --git a/checkpoint-1500/config_sentence_transformers.json b/checkpoint-1500/config_sentence_transformers.json new file mode 100644 index 0000000000000000000000000000000000000000..f3cc0c6900ee82c776d67a63601aeed12365ef95 --- /dev/null +++ b/checkpoint-1500/config_sentence_transformers.json @@ -0,0 +1,10 @@ +{ + "__version__": { + "sentence_transformers": "4.1.0", + "transformers": "4.52.4", + "pytorch": "2.6.0+cu124" + }, + "prompts": {}, + "default_prompt_name": null, + "similarity_fn_name": "cosine" +} \ No newline at end of file diff --git a/checkpoint-1500/model.safetensors b/checkpoint-1500/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..240c93288629db38e78082f3c62cc1a16bc4452f --- /dev/null +++ b/checkpoint-1500/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:912d6a3b8bf777d9a898b03f0c4dbfab1b45d564b490b270afd867103372f2c5 +size 470637416 diff --git a/checkpoint-1500/modules.json b/checkpoint-1500/modules.json new file mode 100644 index 0000000000000000000000000000000000000000..f7640f94e81bb7f4f04daf1668850b38763a13d9 --- /dev/null +++ b/checkpoint-1500/modules.json @@ -0,0 +1,14 @@ +[ + { + "idx": 0, + "name": "0", + "path": "", + "type": "sentence_transformers.models.Transformer" + }, + { + "idx": 1, + "name": "1", + "path": "1_Pooling", + "type": "sentence_transformers.models.Pooling" + } +] \ No newline at end of file diff --git a/checkpoint-1500/optimizer.pt b/checkpoint-1500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b1f759883cbc3fa0959eb1f12267afff3a221e5d --- /dev/null +++ b/checkpoint-1500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74a8db289c9bb2d1a7f7e4cab91157c90af1334ee158f670ba4edbed5acfdf20 +size 940212218 diff --git a/checkpoint-1500/rng_state.pth b/checkpoint-1500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7240594fac2aafc9d90efca9770c92d89ba47735 --- /dev/null +++ b/checkpoint-1500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dfee4901e3a689f3d66ecbc899041b4ba253322395c73cd095c44cc6f098d4d +size 14244 diff --git a/checkpoint-1500/scheduler.pt b/checkpoint-1500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2aff276a1b4c3e26ca02d753b56c479294c0aed7 --- /dev/null +++ b/checkpoint-1500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4c6d2d58a3b9966705e49a7c4047f9889d86d517c933ee6e63399fdc64fd784 +size 1064 diff --git a/checkpoint-1500/sentence_bert_config.json b/checkpoint-1500/sentence_bert_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5fd10429389515d3e5cccdeda08cae5fea1ae82e --- /dev/null +++ b/checkpoint-1500/sentence_bert_config.json @@ -0,0 +1,4 @@ +{ + "max_seq_length": 128, + "do_lower_case": false +} \ No newline at end of file diff --git a/checkpoint-1500/special_tokens_map.json b/checkpoint-1500/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b1879d702821e753ffe4245048eee415d54a9385 --- /dev/null +++ b/checkpoint-1500/special_tokens_map.json @@ -0,0 +1,51 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "cls_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "mask_token": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "sep_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-1500/tokenizer.json b/checkpoint-1500/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..e3420945e193cc0791136cdc6e5cd69801c838af --- /dev/null +++ b/checkpoint-1500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cad551d5600a84242d0973327029452a1e3672ba6313c2a3c3d69c4310e12719 +size 17082987 diff --git a/checkpoint-1500/tokenizer_config.json b/checkpoint-1500/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..facf4436a8f11c26085c16a14f4e576853927a9e --- /dev/null +++ b/checkpoint-1500/tokenizer_config.json @@ -0,0 +1,65 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "cls_token": "", + "do_lower_case": true, + "eos_token": "", + "extra_special_tokens": {}, + "mask_token": "", + "max_length": 128, + "model_max_length": 128, + "pad_to_multiple_of": null, + "pad_token": "", + "pad_token_type_id": 0, + "padding_side": "right", + "sep_token": "", + "stride": 0, + "strip_accents": null, + "tokenize_chinese_chars": true, + "tokenizer_class": "BertTokenizer", + "truncation_side": "right", + "truncation_strategy": "longest_first", + "unk_token": "" +} diff --git a/checkpoint-1500/trainer_state.json b/checkpoint-1500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f2d6cb52f2ff02bf60acb4a34974061eae4abca9 --- /dev/null +++ b/checkpoint-1500/trainer_state.json @@ -0,0 +1,190 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.9585798816568047, + "eval_steps": 100, + "global_step": 1500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.19723865877712032, + "eval_pearson_cosine": NaN, + "eval_runtime": 1.2181, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 100 + }, + { + "epoch": 0.39447731755424065, + "eval_pearson_cosine": NaN, + "eval_runtime": 1.5897, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 200 + }, + { + "epoch": 0.591715976331361, + "eval_pearson_cosine": NaN, + "eval_runtime": 2.847, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 300 + }, + { + "epoch": 0.7889546351084813, + "eval_pearson_cosine": NaN, + "eval_runtime": 2.166, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 400 + }, + { + "epoch": 0.9861932938856016, + "grad_norm": 8.998483657836914, + "learning_rate": 1.4372355430183358e-05, + "loss": 0.28, + "step": 500 + }, + { + "epoch": 0.9861932938856016, + "eval_pearson_cosine": NaN, + "eval_runtime": 1.2219, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 500 + }, + { + "epoch": 1.183431952662722, + "eval_pearson_cosine": NaN, + "eval_runtime": 3.4138, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 600 + }, + { + "epoch": 1.3806706114398422, + "eval_pearson_cosine": NaN, + "eval_runtime": 1.3764, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 700 + }, + { + "epoch": 1.5779092702169626, + "eval_pearson_cosine": NaN, + "eval_runtime": 1.2387, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 800 + }, + { + "epoch": 1.7751479289940828, + "eval_pearson_cosine": NaN, + "eval_runtime": 1.2026, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 900 + }, + { + "epoch": 1.972386587771203, + "grad_norm": 0.12859900295734406, + "learning_rate": 7.320169252468266e-06, + "loss": 0.0393, + "step": 1000 + }, + { + "epoch": 1.972386587771203, + "eval_pearson_cosine": NaN, + "eval_runtime": 4.1539, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 1000 + }, + { + "epoch": 2.1696252465483234, + "eval_pearson_cosine": NaN, + "eval_runtime": 1.2677, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 1100 + }, + { + "epoch": 2.366863905325444, + "eval_pearson_cosine": NaN, + "eval_runtime": 1.2481, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 1200 + }, + { + "epoch": 2.564102564102564, + "eval_pearson_cosine": NaN, + "eval_runtime": 1.5014, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 1300 + }, + { + "epoch": 2.7613412228796843, + "eval_pearson_cosine": NaN, + "eval_runtime": 1.6007, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 1400 + }, + { + "epoch": 2.9585798816568047, + "grad_norm": 2.3075523376464844, + "learning_rate": 2.679830747531735e-07, + "loss": 0.0274, + "step": 1500 + }, + { + "epoch": 2.9585798816568047, + "eval_pearson_cosine": NaN, + "eval_runtime": 1.6228, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 1500 + } + ], + "logging_steps": 500, + "max_steps": 1521, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1500/training_args.bin b/checkpoint-1500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..63aeb8a8474ed761c42cad4785153f0f39ee60df --- /dev/null +++ b/checkpoint-1500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6b0c772261a311993bb357660901823f0e8111b9d4198a0bb2d995ed7daee06 +size 5496 diff --git a/checkpoint-1500/unigram.json b/checkpoint-1500/unigram.json new file mode 100644 index 0000000000000000000000000000000000000000..2faa9ec874108d53a017ff2c7ab98d155fb21a82 --- /dev/null +++ b/checkpoint-1500/unigram.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da145b5e7700ae40f16691ec32a0b1fdc1ee3298db22a31ea55f57a966c4a65d +size 14763260 diff --git a/checkpoint-1521/1_Pooling/config.json b/checkpoint-1521/1_Pooling/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a97f8d140b6aee43dfac9fc4521b2842657c5608 --- /dev/null +++ b/checkpoint-1521/1_Pooling/config.json @@ -0,0 +1,10 @@ +{ + "word_embedding_dimension": 384, + "pooling_mode_cls_token": false, + "pooling_mode_mean_tokens": true, + "pooling_mode_max_tokens": false, + "pooling_mode_mean_sqrt_len_tokens": false, + "pooling_mode_weightedmean_tokens": false, + "pooling_mode_lasttoken": false, + "include_prompt": true +} \ No newline at end of file diff --git a/checkpoint-1521/README.md b/checkpoint-1521/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3c9d6394d03528cbe3ed9de01502e3f541d6453f --- /dev/null +++ b/checkpoint-1521/README.md @@ -0,0 +1,402 @@ +--- +tags: +- sentence-transformers +- sentence-similarity +- feature-extraction +- generated_from_trainer +- dataset_size:8100 +- loss:MultipleNegativesRankingLoss +base_model: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 +widget: +- source_sentence: Apakah santri boleh keluar pondok saat dikunjungi? + sentences: + - Cukup menghubungi bagian keuangan atau humas PPS. Imam Syafi'i. + - Keluar pondok hanya boleh dengan izin resmi dan keadaan darurat. + - Ya, seperti menjadi ketua kelompok, mengatur antrian, dan memimpin doa. +- source_sentence: Apakah santri boleh membawa HP? + sentences: + - HP tidak diperbolehkan dibawa ke lingkungan pesantren. + - Ya, kurikulum disesuaikan dengan tingkat perkembangan santri. + - Santri akan mendapatkan pendampingan psikologis dan konseling. +- source_sentence: Apakah ada kegiatan kebersihan harian di TK? + sentences: + - Santri mendapat pembinaan khusus dan apresiasi. + - Ya, setiap pagi santri melakukan piket kebersihan lingkungan sesuai jadwal. + - Ya, kurikulum disesuaikan dengan tingkat perkembangan santri. +- source_sentence: Apakah ada buku panduan bagi wali santri baru? + sentences: + - Wali harus mengajukan surat izin resmi dan mendapat persetujuan pengasuh. + - Ekskul dapat diganti satu kali di tengah semester dengan izin wali kelas. + - Ya, setiap wali mendapat buku panduan saat pendaftaran. +- source_sentence: Apakah ekskul dibuka untuk santri baru? + sentences: + - Ya, santri harus menjaga ketenangan dan mengembalikan buku tepat waktu. + - Ya, santri baru dapat langsung mendaftar ekskul di awal semester. + - Ya, kurikulum terus dievaluasi dan disesuaikan dengan tantangan era modern. +pipeline_tag: sentence-similarity +library_name: sentence-transformers +metrics: +- pearson_cosine +- spearman_cosine +model-index: +- name: SentenceTransformer based on sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 + results: + - task: + type: semantic-similarity + name: Semantic Similarity + dataset: + name: eval + type: eval + metrics: + - type: pearson_cosine + value: .nan + name: Pearson Cosine + - type: spearman_cosine + value: .nan + name: Spearman Cosine +--- + +# SentenceTransformer based on sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 + +This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more. + +## Model Details + +### Model Description +- **Model Type:** Sentence Transformer +- **Base model:** [sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) +- **Maximum Sequence Length:** 128 tokens +- **Output Dimensionality:** 384 dimensions +- **Similarity Function:** Cosine Similarity + + + + +### Model Sources + +- **Documentation:** [Sentence Transformers Documentation](https://sbert.net) +- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers) +- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers) + +### Full Model Architecture + +``` +SentenceTransformer( + (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel + (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True}) +) +``` + +## Usage + +### Direct Usage (Sentence Transformers) + +First install the Sentence Transformers library: + +```bash +pip install -U sentence-transformers +``` + +Then you can load this model and run inference. +```python +from sentence_transformers import SentenceTransformer + +# Download from the 🤗 Hub +model = SentenceTransformer("sentence_transformers_model_id") +# Run inference +sentences = [ + 'Apakah ekskul dibuka untuk santri baru?', + 'Ya, santri baru dapat langsung mendaftar ekskul di awal semester.', + 'Ya, kurikulum terus dievaluasi dan disesuaikan dengan tantangan era modern.', +] +embeddings = model.encode(sentences) +print(embeddings.shape) +# [3, 384] + +# Get the similarity scores for the embeddings +similarities = model.similarity(embeddings, embeddings) +print(similarities.shape) +# [3, 3] +``` + + + + + + + +## Evaluation + +### Metrics + +#### Semantic Similarity + +* Dataset: `eval` +* Evaluated with [EmbeddingSimilarityEvaluator](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator) + +| Metric | Value | +|:--------------------|:--------| +| pearson_cosine | nan | +| **spearman_cosine** | **nan** | + + + + + +## Training Details + +### Training Dataset + +#### Unnamed Dataset + +* Size: 8,100 training samples +* Columns: sentence_0 and sentence_1 +* Approximate statistics based on the first 1000 samples: + | | sentence_0 | sentence_1 | + |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------| + | type | string | string | + | details |
  • min: 7 tokens
  • mean: 11.19 tokens
  • max: 18 tokens
|
  • min: 9 tokens
  • mean: 15.87 tokens
  • max: 42 tokens
| +* Samples: + | sentence_0 | sentence_1 | + |:------------------------------------------------------------|:----------------------------------------------------------------------------------------------| + | Apakah kurikulum mencakup pendidikan karakter? | Ya, pembinaan karakter menjadi bagian utama kurikulum pesantren. | + | Apakah lingkungan pondok ramah anak? | Ya, desain dan pengawasan mendukung kenyamanan dan keamanan santri. | + | Apakah nilai adab berpengaruh pada kelulusan? | Sangat berpengaruh, nilai adab menjadi pertimbangan utama dalam penilaian akhir. | +* Loss: [MultipleNegativesRankingLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters: + ```json + { + "scale": 20.0, + "similarity_fct": "cos_sim" + } + ``` + +### Training Hyperparameters +#### Non-Default Hyperparameters + +- `eval_strategy`: steps +- `per_device_train_batch_size`: 16 +- `per_device_eval_batch_size`: 16 +- `multi_dataset_batch_sampler`: round_robin + +#### All Hyperparameters +
Click to expand + +- `overwrite_output_dir`: False +- `do_predict`: False +- `eval_strategy`: steps +- `prediction_loss_only`: True +- `per_device_train_batch_size`: 16 +- `per_device_eval_batch_size`: 16 +- `per_gpu_train_batch_size`: None +- `per_gpu_eval_batch_size`: None +- `gradient_accumulation_steps`: 1 +- `eval_accumulation_steps`: None +- `torch_empty_cache_steps`: None +- `learning_rate`: 5e-05 +- `weight_decay`: 0.0 +- `adam_beta1`: 0.9 +- `adam_beta2`: 0.999 +- `adam_epsilon`: 1e-08 +- `max_grad_norm`: 1 +- `num_train_epochs`: 3 +- `max_steps`: -1 +- `lr_scheduler_type`: linear +- `lr_scheduler_kwargs`: {} +- `warmup_ratio`: 0.0 +- `warmup_steps`: 0 +- `log_level`: passive +- `log_level_replica`: warning +- `log_on_each_node`: True +- `logging_nan_inf_filter`: True +- `save_safetensors`: True +- `save_on_each_node`: False +- `save_only_model`: False +- `restore_callback_states_from_checkpoint`: False +- `no_cuda`: False +- `use_cpu`: False +- `use_mps_device`: False +- `seed`: 42 +- `data_seed`: None +- `jit_mode_eval`: False +- `use_ipex`: False +- `bf16`: False +- `fp16`: False +- `fp16_opt_level`: O1 +- `half_precision_backend`: auto +- `bf16_full_eval`: False +- `fp16_full_eval`: False +- `tf32`: None +- `local_rank`: 0 +- `ddp_backend`: None +- `tpu_num_cores`: None +- `tpu_metrics_debug`: False +- `debug`: [] +- `dataloader_drop_last`: False +- `dataloader_num_workers`: 0 +- `dataloader_prefetch_factor`: None +- `past_index`: -1 +- `disable_tqdm`: False +- `remove_unused_columns`: True +- `label_names`: None +- `load_best_model_at_end`: False +- `ignore_data_skip`: False +- `fsdp`: [] +- `fsdp_min_num_params`: 0 +- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False} +- `fsdp_transformer_layer_cls_to_wrap`: None +- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None} +- `deepspeed`: None +- `label_smoothing_factor`: 0.0 +- `optim`: adamw_torch +- `optim_args`: None +- `adafactor`: False +- `group_by_length`: False +- `length_column_name`: length +- `ddp_find_unused_parameters`: None +- `ddp_bucket_cap_mb`: None +- `ddp_broadcast_buffers`: False +- `dataloader_pin_memory`: True +- `dataloader_persistent_workers`: False +- `skip_memory_metrics`: True +- `use_legacy_prediction_loop`: False +- `push_to_hub`: False +- `resume_from_checkpoint`: None +- `hub_model_id`: None +- `hub_strategy`: every_save +- `hub_private_repo`: None +- `hub_always_push`: False +- `gradient_checkpointing`: False +- `gradient_checkpointing_kwargs`: None +- `include_inputs_for_metrics`: False +- `include_for_metrics`: [] +- `eval_do_concat_batches`: True +- `fp16_backend`: auto +- `push_to_hub_model_id`: None +- `push_to_hub_organization`: None +- `mp_parameters`: +- `auto_find_batch_size`: False +- `full_determinism`: False +- `torchdynamo`: None +- `ray_scope`: last +- `ddp_timeout`: 1800 +- `torch_compile`: False +- `torch_compile_backend`: None +- `torch_compile_mode`: None +- `include_tokens_per_second`: False +- `include_num_input_tokens_seen`: False +- `neftune_noise_alpha`: None +- `optim_target_modules`: None +- `batch_eval_metrics`: False +- `eval_on_start`: False +- `use_liger_kernel`: False +- `eval_use_gather_object`: False +- `average_tokens_across_devices`: False +- `prompts`: None +- `batch_sampler`: batch_sampler +- `multi_dataset_batch_sampler`: round_robin + +
+ +### Training Logs +| Epoch | Step | Training Loss | eval_spearman_cosine | +|:------:|:----:|:-------------:|:--------------------:| +| 0.1972 | 100 | - | nan | +| 0.3945 | 200 | - | nan | +| 0.5917 | 300 | - | nan | +| 0.7890 | 400 | - | nan | +| 0.9862 | 500 | 0.28 | nan | +| 1.0 | 507 | - | nan | +| 1.1834 | 600 | - | nan | +| 1.3807 | 700 | - | nan | +| 1.5779 | 800 | - | nan | +| 1.7751 | 900 | - | nan | +| 1.9724 | 1000 | 0.0393 | nan | +| 2.0 | 1014 | - | nan | +| 2.1696 | 1100 | - | nan | +| 2.3669 | 1200 | - | nan | +| 2.5641 | 1300 | - | nan | +| 2.7613 | 1400 | - | nan | +| 2.9586 | 1500 | 0.0274 | nan | + + +### Framework Versions +- Python: 3.11.13 +- Sentence Transformers: 4.1.0 +- Transformers: 4.52.4 +- PyTorch: 2.6.0+cu124 +- Accelerate: 1.7.0 +- Datasets: 2.14.4 +- Tokenizers: 0.21.1 + +## Citation + +### BibTeX + +#### Sentence Transformers +```bibtex +@inproceedings{reimers-2019-sentence-bert, + title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks", + author = "Reimers, Nils and Gurevych, Iryna", + booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing", + month = "11", + year = "2019", + publisher = "Association for Computational Linguistics", + url = "https://arxiv.org/abs/1908.10084", +} +``` + +#### MultipleNegativesRankingLoss +```bibtex +@misc{henderson2017efficient, + title={Efficient Natural Language Response Suggestion for Smart Reply}, + author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil}, + year={2017}, + eprint={1705.00652}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} +``` + + + + + + \ No newline at end of file diff --git a/checkpoint-1521/config.json b/checkpoint-1521/config.json new file mode 100644 index 0000000000000000000000000000000000000000..464f7e70944f84ce569eb4b77fe9edde4962b29c --- /dev/null +++ b/checkpoint-1521/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "BertModel" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 384, + "initializer_range": 0.02, + "intermediate_size": 1536, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.52.4", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 250037 +} diff --git a/checkpoint-1521/config_sentence_transformers.json b/checkpoint-1521/config_sentence_transformers.json new file mode 100644 index 0000000000000000000000000000000000000000..f3cc0c6900ee82c776d67a63601aeed12365ef95 --- /dev/null +++ b/checkpoint-1521/config_sentence_transformers.json @@ -0,0 +1,10 @@ +{ + "__version__": { + "sentence_transformers": "4.1.0", + "transformers": "4.52.4", + "pytorch": "2.6.0+cu124" + }, + "prompts": {}, + "default_prompt_name": null, + "similarity_fn_name": "cosine" +} \ No newline at end of file diff --git a/checkpoint-1521/model.safetensors b/checkpoint-1521/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8f1a3cb48e86a0cdc76bf106bf0df73a297d98e0 --- /dev/null +++ b/checkpoint-1521/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3dcd6d2d9140cbd7c02ecc37a617f845a0054d900bd1eda5819e0d7663ebe2a +size 470637416 diff --git a/checkpoint-1521/modules.json b/checkpoint-1521/modules.json new file mode 100644 index 0000000000000000000000000000000000000000..f7640f94e81bb7f4f04daf1668850b38763a13d9 --- /dev/null +++ b/checkpoint-1521/modules.json @@ -0,0 +1,14 @@ +[ + { + "idx": 0, + "name": "0", + "path": "", + "type": "sentence_transformers.models.Transformer" + }, + { + "idx": 1, + "name": "1", + "path": "1_Pooling", + "type": "sentence_transformers.models.Pooling" + } +] \ No newline at end of file diff --git a/checkpoint-1521/optimizer.pt b/checkpoint-1521/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..60f92a1c9d43f74183ad53da06d2ce10c5f23e0d --- /dev/null +++ b/checkpoint-1521/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:927ca651bdb6433b442b7c5e009f5e424eb2480f9ece6f60717af5c320ac0beb +size 940212218 diff --git a/checkpoint-1521/rng_state.pth b/checkpoint-1521/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..bfe070f5cd2e5ee8ccdc3dac64797d38e9641f78 --- /dev/null +++ b/checkpoint-1521/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c997fe6a93ef53310c727d6001a259df7f798f43848aaa19c09caf8e7df80ab +size 14244 diff --git a/checkpoint-1521/scheduler.pt b/checkpoint-1521/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7dd288ce5ca681f81138fbd401f3d89507d7e5cb --- /dev/null +++ b/checkpoint-1521/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:947e75d17a87abcec3ab62aa97fe552d39f14ab8277603337044f9ca9009bd02 +size 1064 diff --git a/checkpoint-1521/sentence_bert_config.json b/checkpoint-1521/sentence_bert_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5fd10429389515d3e5cccdeda08cae5fea1ae82e --- /dev/null +++ b/checkpoint-1521/sentence_bert_config.json @@ -0,0 +1,4 @@ +{ + "max_seq_length": 128, + "do_lower_case": false +} \ No newline at end of file diff --git a/checkpoint-1521/special_tokens_map.json b/checkpoint-1521/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b1879d702821e753ffe4245048eee415d54a9385 --- /dev/null +++ b/checkpoint-1521/special_tokens_map.json @@ -0,0 +1,51 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "cls_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "mask_token": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "sep_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-1521/tokenizer.json b/checkpoint-1521/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..e3420945e193cc0791136cdc6e5cd69801c838af --- /dev/null +++ b/checkpoint-1521/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cad551d5600a84242d0973327029452a1e3672ba6313c2a3c3d69c4310e12719 +size 17082987 diff --git a/checkpoint-1521/tokenizer_config.json b/checkpoint-1521/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..facf4436a8f11c26085c16a14f4e576853927a9e --- /dev/null +++ b/checkpoint-1521/tokenizer_config.json @@ -0,0 +1,65 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "cls_token": "", + "do_lower_case": true, + "eos_token": "", + "extra_special_tokens": {}, + "mask_token": "", + "max_length": 128, + "model_max_length": 128, + "pad_to_multiple_of": null, + "pad_token": "", + "pad_token_type_id": 0, + "padding_side": "right", + "sep_token": "", + "stride": 0, + "strip_accents": null, + "tokenize_chinese_chars": true, + "tokenizer_class": "BertTokenizer", + "truncation_side": "right", + "truncation_strategy": "longest_first", + "unk_token": "" +} diff --git a/checkpoint-1521/trainer_state.json b/checkpoint-1521/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..36ed564e8265a6ae91afb42c9c2e35e522ad42c9 --- /dev/null +++ b/checkpoint-1521/trainer_state.json @@ -0,0 +1,190 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 100, + "global_step": 1521, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.19723865877712032, + "eval_pearson_cosine": NaN, + "eval_runtime": 1.2181, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 100 + }, + { + "epoch": 0.39447731755424065, + "eval_pearson_cosine": NaN, + "eval_runtime": 1.5897, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 200 + }, + { + "epoch": 0.591715976331361, + "eval_pearson_cosine": NaN, + "eval_runtime": 2.847, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 300 + }, + { + "epoch": 0.7889546351084813, + "eval_pearson_cosine": NaN, + "eval_runtime": 2.166, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 400 + }, + { + "epoch": 0.9861932938856016, + "grad_norm": 8.998483657836914, + "learning_rate": 1.4372355430183358e-05, + "loss": 0.28, + "step": 500 + }, + { + "epoch": 0.9861932938856016, + "eval_pearson_cosine": NaN, + "eval_runtime": 1.2219, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 500 + }, + { + "epoch": 1.183431952662722, + "eval_pearson_cosine": NaN, + "eval_runtime": 3.4138, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 600 + }, + { + "epoch": 1.3806706114398422, + "eval_pearson_cosine": NaN, + "eval_runtime": 1.3764, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 700 + }, + { + "epoch": 1.5779092702169626, + "eval_pearson_cosine": NaN, + "eval_runtime": 1.2387, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 800 + }, + { + "epoch": 1.7751479289940828, + "eval_pearson_cosine": NaN, + "eval_runtime": 1.2026, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 900 + }, + { + "epoch": 1.972386587771203, + "grad_norm": 0.12859900295734406, + "learning_rate": 7.320169252468266e-06, + "loss": 0.0393, + "step": 1000 + }, + { + "epoch": 1.972386587771203, + "eval_pearson_cosine": NaN, + "eval_runtime": 4.1539, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 1000 + }, + { + "epoch": 2.1696252465483234, + "eval_pearson_cosine": NaN, + "eval_runtime": 1.2677, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 1100 + }, + { + "epoch": 2.366863905325444, + "eval_pearson_cosine": NaN, + "eval_runtime": 1.2481, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 1200 + }, + { + "epoch": 2.564102564102564, + "eval_pearson_cosine": NaN, + "eval_runtime": 1.5014, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 1300 + }, + { + "epoch": 2.7613412228796843, + "eval_pearson_cosine": NaN, + "eval_runtime": 1.6007, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 1400 + }, + { + "epoch": 2.9585798816568047, + "grad_norm": 2.3075523376464844, + "learning_rate": 2.679830747531735e-07, + "loss": 0.0274, + "step": 1500 + }, + { + "epoch": 2.9585798816568047, + "eval_pearson_cosine": NaN, + "eval_runtime": 1.6228, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 1500 + } + ], + "logging_steps": 500, + "max_steps": 1521, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1521/training_args.bin b/checkpoint-1521/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..63aeb8a8474ed761c42cad4785153f0f39ee60df --- /dev/null +++ b/checkpoint-1521/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6b0c772261a311993bb357660901823f0e8111b9d4198a0bb2d995ed7daee06 +size 5496 diff --git a/checkpoint-1521/unigram.json b/checkpoint-1521/unigram.json new file mode 100644 index 0000000000000000000000000000000000000000..2faa9ec874108d53a017ff2c7ab98d155fb21a82 --- /dev/null +++ b/checkpoint-1521/unigram.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da145b5e7700ae40f16691ec32a0b1fdc1ee3298db22a31ea55f57a966c4a65d +size 14763260 diff --git a/checkpoint-500/1_Pooling/config.json b/checkpoint-500/1_Pooling/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a97f8d140b6aee43dfac9fc4521b2842657c5608 --- /dev/null +++ b/checkpoint-500/1_Pooling/config.json @@ -0,0 +1,10 @@ +{ + "word_embedding_dimension": 384, + "pooling_mode_cls_token": false, + "pooling_mode_mean_tokens": true, + "pooling_mode_max_tokens": false, + "pooling_mode_mean_sqrt_len_tokens": false, + "pooling_mode_weightedmean_tokens": false, + "pooling_mode_lasttoken": false, + "include_prompt": true +} \ No newline at end of file diff --git a/checkpoint-500/README.md b/checkpoint-500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..0c7523e8087b182ddb288092203c1ddd553dda73 --- /dev/null +++ b/checkpoint-500/README.md @@ -0,0 +1,390 @@ +--- +tags: +- sentence-transformers +- sentence-similarity +- feature-extraction +- generated_from_trainer +- dataset_size:8100 +- loss:MultipleNegativesRankingLoss +base_model: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 +widget: +- source_sentence: Apakah santri boleh keluar pondok saat dikunjungi? + sentences: + - Cukup menghubungi bagian keuangan atau humas PPS. Imam Syafi'i. + - Keluar pondok hanya boleh dengan izin resmi dan keadaan darurat. + - Ya, seperti menjadi ketua kelompok, mengatur antrian, dan memimpin doa. +- source_sentence: Apakah santri boleh membawa HP? + sentences: + - HP tidak diperbolehkan dibawa ke lingkungan pesantren. + - Ya, kurikulum disesuaikan dengan tingkat perkembangan santri. + - Santri akan mendapatkan pendampingan psikologis dan konseling. +- source_sentence: Apakah ada kegiatan kebersihan harian di TK? + sentences: + - Santri mendapat pembinaan khusus dan apresiasi. + - Ya, setiap pagi santri melakukan piket kebersihan lingkungan sesuai jadwal. + - Ya, kurikulum disesuaikan dengan tingkat perkembangan santri. +- source_sentence: Apakah ada buku panduan bagi wali santri baru? + sentences: + - Wali harus mengajukan surat izin resmi dan mendapat persetujuan pengasuh. + - Ekskul dapat diganti satu kali di tengah semester dengan izin wali kelas. + - Ya, setiap wali mendapat buku panduan saat pendaftaran. +- source_sentence: Apakah ekskul dibuka untuk santri baru? + sentences: + - Ya, santri harus menjaga ketenangan dan mengembalikan buku tepat waktu. + - Ya, santri baru dapat langsung mendaftar ekskul di awal semester. + - Ya, kurikulum terus dievaluasi dan disesuaikan dengan tantangan era modern. +pipeline_tag: sentence-similarity +library_name: sentence-transformers +metrics: +- pearson_cosine +- spearman_cosine +model-index: +- name: SentenceTransformer based on sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 + results: + - task: + type: semantic-similarity + name: Semantic Similarity + dataset: + name: eval + type: eval + metrics: + - type: pearson_cosine + value: .nan + name: Pearson Cosine + - type: spearman_cosine + value: .nan + name: Spearman Cosine +--- + +# SentenceTransformer based on sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 + +This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more. + +## Model Details + +### Model Description +- **Model Type:** Sentence Transformer +- **Base model:** [sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) +- **Maximum Sequence Length:** 128 tokens +- **Output Dimensionality:** 384 dimensions +- **Similarity Function:** Cosine Similarity + + + + +### Model Sources + +- **Documentation:** [Sentence Transformers Documentation](https://sbert.net) +- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers) +- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers) + +### Full Model Architecture + +``` +SentenceTransformer( + (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel + (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True}) +) +``` + +## Usage + +### Direct Usage (Sentence Transformers) + +First install the Sentence Transformers library: + +```bash +pip install -U sentence-transformers +``` + +Then you can load this model and run inference. +```python +from sentence_transformers import SentenceTransformer + +# Download from the 🤗 Hub +model = SentenceTransformer("sentence_transformers_model_id") +# Run inference +sentences = [ + 'Apakah ekskul dibuka untuk santri baru?', + 'Ya, santri baru dapat langsung mendaftar ekskul di awal semester.', + 'Ya, kurikulum terus dievaluasi dan disesuaikan dengan tantangan era modern.', +] +embeddings = model.encode(sentences) +print(embeddings.shape) +# [3, 384] + +# Get the similarity scores for the embeddings +similarities = model.similarity(embeddings, embeddings) +print(similarities.shape) +# [3, 3] +``` + + + + + + + +## Evaluation + +### Metrics + +#### Semantic Similarity + +* Dataset: `eval` +* Evaluated with [EmbeddingSimilarityEvaluator](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator) + +| Metric | Value | +|:--------------------|:--------| +| pearson_cosine | nan | +| **spearman_cosine** | **nan** | + + + + + +## Training Details + +### Training Dataset + +#### Unnamed Dataset + +* Size: 8,100 training samples +* Columns: sentence_0 and sentence_1 +* Approximate statistics based on the first 1000 samples: + | | sentence_0 | sentence_1 | + |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------| + | type | string | string | + | details |
  • min: 7 tokens
  • mean: 11.19 tokens
  • max: 18 tokens
|
  • min: 9 tokens
  • mean: 15.87 tokens
  • max: 42 tokens
| +* Samples: + | sentence_0 | sentence_1 | + |:------------------------------------------------------------|:----------------------------------------------------------------------------------------------| + | Apakah kurikulum mencakup pendidikan karakter? | Ya, pembinaan karakter menjadi bagian utama kurikulum pesantren. | + | Apakah lingkungan pondok ramah anak? | Ya, desain dan pengawasan mendukung kenyamanan dan keamanan santri. | + | Apakah nilai adab berpengaruh pada kelulusan? | Sangat berpengaruh, nilai adab menjadi pertimbangan utama dalam penilaian akhir. | +* Loss: [MultipleNegativesRankingLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters: + ```json + { + "scale": 20.0, + "similarity_fct": "cos_sim" + } + ``` + +### Training Hyperparameters +#### Non-Default Hyperparameters + +- `eval_strategy`: steps +- `per_device_train_batch_size`: 16 +- `per_device_eval_batch_size`: 16 +- `multi_dataset_batch_sampler`: round_robin + +#### All Hyperparameters +
Click to expand + +- `overwrite_output_dir`: False +- `do_predict`: False +- `eval_strategy`: steps +- `prediction_loss_only`: True +- `per_device_train_batch_size`: 16 +- `per_device_eval_batch_size`: 16 +- `per_gpu_train_batch_size`: None +- `per_gpu_eval_batch_size`: None +- `gradient_accumulation_steps`: 1 +- `eval_accumulation_steps`: None +- `torch_empty_cache_steps`: None +- `learning_rate`: 5e-05 +- `weight_decay`: 0.0 +- `adam_beta1`: 0.9 +- `adam_beta2`: 0.999 +- `adam_epsilon`: 1e-08 +- `max_grad_norm`: 1 +- `num_train_epochs`: 3 +- `max_steps`: -1 +- `lr_scheduler_type`: linear +- `lr_scheduler_kwargs`: {} +- `warmup_ratio`: 0.0 +- `warmup_steps`: 0 +- `log_level`: passive +- `log_level_replica`: warning +- `log_on_each_node`: True +- `logging_nan_inf_filter`: True +- `save_safetensors`: True +- `save_on_each_node`: False +- `save_only_model`: False +- `restore_callback_states_from_checkpoint`: False +- `no_cuda`: False +- `use_cpu`: False +- `use_mps_device`: False +- `seed`: 42 +- `data_seed`: None +- `jit_mode_eval`: False +- `use_ipex`: False +- `bf16`: False +- `fp16`: False +- `fp16_opt_level`: O1 +- `half_precision_backend`: auto +- `bf16_full_eval`: False +- `fp16_full_eval`: False +- `tf32`: None +- `local_rank`: 0 +- `ddp_backend`: None +- `tpu_num_cores`: None +- `tpu_metrics_debug`: False +- `debug`: [] +- `dataloader_drop_last`: False +- `dataloader_num_workers`: 0 +- `dataloader_prefetch_factor`: None +- `past_index`: -1 +- `disable_tqdm`: False +- `remove_unused_columns`: True +- `label_names`: None +- `load_best_model_at_end`: False +- `ignore_data_skip`: False +- `fsdp`: [] +- `fsdp_min_num_params`: 0 +- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False} +- `fsdp_transformer_layer_cls_to_wrap`: None +- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None} +- `deepspeed`: None +- `label_smoothing_factor`: 0.0 +- `optim`: adamw_torch +- `optim_args`: None +- `adafactor`: False +- `group_by_length`: False +- `length_column_name`: length +- `ddp_find_unused_parameters`: None +- `ddp_bucket_cap_mb`: None +- `ddp_broadcast_buffers`: False +- `dataloader_pin_memory`: True +- `dataloader_persistent_workers`: False +- `skip_memory_metrics`: True +- `use_legacy_prediction_loop`: False +- `push_to_hub`: False +- `resume_from_checkpoint`: None +- `hub_model_id`: None +- `hub_strategy`: every_save +- `hub_private_repo`: None +- `hub_always_push`: False +- `gradient_checkpointing`: False +- `gradient_checkpointing_kwargs`: None +- `include_inputs_for_metrics`: False +- `include_for_metrics`: [] +- `eval_do_concat_batches`: True +- `fp16_backend`: auto +- `push_to_hub_model_id`: None +- `push_to_hub_organization`: None +- `mp_parameters`: +- `auto_find_batch_size`: False +- `full_determinism`: False +- `torchdynamo`: None +- `ray_scope`: last +- `ddp_timeout`: 1800 +- `torch_compile`: False +- `torch_compile_backend`: None +- `torch_compile_mode`: None +- `include_tokens_per_second`: False +- `include_num_input_tokens_seen`: False +- `neftune_noise_alpha`: None +- `optim_target_modules`: None +- `batch_eval_metrics`: False +- `eval_on_start`: False +- `use_liger_kernel`: False +- `eval_use_gather_object`: False +- `average_tokens_across_devices`: False +- `prompts`: None +- `batch_sampler`: batch_sampler +- `multi_dataset_batch_sampler`: round_robin + +
+ +### Training Logs +| Epoch | Step | Training Loss | eval_spearman_cosine | +|:------:|:----:|:-------------:|:--------------------:| +| 0.1972 | 100 | - | nan | +| 0.3945 | 200 | - | nan | +| 0.5917 | 300 | - | nan | +| 0.7890 | 400 | - | nan | +| 0.9862 | 500 | 0.28 | nan | + + +### Framework Versions +- Python: 3.11.13 +- Sentence Transformers: 4.1.0 +- Transformers: 4.52.4 +- PyTorch: 2.6.0+cu124 +- Accelerate: 1.7.0 +- Datasets: 2.14.4 +- Tokenizers: 0.21.1 + +## Citation + +### BibTeX + +#### Sentence Transformers +```bibtex +@inproceedings{reimers-2019-sentence-bert, + title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks", + author = "Reimers, Nils and Gurevych, Iryna", + booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing", + month = "11", + year = "2019", + publisher = "Association for Computational Linguistics", + url = "https://arxiv.org/abs/1908.10084", +} +``` + +#### MultipleNegativesRankingLoss +```bibtex +@misc{henderson2017efficient, + title={Efficient Natural Language Response Suggestion for Smart Reply}, + author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil}, + year={2017}, + eprint={1705.00652}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} +``` + + + + + + \ No newline at end of file diff --git a/checkpoint-500/config.json b/checkpoint-500/config.json new file mode 100644 index 0000000000000000000000000000000000000000..464f7e70944f84ce569eb4b77fe9edde4962b29c --- /dev/null +++ b/checkpoint-500/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "BertModel" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 384, + "initializer_range": 0.02, + "intermediate_size": 1536, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.52.4", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 250037 +} diff --git a/checkpoint-500/config_sentence_transformers.json b/checkpoint-500/config_sentence_transformers.json new file mode 100644 index 0000000000000000000000000000000000000000..f3cc0c6900ee82c776d67a63601aeed12365ef95 --- /dev/null +++ b/checkpoint-500/config_sentence_transformers.json @@ -0,0 +1,10 @@ +{ + "__version__": { + "sentence_transformers": "4.1.0", + "transformers": "4.52.4", + "pytorch": "2.6.0+cu124" + }, + "prompts": {}, + "default_prompt_name": null, + "similarity_fn_name": "cosine" +} \ No newline at end of file diff --git a/checkpoint-500/model.safetensors b/checkpoint-500/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d53d1553eb5852fd1146f07c889f8efa176e9192 --- /dev/null +++ b/checkpoint-500/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c771074344c171f649113f187ee4f5e84cf74141bf79c68fa7fe14090af07d37 +size 470637416 diff --git a/checkpoint-500/modules.json b/checkpoint-500/modules.json new file mode 100644 index 0000000000000000000000000000000000000000..f7640f94e81bb7f4f04daf1668850b38763a13d9 --- /dev/null +++ b/checkpoint-500/modules.json @@ -0,0 +1,14 @@ +[ + { + "idx": 0, + "name": "0", + "path": "", + "type": "sentence_transformers.models.Transformer" + }, + { + "idx": 1, + "name": "1", + "path": "1_Pooling", + "type": "sentence_transformers.models.Pooling" + } +] \ No newline at end of file diff --git a/checkpoint-500/optimizer.pt b/checkpoint-500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..45543f0c3f65e4aa45f79c810a9593bca1c8be4d --- /dev/null +++ b/checkpoint-500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2992e11c547539a22925a87274f5c3a206e4fda4a48148c1271af5e148086e5c +size 940212218 diff --git a/checkpoint-500/rng_state.pth b/checkpoint-500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..68ba81f09bb5ffe6bcaebd576450f349aec26828 --- /dev/null +++ b/checkpoint-500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cd03621e96c47ff7577267b655d719a9eb8e326908859a1ca544561c0bca02a +size 14244 diff --git a/checkpoint-500/scheduler.pt b/checkpoint-500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..29d55299f56ab2c70c50dacfe5ce1e83b5e13c81 --- /dev/null +++ b/checkpoint-500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5c9e223a2a1793197c54539a9896f4bdacadd79d538180dec0c217c6d19c0b0 +size 1064 diff --git a/checkpoint-500/sentence_bert_config.json b/checkpoint-500/sentence_bert_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5fd10429389515d3e5cccdeda08cae5fea1ae82e --- /dev/null +++ b/checkpoint-500/sentence_bert_config.json @@ -0,0 +1,4 @@ +{ + "max_seq_length": 128, + "do_lower_case": false +} \ No newline at end of file diff --git a/checkpoint-500/special_tokens_map.json b/checkpoint-500/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b1879d702821e753ffe4245048eee415d54a9385 --- /dev/null +++ b/checkpoint-500/special_tokens_map.json @@ -0,0 +1,51 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "cls_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "mask_token": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "sep_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-500/tokenizer.json b/checkpoint-500/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..e3420945e193cc0791136cdc6e5cd69801c838af --- /dev/null +++ b/checkpoint-500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cad551d5600a84242d0973327029452a1e3672ba6313c2a3c3d69c4310e12719 +size 17082987 diff --git a/checkpoint-500/tokenizer_config.json b/checkpoint-500/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..facf4436a8f11c26085c16a14f4e576853927a9e --- /dev/null +++ b/checkpoint-500/tokenizer_config.json @@ -0,0 +1,65 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "cls_token": "", + "do_lower_case": true, + "eos_token": "", + "extra_special_tokens": {}, + "mask_token": "", + "max_length": 128, + "model_max_length": 128, + "pad_to_multiple_of": null, + "pad_token": "", + "pad_token_type_id": 0, + "padding_side": "right", + "sep_token": "", + "stride": 0, + "strip_accents": null, + "tokenize_chinese_chars": true, + "tokenizer_class": "BertTokenizer", + "truncation_side": "right", + "truncation_strategy": "longest_first", + "unk_token": "" +} diff --git a/checkpoint-500/trainer_state.json b/checkpoint-500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..02d115c6e24f8ecf08c3ba3cc738cebbf8c8d8ea --- /dev/null +++ b/checkpoint-500/trainer_state.json @@ -0,0 +1,86 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9861932938856016, + "eval_steps": 100, + "global_step": 500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.19723865877712032, + "eval_pearson_cosine": NaN, + "eval_runtime": 1.2181, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 100 + }, + { + "epoch": 0.39447731755424065, + "eval_pearson_cosine": NaN, + "eval_runtime": 1.5897, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 200 + }, + { + "epoch": 0.591715976331361, + "eval_pearson_cosine": NaN, + "eval_runtime": 2.847, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 300 + }, + { + "epoch": 0.7889546351084813, + "eval_pearson_cosine": NaN, + "eval_runtime": 2.166, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 400 + }, + { + "epoch": 0.9861932938856016, + "grad_norm": 8.998483657836914, + "learning_rate": 1.4372355430183358e-05, + "loss": 0.28, + "step": 500 + }, + { + "epoch": 0.9861932938856016, + "eval_pearson_cosine": NaN, + "eval_runtime": 1.2219, + "eval_samples_per_second": 0.0, + "eval_spearman_cosine": NaN, + "eval_steps_per_second": 0.0, + "step": 500 + } + ], + "logging_steps": 500, + "max_steps": 1521, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-500/training_args.bin b/checkpoint-500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..63aeb8a8474ed761c42cad4785153f0f39ee60df --- /dev/null +++ b/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6b0c772261a311993bb357660901823f0e8111b9d4198a0bb2d995ed7daee06 +size 5496 diff --git a/checkpoint-500/unigram.json b/checkpoint-500/unigram.json new file mode 100644 index 0000000000000000000000000000000000000000..2faa9ec874108d53a017ff2c7ab98d155fb21a82 --- /dev/null +++ b/checkpoint-500/unigram.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da145b5e7700ae40f16691ec32a0b1fdc1ee3298db22a31ea55f57a966c4a65d +size 14763260 diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..464f7e70944f84ce569eb4b77fe9edde4962b29c --- /dev/null +++ b/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "BertModel" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 384, + "initializer_range": 0.02, + "intermediate_size": 1536, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.52.4", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 250037 +} diff --git a/config_sentence_transformers.json b/config_sentence_transformers.json new file mode 100644 index 0000000000000000000000000000000000000000..f3cc0c6900ee82c776d67a63601aeed12365ef95 --- /dev/null +++ b/config_sentence_transformers.json @@ -0,0 +1,10 @@ +{ + "__version__": { + "sentence_transformers": "4.1.0", + "transformers": "4.52.4", + "pytorch": "2.6.0+cu124" + }, + "prompts": {}, + "default_prompt_name": null, + "similarity_fn_name": "cosine" +} \ No newline at end of file diff --git a/eval/similarity_evaluation_eval_results.csv b/eval/similarity_evaluation_eval_results.csv new file mode 100644 index 0000000000000000000000000000000000000000..24358b4acf4a3e4d207e5f89ce6bf03fd53e513b --- /dev/null +++ b/eval/similarity_evaluation_eval_results.csv @@ -0,0 +1,19 @@ +epoch,steps,cosine_pearson,cosine_spearman +0.19723865877712032,100,nan,nan +0.39447731755424065,200,nan,nan +0.591715976331361,300,nan,nan +0.7889546351084813,400,nan,nan +0.9861932938856016,500,nan,nan +1.0,507,nan,nan +1.183431952662722,600,nan,nan +1.3806706114398422,700,nan,nan +1.5779092702169626,800,nan,nan +1.7751479289940828,900,nan,nan +1.972386587771203,1000,nan,nan +2.0,1014,nan,nan +2.1696252465483234,1100,nan,nan +2.366863905325444,1200,nan,nan +2.564102564102564,1300,nan,nan +2.7613412228796843,1400,nan,nan +2.9585798816568047,1500,nan,nan +3.0,1521,nan,nan diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4f8c582d18d494f8fd9cd00e434956c8b76ca2fd --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0e6a54b26b99f52ef388fcc084f0bf3dae92f31cfd2780aca75b654a351d2e7 +size 470637416 diff --git a/modules.json b/modules.json new file mode 100644 index 0000000000000000000000000000000000000000..f7640f94e81bb7f4f04daf1668850b38763a13d9 --- /dev/null +++ b/modules.json @@ -0,0 +1,14 @@ +[ + { + "idx": 0, + "name": "0", + "path": "", + "type": "sentence_transformers.models.Transformer" + }, + { + "idx": 1, + "name": "1", + "path": "1_Pooling", + "type": "sentence_transformers.models.Pooling" + } +] \ No newline at end of file diff --git a/runs/Jun16_11-52-47_d94e51fe69b2/events.out.tfevents.1750074768.d94e51fe69b2.313.0 b/runs/Jun16_11-52-47_d94e51fe69b2/events.out.tfevents.1750074768.d94e51fe69b2.313.0 new file mode 100644 index 0000000000000000000000000000000000000000..15e122d1c117e6c3b77d8af8674bc76cf60b3584 --- /dev/null +++ b/runs/Jun16_11-52-47_d94e51fe69b2/events.out.tfevents.1750074768.d94e51fe69b2.313.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c86bca098d95b13e092e9aabc03d87a5a90a0577f95f68c60500d26c4e8a729b +size 10381 diff --git a/sentence_bert_config.json b/sentence_bert_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5fd10429389515d3e5cccdeda08cae5fea1ae82e --- /dev/null +++ b/sentence_bert_config.json @@ -0,0 +1,4 @@ +{ + "max_seq_length": 128, + "do_lower_case": false +} \ No newline at end of file diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b1879d702821e753ffe4245048eee415d54a9385 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,51 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "cls_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "mask_token": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "sep_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..e3420945e193cc0791136cdc6e5cd69801c838af --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cad551d5600a84242d0973327029452a1e3672ba6313c2a3c3d69c4310e12719 +size 17082987 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..facf4436a8f11c26085c16a14f4e576853927a9e --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,65 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "cls_token": "", + "do_lower_case": true, + "eos_token": "", + "extra_special_tokens": {}, + "mask_token": "", + "max_length": 128, + "model_max_length": 128, + "pad_to_multiple_of": null, + "pad_token": "", + "pad_token_type_id": 0, + "padding_side": "right", + "sep_token": "", + "stride": 0, + "strip_accents": null, + "tokenize_chinese_chars": true, + "tokenizer_class": "BertTokenizer", + "truncation_side": "right", + "truncation_strategy": "longest_first", + "unk_token": "" +} diff --git a/unigram.json b/unigram.json new file mode 100644 index 0000000000000000000000000000000000000000..2faa9ec874108d53a017ff2c7ab98d155fb21a82 --- /dev/null +++ b/unigram.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da145b5e7700ae40f16691ec32a0b1fdc1ee3298db22a31ea55f57a966c4a65d +size 14763260