Sentence Similarity
sentence-transformers
Safetensors
feature-extraction
dense
Generated from Trainer
dataset_size:40374
loss:MultipleNegativesRankingLoss
Eval Results (legacy)
Instructions to use vrnP66/finetuned-embedding-model with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- sentence-transformers
How to use vrnP66/finetuned-embedding-model with sentence-transformers:
```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("vrnP66/finetuned-embedding-model")

sentences = [
    "यथोवाच भगवान् धन्वन्तरिः ||२||",
    "**Ashtanga Hridayam, Uttara Sthana, chapter 22, sutra 106**\n\n**Sutra**:\nपटोल-निम्ब-यष्ट्य्-आह्व-वासा-जात्य्-अरिमेदसाम् । खदिरस्य वरायाश् च पृथग् एवं प्रकल्पना ॥ १०६ ॥\n\n**English Transliteration**:\npaṭola-nimba-yaṣṭy-āhva-vāsā-jāty-arimedasām | khadirasya varāyāś ca pṛthag evaṁ prakalpanā || 106 ||\n\n**English Translation**:\nThus, a separate preparation should be made from patola, nimba, licorice, vasa, jati, arimedasa, khadira, and vara.",
    "**Susrut Samhita, Sharira Sthana, chapter 9, sutra 2**\n\n**Sutra**:\nयथोवाच भगवान् धन्वन्तरिः ||२||\n\n**English Transliteration**:\nyathovāca bhagavān dhanvantariḥ ||2||\n\n**English Translation**:\nThus spoke the venerable Dhanvantari.",
    "**Susrut Samhita, Chikitsa Sthana, chapter 24, sutra 85**\n\n**Sutra**:\nसुखं वातं प्रसेवेत ग्रीष्मे शरदि मानवः | निवातं ह्यायुषे सेव्यमारोग्याय च सर्वदा ||८५||\n\n**English Transliteration**:\nsukhaṃ vātaṃ praseveta grīṣme śaradi mānavaḥ | nivātaṃ hyāyuṣe sevyamārogyāya ca sarvadā ||85||\n\n**English Translation**:\nA person should enjoy pleasant wind in summer and autumn. Absence of wind is always beneficial for longevity and health.",
]
embeddings = model.encode(sentences)

similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)  # [4, 4]
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.0, | |
| "eval_steps": 100, | |
| "global_step": 5048, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.07923930269413629, | |
| "grad_norm": 0.36957958340644836, | |
| "learning_rate": 9.801980198019804e-06, | |
| "loss": 1.0261, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.07923930269413629, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.9996037483215332, | |
| "eval_loss": 0.009768067859113216, | |
| "eval_runtime": 34.4825, | |
| "eval_samples_per_second": 146.364, | |
| "eval_steps_per_second": 4.582, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.15847860538827258, | |
| "grad_norm": 0.15150195360183716, | |
| "learning_rate": 1.9702970297029703e-05, | |
| "loss": 0.0086, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.15847860538827258, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.999405562877655, | |
| "eval_loss": 0.006445930805057287, | |
| "eval_runtime": 33.42, | |
| "eval_samples_per_second": 151.017, | |
| "eval_steps_per_second": 4.728, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.23771790808240886, | |
| "grad_norm": 0.24285651743412018, | |
| "learning_rate": 2.9603960396039603e-05, | |
| "loss": 0.0043, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.23771790808240886, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.9996037483215332, | |
| "eval_loss": 0.00557651836425066, | |
| "eval_runtime": 32.8039, | |
| "eval_samples_per_second": 153.854, | |
| "eval_steps_per_second": 4.816, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.31695721077654515, | |
| "grad_norm": 0.061441030353307724, | |
| "learning_rate": 3.950495049504951e-05, | |
| "loss": 0.0051, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.31695721077654515, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.999405562877655, | |
| "eval_loss": 0.005679984577000141, | |
| "eval_runtime": 32.9416, | |
| "eval_samples_per_second": 153.21, | |
| "eval_steps_per_second": 4.796, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.39619651347068147, | |
| "grad_norm": 0.09473396837711334, | |
| "learning_rate": 4.9405940594059405e-05, | |
| "loss": 0.0068, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.39619651347068147, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 1.0, | |
| "eval_loss": 0.008005364798009396, | |
| "eval_runtime": 32.9704, | |
| "eval_samples_per_second": 153.077, | |
| "eval_steps_per_second": 4.792, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.4754358161648177, | |
| "grad_norm": 2.07448148727417, | |
| "learning_rate": 4.8965441338322695e-05, | |
| "loss": 0.0078, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.4754358161648177, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.9998018741607666, | |
| "eval_loss": 0.007311029359698296, | |
| "eval_runtime": 33.4995, | |
| "eval_samples_per_second": 150.659, | |
| "eval_steps_per_second": 4.716, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.554675118858954, | |
| "grad_norm": 0.44440600275993347, | |
| "learning_rate": 4.7864847017389395e-05, | |
| "loss": 0.0066, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.554675118858954, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.999405562877655, | |
| "eval_loss": 0.00900042150169611, | |
| "eval_runtime": 33.8286, | |
| "eval_samples_per_second": 149.193, | |
| "eval_steps_per_second": 4.671, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.6339144215530903, | |
| "grad_norm": 0.06913918256759644, | |
| "learning_rate": 4.676425269645609e-05, | |
| "loss": 0.0081, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.6339144215530903, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.9996037483215332, | |
| "eval_loss": 0.0071762725710868835, | |
| "eval_runtime": 33.3998, | |
| "eval_samples_per_second": 151.109, | |
| "eval_steps_per_second": 4.731, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.7131537242472267, | |
| "grad_norm": 0.7136378884315491, | |
| "learning_rate": 4.566365837552278e-05, | |
| "loss": 0.0096, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.7131537242472267, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.999405562877655, | |
| "eval_loss": 0.008276536129415035, | |
| "eval_runtime": 33.755, | |
| "eval_samples_per_second": 149.519, | |
| "eval_steps_per_second": 4.681, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.7923930269413629, | |
| "grad_norm": 0.5450271964073181, | |
| "learning_rate": 4.456306405458948e-05, | |
| "loss": 0.0088, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.7923930269413629, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.999405562877655, | |
| "eval_loss": 0.006255414802581072, | |
| "eval_runtime": 33.084, | |
| "eval_samples_per_second": 152.551, | |
| "eval_steps_per_second": 4.776, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.8716323296354992, | |
| "grad_norm": 0.026704631745815277, | |
| "learning_rate": 4.346246973365617e-05, | |
| "loss": 0.0087, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.8716323296354992, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.999405562877655, | |
| "eval_loss": 0.006418165750801563, | |
| "eval_runtime": 32.6589, | |
| "eval_samples_per_second": 154.537, | |
| "eval_steps_per_second": 4.838, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.9508716323296355, | |
| "grad_norm": 2.8012053966522217, | |
| "learning_rate": 4.236187541272287e-05, | |
| "loss": 0.0096, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.9508716323296355, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.9996037483215332, | |
| "eval_loss": 0.006508174352347851, | |
| "eval_runtime": 33.0205, | |
| "eval_samples_per_second": 152.844, | |
| "eval_steps_per_second": 4.785, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.0301109350237718, | |
| "grad_norm": 0.059912703931331635, | |
| "learning_rate": 4.1261281091789564e-05, | |
| "loss": 0.0085, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.0301109350237718, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.9998018741607666, | |
| "eval_loss": 0.005652877036482096, | |
| "eval_runtime": 32.9331, | |
| "eval_samples_per_second": 153.25, | |
| "eval_steps_per_second": 4.798, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.109350237717908, | |
| "grad_norm": 0.039631109684705734, | |
| "learning_rate": 4.0160686770856264e-05, | |
| "loss": 0.0049, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.109350237717908, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.9998018741607666, | |
| "eval_loss": 0.004980658181011677, | |
| "eval_runtime": 34.2128, | |
| "eval_samples_per_second": 147.518, | |
| "eval_steps_per_second": 4.618, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.1885895404120443, | |
| "grad_norm": 0.02145099826157093, | |
| "learning_rate": 3.9060092449922957e-05, | |
| "loss": 0.0048, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.1885895404120443, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.9996037483215332, | |
| "eval_loss": 0.005376764573156834, | |
| "eval_runtime": 32.9215, | |
| "eval_samples_per_second": 153.304, | |
| "eval_steps_per_second": 4.799, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.2678288431061806, | |
| "grad_norm": 1.3420361280441284, | |
| "learning_rate": 3.7959498128989656e-05, | |
| "loss": 0.0032, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.2678288431061806, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.9998018741607666, | |
| "eval_loss": 0.005436555948108435, | |
| "eval_runtime": 66.7901, | |
| "eval_samples_per_second": 75.565, | |
| "eval_steps_per_second": 2.366, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.3470681458003169, | |
| "grad_norm": 0.040875934064388275, | |
| "learning_rate": 3.685890380805635e-05, | |
| "loss": 0.0017, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.3470681458003169, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.999405562877655, | |
| "eval_loss": 0.006017347332090139, | |
| "eval_runtime": 67.8654, | |
| "eval_samples_per_second": 74.368, | |
| "eval_steps_per_second": 2.328, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.4263074484944531, | |
| "grad_norm": 0.030839553102850914, | |
| "learning_rate": 3.575830948712305e-05, | |
| "loss": 0.0032, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.4263074484944531, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.999405562877655, | |
| "eval_loss": 0.0059111895971000195, | |
| "eval_runtime": 67.895, | |
| "eval_samples_per_second": 74.335, | |
| "eval_steps_per_second": 2.327, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.5055467511885894, | |
| "grad_norm": 0.06008416414260864, | |
| "learning_rate": 3.465771516618974e-05, | |
| "loss": 0.0072, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.5055467511885894, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.9996037483215332, | |
| "eval_loss": 0.006080140359699726, | |
| "eval_runtime": 67.931, | |
| "eval_samples_per_second": 74.296, | |
| "eval_steps_per_second": 2.326, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.5847860538827259, | |
| "grad_norm": 0.02880307100713253, | |
| "learning_rate": 3.355712084525644e-05, | |
| "loss": 0.0077, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.5847860538827259, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.999405562877655, | |
| "eval_loss": 0.007432046812027693, | |
| "eval_runtime": 67.8369, | |
| "eval_samples_per_second": 74.399, | |
| "eval_steps_per_second": 2.329, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.6640253565768621, | |
| "grad_norm": 0.1070467010140419, | |
| "learning_rate": 3.245652652432314e-05, | |
| "loss": 0.0068, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.6640253565768621, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.9952446818351746, | |
| "eval_loss": 0.08792955428361893, | |
| "eval_runtime": 67.8539, | |
| "eval_samples_per_second": 74.38, | |
| "eval_steps_per_second": 2.329, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.7432646592709984, | |
| "grad_norm": 0.05642708018422127, | |
| "learning_rate": 3.135593220338983e-05, | |
| "loss": 0.0056, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.7432646592709984, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.9996037483215332, | |
| "eval_loss": 0.006125820800662041, | |
| "eval_runtime": 67.8381, | |
| "eval_samples_per_second": 74.398, | |
| "eval_steps_per_second": 2.329, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.8225039619651349, | |
| "grad_norm": 0.04333237186074257, | |
| "learning_rate": 3.0255337882456532e-05, | |
| "loss": 0.0087, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.8225039619651349, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 1.0, | |
| "eval_loss": 0.005210440140217543, | |
| "eval_runtime": 68.0258, | |
| "eval_samples_per_second": 74.192, | |
| "eval_steps_per_second": 2.323, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.9017432646592711, | |
| "grad_norm": 0.10529103130102158, | |
| "learning_rate": 2.9154743561523225e-05, | |
| "loss": 0.0112, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.9017432646592711, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.9998018741607666, | |
| "eval_loss": 0.004964636173099279, | |
| "eval_runtime": 67.9671, | |
| "eval_samples_per_second": 74.257, | |
| "eval_steps_per_second": 2.325, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.9809825673534074, | |
| "grad_norm": 0.030080392956733704, | |
| "learning_rate": 2.805414924058992e-05, | |
| "loss": 0.0036, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.9809825673534074, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.999405562877655, | |
| "eval_loss": 0.003934075124561787, | |
| "eval_runtime": 67.9768, | |
| "eval_samples_per_second": 74.246, | |
| "eval_steps_per_second": 2.324, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.0602218700475436, | |
| "grad_norm": 0.05913154035806656, | |
| "learning_rate": 2.6953554919656613e-05, | |
| "loss": 0.0047, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 2.0602218700475436, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.999405562877655, | |
| "eval_loss": 0.004718616604804993, | |
| "eval_runtime": 68.1157, | |
| "eval_samples_per_second": 74.095, | |
| "eval_steps_per_second": 2.32, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 2.13946117274168, | |
| "grad_norm": 0.02771185152232647, | |
| "learning_rate": 2.5852960598723313e-05, | |
| "loss": 0.0054, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 2.13946117274168, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.9998018741607666, | |
| "eval_loss": 0.007168024778366089, | |
| "eval_runtime": 67.8508, | |
| "eval_samples_per_second": 74.384, | |
| "eval_steps_per_second": 2.329, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 2.218700475435816, | |
| "grad_norm": 0.023437298834323883, | |
| "learning_rate": 2.4752366277790006e-05, | |
| "loss": 0.0052, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 2.218700475435816, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.9998018741607666, | |
| "eval_loss": 0.004668584559112787, | |
| "eval_runtime": 67.8782, | |
| "eval_samples_per_second": 74.354, | |
| "eval_steps_per_second": 2.328, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 2.2979397781299524, | |
| "grad_norm": 0.11916761100292206, | |
| "learning_rate": 2.36517719568567e-05, | |
| "loss": 0.0044, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 2.2979397781299524, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.9996037483215332, | |
| "eval_loss": 0.0058599598705768585, | |
| "eval_runtime": 67.9158, | |
| "eval_samples_per_second": 74.313, | |
| "eval_steps_per_second": 2.326, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 2.3771790808240887, | |
| "grad_norm": 0.10791371762752533, | |
| "learning_rate": 2.2551177635923398e-05, | |
| "loss": 0.0051, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.3771790808240887, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.9996037483215332, | |
| "eval_loss": 0.00463093351572752, | |
| "eval_runtime": 68.0279, | |
| "eval_samples_per_second": 74.19, | |
| "eval_steps_per_second": 2.323, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.456418383518225, | |
| "grad_norm": 6.349626064300537, | |
| "learning_rate": 2.1450583314990097e-05, | |
| "loss": 0.0068, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 2.456418383518225, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.9996037483215332, | |
| "eval_loss": 0.008199479430913925, | |
| "eval_runtime": 68.0218, | |
| "eval_samples_per_second": 74.197, | |
| "eval_steps_per_second": 2.323, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 2.535657686212361, | |
| "grad_norm": 0.07518544048070908, | |
| "learning_rate": 2.0349988994056793e-05, | |
| "loss": 0.0051, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 2.535657686212361, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.9996037483215332, | |
| "eval_loss": 0.004618997685611248, | |
| "eval_runtime": 67.9844, | |
| "eval_samples_per_second": 74.238, | |
| "eval_steps_per_second": 2.324, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 2.6148969889064975, | |
| "grad_norm": 0.6720037460327148, | |
| "learning_rate": 1.924939467312349e-05, | |
| "loss": 0.0025, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 2.6148969889064975, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.9998018741607666, | |
| "eval_loss": 0.005006096325814724, | |
| "eval_runtime": 67.9382, | |
| "eval_samples_per_second": 74.288, | |
| "eval_steps_per_second": 2.326, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 2.6941362916006337, | |
| "grad_norm": 0.0643150731921196, | |
| "learning_rate": 1.8148800352190185e-05, | |
| "loss": 0.004, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 2.6941362916006337, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.9992074370384216, | |
| "eval_loss": 0.005211703013628721, | |
| "eval_runtime": 67.9947, | |
| "eval_samples_per_second": 74.226, | |
| "eval_steps_per_second": 2.324, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 2.7733755942947704, | |
| "grad_norm": 0.07725568860769272, | |
| "learning_rate": 1.704820603125688e-05, | |
| "loss": 0.0019, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.7733755942947704, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.9996037483215332, | |
| "eval_loss": 0.004846959374845028, | |
| "eval_runtime": 68.0179, | |
| "eval_samples_per_second": 74.201, | |
| "eval_steps_per_second": 2.323, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.8526148969889062, | |
| "grad_norm": 0.024744508787989616, | |
| "learning_rate": 1.5947611710323578e-05, | |
| "loss": 0.0039, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 2.8526148969889062, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 1.0, | |
| "eval_loss": 0.0042335595935583115, | |
| "eval_runtime": 68.2641, | |
| "eval_samples_per_second": 73.933, | |
| "eval_steps_per_second": 2.315, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 2.931854199683043, | |
| "grad_norm": 0.015782877802848816, | |
| "learning_rate": 1.4847017389390272e-05, | |
| "loss": 0.0045, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 2.931854199683043, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.9998018741607666, | |
| "eval_loss": 0.004929765127599239, | |
| "eval_runtime": 67.901, | |
| "eval_samples_per_second": 74.329, | |
| "eval_steps_per_second": 2.327, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 3.011093502377179, | |
| "grad_norm": 2.817779779434204, | |
| "learning_rate": 1.3746423068456968e-05, | |
| "loss": 0.002, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 3.011093502377179, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.9996037483215332, | |
| "eval_loss": 0.004618987441062927, | |
| "eval_runtime": 67.8421, | |
| "eval_samples_per_second": 74.393, | |
| "eval_steps_per_second": 2.329, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 3.0903328050713155, | |
| "grad_norm": 0.011266672052443027, | |
| "learning_rate": 1.2645828747523664e-05, | |
| "loss": 0.0028, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 3.0903328050713155, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.9996037483215332, | |
| "eval_loss": 0.004977984819561243, | |
| "eval_runtime": 67.9701, | |
| "eval_samples_per_second": 74.253, | |
| "eval_steps_per_second": 2.325, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 3.1695721077654517, | |
| "grad_norm": 0.030996697023510933, | |
| "learning_rate": 1.154523442659036e-05, | |
| "loss": 0.0033, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 3.1695721077654517, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.9992074370384216, | |
| "eval_loss": 0.004901626612991095, | |
| "eval_runtime": 68.0995, | |
| "eval_samples_per_second": 74.112, | |
| "eval_steps_per_second": 2.32, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 3.248811410459588, | |
| "grad_norm": 0.011963835917413235, | |
| "learning_rate": 1.0444640105657054e-05, | |
| "loss": 0.0052, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 3.248811410459588, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.9996037483215332, | |
| "eval_loss": 0.0047536250203847885, | |
| "eval_runtime": 67.9001, | |
| "eval_samples_per_second": 74.33, | |
| "eval_steps_per_second": 2.327, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 3.3280507131537242, | |
| "grad_norm": 0.00867912545800209, | |
| "learning_rate": 9.34404578472375e-06, | |
| "loss": 0.0026, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 3.3280507131537242, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.999405562877655, | |
| "eval_loss": 0.004853234626352787, | |
| "eval_runtime": 67.9652, | |
| "eval_samples_per_second": 74.259, | |
| "eval_steps_per_second": 2.325, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 3.4072900158478605, | |
| "grad_norm": 0.012174161151051521, | |
| "learning_rate": 8.243451463790447e-06, | |
| "loss": 0.0043, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 3.4072900158478605, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 1.0, | |
| "eval_loss": 0.004362211097031832, | |
| "eval_runtime": 68.0327, | |
| "eval_samples_per_second": 74.185, | |
| "eval_steps_per_second": 2.322, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 3.4865293185419968, | |
| "grad_norm": 1.0564968585968018, | |
| "learning_rate": 7.142857142857143e-06, | |
| "loss": 0.0038, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 3.4865293185419968, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.9998018741607666, | |
| "eval_loss": 0.004115838557481766, | |
| "eval_runtime": 67.8268, | |
| "eval_samples_per_second": 74.41, | |
| "eval_steps_per_second": 2.329, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 3.565768621236133, | |
| "grad_norm": 0.018350793048739433, | |
| "learning_rate": 6.04226282192384e-06, | |
| "loss": 0.003, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 3.565768621236133, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 1.0, | |
| "eval_loss": 0.0043433657847344875, | |
| "eval_runtime": 68.0255, | |
| "eval_samples_per_second": 74.193, | |
| "eval_steps_per_second": 2.323, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 3.6450079239302693, | |
| "grad_norm": 0.0198595579713583, | |
| "learning_rate": 4.941668500990535e-06, | |
| "loss": 0.003, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 3.6450079239302693, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 1.0, | |
| "eval_loss": 0.004538228269666433, | |
| "eval_runtime": 67.8043, | |
| "eval_samples_per_second": 74.435, | |
| "eval_steps_per_second": 2.33, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 3.7242472266244055, | |
| "grad_norm": 0.014352944679558277, | |
| "learning_rate": 3.841074180057231e-06, | |
| "loss": 0.003, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 3.7242472266244055, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.9998018741607666, | |
| "eval_loss": 0.004474899731576443, | |
| "eval_runtime": 67.8863, | |
| "eval_samples_per_second": 74.345, | |
| "eval_steps_per_second": 2.327, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 3.8034865293185423, | |
| "grad_norm": 0.05333567038178444, | |
| "learning_rate": 2.740479859123927e-06, | |
| "loss": 0.0009, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 3.8034865293185423, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.9998018741607666, | |
| "eval_loss": 0.0041495212353765965, | |
| "eval_runtime": 68.1146, | |
| "eval_samples_per_second": 74.096, | |
| "eval_steps_per_second": 2.32, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 3.882725832012678, | |
| "grad_norm": 0.017361685633659363, | |
| "learning_rate": 1.639885538190623e-06, | |
| "loss": 0.0048, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 3.882725832012678, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.9998018741607666, | |
| "eval_loss": 0.004217915236949921, | |
| "eval_runtime": 67.5299, | |
| "eval_samples_per_second": 74.737, | |
| "eval_steps_per_second": 2.34, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 3.9619651347068148, | |
| "grad_norm": 0.021525979042053223, | |
| "learning_rate": 5.392912172573191e-07, | |
| "loss": 0.0035, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 3.9619651347068148, | |
| "eval_Embedding_Dataset_Dev_cosine_accuracy": 0.9996037483215332, | |
| "eval_loss": 0.004212545696645975, | |
| "eval_runtime": 67.678, | |
| "eval_samples_per_second": 74.574, | |
| "eval_steps_per_second": 2.335, | |
| "step": 5000 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 5048, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |