Instructions to use genies-models/openllama-3b-math_textbook with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use genies-models/openllama-3b-math_textbook with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForSequenceClassification

base_model = AutoModelForSequenceClassification.from_pretrained("models/openllama-3b")
model = PeftModel.from_pretrained(base_model, "genies-models/openllama-3b-math_textbook")
```
- Notebooks
- Google Colab
- Kaggle
| [ | |
| { | |
| "loss": 0.7006, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.02, | |
| "step": 1 | |
| }, | |
| { | |
| "loss": 0.6947, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.04, | |
| "step": 2 | |
| }, | |
| { | |
| "loss": 0.6878, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.05, | |
| "step": 3 | |
| }, | |
| { | |
| "loss": 0.6825, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.07, | |
| "step": 4 | |
| }, | |
| { | |
| "loss": 0.6987, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.09, | |
| "step": 5 | |
| }, | |
| { | |
| "loss": 0.6822, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.11, | |
| "step": 6 | |
| }, | |
| { | |
| "loss": 0.6907, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.12, | |
| "step": 7 | |
| }, | |
| { | |
| "loss": 0.6787, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.14, | |
| "step": 8 | |
| }, | |
| { | |
| "loss": 0.669, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.16, | |
| "step": 9 | |
| }, | |
| { | |
| "loss": 0.6796, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.18, | |
| "step": 10 | |
| }, | |
| { | |
| "loss": 0.6759, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.19, | |
| "step": 11 | |
| }, | |
| { | |
| "loss": 0.6747, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.21, | |
| "step": 12 | |
| }, | |
| { | |
| "loss": 0.6943, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.23, | |
| "step": 13 | |
| }, | |
| { | |
| "loss": 0.6733, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.25, | |
| "step": 14 | |
| }, | |
| { | |
| "loss": 0.6583, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.26, | |
| "step": 15 | |
| }, | |
| { | |
| "loss": 0.675, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.28, | |
| "step": 16 | |
| }, | |
| { | |
| "loss": 0.6641, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.3, | |
| "step": 17 | |
| }, | |
| { | |
| "loss": 0.6838, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.32, | |
| "step": 18 | |
| }, | |
| { | |
| "loss": 0.6989, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.33, | |
| "step": 19 | |
| }, | |
| { | |
| "loss": 0.6786, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.35, | |
| "step": 20 | |
| }, | |
| { | |
| "loss": 0.6568, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.37, | |
| "step": 21 | |
| }, | |
| { | |
| "loss": 0.6521, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.39, | |
| "step": 22 | |
| }, | |
| { | |
| "loss": 0.6857, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.4, | |
| "step": 23 | |
| }, | |
| { | |
| "loss": 0.6711, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.42, | |
| "step": 24 | |
| }, | |
| { | |
| "loss": 0.6697, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.44, | |
| "step": 25 | |
| }, | |
| { | |
| "eval_math_textbook_loss": 0.6798593401908875, | |
| "eval_math_textbook_score": -0.25211843848228455, | |
| "eval_math_textbook_brier_score": 0.25211843848228455, | |
| "eval_math_textbook_average_probability": 0.503490686416626, | |
| "eval_math_textbook_accuracy": 0.53, | |
| "eval_math_textbook_probabilities": [ | |
| 0.5158194303512573, | |
| 0.49721020460128784, | |
| 0.9063051342964172, | |
| 0.5153171420097351, | |
| 0.5130631923675537, | |
| 0.519275426864624, | |
| 0.493958979845047, | |
| 0.4960753917694092, | |
| 0.5113411545753479, | |
| 0.5189540982246399, | |
| 0.5387983918190002, | |
| 0.9734545350074768, | |
| 0.5005897879600525, | |
| 0.49967193603515625, | |
| 0.5027124881744385, | |
| 0.4757806360721588, | |
| 0.427785724401474, | |
| 0.4375956356525421, | |
| 0.4909268617630005, | |
| 0.48740366101264954, | |
| 0.49429094791412354, | |
| 0.4976944327354431, | |
| 0.49607059359550476, | |
| 0.4892137050628662, | |
| 0.5081691741943359, | |
| 0.503533661365509, | |
| 0.4984170198440552, | |
| 0.5000449419021606, | |
| 0.5007362365722656, | |
| 0.4983435869216919, | |
| 0.5112137198448181, | |
| 0.4848161041736603, | |
| 0.4866231381893158, | |
| 0.4989570379257202, | |
| 0.49595242738723755, | |
| 0.5011706948280334, | |
| 0.5360090732574463, | |
| 0.5374536514282227, | |
| 0.5365106463432312, | |
| 0.4761095941066742, | |
| 0.5143330693244934, | |
| 0.5405329465866089, | |
| 0.49808478355407715, | |
| 0.5106655955314636, | |
| 0.5113275647163391, | |
| 0.5002411603927612, | |
| 0.5014262199401855, | |
| 0.5024149417877197, | |
| 0.49171775579452515, | |
| 0.5023812055587769, | |
| 0.49619442224502563, | |
| 0.5013337135314941, | |
| 0.48335951566696167, | |
| 0.4942980408668518, | |
| 0.5092995166778564, | |
| 0.4998982548713684, | |
| 0.5000958442687988, | |
| 0.4993903636932373, | |
| 0.4998873770236969, | |
| 0.4995279610157013, | |
| 0.5030616521835327, | |
| 0.5184087753295898, | |
| 0.5327391624450684, | |
| 0.4986587464809418, | |
| 0.5032759308815002, | |
| 0.4996039867401123, | |
| 0.497980535030365, | |
| 0.4976195991039276, | |
| 0.49684402346611023, | |
| 0.5003330111503601, | |
| 0.4988357424736023, | |
| 0.5002389550209045, | |
| 0.5010008215904236, | |
| 0.5012192726135254, | |
| 0.49900633096694946, | |
| 0.504207968711853, | |
| 0.5043627023696899, | |
| 0.4923577606678009, | |
| 0.4893873333930969, | |
| 0.5001519918441772, | |
| 0.4918787479400635, | |
| 0.49767187237739563, | |
| 0.49995043873786926, | |
| 0.5005597472190857, | |
| 0.4910438358783722, | |
| 0.5141940712928772, | |
| 0.5066387057304382, | |
| 0.49924227595329285, | |
| 0.5007526278495789, | |
| 0.5010164976119995, | |
| 0.515100359916687, | |
| 0.4902750849723816, | |
| 0.5263916850090027, | |
| 0.5023711919784546, | |
| 0.5037990212440491, | |
| 0.5036882758140564, | |
| 0.2796187698841095, | |
| 0.2769010663032532, | |
| 0.2782217860221863, | |
| 0.5006805062294006 | |
| ], | |
| "eval_math_textbook_runtime": 19.0702, | |
| "eval_math_textbook_samples_per_second": 5.244, | |
| "eval_math_textbook_steps_per_second": 0.105, | |
| "epoch": 0.44, | |
| "step": 25 | |
| }, | |
| { | |
| "loss": 0.703, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.46, | |
| "step": 26 | |
| }, | |
| { | |
| "loss": 0.6178, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.47, | |
| "step": 27 | |
| }, | |
| { | |
| "loss": 0.6481, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.49, | |
| "step": 28 | |
| }, | |
| { | |
| "loss": 0.6675, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.51, | |
| "step": 29 | |
| }, | |
| { | |
| "loss": 0.6476, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.53, | |
| "step": 30 | |
| }, | |
| { | |
| "loss": 0.6855, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.54, | |
| "step": 31 | |
| }, | |
| { | |
| "loss": 0.6762, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.56, | |
| "step": 32 | |
| }, | |
| { | |
| "loss": 0.6666, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.58, | |
| "step": 33 | |
| }, | |
| { | |
| "loss": 0.6522, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.6, | |
| "step": 34 | |
| }, | |
| { | |
| "loss": 0.6955, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.61, | |
| "step": 35 | |
| }, | |
| { | |
| "loss": 0.6441, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.63, | |
| "step": 36 | |
| }, | |
| { | |
| "loss": 0.7226, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.65, | |
| "step": 37 | |
| }, | |
| { | |
| "loss": 0.6256, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.67, | |
| "step": 38 | |
| }, | |
| { | |
| "loss": 0.6679, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.68, | |
| "step": 39 | |
| }, | |
| { | |
| "loss": 0.6795, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.7, | |
| "step": 40 | |
| }, | |
| { | |
| "loss": 0.6767, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.72, | |
| "step": 41 | |
| }, | |
| { | |
| "loss": 0.6299, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.74, | |
| "step": 42 | |
| }, | |
| { | |
| "loss": 0.6904, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.75, | |
| "step": 43 | |
| }, | |
| { | |
| "loss": 0.6604, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.77, | |
| "step": 44 | |
| }, | |
| { | |
| "loss": 0.6656, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.79, | |
| "step": 45 | |
| }, | |
| { | |
| "loss": 0.5932, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.81, | |
| "step": 46 | |
| }, | |
| { | |
| "loss": 0.6023, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.82, | |
| "step": 47 | |
| }, | |
| { | |
| "loss": 0.6213, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.84, | |
| "step": 48 | |
| }, | |
| { | |
| "loss": 0.666, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.86, | |
| "step": 49 | |
| }, | |
| { | |
| "loss": 0.5679, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.88, | |
| "step": 50 | |
| }, | |
| { | |
| "eval_math_textbook_loss": 0.6765614151954651, | |
| "eval_math_textbook_score": -0.25464117527008057, | |
| "eval_math_textbook_brier_score": 0.25464117527008057, | |
| "eval_math_textbook_average_probability": 0.5048416256904602, | |
| "eval_math_textbook_accuracy": 0.56, | |
| "eval_math_textbook_probabilities": [ | |
| 0.5234187841415405, | |
| 0.49480947852134705, | |
| 0.9842031002044678, | |
| 0.5374545454978943, | |
| 0.5133032202720642, | |
| 0.5459731221199036, | |
| 0.48925507068634033, | |
| 0.494358092546463, | |
| 0.5328835248947144, | |
| 0.5834941267967224, | |
| 0.6279994249343872, | |
| 0.9957192540168762, | |
| 0.5011470317840576, | |
| 0.500601053237915, | |
| 0.506278932094574, | |
| 0.4537994861602783, | |
| 0.3125247061252594, | |
| 0.3253447115421295, | |
| 0.4988914430141449, | |
| 0.48819318413734436, | |
| 0.5061185359954834, | |
| 0.49299943447113037, | |
| 0.489584356546402, | |
| 0.49003836512565613, | |
| 0.5141531229019165, | |
| 0.515479326248169, | |
| 0.5011539459228516, | |
| 0.5012525916099548, | |
| 0.5015091896057129, | |
| 0.4995449185371399, | |
| 0.524846613407135, | |
| 0.449972540140152, | |
| 0.46025410294532776, | |
| 0.5029928088188171, | |
| 0.4909910261631012, | |
| 0.5055700540542603, | |
| 0.5659182071685791, | |
| 0.5701009631156921, | |
| 0.5675790905952454, | |
| 0.45312631130218506, | |
| 0.5314764380455017, | |
| 0.575640082359314, | |
| 0.49749186635017395, | |
| 0.5514194965362549, | |
| 0.5537279844284058, | |
| 0.5053136944770813, | |
| 0.5059685707092285, | |
| 0.5016698837280273, | |
| 0.48786991834640503, | |
| 0.5116444826126099, | |
| 0.48717230558395386, | |
| 0.49897342920303345, | |
| 0.45621031522750854, | |
| 0.46634674072265625, | |
| 0.522278904914856, | |
| 0.4994622468948364, | |
| 0.4996844530105591, | |
| 0.4987800121307373, | |
| 0.5004392862319946, | |
| 0.5018196702003479, | |
| 0.5205135345458984, | |
| 0.5497657060623169, | |
| 0.5863233804702759, | |
| 0.49731746315956116, | |
| 0.5212339162826538, | |
| 0.49849265813827515, | |
| 0.4926159679889679, | |
| 0.4818965792655945, | |
| 0.4915739595890045, | |
| 0.5013987421989441, | |
| 0.49831223487854004, | |
| 0.5014359951019287, | |
| 0.5025668740272522, | |
| 0.500033974647522, | |
| 0.4971452057361603, | |
| 0.5203076601028442, | |
| 0.5108263492584229, | |
| 0.4904172122478485, | |
| 0.4854221045970917, | |
| 0.5058303475379944, | |
| 0.4912535846233368, | |
| 0.493615984916687, | |
| 0.49969109892845154, | |
| 0.4993969202041626, | |
| 0.48576128482818604, | |
| 0.5414730906486511, | |
| 0.5250031352043152, | |
| 0.4980846643447876, | |
| 0.5048013925552368, | |
| 0.5006403923034668, | |
| 0.5311446189880371, | |
| 0.4737245738506317, | |
| 0.5515600442886353, | |
| 0.5109672546386719, | |
| 0.5131567716598511, | |
| 0.5119375586509705, | |
| 0.16946758329868317, | |
| 0.18919195234775543, | |
| 0.17149381339550018, | |
| 0.5021427273750305 | |
| ], | |
| "eval_math_textbook_runtime": 19.0382, | |
| "eval_math_textbook_samples_per_second": 5.253, | |
| "eval_math_textbook_steps_per_second": 0.105, | |
| "epoch": 0.88, | |
| "step": 50 | |
| }, | |
| { | |
| "loss": 0.6445, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.89, | |
| "step": 51 | |
| }, | |
| { | |
| "loss": 0.6888, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.91, | |
| "step": 52 | |
| }, | |
| { | |
| "loss": 0.6501, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.93, | |
| "step": 53 | |
| }, | |
| { | |
| "loss": 0.6092, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.95, | |
| "step": 54 | |
| }, | |
| { | |
| "loss": 0.6645, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.96, | |
| "step": 55 | |
| }, | |
| { | |
| "loss": 0.6154, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.98, | |
| "step": 56 | |
| }, | |
| { | |
| "loss": 0.6258, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.0, | |
| "step": 57 | |
| }, | |
| { | |
| "loss": 0.6063, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.02, | |
| "step": 58 | |
| }, | |
| { | |
| "loss": 0.5914, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.04, | |
| "step": 59 | |
| }, | |
| { | |
| "loss": 0.6427, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.05, | |
| "step": 60 | |
| }, | |
| { | |
| "loss": 0.6403, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.07, | |
| "step": 61 | |
| }, | |
| { | |
| "loss": 0.6142, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.09, | |
| "step": 62 | |
| }, | |
| { | |
| "loss": 0.6127, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.11, | |
| "step": 63 | |
| }, | |
| { | |
| "loss": 0.6158, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.12, | |
| "step": 64 | |
| }, | |
| { | |
| "loss": 0.6626, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.14, | |
| "step": 65 | |
| }, | |
| { | |
| "loss": 0.6157, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.16, | |
| "step": 66 | |
| }, | |
| { | |
| "loss": 0.5597, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.18, | |
| "step": 67 | |
| }, | |
| { | |
| "loss": 0.605, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.19, | |
| "step": 68 | |
| }, | |
| { | |
| "loss": 0.6055, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.21, | |
| "step": 69 | |
| }, | |
| { | |
| "loss": 0.5729, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.23, | |
| "step": 70 | |
| }, | |
| { | |
| "loss": 0.6415, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.25, | |
| "step": 71 | |
| }, | |
| { | |
| "loss": 0.5855, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.26, | |
| "step": 72 | |
| }, | |
| { | |
| "loss": 0.6223, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.28, | |
| "step": 73 | |
| }, | |
| { | |
| "loss": 0.642, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.3, | |
| "step": 74 | |
| }, | |
| { | |
| "loss": 0.6395, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.32, | |
| "step": 75 | |
| }, | |
| { | |
| "eval_math_textbook_loss": 0.6669816374778748, | |
| "eval_math_textbook_score": -0.25246959924697876, | |
| "eval_math_textbook_brier_score": 0.25246959924697876, | |
| "eval_math_textbook_average_probability": 0.520313024520874, | |
| "eval_math_textbook_accuracy": 0.64, | |
| "eval_math_textbook_probabilities": [ | |
| 0.5060237050056458, | |
| 0.49064791202545166, | |
| 0.9997841715812683, | |
| 0.5625044107437134, | |
| 0.5323017835617065, | |
| 0.6205570101737976, | |
| 0.49191153049468994, | |
| 0.5035534501075745, | |
| 0.6117144823074341, | |
| 0.5694499611854553, | |
| 0.62008136510849, | |
| 0.9997065663337708, | |
| 0.5023816823959351, | |
| 0.5059705376625061, | |
| 0.5042400360107422, | |
| 0.3661307096481323, | |
| 0.19351331889629364, | |
| 0.23905089497566223, | |
| 0.5743193030357361, | |
| 0.5918472409248352, | |
| 0.60948646068573, | |
| 0.4980088472366333, | |
| 0.47986215353012085, | |
| 0.6472142934799194, | |
| 0.5021066069602966, | |
| 0.577959418296814, | |
| 0.4980534315109253, | |
| 0.4979358911514282, | |
| 0.5210027098655701, | |
| 0.5152252912521362, | |
| 0.5233669281005859, | |
| 0.5047284364700317, | |
| 0.5074074864387512, | |
| 0.5100474953651428, | |
| 0.4624626040458679, | |
| 0.5011353492736816, | |
| 0.5204907059669495, | |
| 0.5256100296974182, | |
| 0.523885190486908, | |
| 0.3173781633377075, | |
| 0.7477626204490662, | |
| 0.8833009004592896, | |
| 0.4930415749549866, | |
| 0.6655856966972351, | |
| 0.6711253523826599, | |
| 0.5148721933364868, | |
| 0.514183521270752, | |
| 0.5223090648651123, | |
| 0.47892889380455017, | |
| 0.5266019105911255, | |
| 0.4735231399536133, | |
| 0.48682326078414917, | |
| 0.4247528314590454, | |
| 0.4452798366546631, | |
| 0.5557901859283447, | |
| 0.500255823135376, | |
| 0.5031067728996277, | |
| 0.4988952875137329, | |
| 0.49860796332359314, | |
| 0.502892792224884, | |
| 0.7142451405525208, | |
| 0.7836840152740479, | |
| 0.8584773540496826, | |
| 0.500076413154602, | |
| 0.5083998441696167, | |
| 0.5015168190002441, | |
| 0.49127575755119324, | |
| 0.4740145802497864, | |
| 0.4983091354370117, | |
| 0.5068450570106506, | |
| 0.49789443612098694, | |
| 0.5055553913116455, | |
| 0.4963601231575012, | |
| 0.49431753158569336, | |
| 0.48503610491752625, | |
| 0.503131091594696, | |
| 0.5051704049110413, | |
| 0.49630600214004517, | |
| 0.5080066323280334, | |
| 0.5008676052093506, | |
| 0.5115634799003601, | |
| 0.49521031975746155, | |
| 0.5034043192863464, | |
| 0.49739542603492737, | |
| 0.42049840092658997, | |
| 0.6085013747215271, | |
| 0.5950183272361755, | |
| 0.4983161389827728, | |
| 0.5233954787254333, | |
| 0.49963676929473877, | |
| 0.6716890931129456, | |
| 0.3312073051929474, | |
| 0.7989556789398193, | |
| 0.532721757888794, | |
| 0.532436728477478, | |
| 0.5268887281417847, | |
| 0.002659181132912636, | |
| 0.0034369390923529863, | |
| 0.0025918150786310434, | |
| 0.5095915794372559 | |
| ], | |
| "eval_math_textbook_runtime": 19.0329, | |
| "eval_math_textbook_samples_per_second": 5.254, | |
| "eval_math_textbook_steps_per_second": 0.105, | |
| "epoch": 1.32, | |
| "step": 75 | |
| }, | |
| { | |
| "loss": 0.5907, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.33, | |
| "step": 76 | |
| }, | |
| { | |
| "loss": 0.6291, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.35, | |
| "step": 77 | |
| }, | |
| { | |
| "loss": 0.6285, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.37, | |
| "step": 78 | |
| }, | |
| { | |
| "loss": 0.7668, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.39, | |
| "step": 79 | |
| }, | |
| { | |
| "loss": 0.5893, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.4, | |
| "step": 80 | |
| }, | |
| { | |
| "loss": 0.6201, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.42, | |
| "step": 81 | |
| }, | |
| { | |
| "loss": 0.5551, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.44, | |
| "step": 82 | |
| }, | |
| { | |
| "loss": 0.5957, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.46, | |
| "step": 83 | |
| }, | |
| { | |
| "loss": 0.4946, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.47, | |
| "step": 84 | |
| }, | |
| { | |
| "loss": 0.6317, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.49, | |
| "step": 85 | |
| }, | |
| { | |
| "loss": 0.6051, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.51, | |
| "step": 86 | |
| }, | |
| { | |
| "loss": 0.5159, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.53, | |
| "step": 87 | |
| }, | |
| { | |
| "loss": 0.5903, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.54, | |
| "step": 88 | |
| }, | |
| { | |
| "loss": 0.6392, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.56, | |
| "step": 89 | |
| }, | |
| { | |
| "loss": 0.5701, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.58, | |
| "step": 90 | |
| }, | |
| { | |
| "loss": 0.5516, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.6, | |
| "step": 91 | |
| }, | |
| { | |
| "loss": 0.5567, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.61, | |
| "step": 92 | |
| }, | |
| { | |
| "loss": 0.6248, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.63, | |
| "step": 93 | |
| }, | |
| { | |
| "loss": 0.6205, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.65, | |
| "step": 94 | |
| }, | |
| { | |
| "loss": 0.5114, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.67, | |
| "step": 95 | |
| }, | |
| { | |
| "loss": 0.5833, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.68, | |
| "step": 96 | |
| }, | |
| { | |
| "loss": 0.5364, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.7, | |
| "step": 97 | |
| }, | |
| { | |
| "loss": 0.4792, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.72, | |
| "step": 98 | |
| }, | |
| { | |
| "loss": 0.4465, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.74, | |
| "step": 99 | |
| }, | |
| { | |
| "loss": 0.5236, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.75, | |
| "step": 100 | |
| }, | |
| { | |
| "eval_math_textbook_loss": 0.6085913181304932, | |
| "eval_math_textbook_score": -0.2288055270910263, | |
| "eval_math_textbook_brier_score": 0.2288055270910263, | |
| "eval_math_textbook_average_probability": 0.5589618682861328, | |
| "eval_math_textbook_accuracy": 0.62, | |
| "eval_math_textbook_probabilities": [ | |
| 0.6090167760848999, | |
| 0.39267632365226746, | |
| 0.9999756813049316, | |
| 0.6500729918479919, | |
| 0.5652173161506653, | |
| 0.7466467618942261, | |
| 0.490016907453537, | |
| 0.4640730321407318, | |
| 0.7244709730148315, | |
| 0.9277529120445251, | |
| 0.9807989597320557, | |
| 0.9996448755264282, | |
| 0.5205850005149841, | |
| 0.5194319486618042, | |
| 0.5667957663536072, | |
| 0.5747010707855225, | |
| 0.5128976106643677, | |
| 0.5322805047035217, | |
| 0.6718254685401917, | |
| 0.6567670106887817, | |
| 0.7737979888916016, | |
| 0.48333612084388733, | |
| 0.4443439543247223, | |
| 0.6398590207099915, | |
| 0.6147111058235168, | |
| 0.8212567567825317, | |
| 0.5021203756332397, | |
| 0.5463013052940369, | |
| 0.5228294134140015, | |
| 0.5497167110443115, | |
| 0.633198618888855, | |
| 0.3830769956111908, | |
| 0.3979168236255646, | |
| 0.4695032238960266, | |
| 0.40719327330589294, | |
| 0.4301484227180481, | |
| 0.7173041701316833, | |
| 0.7347032427787781, | |
| 0.7265180945396423, | |
| 0.40913403034210205, | |
| 0.7016667127609253, | |
| 0.7560572624206543, | |
| 0.4757772982120514, | |
| 0.966978907585144, | |
| 0.9720750451087952, | |
| 0.5886526107788086, | |
| 0.5479859113693237, | |
| 0.5956962704658508, | |
| 0.4101646840572357, | |
| 0.5605520009994507, | |
| 0.372322678565979, | |
| 0.4412081837654114, | |
| 0.26623469591140747, | |
| 0.30087795853614807, | |
| 0.6590585708618164, | |
| 0.44494733214378357, | |
| 0.46975815296173096, | |
| 0.5008803606033325, | |
| 0.5089434385299683, | |
| 0.5352832078933716, | |
| 0.6927960515022278, | |
| 0.7665735483169556, | |
| 0.8244929313659668, | |
| 0.4457288682460785, | |
| 0.5919255018234253, | |
| 0.46556612849235535, | |
| 0.36381617188453674, | |
| 0.31074991822242737, | |
| 0.4532500207424164, | |
| 0.5130099654197693, | |
| 0.5030292272567749, | |
| 0.5009328722953796, | |
| 0.4911012649536133, | |
| 0.46780434250831604, | |
| 0.44806256890296936, | |
| 0.6889190673828125, | |
| 0.47469109296798706, | |
| 0.4170834422111511, | |
| 0.6015594601631165, | |
| 0.5276020765304565, | |
| 0.638933539390564, | |
| 0.48303303122520447, | |
| 0.47112226486206055, | |
| 0.48979878425598145, | |
| 0.5966281294822693, | |
| 0.8498927354812622, | |
| 0.8357806205749512, | |
| 0.4697180688381195, | |
| 0.5513212084770203, | |
| 0.45615488290786743, | |
| 0.5793567895889282, | |
| 0.4038052558898926, | |
| 0.603541374206543, | |
| 0.6687532663345337, | |
| 0.6773205399513245, | |
| 0.6696297526359558, | |
| 0.0004917322075925767, | |
| 0.002351292409002781, | |
| 0.0006308771553449333, | |
| 0.5114908218383789 | |
| ], | |
| "eval_math_textbook_runtime": 19.0366, | |
| "eval_math_textbook_samples_per_second": 5.253, | |
| "eval_math_textbook_steps_per_second": 0.105, | |
| "epoch": 1.75, | |
| "step": 100 | |
| }, | |
| { | |
| "train_runtime": 1703.3257, | |
| "train_samples_per_second": 1.879, | |
| "train_steps_per_second": 0.059, | |
| "total_flos": 0.0, | |
| "train_loss": 0.6338043370842934, | |
| "epoch": 1.75, | |
| "step": 100 | |
| } | |
| ] |