Instructions to use genies-models/openllama-3b-math with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use genies-models/openllama-3b-math with PEFT:
from peft import PeftModel from transformers import AutoModelForSequenceClassification base_model = AutoModelForSequenceClassification.from_pretrained("models/openllama-3b") model = PeftModel.from_pretrained(base_model, "genies-models/openllama-3b-math") - Notebooks
- Google Colab
- Kaggle
| [ | |
| { | |
| "loss": 0.6862, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.02, | |
| "step": 1 | |
| }, | |
| { | |
| "loss": 0.692, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.04, | |
| "step": 2 | |
| }, | |
| { | |
| "loss": 0.6954, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.05, | |
| "step": 3 | |
| }, | |
| { | |
| "loss": 0.7031, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.07, | |
| "step": 4 | |
| }, | |
| { | |
| "loss": 0.6928, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.09, | |
| "step": 5 | |
| }, | |
| { | |
| "loss": 0.699, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.11, | |
| "step": 6 | |
| }, | |
| { | |
| "loss": 0.6902, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.12, | |
| "step": 7 | |
| }, | |
| { | |
| "loss": 0.681, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.14, | |
| "step": 8 | |
| }, | |
| { | |
| "loss": 0.6931, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.16, | |
| "step": 9 | |
| }, | |
| { | |
| "loss": 0.6892, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.18, | |
| "step": 10 | |
| }, | |
| { | |
| "loss": 0.6898, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.19, | |
| "step": 11 | |
| }, | |
| { | |
| "loss": 0.6982, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.21, | |
| "step": 12 | |
| }, | |
| { | |
| "loss": 0.6922, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.23, | |
| "step": 13 | |
| }, | |
| { | |
| "loss": 0.6865, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.25, | |
| "step": 14 | |
| }, | |
| { | |
| "loss": 0.6901, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.26, | |
| "step": 15 | |
| }, | |
| { | |
| "loss": 0.6953, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.28, | |
| "step": 16 | |
| }, | |
| { | |
| "loss": 0.6845, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.3, | |
| "step": 17 | |
| }, | |
| { | |
| "loss": 0.6914, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.32, | |
| "step": 18 | |
| }, | |
| { | |
| "loss": 0.6885, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.33, | |
| "step": 19 | |
| }, | |
| { | |
| "loss": 0.6903, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.35, | |
| "step": 20 | |
| }, | |
| { | |
| "loss": 0.6895, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.37, | |
| "step": 21 | |
| }, | |
| { | |
| "loss": 0.6775, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.39, | |
| "step": 22 | |
| }, | |
| { | |
| "loss": 0.6843, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.4, | |
| "step": 23 | |
| }, | |
| { | |
| "loss": 0.6778, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.42, | |
| "step": 24 | |
| }, | |
| { | |
| "loss": 0.6954, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.44, | |
| "step": 25 | |
| }, | |
| { | |
| "eval_math_loss": 0.6901352405548096, | |
| "eval_math_score": -0.25167757272720337, | |
| "eval_math_brier_score": 0.25167757272720337, | |
| "eval_math_average_probability": 0.4987456500530243, | |
| "eval_math_accuracy": 0.62, | |
| "eval_math_probabilities": [ | |
| 0.5027638673782349, | |
| 0.5069999694824219, | |
| 0.4998233914375305, | |
| 0.49942639470100403, | |
| 0.5001113414764404, | |
| 0.4992215633392334, | |
| 0.5041193962097168, | |
| 0.49987202882766724, | |
| 0.5148451328277588, | |
| 0.525566041469574, | |
| 0.5015634894371033, | |
| 0.5299053192138672, | |
| 0.500709056854248, | |
| 0.49956902861595154, | |
| 0.5017619729042053, | |
| 0.5067934989929199, | |
| 0.4978564977645874, | |
| 0.4987451434135437, | |
| 0.4950011372566223, | |
| 0.5006818175315857, | |
| 0.4997928738594055, | |
| 0.501230776309967, | |
| 0.5018536448478699, | |
| 0.5026345252990723, | |
| 0.4967585802078247, | |
| 0.4941774606704712, | |
| 0.4975478947162628, | |
| 0.5060742497444153, | |
| 0.47517138719558716, | |
| 0.5136992931365967, | |
| 0.5003398060798645, | |
| 0.500635027885437, | |
| 0.5026965737342834, | |
| 0.49716416001319885, | |
| 0.5036399960517883, | |
| 0.5027968287467957, | |
| 0.4978507161140442, | |
| 0.49902665615081787, | |
| 0.49480193853378296, | |
| 0.5012259483337402, | |
| 0.4997442662715912, | |
| 0.49923667311668396, | |
| 0.4908088147640228, | |
| 0.5081710815429688, | |
| 0.4765812158584595, | |
| 0.5016435384750366, | |
| 0.5013003945350647, | |
| 0.4970977306365967, | |
| 0.5095471143722534, | |
| 0.4940992593765259, | |
| 0.5008733868598938, | |
| 0.5002974271774292, | |
| 0.5011077523231506, | |
| 0.49904587864875793, | |
| 0.5007302761077881, | |
| 0.5004144310951233, | |
| 0.5012476444244385, | |
| 0.5047202706336975, | |
| 0.5079423785209656, | |
| 0.5031242370605469, | |
| 0.5022978186607361, | |
| 0.4994467496871948, | |
| 0.5034329891204834, | |
| 0.5073589086532593, | |
| 0.5306907892227173, | |
| 0.5277766585350037, | |
| 0.500605583190918, | |
| 0.49940890073776245, | |
| 0.5001015067100525, | |
| 0.5020729899406433, | |
| 0.49872809648513794, | |
| 0.5019404888153076, | |
| 0.5015720129013062, | |
| 0.505891740322113, | |
| 0.5065779685974121, | |
| 0.5094985961914062, | |
| 0.48333850502967834, | |
| 0.4985012710094452, | |
| 0.5102980732917786, | |
| 0.5111819505691528, | |
| 0.5107572078704834, | |
| 0.5016093850135803, | |
| 0.5002478361129761, | |
| 0.5022361874580383, | |
| 0.5004898309707642, | |
| 0.4977990686893463, | |
| 0.49885597825050354, | |
| 0.5022542476654053, | |
| 0.49431321024894714, | |
| 0.5005004405975342, | |
| 0.5246015787124634, | |
| 0.5063254833221436, | |
| 0.505049467086792, | |
| 0.4940093457698822, | |
| 0.49063223600387573, | |
| 0.4952346384525299, | |
| 0.3910456895828247, | |
| 0.39186742901802063, | |
| 0.3929601311683655, | |
| 0.5008682012557983 | |
| ], | |
| "eval_math_runtime": 22.613, | |
| "eval_math_samples_per_second": 4.422, | |
| "eval_math_steps_per_second": 0.088, | |
| "epoch": 0.44, | |
| "step": 25 | |
| }, | |
| { | |
| "loss": 0.6919, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.46, | |
| "step": 26 | |
| }, | |
| { | |
| "loss": 0.6883, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.47, | |
| "step": 27 | |
| }, | |
| { | |
| "loss": 0.6733, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.49, | |
| "step": 28 | |
| }, | |
| { | |
| "loss": 0.6873, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.51, | |
| "step": 29 | |
| }, | |
| { | |
| "loss": 0.6645, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.53, | |
| "step": 30 | |
| }, | |
| { | |
| "loss": 0.6799, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.54, | |
| "step": 31 | |
| }, | |
| { | |
| "loss": 0.6721, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.56, | |
| "step": 32 | |
| }, | |
| { | |
| "loss": 0.6906, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.58, | |
| "step": 33 | |
| }, | |
| { | |
| "loss": 0.6775, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.6, | |
| "step": 34 | |
| }, | |
| { | |
| "loss": 0.6868, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.61, | |
| "step": 35 | |
| }, | |
| { | |
| "loss": 0.6691, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.63, | |
| "step": 36 | |
| }, | |
| { | |
| "loss": 0.6775, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.65, | |
| "step": 37 | |
| }, | |
| { | |
| "loss": 0.6875, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.67, | |
| "step": 38 | |
| }, | |
| { | |
| "loss": 0.6882, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.68, | |
| "step": 39 | |
| }, | |
| { | |
| "loss": 0.6659, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.7, | |
| "step": 40 | |
| }, | |
| { | |
| "loss": 0.6831, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.72, | |
| "step": 41 | |
| }, | |
| { | |
| "loss": 0.6883, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.74, | |
| "step": 42 | |
| }, | |
| { | |
| "loss": 0.6687, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.75, | |
| "step": 43 | |
| }, | |
| { | |
| "loss": 0.6756, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.77, | |
| "step": 44 | |
| }, | |
| { | |
| "loss": 0.6767, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.79, | |
| "step": 45 | |
| }, | |
| { | |
| "loss": 0.6726, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.81, | |
| "step": 46 | |
| }, | |
| { | |
| "loss": 0.6817, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.82, | |
| "step": 47 | |
| }, | |
| { | |
| "loss": 0.6843, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.84, | |
| "step": 48 | |
| }, | |
| { | |
| "loss": 0.6677, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.86, | |
| "step": 49 | |
| }, | |
| { | |
| "loss": 0.675, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.88, | |
| "step": 50 | |
| }, | |
| { | |
| "eval_math_loss": 0.6834437847137451, | |
| "eval_math_score": -0.25087547302246094, | |
| "eval_math_brier_score": 0.25087547302246094, | |
| "eval_math_average_probability": 0.5003729462623596, | |
| "eval_math_accuracy": 0.67, | |
| "eval_math_probabilities": [ | |
| 0.5053072571754456, | |
| 0.5104738473892212, | |
| 0.4998767375946045, | |
| 0.4987499713897705, | |
| 0.4995637536048889, | |
| 0.4979211091995239, | |
| 0.5108855366706848, | |
| 0.5023437142372131, | |
| 0.5171610713005066, | |
| 0.5161605477333069, | |
| 0.5233566761016846, | |
| 0.5833942294120789, | |
| 0.5003718733787537, | |
| 0.49910590052604675, | |
| 0.5014094114303589, | |
| 0.5064855217933655, | |
| 0.49743178486824036, | |
| 0.4980136752128601, | |
| 0.4959304630756378, | |
| 0.5030016899108887, | |
| 0.5019874572753906, | |
| 0.5007861852645874, | |
| 0.5035613179206848, | |
| 0.5047624707221985, | |
| 0.49380984902381897, | |
| 0.4902263581752777, | |
| 0.49334290623664856, | |
| 0.5064259767532349, | |
| 0.4730111360549927, | |
| 0.5091708302497864, | |
| 0.5004432201385498, | |
| 0.49959176778793335, | |
| 0.4988599419593811, | |
| 0.5252774357795715, | |
| 0.5397801399230957, | |
| 0.5378824472427368, | |
| 0.5073467493057251, | |
| 0.5127724409103394, | |
| 0.48429790139198303, | |
| 0.5011600852012634, | |
| 0.499787300825119, | |
| 0.5010166168212891, | |
| 0.49625164270401, | |
| 0.512554407119751, | |
| 0.48166966438293457, | |
| 0.5029576420783997, | |
| 0.49886441230773926, | |
| 0.49318164587020874, | |
| 0.5096920728683472, | |
| 0.49129679799079895, | |
| 0.5006589889526367, | |
| 0.5004881024360657, | |
| 0.5014761090278625, | |
| 0.500096321105957, | |
| 0.5061331987380981, | |
| 0.5045473575592041, | |
| 0.506636381149292, | |
| 0.510319173336029, | |
| 0.5139249563217163, | |
| 0.516427218914032, | |
| 0.5025309324264526, | |
| 0.5002322196960449, | |
| 0.49999839067459106, | |
| 0.5077803730964661, | |
| 0.5266351103782654, | |
| 0.5349680185317993, | |
| 0.5012303590774536, | |
| 0.5011308789253235, | |
| 0.502089262008667, | |
| 0.49999287724494934, | |
| 0.49622201919555664, | |
| 0.5034714937210083, | |
| 0.5027754306793213, | |
| 0.5089088678359985, | |
| 0.5117012858390808, | |
| 0.5095623135566711, | |
| 0.4832465946674347, | |
| 0.49723362922668457, | |
| 0.5502822399139404, | |
| 0.5483925342559814, | |
| 0.5437333583831787, | |
| 0.5012779831886292, | |
| 0.4989190697669983, | |
| 0.5021101236343384, | |
| 0.5062341094017029, | |
| 0.5053580403327942, | |
| 0.5082789659500122, | |
| 0.5026023387908936, | |
| 0.49390891194343567, | |
| 0.5006174445152283, | |
| 0.5223895311355591, | |
| 0.5041326284408569, | |
| 0.5024306178092957, | |
| 0.492806077003479, | |
| 0.513796865940094, | |
| 0.4959098994731903, | |
| 0.31710273027420044, | |
| 0.3174270689487457, | |
| 0.3193003535270691, | |
| 0.5011496543884277 | |
| ], | |
| "eval_math_runtime": 22.5936, | |
| "eval_math_samples_per_second": 4.426, | |
| "eval_math_steps_per_second": 0.089, | |
| "epoch": 0.88, | |
| "step": 50 | |
| }, | |
| { | |
| "loss": 0.6771, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.89, | |
| "step": 51 | |
| }, | |
| { | |
| "loss": 0.6825, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.91, | |
| "step": 52 | |
| }, | |
| { | |
| "loss": 0.6914, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.93, | |
| "step": 53 | |
| }, | |
| { | |
| "loss": 0.6884, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.95, | |
| "step": 54 | |
| }, | |
| { | |
| "loss": 0.6772, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.96, | |
| "step": 55 | |
| }, | |
| { | |
| "loss": 0.6635, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.98, | |
| "step": 56 | |
| }, | |
| { | |
| "loss": 0.6667, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.0, | |
| "step": 57 | |
| }, | |
| { | |
| "loss": 0.6587, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.02, | |
| "step": 58 | |
| }, | |
| { | |
| "loss": 0.6802, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.04, | |
| "step": 59 | |
| }, | |
| { | |
| "loss": 0.6705, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.05, | |
| "step": 60 | |
| }, | |
| { | |
| "loss": 0.659, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.07, | |
| "step": 61 | |
| }, | |
| { | |
| "loss": 0.6922, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.09, | |
| "step": 62 | |
| }, | |
| { | |
| "loss": 0.6712, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.11, | |
| "step": 63 | |
| }, | |
| { | |
| "loss": 0.6512, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.12, | |
| "step": 64 | |
| }, | |
| { | |
| "loss": 0.6716, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.14, | |
| "step": 65 | |
| }, | |
| { | |
| "loss": 0.6885, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.16, | |
| "step": 66 | |
| }, | |
| { | |
| "loss": 0.6468, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.18, | |
| "step": 67 | |
| }, | |
| { | |
| "loss": 0.6314, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.19, | |
| "step": 68 | |
| }, | |
| { | |
| "loss": 0.6651, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.21, | |
| "step": 69 | |
| }, | |
| { | |
| "loss": 0.6706, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.23, | |
| "step": 70 | |
| }, | |
| { | |
| "loss": 0.6134, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.25, | |
| "step": 71 | |
| }, | |
| { | |
| "loss": 0.6405, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.26, | |
| "step": 72 | |
| }, | |
| { | |
| "loss": 0.6676, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.28, | |
| "step": 73 | |
| }, | |
| { | |
| "loss": 0.6093, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.3, | |
| "step": 74 | |
| }, | |
| { | |
| "loss": 0.6183, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.32, | |
| "step": 75 | |
| }, | |
| { | |
| "eval_math_loss": 0.665087103843689, | |
| "eval_math_score": -0.24755878746509552, | |
| "eval_math_brier_score": 0.24755878746509552, | |
| "eval_math_average_probability": 0.508122980594635, | |
| "eval_math_accuracy": 0.63, | |
| "eval_math_probabilities": [ | |
| 0.5121186971664429, | |
| 0.5127348899841309, | |
| 0.5004041790962219, | |
| 0.5009912252426147, | |
| 0.5002092123031616, | |
| 0.5010939836502075, | |
| 0.545912504196167, | |
| 0.5255416035652161, | |
| 0.5276244878768921, | |
| 0.5262688994407654, | |
| 0.6037449240684509, | |
| 0.6872636079788208, | |
| 0.45226553082466125, | |
| 0.4923723638057709, | |
| 0.49667802453041077, | |
| 0.5102534890174866, | |
| 0.4977477788925171, | |
| 0.4969833195209503, | |
| 0.49458229541778564, | |
| 0.508873462677002, | |
| 0.5096623301506042, | |
| 0.4953479766845703, | |
| 0.5070879459381104, | |
| 0.5050402283668518, | |
| 0.4883251190185547, | |
| 0.4748927354812622, | |
| 0.47420623898506165, | |
| 0.5024446845054626, | |
| 0.44197729229927063, | |
| 0.5237727165222168, | |
| 0.49600568413734436, | |
| 0.49905824661254883, | |
| 0.4909888207912445, | |
| 0.611699104309082, | |
| 0.656679630279541, | |
| 0.6554825305938721, | |
| 0.5338494777679443, | |
| 0.5640528202056885, | |
| 0.46428170800209045, | |
| 0.4977650046348572, | |
| 0.497366726398468, | |
| 0.508729100227356, | |
| 0.5139797925949097, | |
| 0.5323095321655273, | |
| 0.5103693604469299, | |
| 0.5118908882141113, | |
| 0.493362158536911, | |
| 0.4925827085971832, | |
| 0.5059145092964172, | |
| 0.48735955357551575, | |
| 0.49885982275009155, | |
| 0.5037399530410767, | |
| 0.5033603310585022, | |
| 0.5000958442687988, | |
| 0.5084394216537476, | |
| 0.5043569207191467, | |
| 0.5100818276405334, | |
| 0.5124533772468567, | |
| 0.5260497331619263, | |
| 0.5502150058746338, | |
| 0.5108760595321655, | |
| 0.5024101138114929, | |
| 0.4991067051887512, | |
| 0.49586227536201477, | |
| 0.5228884816169739, | |
| 0.5393039584159851, | |
| 0.49779072403907776, | |
| 0.5032389760017395, | |
| 0.5024223327636719, | |
| 0.5007798671722412, | |
| 0.4911024272441864, | |
| 0.5055527687072754, | |
| 0.5001845359802246, | |
| 0.5026863813400269, | |
| 0.5191549062728882, | |
| 0.5366024971008301, | |
| 0.4879572093486786, | |
| 0.48914453387260437, | |
| 0.6904913187026978, | |
| 0.6880560517311096, | |
| 0.6807676553726196, | |
| 0.4983310401439667, | |
| 0.49475836753845215, | |
| 0.5018371343612671, | |
| 0.5274589657783508, | |
| 0.5387904047966003, | |
| 0.5446135401725769, | |
| 0.49650830030441284, | |
| 0.488943487405777, | |
| 0.4967746436595917, | |
| 0.49659326672554016, | |
| 0.5072319507598877, | |
| 0.505716860294342, | |
| 0.49774613976478577, | |
| 0.553057849407196, | |
| 0.5152707695960999, | |
| 0.17283889651298523, | |
| 0.17193157970905304, | |
| 0.17517608404159546, | |
| 0.5045385956764221 | |
| ], | |
| "eval_math_runtime": 22.5741, | |
| "eval_math_samples_per_second": 4.43, | |
| "eval_math_steps_per_second": 0.089, | |
| "epoch": 1.32, | |
| "step": 75 | |
| }, | |
| { | |
| "loss": 0.6424, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.33, | |
| "step": 76 | |
| }, | |
| { | |
| "loss": 0.6144, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.35, | |
| "step": 77 | |
| }, | |
| { | |
| "loss": 0.6389, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.37, | |
| "step": 78 | |
| }, | |
| { | |
| "loss": 0.6363, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.39, | |
| "step": 79 | |
| }, | |
| { | |
| "loss": 0.6334, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.4, | |
| "step": 80 | |
| }, | |
| { | |
| "loss": 0.6685, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.42, | |
| "step": 81 | |
| }, | |
| { | |
| "loss": 0.677, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.44, | |
| "step": 82 | |
| }, | |
| { | |
| "loss": 0.6031, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.46, | |
| "step": 83 | |
| }, | |
| { | |
| "loss": 0.6428, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.47, | |
| "step": 84 | |
| }, | |
| { | |
| "loss": 0.6308, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.49, | |
| "step": 85 | |
| }, | |
| { | |
| "loss": 0.5668, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.51, | |
| "step": 86 | |
| }, | |
| { | |
| "loss": 0.586, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.53, | |
| "step": 87 | |
| }, | |
| { | |
| "loss": 0.6652, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.54, | |
| "step": 88 | |
| }, | |
| { | |
| "loss": 0.5834, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.56, | |
| "step": 89 | |
| }, | |
| { | |
| "loss": 0.6447, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.58, | |
| "step": 90 | |
| }, | |
| { | |
| "loss": 0.5041, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.6, | |
| "step": 91 | |
| }, | |
| { | |
| "loss": 0.5564, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.61, | |
| "step": 92 | |
| }, | |
| { | |
| "loss": 0.5746, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.63, | |
| "step": 93 | |
| }, | |
| { | |
| "loss": 0.6814, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.65, | |
| "step": 94 | |
| }, | |
| { | |
| "loss": 0.6013, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.67, | |
| "step": 95 | |
| }, | |
| { | |
| "loss": 0.5654, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.68, | |
| "step": 96 | |
| }, | |
| { | |
| "loss": 0.5918, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.7, | |
| "step": 97 | |
| }, | |
| { | |
| "loss": 0.5496, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.72, | |
| "step": 98 | |
| }, | |
| { | |
| "loss": 0.6326, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.74, | |
| "step": 99 | |
| }, | |
| { | |
| "loss": 0.5314, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.75, | |
| "step": 100 | |
| }, | |
| { | |
| "eval_math_loss": 0.6169581413269043, | |
| "eval_math_score": -0.2320980429649353, | |
| "eval_math_brier_score": 0.2320980429649353, | |
| "eval_math_average_probability": 0.5478038787841797, | |
| "eval_math_accuracy": 0.63, | |
| "eval_math_probabilities": [ | |
| 0.5236212611198425, | |
| 0.5186634659767151, | |
| 0.5260717868804932, | |
| 0.489012211561203, | |
| 0.49739015102386475, | |
| 0.4983677864074707, | |
| 0.7061103582382202, | |
| 0.6276401281356812, | |
| 0.5337700843811035, | |
| 0.6074290871620178, | |
| 0.6426130533218384, | |
| 0.8374870419502258, | |
| 0.37604060769081116, | |
| 0.36783960461616516, | |
| 0.39347073435783386, | |
| 0.5029640793800354, | |
| 0.49337878823280334, | |
| 0.4871605336666107, | |
| 0.526091456413269, | |
| 0.510998010635376, | |
| 0.5022907257080078, | |
| 0.5005046129226685, | |
| 0.5276081562042236, | |
| 0.5287099480628967, | |
| 0.5003510117530823, | |
| 0.42485782504081726, | |
| 0.37669673562049866, | |
| 0.4912723898887634, | |
| 0.22055673599243164, | |
| 0.627392053604126, | |
| 0.49359387159347534, | |
| 0.48515188694000244, | |
| 0.4972233772277832, | |
| 0.8575774431228638, | |
| 0.910191535949707, | |
| 0.8751074075698853, | |
| 0.8624600172042847, | |
| 0.8954752683639526, | |
| 0.5143798589706421, | |
| 0.4631466567516327, | |
| 0.4894852042198181, | |
| 0.6127458214759827, | |
| 0.5760451555252075, | |
| 0.6216609477996826, | |
| 0.5542522072792053, | |
| 0.6066001653671265, | |
| 0.4518378674983978, | |
| 0.5172532200813293, | |
| 0.5548712611198425, | |
| 0.47210246324539185, | |
| 0.480351984500885, | |
| 0.5317205190658569, | |
| 0.5237709283828735, | |
| 0.5205705165863037, | |
| 0.5198208093643188, | |
| 0.5110467076301575, | |
| 0.5243876576423645, | |
| 0.6869658827781677, | |
| 0.7886658310890198, | |
| 0.8274509906768799, | |
| 0.5666228532791138, | |
| 0.5315422415733337, | |
| 0.4891224503517151, | |
| 0.4611716568470001, | |
| 0.5529481768608093, | |
| 0.6163869500160217, | |
| 0.47675707936286926, | |
| 0.5012125968933105, | |
| 0.5132050514221191, | |
| 0.5116791725158691, | |
| 0.49521586298942566, | |
| 0.4946640133857727, | |
| 0.5137390494346619, | |
| 0.4953595697879791, | |
| 0.5362335443496704, | |
| 0.7229145169258118, | |
| 0.4241175949573517, | |
| 0.3835858404636383, | |
| 0.8711888790130615, | |
| 0.8752027153968811, | |
| 0.8618167638778687, | |
| 0.4735512435436249, | |
| 0.47097814083099365, | |
| 0.4957125186920166, | |
| 0.8091335296630859, | |
| 0.8663551807403564, | |
| 0.8784079551696777, | |
| 0.5133549571037292, | |
| 0.42201387882232666, | |
| 0.4296361207962036, | |
| 0.3151407241821289, | |
| 0.5211864113807678, | |
| 0.5909596085548401, | |
| 0.524550199508667, | |
| 0.7371199131011963, | |
| 0.5599789619445801, | |
| 0.04795292019844055, | |
| 0.045255232602357864, | |
| 0.04756557196378708, | |
| 0.5445749759674072 | |
| ], | |
| "eval_math_runtime": 22.5981, | |
| "eval_math_samples_per_second": 4.425, | |
| "eval_math_steps_per_second": 0.089, | |
| "epoch": 1.75, | |
| "step": 100 | |
| }, | |
| { | |
| "train_runtime": 2129.8823, | |
| "train_samples_per_second": 1.502, | |
| "train_steps_per_second": 0.047, | |
| "total_flos": 0.0, | |
| "train_loss": 0.6600256007909775, | |
| "epoch": 1.75, | |
| "step": 100 | |
| } | |
| ] |