Instructions to use genies-models/openllama-3b-math_hard with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use genies-models/openllama-3b-math_hard with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForSequenceClassification

base_model = AutoModelForSequenceClassification.from_pretrained("models/openllama-3b")
model = PeftModel.from_pretrained(base_model, "genies-models/openllama-3b-math_hard")
```
- Notebooks
- Google Colab
- Kaggle
| [ | |
| { | |
| "loss": 0.7082, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.02, | |
| "step": 1 | |
| }, | |
| { | |
| "loss": 0.7006, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.04, | |
| "step": 2 | |
| }, | |
| { | |
| "loss": 0.6979, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.05, | |
| "step": 3 | |
| }, | |
| { | |
| "loss": 0.6887, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.07, | |
| "step": 4 | |
| }, | |
| { | |
| "loss": 0.6946, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.09, | |
| "step": 5 | |
| }, | |
| { | |
| "loss": 0.6997, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.11, | |
| "step": 6 | |
| }, | |
| { | |
| "loss": 0.6956, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.12, | |
| "step": 7 | |
| }, | |
| { | |
| "loss": 0.6916, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.14, | |
| "step": 8 | |
| }, | |
| { | |
| "loss": 0.6935, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.16, | |
| "step": 9 | |
| }, | |
| { | |
| "loss": 0.6894, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.18, | |
| "step": 10 | |
| }, | |
| { | |
| "loss": 0.6924, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.19, | |
| "step": 11 | |
| }, | |
| { | |
| "loss": 0.6834, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.21, | |
| "step": 12 | |
| }, | |
| { | |
| "loss": 0.6911, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.23, | |
| "step": 13 | |
| }, | |
| { | |
| "loss": 0.6945, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.25, | |
| "step": 14 | |
| }, | |
| { | |
| "loss": 0.6819, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.26, | |
| "step": 15 | |
| }, | |
| { | |
| "loss": 0.6824, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.28, | |
| "step": 16 | |
| }, | |
| { | |
| "loss": 0.692, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.3, | |
| "step": 17 | |
| }, | |
| { | |
| "loss": 0.6878, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.32, | |
| "step": 18 | |
| }, | |
| { | |
| "loss": 0.6739, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.33, | |
| "step": 19 | |
| }, | |
| { | |
| "loss": 0.6814, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.35, | |
| "step": 20 | |
| }, | |
| { | |
| "loss": 0.6755, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.37, | |
| "step": 21 | |
| }, | |
| { | |
| "loss": 0.6525, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.39, | |
| "step": 22 | |
| }, | |
| { | |
| "loss": 0.6933, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.4, | |
| "step": 23 | |
| }, | |
| { | |
| "loss": 0.6912, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.42, | |
| "step": 24 | |
| }, | |
| { | |
| "loss": 0.6837, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.44, | |
| "step": 25 | |
| }, | |
| { | |
| "eval_math_hard_loss": 0.6894748210906982, | |
| "eval_math_hard_score": -0.24721547961235046, | |
| "eval_math_hard_brier_score": 0.24721547961235046, | |
| "eval_math_hard_average_probability": 0.503162682056427, | |
| "eval_math_hard_accuracy": 0.67, | |
| "eval_math_hard_probabilities": [ | |
| 0.4227713644504547, | |
| 0.4243110716342926, | |
| 0.4217226803302765, | |
| 0.5080846548080444, | |
| 0.5003512501716614, | |
| 0.5066218972206116, | |
| 0.5015118718147278, | |
| 0.500447154045105, | |
| 0.5005831122398376, | |
| 0.5008090734481812, | |
| 0.4988376200199127, | |
| 0.5015636682510376, | |
| 0.49952030181884766, | |
| 0.5036075115203857, | |
| 0.5045143961906433, | |
| 0.4983898401260376, | |
| 0.5007873773574829, | |
| 0.5014252662658691, | |
| 0.5002598166465759, | |
| 0.5008907318115234, | |
| 0.5009533166885376, | |
| 0.5221489071846008, | |
| 0.5234034657478333, | |
| 0.5247792601585388, | |
| 0.5126287937164307, | |
| 0.5133108496665955, | |
| 0.5110536813735962, | |
| 0.5020310282707214, | |
| 0.4892224371433258, | |
| 0.5109142065048218, | |
| 0.5004593729972839, | |
| 0.5045181512832642, | |
| 0.49915167689323425, | |
| 0.49843019247055054, | |
| 0.4997433125972748, | |
| 0.49867600202560425, | |
| 0.502083957195282, | |
| 0.501447856426239, | |
| 0.5003753304481506, | |
| 0.5011199712753296, | |
| 0.49873456358909607, | |
| 0.49848291277885437, | |
| 0.48643049597740173, | |
| 0.49849626421928406, | |
| 0.48845013976097107, | |
| 0.5013667345046997, | |
| 0.500347375869751, | |
| 0.5004876255989075, | |
| 0.5421925783157349, | |
| 0.557721734046936, | |
| 0.5099676847457886, | |
| 0.49849411845207214, | |
| 0.4995008707046509, | |
| 0.49974963068962097, | |
| 0.5455760955810547, | |
| 0.5438253283500671, | |
| 0.5432474613189697, | |
| 0.5029205679893494, | |
| 0.4974336326122284, | |
| 0.5029483437538147, | |
| 0.5062002539634705, | |
| 0.509421169757843, | |
| 0.5076862573623657, | |
| 0.4995964765548706, | |
| 0.502112090587616, | |
| 0.5017081499099731, | |
| 0.49831974506378174, | |
| 0.4960530996322632, | |
| 0.4953078627586365, | |
| 0.5019406676292419, | |
| 0.4999215304851532, | |
| 0.498506635427475, | |
| 0.5036457777023315, | |
| 0.5040311217308044, | |
| 0.5019571185112, | |
| 0.5034784078598022, | |
| 0.5008260011672974, | |
| 0.5031225681304932, | |
| 0.5478922128677368, | |
| 0.5407694578170776, | |
| 0.5346946120262146, | |
| 0.5009269714355469, | |
| 0.501581072807312, | |
| 0.5043609142303467, | |
| 0.5141059756278992, | |
| 0.5151600241661072, | |
| 0.4976750612258911, | |
| 0.5016065239906311, | |
| 0.501369833946228, | |
| 0.5012784600257874, | |
| 0.49994927644729614, | |
| 0.5037755370140076, | |
| 0.5020310878753662, | |
| 0.4963326156139374, | |
| 0.49563106894493103, | |
| 0.49556463956832886, | |
| 0.49954840540885925, | |
| 0.49477893114089966, | |
| 0.5013437867164612, | |
| 0.502220630645752 | |
| ], | |
| "eval_math_hard_runtime": 19.1857, | |
| "eval_math_hard_samples_per_second": 5.212, | |
| "eval_math_hard_steps_per_second": 0.104, | |
| "epoch": 0.44, | |
| "step": 25 | |
| }, | |
| { | |
| "loss": 0.6608, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.46, | |
| "step": 26 | |
| }, | |
| { | |
| "loss": 0.6878, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.47, | |
| "step": 27 | |
| }, | |
| { | |
| "loss": 0.6559, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.49, | |
| "step": 28 | |
| }, | |
| { | |
| "loss": 0.6863, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.51, | |
| "step": 29 | |
| }, | |
| { | |
| "loss": 0.6592, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.53, | |
| "step": 30 | |
| }, | |
| { | |
| "loss": 0.6823, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.54, | |
| "step": 31 | |
| }, | |
| { | |
| "loss": 0.6765, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.56, | |
| "step": 32 | |
| }, | |
| { | |
| "loss": 0.6858, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.58, | |
| "step": 33 | |
| }, | |
| { | |
| "loss": 0.6909, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.6, | |
| "step": 34 | |
| }, | |
| { | |
| "loss": 0.6811, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.61, | |
| "step": 35 | |
| }, | |
| { | |
| "loss": 0.6753, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.63, | |
| "step": 36 | |
| }, | |
| { | |
| "loss": 0.6758, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.65, | |
| "step": 37 | |
| }, | |
| { | |
| "loss": 0.6865, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.67, | |
| "step": 38 | |
| }, | |
| { | |
| "loss": 0.6402, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.68, | |
| "step": 39 | |
| }, | |
| { | |
| "loss": 0.6536, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.7, | |
| "step": 40 | |
| }, | |
| { | |
| "loss": 0.645, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.72, | |
| "step": 41 | |
| }, | |
| { | |
| "loss": 0.6855, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.74, | |
| "step": 42 | |
| }, | |
| { | |
| "loss": 0.6286, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.75, | |
| "step": 43 | |
| }, | |
| { | |
| "loss": 0.6574, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.77, | |
| "step": 44 | |
| }, | |
| { | |
| "loss": 0.668, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.79, | |
| "step": 45 | |
| }, | |
| { | |
| "loss": 0.6643, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.81, | |
| "step": 46 | |
| }, | |
| { | |
| "loss": 0.6755, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.82, | |
| "step": 47 | |
| }, | |
| { | |
| "loss": 0.6663, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.84, | |
| "step": 48 | |
| }, | |
| { | |
| "loss": 0.6525, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.86, | |
| "step": 49 | |
| }, | |
| { | |
| "loss": 0.676, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.88, | |
| "step": 50 | |
| }, | |
| { | |
| "eval_math_hard_loss": 0.6725212931632996, | |
| "eval_math_hard_score": -0.24000222980976105, | |
| "eval_math_hard_brier_score": 0.24000222980976105, | |
| "eval_math_hard_average_probability": 0.5112404823303223, | |
| "eval_math_hard_accuracy": 0.71, | |
| "eval_math_hard_probabilities": [ | |
| 0.45277896523475647, | |
| 0.45469722151756287, | |
| 0.45128071308135986, | |
| 0.5148431658744812, | |
| 0.5021870732307434, | |
| 0.5043631792068481, | |
| 0.5066627860069275, | |
| 0.5054929256439209, | |
| 0.5019177794456482, | |
| 0.5219594240188599, | |
| 0.5049334764480591, | |
| 0.5112125277519226, | |
| 0.5003976821899414, | |
| 0.5065910220146179, | |
| 0.5072922110557556, | |
| 0.4964824318885803, | |
| 0.5019433498382568, | |
| 0.5026321411132812, | |
| 0.5000293850898743, | |
| 0.49963968992233276, | |
| 0.5028585195541382, | |
| 0.5398517847061157, | |
| 0.5473343729972839, | |
| 0.5544767379760742, | |
| 0.5060956478118896, | |
| 0.5239856839179993, | |
| 0.5277472734451294, | |
| 0.4985102713108063, | |
| 0.47175613045692444, | |
| 0.5033620595932007, | |
| 0.49817827343940735, | |
| 0.5102205872535706, | |
| 0.49686363339424133, | |
| 0.4992176294326782, | |
| 0.503284752368927, | |
| 0.5037179589271545, | |
| 0.5031449794769287, | |
| 0.5042518973350525, | |
| 0.5031511783599854, | |
| 0.49838143587112427, | |
| 0.4941232204437256, | |
| 0.4982997477054596, | |
| 0.4887068271636963, | |
| 0.4955118000507355, | |
| 0.4895956814289093, | |
| 0.5002733469009399, | |
| 0.5003429055213928, | |
| 0.49709463119506836, | |
| 0.5760056972503662, | |
| 0.571745753288269, | |
| 0.5338752865791321, | |
| 0.4955299496650696, | |
| 0.5003539323806763, | |
| 0.49888306856155396, | |
| 0.6243072748184204, | |
| 0.6245272755622864, | |
| 0.6229971647262573, | |
| 0.5035498738288879, | |
| 0.49556151032447815, | |
| 0.5018459558486938, | |
| 0.5172255039215088, | |
| 0.5213637948036194, | |
| 0.5222153067588806, | |
| 0.5011674165725708, | |
| 0.5060593485832214, | |
| 0.504572868347168, | |
| 0.4984975755214691, | |
| 0.5009143352508545, | |
| 0.4929470717906952, | |
| 0.4994816482067108, | |
| 0.49936628341674805, | |
| 0.4988759160041809, | |
| 0.5091677904129028, | |
| 0.5084511637687683, | |
| 0.5050278902053833, | |
| 0.5068730115890503, | |
| 0.5035209059715271, | |
| 0.5078574419021606, | |
| 0.6247063279151917, | |
| 0.6266769170761108, | |
| 0.6112779974937439, | |
| 0.5065394639968872, | |
| 0.5045663118362427, | |
| 0.5105231404304504, | |
| 0.5081201791763306, | |
| 0.5091950297355652, | |
| 0.5013695955276489, | |
| 0.5074512362480164, | |
| 0.50473552942276, | |
| 0.5031643509864807, | |
| 0.5013434290885925, | |
| 0.5119460225105286, | |
| 0.5085548162460327, | |
| 0.5006793737411499, | |
| 0.503533661365509, | |
| 0.4990837275981903, | |
| 0.47225868701934814, | |
| 0.4665180742740631, | |
| 0.47514545917510986, | |
| 0.5062111020088196 | |
| ], | |
| "eval_math_hard_runtime": 19.1606, | |
| "eval_math_hard_samples_per_second": 5.219, | |
| "eval_math_hard_steps_per_second": 0.104, | |
| "epoch": 0.88, | |
| "step": 50 | |
| }, | |
| { | |
| "loss": 0.6385, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.89, | |
| "step": 51 | |
| }, | |
| { | |
| "loss": 0.6424, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.91, | |
| "step": 52 | |
| }, | |
| { | |
| "loss": 0.6859, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.93, | |
| "step": 53 | |
| }, | |
| { | |
| "loss": 0.6738, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.95, | |
| "step": 54 | |
| }, | |
| { | |
| "loss": 0.6795, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.96, | |
| "step": 55 | |
| }, | |
| { | |
| "loss": 0.6794, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 0.98, | |
| "step": 56 | |
| }, | |
| { | |
| "loss": 0.6308, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.0, | |
| "step": 57 | |
| }, | |
| { | |
| "loss": 0.5867, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.02, | |
| "step": 58 | |
| }, | |
| { | |
| "loss": 0.651, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.04, | |
| "step": 59 | |
| }, | |
| { | |
| "loss": 0.6177, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.05, | |
| "step": 60 | |
| }, | |
| { | |
| "loss": 0.6132, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.07, | |
| "step": 61 | |
| }, | |
| { | |
| "loss": 0.6702, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.09, | |
| "step": 62 | |
| }, | |
| { | |
| "loss": 0.6141, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.11, | |
| "step": 63 | |
| }, | |
| { | |
| "loss": 0.6563, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.12, | |
| "step": 64 | |
| }, | |
| { | |
| "loss": 0.6674, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.14, | |
| "step": 65 | |
| }, | |
| { | |
| "loss": 0.6499, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.16, | |
| "step": 66 | |
| }, | |
| { | |
| "loss": 0.6782, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.18, | |
| "step": 67 | |
| }, | |
| { | |
| "loss": 0.6058, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.19, | |
| "step": 68 | |
| }, | |
| { | |
| "loss": 0.5962, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.21, | |
| "step": 69 | |
| }, | |
| { | |
| "loss": 0.6426, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.23, | |
| "step": 70 | |
| }, | |
| { | |
| "loss": 0.626, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.25, | |
| "step": 71 | |
| }, | |
| { | |
| "loss": 0.6095, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.26, | |
| "step": 72 | |
| }, | |
| { | |
| "loss": 0.6375, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.28, | |
| "step": 73 | |
| }, | |
| { | |
| "loss": 0.5938, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.3, | |
| "step": 74 | |
| }, | |
| { | |
| "loss": 0.5915, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.32, | |
| "step": 75 | |
| }, | |
| { | |
| "eval_math_hard_loss": 0.6123586893081665, | |
| "eval_math_hard_score": -0.2185293436050415, | |
| "eval_math_hard_brier_score": 0.2185293436050415, | |
| "eval_math_hard_average_probability": 0.5461798310279846, | |
| "eval_math_hard_accuracy": 0.7, | |
| "eval_math_hard_probabilities": [ | |
| 0.6981080174446106, | |
| 0.6993874907493591, | |
| 0.6964402198791504, | |
| 0.496179461479187, | |
| 0.5118089318275452, | |
| 0.4348101019859314, | |
| 0.5450701713562012, | |
| 0.5478569865226746, | |
| 0.5073054432868958, | |
| 0.6003513336181641, | |
| 0.48766252398490906, | |
| 0.5207421183586121, | |
| 0.5321508646011353, | |
| 0.5228312611579895, | |
| 0.5217955112457275, | |
| 0.4980320930480957, | |
| 0.5108472108840942, | |
| 0.5094117522239685, | |
| 0.5452646613121033, | |
| 0.5308429002761841, | |
| 0.5391503572463989, | |
| 0.685689389705658, | |
| 0.7256925106048584, | |
| 0.7535527348518372, | |
| 0.492637038230896, | |
| 0.5849336385726929, | |
| 0.6581502556800842, | |
| 0.507750391960144, | |
| 0.4166055917739868, | |
| 0.537865400314331, | |
| 0.480641633272171, | |
| 0.5339422821998596, | |
| 0.4952820837497711, | |
| 0.5112771987915039, | |
| 0.5374191403388977, | |
| 0.5484182834625244, | |
| 0.5139420032501221, | |
| 0.5436358451843262, | |
| 0.535308301448822, | |
| 0.4862540364265442, | |
| 0.46842843294143677, | |
| 0.5044496655464172, | |
| 0.47761040925979614, | |
| 0.47824832797050476, | |
| 0.49812138080596924, | |
| 0.49636951088905334, | |
| 0.5025320053100586, | |
| 0.45967310667037964, | |
| 0.47252291440963745, | |
| 0.43059277534484863, | |
| 0.34971874952316284, | |
| 0.47613152861595154, | |
| 0.50247722864151, | |
| 0.5095160603523254, | |
| 0.948936939239502, | |
| 0.9514877200126648, | |
| 0.9500554800033569, | |
| 0.5087370276451111, | |
| 0.4877012372016907, | |
| 0.49044281244277954, | |
| 0.5669111609458923, | |
| 0.5985153317451477, | |
| 0.6142491698265076, | |
| 0.5062389969825745, | |
| 0.5240121483802795, | |
| 0.5009501576423645, | |
| 0.5466399788856506, | |
| 0.5929512977600098, | |
| 0.4971684515476227, | |
| 0.4791565537452698, | |
| 0.49307700991630554, | |
| 0.4976717531681061, | |
| 0.5357528328895569, | |
| 0.534887969493866, | |
| 0.5193175673484802, | |
| 0.5436345338821411, | |
| 0.5458875298500061, | |
| 0.5654334425926208, | |
| 0.8299615979194641, | |
| 0.8604872822761536, | |
| 0.8265635967254639, | |
| 0.5350908637046814, | |
| 0.5183643102645874, | |
| 0.5320155620574951, | |
| 0.4727846086025238, | |
| 0.46650999784469604, | |
| 0.5301773548126221, | |
| 0.5392518043518066, | |
| 0.532899022102356, | |
| 0.5097134113311768, | |
| 0.5105643272399902, | |
| 0.5698909163475037, | |
| 0.5556212663650513, | |
| 0.5259840488433838, | |
| 0.5478115081787109, | |
| 0.5297166705131531, | |
| 0.3596650958061218, | |
| 0.3327234983444214, | |
| 0.3539780080318451, | |
| 0.5469507575035095 | |
| ], | |
| "eval_math_hard_runtime": 19.1569, | |
| "eval_math_hard_samples_per_second": 5.22, | |
| "eval_math_hard_steps_per_second": 0.104, | |
| "epoch": 1.32, | |
| "step": 75 | |
| }, | |
| { | |
| "loss": 0.6574, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.33, | |
| "step": 76 | |
| }, | |
| { | |
| "loss": 0.5659, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.35, | |
| "step": 77 | |
| }, | |
| { | |
| "loss": 0.6504, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.37, | |
| "step": 78 | |
| }, | |
| { | |
| "loss": 0.6161, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.39, | |
| "step": 79 | |
| }, | |
| { | |
| "loss": 0.5772, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.4, | |
| "step": 80 | |
| }, | |
| { | |
| "loss": 0.5762, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.42, | |
| "step": 81 | |
| }, | |
| { | |
| "loss": 0.5572, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.44, | |
| "step": 82 | |
| }, | |
| { | |
| "loss": 0.5432, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.46, | |
| "step": 83 | |
| }, | |
| { | |
| "loss": 0.5356, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.47, | |
| "step": 84 | |
| }, | |
| { | |
| "loss": 0.4974, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.49, | |
| "step": 85 | |
| }, | |
| { | |
| "loss": 0.5639, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.51, | |
| "step": 86 | |
| }, | |
| { | |
| "loss": 0.585, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.53, | |
| "step": 87 | |
| }, | |
| { | |
| "loss": 0.5924, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.54, | |
| "step": 88 | |
| }, | |
| { | |
| "loss": 0.5241, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.56, | |
| "step": 89 | |
| }, | |
| { | |
| "loss": 0.591, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.58, | |
| "step": 90 | |
| }, | |
| { | |
| "loss": 0.5843, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.6, | |
| "step": 91 | |
| }, | |
| { | |
| "loss": 0.5517, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.61, | |
| "step": 92 | |
| }, | |
| { | |
| "loss": 0.552, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.63, | |
| "step": 93 | |
| }, | |
| { | |
| "loss": 0.5687, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.65, | |
| "step": 94 | |
| }, | |
| { | |
| "loss": 0.5504, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.67, | |
| "step": 95 | |
| }, | |
| { | |
| "loss": 0.6031, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.68, | |
| "step": 96 | |
| }, | |
| { | |
| "loss": 0.5703, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.7, | |
| "step": 97 | |
| }, | |
| { | |
| "loss": 0.5655, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.72, | |
| "step": 98 | |
| }, | |
| { | |
| "loss": 0.5707, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.74, | |
| "step": 99 | |
| }, | |
| { | |
| "loss": 0.5428, | |
| "learning_rate": 7.2e-05, | |
| "epoch": 1.75, | |
| "step": 100 | |
| }, | |
| { | |
| "eval_math_hard_loss": 0.5149410963058472, | |
| "eval_math_hard_score": -0.18658693134784698, | |
| "eval_math_hard_brier_score": 0.18658693134784698, | |
| "eval_math_hard_average_probability": 0.6249247193336487, | |
| "eval_math_hard_accuracy": 0.74, | |
| "eval_math_hard_probabilities": [ | |
| 0.9090185761451721, | |
| 0.9152910113334656, | |
| 0.920225203037262, | |
| 0.544651210308075, | |
| 0.48588231205940247, | |
| 0.26481539011001587, | |
| 0.6502981185913086, | |
| 0.7012529373168945, | |
| 0.5503808259963989, | |
| 0.6474319696426392, | |
| 0.42047441005706787, | |
| 0.4609397053718567, | |
| 0.626591682434082, | |
| 0.6665865778923035, | |
| 0.657002329826355, | |
| 0.5227519869804382, | |
| 0.5618593692779541, | |
| 0.5485963821411133, | |
| 0.8084971904754639, | |
| 0.7115694880485535, | |
| 0.7779558300971985, | |
| 0.9740842580795288, | |
| 0.9867558479309082, | |
| 0.9904892444610596, | |
| 0.7436046004295349, | |
| 0.7511422634124756, | |
| 0.8598321676254272, | |
| 0.7189204692840576, | |
| 0.6654929518699646, | |
| 0.7797940969467163, | |
| 0.49751776456832886, | |
| 0.6186977624893188, | |
| 0.5393147468566895, | |
| 0.5485488176345825, | |
| 0.6180264949798584, | |
| 0.6347217559814453, | |
| 0.42208293080329895, | |
| 0.5922693610191345, | |
| 0.5913844704627991, | |
| 0.4507058262825012, | |
| 0.36954668164253235, | |
| 0.5368402004241943, | |
| 0.4939379096031189, | |
| 0.4328914284706116, | |
| 0.585762083530426, | |
| 0.5262035727500916, | |
| 0.5042811632156372, | |
| 0.23688159883022308, | |
| 0.3037083148956299, | |
| 0.12116531282663345, | |
| 0.2997088134288788, | |
| 0.4748251438140869, | |
| 0.5383250713348389, | |
| 0.5640963315963745, | |
| 0.9970376491546631, | |
| 0.9975969195365906, | |
| 0.997580885887146, | |
| 0.513490617275238, | |
| 0.47000354528427124, | |
| 0.44651105999946594, | |
| 0.9373555183410645, | |
| 0.9684251546859741, | |
| 0.9780910015106201, | |
| 0.5609255433082581, | |
| 0.6275558471679688, | |
| 0.5644670724868774, | |
| 0.8380681276321411, | |
| 0.9534356594085693, | |
| 0.6312075257301331, | |
| 0.40665751695632935, | |
| 0.5042629837989807, | |
| 0.48140040040016174, | |
| 0.6677272915840149, | |
| 0.6427435278892517, | |
| 0.5668158531188965, | |
| 0.7906490564346313, | |
| 0.8034231662750244, | |
| 0.8900921940803528, | |
| 0.9583730697631836, | |
| 0.9730780124664307, | |
| 0.9701274633407593, | |
| 0.6236186623573303, | |
| 0.5862740278244019, | |
| 0.5670204162597656, | |
| 0.4843604266643524, | |
| 0.4715549647808075, | |
| 0.6955848336219788, | |
| 0.687962532043457, | |
| 0.7231170535087585, | |
| 0.4752002954483032, | |
| 0.31005188822746277, | |
| 0.7067242860794067, | |
| 0.6296723484992981, | |
| 0.6213487386703491, | |
| 0.7203496098518372, | |
| 0.6451394557952881, | |
| 0.10958302021026611, | |
| 0.08802775293588638, | |
| 0.10785944014787674, | |
| 0.7782877683639526 | |
| ], | |
| "eval_math_hard_runtime": 19.1566, | |
| "eval_math_hard_samples_per_second": 5.22, | |
| "eval_math_hard_steps_per_second": 0.104, | |
| "epoch": 1.75, | |
| "step": 100 | |
| }, | |
| { | |
| "train_runtime": 1950.9713, | |
| "train_samples_per_second": 1.64, | |
| "train_steps_per_second": 0.051, | |
| "total_flos": 0.0, | |
| "train_loss": 0.6416401875019073, | |
| "epoch": 1.75, | |
| "step": 100 | |
| } | |
| ] |