Instructions for using genies-models/llama-7b-code_is_correct with libraries, inference providers, notebooks, and local apps. Follow the links below to get started.
- Libraries
- PEFT
How to use genies-models/llama-7b-code_is_correct with PEFT (an inference sketch follows the notebook list below):

```python
from peft import PeftModel
from transformers import AutoModelForSequenceClassification

# Load the base checkpoint, then attach the fine-tuned classification adapter.
base_model = AutoModelForSequenceClassification.from_pretrained("models/llama-7b")
model = PeftModel.from_pretrained(base_model, "genies-models/llama-7b-code_is_correct")
```

- Notebooks
- Google Colab
- Kaggle
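
To get a prediction out of the loaded classifier, something like the following should work. This is a minimal sketch continuing from the snippet above; it assumes the tokenizer ships with the base checkpoint and that the head is a standard two-label classifier, neither of which is confirmed by this page:

```python
import torch
from transformers import AutoTokenizer

# Assumption: the tokenizer lives alongside the base checkpoint used above.
tokenizer = AutoTokenizer.from_pretrained("models/llama-7b")
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # Llama tokenizers define no pad token by default

# Hypothetical input: a code snippet whose correctness we want scored.
snippet = "def add(a, b):\n    return a - b"
inputs = tokenizer(snippet, return_tensors="pt")

model.eval()
with torch.no_grad():
    logits = model(**inputs).logits

# Assumption: label 1 means "code is correct"; softmax turns logits into probabilities.
prob_correct = torch.softmax(logits, dim=-1)[0, 1].item()
print(f"P(code is correct) = {prob_correct:.3f}")
```

The per-example probabilities in the training log below appear to be exactly this kind of score, so values near 0.5 indicate an uncertain classifier.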
Training loss by step (the learning rate is constant at 0.0002 throughout):

| Step | Epoch | Loss |
|-----:|------:|-------:|
| 1 | 0.05 | 2.319 |
| 2 | 0.11 | 1.1571 |
| 3 | 0.16 | 0.8951 |
| 4 | 0.21 | 0.7019 |
| 5 | 0.26 | 0.6999 |
| 6 | 0.32 | 0.7884 |
| 7 | 0.37 | 0.6625 |
| 8 | 0.42 | 0.6943 |
| 9 | 0.47 | 0.7418 |
| 10 | 0.53 | 0.6976 |
| 11 | 0.58 | 0.7081 |
| 12 | 0.63 | 0.6817 |
| 13 | 0.68 | 0.6799 |
| 14 | 0.74 | 0.7014 |
| 15 | 0.79 | 0.7207 |
| 16 | 0.84 | 0.681 |
| 17 | 0.89 | 0.7092 |
| 18 | 0.95 | 0.691 |
| 19 | 1.0 | 0.6796 |
| 20 | 1.05 | 0.7201 |
| 21 | 1.11 | 0.6609 |
| 22 | 1.16 | 0.716 |
| 23 | 1.21 | 0.7015 |
| 24 | 1.26 | 0.7698 |
| 25 | 1.32 | 0.7193 |
| 26 | 1.37 | 0.6893 |
| 27 | 1.42 | 0.6885 |
| 28 | 1.47 | 0.6805 |
| 29 | 1.53 | 0.7027 |
| 30 | 1.58 | 0.7283 |
| 31 | 1.63 | 0.6708 |
| 32 | 1.68 | 0.7083 |
| 33 | 1.74 | 0.7336 |
| 34 | 1.79 | 0.7028 |
| 35 | 1.84 | 0.695 |
| 36 | 1.89 | 0.6997 |
| 37 | 1.95 | 0.6743 |
| 38 | 2.0 | 0.7388 |
| 39 | 2.05 | 0.6464 |
| 40 | 2.11 | 0.6772 |
| 41 | 2.16 | 0.7009 |
| 42 | 2.21 | 0.7021 |
| 43 | 2.26 | 0.7051 |
| 44 | 2.32 | 0.6816 |
| 45 | 2.37 | 0.7047 |
| 46 | 2.42 | 0.683 |
| 47 | 2.47 | 0.6875 |
| 48 | 2.53 | 0.6723 |
| 49 | 2.58 | 0.6727 |
| 50 | 2.63 | 0.6683 |
| 51 | 2.68 | 0.6992 |
| 52 | 2.74 | 0.7576 |
| 53 | 2.79 | 0.6925 |
| 54 | 2.84 | 0.6884 |
| 55 | 2.89 | 0.6555 |
| 56 | 2.95 | 0.7276 |
| 57 | 3.0 | 0.6827 |
| 58 | 3.05 | 0.6291 |
| 59 | 3.11 | 0.7595 |
| 60 | 3.16 | 0.6753 |
| 61 | 3.21 | 0.7555 |
| 62 | 3.26 | 0.6965 |
| 63 | 3.32 | 0.6775 |
| 64 | 3.37 | 0.6841 |
| 65 | 3.42 | 0.6706 |
| 66 | 3.47 | 0.7128 |
| 67 | 3.53 | 0.7036 |
| 68 | 3.58 | 0.67 |
| 69 | 3.63 | 0.6816 |
| 70 | 3.68 | 0.6635 |
| 71 | 3.74 | 0.6457 |
| 72 | 3.79 | 0.6369 |
| 73 | 3.84 | 0.6955 |
| 74 | 3.89 | 0.6937 |
| 75 | 3.95 | 0.6842 |
| 76 | 4.0 | 0.6694 |
| 77 | 4.05 | 0.6634 |
| 78 | 4.11 | 0.6602 |
| 79 | 4.16 | 0.6396 |
| 80 | 4.21 | 0.7378 |
| 81 | 4.26 | 0.8125 |
| 82 | 4.32 | 0.5817 |
| 83 | 4.37 | 0.6169 |
| 84 | 4.42 | 0.6157 |
| 85 | 4.47 | 0.6109 |
| 86 | 4.53 | 0.5057 |
| 87 | 4.58 | 0.5264 |
| 88 | 4.63 | 0.6092 |
| 89 | 4.68 | 0.5337 |
| 90 | 4.74 | 0.6412 |
| 91 | 4.79 | 0.5222 |
| 92 | 4.84 | 0.612 |
| 93 | 4.89 | 0.6778 |
| 94 | 4.95 | 0.673 |
| 95 | 5.0 | 0.5896 |
| 96 | 5.05 | 0.6669 |
| 97 | 5.11 | 0.7279 |
| 98 | 5.16 | 0.4678 |
| 99 | 5.21 | 0.5572 |
| 100 | 5.26 | 0.5173 |

Evaluation results on the `code_is_correct` eval set, logged every 25 steps (the log's `eval_code_is_correct_score` is exactly the negative of the Brier score):

| Step | Epoch | Eval loss | Brier score | Avg. probability | Accuracy | Runtime (s) | Samples/s | Steps/s |
|-----:|------:|----------:|------------:|-----------------:|---------:|------------:|----------:|--------:|
| 25 | 1.32 | 0.7040590047836304 | 0.25534066557884216 | 0.4964892268180847 | 0.48 | 38.6775 | 2.585 | 0.052 |
| 50 | 2.63 | 0.6990968585014343 | 0.2527521252632141 | 0.5049085021018982 | 0.52 | 38.6577 | 2.587 | 0.052 |
| 75 | 3.95 | 0.7020106315612793 | 0.2538911998271942 | 0.502829372882843 | 0.45 | 38.6558 | 2.587 | 0.052 |
| 100 | 5.26 | 0.6044353246688843 | 0.20567844808101654 | 0.5809673070907593 | 0.69 | 38.7216 | 2.583 | 0.052 |

Per-example probabilities from each evaluation pass (100 evaluation examples, in log order):

Step 25:

```json
[0.537757158279419, 0.45456114411354065, 0.46773606538772583, 0.5088943839073181, 0.44396457076072693,
 0.5441208481788635, 0.46894216537475586, 0.5342907905578613, 0.5333516001701355, 0.543117105960846,
 0.4494006931781769, 0.5351378321647644, 0.5340185761451721, 0.5496283173561096, 0.46146854758262634,
 0.46118637919425964, 0.47490179538726807, 0.54707932472229, 0.4630311131477356, 0.46271002292633057,
 0.553754448890686, 0.45833271741867065, 0.44106921553611755, 0.5405190587043762, 0.5340151786804199,
 0.538471519947052, 0.44194644689559937, 0.4529338479042053, 0.46126678586006165, 0.547139585018158,
 0.4498041272163391, 0.4580332040786743, 0.5330279469490051, 0.47573158144950867, 0.4609244763851166,
 0.5373966693878174, 0.4560283422470093, 0.45565375685691833, 0.4430266320705414, 0.5496351718902588,
 0.46626290678977966, 0.5276714563369751, 0.5368297696113586, 0.4585987329483032, 0.4506687819957733,
 0.48509010672569275, 0.5378000736236572, 0.5482139587402344, 0.53798508644104, 0.5404862761497498,
 0.5462475419044495, 0.5316811203956604, 0.545008659362793, 0.4452662467956543, 0.46562960743904114,
 0.4650121331214905, 0.5360617637634277, 0.4509516656398773, 0.44952070713043213, 0.4582957625389099,
 0.4517310857772827, 0.4437441825866699, 0.4606005549430847, 0.547730028629303, 0.4598681926727295,
 0.46059927344322205, 0.4745525121688843, 0.5424997210502625, 0.5405730605125427, 0.5450511574745178,
 0.4547448456287384, 0.5489879846572876, 0.5354413986206055, 0.4355437159538269, 0.5529230833053589,
 0.44915857911109924, 0.44940751791000366, 0.4544369876384735, 0.45526307821273804, 0.5274561047554016,
 0.44203782081604004, 0.4448660910129547, 0.520516574382782, 0.5427135825157166, 0.4694376587867737,
 0.45909956097602844, 0.46876445412635803, 0.5469857454299927, 0.5509116053581238, 0.4485166072845459,
 0.5540351867675781, 0.5366657972335815, 0.5381718277931213, 0.539725661277771, 0.5412744879722595,
 0.5397677421569824, 0.5319379568099976, 0.5362846255302429, 0.45564961433410645, 0.44395512342453003]
```

Step 50:

```json
[0.4113783538341522, 0.5813317894935608, 0.5584416389465332, 0.41934269666671753, 0.5603463053703308,
 0.4234032928943634, 0.5691850781440735, 0.45391106605529785, 0.41783419251441956, 0.3891553580760956,
 0.5918279886245728, 0.39812296628952026, 0.3982398509979248, 0.4082736074924469, 0.5677108764648438,
 0.6011993885040283, 0.5879720449447632, 0.3799493908882141, 0.6072862148284912, 0.5974151492118835,
 0.41310346126556396, 0.563809871673584, 0.5413035750389099, 0.4107910394668579, 0.39933568239212036,
 0.4007815718650818, 0.5928361415863037, 0.5920814871788025, 0.5649393200874329, 0.4397546648979187,
 0.5982930660247803, 0.5736469030380249, 0.38247016072273254, 0.6029582023620605, 0.6050556302070618,
 0.4481448829174042, 0.5958505272865295, 0.5872756838798523, 0.5685001015663147, 0.4170364737510681,
 0.5979400873184204, 0.41077008843421936, 0.4539501667022705, 0.5808403491973877, 0.5866948962211609,
 0.5965925455093384, 0.40257659554481506, 0.41161125898361206, 0.3845840096473694, 0.4100808799266815,
 0.41701117157936096, 0.44743281602859497, 0.3830299377441406, 0.5776842832565308, 0.602536141872406,
 0.6121425628662109, 0.4026949107646942, 0.6027580499649048, 0.5705916285514832, 0.5934571623802185,
 0.5883546471595764, 0.5897589325904846, 0.5788743495941162, 0.42781656980514526, 0.6336899399757385,
 0.5650004148483276, 0.5743370056152344, 0.4256901144981384, 0.4294798672199249, 0.4059537351131439,
 0.5902615189552307, 0.4481872022151947, 0.46126502752304077, 0.5778518915176392, 0.409288614988327,
 0.5872939229011536, 0.553527295589447, 0.6006361246109009, 0.6073405146598816, 0.3934749662876129,
 0.5734230279922485, 0.5781282782554626, 0.4140267074108124, 0.4379793703556061, 0.5872884392738342,
 0.5996562242507935, 0.6247859001159668, 0.42214199900627136, 0.4056937098503113, 0.5941511392593384,
 0.4240345358848572, 0.3906824290752411, 0.4487050473690033, 0.4064308702945709, 0.4991621673107147,
 0.3930267095565796, 0.4262542128562927, 0.39416787028312683, 0.6034715175628662, 0.5542858839035034]
```

Step 75:

```json
[0.5088492035865784, 0.4741206467151642, 0.4018367826938629, 0.49836266040802, 0.3702163100242615,
 0.6138777732849121, 0.43586498498916626, 0.6494307518005371, 0.5666269063949585, 0.5525698065757751,
 0.3989929258823395, 0.49194008111953735, 0.5326002836227417, 0.5366830825805664, 0.42133161425590515,
 0.42769843339920044, 0.44886454939842224, 0.4730355739593506, 0.48204565048217773, 0.4801846444606781,
 0.5868195295333862, 0.4399736523628235, 0.3782220780849457, 0.5690118074417114, 0.5845021605491638,
 0.5171434283256531, 0.41184958815574646, 0.4574517011642456, 0.41264650225639343, 0.6442864537239075,
 0.48700496554374695, 0.3954983949661255, 0.4781593382358551, 0.43342939019203186, 0.4707704186439514,
 0.5405588746070862, 0.4150758981704712, 0.4007004499435425, 0.34886103868484497, 0.5174517631530762,
 0.47847726941108704, 0.548160970211029, 0.7044894099235535, 0.45586660504341125, 0.4582586884498596,
 0.5384201407432556, 0.5183372497558594, 0.5092693567276001, 0.5427851676940918, 0.5732136964797974,
 0.5735348463058472, 0.6746758818626404, 0.49393337965011597, 0.42578810453414917, 0.4929383099079132,
 0.4470900893211365, 0.574955940246582, 0.4823959767818451, 0.3653642237186432, 0.46867260336875916,
 0.42166757583618164, 0.47335392236709595, 0.4112188518047333, 0.6188163757324219, 0.5855951905250549,
 0.47489026188850403, 0.45155712962150574, 0.6093342900276184, 0.605854868888855, 0.562972903251648,
 0.4357713460922241, 0.598077118396759, 0.5654999613761902, 0.43106329441070557, 0.5422348976135254,
 0.40416285395622253, 0.4005366861820221, 0.46095961332321167, 0.490922749042511, 0.49807679653167725,
 0.40843698382377625, 0.4329848885536194, 0.5059437155723572, 0.6385719180107117, 0.49556964635849,
 0.43445929884910583, 0.49239978194236755, 0.6199828386306763, 0.5497725605964661, 0.40616798400878906,
 0.6228035092353821, 0.5643633008003235, 0.5394735932350159, 0.5994738340377808, 0.7553389668464661,
 0.5502137541770935, 0.579784631729126, 0.5687956809997559, 0.5465372800827026, 0.3481208384037018]
```

Step 100:

```json
[0.495758056640625, 0.6722157001495361, 0.5509936213493347, 0.30042609572410583, 0.4185842275619507,
 0.6230117678642273, 0.6212649941444397, 0.6848446726799011, 0.6345605850219727, 0.3738310635089874,
 0.4916575253009796, 0.4045080244541168, 0.43291226029396057, 0.4618946313858032, 0.6218451857566833,
 0.3830638527870178, 0.6171413064002991, 0.2532573342323303, 0.6897477507591248, 0.6743988990783691,
 0.7978313565254211, 0.6350521445274353, 0.5316161513328552, 0.347903311252594, 0.6238374710083008,
 0.298748254776001, 0.5980588793754578, 0.6492370963096619, 0.5917383432388306, 0.6613048911094666,
 0.6900146007537842, 0.5732241272926331, 0.23556573688983917, 0.5519108772277832, 0.7326077222824097,
 0.856022834777832, 0.5683114528656006, 0.6112112402915955, 0.43082374334335327, 0.2740175426006317,
 0.710313618183136, 0.3746766149997711, 0.9831528067588806, 0.7283727526664734, 0.6630771160125732,
 0.7669262886047363, 0.310622900724411, 0.4299599826335907, 0.40406081080436707, 0.569300651550293,
 0.3967253267765045, 0.9761428833007812, 0.25975796580314636, 0.5678868889808655, 0.6639503836631775,
 0.5604276657104492, 0.5109429955482483, 0.7717424631118774, 0.5076764225959778, 0.480741024017334,
 0.6118683218955994, 0.7221248149871826, 0.42638152837753296, 0.9544664621353149, 0.7895866632461548,
 0.5485232472419739, 0.5473383665084839, 0.5949488878250122, 0.6287338137626648, 0.3621536195278168,
 0.5074862837791443, 0.8851379156112671, 0.8405280113220215, 0.6945863962173462, 0.5270545482635498,
 0.5763978362083435, 0.6713876724243164, 0.7237599492073059, 0.8137936592102051, 0.33156439661979675,
 0.6127635836601257, 0.44015809893608093, 0.2821398079395294, 0.6921646595001221, 0.6725201606750488,
 0.6315755844116211, 0.7930103540420532, 0.7124245166778564, 0.3897824287414551, 0.5918102860450745,
 0.7035940289497375, 0.36644646525382996, 0.4447966516017914, 0.5399353504180908, 0.9951468110084534,
 0.3498772382736206, 0.765509307384491, 0.6127597689628601, 0.8720724582672119, 0.5690097212791443]
```

Final training summary:

| Metric | Value |
|---|---|
| train_runtime (s) | 2619.7779 |
| train_samples_per_second | 1.221 |
| train_steps_per_second | 0.038 |
| total_flos | 0.0 |
| train_loss | 0.6988758665323257 |
| epoch | 5.26 |
| step | 100 |
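
The logged score column is exactly the negative of the Brier score, and the accuracy values are consistent with thresholding the per-example probabilities at 0.5, so the evaluation metrics above can plausibly be recomputed from those arrays. A minimal sketch, assuming the probabilities are P(label = 1) and using hypothetical ground-truth labels (the real labels are not part of this log):

```python
import numpy as np

# Per-example P(code is correct), e.g. the step-100 array above (first three shown).
probs = np.array([0.495758056640625, 0.6722157001495361, 0.5509936213493347])
# Hypothetical ground-truth labels; the real labels are not in this log.
labels = np.array([0, 1, 1])

brier = np.mean((probs - labels) ** 2)        # eval_code_is_correct_brier_score
score = -brier                                # eval_code_is_correct_score (negative Brier, as in the log)
accuracy = np.mean((probs > 0.5) == labels)   # eval_code_is_correct_accuracy, thresholding at 0.5

# eval_code_is_correct_average_probability is presumably the mean probability
# assigned to the true label (an assumption, not stated in the log):
avg_probability = np.mean(np.where(labels == 1, probs, 1 - probs))

print(brier, score, accuracy, avg_probability)
```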