Instructions to use genies-models/llama-30b-code_is_correct with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use genies-models/llama-30b-code_is_correct with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForSequenceClassification

base_model = AutoModelForSequenceClassification.from_pretrained("models/llama-30b")
model = PeftModel.from_pretrained(base_model, "genies-models/llama-30b-code_is_correct")
```
- Notebooks
- Google Colab
- Kaggle
| [ | |
| { | |
| "loss": 0.687, | |
| "learning_rate": 0.0002, | |
| "epoch": 0.05, | |
| "step": 1 | |
| }, | |
| { | |
| "loss": 0.722, | |
| "learning_rate": 0.0002, | |
| "epoch": 0.11, | |
| "step": 2 | |
| }, | |
| { | |
| "loss": 0.7011, | |
| "learning_rate": 0.0002, | |
| "epoch": 0.16, | |
| "step": 3 | |
| }, | |
| { | |
| "loss": 0.6987, | |
| "learning_rate": 0.0002, | |
| "epoch": 0.21, | |
| "step": 4 | |
| }, | |
| { | |
| "loss": 0.6776, | |
| "learning_rate": 0.0002, | |
| "epoch": 0.26, | |
| "step": 5 | |
| }, | |
| { | |
| "loss": 0.7507, | |
| "learning_rate": 0.0002, | |
| "epoch": 0.32, | |
| "step": 6 | |
| }, | |
| { | |
| "loss": 0.6607, | |
| "learning_rate": 0.0002, | |
| "epoch": 0.37, | |
| "step": 7 | |
| }, | |
| { | |
| "loss": 0.6974, | |
| "learning_rate": 0.0002, | |
| "epoch": 0.42, | |
| "step": 8 | |
| }, | |
| { | |
| "loss": 0.727, | |
| "learning_rate": 0.0002, | |
| "epoch": 0.47, | |
| "step": 9 | |
| }, | |
| { | |
| "loss": 0.6979, | |
| "learning_rate": 0.0002, | |
| "epoch": 0.53, | |
| "step": 10 | |
| }, | |
| { | |
| "loss": 0.7196, | |
| "learning_rate": 0.0002, | |
| "epoch": 0.58, | |
| "step": 11 | |
| }, | |
| { | |
| "loss": 0.6835, | |
| "learning_rate": 0.0002, | |
| "epoch": 0.63, | |
| "step": 12 | |
| }, | |
| { | |
| "loss": 0.689, | |
| "learning_rate": 0.0002, | |
| "epoch": 0.68, | |
| "step": 13 | |
| }, | |
| { | |
| "loss": 0.6883, | |
| "learning_rate": 0.0002, | |
| "epoch": 0.74, | |
| "step": 14 | |
| }, | |
| { | |
| "loss": 0.6923, | |
| "learning_rate": 0.0002, | |
| "epoch": 0.79, | |
| "step": 15 | |
| }, | |
| { | |
| "loss": 0.6878, | |
| "learning_rate": 0.0002, | |
| "epoch": 0.84, | |
| "step": 16 | |
| }, | |
| { | |
| "loss": 0.6622, | |
| "learning_rate": 0.0002, | |
| "epoch": 0.89, | |
| "step": 17 | |
| }, | |
| { | |
| "loss": 0.6656, | |
| "learning_rate": 0.0002, | |
| "epoch": 0.95, | |
| "step": 18 | |
| }, | |
| { | |
| "loss": 0.6045, | |
| "learning_rate": 0.0002, | |
| "epoch": 1.0, | |
| "step": 19 | |
| }, | |
| { | |
| "loss": 0.7383, | |
| "learning_rate": 0.0002, | |
| "epoch": 1.05, | |
| "step": 20 | |
| }, | |
| { | |
| "loss": 0.6277, | |
| "learning_rate": 0.0002, | |
| "epoch": 1.11, | |
| "step": 21 | |
| }, | |
| { | |
| "loss": 0.7156, | |
| "learning_rate": 0.0002, | |
| "epoch": 1.16, | |
| "step": 22 | |
| }, | |
| { | |
| "loss": 0.6615, | |
| "learning_rate": 0.0002, | |
| "epoch": 1.21, | |
| "step": 23 | |
| }, | |
| { | |
| "loss": 0.681, | |
| "learning_rate": 0.0002, | |
| "epoch": 1.26, | |
| "step": 24 | |
| }, | |
| { | |
| "loss": 0.6368, | |
| "learning_rate": 0.0002, | |
| "epoch": 1.32, | |
| "step": 25 | |
| }, | |
| { | |
| "eval_code_is_correct_loss": 0.6748137474060059, | |
| "eval_code_is_correct_score": -0.24037876725196838, | |
| "eval_code_is_correct_brier_score": 0.24037876725196838, | |
| "eval_code_is_correct_average_probability": 0.5268679857254028, | |
| "eval_code_is_correct_accuracy": 0.56, | |
| "eval_code_is_correct_probabilities": [ | |
| 0.43019697070121765, | |
| 0.6227403879165649, | |
| 0.6141183376312256, | |
| 0.3344857096672058, | |
| 0.6293616890907288, | |
| 0.4664916694164276, | |
| 0.6512068510055542, | |
| 0.44083818793296814, | |
| 0.417765736579895, | |
| 0.3489360511302948, | |
| 0.6306682825088501, | |
| 0.40677180886268616, | |
| 0.36523571610450745, | |
| 0.46857500076293945, | |
| 0.6413342356681824, | |
| 0.6176690459251404, | |
| 0.6807307004928589, | |
| 0.31330692768096924, | |
| 0.6547641754150391, | |
| 0.6119447350502014, | |
| 0.4726662337779999, | |
| 0.6131753325462341, | |
| 0.600269079208374, | |
| 0.33192408084869385, | |
| 0.3614324629306793, | |
| 0.34707942605018616, | |
| 0.6705795526504517, | |
| 0.6425331234931946, | |
| 0.5840022563934326, | |
| 0.3998101055622101, | |
| 0.6653164625167847, | |
| 0.574996292591095, | |
| 0.37777000665664673, | |
| 0.6291854977607727, | |
| 0.6616199612617493, | |
| 0.4081592559814453, | |
| 0.6753640174865723, | |
| 0.6286922693252563, | |
| 0.6147665977478027, | |
| 0.4005477726459503, | |
| 0.6473195552825928, | |
| 0.35249093174934387, | |
| 0.540384829044342, | |
| 0.6459094285964966, | |
| 0.6578614115715027, | |
| 0.683413028717041, | |
| 0.31949999928474426, | |
| 0.38618671894073486, | |
| 0.3694519102573395, | |
| 0.3965170979499817, | |
| 0.3777723014354706, | |
| 0.553892195224762, | |
| 0.35859158635139465, | |
| 0.6390789747238159, | |
| 0.6438376307487488, | |
| 0.6507594585418701, | |
| 0.3823850154876709, | |
| 0.6562716364860535, | |
| 0.6082704663276672, | |
| 0.6338680386543274, | |
| 0.678227961063385, | |
| 0.6367619037628174, | |
| 0.5889072418212891, | |
| 0.4969732165336609, | |
| 0.6729732751846313, | |
| 0.6613313555717468, | |
| 0.6332442164421082, | |
| 0.4045434892177582, | |
| 0.38479217886924744, | |
| 0.3474903404712677, | |
| 0.6648860573768616, | |
| 0.46690836548805237, | |
| 0.5336642861366272, | |
| 0.633358895778656, | |
| 0.4208388030529022, | |
| 0.630777895450592, | |
| 0.625720202922821, | |
| 0.6393349766731262, | |
| 0.6842279434204102, | |
| 0.34046560525894165, | |
| 0.6573684811592102, | |
| 0.623447597026825, | |
| 0.3347625434398651, | |
| 0.439355731010437, | |
| 0.6188381314277649, | |
| 0.6108013987541199, | |
| 0.673854649066925, | |
| 0.4441554844379425, | |
| 0.40074998140335083, | |
| 0.6538906097412109, | |
| 0.4779479205608368, | |
| 0.3645903170108795, | |
| 0.3905934989452362, | |
| 0.3223285377025604, | |
| 0.6476134061813354, | |
| 0.3171766400337219, | |
| 0.36856701970100403, | |
| 0.3680305778980255, | |
| 0.6812744140625, | |
| 0.6352286338806152 | |
| ], | |
| "eval_code_is_correct_runtime": 88.8545, | |
| "eval_code_is_correct_samples_per_second": 1.125, | |
| "eval_code_is_correct_steps_per_second": 0.045, | |
| "epoch": 1.32, | |
| "step": 25 | |
| }, | |
| { | |
| "loss": 0.6594, | |
| "learning_rate": 0.0002, | |
| "epoch": 1.37, | |
| "step": 26 | |
| }, | |
| { | |
| "loss": 0.6907, | |
| "learning_rate": 0.0002, | |
| "epoch": 1.42, | |
| "step": 27 | |
| }, | |
| { | |
| "loss": 0.5943, | |
| "learning_rate": 0.0002, | |
| "epoch": 1.47, | |
| "step": 28 | |
| }, | |
| { | |
| "loss": 0.6849, | |
| "learning_rate": 0.0002, | |
| "epoch": 1.53, | |
| "step": 29 | |
| }, | |
| { | |
| "loss": 0.5921, | |
| "learning_rate": 0.0002, | |
| "epoch": 1.58, | |
| "step": 30 | |
| }, | |
| { | |
| "loss": 0.7788, | |
| "learning_rate": 0.0002, | |
| "epoch": 1.63, | |
| "step": 31 | |
| }, | |
| { | |
| "loss": 0.64, | |
| "learning_rate": 0.0002, | |
| "epoch": 1.68, | |
| "step": 32 | |
| }, | |
| { | |
| "loss": 0.5442, | |
| "learning_rate": 0.0002, | |
| "epoch": 1.74, | |
| "step": 33 | |
| }, | |
| { | |
| "loss": 0.5134, | |
| "learning_rate": 0.0002, | |
| "epoch": 1.79, | |
| "step": 34 | |
| }, | |
| { | |
| "loss": 0.5845, | |
| "learning_rate": 0.0002, | |
| "epoch": 1.84, | |
| "step": 35 | |
| }, | |
| { | |
| "loss": 0.6618, | |
| "learning_rate": 0.0002, | |
| "epoch": 1.89, | |
| "step": 36 | |
| }, | |
| { | |
| "loss": 0.5564, | |
| "learning_rate": 0.0002, | |
| "epoch": 1.95, | |
| "step": 37 | |
| }, | |
| { | |
| "loss": 0.5295, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.0, | |
| "step": 38 | |
| }, | |
| { | |
| "loss": 0.3214, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.05, | |
| "step": 39 | |
| }, | |
| { | |
| "loss": 0.541, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.11, | |
| "step": 40 | |
| }, | |
| { | |
| "loss": 0.4606, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.16, | |
| "step": 41 | |
| }, | |
| { | |
| "loss": 0.3594, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.21, | |
| "step": 42 | |
| }, | |
| { | |
| "loss": 0.4218, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.26, | |
| "step": 43 | |
| }, | |
| { | |
| "loss": 0.7866, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.32, | |
| "step": 44 | |
| }, | |
| { | |
| "loss": 0.4861, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.37, | |
| "step": 45 | |
| }, | |
| { | |
| "loss": 0.715, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.42, | |
| "step": 46 | |
| }, | |
| { | |
| "loss": 0.3984, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.47, | |
| "step": 47 | |
| }, | |
| { | |
| "loss": 0.4485, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.53, | |
| "step": 48 | |
| }, | |
| { | |
| "loss": 0.414, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.58, | |
| "step": 49 | |
| }, | |
| { | |
| "loss": 0.5285, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.63, | |
| "step": 50 | |
| }, | |
| { | |
| "eval_code_is_correct_loss": 0.4825914800167084, | |
| "eval_code_is_correct_score": -0.15681606531143188, | |
| "eval_code_is_correct_brier_score": 0.15681606531143188, | |
| "eval_code_is_correct_average_probability": 0.6661742329597473, | |
| "eval_code_is_correct_accuracy": 0.78, | |
| "eval_code_is_correct_probabilities": [ | |
| 0.9890788197517395, | |
| 0.5256270170211792, | |
| 0.31149938702583313, | |
| 0.4484615623950958, | |
| 0.6472581028938293, | |
| 0.990279495716095, | |
| 0.6154259443283081, | |
| 0.8025317192077637, | |
| 0.9357706308364868, | |
| 0.5503436923027039, | |
| 0.7849963903427124, | |
| 0.35905611515045166, | |
| 0.7320591807365417, | |
| 0.9779102206230164, | |
| 0.6362199187278748, | |
| 0.4611852467060089, | |
| 0.6441273093223572, | |
| 0.28485170006752014, | |
| 0.6162811517715454, | |
| 0.6565234661102295, | |
| 0.9911870360374451, | |
| 0.47038084268569946, | |
| 0.36016836762428284, | |
| 0.27217498421669006, | |
| 0.8941972851753235, | |
| 0.8256573677062988, | |
| 0.7179439663887024, | |
| 0.5055176019668579, | |
| 0.5404551029205322, | |
| 0.691025972366333, | |
| 0.48174765706062317, | |
| 0.3958812355995178, | |
| 0.9058158993721008, | |
| 0.3519555628299713, | |
| 0.7665671706199646, | |
| 0.9405277967453003, | |
| 0.5666556358337402, | |
| 0.6163994669914246, | |
| 0.7029475569725037, | |
| 0.4140672981739044, | |
| 0.6872601509094238, | |
| 0.47729596495628357, | |
| 0.9915251135826111, | |
| 0.6698000431060791, | |
| 0.5583893060684204, | |
| 0.5625171661376953, | |
| 0.3677026927471161, | |
| 0.9410431385040283, | |
| 0.8823411464691162, | |
| 0.7719405889511108, | |
| 0.8952547907829285, | |
| 0.9944015145301819, | |
| 0.6242450475692749, | |
| 0.7421666383743286, | |
| 0.6514956951141357, | |
| 0.5674803256988525, | |
| 0.888701319694519, | |
| 0.7480524778366089, | |
| 0.7139648199081421, | |
| 0.5017758011817932, | |
| 0.1396978348493576, | |
| 0.7807031273841858, | |
| 0.20994068682193756, | |
| 0.9944880604743958, | |
| 0.6406017541885376, | |
| 0.6515274047851562, | |
| 0.5916007161140442, | |
| 0.5801588296890259, | |
| 0.7971768975257874, | |
| 0.4683827757835388, | |
| 0.6826773285865784, | |
| 0.9399536848068237, | |
| 0.9587288498878479, | |
| 0.7912093997001648, | |
| 0.9399692416191101, | |
| 0.7925052642822266, | |
| 0.4648332893848419, | |
| 0.7946687936782837, | |
| 0.7067309021949768, | |
| 0.7728155851364136, | |
| 0.7523179054260254, | |
| 0.23098425567150116, | |
| 0.3329189121723175, | |
| 0.6994566321372986, | |
| 0.44642090797424316, | |
| 0.5221709609031677, | |
| 0.7311367988586426, | |
| 0.9462592601776123, | |
| 0.2204018384218216, | |
| 0.7329748868942261, | |
| 0.9443881511688232, | |
| 0.7437092661857605, | |
| 0.5737127065658569, | |
| 0.6452605724334717, | |
| 0.9964486360549927, | |
| 0.5256209373474121, | |
| 0.9053956270217896, | |
| 0.9367812275886536, | |
| 0.7426385879516602, | |
| 0.6719332337379456 | |
| ], | |
| "eval_code_is_correct_runtime": 89.0987, | |
| "eval_code_is_correct_samples_per_second": 1.122, | |
| "eval_code_is_correct_steps_per_second": 0.045, | |
| "epoch": 2.63, | |
| "step": 50 | |
| }, | |
| { | |
| "loss": 0.3911, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.68, | |
| "step": 51 | |
| }, | |
| { | |
| "loss": 0.4484, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.74, | |
| "step": 52 | |
| }, | |
| { | |
| "loss": 0.4836, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.79, | |
| "step": 53 | |
| }, | |
| { | |
| "loss": 0.2963, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.84, | |
| "step": 54 | |
| }, | |
| { | |
| "loss": 0.3431, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.89, | |
| "step": 55 | |
| }, | |
| { | |
| "loss": 0.4689, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.95, | |
| "step": 56 | |
| }, | |
| { | |
| "loss": 0.3658, | |
| "learning_rate": 0.0002, | |
| "epoch": 3.0, | |
| "step": 57 | |
| }, | |
| { | |
| "loss": 0.2631, | |
| "learning_rate": 0.0002, | |
| "epoch": 3.05, | |
| "step": 58 | |
| }, | |
| { | |
| "loss": 0.219, | |
| "learning_rate": 0.0002, | |
| "epoch": 3.11, | |
| "step": 59 | |
| }, | |
| { | |
| "loss": 0.3664, | |
| "learning_rate": 0.0002, | |
| "epoch": 3.16, | |
| "step": 60 | |
| }, | |
| { | |
| "loss": 0.0843, | |
| "learning_rate": 0.0002, | |
| "epoch": 3.21, | |
| "step": 61 | |
| }, | |
| { | |
| "loss": 0.1934, | |
| "learning_rate": 0.0002, | |
| "epoch": 3.26, | |
| "step": 62 | |
| }, | |
| { | |
| "loss": 0.5592, | |
| "learning_rate": 0.0002, | |
| "epoch": 3.32, | |
| "step": 63 | |
| }, | |
| { | |
| "loss": 0.2187, | |
| "learning_rate": 0.0002, | |
| "epoch": 3.37, | |
| "step": 64 | |
| }, | |
| { | |
| "loss": 0.5812, | |
| "learning_rate": 0.0002, | |
| "epoch": 3.42, | |
| "step": 65 | |
| }, | |
| { | |
| "loss": 0.1651, | |
| "learning_rate": 0.0002, | |
| "epoch": 3.47, | |
| "step": 66 | |
| }, | |
| { | |
| "loss": 0.2347, | |
| "learning_rate": 0.0002, | |
| "epoch": 3.53, | |
| "step": 67 | |
| }, | |
| { | |
| "loss": 0.2858, | |
| "learning_rate": 0.0002, | |
| "epoch": 3.58, | |
| "step": 68 | |
| }, | |
| { | |
| "loss": 0.398, | |
| "learning_rate": 0.0002, | |
| "epoch": 3.63, | |
| "step": 69 | |
| }, | |
| { | |
| "loss": 0.2263, | |
| "learning_rate": 0.0002, | |
| "epoch": 3.68, | |
| "step": 70 | |
| }, | |
| { | |
| "loss": 0.1997, | |
| "learning_rate": 0.0002, | |
| "epoch": 3.74, | |
| "step": 71 | |
| }, | |
| { | |
| "loss": 0.1962, | |
| "learning_rate": 0.0002, | |
| "epoch": 3.79, | |
| "step": 72 | |
| }, | |
| { | |
| "loss": 0.3922, | |
| "learning_rate": 0.0002, | |
| "epoch": 3.84, | |
| "step": 73 | |
| }, | |
| { | |
| "loss": 0.2901, | |
| "learning_rate": 0.0002, | |
| "epoch": 3.89, | |
| "step": 74 | |
| }, | |
| { | |
| "loss": 0.354, | |
| "learning_rate": 0.0002, | |
| "epoch": 3.95, | |
| "step": 75 | |
| }, | |
| { | |
| "eval_code_is_correct_loss": 0.4227831959724426, | |
| "eval_code_is_correct_score": -0.12974894046783447, | |
| "eval_code_is_correct_brier_score": 0.12974894046783447, | |
| "eval_code_is_correct_average_probability": 0.7597215175628662, | |
| "eval_code_is_correct_accuracy": 0.81, | |
| "eval_code_is_correct_probabilities": [ | |
| 0.9988728165626526, | |
| 0.48829779028892517, | |
| 0.5296067595481873, | |
| 0.2780781388282776, | |
| 0.7249220609664917, | |
| 0.9996078610420227, | |
| 0.8340080976486206, | |
| 0.861427366733551, | |
| 0.9820781350135803, | |
| 0.785228431224823, | |
| 0.9509273171424866, | |
| 0.2166699916124344, | |
| 0.8465635180473328, | |
| 0.998379111289978, | |
| 0.5253419280052185, | |
| 0.34222710132598877, | |
| 0.9068326950073242, | |
| 0.1039603129029274, | |
| 0.953624963760376, | |
| 0.8484816551208496, | |
| 0.999100923538208, | |
| 0.7170456051826477, | |
| 0.4141126275062561, | |
| 0.08783219009637833, | |
| 0.9520941376686096, | |
| 0.9142940044403076, | |
| 0.922050416469574, | |
| 0.7979483604431152, | |
| 0.7149856686592102, | |
| 0.8099195957183838, | |
| 0.6641011238098145, | |
| 0.731330156326294, | |
| 0.987343430519104, | |
| 0.6779031157493591, | |
| 0.9263503551483154, | |
| 0.9907992482185364, | |
| 0.7801071405410767, | |
| 0.8316705226898193, | |
| 0.7261466979980469, | |
| 0.15835784375667572, | |
| 0.7666859030723572, | |
| 0.3682948648929596, | |
| 0.999472439289093, | |
| 0.8673612475395203, | |
| 0.7001268863677979, | |
| 0.7785540223121643, | |
| 0.3994494676589966, | |
| 0.9955601692199707, | |
| 0.9255762696266174, | |
| 0.9511540532112122, | |
| 0.9472480416297913, | |
| 0.9996776580810547, | |
| 0.7907305955886841, | |
| 0.9202771782875061, | |
| 0.7952592968940735, | |
| 0.9026097059249878, | |
| 0.971696138381958, | |
| 0.9698152542114258, | |
| 0.8726643919944763, | |
| 0.9103471636772156, | |
| 0.06682927906513214, | |
| 0.983897864818573, | |
| 0.4112030267715454, | |
| 0.9997578263282776, | |
| 0.8332673907279968, | |
| 0.9109005331993103, | |
| 0.8171624541282654, | |
| 0.6875969767570496, | |
| 0.9541371464729309, | |
| 0.8013278245925903, | |
| 0.8411256074905396, | |
| 0.9754989147186279, | |
| 0.9865782856941223, | |
| 0.9783108830451965, | |
| 0.9962783455848694, | |
| 0.9778187274932861, | |
| 0.43399563431739807, | |
| 0.9774395227432251, | |
| 0.9118335843086243, | |
| 0.4630809426307678, | |
| 0.9608380198478699, | |
| 0.13883967697620392, | |
| 0.10208643227815628, | |
| 0.5756421685218811, | |
| 0.5382805466651917, | |
| 0.6014205813407898, | |
| 0.9483709931373596, | |
| 0.997037410736084, | |
| 0.027208847925066948, | |
| 0.951516330242157, | |
| 0.9978541731834412, | |
| 0.3260488510131836, | |
| 0.40766820311546326, | |
| 0.8333786725997925, | |
| 0.9996563196182251, | |
| 0.9164862632751465, | |
| 0.9894892573356628, | |
| 0.9954167604446411, | |
| 0.9306528568267822, | |
| 0.9150286316871643 | |
| ], | |
| "eval_code_is_correct_runtime": 89.1528, | |
| "eval_code_is_correct_samples_per_second": 1.122, | |
| "eval_code_is_correct_steps_per_second": 0.045, | |
| "epoch": 3.95, | |
| "step": 75 | |
| }, | |
| { | |
| "loss": 0.1587, | |
| "learning_rate": 0.0002, | |
| "epoch": 4.0, | |
| "step": 76 | |
| }, | |
| { | |
| "loss": 0.1926, | |
| "learning_rate": 0.0002, | |
| "epoch": 4.05, | |
| "step": 77 | |
| }, | |
| { | |
| "loss": 0.1286, | |
| "learning_rate": 0.0002, | |
| "epoch": 4.11, | |
| "step": 78 | |
| }, | |
| { | |
| "loss": 0.2267, | |
| "learning_rate": 0.0002, | |
| "epoch": 4.16, | |
| "step": 79 | |
| }, | |
| { | |
| "loss": 0.1104, | |
| "learning_rate": 0.0002, | |
| "epoch": 4.21, | |
| "step": 80 | |
| }, | |
| { | |
| "loss": 0.1765, | |
| "learning_rate": 0.0002, | |
| "epoch": 4.26, | |
| "step": 81 | |
| }, | |
| { | |
| "loss": 0.3804, | |
| "learning_rate": 0.0002, | |
| "epoch": 4.32, | |
| "step": 82 | |
| }, | |
| { | |
| "loss": 0.1671, | |
| "learning_rate": 0.0002, | |
| "epoch": 4.37, | |
| "step": 83 | |
| }, | |
| { | |
| "loss": 0.1771, | |
| "learning_rate": 0.0002, | |
| "epoch": 4.42, | |
| "step": 84 | |
| }, | |
| { | |
| "loss": 0.1781, | |
| "learning_rate": 0.0002, | |
| "epoch": 4.47, | |
| "step": 85 | |
| }, | |
| { | |
| "loss": 0.0831, | |
| "learning_rate": 0.0002, | |
| "epoch": 4.53, | |
| "step": 86 | |
| }, | |
| { | |
| "loss": 0.1021, | |
| "learning_rate": 0.0002, | |
| "epoch": 4.58, | |
| "step": 87 | |
| }, | |
| { | |
| "loss": 0.3782, | |
| "learning_rate": 0.0002, | |
| "epoch": 4.63, | |
| "step": 88 | |
| }, | |
| { | |
| "loss": 0.582, | |
| "learning_rate": 0.0002, | |
| "epoch": 4.68, | |
| "step": 89 | |
| }, | |
| { | |
| "loss": 0.5176, | |
| "learning_rate": 0.0002, | |
| "epoch": 4.74, | |
| "step": 90 | |
| }, | |
| { | |
| "loss": 0.4676, | |
| "learning_rate": 0.0002, | |
| "epoch": 4.79, | |
| "step": 91 | |
| }, | |
| { | |
| "loss": 0.1139, | |
| "learning_rate": 0.0002, | |
| "epoch": 4.84, | |
| "step": 92 | |
| }, | |
| { | |
| "loss": 0.1754, | |
| "learning_rate": 0.0002, | |
| "epoch": 4.89, | |
| "step": 93 | |
| }, | |
| { | |
| "loss": 0.1578, | |
| "learning_rate": 0.0002, | |
| "epoch": 4.95, | |
| "step": 94 | |
| }, | |
| { | |
| "loss": 0.2121, | |
| "learning_rate": 0.0002, | |
| "epoch": 5.0, | |
| "step": 95 | |
| }, | |
| { | |
| "loss": 0.1099, | |
| "learning_rate": 0.0002, | |
| "epoch": 5.05, | |
| "step": 96 | |
| }, | |
| { | |
| "loss": 0.1318, | |
| "learning_rate": 0.0002, | |
| "epoch": 5.11, | |
| "step": 97 | |
| }, | |
| { | |
| "loss": 0.1261, | |
| "learning_rate": 0.0002, | |
| "epoch": 5.16, | |
| "step": 98 | |
| }, | |
| { | |
| "loss": 0.1414, | |
| "learning_rate": 0.0002, | |
| "epoch": 5.21, | |
| "step": 99 | |
| }, | |
| { | |
| "loss": 0.0838, | |
| "learning_rate": 0.0002, | |
| "epoch": 5.26, | |
| "step": 100 | |
| }, | |
| { | |
| "eval_code_is_correct_loss": 0.6910045146942139, | |
| "eval_code_is_correct_score": -0.18250787258148193, | |
| "eval_code_is_correct_brier_score": 0.18250787258148193, | |
| "eval_code_is_correct_average_probability": 0.7548456788063049, | |
| "eval_code_is_correct_accuracy": 0.77, | |
| "eval_code_is_correct_probabilities": [ | |
| 0.9995959401130676, | |
| 0.0028756344690918922, | |
| 0.5750472545623779, | |
| 0.3717687427997589, | |
| 0.9003951549530029, | |
| 0.9999061822891235, | |
| 0.34974727034568787, | |
| 0.9861345887184143, | |
| 0.997637152671814, | |
| 0.986896812915802, | |
| 0.997134804725647, | |
| 0.12967519462108612, | |
| 0.9531847238540649, | |
| 0.9992759823799133, | |
| 0.20013374090194702, | |
| 0.06892868876457214, | |
| 0.9374101758003235, | |
| 0.021766141057014465, | |
| 0.7759655117988586, | |
| 0.87876957654953, | |
| 0.9996174573898315, | |
| 0.4874546229839325, | |
| 0.05481430143117905, | |
| 0.07372044026851654, | |
| 0.9974690675735474, | |
| 0.9834303855895996, | |
| 0.9566057920455933, | |
| 0.9595711827278137, | |
| 0.8324235081672668, | |
| 0.9975442290306091, | |
| 0.6663374304771423, | |
| 0.9153403043746948, | |
| 0.9981799125671387, | |
| 0.21779882907867432, | |
| 0.9679686427116394, | |
| 0.9959613680839539, | |
| 0.6366267800331116, | |
| 0.9374737739562988, | |
| 0.5792506337165833, | |
| 0.071602463722229, | |
| 0.14550523459911346, | |
| 0.08400178700685501, | |
| 0.999697208404541, | |
| 0.9761826395988464, | |
| 0.7455314993858337, | |
| 0.23751415312290192, | |
| 0.13082778453826904, | |
| 0.9988062381744385, | |
| 0.9915608763694763, | |
| 0.9885241389274597, | |
| 0.9979588985443115, | |
| 0.9998948574066162, | |
| 0.980595588684082, | |
| 0.9910929799079895, | |
| 0.8016942739486694, | |
| 0.9494630694389343, | |
| 0.9971957206726074, | |
| 0.9917238354682922, | |
| 0.9877293109893799, | |
| 0.7314670085906982, | |
| 0.012357236817479134, | |
| 0.9953846335411072, | |
| 0.8007073998451233, | |
| 0.9999083280563354, | |
| 0.8738554120063782, | |
| 0.9894973039627075, | |
| 0.9153193235397339, | |
| 0.9965702295303345, | |
| 0.9988541603088379, | |
| 0.9915642738342285, | |
| 0.9359388947486877, | |
| 0.9891656041145325, | |
| 0.996810257434845, | |
| 0.9968699812889099, | |
| 0.9994422793388367, | |
| 0.9953920841217041, | |
| 0.015405597165226936, | |
| 0.9934167861938477, | |
| 0.966199517250061, | |
| 0.8508797287940979, | |
| 0.9916513562202454, | |
| 0.8259217739105225, | |
| 0.0839756578207016, | |
| 0.72227942943573, | |
| 0.4742227792739868, | |
| 0.007106057368218899, | |
| 0.9803650379180908, | |
| 0.9996927976608276, | |
| 0.0069855921901762486, | |
| 0.9928151369094849, | |
| 0.9996411800384521, | |
| 0.1924436241388321, | |
| 0.8279777765274048, | |
| 0.9689691066741943, | |
| 0.9998074173927307, | |
| 0.994343638420105, | |
| 0.9985198378562927, | |
| 0.9983432292938232, | |
| 0.9436729550361633, | |
| 0.9738808274269104 | |
| ], | |
| "eval_code_is_correct_runtime": 89.0718, | |
| "eval_code_is_correct_samples_per_second": 1.123, | |
| "eval_code_is_correct_steps_per_second": 0.045, | |
| "epoch": 5.26, | |
| "step": 100 | |
| }, | |
| { | |
| "train_runtime": 6498.1873, | |
| "train_samples_per_second": 0.492, | |
| "train_steps_per_second": 0.015, | |
| "total_flos": 0.0, | |
| "train_loss": 0.443891556635499, | |
| "epoch": 5.26, | |
| "step": 100 | |
| } | |
| ] |