Instructions for using genies-models/llama-13b-counterfactual_python with libraries, inference providers, notebooks, and local apps. Follow the links below to get started.
- Libraries
- PEFT
How to use genies-models/llama-13b-counterfactual_python with PEFT:

```python
from peft import PeftModel
from transformers import AutoModelForSequenceClassification

base_model = AutoModelForSequenceClassification.from_pretrained("models/llama-13b")
model = PeftModel.from_pretrained(base_model, "genies-models/llama-13b-counterfactual_python")
```

- Notebooks
- Google Colab
- Kaggle
| [ | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0002, | |
| "loss": 0.6934, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002, | |
| "loss": 0.7009, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002, | |
| "loss": 0.7062, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0002, | |
| "loss": 0.686, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0002, | |
| "loss": 0.7047, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0002, | |
| "loss": 0.6334, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002, | |
| "loss": 0.605, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002, | |
| "loss": 0.6189, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002, | |
| "loss": 0.6136, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0002, | |
| "loss": 0.6527, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0002, | |
| "loss": 0.625, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.0002, | |
| "loss": 0.6205, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.0002, | |
| "loss": 0.5828, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.0002, | |
| "loss": 0.6865, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.0002, | |
| "loss": 0.6206, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.0002, | |
| "loss": 0.5727, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.0002, | |
| "loss": 0.5636, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.0002, | |
| "loss": 0.5843, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.0002, | |
| "loss": 0.5781, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.0002, | |
| "loss": 0.5301, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.0002, | |
| "loss": 0.4634, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.0002, | |
| "loss": 0.5421, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.0002, | |
| "loss": 0.4616, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.0002, | |
| "loss": 0.5744, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.0002, | |
| "loss": 0.4898, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_counterfactual_python_accuracy": 0.71, | |
| "eval_counterfactual_python_average_probability": 0.6018641591072083, | |
| "eval_counterfactual_python_brier_score": 0.18612590432167053, | |
| "eval_counterfactual_python_loss": 0.5548506379127502, | |
| "eval_counterfactual_python_probabilities": [ | |
| 0.4399513304233551, | |
| 0.3824414312839508, | |
| 0.49292659759521484, | |
| 0.49140578508377075, | |
| 0.5525286197662354, | |
| 0.5023423433303833, | |
| 0.8358141183853149, | |
| 0.8170955181121826, | |
| 0.8217412233352661, | |
| 0.5099817514419556, | |
| 0.5226550102233887, | |
| 0.5172945261001587, | |
| 0.5407564043998718, | |
| 0.832125186920166, | |
| 0.6691119074821472, | |
| 0.3960307538509369, | |
| 0.40982741117477417, | |
| 0.4054381549358368, | |
| 0.3980039060115814, | |
| 0.5097438097000122, | |
| 0.46157148480415344, | |
| 0.999834418296814, | |
| 0.9997686743736267, | |
| 0.6286450624465942, | |
| 0.4992108643054962, | |
| 0.4995265007019043, | |
| 0.49947184324264526, | |
| 0.6661597490310669, | |
| 0.5678064823150635, | |
| 0.5518903732299805, | |
| 0.9946170449256897, | |
| 0.5167043209075928, | |
| 0.7187278270721436, | |
| 0.6506595015525818, | |
| 0.45834824442863464, | |
| 0.5075061917304993, | |
| 0.7641502618789673, | |
| 0.8412681818008423, | |
| 0.8201811909675598, | |
| 0.712796688079834, | |
| 0.4470563232898712, | |
| 0.752856433391571, | |
| 0.49904751777648926, | |
| 0.5006982088088989, | |
| 0.49779361486434937, | |
| 0.5420497059822083, | |
| 0.5103813409805298, | |
| 0.5681271553039551, | |
| 0.5055561065673828, | |
| 0.5130311250686646, | |
| 0.520115315914154, | |
| 0.5002112984657288, | |
| 0.5003986358642578, | |
| 0.5015159845352173, | |
| 0.4310661554336548, | |
| 0.39916884899139404, | |
| 0.3779604732990265, | |
| 0.5216459035873413, | |
| 0.48685702681541443, | |
| 0.4875470697879791, | |
| 0.5760115385055542, | |
| 0.5495185852050781, | |
| 0.5108343958854675, | |
| 0.3479890823364258, | |
| 0.5016701817512512, | |
| 0.6537013053894043, | |
| 0.48777666687965393, | |
| 0.5615178942680359, | |
| 0.6843218803405762, | |
| 0.9859493374824524, | |
| 0.4966055750846863, | |
| 0.5694008469581604, | |
| 0.6889368295669556, | |
| 0.9881029725074768, | |
| 0.6518113017082214, | |
| 0.5003181099891663, | |
| 0.499865859746933, | |
| 0.5000035166740417, | |
| 0.47015380859375, | |
| 0.6886747479438782, | |
| 0.6613497734069824, | |
| 0.8089476823806763, | |
| 0.6401104927062988, | |
| 0.965166449546814, | |
| 0.511438250541687, | |
| 0.4359276592731476, | |
| 0.5117321014404297, | |
| 0.888465166091919, | |
| 0.7270707488059998, | |
| 0.8963049054145813, | |
| 0.9560953974723816, | |
| 0.4923214912414551, | |
| 0.7713887095451355, | |
| 0.4578387439250946, | |
| 0.581863522529602, | |
| 0.5949514508247375, | |
| 0.660294771194458, | |
| 0.8132676482200623, | |
| 0.9012545347213745, | |
| 0.5283259749412537 | |
| ], | |
| "eval_counterfactual_python_runtime": 115.6923, | |
| "eval_counterfactual_python_samples_per_second": 0.864, | |
| "eval_counterfactual_python_score": -0.18612590432167053, | |
| "eval_counterfactual_python_steps_per_second": 0.035, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.0002, | |
| "loss": 0.6184, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.0002, | |
| "loss": 0.4981, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.0002, | |
| "loss": 0.5432, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.0002, | |
| "loss": 0.5007, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.0002, | |
| "loss": 0.4117, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.0002, | |
| "loss": 0.3814, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.0002, | |
| "loss": 0.3971, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.0002, | |
| "loss": 0.4263, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.0002, | |
| "loss": 0.39, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.0002, | |
| "loss": 0.4961, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.0002, | |
| "loss": 0.43, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.0002, | |
| "loss": 0.4313, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.0002, | |
| "loss": 0.5106, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.0002, | |
| "loss": 0.5962, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.0002, | |
| "loss": 0.5286, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.0002, | |
| "loss": 0.4137, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.0002, | |
| "loss": 0.6289, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.0002, | |
| "loss": 0.3254, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.0002, | |
| "loss": 0.4166, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.0002, | |
| "loss": 0.2807, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.0002, | |
| "loss": 0.347, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.0002, | |
| "loss": 0.3647, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.0002, | |
| "loss": 0.3106, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.0002, | |
| "loss": 0.2726, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.0002, | |
| "loss": 0.4282, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "eval_counterfactual_python_accuracy": 0.76, | |
| "eval_counterfactual_python_average_probability": 0.7085832953453064, | |
| "eval_counterfactual_python_brier_score": 0.14923186600208282, | |
| "eval_counterfactual_python_loss": 0.4408469498157501, | |
| "eval_counterfactual_python_probabilities": [ | |
| 0.4579019546508789, | |
| 0.2912074029445648, | |
| 0.4707266092300415, | |
| 0.9109028577804565, | |
| 0.9054293036460876, | |
| 0.6167607307434082, | |
| 0.999431312084198, | |
| 0.9996742010116577, | |
| 0.9998310804367065, | |
| 0.8127307891845703, | |
| 0.9300280809402466, | |
| 0.9862361550331116, | |
| 0.5450853705406189, | |
| 0.9860369563102722, | |
| 0.45218417048454285, | |
| 0.5249521732330322, | |
| 0.5249036550521851, | |
| 0.5102124214172363, | |
| 0.13406088948249817, | |
| 0.46701082587242126, | |
| 0.656515896320343, | |
| 0.9971957206726074, | |
| 0.9986909031867981, | |
| 0.9891945123672485, | |
| 0.4993564188480377, | |
| 0.49984410405158997, | |
| 0.4994036853313446, | |
| 0.9986547231674194, | |
| 0.9975965619087219, | |
| 0.9984816908836365, | |
| 0.96566241979599, | |
| 0.2921470105648041, | |
| 0.809596061706543, | |
| 0.950958788394928, | |
| 0.10198835283517838, | |
| 0.5572080016136169, | |
| 0.8759415149688721, | |
| 0.8848286271095276, | |
| 0.8782072067260742, | |
| 0.8780391812324524, | |
| 0.29135316610336304, | |
| 0.9999716281890869, | |
| 0.501299262046814, | |
| 0.5012289881706238, | |
| 0.5003526210784912, | |
| 0.4440124034881592, | |
| 0.5238028764724731, | |
| 0.6040167212486267, | |
| 0.6674030423164368, | |
| 0.6233087778091431, | |
| 0.7688833475112915, | |
| 0.4992963373661041, | |
| 0.4999959170818329, | |
| 0.5006823539733887, | |
| 0.5086904168128967, | |
| 0.3447871804237366, | |
| 0.1972731500864029, | |
| 0.8772821426391602, | |
| 0.7153769731521606, | |
| 0.8498809337615967, | |
| 0.9998918771743774, | |
| 0.9995546936988831, | |
| 0.65556401014328, | |
| 0.4574921131134033, | |
| 0.9508897662162781, | |
| 0.9797239303588867, | |
| 0.7450618147850037, | |
| 0.9751894474029541, | |
| 0.7989564538002014, | |
| 0.9915909767150879, | |
| 0.4941618740558624, | |
| 0.6462482810020447, | |
| 0.6306169629096985, | |
| 0.9712220430374146, | |
| 0.8286543488502502, | |
| 0.5009964108467102, | |
| 0.5022417306900024, | |
| 0.5038025379180908, | |
| 0.5169787406921387, | |
| 0.9881459474563599, | |
| 0.2681385576725006, | |
| 0.8438000082969666, | |
| 0.944349467754364, | |
| 0.9870184659957886, | |
| 0.41617363691329956, | |
| 0.9707878828048706, | |
| 0.4993491470813751, | |
| 0.9655181169509888, | |
| 0.9586531519889832, | |
| 0.9758415222167969, | |
| 0.9831867218017578, | |
| 0.24219882488250732, | |
| 0.9792550206184387, | |
| 0.2845119535923004, | |
| 0.7598068714141846, | |
| 0.7937849164009094, | |
| 0.9666680693626404, | |
| 0.9630246162414551, | |
| 0.9810232520103455, | |
| 0.664535641670227 | |
| ], | |
| "eval_counterfactual_python_runtime": 115.6103, | |
| "eval_counterfactual_python_samples_per_second": 0.865, | |
| "eval_counterfactual_python_score": -0.14923186600208282, | |
| "eval_counterfactual_python_steps_per_second": 0.035, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.0002, | |
| "loss": 0.32, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.0002, | |
| "loss": 0.2524, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.0002, | |
| "loss": 0.2453, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.0002, | |
| "loss": 0.3394, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.0002, | |
| "loss": 0.2808, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.0002, | |
| "loss": 0.3592, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.0002, | |
| "loss": 0.3809, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.0002, | |
| "loss": 0.3313, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.0002, | |
| "loss": 0.2164, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.0002, | |
| "loss": 0.2325, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 0.0002, | |
| "loss": 0.2153, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 0.0002, | |
| "loss": 0.1792, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 0.0002, | |
| "loss": 0.2809, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 0.0002, | |
| "loss": 0.1901, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 0.0002, | |
| "loss": 0.3248, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 0.0002, | |
| "loss": 0.2392, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 0.0002, | |
| "loss": 0.1959, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 0.0002, | |
| "loss": 0.1354, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 0.0002, | |
| "loss": 0.2675, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 0.0002, | |
| "loss": 0.1491, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 0.0002, | |
| "loss": 0.1655, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 0.0002, | |
| "loss": 0.3664, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 0.0002, | |
| "loss": 0.1994, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 0.0002, | |
| "loss": 0.2261, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 0.0002, | |
| "loss": 0.1674, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "eval_counterfactual_python_accuracy": 0.83, | |
| "eval_counterfactual_python_average_probability": 0.8026735782623291, | |
| "eval_counterfactual_python_brier_score": 0.1076725646853447, | |
| "eval_counterfactual_python_loss": 0.35705506801605225, | |
| "eval_counterfactual_python_probabilities": [ | |
| 0.37226179242134094, | |
| 0.44944408535957336, | |
| 0.6381627917289734, | |
| 0.9999721050262451, | |
| 0.9999608993530273, | |
| 0.9998444318771362, | |
| 0.9992689490318298, | |
| 0.9997060894966125, | |
| 0.9999666213989258, | |
| 0.9854297637939453, | |
| 0.9986262321472168, | |
| 0.9999815225601196, | |
| 0.9984927177429199, | |
| 0.9999380111694336, | |
| 0.6290276050567627, | |
| 0.9135867953300476, | |
| 0.5242010354995728, | |
| 0.9837318658828735, | |
| 0.27289149165153503, | |
| 0.5738738775253296, | |
| 0.922862708568573, | |
| 0.9999669790267944, | |
| 0.9999966621398926, | |
| 0.9999661445617676, | |
| 0.499489426612854, | |
| 0.49973809719085693, | |
| 0.4992947578430176, | |
| 0.9958611130714417, | |
| 0.9863598942756653, | |
| 0.9936357736587524, | |
| 0.9988870024681091, | |
| 0.6340756416320801, | |
| 0.9680150151252747, | |
| 0.9985277652740479, | |
| 0.023388121277093887, | |
| 0.6880031228065491, | |
| 0.9903197288513184, | |
| 0.9959518909454346, | |
| 0.9932522177696228, | |
| 0.8932461738586426, | |
| 0.48274222016334534, | |
| 0.9999949932098389, | |
| 0.5033888220787048, | |
| 0.5017038583755493, | |
| 0.5028126239776611, | |
| 0.6858635544776917, | |
| 0.6307231187820435, | |
| 0.7390716671943665, | |
| 0.9004430770874023, | |
| 0.849486768245697, | |
| 0.8997713327407837, | |
| 0.4998168647289276, | |
| 0.4995257258415222, | |
| 0.49966782331466675, | |
| 0.9129173755645752, | |
| 0.7249950170516968, | |
| 0.1306697428226471, | |
| 0.9999997615814209, | |
| 0.9952723383903503, | |
| 0.9994613528251648, | |
| 1.0, | |
| 1.0, | |
| 0.8033077120780945, | |
| 0.3031558096408844, | |
| 0.999403715133667, | |
| 0.9999992847442627, | |
| 0.9955515265464783, | |
| 0.9999738931655884, | |
| 0.922993004322052, | |
| 0.9998573064804077, | |
| 0.5210863351821899, | |
| 0.39889705181121826, | |
| 0.9018835425376892, | |
| 0.9958513975143433, | |
| 0.9834888577461243, | |
| 0.5017654299736023, | |
| 0.505089282989502, | |
| 0.5089318752288818, | |
| 0.9484497308731079, | |
| 0.9999573230743408, | |
| 0.0037226954009383917, | |
| 0.9574972987174988, | |
| 0.9998819828033447, | |
| 0.9999995231628418, | |
| 0.6174806356430054, | |
| 0.9999998807907104, | |
| 0.9597611427307129, | |
| 0.9999853372573853, | |
| 0.9999836683273315, | |
| 0.9999920129776001, | |
| 0.9999992847442627, | |
| 0.23779642581939697, | |
| 0.9999992847442627, | |
| 0.39742353558540344, | |
| 0.9875879883766174, | |
| 0.9611554145812988, | |
| 0.9997416138648987, | |
| 0.9997121691703796, | |
| 0.9999167919158936, | |
| 0.9785425662994385 | |
| ], | |
| "eval_counterfactual_python_runtime": 115.6487, | |
| "eval_counterfactual_python_samples_per_second": 0.865, | |
| "eval_counterfactual_python_score": -0.1076725646853447, | |
| "eval_counterfactual_python_steps_per_second": 0.035, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 0.0002, | |
| "loss": 0.114, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 0.0002, | |
| "loss": 0.1064, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 0.0002, | |
| "loss": 0.1611, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 0.0002, | |
| "loss": 0.2577, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 0.0002, | |
| "loss": 0.1534, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 0.0002, | |
| "loss": 0.1445, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 0.0002, | |
| "loss": 0.1551, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 0.0002, | |
| "loss": 0.0575, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 0.0002, | |
| "loss": 0.1398, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 0.0002, | |
| "loss": 0.2892, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 0.0002, | |
| "loss": 0.109, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 0.0002, | |
| "loss": 0.2969, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 0.0002, | |
| "loss": 0.201, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 0.0002, | |
| "loss": 0.3329, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 0.0002, | |
| "loss": 0.248, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 0.0002, | |
| "loss": 0.1914, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 0.0002, | |
| "loss": 0.2144, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 0.0002, | |
| "loss": 0.1893, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 0.0002, | |
| "loss": 0.1773, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 0.0002, | |
| "loss": 0.3111, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 0.0002, | |
| "loss": 0.1146, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 0.0002, | |
| "loss": 0.1504, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 0.0002, | |
| "loss": 0.1385, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 0.0002, | |
| "loss": 0.1045, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 0.0002, | |
| "loss": 0.2411, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "eval_counterfactual_python_accuracy": 0.86, | |
| "eval_counterfactual_python_average_probability": 0.8131078481674194, | |
| "eval_counterfactual_python_brier_score": 0.10470182448625565, | |
| "eval_counterfactual_python_loss": 0.3495933413505554, | |
| "eval_counterfactual_python_probabilities": [ | |
| 0.23627710342407227, | |
| 0.8137345314025879, | |
| 0.5596910715103149, | |
| 0.9998366832733154, | |
| 0.9997907280921936, | |
| 0.9812625050544739, | |
| 0.9999997615814209, | |
| 0.9999972581863403, | |
| 0.9999994039535522, | |
| 0.9988768696784973, | |
| 0.9998831748962402, | |
| 0.9999924898147583, | |
| 0.994093120098114, | |
| 0.9999728202819824, | |
| 0.9200253486633301, | |
| 0.7116681337356567, | |
| 0.5914531946182251, | |
| 0.6839994192123413, | |
| 0.43964019417762756, | |
| 0.5331021547317505, | |
| 0.7898567914962769, | |
| 0.9999998807907104, | |
| 1.0, | |
| 0.9999988079071045, | |
| 0.49959611892700195, | |
| 0.500809907913208, | |
| 0.4987000823020935, | |
| 0.9999631643295288, | |
| 0.9999265670776367, | |
| 0.999942421913147, | |
| 0.9998346567153931, | |
| 0.5650460720062256, | |
| 0.999765932559967, | |
| 0.9999769926071167, | |
| 0.14878123998641968, | |
| 0.7281113862991333, | |
| 0.999479353427887, | |
| 0.9998818635940552, | |
| 0.9999390840530396, | |
| 0.962016761302948, | |
| 0.34597229957580566, | |
| 0.9999991655349731, | |
| 0.5076113343238831, | |
| 0.5036648511886597, | |
| 0.5070807933807373, | |
| 0.7509257197380066, | |
| 0.6171873211860657, | |
| 0.8994553089141846, | |
| 0.9729470014572144, | |
| 0.938168466091156, | |
| 0.9896105527877808, | |
| 0.5001404881477356, | |
| 0.498879998922348, | |
| 0.4977039098739624, | |
| 0.9747163653373718, | |
| 0.9621886610984802, | |
| 0.3873462677001953, | |
| 0.9999829530715942, | |
| 0.9993657469749451, | |
| 0.9999830722808838, | |
| 1.0, | |
| 1.0, | |
| 0.9606991410255432, | |
| 0.38706913590431213, | |
| 0.999995231628418, | |
| 0.9999992847442627, | |
| 0.999321460723877, | |
| 0.9999303817749023, | |
| 0.9974218606948853, | |
| 0.9999773502349854, | |
| 0.5980316400527954, | |
| 0.9870619177818298, | |
| 0.32300835847854614, | |
| 0.9956004619598389, | |
| 0.9514665603637695, | |
| 0.5014922022819519, | |
| 0.508743941783905, | |
| 0.5154624581336975, | |
| 0.9465087652206421, | |
| 0.9999905824661255, | |
| 0.20708410441875458, | |
| 0.9972707629203796, | |
| 0.9970442652702332, | |
| 0.998826801776886, | |
| 0.5420639514923096, | |
| 1.0, | |
| 0.912463903427124, | |
| 0.9993578791618347, | |
| 0.998062789440155, | |
| 0.9998818635940552, | |
| 0.9950023293495178, | |
| 0.05175871402025223, | |
| 0.9883875250816345, | |
| 0.0011090744519606233, | |
| 0.990136444568634, | |
| 0.989197313785553, | |
| 0.9999979734420776, | |
| 0.9999985694885254, | |
| 0.9999994039535522, | |
| 0.9635055661201477 | |
| ], | |
| "eval_counterfactual_python_runtime": 115.6625, | |
| "eval_counterfactual_python_samples_per_second": 0.865, | |
| "eval_counterfactual_python_score": -0.10470182448625565, | |
| "eval_counterfactual_python_steps_per_second": 0.035, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 0.0002, | |
| "loss": 0.2103, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 0.0002, | |
| "loss": 0.1066, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 0.0002, | |
| "loss": 0.1153, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 0.0002, | |
| "loss": 0.1088, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 0.0002, | |
| "loss": 0.2025, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 0.0002, | |
| "loss": 0.2141, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 0.0002, | |
| "loss": 0.1051, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 0.0002, | |
| "loss": 0.1626, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 0.0002, | |
| "loss": 0.1926, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 0.0002, | |
| "loss": 0.2089, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 0.0002, | |
| "loss": 0.3994, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 0.0002, | |
| "loss": 0.1149, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 0.0002, | |
| "loss": 0.195, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 0.0002, | |
| "loss": 0.0988, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 0.0002, | |
| "loss": 0.0945, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 0.0002, | |
| "loss": 0.1407, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 0.0002, | |
| "loss": 0.0607, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 0.0002, | |
| "loss": 0.0496, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 0.0002, | |
| "loss": 0.108, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 0.0002, | |
| "loss": 0.0848, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 0.0002, | |
| "loss": 0.0976, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 0.0002, | |
| "loss": 0.0672, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 0.0002, | |
| "loss": 0.0818, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 0.0002, | |
| "loss": 0.0695, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 0.0002, | |
| "loss": 0.0835, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "eval_counterfactual_python_accuracy": 0.89, | |
| "eval_counterfactual_python_average_probability": 0.8471024036407471, | |
| "eval_counterfactual_python_brier_score": 0.08900060504674911, | |
| "eval_counterfactual_python_loss": 0.3583179712295532, | |
| "eval_counterfactual_python_probabilities": [ | |
| 0.05809181556105614, | |
| 0.9953858256340027, | |
| 0.6866684556007385, | |
| 1.0, | |
| 0.9999998807907104, | |
| 0.9999960660934448, | |
| 0.9999985694885254, | |
| 0.9999978542327881, | |
| 1.0, | |
| 0.9999997615814209, | |
| 1.0, | |
| 1.0, | |
| 0.990549623966217, | |
| 0.9999145269393921, | |
| 0.9818199872970581, | |
| 0.9946061968803406, | |
| 0.9380003213882446, | |
| 0.998694121837616, | |
| 0.8848497867584229, | |
| 0.7099964618682861, | |
| 0.9983460903167725, | |
| 0.9999994039535522, | |
| 1.0, | |
| 1.0, | |
| 0.49947404861450195, | |
| 0.5004017949104309, | |
| 0.49699756503105164, | |
| 0.9999998807907104, | |
| 0.9999995231628418, | |
| 0.9999996423721313, | |
| 1.0, | |
| 0.8359315991401672, | |
| 0.9999997615814209, | |
| 0.9999977350234985, | |
| 0.7866907119750977, | |
| 0.6192131638526917, | |
| 0.9999843835830688, | |
| 0.9999998807907104, | |
| 0.9999995231628418, | |
| 0.9525272250175476, | |
| 0.5780937671661377, | |
| 1.0, | |
| 0.5239486694335938, | |
| 0.5134084820747375, | |
| 0.5220418572425842, | |
| 0.8747755885124207, | |
| 0.36531761288642883, | |
| 0.9437304139137268, | |
| 0.9974839687347412, | |
| 0.9675891399383545, | |
| 0.9971730709075928, | |
| 0.5026473999023438, | |
| 0.4968365430831909, | |
| 0.4916587471961975, | |
| 0.9146653413772583, | |
| 0.9551383852958679, | |
| 0.23804785311222076, | |
| 1.0, | |
| 0.9999998807907104, | |
| 1.0, | |
| 1.0, | |
| 1.0, | |
| 0.9973796606063843, | |
| 0.29981765151023865, | |
| 1.0, | |
| 1.0, | |
| 0.9998533725738525, | |
| 0.9999867677688599, | |
| 0.9904412031173706, | |
| 0.9999600648880005, | |
| 0.8782179355621338, | |
| 0.9403055906295776, | |
| 0.809188187122345, | |
| 0.9999572038650513, | |
| 0.999546468257904, | |
| 0.5033687353134155, | |
| 0.511478841304779, | |
| 0.5243589878082275, | |
| 0.6943322420120239, | |
| 0.9999990463256836, | |
| 0.002750272862613201, | |
| 0.9997815489768982, | |
| 0.9998233914375305, | |
| 0.9999880790710449, | |
| 0.5942977666854858, | |
| 1.0, | |
| 0.9955739974975586, | |
| 1.0, | |
| 0.9999998807907104, | |
| 1.0, | |
| 0.9997686743736267, | |
| 0.16937687993049622, | |
| 0.9986080527305603, | |
| 0.00019977407646365464, | |
| 0.9956634640693665, | |
| 0.995561957359314, | |
| 0.9999858140945435, | |
| 0.9999788999557495, | |
| 0.9999983310699463, | |
| 0.9999972581863403 | |
| ], | |
| "eval_counterfactual_python_runtime": 115.5963, | |
| "eval_counterfactual_python_samples_per_second": 0.865, | |
| "eval_counterfactual_python_score": -0.08900060504674911, | |
| "eval_counterfactual_python_steps_per_second": 0.035, | |
| "step": 125 | |
| }, | |
| { | |
| "loss": 0.1327, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.21, | |
| "step": 126 | |
| }, | |
| { | |
| "loss": 0.0882, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.23, | |
| "step": 127 | |
| }, | |
| { | |
| "loss": 0.0875, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.25, | |
| "step": 128 | |
| }, | |
| { | |
| "loss": 0.0984, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.26, | |
| "step": 129 | |
| }, | |
| { | |
| "loss": 0.0553, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.28, | |
| "step": 130 | |
| }, | |
| { | |
| "loss": 0.104, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.3, | |
| "step": 131 | |
| }, | |
| { | |
| "loss": 0.0297, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.32, | |
| "step": 132 | |
| }, | |
| { | |
| "loss": 0.0265, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.33, | |
| "step": 133 | |
| }, | |
| { | |
| "loss": 0.0555, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.35, | |
| "step": 134 | |
| }, | |
| { | |
| "loss": 0.026, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.37, | |
| "step": 135 | |
| }, | |
| { | |
| "loss": 0.1426, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.39, | |
| "step": 136 | |
| }, | |
| { | |
| "loss": 0.102, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.4, | |
| "step": 137 | |
| }, | |
| { | |
| "loss": 0.0794, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.42, | |
| "step": 138 | |
| }, | |
| { | |
| "loss": 0.1079, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.44, | |
| "step": 139 | |
| }, | |
| { | |
| "loss": 0.0256, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.46, | |
| "step": 140 | |
| }, | |
| { | |
| "loss": 0.048, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.47, | |
| "step": 141 | |
| }, | |
| { | |
| "loss": 0.0463, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.49, | |
| "step": 142 | |
| }, | |
| { | |
| "loss": 0.0278, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.51, | |
| "step": 143 | |
| }, | |
| { | |
| "loss": 0.1124, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.53, | |
| "step": 144 | |
| }, | |
| { | |
| "loss": 0.0683, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.54, | |
| "step": 145 | |
| }, | |
| { | |
| "loss": 0.0371, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.56, | |
| "step": 146 | |
| }, | |
| { | |
| "loss": 0.1198, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.58, | |
| "step": 147 | |
| }, | |
| { | |
| "loss": 0.0386, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.6, | |
| "step": 148 | |
| }, | |
| { | |
| "loss": 0.1031, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.61, | |
| "step": 149 | |
| }, | |
| { | |
| "loss": 0.0731, | |
| "learning_rate": 0.0002, | |
| "epoch": 2.63, | |
| "step": 150 | |
| }, | |
| { | |
| "eval_counterfactual_python_loss": 0.40711697936058044, | |
| "eval_counterfactual_python_score": -0.09575603157281876, | |
| "eval_counterfactual_python_brier_score": 0.09575603157281876, | |
| "eval_counterfactual_python_average_probability": 0.8392205834388733, | |
| "eval_counterfactual_python_accuracy": 0.89, | |
| "eval_counterfactual_python_probabilities": [ | |
| 0.2721835970878601, | |
| 0.9999978542327881, | |
| 0.5331283211708069, | |
| 1.0, | |
| 1.0, | |
| 1.0, | |
| 0.9999324083328247, | |
| 0.9999998807907104, | |
| 1.0, | |
| 1.0, | |
| 1.0, | |
| 1.0, | |
| 0.9998410940170288, | |
| 0.9999990463256836, | |
| 0.9952380657196045, | |
| 0.9997170567512512, | |
| 0.9911946058273315, | |
| 0.9999902248382568, | |
| 0.8052052855491638, | |
| 0.5563430786132812, | |
| 0.8789305686950684, | |
| 1.0, | |
| 1.0, | |
| 1.0, | |
| 0.5007253885269165, | |
| 0.5058515667915344, | |
| 0.4912094175815582, | |
| 1.0, | |
| 1.0, | |
| 1.0, | |
| 1.0, | |
| 0.9610124230384827, | |
| 1.0, | |
| 0.9999998807907104, | |
| 0.7409366369247437, | |
| 0.5872294306755066, | |
| 0.9999994039535522, | |
| 1.0, | |
| 1.0, | |
| 0.9307988882064819, | |
| 0.5262203216552734, | |
| 1.0, | |
| 0.6476882100105286, | |
| 0.5980938076972961, | |
| 0.6242404580116272, | |
| 0.6360834836959839, | |
| 0.2772783637046814, | |
| 0.9846892356872559, | |
| 0.9951990246772766, | |
| 0.9936495423316956, | |
| 0.999901533126831, | |
| 0.5019819736480713, | |
| 0.48769497871398926, | |
| 0.47629255056381226, | |
| 0.9037674069404602, | |
| 0.8574671745300293, | |
| 0.068470299243927, | |
| 1.0, | |
| 1.0, | |
| 1.0, | |
| 1.0, | |
| 1.0, | |
| 0.9995761513710022, | |
| 0.24760562181472778, | |
| 0.9999995231628418, | |
| 1.0, | |
| 0.9999412298202515, | |
| 1.0, | |
| 0.9950709342956543, | |
| 0.9999998807907104, | |
| 0.8830302357673645, | |
| 0.1665273755788803, | |
| 0.8684807419776917, | |
| 0.9999932050704956, | |
| 0.9999849796295166, | |
| 0.5084021687507629, | |
| 0.5264973044395447, | |
| 0.5384640693664551, | |
| 0.9106827974319458, | |
| 1.0, | |
| 0.00011814858589787036, | |
| 1.0, | |
| 1.0, | |
| 1.0, | |
| 0.6591076850891113, | |
| 1.0, | |
| 0.9994019269943237, | |
| 1.0, | |
| 1.0, | |
| 1.0, | |
| 1.0, | |
| 0.2933136820793152, | |
| 0.9999998807907104, | |
| 0.00010689133341656998, | |
| 0.999189555644989, | |
| 0.9987898468971252, | |
| 0.9999929666519165, | |
| 0.9995980858802795, | |
| 1.0, | |
| 1.0 | |
| ], | |
| "eval_counterfactual_python_runtime": 116.1742, | |
| "eval_counterfactual_python_samples_per_second": 0.861, | |
| "eval_counterfactual_python_steps_per_second": 0.034, | |
| "epoch": 2.63, | |
| "step": 150 | |
| }, | |
| { | |
| "train_runtime": 2419.5441, | |
| "train_samples_per_second": 1.984, | |
| "train_steps_per_second": 0.062, | |
| "total_flos": 0.0, | |
| "train_loss": 0.012238509245216847, | |
| "epoch": 2.63, | |
| "step": 150 | |
| } | |
| ] |