Invalid JSON: Unexpected token 'I', ..."ad_norm": Infinity,
"... is not valid JSON
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.14378145219266714, | |
| "eval_steps": 500, | |
| "global_step": 1000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "ce_ib": 64.96094512939453, | |
| "ce_orig": 0.7435811758041382, | |
| "epoch": 0, | |
| "kl_loss": 3856.220703125, | |
| "loss_ib": 192.84352111816406, | |
| "step": 0 | |
| }, | |
| { | |
| "ce_ib": 65.86748504638672, | |
| "ce_orig": 1.261900782585144, | |
| "epoch": 0.00014378145219266715, | |
| "kl_loss": 3989.227294921875, | |
| "loss_ib": 199.4943084716797, | |
| "step": 1 | |
| }, | |
| { | |
| "ce_ib": 63.70602798461914, | |
| "ce_orig": 1.069283127784729, | |
| "epoch": 0.0002875629043853343, | |
| "kl_loss": 3018.861572265625, | |
| "loss_ib": 150.9749298095703, | |
| "step": 2 | |
| }, | |
| { | |
| "ce_ib": 65.56930541992188, | |
| "ce_orig": 0.9985544085502625, | |
| "epoch": 0.00043134435657800146, | |
| "kl_loss": 3440.156494140625, | |
| "loss_ib": 172.04061889648438, | |
| "step": 3 | |
| }, | |
| { | |
| "ce_ib": 62.74740982055664, | |
| "ce_orig": 0.7562570571899414, | |
| "epoch": 0.0005751258087706686, | |
| "kl_loss": 3999.942626953125, | |
| "loss_ib": 200.02850341796875, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0007189072609633358, | |
| "grad_norm": Infinity, | |
| "learning_rate": 0.0, | |
| "loss": 183.2429, | |
| "step": 5 | |
| }, | |
| { | |
| "ce_ib": 64.26500701904297, | |
| "ce_orig": 0.704367995262146, | |
| "epoch": 0.0007189072609633358, | |
| "kl_loss": 3585.45947265625, | |
| "loss_ib": 179.3050994873047, | |
| "step": 5 | |
| }, | |
| { | |
| "ce_ib": 64.85440826416016, | |
| "ce_orig": 0.990234375, | |
| "epoch": 0.0008626887131560029, | |
| "kl_loss": 3754.7421875, | |
| "loss_ib": 187.76953125, | |
| "step": 6 | |
| }, | |
| { | |
| "ce_ib": 64.96634674072266, | |
| "ce_orig": 0.9086857438087463, | |
| "epoch": 0.00100647016534867, | |
| "kl_loss": 3929.58203125, | |
| "loss_ib": 196.51158142089844, | |
| "step": 7 | |
| }, | |
| { | |
| "ce_ib": 64.07415771484375, | |
| "ce_orig": 0.7150144577026367, | |
| "epoch": 0.0011502516175413372, | |
| "kl_loss": 3740.4150390625, | |
| "loss_ib": 187.0527801513672, | |
| "step": 8 | |
| }, | |
| { | |
| "ce_ib": 65.45182800292969, | |
| "ce_orig": 0.8336902856826782, | |
| "epoch": 0.0012940330697340044, | |
| "kl_loss": 3728.460205078125, | |
| "loss_ib": 186.4557342529297, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.0014378145219266715, | |
| "grad_norm": 2721.89794921875, | |
| "learning_rate": 0.0, | |
| "loss": 185.5215, | |
| "step": 10 | |
| }, | |
| { | |
| "ce_ib": 64.31118774414062, | |
| "ce_orig": 0.773049533367157, | |
| "epoch": 0.0014378145219266715, | |
| "kl_loss": 3938.441650390625, | |
| "loss_ib": 196.95423889160156, | |
| "step": 10 | |
| }, | |
| { | |
| "ce_ib": 68.13079071044922, | |
| "ce_orig": 1.7455261945724487, | |
| "epoch": 0.0015815959741193387, | |
| "kl_loss": 3521.255615234375, | |
| "loss_ib": 176.0968475341797, | |
| "step": 11 | |
| }, | |
| { | |
| "ce_ib": 65.10643005371094, | |
| "ce_orig": 1.115777850151062, | |
| "epoch": 0.0017253774263120058, | |
| "kl_loss": 3872.32763671875, | |
| "loss_ib": 193.64894104003906, | |
| "step": 12 | |
| }, | |
| { | |
| "ce_ib": 66.39643859863281, | |
| "ce_orig": 0.9639286994934082, | |
| "epoch": 0.001869158878504673, | |
| "kl_loss": 3771.317626953125, | |
| "loss_ib": 188.59909057617188, | |
| "step": 13 | |
| }, | |
| { | |
| "ce_ib": 63.847900390625, | |
| "ce_orig": 0.6167153716087341, | |
| "epoch": 0.00201294033069734, | |
| "kl_loss": 3233.497314453125, | |
| "loss_ib": 161.706787109375, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.002156721782890007, | |
| "grad_norm": 2457.46435546875, | |
| "learning_rate": 3.9936102236421723e-07, | |
| "loss": 181.2678, | |
| "step": 15 | |
| }, | |
| { | |
| "ce_ib": 64.9281997680664, | |
| "ce_orig": 1.2498202323913574, | |
| "epoch": 0.002156721782890007, | |
| "kl_loss": 3831.1611328125, | |
| "loss_ib": 191.59051513671875, | |
| "step": 15 | |
| }, | |
| { | |
| "ce_ib": 65.2757797241211, | |
| "ce_orig": 1.0968470573425293, | |
| "epoch": 0.0023005032350826744, | |
| "kl_loss": 3685.48193359375, | |
| "loss_ib": 184.30674743652344, | |
| "step": 16 | |
| }, | |
| { | |
| "ce_ib": 64.68568420410156, | |
| "ce_orig": 0.8841207027435303, | |
| "epoch": 0.0024442846872753414, | |
| "kl_loss": 3744.38134765625, | |
| "loss_ib": 187.2514190673828, | |
| "step": 17 | |
| }, | |
| { | |
| "ce_ib": 64.82100677490234, | |
| "ce_orig": 1.1195234060287476, | |
| "epoch": 0.0025880661394680087, | |
| "kl_loss": 3526.25927734375, | |
| "loss_ib": 176.3453826904297, | |
| "step": 18 | |
| }, | |
| { | |
| "ce_ib": 63.829200744628906, | |
| "ce_orig": 0.9802423119544983, | |
| "epoch": 0.0027318475916606757, | |
| "kl_loss": 3915.511474609375, | |
| "loss_ib": 195.80747985839844, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.002875629043853343, | |
| "grad_norm": 2735.018310546875, | |
| "learning_rate": 7.987220447284345e-07, | |
| "loss": 187.5199, | |
| "step": 20 | |
| }, | |
| { | |
| "ce_ib": 64.30339050292969, | |
| "ce_orig": 0.713705837726593, | |
| "epoch": 0.002875629043853343, | |
| "kl_loss": 3551.751220703125, | |
| "loss_ib": 177.61972045898438, | |
| "step": 20 | |
| }, | |
| { | |
| "ce_ib": 65.2120132446289, | |
| "ce_orig": 1.1765546798706055, | |
| "epoch": 0.00301941049604601, | |
| "kl_loss": 2725.1201171875, | |
| "loss_ib": 136.2886199951172, | |
| "step": 21 | |
| }, | |
| { | |
| "ce_ib": 64.87537384033203, | |
| "ce_orig": 1.1634544134140015, | |
| "epoch": 0.0031631919482386773, | |
| "kl_loss": 3579.1513671875, | |
| "loss_ib": 178.99000549316406, | |
| "step": 22 | |
| }, | |
| { | |
| "ce_ib": 63.88631820678711, | |
| "ce_orig": 0.9461633563041687, | |
| "epoch": 0.0033069734004313443, | |
| "kl_loss": 3845.448974609375, | |
| "loss_ib": 192.3043975830078, | |
| "step": 23 | |
| }, | |
| { | |
| "ce_ib": 64.70732116699219, | |
| "ce_orig": 1.03489351272583, | |
| "epoch": 0.0034507548526240116, | |
| "kl_loss": 3712.78076171875, | |
| "loss_ib": 185.67138671875, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.0035945363048166786, | |
| "grad_norm": 2554.739501953125, | |
| "learning_rate": 1.1980830670926517e-06, | |
| "loss": 181.5605, | |
| "step": 25 | |
| }, | |
| { | |
| "ce_ib": 60.482200622558594, | |
| "ce_orig": 0.3934582769870758, | |
| "epoch": 0.0035945363048166786, | |
| "kl_loss": 3043.28076171875, | |
| "loss_ib": 152.19427490234375, | |
| "step": 25 | |
| }, | |
| { | |
| "ce_ib": 63.36570739746094, | |
| "ce_orig": 0.7366315126419067, | |
| "epoch": 0.003738317757009346, | |
| "kl_loss": 3766.66796875, | |
| "loss_ib": 188.36508178710938, | |
| "step": 26 | |
| }, | |
| { | |
| "ce_ib": 64.73159790039062, | |
| "ce_orig": 0.8274144530296326, | |
| "epoch": 0.003882099209202013, | |
| "kl_loss": 4114.853515625, | |
| "loss_ib": 205.77505493164062, | |
| "step": 27 | |
| }, | |
| { | |
| "ce_ib": 64.48138427734375, | |
| "ce_orig": 0.9502752423286438, | |
| "epoch": 0.00402588066139468, | |
| "kl_loss": 3778.4033203125, | |
| "loss_ib": 188.95240783691406, | |
| "step": 28 | |
| }, | |
| { | |
| "ce_ib": 65.42862701416016, | |
| "ce_orig": 1.2197273969650269, | |
| "epoch": 0.004169662113587347, | |
| "kl_loss": 3769.447509765625, | |
| "loss_ib": 188.50509643554688, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.004313443565780014, | |
| "grad_norm": 2604.34765625, | |
| "learning_rate": 1.597444089456869e-06, | |
| "loss": 185.5906, | |
| "step": 30 | |
| }, | |
| { | |
| "ce_ib": 66.01839447021484, | |
| "ce_orig": 1.2358959913253784, | |
| "epoch": 0.004313443565780014, | |
| "kl_loss": 3728.17578125, | |
| "loss_ib": 186.44180297851562, | |
| "step": 30 | |
| }, | |
| { | |
| "ce_ib": 63.86208724975586, | |
| "ce_orig": 0.9959704875946045, | |
| "epoch": 0.004457225017972682, | |
| "kl_loss": 4007.545654296875, | |
| "loss_ib": 200.4092254638672, | |
| "step": 31 | |
| }, | |
| { | |
| "ce_ib": 63.890628814697266, | |
| "ce_orig": 0.785792887210846, | |
| "epoch": 0.004601006470165349, | |
| "kl_loss": 3334.66552734375, | |
| "loss_ib": 166.76522827148438, | |
| "step": 32 | |
| }, | |
| { | |
| "ce_ib": 63.21049118041992, | |
| "ce_orig": 0.9940950274467468, | |
| "epoch": 0.004744787922358016, | |
| "kl_loss": 3633.696533203125, | |
| "loss_ib": 181.7164306640625, | |
| "step": 33 | |
| }, | |
| { | |
| "ce_ib": 65.66783905029297, | |
| "ce_orig": 1.3076696395874023, | |
| "epoch": 0.004888569374550683, | |
| "kl_loss": 3813.81396484375, | |
| "loss_ib": 190.72354125976562, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.0050323508267433505, | |
| "grad_norm": 2616.953369140625, | |
| "learning_rate": 1.9968051118210863e-06, | |
| "loss": 187.4055, | |
| "step": 35 | |
| }, | |
| { | |
| "ce_ib": 63.93610763549805, | |
| "ce_orig": 1.0693968534469604, | |
| "epoch": 0.0050323508267433505, | |
| "kl_loss": 2865.432373046875, | |
| "loss_ib": 143.3035888671875, | |
| "step": 35 | |
| }, | |
| { | |
| "ce_ib": 63.83279037475586, | |
| "ce_orig": 0.9967127442359924, | |
| "epoch": 0.0051761322789360175, | |
| "kl_loss": 3682.119384765625, | |
| "loss_ib": 184.1378936767578, | |
| "step": 36 | |
| }, | |
| { | |
| "ce_ib": 63.78058624267578, | |
| "ce_orig": 0.7548370957374573, | |
| "epoch": 0.005319913731128684, | |
| "kl_loss": 3521.068359375, | |
| "loss_ib": 176.08531188964844, | |
| "step": 37 | |
| }, | |
| { | |
| "ce_ib": 63.56543731689453, | |
| "ce_orig": 0.9782358407974243, | |
| "epoch": 0.005463695183321351, | |
| "kl_loss": 3663.02587890625, | |
| "loss_ib": 183.18309020996094, | |
| "step": 38 | |
| }, | |
| { | |
| "ce_ib": 65.04608917236328, | |
| "ce_orig": 0.8212652802467346, | |
| "epoch": 0.005607476635514018, | |
| "kl_loss": 4082.810546875, | |
| "loss_ib": 204.1730499267578, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.005751258087706686, | |
| "grad_norm": 2647.227294921875, | |
| "learning_rate": 2.3961661341853035e-06, | |
| "loss": 181.9243, | |
| "step": 40 | |
| }, | |
| { | |
| "ce_ib": 65.51580047607422, | |
| "ce_orig": 1.1804547309875488, | |
| "epoch": 0.005751258087706686, | |
| "kl_loss": 3949.86279296875, | |
| "loss_ib": 197.52590942382812, | |
| "step": 40 | |
| }, | |
| { | |
| "ce_ib": 63.80126190185547, | |
| "ce_orig": 0.7081286907196045, | |
| "epoch": 0.005895039539899353, | |
| "kl_loss": 3912.12548828125, | |
| "loss_ib": 195.63816833496094, | |
| "step": 41 | |
| }, | |
| { | |
| "ce_ib": 62.75702667236328, | |
| "ce_orig": 0.7087532877922058, | |
| "epoch": 0.00603882099209202, | |
| "kl_loss": 3891.41259765625, | |
| "loss_ib": 194.60202026367188, | |
| "step": 42 | |
| }, | |
| { | |
| "ce_ib": 64.6529541015625, | |
| "ce_orig": 1.2302581071853638, | |
| "epoch": 0.006182602444284687, | |
| "kl_loss": 3733.06787109375, | |
| "loss_ib": 186.6857147216797, | |
| "step": 43 | |
| }, | |
| { | |
| "ce_ib": 63.076133728027344, | |
| "ce_orig": 0.8998420238494873, | |
| "epoch": 0.006326383896477355, | |
| "kl_loss": 3209.076904296875, | |
| "loss_ib": 160.4853973388672, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.006470165348670022, | |
| "grad_norm": 2475.28173828125, | |
| "learning_rate": 2.7955271565495207e-06, | |
| "loss": 182.2709, | |
| "step": 45 | |
| }, | |
| { | |
| "ce_ib": 61.6526985168457, | |
| "ce_orig": 0.6503346562385559, | |
| "epoch": 0.006470165348670022, | |
| "kl_loss": 3745.544921875, | |
| "loss_ib": 187.30807495117188, | |
| "step": 45 | |
| }, | |
| { | |
| "ce_ib": 63.45009231567383, | |
| "ce_orig": 1.2053508758544922, | |
| "epoch": 0.0066139468008626886, | |
| "kl_loss": 3523.22900390625, | |
| "loss_ib": 176.19317626953125, | |
| "step": 46 | |
| }, | |
| { | |
| "ce_ib": 64.10655212402344, | |
| "ce_orig": 1.1375271081924438, | |
| "epoch": 0.0067577282530553555, | |
| "kl_loss": 3449.62353515625, | |
| "loss_ib": 172.5132293701172, | |
| "step": 47 | |
| }, | |
| { | |
| "ce_ib": 61.43892288208008, | |
| "ce_orig": 0.6051114201545715, | |
| "epoch": 0.006901509705248023, | |
| "kl_loss": 2918.55615234375, | |
| "loss_ib": 145.9585418701172, | |
| "step": 48 | |
| }, | |
| { | |
| "ce_ib": 62.07788848876953, | |
| "ce_orig": 0.6533306837081909, | |
| "epoch": 0.00704529115744069, | |
| "kl_loss": 3852.226318359375, | |
| "loss_ib": 192.64236450195312, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.007189072609633357, | |
| "grad_norm": 2666.057861328125, | |
| "learning_rate": 3.194888178913738e-06, | |
| "loss": 178.7684, | |
| "step": 50 | |
| }, | |
| { | |
| "ce_ib": 63.88691329956055, | |
| "ce_orig": 0.9801137447357178, | |
| "epoch": 0.007189072609633357, | |
| "kl_loss": 3421.68798828125, | |
| "loss_ib": 171.11634826660156, | |
| "step": 50 | |
| }, | |
| { | |
| "ce_ib": 63.39102554321289, | |
| "ce_orig": 1.208335041999817, | |
| "epoch": 0.007332854061826024, | |
| "kl_loss": 3816.6484375, | |
| "loss_ib": 190.86412048339844, | |
| "step": 51 | |
| }, | |
| { | |
| "ce_ib": 62.79481506347656, | |
| "ce_orig": 1.1687792539596558, | |
| "epoch": 0.007476635514018692, | |
| "kl_loss": 3562.60595703125, | |
| "loss_ib": 178.1616973876953, | |
| "step": 52 | |
| }, | |
| { | |
| "ce_ib": 59.30780792236328, | |
| "ce_orig": 0.48160627484321594, | |
| "epoch": 0.007620416966211359, | |
| "kl_loss": 2727.423828125, | |
| "loss_ib": 136.40084838867188, | |
| "step": 53 | |
| }, | |
| { | |
| "ce_ib": 60.45918655395508, | |
| "ce_orig": 0.729110598564148, | |
| "epoch": 0.007764198418404026, | |
| "kl_loss": 3612.10888671875, | |
| "loss_ib": 180.6356658935547, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.007907979870596693, | |
| "grad_norm": 2656.139404296875, | |
| "learning_rate": 3.5942492012779555e-06, | |
| "loss": 179.8713, | |
| "step": 55 | |
| }, | |
| { | |
| "ce_ib": 62.10517883300781, | |
| "ce_orig": 0.8718740344047546, | |
| "epoch": 0.007907979870596693, | |
| "kl_loss": 3526.4736328125, | |
| "loss_ib": 176.354736328125, | |
| "step": 55 | |
| }, | |
| { | |
| "ce_ib": 61.65966796875, | |
| "ce_orig": 0.8692609071731567, | |
| "epoch": 0.00805176132278936, | |
| "kl_loss": 3907.55126953125, | |
| "loss_ib": 195.4084014892578, | |
| "step": 56 | |
| }, | |
| { | |
| "ce_ib": 62.87982940673828, | |
| "ce_orig": 1.1107903718948364, | |
| "epoch": 0.008195542774982027, | |
| "kl_loss": 3505.02880859375, | |
| "loss_ib": 175.2828826904297, | |
| "step": 57 | |
| }, | |
| { | |
| "ce_ib": 61.1724853515625, | |
| "ce_orig": 0.7401551008224487, | |
| "epoch": 0.008339324227174694, | |
| "kl_loss": 3798.70947265625, | |
| "loss_ib": 189.966064453125, | |
| "step": 58 | |
| }, | |
| { | |
| "ce_ib": 64.25030517578125, | |
| "ce_orig": 1.37394380569458, | |
| "epoch": 0.008483105679367362, | |
| "kl_loss": 3642.968994140625, | |
| "loss_ib": 182.1805877685547, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.008626887131560028, | |
| "grad_norm": 2610.953369140625, | |
| "learning_rate": 3.993610223642173e-06, | |
| "loss": 181.3146, | |
| "step": 60 | |
| }, | |
| { | |
| "ce_ib": 61.306488037109375, | |
| "ce_orig": 0.8149375915527344, | |
| "epoch": 0.008626887131560028, | |
| "kl_loss": 3716.3310546875, | |
| "loss_ib": 185.8472137451172, | |
| "step": 60 | |
| }, | |
| { | |
| "ce_ib": 63.48124313354492, | |
| "ce_orig": 1.404058575630188, | |
| "epoch": 0.008770668583752696, | |
| "kl_loss": 3322.540283203125, | |
| "loss_ib": 166.15875244140625, | |
| "step": 61 | |
| }, | |
| { | |
| "ce_ib": 63.29791259765625, | |
| "ce_orig": 1.1193113327026367, | |
| "epoch": 0.008914450035945364, | |
| "kl_loss": 3468.67529296875, | |
| "loss_ib": 173.46542358398438, | |
| "step": 62 | |
| }, | |
| { | |
| "ce_ib": 62.13097381591797, | |
| "ce_orig": 1.177595615386963, | |
| "epoch": 0.00905823148813803, | |
| "kl_loss": 3650.87646484375, | |
| "loss_ib": 182.57489013671875, | |
| "step": 63 | |
| }, | |
| { | |
| "ce_ib": 60.98870849609375, | |
| "ce_orig": 0.9005176424980164, | |
| "epoch": 0.009202012940330698, | |
| "kl_loss": 3419.352783203125, | |
| "loss_ib": 170.99813842773438, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.009345794392523364, | |
| "grad_norm": 2418.697998046875, | |
| "learning_rate": 4.39297124600639e-06, | |
| "loss": 176.6997, | |
| "step": 65 | |
| }, | |
| { | |
| "ce_ib": 60.742515563964844, | |
| "ce_orig": 0.8558191657066345, | |
| "epoch": 0.009345794392523364, | |
| "kl_loss": 3522.680908203125, | |
| "loss_ib": 176.16441345214844, | |
| "step": 65 | |
| }, | |
| { | |
| "ce_ib": 61.35293960571289, | |
| "ce_orig": 0.6822745203971863, | |
| "epoch": 0.009489575844716032, | |
| "kl_loss": 3174.692138671875, | |
| "loss_ib": 158.76528930664062, | |
| "step": 66 | |
| }, | |
| { | |
| "ce_ib": 60.16307067871094, | |
| "ce_orig": 0.6927408576011658, | |
| "epoch": 0.0096333572969087, | |
| "kl_loss": 3434.4345703125, | |
| "loss_ib": 171.75181579589844, | |
| "step": 67 | |
| }, | |
| { | |
| "ce_ib": 60.551544189453125, | |
| "ce_orig": 0.7352694272994995, | |
| "epoch": 0.009777138749101365, | |
| "kl_loss": 3096.6083984375, | |
| "loss_ib": 154.86070251464844, | |
| "step": 68 | |
| }, | |
| { | |
| "ce_ib": 61.049774169921875, | |
| "ce_orig": 0.8262593150138855, | |
| "epoch": 0.009920920201294033, | |
| "kl_loss": 3612.48095703125, | |
| "loss_ib": 180.6545867919922, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.010064701653486701, | |
| "grad_norm": 2490.9189453125, | |
| "learning_rate": 4.792332268370607e-06, | |
| "loss": 168.4689, | |
| "step": 70 | |
| }, | |
| { | |
| "ce_ib": 59.70656204223633, | |
| "ce_orig": 0.7031822204589844, | |
| "epoch": 0.010064701653486701, | |
| "kl_loss": 3356.534423828125, | |
| "loss_ib": 167.85658264160156, | |
| "step": 70 | |
| }, | |
| { | |
| "ce_ib": 58.52351760864258, | |
| "ce_orig": 0.8787212371826172, | |
| "epoch": 0.010208483105679367, | |
| "kl_loss": 3540.501953125, | |
| "loss_ib": 177.0543670654297, | |
| "step": 71 | |
| }, | |
| { | |
| "ce_ib": 60.51292419433594, | |
| "ce_orig": 0.872455894947052, | |
| "epoch": 0.010352264557872035, | |
| "kl_loss": 3272.396240234375, | |
| "loss_ib": 163.6500701904297, | |
| "step": 72 | |
| }, | |
| { | |
| "ce_ib": 59.571720123291016, | |
| "ce_orig": 0.9494105577468872, | |
| "epoch": 0.010496046010064701, | |
| "kl_loss": 3453.132568359375, | |
| "loss_ib": 172.68641662597656, | |
| "step": 73 | |
| }, | |
| { | |
| "ce_ib": 60.829315185546875, | |
| "ce_orig": 0.9209774136543274, | |
| "epoch": 0.010639827462257369, | |
| "kl_loss": 3232.62890625, | |
| "loss_ib": 161.661865234375, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.010783608914450037, | |
| "grad_norm": 2442.193359375, | |
| "learning_rate": 5.191693290734825e-06, | |
| "loss": 169.7118, | |
| "step": 75 | |
| }, | |
| { | |
| "ce_ib": 58.91569519042969, | |
| "ce_orig": 0.6644178628921509, | |
| "epoch": 0.010783608914450037, | |
| "kl_loss": 3597.986572265625, | |
| "loss_ib": 179.9287872314453, | |
| "step": 75 | |
| }, | |
| { | |
| "ce_ib": 59.93379592895508, | |
| "ce_orig": 0.6643899083137512, | |
| "epoch": 0.010927390366642703, | |
| "kl_loss": 3446.6064453125, | |
| "loss_ib": 172.36029052734375, | |
| "step": 76 | |
| }, | |
| { | |
| "ce_ib": 61.500518798828125, | |
| "ce_orig": 1.1189907789230347, | |
| "epoch": 0.01107117181883537, | |
| "kl_loss": 3529.22216796875, | |
| "loss_ib": 176.4918670654297, | |
| "step": 77 | |
| }, | |
| { | |
| "ce_ib": 59.83492660522461, | |
| "ce_orig": 0.6313321590423584, | |
| "epoch": 0.011214953271028037, | |
| "kl_loss": 3552.268798828125, | |
| "loss_ib": 177.6433563232422, | |
| "step": 78 | |
| }, | |
| { | |
| "ce_ib": 59.81180953979492, | |
| "ce_orig": 1.0850353240966797, | |
| "epoch": 0.011358734723220704, | |
| "kl_loss": 3466.029296875, | |
| "loss_ib": 173.3313751220703, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.011502516175413372, | |
| "grad_norm": 2574.475341796875, | |
| "learning_rate": 5.591054313099041e-06, | |
| "loss": 170.7064, | |
| "step": 80 | |
| }, | |
| { | |
| "ce_ib": 62.353084564208984, | |
| "ce_orig": 1.3420498371124268, | |
| "epoch": 0.011502516175413372, | |
| "kl_loss": 3235.225341796875, | |
| "loss_ib": 161.79244995117188, | |
| "step": 80 | |
| }, | |
| { | |
| "ce_ib": 60.79109573364258, | |
| "ce_orig": 1.3365955352783203, | |
| "epoch": 0.011646297627606038, | |
| "kl_loss": 3334.964599609375, | |
| "loss_ib": 166.77862548828125, | |
| "step": 81 | |
| }, | |
| { | |
| "ce_ib": 60.66354751586914, | |
| "ce_orig": 0.7855740785598755, | |
| "epoch": 0.011790079079798706, | |
| "kl_loss": 3520.52783203125, | |
| "loss_ib": 176.05673217773438, | |
| "step": 82 | |
| }, | |
| { | |
| "ce_ib": 59.765869140625, | |
| "ce_orig": 0.8332124352455139, | |
| "epoch": 0.011933860531991374, | |
| "kl_loss": 3197.080078125, | |
| "loss_ib": 159.88389587402344, | |
| "step": 83 | |
| }, | |
| { | |
| "ce_ib": 60.493858337402344, | |
| "ce_orig": 1.5375986099243164, | |
| "epoch": 0.01207764198418404, | |
| "kl_loss": 3220.095703125, | |
| "loss_ib": 161.0350341796875, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.012221423436376708, | |
| "grad_norm": 2434.75390625, | |
| "learning_rate": 5.990415335463259e-06, | |
| "loss": 165.5288, | |
| "step": 85 | |
| }, | |
| { | |
| "ce_ib": 57.47407150268555, | |
| "ce_orig": 0.8656359910964966, | |
| "epoch": 0.012221423436376708, | |
| "kl_loss": 3253.564453125, | |
| "loss_ib": 162.7069549560547, | |
| "step": 85 | |
| }, | |
| { | |
| "ce_ib": 59.44199752807617, | |
| "ce_orig": 0.8775683045387268, | |
| "epoch": 0.012365204888569374, | |
| "kl_loss": 3335.959716796875, | |
| "loss_ib": 166.8277130126953, | |
| "step": 86 | |
| }, | |
| { | |
| "ce_ib": 59.450687408447266, | |
| "ce_orig": 0.7520811557769775, | |
| "epoch": 0.012508986340762042, | |
| "kl_loss": 3219.025634765625, | |
| "loss_ib": 160.9810028076172, | |
| "step": 87 | |
| }, | |
| { | |
| "ce_ib": 58.30461120605469, | |
| "ce_orig": 0.9134323000907898, | |
| "epoch": 0.01265276779295471, | |
| "kl_loss": 2271.015625, | |
| "loss_ib": 113.5799331665039, | |
| "step": 88 | |
| }, | |
| { | |
| "ce_ib": 58.684146881103516, | |
| "ce_orig": 1.100233793258667, | |
| "epoch": 0.012796549245147375, | |
| "kl_loss": 3356.873779296875, | |
| "loss_ib": 167.87303161621094, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.012940330697340043, | |
| "grad_norm": 2395.843994140625, | |
| "learning_rate": 6.389776357827476e-06, | |
| "loss": 153.9006, | |
| "step": 90 | |
| }, | |
| { | |
| "ce_ib": 57.74800491333008, | |
| "ce_orig": 1.038017988204956, | |
| "epoch": 0.012940330697340043, | |
| "kl_loss": 3335.373046875, | |
| "loss_ib": 166.7975311279297, | |
| "step": 90 | |
| }, | |
| { | |
| "ce_ib": 57.7064323425293, | |
| "ce_orig": 0.921914279460907, | |
| "epoch": 0.013084112149532711, | |
| "kl_loss": 3284.214111328125, | |
| "loss_ib": 164.23956298828125, | |
| "step": 91 | |
| }, | |
| { | |
| "ce_ib": 57.82907485961914, | |
| "ce_orig": 0.9391928911209106, | |
| "epoch": 0.013227893601725377, | |
| "kl_loss": 3100.417724609375, | |
| "loss_ib": 155.0498046875, | |
| "step": 92 | |
| }, | |
| { | |
| "ce_ib": 58.58163833618164, | |
| "ce_orig": 1.0686829090118408, | |
| "epoch": 0.013371675053918045, | |
| "kl_loss": 3164.060546875, | |
| "loss_ib": 158.23233032226562, | |
| "step": 93 | |
| }, | |
| { | |
| "ce_ib": 56.14845657348633, | |
| "ce_orig": 0.4495549201965332, | |
| "epoch": 0.013515456506110711, | |
| "kl_loss": 2954.31982421875, | |
| "loss_ib": 147.7440643310547, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.013659237958303379, | |
| "grad_norm": 2321.239013671875, | |
| "learning_rate": 6.789137380191693e-06, | |
| "loss": 156.8229, | |
| "step": 95 | |
| }, | |
| { | |
| "ce_ib": 57.893184661865234, | |
| "ce_orig": 1.1613727807998657, | |
| "epoch": 0.013659237958303379, | |
| "kl_loss": 3191.733154296875, | |
| "loss_ib": 159.6156005859375, | |
| "step": 95 | |
| }, | |
| { | |
| "ce_ib": 58.80827713012695, | |
| "ce_orig": 1.103546380996704, | |
| "epoch": 0.013803019410496047, | |
| "kl_loss": 2989.6494140625, | |
| "loss_ib": 149.5118865966797, | |
| "step": 96 | |
| }, | |
| { | |
| "ce_ib": 57.372257232666016, | |
| "ce_orig": 0.7489521503448486, | |
| "epoch": 0.013946800862688713, | |
| "kl_loss": 3076.787109375, | |
| "loss_ib": 153.8680419921875, | |
| "step": 97 | |
| }, | |
| { | |
| "ce_ib": 58.20832061767578, | |
| "ce_orig": 1.0948829650878906, | |
| "epoch": 0.01409058231488138, | |
| "kl_loss": 3079.382080078125, | |
| "loss_ib": 153.9982147216797, | |
| "step": 98 | |
| }, | |
| { | |
| "ce_ib": 57.56401443481445, | |
| "ce_orig": 1.252577543258667, | |
| "epoch": 0.014234363767074048, | |
| "kl_loss": 2953.885498046875, | |
| "loss_ib": 147.7230682373047, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.014378145219266714, | |
| "grad_norm": 2354.333251953125, | |
| "learning_rate": 7.188498402555911e-06, | |
| "loss": 151.9125, | |
| "step": 100 | |
| }, | |
| { | |
| "ce_ib": 57.552894592285156, | |
| "ce_orig": 1.0784616470336914, | |
| "epoch": 0.014378145219266714, | |
| "kl_loss": 3071.802734375, | |
| "loss_ib": 153.61891174316406, | |
| "step": 100 | |
| }, | |
| { | |
| "ce_ib": 56.86176681518555, | |
| "ce_orig": 0.6961782574653625, | |
| "epoch": 0.014521926671459382, | |
| "kl_loss": 3063.923095703125, | |
| "loss_ib": 153.22459411621094, | |
| "step": 101 | |
| }, | |
| { | |
| "ce_ib": 54.33999252319336, | |
| "ce_orig": 0.5625413656234741, | |
| "epoch": 0.014665708123652048, | |
| "kl_loss": 2982.339111328125, | |
| "loss_ib": 149.14413452148438, | |
| "step": 102 | |
| }, | |
| { | |
| "ce_ib": 55.64839553833008, | |
| "ce_orig": 0.7663992047309875, | |
| "epoch": 0.014809489575844716, | |
| "kl_loss": 3048.7392578125, | |
| "loss_ib": 152.46478271484375, | |
| "step": 103 | |
| }, | |
| { | |
| "ce_ib": 58.38852310180664, | |
| "ce_orig": 1.1540484428405762, | |
| "epoch": 0.014953271028037384, | |
| "kl_loss": 2707.0400390625, | |
| "loss_ib": 135.38121032714844, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.01509705248023005, | |
| "grad_norm": 2202.033447265625, | |
| "learning_rate": 7.5878594249201285e-06, | |
| "loss": 148.0272, | |
| "step": 105 | |
| }, | |
| { | |
| "ce_ib": 58.33769607543945, | |
| "ce_orig": 1.282652735710144, | |
| "epoch": 0.01509705248023005, | |
| "kl_loss": 2638.0634765625, | |
| "loss_ib": 131.93235778808594, | |
| "step": 105 | |
| }, | |
| { | |
| "ce_ib": 57.01485061645508, | |
| "ce_orig": 1.1682567596435547, | |
| "epoch": 0.015240833932422718, | |
| "kl_loss": 2910.34326171875, | |
| "loss_ib": 145.54566955566406, | |
| "step": 106 | |
| }, | |
| { | |
| "ce_ib": 56.478248596191406, | |
| "ce_orig": 1.093648910522461, | |
| "epoch": 0.015384615384615385, | |
| "kl_loss": 2749.603759765625, | |
| "loss_ib": 137.5084228515625, | |
| "step": 107 | |
| }, | |
| { | |
| "ce_ib": 55.969581604003906, | |
| "ce_orig": 0.8221930861473083, | |
| "epoch": 0.015528396836808052, | |
| "kl_loss": 2742.017822265625, | |
| "loss_ib": 137.12889099121094, | |
| "step": 108 | |
| }, | |
| { | |
| "ce_ib": 55.39336013793945, | |
| "ce_orig": 1.138152837753296, | |
| "epoch": 0.015672178289000718, | |
| "kl_loss": 2064.01513671875, | |
| "loss_ib": 103.22845458984375, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.015815959741193385, | |
| "grad_norm": 1960.8631591796875, | |
| "learning_rate": 7.987220447284345e-06, | |
| "loss": 139.8337, | |
| "step": 110 | |
| }, | |
| { | |
| "ce_ib": 57.7611198425293, | |
| "ce_orig": 0.8048346042633057, | |
| "epoch": 0.015815959741193385, | |
| "kl_loss": 2746.037353515625, | |
| "loss_ib": 137.33074951171875, | |
| "step": 110 | |
| }, | |
| { | |
| "ce_ib": 54.72801208496094, | |
| "ce_orig": 0.9340922236442566, | |
| "epoch": 0.015959741193386053, | |
| "kl_loss": 2660.56982421875, | |
| "loss_ib": 133.0558624267578, | |
| "step": 111 | |
| }, | |
| { | |
| "ce_ib": 56.28373336791992, | |
| "ce_orig": 1.2209872007369995, | |
| "epoch": 0.01610352264557872, | |
| "kl_loss": 2672.66015625, | |
| "loss_ib": 133.66114807128906, | |
| "step": 112 | |
| }, | |
| { | |
| "ce_ib": 55.83729553222656, | |
| "ce_orig": 1.1345103979110718, | |
| "epoch": 0.01624730409777139, | |
| "kl_loss": 2844.884765625, | |
| "loss_ib": 142.27215576171875, | |
| "step": 113 | |
| }, | |
| { | |
| "ce_ib": 55.07097625732422, | |
| "ce_orig": 1.2232747077941895, | |
| "epoch": 0.016391085549964053, | |
| "kl_loss": 2053.98974609375, | |
| "loss_ib": 102.7270278930664, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.01653486700215672, | |
| "grad_norm": 2072.072998046875, | |
| "learning_rate": 8.386581469648563e-06, | |
| "loss": 135.2724, | |
| "step": 115 | |
| }, | |
| { | |
| "ce_ib": 56.160423278808594, | |
| "ce_orig": 0.8365716338157654, | |
| "epoch": 0.01653486700215672, | |
| "kl_loss": 2660.091796875, | |
| "loss_ib": 133.0326690673828, | |
| "step": 115 | |
| }, | |
| { | |
| "ce_ib": 54.47496795654297, | |
| "ce_orig": 1.0809355974197388, | |
| "epoch": 0.01667864845434939, | |
| "kl_loss": 2690.080078125, | |
| "loss_ib": 134.53123474121094, | |
| "step": 116 | |
| }, | |
| { | |
| "ce_ib": 53.27537155151367, | |
| "ce_orig": 1.1820772886276245, | |
| "epoch": 0.016822429906542057, | |
| "kl_loss": 2644.28173828125, | |
| "loss_ib": 132.24072265625, | |
| "step": 117 | |
| }, | |
| { | |
| "ce_ib": 54.39961242675781, | |
| "ce_orig": 1.2899582386016846, | |
| "epoch": 0.016966211358734724, | |
| "kl_loss": 2602.2080078125, | |
| "loss_ib": 130.13760375976562, | |
| "step": 118 | |
| }, | |
| { | |
| "ce_ib": 52.184234619140625, | |
| "ce_orig": 0.8353484869003296, | |
| "epoch": 0.01710999281092739, | |
| "kl_loss": 2481.124267578125, | |
| "loss_ib": 124.0822982788086, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.017253774263120056, | |
| "grad_norm": 2074.370361328125, | |
| "learning_rate": 8.78594249201278e-06, | |
| "loss": 132.3383, | |
| "step": 120 | |
| }, | |
| { | |
| "ce_ib": 53.626792907714844, | |
| "ce_orig": 0.689016580581665, | |
| "epoch": 0.017253774263120056, | |
| "kl_loss": 2638.05810546875, | |
| "loss_ib": 131.9297332763672, | |
| "step": 120 | |
| }, | |
| { | |
| "ce_ib": 52.17591857910156, | |
| "ce_orig": 0.7896418571472168, | |
| "epoch": 0.017397555715312724, | |
| "kl_loss": 1895.728515625, | |
| "loss_ib": 94.81251525878906, | |
| "step": 121 | |
| }, | |
| { | |
| "ce_ib": 52.534664154052734, | |
| "ce_orig": 0.7579004764556885, | |
| "epoch": 0.017541337167505392, | |
| "kl_loss": 2552.994873046875, | |
| "loss_ib": 127.67601776123047, | |
| "step": 122 | |
| }, | |
| { | |
| "ce_ib": 52.31755065917969, | |
| "ce_orig": 0.9803644418716431, | |
| "epoch": 0.01768511861969806, | |
| "kl_loss": 2607.78662109375, | |
| "loss_ib": 130.41549682617188, | |
| "step": 123 | |
| }, | |
| { | |
| "ce_ib": 53.674556732177734, | |
| "ce_orig": 0.8895677328109741, | |
| "epoch": 0.017828900071890728, | |
| "kl_loss": 2425.487548828125, | |
| "loss_ib": 121.30122375488281, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.017972681524083392, | |
| "grad_norm": 1952.8394775390625, | |
| "learning_rate": 9.185303514376996e-06, | |
| "loss": 122.1088, | |
| "step": 125 | |
| }, | |
| { | |
| "ce_ib": 53.564842224121094, | |
| "ce_orig": 1.256234049797058, | |
| "epoch": 0.017972681524083392, | |
| "kl_loss": 2441.9111328125, | |
| "loss_ib": 122.1223373413086, | |
| "step": 125 | |
| }, | |
| { | |
| "ce_ib": 52.68410110473633, | |
| "ce_orig": 1.2045953273773193, | |
| "epoch": 0.01811646297627606, | |
| "kl_loss": 2293.639404296875, | |
| "loss_ib": 114.70831298828125, | |
| "step": 126 | |
| }, | |
| { | |
| "ce_ib": 54.133914947509766, | |
| "ce_orig": 1.514359712600708, | |
| "epoch": 0.018260244428468728, | |
| "kl_loss": 2311.011962890625, | |
| "loss_ib": 115.5776596069336, | |
| "step": 127 | |
| }, | |
| { | |
| "ce_ib": 52.44422149658203, | |
| "ce_orig": 0.9751385450363159, | |
| "epoch": 0.018404025880661395, | |
| "kl_loss": 2449.00048828125, | |
| "loss_ib": 122.47624969482422, | |
| "step": 128 | |
| }, | |
| { | |
| "ce_ib": 50.06406021118164, | |
| "ce_orig": 0.7257946729660034, | |
| "epoch": 0.018547807332854063, | |
| "kl_loss": 2270.14794921875, | |
| "loss_ib": 113.53243255615234, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.018691588785046728, | |
| "grad_norm": 1946.055419921875, | |
| "learning_rate": 9.584664536741214e-06, | |
| "loss": 119.3412, | |
| "step": 130 | |
| }, | |
| { | |
| "ce_ib": 53.00954055786133, | |
| "ce_orig": 1.0300298929214478, | |
| "epoch": 0.018691588785046728, | |
| "kl_loss": 2245.056640625, | |
| "loss_ib": 112.27933502197266, | |
| "step": 130 | |
| }, | |
| { | |
| "ce_ib": 50.96159362792969, | |
| "ce_orig": 0.8871611952781677, | |
| "epoch": 0.018835370237239395, | |
| "kl_loss": 2287.427734375, | |
| "loss_ib": 114.39686584472656, | |
| "step": 131 | |
| }, | |
| { | |
| "ce_ib": 52.4853515625, | |
| "ce_orig": 1.039488673210144, | |
| "epoch": 0.018979151689432063, | |
| "kl_loss": 2313.669921875, | |
| "loss_ib": 115.7097396850586, | |
| "step": 132 | |
| }, | |
| { | |
| "ce_ib": 51.21004104614258, | |
| "ce_orig": 0.845194935798645, | |
| "epoch": 0.01912293314162473, | |
| "kl_loss": 2127.791259765625, | |
| "loss_ib": 106.41517639160156, | |
| "step": 133 | |
| }, | |
| { | |
| "ce_ib": 52.13531494140625, | |
| "ce_orig": 1.1372461318969727, | |
| "epoch": 0.0192667145938174, | |
| "kl_loss": 2183.52783203125, | |
| "loss_ib": 109.20245361328125, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.019410496046010063, | |
| "grad_norm": 1868.0174560546875, | |
| "learning_rate": 9.984025559105432e-06, | |
| "loss": 111.8021, | |
| "step": 135 | |
| }, | |
| { | |
| "ce_ib": 50.40250015258789, | |
| "ce_orig": 0.5799722075462341, | |
| "epoch": 0.019410496046010063, | |
| "kl_loss": 2218.98046875, | |
| "loss_ib": 110.9742202758789, | |
| "step": 135 | |
| }, | |
| { | |
| "ce_ib": 49.68068313598633, | |
| "ce_orig": 0.9591949582099915, | |
| "epoch": 0.01955427749820273, | |
| "kl_loss": 2150.03564453125, | |
| "loss_ib": 107.52662658691406, | |
| "step": 136 | |
| }, | |
| { | |
| "ce_ib": 49.44940185546875, | |
| "ce_orig": 0.8390080332756042, | |
| "epoch": 0.0196980589503954, | |
| "kl_loss": 1845.8321533203125, | |
| "loss_ib": 92.31633758544922, | |
| "step": 137 | |
| }, | |
| { | |
| "ce_ib": 52.21055221557617, | |
| "ce_orig": 1.4367856979370117, | |
| "epoch": 0.019841840402588067, | |
| "kl_loss": 2051.351806640625, | |
| "loss_ib": 102.59370422363281, | |
| "step": 138 | |
| }, | |
| { | |
| "ce_ib": 50.15422058105469, | |
| "ce_orig": 1.2788512706756592, | |
| "epoch": 0.019985621854780734, | |
| "kl_loss": 2027.796875, | |
| "loss_ib": 101.41492462158203, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.020129403306973402, | |
| "grad_norm": 1761.1328125, | |
| "learning_rate": 1.038338658146965e-05, | |
| "loss": 102.0922, | |
| "step": 140 | |
| }, | |
| { | |
| "ce_ib": 50.65812301635742, | |
| "ce_orig": 1.2148778438568115, | |
| "epoch": 0.020129403306973402, | |
| "kl_loss": 1903.0653076171875, | |
| "loss_ib": 95.17859649658203, | |
| "step": 140 | |
| }, | |
| { | |
| "ce_ib": 49.68356704711914, | |
| "ce_orig": 1.0471272468566895, | |
| "epoch": 0.020273184759166066, | |
| "kl_loss": 1945.1756591796875, | |
| "loss_ib": 97.28362274169922, | |
| "step": 141 | |
| }, | |
| { | |
| "ce_ib": 47.467594146728516, | |
| "ce_orig": 0.6498449444770813, | |
| "epoch": 0.020416966211358734, | |
| "kl_loss": 1910.508544921875, | |
| "loss_ib": 95.54916381835938, | |
| "step": 142 | |
| }, | |
| { | |
| "ce_ib": 47.57759094238281, | |
| "ce_orig": 0.8552805781364441, | |
| "epoch": 0.020560747663551402, | |
| "kl_loss": 1907.689697265625, | |
| "loss_ib": 95.40827941894531, | |
| "step": 143 | |
| }, | |
| { | |
| "ce_ib": 50.04168701171875, | |
| "ce_orig": 1.1924092769622803, | |
| "epoch": 0.02070452911574407, | |
| "kl_loss": 1690.5194091796875, | |
| "loss_ib": 84.55098724365234, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.020848310567936738, | |
| "grad_norm": 1661.7161865234375, | |
| "learning_rate": 1.0782747603833867e-05, | |
| "loss": 95.0661, | |
| "step": 145 | |
| }, | |
| { | |
| "ce_ib": 48.34769821166992, | |
| "ce_orig": 1.0676274299621582, | |
| "epoch": 0.020848310567936738, | |
| "kl_loss": 1760.777099609375, | |
| "loss_ib": 88.06303405761719, | |
| "step": 145 | |
| }, | |
| { | |
| "ce_ib": 47.0106201171875, | |
| "ce_orig": 1.2353283166885376, | |
| "epoch": 0.020992092020129402, | |
| "kl_loss": 1798.0538330078125, | |
| "loss_ib": 89.92620086669922, | |
| "step": 146 | |
| }, | |
| { | |
| "ce_ib": 48.50465393066406, | |
| "ce_orig": 1.0733487606048584, | |
| "epoch": 0.02113587347232207, | |
| "kl_loss": 1734.2774658203125, | |
| "loss_ib": 86.73812866210938, | |
| "step": 147 | |
| }, | |
| { | |
| "ce_ib": 45.04286193847656, | |
| "ce_orig": 0.8353049159049988, | |
| "epoch": 0.021279654924514738, | |
| "kl_loss": 1773.991943359375, | |
| "loss_ib": 88.72212219238281, | |
| "step": 148 | |
| }, | |
| { | |
| "ce_ib": 47.320526123046875, | |
| "ce_orig": 1.219022512435913, | |
| "epoch": 0.021423436376707405, | |
| "kl_loss": 1750.5101318359375, | |
| "loss_ib": 87.54916381835938, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.021567217828900073, | |
| "grad_norm": 1564.1060791015625, | |
| "learning_rate": 1.1182108626198083e-05, | |
| "loss": 87.3973, | |
| "step": 150 | |
| }, | |
| { | |
| "ce_ib": 47.08059310913086, | |
| "ce_orig": 0.8302248120307922, | |
| "epoch": 0.021567217828900073, | |
| "kl_loss": 1589.67822265625, | |
| "loss_ib": 79.50745391845703, | |
| "step": 150 | |
| }, | |
| { | |
| "ce_ib": 47.74165725708008, | |
| "ce_orig": 0.7171430587768555, | |
| "epoch": 0.021710999281092738, | |
| "kl_loss": 1636.097412109375, | |
| "loss_ib": 81.82874298095703, | |
| "step": 151 | |
| }, | |
| { | |
| "ce_ib": 46.43238830566406, | |
| "ce_orig": 0.7868078947067261, | |
| "epoch": 0.021854780733285405, | |
| "kl_loss": 1594.2950439453125, | |
| "loss_ib": 79.73796844482422, | |
| "step": 152 | |
| }, | |
| { | |
| "ce_ib": 44.30559539794922, | |
| "ce_orig": 1.0339301824569702, | |
| "epoch": 0.021998562185478073, | |
| "kl_loss": 1573.078857421875, | |
| "loss_ib": 78.67610168457031, | |
| "step": 153 | |
| }, | |
| { | |
| "ce_ib": 44.82426834106445, | |
| "ce_orig": 1.0889431238174438, | |
| "epoch": 0.02214234363767074, | |
| "kl_loss": 1503.557861328125, | |
| "loss_ib": 75.20030975341797, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.02228612508986341, | |
| "grad_norm": 1438.786376953125, | |
| "learning_rate": 1.1581469648562302e-05, | |
| "loss": 77.0769, | |
| "step": 155 | |
| }, | |
| { | |
| "ce_ib": 46.833580017089844, | |
| "ce_orig": 1.3186697959899902, | |
| "epoch": 0.02228612508986341, | |
| "kl_loss": 1401.23486328125, | |
| "loss_ib": 70.08516693115234, | |
| "step": 155 | |
| }, | |
| { | |
| "ce_ib": 46.787254333496094, | |
| "ce_orig": 0.9530097246170044, | |
| "epoch": 0.022429906542056073, | |
| "kl_loss": 1483.8673095703125, | |
| "loss_ib": 74.21675872802734, | |
| "step": 156 | |
| }, | |
| { | |
| "ce_ib": 46.81201171875, | |
| "ce_orig": 1.1565759181976318, | |
| "epoch": 0.02257368799424874, | |
| "kl_loss": 1341.5926513671875, | |
| "loss_ib": 67.10304260253906, | |
| "step": 157 | |
| }, | |
| { | |
| "ce_ib": 45.95816421508789, | |
| "ce_orig": 1.2070651054382324, | |
| "epoch": 0.02271746944644141, | |
| "kl_loss": 1238.02880859375, | |
| "loss_ib": 61.92441940307617, | |
| "step": 158 | |
| }, | |
| { | |
| "ce_ib": 43.655330657958984, | |
| "ce_orig": 0.7560437321662903, | |
| "epoch": 0.022861250898634077, | |
| "kl_loss": 1213.171875, | |
| "loss_ib": 60.680419921875, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.023005032350826744, | |
| "grad_norm": 1232.082763671875, | |
| "learning_rate": 1.1980830670926518e-05, | |
| "loss": 66.2022, | |
| "step": 160 | |
| }, | |
| { | |
| "ce_ib": 46.1483268737793, | |
| "ce_orig": 1.2238253355026245, | |
| "epoch": 0.023005032350826744, | |
| "kl_loss": 1228.1053466796875, | |
| "loss_ib": 61.428340911865234, | |
| "step": 160 | |
| }, | |
| { | |
| "ce_ib": 47.45783996582031, | |
| "ce_orig": 1.587011694908142, | |
| "epoch": 0.023148813803019412, | |
| "kl_loss": 1157.474365234375, | |
| "loss_ib": 57.8974494934082, | |
| "step": 161 | |
| }, | |
| { | |
| "ce_ib": 45.567657470703125, | |
| "ce_orig": 1.3450465202331543, | |
| "epoch": 0.023292595255212076, | |
| "kl_loss": 1117.0738525390625, | |
| "loss_ib": 55.8764762878418, | |
| "step": 162 | |
| }, | |
| { | |
| "ce_ib": 44.280250549316406, | |
| "ce_orig": 1.2833943367004395, | |
| "epoch": 0.023436376707404744, | |
| "kl_loss": 1064.94677734375, | |
| "loss_ib": 53.26947784423828, | |
| "step": 163 | |
| }, | |
| { | |
| "ce_ib": 43.552276611328125, | |
| "ce_orig": 0.7282137870788574, | |
| "epoch": 0.023580158159597412, | |
| "kl_loss": 1048.7855224609375, | |
| "loss_ib": 52.461055755615234, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.02372393961179008, | |
| "grad_norm": 1095.414794921875, | |
| "learning_rate": 1.2380191693290735e-05, | |
| "loss": 56.1672, | |
| "step": 165 | |
| }, | |
| { | |
| "ce_ib": 43.77597427368164, | |
| "ce_orig": 1.015390157699585, | |
| "epoch": 0.02372393961179008, | |
| "kl_loss": 996.73828125, | |
| "loss_ib": 49.858802795410156, | |
| "step": 165 | |
| }, | |
| { | |
| "ce_ib": 43.67955017089844, | |
| "ce_orig": 1.0842498540878296, | |
| "epoch": 0.023867721063982748, | |
| "kl_loss": 947.7979736328125, | |
| "loss_ib": 47.411739349365234, | |
| "step": 166 | |
| }, | |
| { | |
| "ce_ib": 42.23929214477539, | |
| "ce_orig": 0.9063572287559509, | |
| "epoch": 0.024011502516175412, | |
| "kl_loss": 897.7328491210938, | |
| "loss_ib": 44.90776062011719, | |
| "step": 167 | |
| }, | |
| { | |
| "ce_ib": 45.15175247192383, | |
| "ce_orig": 1.2839014530181885, | |
| "epoch": 0.02415528396836808, | |
| "kl_loss": 786.1021728515625, | |
| "loss_ib": 39.32768630981445, | |
| "step": 168 | |
| }, | |
| { | |
| "ce_ib": 43.47494125366211, | |
| "ce_orig": 0.9718037247657776, | |
| "epoch": 0.024299065420560748, | |
| "kl_loss": 860.2787475585938, | |
| "loss_ib": 43.035675048828125, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.024442846872753415, | |
| "grad_norm": 1042.965087890625, | |
| "learning_rate": 1.2779552715654951e-05, | |
| "loss": 47.1098, | |
| "step": 170 | |
| }, | |
| { | |
| "ce_ib": 43.44833755493164, | |
| "ce_orig": 1.3617149591445923, | |
| "epoch": 0.024442846872753415, | |
| "kl_loss": 803.04443359375, | |
| "loss_ib": 40.173946380615234, | |
| "step": 170 | |
| }, | |
| { | |
| "ce_ib": 43.578792572021484, | |
| "ce_orig": 0.9456126093864441, | |
| "epoch": 0.024586628324946083, | |
| "kl_loss": 699.483642578125, | |
| "loss_ib": 34.9959716796875, | |
| "step": 171 | |
| }, | |
| { | |
| "ce_ib": 47.23983383178711, | |
| "ce_orig": 1.8550169467926025, | |
| "epoch": 0.024730409777138748, | |
| "kl_loss": 675.0830688476562, | |
| "loss_ib": 33.777774810791016, | |
| "step": 172 | |
| }, | |
| { | |
| "ce_ib": 43.85140609741211, | |
| "ce_orig": 1.138585090637207, | |
| "epoch": 0.024874191229331415, | |
| "kl_loss": 659.050537109375, | |
| "loss_ib": 32.97445297241211, | |
| "step": 173 | |
| }, | |
| { | |
| "ce_ib": 41.722373962402344, | |
| "ce_orig": 1.243048906326294, | |
| "epoch": 0.025017972681524083, | |
| "kl_loss": 590.1033325195312, | |
| "loss_ib": 29.526029586791992, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.02516175413371675, | |
| "grad_norm": 766.4675903320312, | |
| "learning_rate": 1.3178913738019169e-05, | |
| "loss": 35.4625, | |
| "step": 175 | |
| }, | |
| { | |
| "ce_ib": 44.18523406982422, | |
| "ce_orig": 1.255010962486267, | |
| "epoch": 0.02516175413371675, | |
| "kl_loss": 592.2353515625, | |
| "loss_ib": 29.633859634399414, | |
| "step": 175 | |
| }, | |
| { | |
| "ce_ib": 46.14919662475586, | |
| "ce_orig": 0.9867228865623474, | |
| "epoch": 0.02530553558590942, | |
| "kl_loss": 509.3572692871094, | |
| "loss_ib": 25.490938186645508, | |
| "step": 176 | |
| }, | |
| { | |
| "ce_ib": 46.58631134033203, | |
| "ce_orig": 1.1491925716400146, | |
| "epoch": 0.025449317038102087, | |
| "kl_loss": 459.72491455078125, | |
| "loss_ib": 23.009538650512695, | |
| "step": 177 | |
| }, | |
| { | |
| "ce_ib": 40.569705963134766, | |
| "ce_orig": 0.7460018396377563, | |
| "epoch": 0.02559309849029475, | |
| "kl_loss": 352.4381103515625, | |
| "loss_ib": 17.64219093322754, | |
| "step": 178 | |
| }, | |
| { | |
| "ce_ib": 46.841224670410156, | |
| "ce_orig": 1.1470024585723877, | |
| "epoch": 0.02573687994248742, | |
| "kl_loss": 389.03765869140625, | |
| "loss_ib": 19.475303649902344, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.025880661394680086, | |
| "grad_norm": 621.7545776367188, | |
| "learning_rate": 1.3578274760383387e-05, | |
| "loss": 24.304, | |
| "step": 180 | |
| }, | |
| { | |
| "ce_ib": 47.13188552856445, | |
| "ce_orig": 1.330838680267334, | |
| "epoch": 0.025880661394680086, | |
| "kl_loss": 379.19989013671875, | |
| "loss_ib": 18.98356056213379, | |
| "step": 180 | |
| }, | |
| { | |
| "ce_ib": 52.806861877441406, | |
| "ce_orig": 2.114457130432129, | |
| "epoch": 0.026024442846872754, | |
| "kl_loss": 284.24114990234375, | |
| "loss_ib": 14.2384614944458, | |
| "step": 181 | |
| }, | |
| { | |
| "ce_ib": 64.3398208618164, | |
| "ce_orig": 0.9137269854545593, | |
| "epoch": 0.026168224299065422, | |
| "kl_loss": 240.51211547851562, | |
| "loss_ib": 12.05777645111084, | |
| "step": 182 | |
| }, | |
| { | |
| "ce_ib": 62.500736236572266, | |
| "ce_orig": 1.4484410285949707, | |
| "epoch": 0.026312005751258086, | |
| "kl_loss": 223.3756103515625, | |
| "loss_ib": 11.200030326843262, | |
| "step": 183 | |
| }, | |
| { | |
| "ce_ib": 70.71409606933594, | |
| "ce_orig": 1.7095311880111694, | |
| "epoch": 0.026455787203450754, | |
| "kl_loss": 207.35211181640625, | |
| "loss_ib": 10.402962684631348, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.026599568655643422, | |
| "grad_norm": 365.3516845703125, | |
| "learning_rate": 1.3977635782747606e-05, | |
| "loss": 14.3906, | |
| "step": 185 | |
| }, | |
| { | |
| "ce_ib": 63.17552947998047, | |
| "ce_orig": 2.531080484390259, | |
| "epoch": 0.026599568655643422, | |
| "kl_loss": 165.71923828125, | |
| "loss_ib": 8.317549705505371, | |
| "step": 185 | |
| }, | |
| { | |
| "ce_ib": 84.16175842285156, | |
| "ce_orig": 2.0548126697540283, | |
| "epoch": 0.02674335010783609, | |
| "kl_loss": 140.68856811523438, | |
| "loss_ib": 7.07650899887085, | |
| "step": 186 | |
| }, | |
| { | |
| "ce_ib": 77.55998992919922, | |
| "ce_orig": 1.809746503829956, | |
| "epoch": 0.026887131560028758, | |
| "kl_loss": 121.14314270019531, | |
| "loss_ib": 6.095937252044678, | |
| "step": 187 | |
| }, | |
| { | |
| "ce_ib": 77.81259155273438, | |
| "ce_orig": 1.9566165208816528, | |
| "epoch": 0.027030913012221422, | |
| "kl_loss": 101.40348815917969, | |
| "loss_ib": 5.109080791473389, | |
| "step": 188 | |
| }, | |
| { | |
| "ce_ib": 69.3860092163086, | |
| "ce_orig": 1.703246831893921, | |
| "epoch": 0.02717469446441409, | |
| "kl_loss": 93.15800476074219, | |
| "loss_ib": 4.692593574523926, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.027318475916606758, | |
| "grad_norm": 189.81011962890625, | |
| "learning_rate": 1.4376996805111822e-05, | |
| "loss": 8.0904, | |
| "step": 190 | |
| }, | |
| { | |
| "ce_ib": 60.081260681152344, | |
| "ce_orig": 1.8992141485214233, | |
| "epoch": 0.027318475916606758, | |
| "kl_loss": 100.07512664794922, | |
| "loss_ib": 5.033797264099121, | |
| "step": 190 | |
| }, | |
| { | |
| "ce_ib": 58.84217834472656, | |
| "ce_orig": 1.018802285194397, | |
| "epoch": 0.027462257368799425, | |
| "kl_loss": 72.03784942626953, | |
| "loss_ib": 3.6313138008117676, | |
| "step": 191 | |
| }, | |
| { | |
| "ce_ib": 58.045005798339844, | |
| "ce_orig": 1.0986676216125488, | |
| "epoch": 0.027606038820992093, | |
| "kl_loss": 78.45074462890625, | |
| "loss_ib": 3.9515597820281982, | |
| "step": 192 | |
| }, | |
| { | |
| "ce_ib": 53.310672760009766, | |
| "ce_orig": 1.2137010097503662, | |
| "epoch": 0.027749820273184757, | |
| "kl_loss": 56.91468811035156, | |
| "loss_ib": 2.872389793395996, | |
| "step": 193 | |
| }, | |
| { | |
| "ce_ib": 47.84365463256836, | |
| "ce_orig": 1.141069769859314, | |
| "epoch": 0.027893601725377425, | |
| "kl_loss": 49.71630096435547, | |
| "loss_ib": 2.5097367763519287, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.028037383177570093, | |
| "grad_norm": 102.58991241455078, | |
| "learning_rate": 1.477635782747604e-05, | |
| "loss": 4.9077, | |
| "step": 195 | |
| }, | |
| { | |
| "ce_ib": 47.62435531616211, | |
| "ce_orig": 1.2880587577819824, | |
| "epoch": 0.028037383177570093, | |
| "kl_loss": 51.41084289550781, | |
| "loss_ib": 2.5943543910980225, | |
| "step": 195 | |
| }, | |
| { | |
| "ce_ib": 45.36109161376953, | |
| "ce_orig": 0.8635546565055847, | |
| "epoch": 0.02818116462976276, | |
| "kl_loss": 44.00239181518555, | |
| "loss_ib": 2.2228000164031982, | |
| "step": 196 | |
| }, | |
| { | |
| "ce_ib": 41.63899230957031, | |
| "ce_orig": 1.1706253290176392, | |
| "epoch": 0.02832494608195543, | |
| "kl_loss": 40.91154479980469, | |
| "loss_ib": 2.066396713256836, | |
| "step": 197 | |
| }, | |
| { | |
| "ce_ib": 40.076194763183594, | |
| "ce_orig": 0.9194042682647705, | |
| "epoch": 0.028468727534148097, | |
| "kl_loss": 38.050926208496094, | |
| "loss_ib": 1.9225844144821167, | |
| "step": 198 | |
| }, | |
| { | |
| "ce_ib": 35.384429931640625, | |
| "ce_orig": 0.8929119110107422, | |
| "epoch": 0.02861250898634076, | |
| "kl_loss": 40.8455924987793, | |
| "loss_ib": 2.059971809387207, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.02875629043853343, | |
| "grad_norm": 53.01378631591797, | |
| "learning_rate": 1.5175718849840257e-05, | |
| "loss": 3.3711, | |
| "step": 200 | |
| }, | |
| { | |
| "ce_ib": 37.04123306274414, | |
| "ce_orig": 0.9671800136566162, | |
| "epoch": 0.02875629043853343, | |
| "kl_loss": 35.982879638671875, | |
| "loss_ib": 1.8176645040512085, | |
| "step": 200 | |
| }, | |
| { | |
| "ce_ib": 37.61642837524414, | |
| "ce_orig": 1.3750962018966675, | |
| "epoch": 0.028900071890726096, | |
| "kl_loss": 30.0595703125, | |
| "loss_ib": 1.5217866897583008, | |
| "step": 201 | |
| }, | |
| { | |
| "ce_ib": 40.238006591796875, | |
| "ce_orig": 1.7766847610473633, | |
| "epoch": 0.029043853342918764, | |
| "kl_loss": 26.227649688720703, | |
| "loss_ib": 1.3315014839172363, | |
| "step": 202 | |
| }, | |
| { | |
| "ce_ib": 38.055755615234375, | |
| "ce_orig": 1.213000774383545, | |
| "epoch": 0.029187634795111432, | |
| "kl_loss": 29.874595642089844, | |
| "loss_ib": 1.512757658958435, | |
| "step": 203 | |
| }, | |
| { | |
| "ce_ib": 32.915287017822266, | |
| "ce_orig": 0.86496502161026, | |
| "epoch": 0.029331416247304096, | |
| "kl_loss": 30.330623626708984, | |
| "loss_ib": 1.5329889059066772, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.029475197699496764, | |
| "grad_norm": 23.908042907714844, | |
| "learning_rate": 1.5575079872204475e-05, | |
| "loss": 2.7418, | |
| "step": 205 | |
| }, | |
| { | |
| "ce_ib": 35.01918411254883, | |
| "ce_orig": 1.2721844911575317, | |
| "epoch": 0.029475197699496764, | |
| "kl_loss": 26.31290054321289, | |
| "loss_ib": 1.333154559135437, | |
| "step": 205 | |
| }, | |
| { | |
| "ce_ib": 32.87287139892578, | |
| "ce_orig": 1.1400614976882935, | |
| "epoch": 0.029618979151689432, | |
| "kl_loss": 26.177352905273438, | |
| "loss_ib": 1.3253041505813599, | |
| "step": 206 | |
| }, | |
| { | |
| "ce_ib": 35.85771942138672, | |
| "ce_orig": 1.3444130420684814, | |
| "epoch": 0.0297627606038821, | |
| "kl_loss": 34.98625183105469, | |
| "loss_ib": 1.7672414779663086, | |
| "step": 207 | |
| }, | |
| { | |
| "ce_ib": 31.547334671020508, | |
| "ce_orig": 0.9652504920959473, | |
| "epoch": 0.029906542056074768, | |
| "kl_loss": 20.096208572387695, | |
| "loss_ib": 1.0205841064453125, | |
| "step": 208 | |
| }, | |
| { | |
| "ce_ib": 32.527896881103516, | |
| "ce_orig": 1.0664165019989014, | |
| "epoch": 0.030050323508267432, | |
| "kl_loss": 20.749666213989258, | |
| "loss_ib": 1.053747296333313, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.0301941049604601, | |
| "grad_norm": 22.29879379272461, | |
| "learning_rate": 1.597444089456869e-05, | |
| "loss": 2.3639, | |
| "step": 210 | |
| }, | |
| { | |
| "ce_ib": 32.71118927001953, | |
| "ce_orig": 1.2022026777267456, | |
| "epoch": 0.0301941049604601, | |
| "kl_loss": 20.01553726196289, | |
| "loss_ib": 1.0171325206756592, | |
| "step": 210 | |
| }, | |
| { | |
| "ce_ib": 30.730520248413086, | |
| "ce_orig": 0.8476402163505554, | |
| "epoch": 0.030337886412652768, | |
| "kl_loss": 17.856157302856445, | |
| "loss_ib": 0.9081730842590332, | |
| "step": 211 | |
| }, | |
| { | |
| "ce_ib": 29.36618995666504, | |
| "ce_orig": 0.7845667004585266, | |
| "epoch": 0.030481667864845435, | |
| "kl_loss": 18.906429290771484, | |
| "loss_ib": 0.9600045084953308, | |
| "step": 212 | |
| }, | |
| { | |
| "ce_ib": 27.0706844329834, | |
| "ce_orig": 0.3370642066001892, | |
| "epoch": 0.030625449317038103, | |
| "kl_loss": 18.621246337890625, | |
| "loss_ib": 0.9445976614952087, | |
| "step": 213 | |
| }, | |
| { | |
| "ce_ib": 34.2156867980957, | |
| "ce_orig": 0.6062073111534119, | |
| "epoch": 0.03076923076923077, | |
| "kl_loss": 19.93109893798828, | |
| "loss_ib": 1.0136628150939941, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.030913012221423435, | |
| "grad_norm": 16.176197052001953, | |
| "learning_rate": 1.6373801916932906e-05, | |
| "loss": 1.8299, | |
| "step": 215 | |
| }, | |
| { | |
| "ce_ib": 28.275493621826172, | |
| "ce_orig": 1.0786405801773071, | |
| "epoch": 0.030913012221423435, | |
| "kl_loss": 18.11065673828125, | |
| "loss_ib": 0.919670581817627, | |
| "step": 215 | |
| }, | |
| { | |
| "ce_ib": 32.22303009033203, | |
| "ce_orig": 0.8485822081565857, | |
| "epoch": 0.031056793673616103, | |
| "kl_loss": 19.91845703125, | |
| "loss_ib": 1.012034296989441, | |
| "step": 216 | |
| }, | |
| { | |
| "ce_ib": 27.742530822753906, | |
| "ce_orig": 0.8794713020324707, | |
| "epoch": 0.03120057512580877, | |
| "kl_loss": 14.383018493652344, | |
| "loss_ib": 0.7330222129821777, | |
| "step": 217 | |
| }, | |
| { | |
| "ce_ib": 31.794010162353516, | |
| "ce_orig": 0.6954683661460876, | |
| "epoch": 0.031344356578001435, | |
| "kl_loss": 16.9901180267334, | |
| "loss_ib": 0.86540287733078, | |
| "step": 218 | |
| }, | |
| { | |
| "ce_ib": 31.032941818237305, | |
| "ce_orig": 1.1841130256652832, | |
| "epoch": 0.0314881380301941, | |
| "kl_loss": 13.406427383422852, | |
| "loss_ib": 0.6858378648757935, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.03163191948238677, | |
| "grad_norm": 14.094661712646484, | |
| "learning_rate": 1.6773162939297126e-05, | |
| "loss": 1.8049, | |
| "step": 220 | |
| }, | |
| { | |
| "ce_ib": 32.347557067871094, | |
| "ce_orig": 0.7493203282356262, | |
| "epoch": 0.03163191948238677, | |
| "kl_loss": 18.79709815979004, | |
| "loss_ib": 0.9560286402702332, | |
| "step": 220 | |
| }, | |
| { | |
| "ce_ib": 31.1737060546875, | |
| "ce_orig": 1.0758991241455078, | |
| "epoch": 0.03177570093457944, | |
| "kl_loss": 20.572391510009766, | |
| "loss_ib": 1.0442065000534058, | |
| "step": 221 | |
| }, | |
| { | |
| "ce_ib": 30.046796798706055, | |
| "ce_orig": 0.6758080124855042, | |
| "epoch": 0.031919482386772106, | |
| "kl_loss": 13.740577697753906, | |
| "loss_ib": 0.7020522952079773, | |
| "step": 222 | |
| }, | |
| { | |
| "ce_ib": 30.532987594604492, | |
| "ce_orig": 1.0658819675445557, | |
| "epoch": 0.032063263838964774, | |
| "kl_loss": 11.958425521850586, | |
| "loss_ib": 0.6131877899169922, | |
| "step": 223 | |
| }, | |
| { | |
| "ce_ib": 31.667829513549805, | |
| "ce_orig": 1.1956491470336914, | |
| "epoch": 0.03220704529115744, | |
| "kl_loss": 12.607803344726562, | |
| "loss_ib": 0.6462240815162659, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.03235082674335011, | |
| "grad_norm": 5.280292987823486, | |
| "learning_rate": 1.7172523961661345e-05, | |
| "loss": 1.6789, | |
| "step": 225 | |
| }, | |
| { | |
| "ce_ib": 31.769432067871094, | |
| "ce_orig": 1.1676932573318481, | |
| "epoch": 0.03235082674335011, | |
| "kl_loss": 10.932788848876953, | |
| "loss_ib": 0.5625241994857788, | |
| "step": 225 | |
| }, | |
| { | |
| "ce_ib": 30.539987564086914, | |
| "ce_orig": 1.3033503293991089, | |
| "epoch": 0.03249460819554278, | |
| "kl_loss": 12.105158805847168, | |
| "loss_ib": 0.6205279231071472, | |
| "step": 226 | |
| }, | |
| { | |
| "ce_ib": 28.660764694213867, | |
| "ce_orig": 0.6191550493240356, | |
| "epoch": 0.032638389647735445, | |
| "kl_loss": 9.103292465209961, | |
| "loss_ib": 0.46949502825737, | |
| "step": 227 | |
| }, | |
| { | |
| "ce_ib": 29.167789459228516, | |
| "ce_orig": 0.7975085973739624, | |
| "epoch": 0.032782171099928106, | |
| "kl_loss": 10.556224822998047, | |
| "loss_ib": 0.5423951148986816, | |
| "step": 228 | |
| }, | |
| { | |
| "ce_ib": 29.07682991027832, | |
| "ce_orig": 1.0861161947250366, | |
| "epoch": 0.032925952552120774, | |
| "kl_loss": 9.905879974365234, | |
| "loss_ib": 0.5098324418067932, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.03306973400431344, | |
| "grad_norm": 5.249032974243164, | |
| "learning_rate": 1.757188498402556e-05, | |
| "loss": 1.5289, | |
| "step": 230 | |
| }, | |
| { | |
| "ce_ib": 28.152671813964844, | |
| "ce_orig": 0.8374654054641724, | |
| "epoch": 0.03306973400431344, | |
| "kl_loss": 10.608142852783203, | |
| "loss_ib": 0.544483482837677, | |
| "step": 230 | |
| }, | |
| { | |
| "ce_ib": 26.917634963989258, | |
| "ce_orig": 1.0145775079727173, | |
| "epoch": 0.03321351545650611, | |
| "kl_loss": 12.240139961242676, | |
| "loss_ib": 0.6254658102989197, | |
| "step": 231 | |
| }, | |
| { | |
| "ce_ib": 29.13026237487793, | |
| "ce_orig": 0.8860113024711609, | |
| "epoch": 0.03335729690869878, | |
| "kl_loss": 10.390130043029785, | |
| "loss_ib": 0.5340716242790222, | |
| "step": 232 | |
| }, | |
| { | |
| "ce_ib": 27.842693328857422, | |
| "ce_orig": 1.1651474237442017, | |
| "epoch": 0.033501078360891445, | |
| "kl_loss": 9.602649688720703, | |
| "loss_ib": 0.49405384063720703, | |
| "step": 233 | |
| }, | |
| { | |
| "ce_ib": 24.930410385131836, | |
| "ce_orig": 0.6166008710861206, | |
| "epoch": 0.03364485981308411, | |
| "kl_loss": 10.271495819091797, | |
| "loss_ib": 0.5260400176048279, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.03378864126527678, | |
| "grad_norm": 5.078007698059082, | |
| "learning_rate": 1.7971246006389777e-05, | |
| "loss": 1.4736, | |
| "step": 235 | |
| }, | |
| { | |
| "ce_ib": 27.262588500976562, | |
| "ce_orig": 1.0158146619796753, | |
| "epoch": 0.03378864126527678, | |
| "kl_loss": 10.76059627532959, | |
| "loss_ib": 0.5516611337661743, | |
| "step": 235 | |
| }, | |
| { | |
| "ce_ib": 28.12372398376465, | |
| "ce_orig": 0.9383306503295898, | |
| "epoch": 0.03393242271746945, | |
| "kl_loss": 9.931174278259277, | |
| "loss_ib": 0.5106205940246582, | |
| "step": 236 | |
| }, | |
| { | |
| "ce_ib": 27.61408233642578, | |
| "ce_orig": 1.3611609935760498, | |
| "epoch": 0.034076204169662117, | |
| "kl_loss": 11.292994499206543, | |
| "loss_ib": 0.5784568190574646, | |
| "step": 237 | |
| }, | |
| { | |
| "ce_ib": 25.42107582092285, | |
| "ce_orig": 0.8392754197120667, | |
| "epoch": 0.03421998562185478, | |
| "kl_loss": 10.153192520141602, | |
| "loss_ib": 0.5203701853752136, | |
| "step": 238 | |
| }, | |
| { | |
| "ce_ib": 26.299903869628906, | |
| "ce_orig": 0.8138580918312073, | |
| "epoch": 0.034363767074047445, | |
| "kl_loss": 10.076581001281738, | |
| "loss_ib": 0.5169789791107178, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.03450754852624011, | |
| "grad_norm": 1.6015193462371826, | |
| "learning_rate": 1.8370607028753993e-05, | |
| "loss": 1.371, | |
| "step": 240 | |
| }, | |
| { | |
| "ce_ib": 25.303178787231445, | |
| "ce_orig": 0.9828527569770813, | |
| "epoch": 0.03450754852624011, | |
| "kl_loss": 9.301372528076172, | |
| "loss_ib": 0.4777202308177948, | |
| "step": 240 | |
| }, | |
| { | |
| "ce_ib": 17.445037841796875, | |
| "ce_orig": 0.447143018245697, | |
| "epoch": 0.03465132997843278, | |
| "kl_loss": 7.682253837585449, | |
| "loss_ib": 0.39283522963523865, | |
| "step": 241 | |
| }, | |
| { | |
| "ce_ib": 25.544435501098633, | |
| "ce_orig": 0.531434953212738, | |
| "epoch": 0.03479511143062545, | |
| "kl_loss": 9.930435180664062, | |
| "loss_ib": 0.5092939734458923, | |
| "step": 242 | |
| }, | |
| { | |
| "ce_ib": 26.459491729736328, | |
| "ce_orig": 1.5432560443878174, | |
| "epoch": 0.034938892882818116, | |
| "kl_loss": 10.38063907623291, | |
| "loss_ib": 0.5322617292404175, | |
| "step": 243 | |
| }, | |
| { | |
| "ce_ib": 22.71752166748047, | |
| "ce_orig": 0.8145064115524292, | |
| "epoch": 0.035082674335010784, | |
| "kl_loss": 9.922914505004883, | |
| "loss_ib": 0.5075044631958008, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.03522645578720345, | |
| "grad_norm": 3.806140184402466, | |
| "learning_rate": 1.8769968051118212e-05, | |
| "loss": 1.4145, | |
| "step": 245 | |
| }, | |
| { | |
| "ce_ib": 26.927804946899414, | |
| "ce_orig": 0.44644680619239807, | |
| "epoch": 0.03522645578720345, | |
| "kl_loss": 9.268022537231445, | |
| "loss_ib": 0.4768650233745575, | |
| "step": 245 | |
| }, | |
| { | |
| "ce_ib": 25.422399520874023, | |
| "ce_orig": 1.1885180473327637, | |
| "epoch": 0.03537023723939612, | |
| "kl_loss": 8.077836990356445, | |
| "loss_ib": 0.41660305857658386, | |
| "step": 246 | |
| }, | |
| { | |
| "ce_ib": 25.16580581665039, | |
| "ce_orig": 0.6401370763778687, | |
| "epoch": 0.03551401869158879, | |
| "kl_loss": 8.176619529724121, | |
| "loss_ib": 0.4214138984680176, | |
| "step": 247 | |
| }, | |
| { | |
| "ce_ib": 21.501310348510742, | |
| "ce_orig": 0.7412813901901245, | |
| "epoch": 0.035657800143781455, | |
| "kl_loss": 10.056595802307129, | |
| "loss_ib": 0.5135805010795593, | |
| "step": 248 | |
| }, | |
| { | |
| "ce_ib": 24.469106674194336, | |
| "ce_orig": 0.9428795576095581, | |
| "epoch": 0.035801581595974116, | |
| "kl_loss": 8.948450088500977, | |
| "loss_ib": 0.45965704321861267, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.035945363048166784, | |
| "grad_norm": 2.816962957382202, | |
| "learning_rate": 1.9169329073482428e-05, | |
| "loss": 1.397, | |
| "step": 250 | |
| }, | |
| { | |
| "ce_ib": 22.195499420166016, | |
| "ce_orig": 0.7263768911361694, | |
| "epoch": 0.035945363048166784, | |
| "kl_loss": 8.741401672363281, | |
| "loss_ib": 0.4481678605079651, | |
| "step": 250 | |
| }, | |
| { | |
| "ce_ib": 22.70380973815918, | |
| "ce_orig": 0.6979135274887085, | |
| "epoch": 0.03608914450035945, | |
| "kl_loss": 9.159427642822266, | |
| "loss_ib": 0.4693233072757721, | |
| "step": 251 | |
| }, | |
| { | |
| "ce_ib": 24.72095489501953, | |
| "ce_orig": 0.8336602449417114, | |
| "epoch": 0.03623292595255212, | |
| "kl_loss": 8.704825401306152, | |
| "loss_ib": 0.4476017653942108, | |
| "step": 252 | |
| }, | |
| { | |
| "ce_ib": 25.478559494018555, | |
| "ce_orig": 1.2140023708343506, | |
| "epoch": 0.03637670740474479, | |
| "kl_loss": 8.463350296020508, | |
| "loss_ib": 0.4359067976474762, | |
| "step": 253 | |
| }, | |
| { | |
| "ce_ib": 24.30400276184082, | |
| "ce_orig": 1.159781813621521, | |
| "epoch": 0.036520488856937455, | |
| "kl_loss": 9.485654830932617, | |
| "loss_ib": 0.4864347577095032, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.03666427030913012, | |
| "grad_norm": 2.2414402961730957, | |
| "learning_rate": 1.9568690095846644e-05, | |
| "loss": 1.421, | |
| "step": 255 | |
| }, | |
| { | |
| "ce_ib": 22.23937225341797, | |
| "ce_orig": 0.5963650941848755, | |
| "epoch": 0.03666427030913012, | |
| "kl_loss": 8.78721809387207, | |
| "loss_ib": 0.4504806101322174, | |
| "step": 255 | |
| }, | |
| { | |
| "ce_ib": 23.723373413085938, | |
| "ce_orig": 1.1367262601852417, | |
| "epoch": 0.03680805176132279, | |
| "kl_loss": 8.774513244628906, | |
| "loss_ib": 0.45058736205101013, | |
| "step": 256 | |
| }, | |
| { | |
| "ce_ib": 25.06109619140625, | |
| "ce_orig": 1.4180657863616943, | |
| "epoch": 0.03695183321351546, | |
| "kl_loss": 8.770709991455078, | |
| "loss_ib": 0.4510660767555237, | |
| "step": 257 | |
| }, | |
| { | |
| "ce_ib": 23.2429141998291, | |
| "ce_orig": 1.0652941465377808, | |
| "epoch": 0.037095614665708126, | |
| "kl_loss": 8.671271324157715, | |
| "loss_ib": 0.4451850354671478, | |
| "step": 258 | |
| }, | |
| { | |
| "ce_ib": 21.69077491760254, | |
| "ce_orig": 0.3725070655345917, | |
| "epoch": 0.03723939611790079, | |
| "kl_loss": 8.793878555297852, | |
| "loss_ib": 0.45053932070732117, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.037383177570093455, | |
| "grad_norm": 3.798487663269043, | |
| "learning_rate": 1.9968051118210863e-05, | |
| "loss": 1.4654, | |
| "step": 260 | |
| }, | |
| { | |
| "ce_ib": 23.512039184570312, | |
| "ce_orig": 1.0731854438781738, | |
| "epoch": 0.037383177570093455, | |
| "kl_loss": 8.859106063842773, | |
| "loss_ib": 0.45471134781837463, | |
| "step": 260 | |
| }, | |
| { | |
| "ce_ib": 22.52284049987793, | |
| "ce_orig": 1.0415581464767456, | |
| "epoch": 0.03752695902228612, | |
| "kl_loss": 8.765377044677734, | |
| "loss_ib": 0.4495302736759186, | |
| "step": 261 | |
| }, | |
| { | |
| "ce_ib": 24.735340118408203, | |
| "ce_orig": 1.2017862796783447, | |
| "epoch": 0.03767074047447879, | |
| "kl_loss": 8.242500305175781, | |
| "loss_ib": 0.4244926869869232, | |
| "step": 262 | |
| }, | |
| { | |
| "ce_ib": 21.546173095703125, | |
| "ce_orig": 1.224229097366333, | |
| "epoch": 0.03781452192667146, | |
| "kl_loss": 8.425148963928223, | |
| "loss_ib": 0.4320305287837982, | |
| "step": 263 | |
| }, | |
| { | |
| "ce_ib": 21.48952293395996, | |
| "ce_orig": 0.8686205148696899, | |
| "epoch": 0.037958303378864126, | |
| "kl_loss": 8.037482261657715, | |
| "loss_ib": 0.41261887550354004, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.038102084831056794, | |
| "grad_norm": 3.353695869445801, | |
| "learning_rate": 2.0367412140575082e-05, | |
| "loss": 1.396, | |
| "step": 265 | |
| }, | |
| { | |
| "ce_ib": 21.23736000061035, | |
| "ce_orig": 1.0968440771102905, | |
| "epoch": 0.038102084831056794, | |
| "kl_loss": 8.250904083251953, | |
| "loss_ib": 0.42316389083862305, | |
| "step": 265 | |
| }, | |
| { | |
| "ce_ib": 23.215932846069336, | |
| "ce_orig": 1.1477916240692139, | |
| "epoch": 0.03824586628324946, | |
| "kl_loss": 8.169689178466797, | |
| "loss_ib": 0.4200924336910248, | |
| "step": 266 | |
| }, | |
| { | |
| "ce_ib": 23.65303611755371, | |
| "ce_orig": 0.9717550277709961, | |
| "epoch": 0.03838964773544213, | |
| "kl_loss": 7.602072715759277, | |
| "loss_ib": 0.39193016290664673, | |
| "step": 267 | |
| }, | |
| { | |
| "ce_ib": 20.33776092529297, | |
| "ce_orig": 0.7842280864715576, | |
| "epoch": 0.0385334291876348, | |
| "kl_loss": 7.547301769256592, | |
| "loss_ib": 0.3875339925289154, | |
| "step": 268 | |
| }, | |
| { | |
| "ce_ib": 21.66446304321289, | |
| "ce_orig": 0.9328132271766663, | |
| "epoch": 0.038677210639827465, | |
| "kl_loss": 7.6735310554504395, | |
| "loss_ib": 0.39450880885124207, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.038820992092020126, | |
| "grad_norm": 3.106287717819214, | |
| "learning_rate": 2.07667731629393e-05, | |
| "loss": 1.361, | |
| "step": 270 | |
| }, | |
| { | |
| "ce_ib": 14.545405387878418, | |
| "ce_orig": 0.4456147849559784, | |
| "epoch": 0.038820992092020126, | |
| "kl_loss": 6.566807746887207, | |
| "loss_ib": 0.33561310172080994, | |
| "step": 270 | |
| }, | |
| { | |
| "ce_ib": 22.359256744384766, | |
| "ce_orig": 1.0915645360946655, | |
| "epoch": 0.038964773544212794, | |
| "kl_loss": 7.441349029541016, | |
| "loss_ib": 0.38324710726737976, | |
| "step": 271 | |
| }, | |
| { | |
| "ce_ib": 19.833614349365234, | |
| "ce_orig": 0.6568068861961365, | |
| "epoch": 0.03910855499640546, | |
| "kl_loss": 7.507279396057129, | |
| "loss_ib": 0.3852807879447937, | |
| "step": 272 | |
| }, | |
| { | |
| "ce_ib": 18.543352127075195, | |
| "ce_orig": 0.6701676845550537, | |
| "epoch": 0.03925233644859813, | |
| "kl_loss": 7.031156539916992, | |
| "loss_ib": 0.36082950234413147, | |
| "step": 273 | |
| }, | |
| { | |
| "ce_ib": 19.24417495727539, | |
| "ce_orig": 0.9314641952514648, | |
| "epoch": 0.0393961179007908, | |
| "kl_loss": 7.164027214050293, | |
| "loss_ib": 0.36782345175743103, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.039539899352983465, | |
| "grad_norm": 3.4644718170166016, | |
| "learning_rate": 2.1166134185303514e-05, | |
| "loss": 1.2343, | |
| "step": 275 | |
| }, | |
| { | |
| "ce_ib": 20.395038604736328, | |
| "ce_orig": 0.9956320524215698, | |
| "epoch": 0.039539899352983465, | |
| "kl_loss": 7.304188251495361, | |
| "loss_ib": 0.375406950712204, | |
| "step": 275 | |
| }, | |
| { | |
| "ce_ib": 22.580080032348633, | |
| "ce_orig": 1.0877642631530762, | |
| "epoch": 0.03968368080517613, | |
| "kl_loss": 6.740540504455566, | |
| "loss_ib": 0.34831708669662476, | |
| "step": 276 | |
| }, | |
| { | |
| "ce_ib": 17.001928329467773, | |
| "ce_orig": 0.6867518424987793, | |
| "epoch": 0.0398274622573688, | |
| "kl_loss": 6.005027770996094, | |
| "loss_ib": 0.3087523579597473, | |
| "step": 277 | |
| }, | |
| { | |
| "ce_ib": 20.226699829101562, | |
| "ce_orig": 0.5907849669456482, | |
| "epoch": 0.03997124370956147, | |
| "kl_loss": 6.040300369262695, | |
| "loss_ib": 0.3121283948421478, | |
| "step": 278 | |
| }, | |
| { | |
| "ce_ib": 20.84942626953125, | |
| "ce_orig": 0.8668643832206726, | |
| "epoch": 0.040115025161754136, | |
| "kl_loss": 5.51943302154541, | |
| "loss_ib": 0.2863963544368744, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.040258806613946804, | |
| "grad_norm": 2.836003541946411, | |
| "learning_rate": 2.1565495207667734e-05, | |
| "loss": 1.2083, | |
| "step": 280 | |
| }, | |
| { | |
| "ce_ib": 21.198511123657227, | |
| "ce_orig": 1.0262176990509033, | |
| "epoch": 0.040258806613946804, | |
| "kl_loss": 5.828641414642334, | |
| "loss_ib": 0.30203133821487427, | |
| "step": 280 | |
| }, | |
| { | |
| "ce_ib": 20.3875675201416, | |
| "ce_orig": 1.1043504476547241, | |
| "epoch": 0.040402588066139465, | |
| "kl_loss": 5.128015041351318, | |
| "loss_ib": 0.2665945291519165, | |
| "step": 281 | |
| }, | |
| { | |
| "ce_ib": 21.915267944335938, | |
| "ce_orig": 0.9482531547546387, | |
| "epoch": 0.04054636951833213, | |
| "kl_loss": 4.9252400398254395, | |
| "loss_ib": 0.2572196424007416, | |
| "step": 282 | |
| }, | |
| { | |
| "ce_ib": 20.219453811645508, | |
| "ce_orig": 0.9774989485740662, | |
| "epoch": 0.0406901509705248, | |
| "kl_loss": 4.067303657531738, | |
| "loss_ib": 0.21347491443157196, | |
| "step": 283 | |
| }, | |
| { | |
| "ce_ib": 20.512344360351562, | |
| "ce_orig": 0.8953350186347961, | |
| "epoch": 0.04083393242271747, | |
| "kl_loss": 2.980595588684082, | |
| "loss_ib": 0.15928594768047333, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.040977713874910136, | |
| "grad_norm": 3.126970052719116, | |
| "learning_rate": 2.196485623003195e-05, | |
| "loss": 1.0977, | |
| "step": 285 | |
| }, | |
| { | |
| "ce_ib": 22.42814826965332, | |
| "ce_orig": 0.8264601230621338, | |
| "epoch": 0.040977713874910136, | |
| "kl_loss": 2.260897397994995, | |
| "loss_ib": 0.12425895035266876, | |
| "step": 285 | |
| }, | |
| { | |
| "ce_ib": 19.28723907470703, | |
| "ce_orig": 1.0294511318206787, | |
| "epoch": 0.041121495327102804, | |
| "kl_loss": 1.4155142307281494, | |
| "loss_ib": 0.08041933178901672, | |
| "step": 286 | |
| }, | |
| { | |
| "ce_ib": 22.32074546813965, | |
| "ce_orig": 1.2082335948944092, | |
| "epoch": 0.04126527677929547, | |
| "kl_loss": 1.4922515153884888, | |
| "loss_ib": 0.08577295392751694, | |
| "step": 287 | |
| }, | |
| { | |
| "ce_ib": 21.85085105895996, | |
| "ce_orig": 1.1711450815200806, | |
| "epoch": 0.04140905823148814, | |
| "kl_loss": 1.0158560276031494, | |
| "loss_ib": 0.06171822547912598, | |
| "step": 288 | |
| }, | |
| { | |
| "ce_ib": 19.448490142822266, | |
| "ce_orig": 0.8083485960960388, | |
| "epoch": 0.04155283968368081, | |
| "kl_loss": 0.8125163316726685, | |
| "loss_ib": 0.050350066274404526, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.041696621135873475, | |
| "grad_norm": 0.41338738799095154, | |
| "learning_rate": 2.2364217252396165e-05, | |
| "loss": 0.9838, | |
| "step": 290 | |
| }, | |
| { | |
| "ce_ib": 19.226776123046875, | |
| "ce_orig": 1.1908280849456787, | |
| "epoch": 0.041696621135873475, | |
| "kl_loss": 0.7504492998123169, | |
| "loss_ib": 0.0471358560025692, | |
| "step": 290 | |
| }, | |
| { | |
| "ce_ib": 19.87446403503418, | |
| "ce_orig": 1.02711021900177, | |
| "epoch": 0.041840402588066136, | |
| "kl_loss": 0.6533533930778503, | |
| "loss_ib": 0.04260490462183952, | |
| "step": 291 | |
| }, | |
| { | |
| "ce_ib": 17.87228775024414, | |
| "ce_orig": 1.0570042133331299, | |
| "epoch": 0.041984184040258804, | |
| "kl_loss": 0.5579368472099304, | |
| "loss_ib": 0.036832984536886215, | |
| "step": 292 | |
| }, | |
| { | |
| "ce_ib": 16.936511993408203, | |
| "ce_orig": 0.5302789807319641, | |
| "epoch": 0.04212796549245147, | |
| "kl_loss": 0.6725109219551086, | |
| "loss_ib": 0.042093802243471146, | |
| "step": 293 | |
| }, | |
| { | |
| "ce_ib": 18.851577758789062, | |
| "ce_orig": 1.0163416862487793, | |
| "epoch": 0.04227174694464414, | |
| "kl_loss": 0.5098384618759155, | |
| "loss_ib": 0.03491771221160889, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.04241552839683681, | |
| "grad_norm": 0.21297426521778107, | |
| "learning_rate": 2.2763578274760385e-05, | |
| "loss": 0.9126, | |
| "step": 295 | |
| }, | |
| { | |
| "ce_ib": 15.781968116760254, | |
| "ce_orig": 0.5711618661880493, | |
| "epoch": 0.04241552839683681, | |
| "kl_loss": 0.4373496174812317, | |
| "loss_ib": 0.02975846640765667, | |
| "step": 295 | |
| }, | |
| { | |
| "ce_ib": 22.636154174804688, | |
| "ce_orig": 1.496230959892273, | |
| "epoch": 0.042559309849029475, | |
| "kl_loss": 0.4643814265727997, | |
| "loss_ib": 0.03453714773058891, | |
| "step": 296 | |
| }, | |
| { | |
| "ce_ib": 14.683899879455566, | |
| "ce_orig": 0.6033921241760254, | |
| "epoch": 0.04270309130122214, | |
| "kl_loss": 0.39723098278045654, | |
| "loss_ib": 0.027203500270843506, | |
| "step": 297 | |
| }, | |
| { | |
| "ce_ib": 17.57473373413086, | |
| "ce_orig": 0.7110596895217896, | |
| "epoch": 0.04284687275341481, | |
| "kl_loss": 0.4147634506225586, | |
| "loss_ib": 0.02952553890645504, | |
| "step": 298 | |
| }, | |
| { | |
| "ce_ib": 16.251811981201172, | |
| "ce_orig": 0.7895355820655823, | |
| "epoch": 0.04299065420560748, | |
| "kl_loss": 0.4082551896572113, | |
| "loss_ib": 0.028538664802908897, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.043134435657800146, | |
| "grad_norm": 0.1568593829870224, | |
| "learning_rate": 2.3162939297124604e-05, | |
| "loss": 0.9485, | |
| "step": 300 | |
| }, | |
| { | |
| "ce_ib": 18.58441162109375, | |
| "ce_orig": 1.089181900024414, | |
| "epoch": 0.043134435657800146, | |
| "kl_loss": 0.4467368721961975, | |
| "loss_ib": 0.0316290520131588, | |
| "step": 300 | |
| }, | |
| { | |
| "ce_ib": 18.67475700378418, | |
| "ce_orig": 0.566021740436554, | |
| "epoch": 0.043278217109992814, | |
| "kl_loss": 0.3963052034378052, | |
| "loss_ib": 0.029152637347579002, | |
| "step": 301 | |
| }, | |
| { | |
| "ce_ib": 16.824068069458008, | |
| "ce_orig": 0.7251248359680176, | |
| "epoch": 0.043421998562185475, | |
| "kl_loss": 0.34888386726379395, | |
| "loss_ib": 0.025856226682662964, | |
| "step": 302 | |
| }, | |
| { | |
| "ce_ib": 18.8211669921875, | |
| "ce_orig": 0.6579341888427734, | |
| "epoch": 0.04356578001437814, | |
| "kl_loss": 0.3751975893974304, | |
| "loss_ib": 0.02817046456038952, | |
| "step": 303 | |
| }, | |
| { | |
| "ce_ib": 18.44203758239746, | |
| "ce_orig": 0.8343374729156494, | |
| "epoch": 0.04370956146657081, | |
| "kl_loss": 0.3514009118080139, | |
| "loss_ib": 0.02679106593132019, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.04385334291876348, | |
| "grad_norm": 0.15141652524471283, | |
| "learning_rate": 2.356230031948882e-05, | |
| "loss": 0.9599, | |
| "step": 305 | |
| }, | |
| { | |
| "ce_ib": 16.4177188873291, | |
| "ce_orig": 0.6326029896736145, | |
| "epoch": 0.04385334291876348, | |
| "kl_loss": 0.331454873085022, | |
| "loss_ib": 0.02478160336613655, | |
| "step": 305 | |
| }, | |
| { | |
| "ce_ib": 21.196409225463867, | |
| "ce_orig": 1.230805516242981, | |
| "epoch": 0.043997124370956146, | |
| "kl_loss": 0.40000325441360474, | |
| "loss_ib": 0.030598366633057594, | |
| "step": 306 | |
| }, | |
| { | |
| "ce_ib": 17.367687225341797, | |
| "ce_orig": 0.863945484161377, | |
| "epoch": 0.044140905823148814, | |
| "kl_loss": 0.31744185090065, | |
| "loss_ib": 0.024555936455726624, | |
| "step": 307 | |
| }, | |
| { | |
| "ce_ib": 18.942859649658203, | |
| "ce_orig": 1.148938536643982, | |
| "epoch": 0.04428468727534148, | |
| "kl_loss": 0.3378192186355591, | |
| "loss_ib": 0.02636238932609558, | |
| "step": 308 | |
| }, | |
| { | |
| "ce_ib": 19.47002410888672, | |
| "ce_orig": 1.0959943532943726, | |
| "epoch": 0.04442846872753415, | |
| "kl_loss": 0.31226494908332825, | |
| "loss_ib": 0.025348259136080742, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.04457225017972682, | |
| "grad_norm": 0.13533012568950653, | |
| "learning_rate": 2.3961661341853036e-05, | |
| "loss": 1.015, | |
| "step": 310 | |
| }, | |
| { | |
| "ce_ib": 17.681161880493164, | |
| "ce_orig": 0.9902611374855042, | |
| "epoch": 0.04457225017972682, | |
| "kl_loss": 0.26686644554138184, | |
| "loss_ib": 0.02218390442430973, | |
| "step": 310 | |
| }, | |
| { | |
| "ce_ib": 16.987899780273438, | |
| "ce_orig": 0.6236902475357056, | |
| "epoch": 0.044716031631919485, | |
| "kl_loss": 0.33637407422065735, | |
| "loss_ib": 0.025312652811408043, | |
| "step": 311 | |
| }, | |
| { | |
| "ce_ib": 18.65533447265625, | |
| "ce_orig": 0.8152080774307251, | |
| "epoch": 0.044859813084112146, | |
| "kl_loss": 0.35383230447769165, | |
| "loss_ib": 0.027019282802939415, | |
| "step": 312 | |
| }, | |
| { | |
| "ce_ib": 20.103788375854492, | |
| "ce_orig": 0.8615632653236389, | |
| "epoch": 0.045003594536304814, | |
| "kl_loss": 0.2777783274650574, | |
| "loss_ib": 0.023940810933709145, | |
| "step": 313 | |
| }, | |
| { | |
| "ce_ib": 16.64844512939453, | |
| "ce_orig": 0.9147619605064392, | |
| "epoch": 0.04514737598849748, | |
| "kl_loss": 0.27109894156455994, | |
| "loss_ib": 0.0218791700899601, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.04529115744069015, | |
| "grad_norm": 0.11459668725728989, | |
| "learning_rate": 2.4361022364217255e-05, | |
| "loss": 0.8602, | |
| "step": 315 | |
| }, | |
| { | |
| "ce_ib": 11.063411712646484, | |
| "ce_orig": 0.27872321009635925, | |
| "epoch": 0.04529115744069015, | |
| "kl_loss": 0.3863391876220703, | |
| "loss_ib": 0.024848666042089462, | |
| "step": 315 | |
| }, | |
| { | |
| "ce_ib": 17.38253402709961, | |
| "ce_orig": 1.05631422996521, | |
| "epoch": 0.04543493889288282, | |
| "kl_loss": 0.29607954621315, | |
| "loss_ib": 0.023495245724916458, | |
| "step": 316 | |
| }, | |
| { | |
| "ce_ib": 17.782546997070312, | |
| "ce_orig": 0.9817179441452026, | |
| "epoch": 0.045578720345075485, | |
| "kl_loss": 0.28195664286613464, | |
| "loss_ib": 0.02298910729587078, | |
| "step": 317 | |
| }, | |
| { | |
| "ce_ib": 17.297069549560547, | |
| "ce_orig": 0.6919381618499756, | |
| "epoch": 0.04572250179726815, | |
| "kl_loss": 0.29801255464553833, | |
| "loss_ib": 0.023549163714051247, | |
| "step": 318 | |
| }, | |
| { | |
| "ce_ib": 17.82324981689453, | |
| "ce_orig": 1.2199736833572388, | |
| "epoch": 0.04586628324946082, | |
| "kl_loss": 0.2285030335187912, | |
| "loss_ib": 0.020336776971817017, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.04601006470165349, | |
| "grad_norm": 0.10239739716053009, | |
| "learning_rate": 2.476038338658147e-05, | |
| "loss": 0.7986, | |
| "step": 320 | |
| }, | |
| { | |
| "ce_ib": 16.177907943725586, | |
| "ce_orig": 0.7963648438453674, | |
| "epoch": 0.04601006470165349, | |
| "kl_loss": 0.2211613953113556, | |
| "loss_ib": 0.019147023558616638, | |
| "step": 320 | |
| }, | |
| { | |
| "ce_ib": 17.704599380493164, | |
| "ce_orig": 0.9023761749267578, | |
| "epoch": 0.046153846153846156, | |
| "kl_loss": 0.28145700693130493, | |
| "loss_ib": 0.022925151512026787, | |
| "step": 321 | |
| }, | |
| { | |
| "ce_ib": 17.584495544433594, | |
| "ce_orig": 0.8088329434394836, | |
| "epoch": 0.046297627606038824, | |
| "kl_loss": 0.2326435148715973, | |
| "loss_ib": 0.02042442373931408, | |
| "step": 322 | |
| }, | |
| { | |
| "ce_ib": 16.041574478149414, | |
| "ce_orig": 0.7201854586601257, | |
| "epoch": 0.046441409058231485, | |
| "kl_loss": 0.3453258275985718, | |
| "loss_ib": 0.02528708055615425, | |
| "step": 323 | |
| }, | |
| { | |
| "ce_ib": 17.880464553833008, | |
| "ce_orig": 0.917682409286499, | |
| "epoch": 0.04658519051042415, | |
| "kl_loss": 0.2218429148197174, | |
| "loss_ib": 0.020032377913594246, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.04672897196261682, | |
| "grad_norm": 0.12532667815685272, | |
| "learning_rate": 2.515974440894569e-05, | |
| "loss": 0.8682, | |
| "step": 325 | |
| }, | |
| { | |
| "ce_ib": 15.092937469482422, | |
| "ce_orig": 0.7272103428840637, | |
| "epoch": 0.04672897196261682, | |
| "kl_loss": 0.31795835494995117, | |
| "loss_ib": 0.023444388061761856, | |
| "step": 325 | |
| }, | |
| { | |
| "ce_ib": 15.842549324035645, | |
| "ce_orig": 0.9047788977622986, | |
| "epoch": 0.04687275341480949, | |
| "kl_loss": 0.2592480480670929, | |
| "loss_ib": 0.020883677527308464, | |
| "step": 326 | |
| }, | |
| { | |
| "ce_ib": 14.095027923583984, | |
| "ce_orig": 0.7625716328620911, | |
| "epoch": 0.047016534867002156, | |
| "kl_loss": 0.17521969974040985, | |
| "loss_ib": 0.015808498486876488, | |
| "step": 327 | |
| }, | |
| { | |
| "ce_ib": 16.17887306213379, | |
| "ce_orig": 0.7734029293060303, | |
| "epoch": 0.047160316319194824, | |
| "kl_loss": 0.23375201225280762, | |
| "loss_ib": 0.01977703720331192, | |
| "step": 328 | |
| }, | |
| { | |
| "ce_ib": 18.957979202270508, | |
| "ce_orig": 0.8559271097183228, | |
| "epoch": 0.04730409777138749, | |
| "kl_loss": 0.33970198035240173, | |
| "loss_ib": 0.026464087888598442, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.04744787922358016, | |
| "grad_norm": 0.09043259918689728, | |
| "learning_rate": 2.5559105431309903e-05, | |
| "loss": 0.7934, | |
| "step": 330 | |
| }, | |
| { | |
| "ce_ib": 14.730635643005371, | |
| "ce_orig": 0.8875608444213867, | |
| "epoch": 0.04744787922358016, | |
| "kl_loss": 0.2284042239189148, | |
| "loss_ib": 0.01878552883863449, | |
| "step": 330 | |
| }, | |
| { | |
| "ce_ib": 12.517528533935547, | |
| "ce_orig": 0.6206594705581665, | |
| "epoch": 0.04759166067577283, | |
| "kl_loss": 0.21303001046180725, | |
| "loss_ib": 0.016910264268517494, | |
| "step": 331 | |
| }, | |
| { | |
| "ce_ib": 14.753087043762207, | |
| "ce_orig": 0.841992199420929, | |
| "epoch": 0.047735442127965495, | |
| "kl_loss": 0.20966459810733795, | |
| "loss_ib": 0.017859773710370064, | |
| "step": 332 | |
| }, | |
| { | |
| "ce_ib": 14.960477828979492, | |
| "ce_orig": 1.0839353799819946, | |
| "epoch": 0.047879223580158156, | |
| "kl_loss": 0.15192289650440216, | |
| "loss_ib": 0.01507638394832611, | |
| "step": 333 | |
| }, | |
| { | |
| "ce_ib": 15.1857328414917, | |
| "ce_orig": 0.9817880392074585, | |
| "epoch": 0.048023005032350824, | |
| "kl_loss": 0.19470617175102234, | |
| "loss_ib": 0.017328176647424698, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.04816678648454349, | |
| "grad_norm": 0.10444720834493637, | |
| "learning_rate": 2.5958466453674125e-05, | |
| "loss": 0.8294, | |
| "step": 335 | |
| }, | |
| { | |
| "ce_ib": 15.169656753540039, | |
| "ce_orig": 0.8038283586502075, | |
| "epoch": 0.04816678648454349, | |
| "kl_loss": 0.2266630232334137, | |
| "loss_ib": 0.018917979672551155, | |
| "step": 335 | |
| }, | |
| { | |
| "ce_ib": 14.643211364746094, | |
| "ce_orig": 0.7942169904708862, | |
| "epoch": 0.04831056793673616, | |
| "kl_loss": 0.22511643171310425, | |
| "loss_ib": 0.01857742667198181, | |
| "step": 336 | |
| }, | |
| { | |
| "ce_ib": 12.96651554107666, | |
| "ce_orig": 0.8289546370506287, | |
| "epoch": 0.04845434938892883, | |
| "kl_loss": 0.17975889146327972, | |
| "loss_ib": 0.015471202321350574, | |
| "step": 337 | |
| }, | |
| { | |
| "ce_ib": 16.585371017456055, | |
| "ce_orig": 1.165753722190857, | |
| "epoch": 0.048598130841121495, | |
| "kl_loss": 0.190724715590477, | |
| "loss_ib": 0.017828920856118202, | |
| "step": 338 | |
| }, | |
| { | |
| "ce_ib": 14.718335151672363, | |
| "ce_orig": 0.8502370119094849, | |
| "epoch": 0.04874191229331416, | |
| "kl_loss": 0.20640771090984344, | |
| "loss_ib": 0.017679553478956223, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.04888569374550683, | |
| "grad_norm": 0.09888631105422974, | |
| "learning_rate": 2.6357827476038338e-05, | |
| "loss": 0.8718, | |
| "step": 340 | |
| }, | |
| { | |
| "ce_ib": 13.714351654052734, | |
| "ce_orig": 1.0317654609680176, | |
| "epoch": 0.04888569374550683, | |
| "kl_loss": 0.19221842288970947, | |
| "loss_ib": 0.016468096524477005, | |
| "step": 340 | |
| }, | |
| { | |
| "ce_ib": 15.859162330627441, | |
| "ce_orig": 0.6472983956336975, | |
| "epoch": 0.0490294751976995, | |
| "kl_loss": 0.2313256412744522, | |
| "loss_ib": 0.01949586346745491, | |
| "step": 341 | |
| }, | |
| { | |
| "ce_ib": 16.512859344482422, | |
| "ce_orig": 1.1302889585494995, | |
| "epoch": 0.049173256649892166, | |
| "kl_loss": 0.1906917691230774, | |
| "loss_ib": 0.0177910178899765, | |
| "step": 342 | |
| }, | |
| { | |
| "ce_ib": 16.816452026367188, | |
| "ce_orig": 0.8666338920593262, | |
| "epoch": 0.049317038102084834, | |
| "kl_loss": 0.17613860964775085, | |
| "loss_ib": 0.01721515692770481, | |
| "step": 343 | |
| }, | |
| { | |
| "ce_ib": 13.920699119567871, | |
| "ce_orig": 0.719912588596344, | |
| "epoch": 0.049460819554277495, | |
| "kl_loss": 0.18724943697452545, | |
| "loss_ib": 0.016322821378707886, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.04960460100647016, | |
| "grad_norm": 0.10115125775337219, | |
| "learning_rate": 2.6757188498402557e-05, | |
| "loss": 0.843, | |
| "step": 345 | |
| }, | |
| { | |
| "ce_ib": 16.091005325317383, | |
| "ce_orig": 1.0146100521087646, | |
| "epoch": 0.04960460100647016, | |
| "kl_loss": 0.1512732356786728, | |
| "loss_ib": 0.015609164722263813, | |
| "step": 345 | |
| }, | |
| { | |
| "ce_ib": 15.5990571975708, | |
| "ce_orig": 0.7898549437522888, | |
| "epoch": 0.04974838245866283, | |
| "kl_loss": 0.25755369663238525, | |
| "loss_ib": 0.020677214488387108, | |
| "step": 346 | |
| }, | |
| { | |
| "ce_ib": 17.706438064575195, | |
| "ce_orig": 1.2053872346878052, | |
| "epoch": 0.0498921639108555, | |
| "kl_loss": 0.21856242418289185, | |
| "loss_ib": 0.019781339913606644, | |
| "step": 347 | |
| }, | |
| { | |
| "ce_ib": 10.220382690429688, | |
| "ce_orig": 0.5169559121131897, | |
| "epoch": 0.050035945363048166, | |
| "kl_loss": 0.2967785596847534, | |
| "loss_ib": 0.01994911953806877, | |
| "step": 348 | |
| }, | |
| { | |
| "ce_ib": 14.920300483703613, | |
| "ce_orig": 0.7747300267219543, | |
| "epoch": 0.050179726815240834, | |
| "kl_loss": 0.13559795916080475, | |
| "loss_ib": 0.01424004789441824, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.0503235082674335, | |
| "grad_norm": 0.09113696962594986, | |
| "learning_rate": 2.7156549520766773e-05, | |
| "loss": 0.898, | |
| "step": 350 | |
| }, | |
| { | |
| "ce_ib": 16.00478172302246, | |
| "ce_orig": 1.2635383605957031, | |
| "epoch": 0.0503235082674335, | |
| "kl_loss": 0.2005615234375, | |
| "loss_ib": 0.018030468374490738, | |
| "step": 350 | |
| }, | |
| { | |
| "ce_ib": 16.192142486572266, | |
| "ce_orig": 0.9619611501693726, | |
| "epoch": 0.05046728971962617, | |
| "kl_loss": 0.25098395347595215, | |
| "loss_ib": 0.020645270124077797, | |
| "step": 351 | |
| }, | |
| { | |
| "ce_ib": 13.817422866821289, | |
| "ce_orig": 0.6711569428443909, | |
| "epoch": 0.05061107117181884, | |
| "kl_loss": 0.21178191900253296, | |
| "loss_ib": 0.01749780774116516, | |
| "step": 352 | |
| }, | |
| { | |
| "ce_ib": 15.532769203186035, | |
| "ce_orig": 0.5863211750984192, | |
| "epoch": 0.050754852624011505, | |
| "kl_loss": 0.33604636788368225, | |
| "loss_ib": 0.024568704888224602, | |
| "step": 353 | |
| }, | |
| { | |
| "ce_ib": 14.479625701904297, | |
| "ce_orig": 1.0277538299560547, | |
| "epoch": 0.05089863407620417, | |
| "kl_loss": 0.251324862241745, | |
| "loss_ib": 0.01980605535209179, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.051042415528396834, | |
| "grad_norm": 0.10617897659540176, | |
| "learning_rate": 2.7555910543130992e-05, | |
| "loss": 0.8813, | |
| "step": 355 | |
| }, | |
| { | |
| "ce_ib": 16.734331130981445, | |
| "ce_orig": 0.9177318215370178, | |
| "epoch": 0.051042415528396834, | |
| "kl_loss": 0.2959950864315033, | |
| "loss_ib": 0.023166919127106667, | |
| "step": 355 | |
| }, | |
| { | |
| "ce_ib": 14.452986717224121, | |
| "ce_orig": 0.8091175556182861, | |
| "epoch": 0.0511861969805895, | |
| "kl_loss": 0.19851821660995483, | |
| "loss_ib": 0.01715240441262722, | |
| "step": 356 | |
| }, | |
| { | |
| "ce_ib": 15.050978660583496, | |
| "ce_orig": 1.065936803817749, | |
| "epoch": 0.05132997843278217, | |
| "kl_loss": 0.1562933474779129, | |
| "loss_ib": 0.015340156853199005, | |
| "step": 357 | |
| }, | |
| { | |
| "ce_ib": 16.42909812927246, | |
| "ce_orig": 0.6200724840164185, | |
| "epoch": 0.05147375988497484, | |
| "kl_loss": 0.22564369440078735, | |
| "loss_ib": 0.0194967333227396, | |
| "step": 358 | |
| }, | |
| { | |
| "ce_ib": 15.036846160888672, | |
| "ce_orig": 0.8413035273551941, | |
| "epoch": 0.051617541337167505, | |
| "kl_loss": 0.15089106559753418, | |
| "loss_ib": 0.015062975697219372, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.05176132278936017, | |
| "grad_norm": 0.0994093269109726, | |
| "learning_rate": 2.7955271565495212e-05, | |
| "loss": 0.8787, | |
| "step": 360 | |
| }, | |
| { | |
| "ce_ib": 16.430448532104492, | |
| "ce_orig": 1.058258295059204, | |
| "epoch": 0.05176132278936017, | |
| "kl_loss": 0.19877898693084717, | |
| "loss_ib": 0.018154174089431763, | |
| "step": 360 | |
| }, | |
| { | |
| "ce_ib": 16.932239532470703, | |
| "ce_orig": 1.2644236087799072, | |
| "epoch": 0.05190510424155284, | |
| "kl_loss": 0.19584302604198456, | |
| "loss_ib": 0.018258271738886833, | |
| "step": 361 | |
| }, | |
| { | |
| "ce_ib": 14.099466323852539, | |
| "ce_orig": 0.9995023608207703, | |
| "epoch": 0.05204888569374551, | |
| "kl_loss": 0.16139906644821167, | |
| "loss_ib": 0.015119686722755432, | |
| "step": 362 | |
| }, | |
| { | |
| "ce_ib": 15.650708198547363, | |
| "ce_orig": 0.9404743313789368, | |
| "epoch": 0.052192667145938176, | |
| "kl_loss": 0.21341916918754578, | |
| "loss_ib": 0.018496312201023102, | |
| "step": 363 | |
| }, | |
| { | |
| "ce_ib": 16.308624267578125, | |
| "ce_orig": 1.1696351766586304, | |
| "epoch": 0.052336448598130844, | |
| "kl_loss": 0.18094685673713684, | |
| "loss_ib": 0.017201654613018036, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.052480230050323505, | |
| "grad_norm": 0.08824347704648972, | |
| "learning_rate": 2.8354632587859424e-05, | |
| "loss": 0.92, | |
| "step": 365 | |
| }, | |
| { | |
| "ce_ib": 16.583515167236328, | |
| "ce_orig": 1.182395577430725, | |
| "epoch": 0.052480230050323505, | |
| "kl_loss": 0.19043317437171936, | |
| "loss_ib": 0.017813416197896004, | |
| "step": 365 | |
| }, | |
| { | |
| "ce_ib": 13.93136978149414, | |
| "ce_orig": 0.8936623930931091, | |
| "epoch": 0.05262401150251617, | |
| "kl_loss": 0.20374388992786407, | |
| "loss_ib": 0.017152879387140274, | |
| "step": 366 | |
| }, | |
| { | |
| "ce_ib": 13.256582260131836, | |
| "ce_orig": 0.6269909143447876, | |
| "epoch": 0.05276779295470884, | |
| "kl_loss": 0.16010135412216187, | |
| "loss_ib": 0.014633359387516975, | |
| "step": 367 | |
| }, | |
| { | |
| "ce_ib": 12.879414558410645, | |
| "ce_orig": 0.6315767765045166, | |
| "epoch": 0.05291157440690151, | |
| "kl_loss": 0.16557064652442932, | |
| "loss_ib": 0.014718241058290005, | |
| "step": 368 | |
| }, | |
| { | |
| "ce_ib": 15.146659851074219, | |
| "ce_orig": 1.0897746086120605, | |
| "epoch": 0.053055355859094176, | |
| "kl_loss": 0.16495399177074432, | |
| "loss_ib": 0.015821030363440514, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.053199137311286844, | |
| "grad_norm": 0.09635983407497406, | |
| "learning_rate": 2.8753993610223644e-05, | |
| "loss": 0.933, | |
| "step": 370 | |
| }, | |
| { | |
| "ce_ib": 14.545082092285156, | |
| "ce_orig": 0.903797447681427, | |
| "epoch": 0.053199137311286844, | |
| "kl_loss": 0.12444749474525452, | |
| "loss_ib": 0.013494915328919888, | |
| "step": 370 | |
| }, | |
| { | |
| "ce_ib": 12.932695388793945, | |
| "ce_orig": 0.6494324207305908, | |
| "epoch": 0.05334291876347951, | |
| "kl_loss": 0.1315731406211853, | |
| "loss_ib": 0.013045004568994045, | |
| "step": 371 | |
| }, | |
| { | |
| "ce_ib": 15.617569923400879, | |
| "ce_orig": 0.7816907167434692, | |
| "epoch": 0.05348670021567218, | |
| "kl_loss": 0.16312208771705627, | |
| "loss_ib": 0.015964889898896217, | |
| "step": 372 | |
| }, | |
| { | |
| "ce_ib": 15.400293350219727, | |
| "ce_orig": 1.033065915107727, | |
| "epoch": 0.05363048166786485, | |
| "kl_loss": 0.17930516600608826, | |
| "loss_ib": 0.01666540466248989, | |
| "step": 373 | |
| }, | |
| { | |
| "ce_ib": 11.445207595825195, | |
| "ce_orig": 0.5388709902763367, | |
| "epoch": 0.053774263120057515, | |
| "kl_loss": 0.2616915702819824, | |
| "loss_ib": 0.0188071820884943, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.05391804457225018, | |
| "grad_norm": 0.0901573896408081, | |
| "learning_rate": 2.915335463258786e-05, | |
| "loss": 0.926, | |
| "step": 375 | |
| }, | |
| { | |
| "ce_ib": 13.34332275390625, | |
| "ce_orig": 1.1417230367660522, | |
| "epoch": 0.05391804457225018, | |
| "kl_loss": 0.1469050794839859, | |
| "loss_ib": 0.014016914181411266, | |
| "step": 375 | |
| }, | |
| { | |
| "ce_ib": 11.211030006408691, | |
| "ce_orig": 0.6393249034881592, | |
| "epoch": 0.054061826024442844, | |
| "kl_loss": 0.13886746764183044, | |
| "loss_ib": 0.012548888102173805, | |
| "step": 376 | |
| }, | |
| { | |
| "ce_ib": 15.887382507324219, | |
| "ce_orig": 0.9176316261291504, | |
| "epoch": 0.05420560747663551, | |
| "kl_loss": 0.2912940979003906, | |
| "loss_ib": 0.022508395835757256, | |
| "step": 377 | |
| }, | |
| { | |
| "ce_ib": 11.42358112335205, | |
| "ce_orig": 0.8122538924217224, | |
| "epoch": 0.05434938892882818, | |
| "kl_loss": 0.1490350216627121, | |
| "loss_ib": 0.013163541443645954, | |
| "step": 378 | |
| }, | |
| { | |
| "ce_ib": 14.985864639282227, | |
| "ce_orig": 0.9277105927467346, | |
| "epoch": 0.05449317038102085, | |
| "kl_loss": 0.14583294093608856, | |
| "loss_ib": 0.014784579165279865, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.054636951833213515, | |
| "grad_norm": 0.12827961146831512, | |
| "learning_rate": 2.955271565495208e-05, | |
| "loss": 0.9204, | |
| "step": 380 | |
| }, | |
| { | |
| "ce_ib": 13.780610084533691, | |
| "ce_orig": 0.8345714807510376, | |
| "epoch": 0.054636951833213515, | |
| "kl_loss": 0.1690724641084671, | |
| "loss_ib": 0.015343928709626198, | |
| "step": 380 | |
| }, | |
| { | |
| "ce_ib": 11.765593528747559, | |
| "ce_orig": 0.7576747536659241, | |
| "epoch": 0.05478073328540618, | |
| "kl_loss": 0.15963752567768097, | |
| "loss_ib": 0.013864672742784023, | |
| "step": 381 | |
| }, | |
| { | |
| "ce_ib": 12.893147468566895, | |
| "ce_orig": 1.0378029346466064, | |
| "epoch": 0.05492451473759885, | |
| "kl_loss": 0.13528262078762054, | |
| "loss_ib": 0.013210705481469631, | |
| "step": 382 | |
| }, | |
| { | |
| "ce_ib": 15.782855987548828, | |
| "ce_orig": 0.8279376029968262, | |
| "epoch": 0.05506829618979152, | |
| "kl_loss": 0.16392827033996582, | |
| "loss_ib": 0.016087843105196953, | |
| "step": 383 | |
| }, | |
| { | |
| "ce_ib": 12.443214416503906, | |
| "ce_orig": 0.8166038990020752, | |
| "epoch": 0.055212077641984186, | |
| "kl_loss": 0.14554069936275482, | |
| "loss_ib": 0.013498641550540924, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.055355859094176854, | |
| "grad_norm": 0.13148367404937744, | |
| "learning_rate": 2.9952076677316295e-05, | |
| "loss": 0.8973, | |
| "step": 385 | |
| }, | |
| { | |
| "ce_ib": 16.574996948242188, | |
| "ce_orig": 1.3873276710510254, | |
| "epoch": 0.055355859094176854, | |
| "kl_loss": 0.206925630569458, | |
| "loss_ib": 0.018633781000971794, | |
| "step": 385 | |
| }, | |
| { | |
| "ce_ib": 14.954483032226562, | |
| "ce_orig": 1.4074153900146484, | |
| "epoch": 0.055499640546369515, | |
| "kl_loss": 0.1725064069032669, | |
| "loss_ib": 0.016102561727166176, | |
| "step": 386 | |
| }, | |
| { | |
| "ce_ib": 13.222760200500488, | |
| "ce_orig": 0.581721842288971, | |
| "epoch": 0.05564342199856218, | |
| "kl_loss": 0.3106327950954437, | |
| "loss_ib": 0.02214301936328411, | |
| "step": 387 | |
| }, | |
| { | |
| "ce_ib": 12.130496978759766, | |
| "ce_orig": 0.8030052185058594, | |
| "epoch": 0.05578720345075485, | |
| "kl_loss": 0.19192326068878174, | |
| "loss_ib": 0.015661410987377167, | |
| "step": 388 | |
| }, | |
| { | |
| "ce_ib": 12.304028511047363, | |
| "ce_orig": 0.838097095489502, | |
| "epoch": 0.05593098490294752, | |
| "kl_loss": 0.17265933752059937, | |
| "loss_ib": 0.0147849814966321, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.056074766355140186, | |
| "grad_norm": 0.1129549965262413, | |
| "learning_rate": 3.0351437699680514e-05, | |
| "loss": 0.9147, | |
| "step": 390 | |
| }, | |
| { | |
| "ce_ib": 12.584757804870605, | |
| "ce_orig": 0.6829859018325806, | |
| "epoch": 0.056074766355140186, | |
| "kl_loss": 0.15448611974716187, | |
| "loss_ib": 0.014016685076057911, | |
| "step": 390 | |
| }, | |
| { | |
| "ce_ib": 14.888505935668945, | |
| "ce_orig": 0.8695336580276489, | |
| "epoch": 0.056218547807332854, | |
| "kl_loss": 0.18638572096824646, | |
| "loss_ib": 0.016763538122177124, | |
| "step": 391 | |
| }, | |
| { | |
| "ce_ib": 13.158818244934082, | |
| "ce_orig": 0.724577009677887, | |
| "epoch": 0.05636232925952552, | |
| "kl_loss": 0.15236912667751312, | |
| "loss_ib": 0.01419786550104618, | |
| "step": 392 | |
| }, | |
| { | |
| "ce_ib": 14.405329704284668, | |
| "ce_orig": 0.4904825985431671, | |
| "epoch": 0.05650611071171819, | |
| "kl_loss": 0.21818403899669647, | |
| "loss_ib": 0.01811186783015728, | |
| "step": 393 | |
| }, | |
| { | |
| "ce_ib": 10.370551109313965, | |
| "ce_orig": 0.6885640621185303, | |
| "epoch": 0.05664989216391086, | |
| "kl_loss": 0.18041831254959106, | |
| "loss_ib": 0.014206192456185818, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.056793673616103525, | |
| "grad_norm": 0.09922255575656891, | |
| "learning_rate": 3.075079872204473e-05, | |
| "loss": 0.8916, | |
| "step": 395 | |
| }, | |
| { | |
| "ce_ib": 15.597278594970703, | |
| "ce_orig": 1.2381712198257446, | |
| "epoch": 0.056793673616103525, | |
| "kl_loss": 0.1798793077468872, | |
| "loss_ib": 0.016792604699730873, | |
| "step": 395 | |
| }, | |
| { | |
| "ce_ib": 16.811328887939453, | |
| "ce_orig": 1.2628995180130005, | |
| "epoch": 0.05693745506829619, | |
| "kl_loss": 0.171632319688797, | |
| "loss_ib": 0.016987280920147896, | |
| "step": 396 | |
| }, | |
| { | |
| "ce_ib": 14.330126762390137, | |
| "ce_orig": 0.842546284198761, | |
| "epoch": 0.057081236520488854, | |
| "kl_loss": 0.17398859560489655, | |
| "loss_ib": 0.01586449332535267, | |
| "step": 397 | |
| }, | |
| { | |
| "ce_ib": 13.21159553527832, | |
| "ce_orig": 0.8423411250114441, | |
| "epoch": 0.05722501797268152, | |
| "kl_loss": 0.13025188446044922, | |
| "loss_ib": 0.013118392787873745, | |
| "step": 398 | |
| }, | |
| { | |
| "ce_ib": 12.565587043762207, | |
| "ce_orig": 0.7189036011695862, | |
| "epoch": 0.05736879942487419, | |
| "kl_loss": 0.14162641763687134, | |
| "loss_ib": 0.013364115729928017, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.05751258087706686, | |
| "grad_norm": 0.0966155007481575, | |
| "learning_rate": 3.115015974440895e-05, | |
| "loss": 0.9267, | |
| "step": 400 | |
| }, | |
| { | |
| "ce_ib": 11.058226585388184, | |
| "ce_orig": 0.9342263340950012, | |
| "epoch": 0.05751258087706686, | |
| "kl_loss": 0.11544251441955566, | |
| "loss_ib": 0.011301239021122456, | |
| "step": 400 | |
| }, | |
| { | |
| "ce_ib": 14.200401306152344, | |
| "ce_orig": 1.0684270858764648, | |
| "epoch": 0.057656362329259525, | |
| "kl_loss": 0.14682269096374512, | |
| "loss_ib": 0.014441335573792458, | |
| "step": 401 | |
| }, | |
| { | |
| "ce_ib": 14.785656929016113, | |
| "ce_orig": 1.1560802459716797, | |
| "epoch": 0.05780014378145219, | |
| "kl_loss": 0.1572328507900238, | |
| "loss_ib": 0.015254470519721508, | |
| "step": 402 | |
| }, | |
| { | |
| "ce_ib": 16.529001235961914, | |
| "ce_orig": 1.4409286975860596, | |
| "epoch": 0.05794392523364486, | |
| "kl_loss": 0.1712377667427063, | |
| "loss_ib": 0.016826389357447624, | |
| "step": 403 | |
| }, | |
| { | |
| "ce_ib": 8.062843322753906, | |
| "ce_orig": 0.4845752716064453, | |
| "epoch": 0.05808770668583753, | |
| "kl_loss": 0.114130899310112, | |
| "loss_ib": 0.009737967513501644, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.058231488138030196, | |
| "grad_norm": 0.10344849526882172, | |
| "learning_rate": 3.154952076677317e-05, | |
| "loss": 0.9143, | |
| "step": 405 | |
| }, | |
| { | |
| "ce_ib": 14.247758865356445, | |
| "ce_orig": 0.8494449853897095, | |
| "epoch": 0.058231488138030196, | |
| "kl_loss": 0.14982560276985168, | |
| "loss_ib": 0.014615160413086414, | |
| "step": 405 | |
| }, | |
| { | |
| "ce_ib": 15.090539932250977, | |
| "ce_orig": 1.255419135093689, | |
| "epoch": 0.058375269590222864, | |
| "kl_loss": 0.2056526243686676, | |
| "loss_ib": 0.017827901989221573, | |
| "step": 406 | |
| }, | |
| { | |
| "ce_ib": 13.584562301635742, | |
| "ce_orig": 1.0737160444259644, | |
| "epoch": 0.058519051042415525, | |
| "kl_loss": 0.16253307461738586, | |
| "loss_ib": 0.014918935485184193, | |
| "step": 407 | |
| }, | |
| { | |
| "ce_ib": 10.844743728637695, | |
| "ce_orig": 0.7258655428886414, | |
| "epoch": 0.05866283249460819, | |
| "kl_loss": 0.2175343632698059, | |
| "loss_ib": 0.016299089416861534, | |
| "step": 408 | |
| }, | |
| { | |
| "ce_ib": 9.838624000549316, | |
| "ce_orig": 0.5341205596923828, | |
| "epoch": 0.05880661394680086, | |
| "kl_loss": 0.24159343540668488, | |
| "loss_ib": 0.016998983919620514, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.05895039539899353, | |
| "grad_norm": 0.0828595831990242, | |
| "learning_rate": 3.194888178913738e-05, | |
| "loss": 0.8595, | |
| "step": 410 | |
| }, | |
| { | |
| "ce_ib": 11.394186019897461, | |
| "ce_orig": 0.6941292881965637, | |
| "epoch": 0.05895039539899353, | |
| "kl_loss": 0.13403424620628357, | |
| "loss_ib": 0.012398804537951946, | |
| "step": 410 | |
| }, | |
| { | |
| "ce_ib": 13.786474227905273, | |
| "ce_orig": 1.0304478406906128, | |
| "epoch": 0.059094176851186196, | |
| "kl_loss": 0.24797815084457397, | |
| "loss_ib": 0.01929214410483837, | |
| "step": 411 | |
| }, | |
| { | |
| "ce_ib": 12.280767440795898, | |
| "ce_orig": 0.9082537889480591, | |
| "epoch": 0.059237958303378864, | |
| "kl_loss": 0.09719130396842957, | |
| "loss_ib": 0.010999949648976326, | |
| "step": 412 | |
| }, | |
| { | |
| "ce_ib": 11.551681518554688, | |
| "ce_orig": 0.9754782915115356, | |
| "epoch": 0.05938173975557153, | |
| "kl_loss": 0.1405172348022461, | |
| "loss_ib": 0.0128017021343112, | |
| "step": 413 | |
| }, | |
| { | |
| "ce_ib": 13.329681396484375, | |
| "ce_orig": 0.9015910625457764, | |
| "epoch": 0.0595255212077642, | |
| "kl_loss": 0.15253770351409912, | |
| "loss_ib": 0.014291726052761078, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.05966930265995687, | |
| "grad_norm": 0.09882552921772003, | |
| "learning_rate": 3.23482428115016e-05, | |
| "loss": 0.8577, | |
| "step": 415 | |
| }, | |
| { | |
| "ce_ib": 12.369913101196289, | |
| "ce_orig": 0.8101427555084229, | |
| "epoch": 0.05966930265995687, | |
| "kl_loss": 0.2113226056098938, | |
| "loss_ib": 0.016751086339354515, | |
| "step": 415 | |
| }, | |
| { | |
| "ce_ib": 14.39426040649414, | |
| "ce_orig": 1.3613587617874146, | |
| "epoch": 0.059813084112149535, | |
| "kl_loss": 0.1314837634563446, | |
| "loss_ib": 0.013771317899227142, | |
| "step": 416 | |
| }, | |
| { | |
| "ce_ib": 12.081097602844238, | |
| "ce_orig": 0.4347302317619324, | |
| "epoch": 0.0599568655643422, | |
| "kl_loss": 0.2995225489139557, | |
| "loss_ib": 0.021016675978899002, | |
| "step": 417 | |
| }, | |
| { | |
| "ce_ib": 15.218514442443848, | |
| "ce_orig": 1.2289142608642578, | |
| "epoch": 0.060100647016534864, | |
| "kl_loss": 0.26874852180480957, | |
| "loss_ib": 0.021046683192253113, | |
| "step": 418 | |
| }, | |
| { | |
| "ce_ib": 13.258194923400879, | |
| "ce_orig": 1.0039843320846558, | |
| "epoch": 0.06024442846872753, | |
| "kl_loss": 0.12897028028964996, | |
| "loss_ib": 0.013077611103653908, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.0603882099209202, | |
| "grad_norm": 0.1349947154521942, | |
| "learning_rate": 3.274760383386581e-05, | |
| "loss": 0.899, | |
| "step": 420 | |
| }, | |
| { | |
| "ce_ib": 12.171891212463379, | |
| "ce_orig": 0.9144300222396851, | |
| "epoch": 0.0603882099209202, | |
| "kl_loss": 0.21492531895637512, | |
| "loss_ib": 0.01683221198618412, | |
| "step": 420 | |
| }, | |
| { | |
| "ce_ib": 10.995501518249512, | |
| "ce_orig": 0.699188768863678, | |
| "epoch": 0.06053199137311287, | |
| "kl_loss": 0.12099233269691467, | |
| "loss_ib": 0.011547367088496685, | |
| "step": 421 | |
| }, | |
| { | |
| "ce_ib": 12.991347312927246, | |
| "ce_orig": 0.9281318187713623, | |
| "epoch": 0.060675772825305535, | |
| "kl_loss": 0.1598033308982849, | |
| "loss_ib": 0.014485838823020458, | |
| "step": 422 | |
| }, | |
| { | |
| "ce_ib": 10.423280715942383, | |
| "ce_orig": 0.9821050763130188, | |
| "epoch": 0.0608195542774982, | |
| "kl_loss": 0.10947795957326889, | |
| "loss_ib": 0.010685537941753864, | |
| "step": 423 | |
| }, | |
| { | |
| "ce_ib": 11.12364387512207, | |
| "ce_orig": 0.7817228436470032, | |
| "epoch": 0.06096333572969087, | |
| "kl_loss": 0.17594116926193237, | |
| "loss_ib": 0.014358880929648876, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.06110711718188354, | |
| "grad_norm": 0.0929858386516571, | |
| "learning_rate": 3.314696485623003e-05, | |
| "loss": 0.7994, | |
| "step": 425 | |
| }, | |
| { | |
| "ce_ib": 12.660994529724121, | |
| "ce_orig": 0.9210802912712097, | |
| "epoch": 0.06110711718188354, | |
| "kl_loss": 0.15979456901550293, | |
| "loss_ib": 0.014320224523544312, | |
| "step": 425 | |
| }, | |
| { | |
| "ce_ib": 15.475061416625977, | |
| "ce_orig": 1.6302592754364014, | |
| "epoch": 0.061250898634076206, | |
| "kl_loss": 0.1844199150800705, | |
| "loss_ib": 0.01695852540433407, | |
| "step": 426 | |
| }, | |
| { | |
| "ce_ib": 9.331029891967773, | |
| "ce_orig": 0.5564351081848145, | |
| "epoch": 0.061394680086268874, | |
| "kl_loss": 0.1388329267501831, | |
| "loss_ib": 0.011607161723077297, | |
| "step": 427 | |
| }, | |
| { | |
| "ce_ib": 12.874106407165527, | |
| "ce_orig": 0.9861687421798706, | |
| "epoch": 0.06153846153846154, | |
| "kl_loss": 0.12479162216186523, | |
| "loss_ib": 0.012676633894443512, | |
| "step": 428 | |
| }, | |
| { | |
| "ce_ib": 7.756659507751465, | |
| "ce_orig": 0.28384384512901306, | |
| "epoch": 0.0616822429906542, | |
| "kl_loss": 0.2630873918533325, | |
| "loss_ib": 0.017032699659466743, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.06182602444284687, | |
| "grad_norm": 0.09535211324691772, | |
| "learning_rate": 3.354632587859425e-05, | |
| "loss": 0.9043, | |
| "step": 430 | |
| }, | |
| { | |
| "ce_ib": 10.622055053710938, | |
| "ce_orig": 0.6629616022109985, | |
| "epoch": 0.06182602444284687, | |
| "kl_loss": 0.13496457040309906, | |
| "loss_ib": 0.012059256434440613, | |
| "step": 430 | |
| }, | |
| { | |
| "ce_ib": 11.811662673950195, | |
| "ce_orig": 0.7327677011489868, | |
| "epoch": 0.06196980589503954, | |
| "kl_loss": 0.14013449847698212, | |
| "loss_ib": 0.012912556529045105, | |
| "step": 431 | |
| }, | |
| { | |
| "ce_ib": 8.620430946350098, | |
| "ce_orig": 0.7203670144081116, | |
| "epoch": 0.062113587347232206, | |
| "kl_loss": 0.11925005167722702, | |
| "loss_ib": 0.010272718034684658, | |
| "step": 432 | |
| }, | |
| { | |
| "ce_ib": 14.616909980773926, | |
| "ce_orig": 1.4517083168029785, | |
| "epoch": 0.062257368799424874, | |
| "kl_loss": 0.15538114309310913, | |
| "loss_ib": 0.01507751177996397, | |
| "step": 433 | |
| }, | |
| { | |
| "ce_ib": 9.763717651367188, | |
| "ce_orig": 0.6260893940925598, | |
| "epoch": 0.06240115025161754, | |
| "kl_loss": 0.13390487432479858, | |
| "loss_ib": 0.011577102355659008, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.0625449317038102, | |
| "grad_norm": 0.11180251836776733, | |
| "learning_rate": 3.394568690095847e-05, | |
| "loss": 0.9071, | |
| "step": 435 | |
| }, | |
| { | |
| "ce_ib": 12.881009101867676, | |
| "ce_orig": 0.5546009540557861, | |
| "epoch": 0.0625449317038102, | |
| "kl_loss": 0.13927477598190308, | |
| "loss_ib": 0.013404244557023048, | |
| "step": 435 | |
| }, | |
| { | |
| "ce_ib": 12.253645896911621, | |
| "ce_orig": 0.7509746551513672, | |
| "epoch": 0.06268871315600287, | |
| "kl_loss": 0.16948378086090088, | |
| "loss_ib": 0.014601011760532856, | |
| "step": 436 | |
| }, | |
| { | |
| "ce_ib": 13.29328441619873, | |
| "ce_orig": 0.9583929777145386, | |
| "epoch": 0.06283249460819554, | |
| "kl_loss": 0.15764841437339783, | |
| "loss_ib": 0.014529063366353512, | |
| "step": 437 | |
| }, | |
| { | |
| "ce_ib": 12.615095138549805, | |
| "ce_orig": 1.1630975008010864, | |
| "epoch": 0.0629762760603882, | |
| "kl_loss": 0.12097503244876862, | |
| "loss_ib": 0.012356298975646496, | |
| "step": 438 | |
| }, | |
| { | |
| "ce_ib": 11.95744514465332, | |
| "ce_orig": 0.734953761100769, | |
| "epoch": 0.06312005751258087, | |
| "kl_loss": 0.13797758519649506, | |
| "loss_ib": 0.012877601198852062, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.06326383896477354, | |
| "grad_norm": 0.09555409848690033, | |
| "learning_rate": 3.434504792332269e-05, | |
| "loss": 0.8284, | |
| "step": 440 | |
| }, | |
| { | |
| "ce_ib": 17.494842529296875, | |
| "ce_orig": 1.2540117502212524, | |
| "epoch": 0.06326383896477354, | |
| "kl_loss": 0.20184318721294403, | |
| "loss_ib": 0.018839580938220024, | |
| "step": 440 | |
| }, | |
| { | |
| "ce_ib": 13.520644187927246, | |
| "ce_orig": 1.173345923423767, | |
| "epoch": 0.06340762041696621, | |
| "kl_loss": 0.224016010761261, | |
| "loss_ib": 0.01796112395823002, | |
| "step": 441 | |
| }, | |
| { | |
| "ce_ib": 6.444005489349365, | |
| "ce_orig": 0.33801600337028503, | |
| "epoch": 0.06355140186915888, | |
| "kl_loss": 0.24252083897590637, | |
| "loss_ib": 0.015348044224083424, | |
| "step": 442 | |
| }, | |
| { | |
| "ce_ib": 11.194876670837402, | |
| "ce_orig": 0.8596982359886169, | |
| "epoch": 0.06369518332135155, | |
| "kl_loss": 0.1475781947374344, | |
| "loss_ib": 0.012976348400115967, | |
| "step": 443 | |
| }, | |
| { | |
| "ce_ib": 14.788161277770996, | |
| "ce_orig": 0.9179244041442871, | |
| "epoch": 0.06383896477354421, | |
| "kl_loss": 0.23837195336818695, | |
| "loss_ib": 0.019312677904963493, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.06398274622573688, | |
| "grad_norm": 0.10049393773078918, | |
| "learning_rate": 3.47444089456869e-05, | |
| "loss": 0.915, | |
| "step": 445 | |
| }, | |
| { | |
| "ce_ib": 12.728458404541016, | |
| "ce_orig": 0.6808370351791382, | |
| "epoch": 0.06398274622573688, | |
| "kl_loss": 0.16207855939865112, | |
| "loss_ib": 0.014468157663941383, | |
| "step": 445 | |
| }, | |
| { | |
| "ce_ib": 9.595919609069824, | |
| "ce_orig": 0.48967745900154114, | |
| "epoch": 0.06412652767792955, | |
| "kl_loss": 0.12488089501857758, | |
| "loss_ib": 0.011042005382478237, | |
| "step": 446 | |
| }, | |
| { | |
| "ce_ib": 15.164140701293945, | |
| "ce_orig": 0.8277769684791565, | |
| "epoch": 0.06427030913012222, | |
| "kl_loss": 0.1784917414188385, | |
| "loss_ib": 0.016506657004356384, | |
| "step": 447 | |
| }, | |
| { | |
| "ce_ib": 11.631290435791016, | |
| "ce_orig": 0.786353588104248, | |
| "epoch": 0.06441409058231488, | |
| "kl_loss": 0.13490960001945496, | |
| "loss_ib": 0.012561124749481678, | |
| "step": 448 | |
| }, | |
| { | |
| "ce_ib": 11.316841125488281, | |
| "ce_orig": 0.6659090518951416, | |
| "epoch": 0.06455787203450755, | |
| "kl_loss": 0.1083206981420517, | |
| "loss_ib": 0.011074455454945564, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.06470165348670022, | |
| "grad_norm": 0.10170278698205948, | |
| "learning_rate": 3.514376996805112e-05, | |
| "loss": 0.8419, | |
| "step": 450 | |
| }, | |
| { | |
| "ce_ib": 9.49474048614502, | |
| "ce_orig": 0.7802785038948059, | |
| "epoch": 0.06470165348670022, | |
| "kl_loss": 0.1756356954574585, | |
| "loss_ib": 0.013529154472053051, | |
| "step": 450 | |
| }, | |
| { | |
| "ce_ib": 11.215967178344727, | |
| "ce_orig": 0.4214544892311096, | |
| "epoch": 0.06484543493889289, | |
| "kl_loss": 0.12542136013507843, | |
| "loss_ib": 0.01187905203551054, | |
| "step": 451 | |
| }, | |
| { | |
| "ce_ib": 6.312502861022949, | |
| "ce_orig": 0.3134852945804596, | |
| "epoch": 0.06498921639108556, | |
| "kl_loss": 0.2386016845703125, | |
| "loss_ib": 0.015086335130035877, | |
| "step": 452 | |
| }, | |
| { | |
| "ce_ib": 13.910443305969238, | |
| "ce_orig": 0.7964897155761719, | |
| "epoch": 0.06513299784327822, | |
| "kl_loss": 0.19260820746421814, | |
| "loss_ib": 0.016585631296038628, | |
| "step": 453 | |
| }, | |
| { | |
| "ce_ib": 10.858504295349121, | |
| "ce_orig": 0.8178758025169373, | |
| "epoch": 0.06527677929547089, | |
| "kl_loss": 0.1271795630455017, | |
| "loss_ib": 0.011788229458034039, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.06542056074766354, | |
| "grad_norm": 0.1267521232366562, | |
| "learning_rate": 3.5543130990415334e-05, | |
| "loss": 0.8513, | |
| "step": 455 | |
| }, | |
| { | |
| "ce_ib": 11.637347221374512, | |
| "ce_orig": 1.0193455219268799, | |
| "epoch": 0.06542056074766354, | |
| "kl_loss": 0.144621342420578, | |
| "loss_ib": 0.01304974127560854, | |
| "step": 455 | |
| }, | |
| { | |
| "ce_ib": 10.80041217803955, | |
| "ce_orig": 0.6328637599945068, | |
| "epoch": 0.06556434219985621, | |
| "kl_loss": 0.10495860129594803, | |
| "loss_ib": 0.010648136027157307, | |
| "step": 456 | |
| }, | |
| { | |
| "ce_ib": 7.313602447509766, | |
| "ce_orig": 0.42815887928009033, | |
| "epoch": 0.06570812365204888, | |
| "kl_loss": 0.17510683834552765, | |
| "loss_ib": 0.01241214293986559, | |
| "step": 457 | |
| }, | |
| { | |
| "ce_ib": 13.05362606048584, | |
| "ce_orig": 1.0646302700042725, | |
| "epoch": 0.06585190510424155, | |
| "kl_loss": 0.14800792932510376, | |
| "loss_ib": 0.013927209191024303, | |
| "step": 458 | |
| }, | |
| { | |
| "ce_ib": 8.705698013305664, | |
| "ce_orig": 0.5751362442970276, | |
| "epoch": 0.06599568655643422, | |
| "kl_loss": 0.19291532039642334, | |
| "loss_ib": 0.013998615555465221, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.06613946800862688, | |
| "grad_norm": 0.14026452600955963, | |
| "learning_rate": 3.5942492012779554e-05, | |
| "loss": 0.8978, | |
| "step": 460 | |
| }, | |
| { | |
| "ce_ib": 12.629561424255371, | |
| "ce_orig": 1.248939871788025, | |
| "epoch": 0.06613946800862688, | |
| "kl_loss": 0.1431877613067627, | |
| "loss_ib": 0.013474169187247753, | |
| "step": 460 | |
| }, | |
| { | |
| "ce_ib": 13.466840744018555, | |
| "ce_orig": 1.1314830780029297, | |
| "epoch": 0.06628324946081955, | |
| "kl_loss": 0.11893537640571594, | |
| "loss_ib": 0.012680189684033394, | |
| "step": 461 | |
| }, | |
| { | |
| "ce_ib": 12.272945404052734, | |
| "ce_orig": 0.5334405303001404, | |
| "epoch": 0.06642703091301222, | |
| "kl_loss": 0.19608467817306519, | |
| "loss_ib": 0.015940707176923752, | |
| "step": 462 | |
| }, | |
| { | |
| "ce_ib": 11.584327697753906, | |
| "ce_orig": 0.5882666707038879, | |
| "epoch": 0.06657081236520489, | |
| "kl_loss": 0.15428690612316132, | |
| "loss_ib": 0.013506509363651276, | |
| "step": 463 | |
| }, | |
| { | |
| "ce_ib": 10.483445167541504, | |
| "ce_orig": 0.5081559419631958, | |
| "epoch": 0.06671459381739756, | |
| "kl_loss": 0.23190432786941528, | |
| "loss_ib": 0.01683693937957287, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.06685837526959022, | |
| "grad_norm": 0.11186351627111435, | |
| "learning_rate": 3.6341853035143766e-05, | |
| "loss": 0.977, | |
| "step": 465 | |
| }, | |
| { | |
| "ce_ib": 10.818644523620605, | |
| "ce_orig": 0.8423200249671936, | |
| "epoch": 0.06685837526959022, | |
| "kl_loss": 0.12322719395160675, | |
| "loss_ib": 0.011570681817829609, | |
| "step": 465 | |
| }, | |
| { | |
| "ce_ib": 13.477171897888184, | |
| "ce_orig": 1.135223627090454, | |
| "epoch": 0.06700215672178289, | |
| "kl_loss": 0.1358594447374344, | |
| "loss_ib": 0.013531558215618134, | |
| "step": 466 | |
| }, | |
| { | |
| "ce_ib": 12.029156684875488, | |
| "ce_orig": 0.925537645816803, | |
| "epoch": 0.06714593817397556, | |
| "kl_loss": 0.1674036681652069, | |
| "loss_ib": 0.014384761452674866, | |
| "step": 467 | |
| }, | |
| { | |
| "ce_ib": 8.591270446777344, | |
| "ce_orig": 0.6351872086524963, | |
| "epoch": 0.06728971962616823, | |
| "kl_loss": 0.16200634837150574, | |
| "loss_ib": 0.012395952828228474, | |
| "step": 468 | |
| }, | |
| { | |
| "ce_ib": 12.34648609161377, | |
| "ce_orig": 0.8252216577529907, | |
| "epoch": 0.0674335010783609, | |
| "kl_loss": 0.1306806206703186, | |
| "loss_ib": 0.012707273475825787, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.06757728253055356, | |
| "grad_norm": 0.11462409794330597, | |
| "learning_rate": 3.6741214057507985e-05, | |
| "loss": 0.8112, | |
| "step": 470 | |
| }, | |
| { | |
| "ce_ib": 12.145110130310059, | |
| "ce_orig": 0.7569481730461121, | |
| "epoch": 0.06757728253055356, | |
| "kl_loss": 0.1333114206790924, | |
| "loss_ib": 0.012738126330077648, | |
| "step": 470 | |
| }, | |
| { | |
| "ce_ib": 10.791728019714355, | |
| "ce_orig": 0.8886812329292297, | |
| "epoch": 0.06772106398274623, | |
| "kl_loss": 0.14122334122657776, | |
| "loss_ib": 0.012457030825316906, | |
| "step": 471 | |
| }, | |
| { | |
| "ce_ib": 11.75979232788086, | |
| "ce_orig": 0.93720543384552, | |
| "epoch": 0.0678648454349389, | |
| "kl_loss": 0.08433859050273895, | |
| "loss_ib": 0.010096825659275055, | |
| "step": 472 | |
| }, | |
| { | |
| "ce_ib": 7.816238880157471, | |
| "ce_orig": 0.5898436903953552, | |
| "epoch": 0.06800862688713157, | |
| "kl_loss": 0.26155394315719604, | |
| "loss_ib": 0.0169858168810606, | |
| "step": 473 | |
| }, | |
| { | |
| "ce_ib": 12.64213752746582, | |
| "ce_orig": 1.2193433046340942, | |
| "epoch": 0.06815240833932423, | |
| "kl_loss": 0.1382063627243042, | |
| "loss_ib": 0.013231388293206692, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.0682961897915169, | |
| "grad_norm": 0.12322834134101868, | |
| "learning_rate": 3.714057507987221e-05, | |
| "loss": 0.942, | |
| "step": 475 | |
| }, | |
| { | |
| "ce_ib": 12.208565711975098, | |
| "ce_orig": 0.6283319592475891, | |
| "epoch": 0.0682961897915169, | |
| "kl_loss": 0.1437569409608841, | |
| "loss_ib": 0.013292129151523113, | |
| "step": 475 | |
| }, | |
| { | |
| "ce_ib": 10.480301856994629, | |
| "ce_orig": 0.6875295042991638, | |
| "epoch": 0.06843997124370955, | |
| "kl_loss": 0.171269491314888, | |
| "loss_ib": 0.013803625479340553, | |
| "step": 476 | |
| }, | |
| { | |
| "ce_ib": 12.140584945678711, | |
| "ce_orig": 0.686497151851654, | |
| "epoch": 0.06858375269590222, | |
| "kl_loss": 0.14714768528938293, | |
| "loss_ib": 0.013427676633000374, | |
| "step": 477 | |
| }, | |
| { | |
| "ce_ib": 13.12353515625, | |
| "ce_orig": 1.330522060394287, | |
| "epoch": 0.06872753414809489, | |
| "kl_loss": 0.18113256990909576, | |
| "loss_ib": 0.015618395991623402, | |
| "step": 478 | |
| }, | |
| { | |
| "ce_ib": 12.710488319396973, | |
| "ce_orig": 1.4100775718688965, | |
| "epoch": 0.06887131560028756, | |
| "kl_loss": 0.12010614573955536, | |
| "loss_ib": 0.01236055139452219, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.06901509705248023, | |
| "grad_norm": 0.11526408791542053, | |
| "learning_rate": 3.7539936102236424e-05, | |
| "loss": 0.8998, | |
| "step": 480 | |
| }, | |
| { | |
| "ce_ib": 13.879955291748047, | |
| "ce_orig": 1.1341381072998047, | |
| "epoch": 0.06901509705248023, | |
| "kl_loss": 0.1282053291797638, | |
| "loss_ib": 0.013350243680179119, | |
| "step": 480 | |
| }, | |
| { | |
| "ce_ib": 8.764423370361328, | |
| "ce_orig": 0.6200535893440247, | |
| "epoch": 0.0691588785046729, | |
| "kl_loss": 0.1320233792066574, | |
| "loss_ib": 0.010983380489051342, | |
| "step": 481 | |
| }, | |
| { | |
| "ce_ib": 9.943157196044922, | |
| "ce_orig": 0.6673835515975952, | |
| "epoch": 0.06930265995686556, | |
| "kl_loss": 0.10792216658592224, | |
| "loss_ib": 0.010367686860263348, | |
| "step": 482 | |
| }, | |
| { | |
| "ce_ib": 10.92377758026123, | |
| "ce_orig": 0.7028371095657349, | |
| "epoch": 0.06944644140905823, | |
| "kl_loss": 0.15012815594673157, | |
| "loss_ib": 0.012968296185135841, | |
| "step": 483 | |
| }, | |
| { | |
| "ce_ib": 9.512238502502441, | |
| "ce_orig": 0.5816277265548706, | |
| "epoch": 0.0695902228612509, | |
| "kl_loss": 0.13318368792533875, | |
| "loss_ib": 0.011415303684771061, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.06973400431344356, | |
| "grad_norm": 0.12581641972064972, | |
| "learning_rate": 3.793929712460064e-05, | |
| "loss": 0.8585, | |
| "step": 485 | |
| }, | |
| { | |
| "ce_ib": 13.113508224487305, | |
| "ce_orig": 1.0122153759002686, | |
| "epoch": 0.06973400431344356, | |
| "kl_loss": 0.17498700320720673, | |
| "loss_ib": 0.01530610304325819, | |
| "step": 485 | |
| }, | |
| { | |
| "ce_ib": 11.20240592956543, | |
| "ce_orig": 0.8718814253807068, | |
| "epoch": 0.06987778576563623, | |
| "kl_loss": 0.12174628674983978, | |
| "loss_ib": 0.011688517406582832, | |
| "step": 486 | |
| }, | |
| { | |
| "ce_ib": 12.784674644470215, | |
| "ce_orig": 0.8871896266937256, | |
| "epoch": 0.0700215672178289, | |
| "kl_loss": 0.16703587770462036, | |
| "loss_ib": 0.014744131825864315, | |
| "step": 487 | |
| }, | |
| { | |
| "ce_ib": 10.580418586730957, | |
| "ce_orig": 0.8577698469161987, | |
| "epoch": 0.07016534867002157, | |
| "kl_loss": 0.11169049143791199, | |
| "loss_ib": 0.010874733328819275, | |
| "step": 488 | |
| }, | |
| { | |
| "ce_ib": 10.39923095703125, | |
| "ce_orig": 0.6622049808502197, | |
| "epoch": 0.07030913012221424, | |
| "kl_loss": 0.17256517708301544, | |
| "loss_ib": 0.013827874325215816, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.0704529115744069, | |
| "grad_norm": 0.11361895501613617, | |
| "learning_rate": 3.8338658146964856e-05, | |
| "loss": 0.901, | |
| "step": 490 | |
| }, | |
| { | |
| "ce_ib": 13.566216468811035, | |
| "ce_orig": 0.9003996849060059, | |
| "epoch": 0.0704529115744069, | |
| "kl_loss": 0.18744704127311707, | |
| "loss_ib": 0.016155460849404335, | |
| "step": 490 | |
| }, | |
| { | |
| "ce_ib": 11.603694915771484, | |
| "ce_orig": 1.0972646474838257, | |
| "epoch": 0.07059669302659957, | |
| "kl_loss": 0.09514350444078445, | |
| "loss_ib": 0.010559022426605225, | |
| "step": 491 | |
| }, | |
| { | |
| "ce_ib": 12.866926193237305, | |
| "ce_orig": 1.1191866397857666, | |
| "epoch": 0.07074047447879224, | |
| "kl_loss": 0.12916871905326843, | |
| "loss_ib": 0.012891898863017559, | |
| "step": 492 | |
| }, | |
| { | |
| "ce_ib": 11.685700416564941, | |
| "ce_orig": 1.0439685583114624, | |
| "epoch": 0.07088425593098491, | |
| "kl_loss": 0.13916221261024475, | |
| "loss_ib": 0.012800960801541805, | |
| "step": 493 | |
| }, | |
| { | |
| "ce_ib": 8.240974426269531, | |
| "ce_orig": 0.6664552092552185, | |
| "epoch": 0.07102803738317758, | |
| "kl_loss": 0.0913599506020546, | |
| "loss_ib": 0.008688484318554401, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.07117181883537024, | |
| "grad_norm": 0.11460109055042267, | |
| "learning_rate": 3.8738019169329075e-05, | |
| "loss": 0.8749, | |
| "step": 495 | |
| }, | |
| { | |
| "ce_ib": 15.161900520324707, | |
| "ce_orig": 1.5418399572372437, | |
| "epoch": 0.07117181883537024, | |
| "kl_loss": 0.14499804377555847, | |
| "loss_ib": 0.014830851927399635, | |
| "step": 495 | |
| }, | |
| { | |
| "ce_ib": 12.499137878417969, | |
| "ce_orig": 0.7715917229652405, | |
| "epoch": 0.07131560028756291, | |
| "kl_loss": 0.13650982081890106, | |
| "loss_ib": 0.013075060211122036, | |
| "step": 496 | |
| }, | |
| { | |
| "ce_ib": 10.516082763671875, | |
| "ce_orig": 0.810151219367981, | |
| "epoch": 0.07145938173975556, | |
| "kl_loss": 0.1658136546611786, | |
| "loss_ib": 0.013548724353313446, | |
| "step": 497 | |
| }, | |
| { | |
| "ce_ib": 12.26677131652832, | |
| "ce_orig": 0.4871862828731537, | |
| "epoch": 0.07160316319194823, | |
| "kl_loss": 0.15727362036705017, | |
| "loss_ib": 0.013997065834701061, | |
| "step": 498 | |
| }, | |
| { | |
| "ce_ib": 8.879415512084961, | |
| "ce_orig": 0.5100986957550049, | |
| "epoch": 0.0717469446441409, | |
| "kl_loss": 0.10066086053848267, | |
| "loss_ib": 0.009472750127315521, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.07189072609633357, | |
| "grad_norm": 0.1443518102169037, | |
| "learning_rate": 3.913738019169329e-05, | |
| "loss": 0.8769, | |
| "step": 500 | |
| }, | |
| { | |
| "ce_ib": 11.191156387329102, | |
| "ce_orig": 0.7616560459136963, | |
| "epoch": 0.07189072609633357, | |
| "kl_loss": 0.13632240891456604, | |
| "loss_ib": 0.012411698698997498, | |
| "step": 500 | |
| }, | |
| { | |
| "ce_ib": 11.555066108703613, | |
| "ce_orig": 0.8566383123397827, | |
| "epoch": 0.07203450754852624, | |
| "kl_loss": 0.162722647190094, | |
| "loss_ib": 0.013913665898144245, | |
| "step": 501 | |
| }, | |
| { | |
| "ce_ib": 10.90505599975586, | |
| "ce_orig": 0.9519692063331604, | |
| "epoch": 0.0721782890007189, | |
| "kl_loss": 0.12059365957975388, | |
| "loss_ib": 0.011482210829854012, | |
| "step": 502 | |
| }, | |
| { | |
| "ce_ib": 9.185860633850098, | |
| "ce_orig": 0.5769492387771606, | |
| "epoch": 0.07232207045291157, | |
| "kl_loss": 0.14534525573253632, | |
| "loss_ib": 0.011860193684697151, | |
| "step": 503 | |
| }, | |
| { | |
| "ce_ib": 12.747846603393555, | |
| "ce_orig": 0.893709123134613, | |
| "epoch": 0.07246585190510424, | |
| "kl_loss": 0.12308457493782043, | |
| "loss_ib": 0.012528151273727417, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.07260963335729691, | |
| "grad_norm": 0.11177890002727509, | |
| "learning_rate": 3.953674121405751e-05, | |
| "loss": 0.8203, | |
| "step": 505 | |
| }, | |
| { | |
| "ce_ib": 10.646434783935547, | |
| "ce_orig": 1.0113996267318726, | |
| "epoch": 0.07260963335729691, | |
| "kl_loss": 0.12359193712472916, | |
| "loss_ib": 0.01150281447917223, | |
| "step": 505 | |
| }, | |
| { | |
| "ce_ib": 10.180166244506836, | |
| "ce_orig": 1.0264123678207397, | |
| "epoch": 0.07275341480948957, | |
| "kl_loss": 0.1544847935438156, | |
| "loss_ib": 0.012814322486519814, | |
| "step": 506 | |
| }, | |
| { | |
| "ce_ib": 11.716635704040527, | |
| "ce_orig": 0.9286133646965027, | |
| "epoch": 0.07289719626168224, | |
| "kl_loss": 0.1243153065443039, | |
| "loss_ib": 0.012074083089828491, | |
| "step": 507 | |
| }, | |
| { | |
| "ce_ib": 9.650020599365234, | |
| "ce_orig": 0.9953688383102417, | |
| "epoch": 0.07304097771387491, | |
| "kl_loss": 0.12280933558940887, | |
| "loss_ib": 0.010965476743876934, | |
| "step": 508 | |
| }, | |
| { | |
| "ce_ib": 9.697562217712402, | |
| "ce_orig": 0.4963395297527313, | |
| "epoch": 0.07318475916606758, | |
| "kl_loss": 0.11651341617107391, | |
| "loss_ib": 0.010674451477825642, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.07332854061826025, | |
| "grad_norm": 0.10411624610424042, | |
| "learning_rate": 3.9936102236421726e-05, | |
| "loss": 0.8555, | |
| "step": 510 | |
| }, | |
| { | |
| "ce_ib": 11.558809280395508, | |
| "ce_orig": 1.02761971950531, | |
| "epoch": 0.07332854061826025, | |
| "kl_loss": 0.15587545931339264, | |
| "loss_ib": 0.01357317715883255, | |
| "step": 510 | |
| }, | |
| { | |
| "ce_ib": 12.694424629211426, | |
| "ce_orig": 1.1861635446548462, | |
| "epoch": 0.07347232207045291, | |
| "kl_loss": 0.1220870316028595, | |
| "loss_ib": 0.012451563961803913, | |
| "step": 511 | |
| }, | |
| { | |
| "ce_ib": 8.967998504638672, | |
| "ce_orig": 0.8245717883110046, | |
| "epoch": 0.07361610352264558, | |
| "kl_loss": 0.11289598047733307, | |
| "loss_ib": 0.010128798894584179, | |
| "step": 512 | |
| }, | |
| { | |
| "ce_ib": 10.940625190734863, | |
| "ce_orig": 0.742675244808197, | |
| "epoch": 0.07375988497483825, | |
| "kl_loss": 0.11433502286672592, | |
| "loss_ib": 0.011187063530087471, | |
| "step": 513 | |
| }, | |
| { | |
| "ce_ib": 11.216033935546875, | |
| "ce_orig": 0.7051765322685242, | |
| "epoch": 0.07390366642703092, | |
| "kl_loss": 0.17939868569374084, | |
| "loss_ib": 0.014577952213585377, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.07404744787922359, | |
| "grad_norm": 0.12375368177890778, | |
| "learning_rate": 4.0335463258785946e-05, | |
| "loss": 0.8018, | |
| "step": 515 | |
| }, | |
| { | |
| "ce_ib": 12.851394653320312, | |
| "ce_orig": 0.959805965423584, | |
| "epoch": 0.07404744787922359, | |
| "kl_loss": 0.11663807928562164, | |
| "loss_ib": 0.012257601134479046, | |
| "step": 515 | |
| }, | |
| { | |
| "ce_ib": 9.45693302154541, | |
| "ce_orig": 0.6901520490646362, | |
| "epoch": 0.07419122933141625, | |
| "kl_loss": 0.16356007754802704, | |
| "loss_ib": 0.012906471267342567, | |
| "step": 516 | |
| }, | |
| { | |
| "ce_ib": 13.640007972717285, | |
| "ce_orig": 1.0324355363845825, | |
| "epoch": 0.07433501078360892, | |
| "kl_loss": 0.14768928289413452, | |
| "loss_ib": 0.01420446764677763, | |
| "step": 517 | |
| }, | |
| { | |
| "ce_ib": 10.631475448608398, | |
| "ce_orig": 0.7622382044792175, | |
| "epoch": 0.07447879223580157, | |
| "kl_loss": 0.1414967179298401, | |
| "loss_ib": 0.012390573509037495, | |
| "step": 518 | |
| }, | |
| { | |
| "ce_ib": 13.440409660339355, | |
| "ce_orig": 1.4887185096740723, | |
| "epoch": 0.07462257368799424, | |
| "kl_loss": 0.13175088167190552, | |
| "loss_ib": 0.013307749293744564, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.07476635514018691, | |
| "grad_norm": 0.12905798852443695, | |
| "learning_rate": 4.0734824281150165e-05, | |
| "loss": 0.9488, | |
| "step": 520 | |
| }, | |
| { | |
| "ce_ib": 11.399759292602539, | |
| "ce_orig": 0.9179264307022095, | |
| "epoch": 0.07476635514018691, | |
| "kl_loss": 0.1584293693304062, | |
| "loss_ib": 0.013621347956359386, | |
| "step": 520 | |
| }, | |
| { | |
| "ce_ib": 9.124701499938965, | |
| "ce_orig": 0.6920540928840637, | |
| "epoch": 0.07491013659237958, | |
| "kl_loss": 0.1023043617606163, | |
| "loss_ib": 0.009677569381892681, | |
| "step": 521 | |
| }, | |
| { | |
| "ce_ib": 11.09079647064209, | |
| "ce_orig": 1.124171257019043, | |
| "epoch": 0.07505391804457225, | |
| "kl_loss": 0.1274327039718628, | |
| "loss_ib": 0.011917034164071083, | |
| "step": 522 | |
| }, | |
| { | |
| "ce_ib": 11.651871681213379, | |
| "ce_orig": 1.1573556661605835, | |
| "epoch": 0.07519769949676491, | |
| "kl_loss": 0.1257188469171524, | |
| "loss_ib": 0.012111878953874111, | |
| "step": 523 | |
| }, | |
| { | |
| "ce_ib": 10.919167518615723, | |
| "ce_orig": 1.096139907836914, | |
| "epoch": 0.07534148094895758, | |
| "kl_loss": 0.14454081654548645, | |
| "loss_ib": 0.012686625123023987, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.07548526240115025, | |
| "grad_norm": 0.09990247339010239, | |
| "learning_rate": 4.113418530351438e-05, | |
| "loss": 0.8695, | |
| "step": 525 | |
| }, | |
| { | |
| "ce_ib": 13.44006633758545, | |
| "ce_orig": 1.1762773990631104, | |
| "epoch": 0.07548526240115025, | |
| "kl_loss": 0.1347535103559494, | |
| "loss_ib": 0.013457709923386574, | |
| "step": 525 | |
| }, | |
| { | |
| "ce_ib": 10.870895385742188, | |
| "ce_orig": 0.6810830235481262, | |
| "epoch": 0.07562904385334292, | |
| "kl_loss": 0.1084175780415535, | |
| "loss_ib": 0.010856325738132, | |
| "step": 526 | |
| }, | |
| { | |
| "ce_ib": 11.001641273498535, | |
| "ce_orig": 0.5774558782577515, | |
| "epoch": 0.07577282530553558, | |
| "kl_loss": 0.17218388617038727, | |
| "loss_ib": 0.014110015705227852, | |
| "step": 527 | |
| }, | |
| { | |
| "ce_ib": 12.118193626403809, | |
| "ce_orig": 1.1861246824264526, | |
| "epoch": 0.07591660675772825, | |
| "kl_loss": 0.12062694877386093, | |
| "loss_ib": 0.012090444564819336, | |
| "step": 528 | |
| }, | |
| { | |
| "ce_ib": 10.118423461914062, | |
| "ce_orig": 0.9129387736320496, | |
| "epoch": 0.07606038820992092, | |
| "kl_loss": 0.09990894794464111, | |
| "loss_ib": 0.010054659098386765, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.07620416966211359, | |
| "grad_norm": 0.11624792218208313, | |
| "learning_rate": 4.15335463258786e-05, | |
| "loss": 0.8571, | |
| "step": 530 | |
| }, | |
| { | |
| "ce_ib": 10.449731826782227, | |
| "ce_orig": 0.5511190891265869, | |
| "epoch": 0.07620416966211359, | |
| "kl_loss": 0.18196257948875427, | |
| "loss_ib": 0.014322995208203793, | |
| "step": 530 | |
| }, | |
| { | |
| "ce_ib": 10.00999641418457, | |
| "ce_orig": 0.7014174461364746, | |
| "epoch": 0.07634795111430626, | |
| "kl_loss": 0.14452239871025085, | |
| "loss_ib": 0.01223111804574728, | |
| "step": 531 | |
| }, | |
| { | |
| "ce_ib": 12.196045875549316, | |
| "ce_orig": 1.0053819417953491, | |
| "epoch": 0.07649173256649892, | |
| "kl_loss": 0.12312173843383789, | |
| "loss_ib": 0.012254110537469387, | |
| "step": 532 | |
| }, | |
| { | |
| "ce_ib": 11.569887161254883, | |
| "ce_orig": 0.9444661140441895, | |
| "epoch": 0.07663551401869159, | |
| "kl_loss": 0.12740397453308105, | |
| "loss_ib": 0.012155142612755299, | |
| "step": 533 | |
| }, | |
| { | |
| "ce_ib": 9.093774795532227, | |
| "ce_orig": 0.7246710658073425, | |
| "epoch": 0.07677929547088426, | |
| "kl_loss": 0.1298743486404419, | |
| "loss_ib": 0.011040604673326015, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.07692307692307693, | |
| "grad_norm": 0.13377481698989868, | |
| "learning_rate": 4.193290734824281e-05, | |
| "loss": 0.8601, | |
| "step": 535 | |
| }, | |
| { | |
| "ce_ib": 13.371556282043457, | |
| "ce_orig": 1.1632790565490723, | |
| "epoch": 0.07692307692307693, | |
| "kl_loss": 0.13051572442054749, | |
| "loss_ib": 0.013211563229560852, | |
| "step": 535 | |
| }, | |
| { | |
| "ce_ib": 11.797460556030273, | |
| "ce_orig": 0.9421883821487427, | |
| "epoch": 0.0770668583752696, | |
| "kl_loss": 0.13678061962127686, | |
| "loss_ib": 0.012737761251628399, | |
| "step": 536 | |
| }, | |
| { | |
| "ce_ib": 12.24150562286377, | |
| "ce_orig": 0.7441449761390686, | |
| "epoch": 0.07721063982746226, | |
| "kl_loss": 0.11821313202381134, | |
| "loss_ib": 0.012031408958137035, | |
| "step": 537 | |
| }, | |
| { | |
| "ce_ib": 10.22864818572998, | |
| "ce_orig": 0.9380773305892944, | |
| "epoch": 0.07735442127965493, | |
| "kl_loss": 0.10538578778505325, | |
| "loss_ib": 0.010383613407611847, | |
| "step": 538 | |
| }, | |
| { | |
| "ce_ib": 12.875476837158203, | |
| "ce_orig": 0.8242666721343994, | |
| "epoch": 0.0774982027318476, | |
| "kl_loss": 0.15831780433654785, | |
| "loss_ib": 0.014353628270328045, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.07764198418404025, | |
| "grad_norm": 0.10647567361593246, | |
| "learning_rate": 4.233226837060703e-05, | |
| "loss": 0.8364, | |
| "step": 540 | |
| }, | |
| { | |
| "ce_ib": 11.523818016052246, | |
| "ce_orig": 0.8004295229911804, | |
| "epoch": 0.07764198418404025, | |
| "kl_loss": 0.1284905970096588, | |
| "loss_ib": 0.012186438776552677, | |
| "step": 540 | |
| }, | |
| { | |
| "ce_ib": 11.77370548248291, | |
| "ce_orig": 0.6032363176345825, | |
| "epoch": 0.07778576563623292, | |
| "kl_loss": 0.17018523812294006, | |
| "loss_ib": 0.014396115206182003, | |
| "step": 541 | |
| }, | |
| { | |
| "ce_ib": 10.745952606201172, | |
| "ce_orig": 0.6720147132873535, | |
| "epoch": 0.07792954708842559, | |
| "kl_loss": 0.13083013892173767, | |
| "loss_ib": 0.011914483271539211, | |
| "step": 542 | |
| }, | |
| { | |
| "ce_ib": 12.431722640991211, | |
| "ce_orig": 1.328582525253296, | |
| "epoch": 0.07807332854061826, | |
| "kl_loss": 0.11531039327383041, | |
| "loss_ib": 0.011981381103396416, | |
| "step": 543 | |
| }, | |
| { | |
| "ce_ib": 10.487591743469238, | |
| "ce_orig": 0.8851913809776306, | |
| "epoch": 0.07821710999281092, | |
| "kl_loss": 0.1059553325176239, | |
| "loss_ib": 0.010541562922298908, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.07836089144500359, | |
| "grad_norm": 0.12910747528076172, | |
| "learning_rate": 4.273162939297125e-05, | |
| "loss": 0.8664, | |
| "step": 545 | |
| }, | |
| { | |
| "ce_ib": 10.0007905960083, | |
| "ce_orig": 0.9234699010848999, | |
| "epoch": 0.07836089144500359, | |
| "kl_loss": 0.1631077080965042, | |
| "loss_ib": 0.013155780732631683, | |
| "step": 545 | |
| }, | |
| { | |
| "ce_ib": 10.215255737304688, | |
| "ce_orig": 0.8365392088890076, | |
| "epoch": 0.07850467289719626, | |
| "kl_loss": 0.11404451727867126, | |
| "loss_ib": 0.010809853672981262, | |
| "step": 546 | |
| }, | |
| { | |
| "ce_ib": 10.295679092407227, | |
| "ce_orig": 0.6545149683952332, | |
| "epoch": 0.07864845434938893, | |
| "kl_loss": 0.12140201032161713, | |
| "loss_ib": 0.011217939667403698, | |
| "step": 547 | |
| }, | |
| { | |
| "ce_ib": 9.238651275634766, | |
| "ce_orig": 0.7438675761222839, | |
| "epoch": 0.0787922358015816, | |
| "kl_loss": 0.10375800728797913, | |
| "loss_ib": 0.009807226248085499, | |
| "step": 548 | |
| }, | |
| { | |
| "ce_ib": 11.177849769592285, | |
| "ce_orig": 0.9599023461341858, | |
| "epoch": 0.07893601725377426, | |
| "kl_loss": 0.15443462133407593, | |
| "loss_ib": 0.013310655951499939, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.07907979870596693, | |
| "grad_norm": 0.11976125091314316, | |
| "learning_rate": 4.313099041533547e-05, | |
| "loss": 0.9199, | |
| "step": 550 | |
| }, | |
| { | |
| "ce_ib": 8.745708465576172, | |
| "ce_orig": 0.745059072971344, | |
| "epoch": 0.07907979870596693, | |
| "kl_loss": 0.10965421795845032, | |
| "loss_ib": 0.00985556561499834, | |
| "step": 550 | |
| }, | |
| { | |
| "ce_ib": 9.332277297973633, | |
| "ce_orig": 0.7904736995697021, | |
| "epoch": 0.0792235801581596, | |
| "kl_loss": 0.14583170413970947, | |
| "loss_ib": 0.011957723647356033, | |
| "step": 551 | |
| }, | |
| { | |
| "ce_ib": 10.741596221923828, | |
| "ce_orig": 0.9755902290344238, | |
| "epoch": 0.07936736161035227, | |
| "kl_loss": 0.11897563934326172, | |
| "loss_ib": 0.011319580487906933, | |
| "step": 552 | |
| }, | |
| { | |
| "ce_ib": 8.226643562316895, | |
| "ce_orig": 0.6986103057861328, | |
| "epoch": 0.07951114306254493, | |
| "kl_loss": 0.11197762191295624, | |
| "loss_ib": 0.009712203405797482, | |
| "step": 553 | |
| }, | |
| { | |
| "ce_ib": 11.091338157653809, | |
| "ce_orig": 0.7560675740242004, | |
| "epoch": 0.0796549245147376, | |
| "kl_loss": 0.12657985091209412, | |
| "loss_ib": 0.011874661780893803, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.07979870596693027, | |
| "grad_norm": 0.1107710599899292, | |
| "learning_rate": 4.3530351437699686e-05, | |
| "loss": 0.8873, | |
| "step": 555 | |
| }, | |
| { | |
| "ce_ib": 10.592079162597656, | |
| "ce_orig": 1.1071833372116089, | |
| "epoch": 0.07979870596693027, | |
| "kl_loss": 0.13393369317054749, | |
| "loss_ib": 0.011992724612355232, | |
| "step": 555 | |
| }, | |
| { | |
| "ce_ib": 10.618276596069336, | |
| "ce_orig": 0.9089037775993347, | |
| "epoch": 0.07994248741912294, | |
| "kl_loss": 0.16187895834445953, | |
| "loss_ib": 0.013403086923062801, | |
| "step": 556 | |
| }, | |
| { | |
| "ce_ib": 10.179542541503906, | |
| "ce_orig": 0.7911267876625061, | |
| "epoch": 0.0800862688713156, | |
| "kl_loss": 0.19424620270729065, | |
| "loss_ib": 0.01480208057910204, | |
| "step": 557 | |
| }, | |
| { | |
| "ce_ib": 9.025123596191406, | |
| "ce_orig": 0.8696843385696411, | |
| "epoch": 0.08023005032350827, | |
| "kl_loss": 0.10537652671337128, | |
| "loss_ib": 0.009781388565897942, | |
| "step": 558 | |
| }, | |
| { | |
| "ce_ib": 10.719731330871582, | |
| "ce_orig": 1.1363403797149658, | |
| "epoch": 0.08037383177570094, | |
| "kl_loss": 0.12376905977725983, | |
| "loss_ib": 0.011548318900167942, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.08051761322789361, | |
| "grad_norm": 0.11026783287525177, | |
| "learning_rate": 4.39297124600639e-05, | |
| "loss": 0.9008, | |
| "step": 560 | |
| }, | |
| { | |
| "ce_ib": 11.907670021057129, | |
| "ce_orig": 1.1177096366882324, | |
| "epoch": 0.08051761322789361, | |
| "kl_loss": 0.13737264275550842, | |
| "loss_ib": 0.012822466902434826, | |
| "step": 560 | |
| }, | |
| { | |
| "ce_ib": 8.7591552734375, | |
| "ce_orig": 0.5712915658950806, | |
| "epoch": 0.08066139468008626, | |
| "kl_loss": 0.0947001650929451, | |
| "loss_ib": 0.009114585816860199, | |
| "step": 561 | |
| }, | |
| { | |
| "ce_ib": 7.738790988922119, | |
| "ce_orig": 0.688946545124054, | |
| "epoch": 0.08080517613227893, | |
| "kl_loss": 0.10011854022741318, | |
| "loss_ib": 0.00887532252818346, | |
| "step": 562 | |
| }, | |
| { | |
| "ce_ib": 9.232172012329102, | |
| "ce_orig": 0.485678106546402, | |
| "epoch": 0.0809489575844716, | |
| "kl_loss": 0.14091522991657257, | |
| "loss_ib": 0.011661847122013569, | |
| "step": 563 | |
| }, | |
| { | |
| "ce_ib": 11.895172119140625, | |
| "ce_orig": 1.1715614795684814, | |
| "epoch": 0.08109273903666427, | |
| "kl_loss": 0.1400681585073471, | |
| "loss_ib": 0.012950994074344635, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.08123652048885693, | |
| "grad_norm": 0.12719914317131042, | |
| "learning_rate": 4.432907348242812e-05, | |
| "loss": 0.9266, | |
| "step": 565 | |
| }, | |
| { | |
| "ce_ib": 11.971985816955566, | |
| "ce_orig": 1.0788679122924805, | |
| "epoch": 0.08123652048885693, | |
| "kl_loss": 0.1462956666946411, | |
| "loss_ib": 0.013300776481628418, | |
| "step": 565 | |
| }, | |
| { | |
| "ce_ib": 10.301172256469727, | |
| "ce_orig": 1.0739414691925049, | |
| "epoch": 0.0813803019410496, | |
| "kl_loss": 0.13510483503341675, | |
| "loss_ib": 0.01190582849085331, | |
| "step": 566 | |
| }, | |
| { | |
| "ce_ib": 13.213077545166016, | |
| "ce_orig": 1.0148154497146606, | |
| "epoch": 0.08152408339324227, | |
| "kl_loss": 0.14191558957099915, | |
| "loss_ib": 0.01370231807231903, | |
| "step": 567 | |
| }, | |
| { | |
| "ce_ib": 8.01417350769043, | |
| "ce_orig": 0.836399257183075, | |
| "epoch": 0.08166786484543494, | |
| "kl_loss": 0.11862446367740631, | |
| "loss_ib": 0.009938309900462627, | |
| "step": 568 | |
| }, | |
| { | |
| "ce_ib": 9.659282684326172, | |
| "ce_orig": 0.9532420039176941, | |
| "epoch": 0.0818116462976276, | |
| "kl_loss": 0.15856535732746124, | |
| "loss_ib": 0.01275790948420763, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.08195542774982027, | |
| "grad_norm": 0.11036694049835205, | |
| "learning_rate": 4.472843450479233e-05, | |
| "loss": 0.8765, | |
| "step": 570 | |
| }, | |
| { | |
| "ce_ib": 6.825167655944824, | |
| "ce_orig": 0.5682023763656616, | |
| "epoch": 0.08195542774982027, | |
| "kl_loss": 0.17675429582595825, | |
| "loss_ib": 0.012250298634171486, | |
| "step": 570 | |
| }, | |
| { | |
| "ce_ib": 9.991971969604492, | |
| "ce_orig": 0.8481072783470154, | |
| "epoch": 0.08209920920201294, | |
| "kl_loss": 0.17780299484729767, | |
| "loss_ib": 0.013886136002838612, | |
| "step": 571 | |
| }, | |
| { | |
| "ce_ib": 11.411465644836426, | |
| "ce_orig": 1.0281726121902466, | |
| "epoch": 0.08224299065420561, | |
| "kl_loss": 0.11431736499071121, | |
| "loss_ib": 0.01142160128802061, | |
| "step": 572 | |
| }, | |
| { | |
| "ce_ib": 13.019828796386719, | |
| "ce_orig": 1.1337321996688843, | |
| "epoch": 0.08238677210639828, | |
| "kl_loss": 0.2963365316390991, | |
| "loss_ib": 0.021326741203665733, | |
| "step": 573 | |
| }, | |
| { | |
| "ce_ib": 8.582294464111328, | |
| "ce_orig": 0.5892922878265381, | |
| "epoch": 0.08253055355859094, | |
| "kl_loss": 0.10176214575767517, | |
| "loss_ib": 0.009379254654049873, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.08267433501078361, | |
| "grad_norm": 0.1296963393688202, | |
| "learning_rate": 4.512779552715655e-05, | |
| "loss": 0.9216, | |
| "step": 575 | |
| }, | |
| { | |
| "ce_ib": 8.016057014465332, | |
| "ce_orig": 0.8886985778808594, | |
| "epoch": 0.08267433501078361, | |
| "kl_loss": 0.11895924806594849, | |
| "loss_ib": 0.009955990128219128, | |
| "step": 575 | |
| }, | |
| { | |
| "ce_ib": 8.56618595123291, | |
| "ce_orig": 0.9640144109725952, | |
| "epoch": 0.08281811646297628, | |
| "kl_loss": 0.10030095279216766, | |
| "loss_ib": 0.009298141114413738, | |
| "step": 576 | |
| }, | |
| { | |
| "ce_ib": 11.222845077514648, | |
| "ce_orig": 0.7690722346305847, | |
| "epoch": 0.08296189791516895, | |
| "kl_loss": 0.15022683143615723, | |
| "loss_ib": 0.01312276441603899, | |
| "step": 577 | |
| }, | |
| { | |
| "ce_ib": 8.99817943572998, | |
| "ce_orig": 0.8388349413871765, | |
| "epoch": 0.08310567936736162, | |
| "kl_loss": 0.1211586445569992, | |
| "loss_ib": 0.01055702194571495, | |
| "step": 578 | |
| }, | |
| { | |
| "ce_ib": 8.70596694946289, | |
| "ce_orig": 0.8935554623603821, | |
| "epoch": 0.08324946081955428, | |
| "kl_loss": 0.18791162967681885, | |
| "loss_ib": 0.013748565688729286, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.08339324227174695, | |
| "grad_norm": 0.09863687306642532, | |
| "learning_rate": 4.552715654952077e-05, | |
| "loss": 0.8877, | |
| "step": 580 | |
| }, | |
| { | |
| "ce_ib": 9.075626373291016, | |
| "ce_orig": 0.6573423743247986, | |
| "epoch": 0.08339324227174695, | |
| "kl_loss": 0.11877353489398956, | |
| "loss_ib": 0.010476489551365376, | |
| "step": 580 | |
| }, | |
| { | |
| "ce_ib": 9.447694778442383, | |
| "ce_orig": 0.8443095684051514, | |
| "epoch": 0.08353702372393962, | |
| "kl_loss": 0.10504357516765594, | |
| "loss_ib": 0.009976026602089405, | |
| "step": 581 | |
| }, | |
| { | |
| "ce_ib": 10.347243309020996, | |
| "ce_orig": 0.8741527795791626, | |
| "epoch": 0.08368080517613227, | |
| "kl_loss": 0.1390937864780426, | |
| "loss_ib": 0.012128311209380627, | |
| "step": 582 | |
| }, | |
| { | |
| "ce_ib": 10.385614395141602, | |
| "ce_orig": 0.9572657346725464, | |
| "epoch": 0.08382458662832494, | |
| "kl_loss": 0.11069104075431824, | |
| "loss_ib": 0.010727359913289547, | |
| "step": 583 | |
| }, | |
| { | |
| "ce_ib": 9.45213508605957, | |
| "ce_orig": 0.8818128705024719, | |
| "epoch": 0.08396836808051761, | |
| "kl_loss": 0.11259730160236359, | |
| "loss_ib": 0.0103559335693717, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.08411214953271028, | |
| "grad_norm": 0.13552848994731903, | |
| "learning_rate": 4.592651757188499e-05, | |
| "loss": 0.8925, | |
| "step": 585 | |
| }, | |
| { | |
| "ce_ib": 11.096572875976562, | |
| "ce_orig": 1.0407826900482178, | |
| "epoch": 0.08411214953271028, | |
| "kl_loss": 0.12498383969068527, | |
| "loss_ib": 0.01179747935384512, | |
| "step": 585 | |
| }, | |
| { | |
| "ce_ib": 8.041762351989746, | |
| "ce_orig": 0.5626028776168823, | |
| "epoch": 0.08425593098490294, | |
| "kl_loss": 0.10758772492408752, | |
| "loss_ib": 0.009400267153978348, | |
| "step": 586 | |
| }, | |
| { | |
| "ce_ib": 9.080262184143066, | |
| "ce_orig": 0.8330552577972412, | |
| "epoch": 0.08439971243709561, | |
| "kl_loss": 0.11845473945140839, | |
| "loss_ib": 0.01046286802738905, | |
| "step": 587 | |
| }, | |
| { | |
| "ce_ib": 9.385934829711914, | |
| "ce_orig": 0.9699596166610718, | |
| "epoch": 0.08454349388928828, | |
| "kl_loss": 0.11468707025051117, | |
| "loss_ib": 0.010427321307361126, | |
| "step": 588 | |
| }, | |
| { | |
| "ce_ib": 8.70030689239502, | |
| "ce_orig": 0.8794450163841248, | |
| "epoch": 0.08468727534148095, | |
| "kl_loss": 0.09058257937431335, | |
| "loss_ib": 0.008879282511770725, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.08483105679367361, | |
| "grad_norm": 0.12226880341768265, | |
| "learning_rate": 4.632587859424921e-05, | |
| "loss": 0.8537, | |
| "step": 590 | |
| }, | |
| { | |
| "ce_ib": 11.954180717468262, | |
| "ce_orig": 0.6831358075141907, | |
| "epoch": 0.08483105679367361, | |
| "kl_loss": 0.11714005470275879, | |
| "loss_ib": 0.011834092438220978, | |
| "step": 590 | |
| }, | |
| { | |
| "ce_ib": 11.130764961242676, | |
| "ce_orig": 1.1798628568649292, | |
| "epoch": 0.08497483824586628, | |
| "kl_loss": 0.11755703389644623, | |
| "loss_ib": 0.01144323404878378, | |
| "step": 591 | |
| }, | |
| { | |
| "ce_ib": 9.00490665435791, | |
| "ce_orig": 1.0365432500839233, | |
| "epoch": 0.08511861969805895, | |
| "kl_loss": 0.10278277099132538, | |
| "loss_ib": 0.009641592390835285, | |
| "step": 592 | |
| }, | |
| { | |
| "ce_ib": 10.016988754272461, | |
| "ce_orig": 0.7779159545898438, | |
| "epoch": 0.08526240115025162, | |
| "kl_loss": 0.1287814825773239, | |
| "loss_ib": 0.01144756842404604, | |
| "step": 593 | |
| }, | |
| { | |
| "ce_ib": 8.835247039794922, | |
| "ce_orig": 0.7125930190086365, | |
| "epoch": 0.08540618260244429, | |
| "kl_loss": 0.14099836349487305, | |
| "loss_ib": 0.011467541567981243, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.08554996405463695, | |
| "grad_norm": 0.11100345104932785, | |
| "learning_rate": 4.672523961661342e-05, | |
| "loss": 0.8406, | |
| "step": 595 | |
| }, | |
| { | |
| "ce_ib": 11.02431583404541, | |
| "ce_orig": 0.7663914561271667, | |
| "epoch": 0.08554996405463695, | |
| "kl_loss": 0.0822073221206665, | |
| "loss_ib": 0.009622524492442608, | |
| "step": 595 | |
| }, | |
| { | |
| "ce_ib": 10.236846923828125, | |
| "ce_orig": 0.5589779615402222, | |
| "epoch": 0.08569374550682962, | |
| "kl_loss": 0.1600915789604187, | |
| "loss_ib": 0.013123002834618092, | |
| "step": 596 | |
| }, | |
| { | |
| "ce_ib": 8.800088882446289, | |
| "ce_orig": 0.6275121569633484, | |
| "epoch": 0.08583752695902229, | |
| "kl_loss": 0.12892715632915497, | |
| "loss_ib": 0.010846401564776897, | |
| "step": 597 | |
| }, | |
| { | |
| "ce_ib": 8.204318046569824, | |
| "ce_orig": 0.6452017426490784, | |
| "epoch": 0.08598130841121496, | |
| "kl_loss": 0.10103371739387512, | |
| "loss_ib": 0.00915384478867054, | |
| "step": 598 | |
| }, | |
| { | |
| "ce_ib": 8.391890525817871, | |
| "ce_orig": 0.7417363524436951, | |
| "epoch": 0.08612508986340763, | |
| "kl_loss": 0.13140645623207092, | |
| "loss_ib": 0.010766267776489258, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.08626887131560029, | |
| "grad_norm": 0.132738396525383, | |
| "learning_rate": 4.712460063897764e-05, | |
| "loss": 0.8082, | |
| "step": 600 | |
| }, | |
| { | |
| "ce_ib": 10.40978717803955, | |
| "ce_orig": 0.8029245734214783, | |
| "epoch": 0.08626887131560029, | |
| "kl_loss": 0.09170211851596832, | |
| "loss_ib": 0.009789999574422836, | |
| "step": 600 | |
| }, | |
| { | |
| "ce_ib": 8.616018295288086, | |
| "ce_orig": 1.1102370023727417, | |
| "epoch": 0.08641265276779296, | |
| "kl_loss": 0.11172410845756531, | |
| "loss_ib": 0.009894214570522308, | |
| "step": 601 | |
| }, | |
| { | |
| "ce_ib": 10.94422435760498, | |
| "ce_orig": 0.9171683192253113, | |
| "epoch": 0.08655643421998563, | |
| "kl_loss": 0.13117164373397827, | |
| "loss_ib": 0.01203069370239973, | |
| "step": 602 | |
| }, | |
| { | |
| "ce_ib": 11.655625343322754, | |
| "ce_orig": 1.1917483806610107, | |
| "epoch": 0.08670021567217828, | |
| "kl_loss": 0.13371366262435913, | |
| "loss_ib": 0.012513495981693268, | |
| "step": 603 | |
| }, | |
| { | |
| "ce_ib": 7.700047969818115, | |
| "ce_orig": 0.6811072826385498, | |
| "epoch": 0.08684399712437095, | |
| "kl_loss": 0.13317114114761353, | |
| "loss_ib": 0.010508581064641476, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.08698777857656362, | |
| "grad_norm": 0.10468501597642899, | |
| "learning_rate": 4.752396166134185e-05, | |
| "loss": 0.8558, | |
| "step": 605 | |
| }, | |
| { | |
| "ce_ib": 9.883849143981934, | |
| "ce_orig": 0.9263736605644226, | |
| "epoch": 0.08698777857656362, | |
| "kl_loss": 0.21517115831375122, | |
| "loss_ib": 0.015700481832027435, | |
| "step": 605 | |
| }, | |
| { | |
| "ce_ib": 9.480666160583496, | |
| "ce_orig": 1.0162242650985718, | |
| "epoch": 0.08713156002875629, | |
| "kl_loss": 0.11819291114807129, | |
| "loss_ib": 0.010649978183209896, | |
| "step": 606 | |
| }, | |
| { | |
| "ce_ib": 9.99695873260498, | |
| "ce_orig": 0.7871977090835571, | |
| "epoch": 0.08727534148094895, | |
| "kl_loss": 0.21086569130420685, | |
| "loss_ib": 0.015541763976216316, | |
| "step": 607 | |
| }, | |
| { | |
| "ce_ib": 10.787432670593262, | |
| "ce_orig": 1.3771346807479858, | |
| "epoch": 0.08741912293314162, | |
| "kl_loss": 0.11459565162658691, | |
| "loss_ib": 0.01112349983304739, | |
| "step": 608 | |
| }, | |
| { | |
| "ce_ib": 7.926780700683594, | |
| "ce_orig": 0.6084997057914734, | |
| "epoch": 0.08756290438533429, | |
| "kl_loss": 0.11265780031681061, | |
| "loss_ib": 0.009596280753612518, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.08770668583752696, | |
| "grad_norm": 0.1861116737127304, | |
| "learning_rate": 4.792332268370607e-05, | |
| "loss": 0.841, | |
| "step": 610 | |
| }, | |
| { | |
| "ce_ib": 11.350239753723145, | |
| "ce_orig": 1.3010127544403076, | |
| "epoch": 0.08770668583752696, | |
| "kl_loss": 0.1271829605102539, | |
| "loss_ib": 0.012034268118441105, | |
| "step": 610 | |
| }, | |
| { | |
| "ce_ib": 9.161566734313965, | |
| "ce_orig": 1.089638590812683, | |
| "epoch": 0.08785046728971962, | |
| "kl_loss": 0.1308199018239975, | |
| "loss_ib": 0.0111217787489295, | |
| "step": 611 | |
| }, | |
| { | |
| "ce_ib": 9.268600463867188, | |
| "ce_orig": 0.6986385583877563, | |
| "epoch": 0.08799424874191229, | |
| "kl_loss": 0.1047590896487236, | |
| "loss_ib": 0.00987225491553545, | |
| "step": 612 | |
| }, | |
| { | |
| "ce_ib": 9.51627254486084, | |
| "ce_orig": 0.7158625721931458, | |
| "epoch": 0.08813803019410496, | |
| "kl_loss": 0.13369254767894745, | |
| "loss_ib": 0.011442764662206173, | |
| "step": 613 | |
| }, | |
| { | |
| "ce_ib": 8.772662162780762, | |
| "ce_orig": 1.0912283658981323, | |
| "epoch": 0.08828181164629763, | |
| "kl_loss": 0.12375178933143616, | |
| "loss_ib": 0.010573920793831348, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.0884255930984903, | |
| "grad_norm": 0.11201420426368713, | |
| "learning_rate": 4.832268370607029e-05, | |
| "loss": 0.9532, | |
| "step": 615 | |
| }, | |
| { | |
| "ce_ib": 9.231523513793945, | |
| "ce_orig": 0.9239461421966553, | |
| "epoch": 0.0884255930984903, | |
| "kl_loss": 0.11232542991638184, | |
| "loss_ib": 0.010232033208012581, | |
| "step": 615 | |
| }, | |
| { | |
| "ce_ib": 10.984260559082031, | |
| "ce_orig": 1.0705440044403076, | |
| "epoch": 0.08856937455068296, | |
| "kl_loss": 0.14025995135307312, | |
| "loss_ib": 0.012505128048360348, | |
| "step": 616 | |
| }, | |
| { | |
| "ce_ib": 9.729615211486816, | |
| "ce_orig": 0.7350847721099854, | |
| "epoch": 0.08871315600287563, | |
| "kl_loss": 0.1346513330936432, | |
| "loss_ib": 0.011597374454140663, | |
| "step": 617 | |
| }, | |
| { | |
| "ce_ib": 10.69382381439209, | |
| "ce_orig": 0.7845515608787537, | |
| "epoch": 0.0888569374550683, | |
| "kl_loss": 0.16862596571445465, | |
| "loss_ib": 0.013778209686279297, | |
| "step": 618 | |
| }, | |
| { | |
| "ce_ib": 9.848870277404785, | |
| "ce_orig": 0.9884905815124512, | |
| "epoch": 0.08900071890726097, | |
| "kl_loss": 0.13911129534244537, | |
| "loss_ib": 0.011880000121891499, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.08914450035945364, | |
| "grad_norm": 0.12046127766370773, | |
| "learning_rate": 4.872204472843451e-05, | |
| "loss": 0.8906, | |
| "step": 620 | |
| }, | |
| { | |
| "ce_ib": 9.231974601745605, | |
| "ce_orig": 0.6775012612342834, | |
| "epoch": 0.08914450035945364, | |
| "kl_loss": 0.17472121119499207, | |
| "loss_ib": 0.013352048583328724, | |
| "step": 620 | |
| }, | |
| { | |
| "ce_ib": 11.804699897766113, | |
| "ce_orig": 1.529233694076538, | |
| "epoch": 0.0892882818116463, | |
| "kl_loss": 0.12259548157453537, | |
| "loss_ib": 0.01203212421387434, | |
| "step": 621 | |
| }, | |
| { | |
| "ce_ib": 7.200037002563477, | |
| "ce_orig": 0.4418286383152008, | |
| "epoch": 0.08943206326383897, | |
| "kl_loss": 0.1409044861793518, | |
| "loss_ib": 0.010645243339240551, | |
| "step": 622 | |
| }, | |
| { | |
| "ce_ib": 13.076546669006348, | |
| "ce_orig": 1.375049114227295, | |
| "epoch": 0.08957584471603164, | |
| "kl_loss": 0.13703083992004395, | |
| "loss_ib": 0.01338981557637453, | |
| "step": 623 | |
| }, | |
| { | |
| "ce_ib": 7.833815574645996, | |
| "ce_orig": 0.5396220684051514, | |
| "epoch": 0.08971962616822429, | |
| "kl_loss": 0.11749826371669769, | |
| "loss_ib": 0.009791821241378784, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.08986340762041696, | |
| "grad_norm": 0.1378038227558136, | |
| "learning_rate": 4.912140575079873e-05, | |
| "loss": 0.9042, | |
| "step": 625 | |
| }, | |
| { | |
| "ce_ib": 8.829384803771973, | |
| "ce_orig": 0.9549583792686462, | |
| "epoch": 0.08986340762041696, | |
| "kl_loss": 0.08257201313972473, | |
| "loss_ib": 0.008543292991816998, | |
| "step": 625 | |
| }, | |
| { | |
| "ce_ib": 10.215102195739746, | |
| "ce_orig": 0.6707859039306641, | |
| "epoch": 0.09000718907260963, | |
| "kl_loss": 0.1320197582244873, | |
| "loss_ib": 0.011708538979291916, | |
| "step": 626 | |
| }, | |
| { | |
| "ce_ib": 12.020796775817871, | |
| "ce_orig": 1.0823876857757568, | |
| "epoch": 0.0901509705248023, | |
| "kl_loss": 0.12088725715875626, | |
| "loss_ib": 0.012054760940372944, | |
| "step": 627 | |
| }, | |
| { | |
| "ce_ib": 6.340538501739502, | |
| "ce_orig": 0.5447245836257935, | |
| "epoch": 0.09029475197699496, | |
| "kl_loss": 0.08977651596069336, | |
| "loss_ib": 0.0076590958051383495, | |
| "step": 628 | |
| }, | |
| { | |
| "ce_ib": 10.423829078674316, | |
| "ce_orig": 1.2043941020965576, | |
| "epoch": 0.09043853342918763, | |
| "kl_loss": 0.08985006809234619, | |
| "loss_ib": 0.009704417549073696, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.0905823148813803, | |
| "grad_norm": 0.13364273309707642, | |
| "learning_rate": 4.952076677316294e-05, | |
| "loss": 0.9167, | |
| "step": 630 | |
| }, | |
| { | |
| "ce_ib": 10.545193672180176, | |
| "ce_orig": 1.0608717203140259, | |
| "epoch": 0.0905823148813803, | |
| "kl_loss": 0.10969355702400208, | |
| "loss_ib": 0.01075727492570877, | |
| "step": 630 | |
| }, | |
| { | |
| "ce_ib": 6.636918544769287, | |
| "ce_orig": 0.5482816696166992, | |
| "epoch": 0.09072609633357297, | |
| "kl_loss": 0.14597171545028687, | |
| "loss_ib": 0.010617044754326344, | |
| "step": 631 | |
| }, | |
| { | |
| "ce_ib": 7.407113075256348, | |
| "ce_orig": 0.6304860711097717, | |
| "epoch": 0.09086987778576563, | |
| "kl_loss": 0.1305876523256302, | |
| "loss_ib": 0.010232939384877682, | |
| "step": 632 | |
| }, | |
| { | |
| "ce_ib": 9.818191528320312, | |
| "ce_orig": 0.7274070978164673, | |
| "epoch": 0.0910136592379583, | |
| "kl_loss": 0.10508064925670624, | |
| "loss_ib": 0.01016312837600708, | |
| "step": 633 | |
| }, | |
| { | |
| "ce_ib": 7.220940589904785, | |
| "ce_orig": 0.6228238940238953, | |
| "epoch": 0.09115744069015097, | |
| "kl_loss": 0.11822935938835144, | |
| "loss_ib": 0.009521937929093838, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.09130122214234364, | |
| "grad_norm": 0.1587258279323578, | |
| "learning_rate": 4.992012779552716e-05, | |
| "loss": 0.8734, | |
| "step": 635 | |
| }, | |
| { | |
| "ce_ib": 11.307782173156738, | |
| "ce_orig": 0.93541419506073, | |
| "epoch": 0.09130122214234364, | |
| "kl_loss": 0.10773300379514694, | |
| "loss_ib": 0.011040541343390942, | |
| "step": 635 | |
| }, | |
| { | |
| "ce_ib": 12.55269718170166, | |
| "ce_orig": 1.1095880270004272, | |
| "epoch": 0.0914450035945363, | |
| "kl_loss": 0.12884891033172607, | |
| "loss_ib": 0.01271879393607378, | |
| "step": 636 | |
| }, | |
| { | |
| "ce_ib": 10.303481101989746, | |
| "ce_orig": 1.0926016569137573, | |
| "epoch": 0.09158878504672897, | |
| "kl_loss": 0.09748281538486481, | |
| "loss_ib": 0.010025881230831146, | |
| "step": 637 | |
| }, | |
| { | |
| "ce_ib": 10.083890914916992, | |
| "ce_orig": 1.1573371887207031, | |
| "epoch": 0.09173256649892164, | |
| "kl_loss": 0.1186913400888443, | |
| "loss_ib": 0.010976512916386127, | |
| "step": 638 | |
| }, | |
| { | |
| "ce_ib": 8.328096389770508, | |
| "ce_orig": 0.8586428165435791, | |
| "epoch": 0.09187634795111431, | |
| "kl_loss": 0.11891971528530121, | |
| "loss_ib": 0.010110034607350826, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.09202012940330698, | |
| "grad_norm": 0.1216014102101326, | |
| "learning_rate": 4.999999518105881e-05, | |
| "loss": 0.9069, | |
| "step": 640 | |
| }, | |
| { | |
| "ce_ib": 12.23937702178955, | |
| "ce_orig": 1.0276381969451904, | |
| "epoch": 0.09202012940330698, | |
| "kl_loss": 0.1557500809431076, | |
| "loss_ib": 0.013907193206250668, | |
| "step": 640 | |
| }, | |
| { | |
| "ce_ib": 10.701261520385742, | |
| "ce_orig": 1.2324309349060059, | |
| "epoch": 0.09216391085549965, | |
| "kl_loss": 0.12038681656122208, | |
| "loss_ib": 0.011369972489774227, | |
| "step": 641 | |
| }, | |
| { | |
| "ce_ib": 10.260677337646484, | |
| "ce_orig": 0.7305514216423035, | |
| "epoch": 0.09230769230769231, | |
| "kl_loss": 0.1621103733778, | |
| "loss_ib": 0.013235858641564846, | |
| "step": 642 | |
| }, | |
| { | |
| "ce_ib": 8.343496322631836, | |
| "ce_orig": 0.9428196549415588, | |
| "epoch": 0.09245147375988498, | |
| "kl_loss": 0.1055351197719574, | |
| "loss_ib": 0.009448505006730556, | |
| "step": 643 | |
| }, | |
| { | |
| "ce_ib": 11.247652053833008, | |
| "ce_orig": 1.0904582738876343, | |
| "epoch": 0.09259525521207765, | |
| "kl_loss": 0.13140398263931274, | |
| "loss_ib": 0.012194025330245495, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.0927390366642703, | |
| "grad_norm": 0.11910561472177505, | |
| "learning_rate": 4.9999975604113406e-05, | |
| "loss": 0.9693, | |
| "step": 645 | |
| }, | |
| { | |
| "ce_ib": 6.632207870483398, | |
| "ce_orig": 0.6067308783531189, | |
| "epoch": 0.0927390366642703, | |
| "kl_loss": 0.09612976759672165, | |
| "loss_ib": 0.008122592233121395, | |
| "step": 645 | |
| }, | |
| { | |
| "ce_ib": 8.443811416625977, | |
| "ce_orig": 0.5988494157791138, | |
| "epoch": 0.09288281811646297, | |
| "kl_loss": 0.09652799367904663, | |
| "loss_ib": 0.009048305451869965, | |
| "step": 646 | |
| }, | |
| { | |
| "ce_ib": 4.723007678985596, | |
| "ce_orig": 0.2960648238658905, | |
| "epoch": 0.09302659956865564, | |
| "kl_loss": 0.17113137245178223, | |
| "loss_ib": 0.010918072424829006, | |
| "step": 647 | |
| }, | |
| { | |
| "ce_ib": 9.759404182434082, | |
| "ce_orig": 1.0581693649291992, | |
| "epoch": 0.0931703810208483, | |
| "kl_loss": 0.1296691596508026, | |
| "loss_ib": 0.011363159865140915, | |
| "step": 648 | |
| }, | |
| { | |
| "ce_ib": 10.742829322814941, | |
| "ce_orig": 1.3071558475494385, | |
| "epoch": 0.09331416247304097, | |
| "kl_loss": 0.1319178342819214, | |
| "loss_ib": 0.011967306025326252, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.09345794392523364, | |
| "grad_norm": 0.1334671825170517, | |
| "learning_rate": 4.999994096799175e-05, | |
| "loss": 0.8334, | |
| "step": 650 | |
| }, | |
| { | |
| "ce_ib": 8.407774925231934, | |
| "ce_orig": 1.0564472675323486, | |
| "epoch": 0.09345794392523364, | |
| "kl_loss": 0.10574951022863388, | |
| "loss_ib": 0.009491363540291786, | |
| "step": 650 | |
| }, | |
| { | |
| "ce_ib": 8.83208179473877, | |
| "ce_orig": 1.1277897357940674, | |
| "epoch": 0.09360172537742631, | |
| "kl_loss": 0.11387504637241364, | |
| "loss_ib": 0.010109792463481426, | |
| "step": 651 | |
| }, | |
| { | |
| "ce_ib": 9.621241569519043, | |
| "ce_orig": 1.163621187210083, | |
| "epoch": 0.09374550682961898, | |
| "kl_loss": 0.15881387889385223, | |
| "loss_ib": 0.012751313857734203, | |
| "step": 652 | |
| }, | |
| { | |
| "ce_ib": 8.543654441833496, | |
| "ce_orig": 1.220476508140564, | |
| "epoch": 0.09388928828181164, | |
| "kl_loss": 0.13807114958763123, | |
| "loss_ib": 0.011175385676324368, | |
| "step": 653 | |
| }, | |
| { | |
| "ce_ib": 7.747812271118164, | |
| "ce_orig": 0.603781521320343, | |
| "epoch": 0.09403306973400431, | |
| "kl_loss": 0.14266037940979004, | |
| "loss_ib": 0.011006924323737621, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.09417685118619698, | |
| "grad_norm": 0.10387395322322845, | |
| "learning_rate": 4.99998912727147e-05, | |
| "loss": 0.8407, | |
| "step": 655 | |
| }, | |
| { | |
| "ce_ib": 8.64326000213623, | |
| "ce_orig": 0.6705234050750732, | |
| "epoch": 0.09417685118619698, | |
| "kl_loss": 0.08966037631034851, | |
| "loss_ib": 0.008804649114608765, | |
| "step": 655 | |
| }, | |
| { | |
| "ce_ib": 7.918277263641357, | |
| "ce_orig": 0.4241083264350891, | |
| "epoch": 0.09432063263838965, | |
| "kl_loss": 0.10727507621049881, | |
| "loss_ib": 0.00932289194315672, | |
| "step": 656 | |
| }, | |
| { | |
| "ce_ib": 9.530045509338379, | |
| "ce_orig": 1.0509976148605347, | |
| "epoch": 0.09446441409058232, | |
| "kl_loss": 0.11631051450967789, | |
| "loss_ib": 0.010580549016594887, | |
| "step": 657 | |
| }, | |
| { | |
| "ce_ib": 9.818882942199707, | |
| "ce_orig": 1.0639723539352417, | |
| "epoch": 0.09460819554277498, | |
| "kl_loss": 0.09178834408521652, | |
| "loss_ib": 0.009498858824372292, | |
| "step": 658 | |
| }, | |
| { | |
| "ce_ib": 10.010890007019043, | |
| "ce_orig": 0.93562251329422, | |
| "epoch": 0.09475197699496765, | |
| "kl_loss": 0.11310561001300812, | |
| "loss_ib": 0.010660725645720959, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.09489575844716032, | |
| "grad_norm": 0.11378470063209534, | |
| "learning_rate": 4.9999826518312206e-05, | |
| "loss": 1.0042, | |
| "step": 660 | |
| }, | |
| { | |
| "ce_ib": 6.337740898132324, | |
| "ce_orig": 0.6393367052078247, | |
| "epoch": 0.09489575844716032, | |
| "kl_loss": 0.20546941459178925, | |
| "loss_ib": 0.013442340306937695, | |
| "step": 660 | |
| }, | |
| { | |
| "ce_ib": 9.358152389526367, | |
| "ce_orig": 0.6469663977622986, | |
| "epoch": 0.09503953989935299, | |
| "kl_loss": 0.16981008648872375, | |
| "loss_ib": 0.013169581070542336, | |
| "step": 661 | |
| }, | |
| { | |
| "ce_ib": 12.261252403259277, | |
| "ce_orig": 1.3894939422607422, | |
| "epoch": 0.09518332135154565, | |
| "kl_loss": 0.12019184231758118, | |
| "loss_ib": 0.012140218168497086, | |
| "step": 662 | |
| }, | |
| { | |
| "ce_ib": 7.736466407775879, | |
| "ce_orig": 0.7755306363105774, | |
| "epoch": 0.09532710280373832, | |
| "kl_loss": 0.12816421687602997, | |
| "loss_ib": 0.010276444256305695, | |
| "step": 663 | |
| }, | |
| { | |
| "ce_ib": 7.187182426452637, | |
| "ce_orig": 0.5523344874382019, | |
| "epoch": 0.09547088425593099, | |
| "kl_loss": 0.11785402148962021, | |
| "loss_ib": 0.009486292488873005, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.09561466570812366, | |
| "grad_norm": 0.1056251972913742, | |
| "learning_rate": 4.999974670482325e-05, | |
| "loss": 0.9002, | |
| "step": 665 | |
| }, | |
| { | |
| "ce_ib": 8.833344459533691, | |
| "ce_orig": 0.6083950400352478, | |
| "epoch": 0.09561466570812366, | |
| "kl_loss": 0.13750138878822327, | |
| "loss_ib": 0.011291741393506527, | |
| "step": 665 | |
| }, | |
| { | |
| "ce_ib": 6.192565441131592, | |
| "ce_orig": 0.44607073068618774, | |
| "epoch": 0.09575844716031631, | |
| "kl_loss": 0.17101138830184937, | |
| "loss_ib": 0.011646851897239685, | |
| "step": 666 | |
| }, | |
| { | |
| "ce_ib": 8.367379188537598, | |
| "ce_orig": 0.6870203018188477, | |
| "epoch": 0.09590222861250898, | |
| "kl_loss": 0.13014401495456696, | |
| "loss_ib": 0.01069089025259018, | |
| "step": 667 | |
| }, | |
| { | |
| "ce_ib": 9.423290252685547, | |
| "ce_orig": 0.8573846817016602, | |
| "epoch": 0.09604601006470165, | |
| "kl_loss": 0.09922461211681366, | |
| "loss_ib": 0.009672875516116619, | |
| "step": 668 | |
| }, | |
| { | |
| "ce_ib": 7.007211208343506, | |
| "ce_orig": 0.5984476208686829, | |
| "epoch": 0.09618979151689432, | |
| "kl_loss": 0.07789325714111328, | |
| "loss_ib": 0.007398268673568964, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.09633357296908698, | |
| "grad_norm": 0.12372449785470963, | |
| "learning_rate": 4.999965183229593e-05, | |
| "loss": 0.7418, | |
| "step": 670 | |
| }, | |
| { | |
| "ce_ib": 9.530304908752441, | |
| "ce_orig": 1.1927051544189453, | |
| "epoch": 0.09633357296908698, | |
| "kl_loss": 0.1058434247970581, | |
| "loss_ib": 0.01005732361227274, | |
| "step": 670 | |
| }, | |
| { | |
| "ce_ib": 7.50916862487793, | |
| "ce_orig": 0.706110954284668, | |
| "epoch": 0.09647735442127965, | |
| "kl_loss": 0.0839371532201767, | |
| "loss_ib": 0.00795144122093916, | |
| "step": 671 | |
| }, | |
| { | |
| "ce_ib": 9.66496467590332, | |
| "ce_orig": 1.1682391166687012, | |
| "epoch": 0.09662113587347232, | |
| "kl_loss": 0.26062270998954773, | |
| "loss_ib": 0.017863618209958076, | |
| "step": 672 | |
| }, | |
| { | |
| "ce_ib": 13.103985786437988, | |
| "ce_orig": 1.5685844421386719, | |
| "epoch": 0.09676491732566499, | |
| "kl_loss": 0.14245596528053284, | |
| "loss_ib": 0.013674790970981121, | |
| "step": 673 | |
| }, | |
| { | |
| "ce_ib": 8.4186429977417, | |
| "ce_orig": 0.8082234859466553, | |
| "epoch": 0.09690869877785765, | |
| "kl_loss": 0.15234991908073425, | |
| "loss_ib": 0.011826817877590656, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.09705248023005032, | |
| "grad_norm": 0.11889223009347916, | |
| "learning_rate": 4.9999541900787386e-05, | |
| "loss": 0.9837, | |
| "step": 675 | |
| }, | |
| { | |
| "ce_ib": 10.21523666381836, | |
| "ce_orig": 0.7224376797676086, | |
| "epoch": 0.09705248023005032, | |
| "kl_loss": 0.11821460723876953, | |
| "loss_ib": 0.01101834885776043, | |
| "step": 675 | |
| }, | |
| { | |
| "ce_ib": 11.221392631530762, | |
| "ce_orig": 1.364400863647461, | |
| "epoch": 0.09719626168224299, | |
| "kl_loss": 0.12399769574403763, | |
| "loss_ib": 0.01181058119982481, | |
| "step": 676 | |
| }, | |
| { | |
| "ce_ib": 8.069270133972168, | |
| "ce_orig": 0.6916780471801758, | |
| "epoch": 0.09734004313443566, | |
| "kl_loss": 0.09095580875873566, | |
| "loss_ib": 0.00858242530375719, | |
| "step": 677 | |
| }, | |
| { | |
| "ce_ib": 8.115494728088379, | |
| "ce_orig": 0.5971089601516724, | |
| "epoch": 0.09748382458662833, | |
| "kl_loss": 0.16152238845825195, | |
| "loss_ib": 0.012133866548538208, | |
| "step": 678 | |
| }, | |
| { | |
| "ce_ib": 9.717530250549316, | |
| "ce_orig": 0.9812992215156555, | |
| "epoch": 0.097627606038821, | |
| "kl_loss": 0.1159181147813797, | |
| "loss_ib": 0.010654671117663383, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.09777138749101366, | |
| "grad_norm": 0.1076693907380104, | |
| "learning_rate": 4.999941691036383e-05, | |
| "loss": 0.7919, | |
| "step": 680 | |
| }, | |
| { | |
| "ce_ib": 6.14840030670166, | |
| "ce_orig": 0.4094475209712982, | |
| "epoch": 0.09777138749101366, | |
| "kl_loss": 0.21727138757705688, | |
| "loss_ib": 0.013937770389020443, | |
| "step": 680 | |
| }, | |
| { | |
| "ce_ib": 5.300361156463623, | |
| "ce_orig": 0.40322771668434143, | |
| "epoch": 0.09791516894320633, | |
| "kl_loss": 0.10324863344430923, | |
| "loss_ib": 0.007812611758708954, | |
| "step": 681 | |
| }, | |
| { | |
| "ce_ib": 10.288033485412598, | |
| "ce_orig": 0.9181535840034485, | |
| "epoch": 0.098058950395399, | |
| "kl_loss": 0.13683810830116272, | |
| "loss_ib": 0.01198592223227024, | |
| "step": 682 | |
| }, | |
| { | |
| "ce_ib": 6.534595489501953, | |
| "ce_orig": 0.588758647441864, | |
| "epoch": 0.09820273184759166, | |
| "kl_loss": 0.12447066605091095, | |
| "loss_ib": 0.00949083175510168, | |
| "step": 683 | |
| }, | |
| { | |
| "ce_ib": 6.564389705657959, | |
| "ce_orig": 0.6715453863143921, | |
| "epoch": 0.09834651329978433, | |
| "kl_loss": 0.09237797558307648, | |
| "loss_ib": 0.007901093922555447, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.098490294751977, | |
| "grad_norm": 0.13819435238838196, | |
| "learning_rate": 4.999927686110056e-05, | |
| "loss": 0.8023, | |
| "step": 685 | |
| }, | |
| { | |
| "ce_ib": 7.38820219039917, | |
| "ce_orig": 1.00199294090271, | |
| "epoch": 0.098490294751977, | |
| "kl_loss": 0.09955717623233795, | |
| "loss_ib": 0.008671960793435574, | |
| "step": 685 | |
| }, | |
| { | |
| "ce_ib": 8.343016624450684, | |
| "ce_orig": 0.9671911001205444, | |
| "epoch": 0.09863407620416967, | |
| "kl_loss": 0.15917587280273438, | |
| "loss_ib": 0.012130302377045155, | |
| "step": 686 | |
| }, | |
| { | |
| "ce_ib": 7.283801078796387, | |
| "ce_orig": 0.8381296992301941, | |
| "epoch": 0.09877785765636234, | |
| "kl_loss": 0.08582788705825806, | |
| "loss_ib": 0.007933295331895351, | |
| "step": 687 | |
| }, | |
| { | |
| "ce_ib": 7.987790107727051, | |
| "ce_orig": 0.6048274636268616, | |
| "epoch": 0.09892163910855499, | |
| "kl_loss": 0.16408729553222656, | |
| "loss_ib": 0.012198260053992271, | |
| "step": 688 | |
| }, | |
| { | |
| "ce_ib": 13.21379280090332, | |
| "ce_orig": 1.2273222208023071, | |
| "epoch": 0.09906542056074766, | |
| "kl_loss": 0.13596788048744202, | |
| "loss_ib": 0.013405290432274342, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.09920920201294033, | |
| "grad_norm": 0.10889366269111633, | |
| "learning_rate": 4.999912175308195e-05, | |
| "loss": 0.8662, | |
| "step": 690 | |
| }, | |
| { | |
| "ce_ib": 9.05671501159668, | |
| "ce_orig": 1.0359764099121094, | |
| "epoch": 0.09920920201294033, | |
| "kl_loss": 0.09794212877750397, | |
| "loss_ib": 0.00942546408623457, | |
| "step": 690 | |
| }, | |
| { | |
| "ce_ib": 6.449003219604492, | |
| "ce_orig": 0.5917060971260071, | |
| "epoch": 0.099352983465133, | |
| "kl_loss": 0.1164952963590622, | |
| "loss_ib": 0.009049266576766968, | |
| "step": 691 | |
| }, | |
| { | |
| "ce_ib": 8.728141784667969, | |
| "ce_orig": 1.2859077453613281, | |
| "epoch": 0.09949676491732566, | |
| "kl_loss": 0.09028451889753342, | |
| "loss_ib": 0.008878297172486782, | |
| "step": 692 | |
| }, | |
| { | |
| "ce_ib": 11.696812629699707, | |
| "ce_orig": 1.4011276960372925, | |
| "epoch": 0.09964054636951833, | |
| "kl_loss": 0.0865880697965622, | |
| "loss_ib": 0.010177809745073318, | |
| "step": 693 | |
| }, | |
| { | |
| "ce_ib": 9.487892150878906, | |
| "ce_orig": 0.8238059282302856, | |
| "epoch": 0.099784327821711, | |
| "kl_loss": 0.09209860861301422, | |
| "loss_ib": 0.009348876774311066, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.09992810927390366, | |
| "grad_norm": 0.1301174759864807, | |
| "learning_rate": 4.999895158640141e-05, | |
| "loss": 0.9068, | |
| "step": 695 | |
| }, | |
| { | |
| "ce_ib": 6.869251728057861, | |
| "ce_orig": 0.7711926102638245, | |
| "epoch": 0.09992810927390366, | |
| "kl_loss": 0.17371296882629395, | |
| "loss_ib": 0.012120273895561695, | |
| "step": 695 | |
| }, | |
| { | |
| "ce_ib": 9.267475128173828, | |
| "ce_orig": 0.5941876173019409, | |
| "epoch": 0.10007189072609633, | |
| "kl_loss": 0.1114778146147728, | |
| "loss_ib": 0.010207627899944782, | |
| "step": 696 | |
| }, | |
| { | |
| "ce_ib": 7.718648910522461, | |
| "ce_orig": 0.9436888694763184, | |
| "epoch": 0.100215672178289, | |
| "kl_loss": 0.10313811898231506, | |
| "loss_ib": 0.009016230702400208, | |
| "step": 697 | |
| }, | |
| { | |
| "ce_ib": 7.209829807281494, | |
| "ce_orig": 0.6572908759117126, | |
| "epoch": 0.10035945363048167, | |
| "kl_loss": 0.11239567399024963, | |
| "loss_ib": 0.00922469887882471, | |
| "step": 698 | |
| }, | |
| { | |
| "ce_ib": 8.946769714355469, | |
| "ce_orig": 0.8208485841751099, | |
| "epoch": 0.10050323508267434, | |
| "kl_loss": 0.12747088074684143, | |
| "loss_ib": 0.010846929624676704, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.100647016534867, | |
| "grad_norm": 0.11714436113834381, | |
| "learning_rate": 4.999876636116145e-05, | |
| "loss": 0.8387, | |
| "step": 700 | |
| }, | |
| { | |
| "ce_ib": 9.900582313537598, | |
| "ce_orig": 0.894629180431366, | |
| "epoch": 0.100647016534867, | |
| "kl_loss": 0.13778752088546753, | |
| "loss_ib": 0.011839667335152626, | |
| "step": 700 | |
| }, | |
| { | |
| "ce_ib": 8.739860534667969, | |
| "ce_orig": 0.9482055902481079, | |
| "epoch": 0.10079079798705967, | |
| "kl_loss": 0.10394522547721863, | |
| "loss_ib": 0.009567191824316978, | |
| "step": 701 | |
| }, | |
| { | |
| "ce_ib": 7.199070930480957, | |
| "ce_orig": 0.8619567155838013, | |
| "epoch": 0.10093457943925234, | |
| "kl_loss": 0.13609068095684052, | |
| "loss_ib": 0.010404069907963276, | |
| "step": 702 | |
| }, | |
| { | |
| "ce_ib": 7.6442036628723145, | |
| "ce_orig": 0.8066157102584839, | |
| "epoch": 0.10107836089144501, | |
| "kl_loss": 0.09935668110847473, | |
| "loss_ib": 0.00878993608057499, | |
| "step": 703 | |
| }, | |
| { | |
| "ce_ib": 7.810359001159668, | |
| "ce_orig": 0.8702456951141357, | |
| "epoch": 0.10122214234363767, | |
| "kl_loss": 0.0937383696436882, | |
| "loss_ib": 0.008592098020017147, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.10136592379583034, | |
| "grad_norm": 0.11369116604328156, | |
| "learning_rate": 4.9998566077473645e-05, | |
| "loss": 0.84, | |
| "step": 705 | |
| }, | |
| { | |
| "ce_ib": 8.6611909866333, | |
| "ce_orig": 0.8015434145927429, | |
| "epoch": 0.10136592379583034, | |
| "kl_loss": 0.1313619613647461, | |
| "loss_ib": 0.010898693464696407, | |
| "step": 705 | |
| }, | |
| { | |
| "ce_ib": 8.346274375915527, | |
| "ce_orig": 0.8527460694313049, | |
| "epoch": 0.10150970524802301, | |
| "kl_loss": 0.09497249126434326, | |
| "loss_ib": 0.008921761997044086, | |
| "step": 706 | |
| }, | |
| { | |
| "ce_ib": 11.167051315307617, | |
| "ce_orig": 1.280219554901123, | |
| "epoch": 0.10165348670021568, | |
| "kl_loss": 0.13609477877616882, | |
| "loss_ib": 0.012388264760375023, | |
| "step": 707 | |
| }, | |
| { | |
| "ce_ib": 9.243364334106445, | |
| "ce_orig": 1.1910686492919922, | |
| "epoch": 0.10179726815240835, | |
| "kl_loss": 0.10804169625043869, | |
| "loss_ib": 0.010023767128586769, | |
| "step": 708 | |
| }, | |
| { | |
| "ce_ib": 6.902951240539551, | |
| "ce_orig": 0.6421184539794922, | |
| "epoch": 0.101941049604601, | |
| "kl_loss": 0.10724660754203796, | |
| "loss_ib": 0.008813805878162384, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.10208483105679367, | |
| "grad_norm": 0.10316238552331924, | |
| "learning_rate": 4.9998350735458646e-05, | |
| "loss": 0.8424, | |
| "step": 710 | |
| }, | |
| { | |
| "ce_ib": 8.496487617492676, | |
| "ce_orig": 0.8121188879013062, | |
| "epoch": 0.10208483105679367, | |
| "kl_loss": 0.1422605663537979, | |
| "loss_ib": 0.01136127207428217, | |
| "step": 710 | |
| }, | |
| { | |
| "ce_ib": 7.606898784637451, | |
| "ce_orig": 0.4156748056411743, | |
| "epoch": 0.10222861250898634, | |
| "kl_loss": 0.12221111357212067, | |
| "loss_ib": 0.009914005175232887, | |
| "step": 711 | |
| }, | |
| { | |
| "ce_ib": 8.244635581970215, | |
| "ce_orig": 0.9483067393302917, | |
| "epoch": 0.102372393961179, | |
| "kl_loss": 0.1223490834236145, | |
| "loss_ib": 0.010239771567285061, | |
| "step": 712 | |
| }, | |
| { | |
| "ce_ib": 8.075944900512695, | |
| "ce_orig": 0.9652693867683411, | |
| "epoch": 0.10251617541337167, | |
| "kl_loss": 0.11330495774745941, | |
| "loss_ib": 0.009703220799565315, | |
| "step": 713 | |
| }, | |
| { | |
| "ce_ib": 7.7777581214904785, | |
| "ce_orig": 0.6340931057929993, | |
| "epoch": 0.10265995686556434, | |
| "kl_loss": 0.11384841799736023, | |
| "loss_ib": 0.009581300429999828, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.102803738317757, | |
| "grad_norm": 0.11466086655855179, | |
| "learning_rate": 4.999812033524616e-05, | |
| "loss": 0.8816, | |
| "step": 715 | |
| }, | |
| { | |
| "ce_ib": 7.194620609283447, | |
| "ce_orig": 0.5225186944007874, | |
| "epoch": 0.102803738317757, | |
| "kl_loss": 0.16458478569984436, | |
| "loss_ib": 0.011826549656689167, | |
| "step": 715 | |
| }, | |
| { | |
| "ce_ib": 7.576101303100586, | |
| "ce_orig": 0.7184677124023438, | |
| "epoch": 0.10294751976994967, | |
| "kl_loss": 0.17702654004096985, | |
| "loss_ib": 0.012639378197491169, | |
| "step": 716 | |
| }, | |
| { | |
| "ce_ib": 10.157630920410156, | |
| "ce_orig": 0.9195736646652222, | |
| "epoch": 0.10309130122214234, | |
| "kl_loss": 0.11237962543964386, | |
| "loss_ib": 0.010697796940803528, | |
| "step": 717 | |
| }, | |
| { | |
| "ce_ib": 6.4610395431518555, | |
| "ce_orig": 0.2969205975532532, | |
| "epoch": 0.10323508267433501, | |
| "kl_loss": 0.3771587312221527, | |
| "loss_ib": 0.02208845689892769, | |
| "step": 718 | |
| }, | |
| { | |
| "ce_ib": 10.73964786529541, | |
| "ce_orig": 1.206357479095459, | |
| "epoch": 0.10337886412652768, | |
| "kl_loss": 0.11494225263595581, | |
| "loss_ib": 0.011116936802864075, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.10352264557872035, | |
| "grad_norm": 0.13988007605075836, | |
| "learning_rate": 4.9997874876974966e-05, | |
| "loss": 0.9343, | |
| "step": 720 | |
| }, | |
| { | |
| "ce_ib": 8.02586841583252, | |
| "ce_orig": 0.5530202984809875, | |
| "epoch": 0.10352264557872035, | |
| "kl_loss": 0.15824423730373383, | |
| "loss_ib": 0.011925145983695984, | |
| "step": 720 | |
| }, | |
| { | |
| "ce_ib": 7.987276077270508, | |
| "ce_orig": 0.8079979419708252, | |
| "epoch": 0.10366642703091301, | |
| "kl_loss": 0.09849292039871216, | |
| "loss_ib": 0.008918284438550472, | |
| "step": 721 | |
| }, | |
| { | |
| "ce_ib": 7.2635416984558105, | |
| "ce_orig": 1.058225154876709, | |
| "epoch": 0.10381020848310568, | |
| "kl_loss": 0.07580313086509705, | |
| "loss_ib": 0.007421927060931921, | |
| "step": 722 | |
| }, | |
| { | |
| "ce_ib": 11.01719856262207, | |
| "ce_orig": 1.5954816341400146, | |
| "epoch": 0.10395398993529835, | |
| "kl_loss": 0.1211317703127861, | |
| "loss_ib": 0.011565187945961952, | |
| "step": 723 | |
| }, | |
| { | |
| "ce_ib": 7.661947727203369, | |
| "ce_orig": 0.8771336078643799, | |
| "epoch": 0.10409777138749102, | |
| "kl_loss": 0.17039231956005096, | |
| "loss_ib": 0.012350589968264103, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.10424155283968368, | |
| "grad_norm": 0.11809071898460388, | |
| "learning_rate": 4.9997614360792934e-05, | |
| "loss": 0.9575, | |
| "step": 725 | |
| }, | |
| { | |
| "ce_ib": 9.20604133605957, | |
| "ce_orig": 0.8485362529754639, | |
| "epoch": 0.10424155283968368, | |
| "kl_loss": 0.10916964709758759, | |
| "loss_ib": 0.010061502456665039, | |
| "step": 725 | |
| }, | |
| { | |
| "ce_ib": 7.615476608276367, | |
| "ce_orig": 0.9565079808235168, | |
| "epoch": 0.10438533429187635, | |
| "kl_loss": 0.09149608016014099, | |
| "loss_ib": 0.008382542990148067, | |
| "step": 726 | |
| }, | |
| { | |
| "ce_ib": 8.909895896911621, | |
| "ce_orig": 1.0168468952178955, | |
| "epoch": 0.10452911574406902, | |
| "kl_loss": 0.10178525745868683, | |
| "loss_ib": 0.009544211439788342, | |
| "step": 727 | |
| }, | |
| { | |
| "ce_ib": 9.962603569030762, | |
| "ce_orig": 1.013784646987915, | |
| "epoch": 0.10467289719626169, | |
| "kl_loss": 0.0983659103512764, | |
| "loss_ib": 0.009899596683681011, | |
| "step": 728 | |
| }, | |
| { | |
| "ce_ib": 7.675144195556641, | |
| "ce_orig": 0.7497639060020447, | |
| "epoch": 0.10481667864845436, | |
| "kl_loss": 0.1465551257133484, | |
| "loss_ib": 0.011165328323841095, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.10496046010064701, | |
| "grad_norm": 0.10833033919334412, | |
| "learning_rate": 4.999733878685698e-05, | |
| "loss": 0.8967, | |
| "step": 730 | |
| }, | |
| { | |
| "ce_ib": 9.022236824035645, | |
| "ce_orig": 1.01668381690979, | |
| "epoch": 0.10496046010064701, | |
| "kl_loss": 0.12041808664798737, | |
| "loss_ib": 0.010532023385167122, | |
| "step": 730 | |
| }, | |
| { | |
| "ce_ib": 8.386323928833008, | |
| "ce_orig": 0.8513427376747131, | |
| "epoch": 0.10510424155283968, | |
| "kl_loss": 0.12366791814565659, | |
| "loss_ib": 0.01037655770778656, | |
| "step": 731 | |
| }, | |
| { | |
| "ce_ib": 11.588960647583008, | |
| "ce_orig": 1.5426373481750488, | |
| "epoch": 0.10524802300503235, | |
| "kl_loss": 0.13248568773269653, | |
| "loss_ib": 0.012418764643371105, | |
| "step": 732 | |
| }, | |
| { | |
| "ce_ib": 8.538729667663574, | |
| "ce_orig": 0.8282467126846313, | |
| "epoch": 0.10539180445722501, | |
| "kl_loss": 0.121379554271698, | |
| "loss_ib": 0.010338342748582363, | |
| "step": 733 | |
| }, | |
| { | |
| "ce_ib": 8.161299705505371, | |
| "ce_orig": 0.8536421060562134, | |
| "epoch": 0.10553558590941768, | |
| "kl_loss": 0.10607169568538666, | |
| "loss_ib": 0.009384234435856342, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.10567936736161035, | |
| "grad_norm": 0.12110400199890137, | |
| "learning_rate": 4.999704815533312e-05, | |
| "loss": 0.8765, | |
| "step": 735 | |
| }, | |
| { | |
| "ce_ib": 8.390252113342285, | |
| "ce_orig": 1.1886399984359741, | |
| "epoch": 0.10567936736161035, | |
| "kl_loss": 0.12097896635532379, | |
| "loss_ib": 0.010244074277579784, | |
| "step": 735 | |
| }, | |
| { | |
| "ce_ib": 9.484691619873047, | |
| "ce_orig": 1.0858343839645386, | |
| "epoch": 0.10582314881380302, | |
| "kl_loss": 0.11114107072353363, | |
| "loss_ib": 0.010299399495124817, | |
| "step": 736 | |
| }, | |
| { | |
| "ce_ib": 7.962080478668213, | |
| "ce_orig": 0.9100456237792969, | |
| "epoch": 0.10596693026599568, | |
| "kl_loss": 0.10763818770647049, | |
| "loss_ib": 0.00936294998973608, | |
| "step": 737 | |
| }, | |
| { | |
| "ce_ib": 9.301447868347168, | |
| "ce_orig": 1.0797451734542847, | |
| "epoch": 0.10611071171818835, | |
| "kl_loss": 0.08851733803749084, | |
| "loss_ib": 0.009076590649783611, | |
| "step": 738 | |
| }, | |
| { | |
| "ce_ib": 5.509831428527832, | |
| "ce_orig": 0.5435864925384521, | |
| "epoch": 0.10625449317038102, | |
| "kl_loss": 0.11892074346542358, | |
| "loss_ib": 0.008700952865183353, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.10639827462257369, | |
| "grad_norm": 0.12525738775730133, | |
| "learning_rate": 4.9996742466396395e-05, | |
| "loss": 0.9647, | |
| "step": 740 | |
| }, | |
| { | |
| "ce_ib": 7.956106662750244, | |
| "ce_orig": 0.5784890055656433, | |
| "epoch": 0.10639827462257369, | |
| "kl_loss": 0.0811728686094284, | |
| "loss_ib": 0.008036697283387184, | |
| "step": 740 | |
| }, | |
| { | |
| "ce_ib": 4.005490779876709, | |
| "ce_orig": 0.20808197557926178, | |
| "epoch": 0.10654205607476636, | |
| "kl_loss": 0.19454577565193176, | |
| "loss_ib": 0.011730033904314041, | |
| "step": 741 | |
| }, | |
| { | |
| "ce_ib": 5.741650581359863, | |
| "ce_orig": 0.5170265436172485, | |
| "epoch": 0.10668583752695902, | |
| "kl_loss": 0.15967172384262085, | |
| "loss_ib": 0.01085441093891859, | |
| "step": 742 | |
| }, | |
| { | |
| "ce_ib": 4.918848514556885, | |
| "ce_orig": 0.5805583596229553, | |
| "epoch": 0.10682961897915169, | |
| "kl_loss": 0.10687030851840973, | |
| "loss_ib": 0.007802939508110285, | |
| "step": 743 | |
| }, | |
| { | |
| "ce_ib": 10.824296951293945, | |
| "ce_orig": 1.5680608749389648, | |
| "epoch": 0.10697340043134436, | |
| "kl_loss": 0.0964423194527626, | |
| "loss_ib": 0.01023426465690136, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.10711718188353703, | |
| "grad_norm": 0.12692318856716156, | |
| "learning_rate": 4.9996421720230955e-05, | |
| "loss": 0.8577, | |
| "step": 745 | |
| }, | |
| { | |
| "ce_ib": 11.27613639831543, | |
| "ce_orig": 1.3031110763549805, | |
| "epoch": 0.10711718188353703, | |
| "kl_loss": 0.09824083745479584, | |
| "loss_ib": 0.010550110600888729, | |
| "step": 745 | |
| }, | |
| { | |
| "ce_ib": 9.67626953125, | |
| "ce_orig": 0.8898366689682007, | |
| "epoch": 0.1072609633357297, | |
| "kl_loss": 0.07953157275915146, | |
| "loss_ib": 0.008814713917672634, | |
| "step": 746 | |
| }, | |
| { | |
| "ce_ib": 6.659996509552002, | |
| "ce_orig": 0.6965008974075317, | |
| "epoch": 0.10740474478792236, | |
| "kl_loss": 0.15939068794250488, | |
| "loss_ib": 0.01129953283816576, | |
| "step": 747 | |
| }, | |
| { | |
| "ce_ib": 9.864056587219238, | |
| "ce_orig": 0.9512748122215271, | |
| "epoch": 0.10754852624011503, | |
| "kl_loss": 0.13909170031547546, | |
| "loss_ib": 0.011886613443493843, | |
| "step": 748 | |
| }, | |
| { | |
| "ce_ib": 7.443923473358154, | |
| "ce_orig": 0.8524958491325378, | |
| "epoch": 0.1076923076923077, | |
| "kl_loss": 0.11339938640594482, | |
| "loss_ib": 0.009391930885612965, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.10783608914450037, | |
| "grad_norm": 0.12534423172473907, | |
| "learning_rate": 4.999608591703001e-05, | |
| "loss": 0.8728, | |
| "step": 750 | |
| }, | |
| { | |
| "ce_ib": 4.702718257904053, | |
| "ce_orig": 0.37047120928764343, | |
| "epoch": 0.10783608914450037, | |
| "kl_loss": 0.14064157009124756, | |
| "loss_ib": 0.009383438155055046, | |
| "step": 750 | |
| }, | |
| { | |
| "ce_ib": 6.858421802520752, | |
| "ce_orig": 1.1404428482055664, | |
| "epoch": 0.10797987059669302, | |
| "kl_loss": 0.08100490272045135, | |
| "loss_ib": 0.007479456253349781, | |
| "step": 751 | |
| }, | |
| { | |
| "ce_ib": 8.411442756652832, | |
| "ce_orig": 0.8211296200752258, | |
| "epoch": 0.10812365204888569, | |
| "kl_loss": 0.11985696852207184, | |
| "loss_ib": 0.010198569856584072, | |
| "step": 752 | |
| }, | |
| { | |
| "ce_ib": 6.272851943969727, | |
| "ce_orig": 0.7544890642166138, | |
| "epoch": 0.10826743350107836, | |
| "kl_loss": 0.08774818480014801, | |
| "loss_ib": 0.007523834705352783, | |
| "step": 753 | |
| }, | |
| { | |
| "ce_ib": 8.928715705871582, | |
| "ce_orig": 0.9599756002426147, | |
| "epoch": 0.10841121495327102, | |
| "kl_loss": 0.11729443073272705, | |
| "loss_ib": 0.010329079814255238, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.10855499640546369, | |
| "grad_norm": 0.11862904578447342, | |
| "learning_rate": 4.9995735056995826e-05, | |
| "loss": 0.8637, | |
| "step": 755 | |
| }, | |
| { | |
| "ce_ib": 10.275476455688477, | |
| "ce_orig": 1.2619425058364868, | |
| "epoch": 0.10855499640546369, | |
| "kl_loss": 0.13122868537902832, | |
| "loss_ib": 0.011699172668159008, | |
| "step": 755 | |
| }, | |
| { | |
| "ce_ib": 8.515475273132324, | |
| "ce_orig": 0.8744072318077087, | |
| "epoch": 0.10869877785765636, | |
| "kl_loss": 0.14314191043376923, | |
| "loss_ib": 0.01141483336687088, | |
| "step": 756 | |
| }, | |
| { | |
| "ce_ib": 8.099058151245117, | |
| "ce_orig": 1.0449519157409668, | |
| "epoch": 0.10884255930984903, | |
| "kl_loss": 0.08819152414798737, | |
| "loss_ib": 0.008459105156362057, | |
| "step": 757 | |
| }, | |
| { | |
| "ce_ib": 7.1664228439331055, | |
| "ce_orig": 0.856492280960083, | |
| "epoch": 0.1089863407620417, | |
| "kl_loss": 0.07748554646968842, | |
| "loss_ib": 0.0074574886821210384, | |
| "step": 758 | |
| }, | |
| { | |
| "ce_ib": 8.10939884185791, | |
| "ce_orig": 0.7625494003295898, | |
| "epoch": 0.10913012221423436, | |
| "kl_loss": 0.12002036720514297, | |
| "loss_ib": 0.010055718012154102, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.10927390366642703, | |
| "grad_norm": 0.12130390107631683, | |
| "learning_rate": 4.999536914033977e-05, | |
| "loss": 0.864, | |
| "step": 760 | |
| }, | |
| { | |
| "ce_ib": 6.7783203125, | |
| "ce_orig": 0.6925073266029358, | |
| "epoch": 0.10927390366642703, | |
| "kl_loss": 0.14075952768325806, | |
| "loss_ib": 0.010427136905491352, | |
| "step": 760 | |
| }, | |
| { | |
| "ce_ib": 6.035461902618408, | |
| "ce_orig": 0.5443659424781799, | |
| "epoch": 0.1094176851186197, | |
| "kl_loss": 0.11947020888328552, | |
| "loss_ib": 0.008991241455078125, | |
| "step": 761 | |
| }, | |
| { | |
| "ce_ib": 10.941012382507324, | |
| "ce_orig": 1.4812273979187012, | |
| "epoch": 0.10956146657081237, | |
| "kl_loss": 0.09991130232810974, | |
| "loss_ib": 0.010466071777045727, | |
| "step": 762 | |
| }, | |
| { | |
| "ce_ib": 10.077688217163086, | |
| "ce_orig": 1.1687238216400146, | |
| "epoch": 0.10970524802300503, | |
| "kl_loss": 0.10050664842128754, | |
| "loss_ib": 0.01006417628377676, | |
| "step": 763 | |
| }, | |
| { | |
| "ce_ib": 7.7856125831604, | |
| "ce_orig": 0.8300952911376953, | |
| "epoch": 0.1098490294751977, | |
| "kl_loss": 0.11259394139051437, | |
| "loss_ib": 0.009522504173219204, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.10999281092739037, | |
| "grad_norm": 0.09898856282234192, | |
| "learning_rate": 4.999498816728223e-05, | |
| "loss": 0.8381, | |
| "step": 765 | |
| }, | |
| { | |
| "ce_ib": 6.172881126403809, | |
| "ce_orig": 0.695868968963623, | |
| "epoch": 0.10999281092739037, | |
| "kl_loss": 0.10089915245771408, | |
| "loss_ib": 0.008131398819386959, | |
| "step": 765 | |
| }, | |
| { | |
| "ce_ib": 7.305203437805176, | |
| "ce_orig": 0.7463586330413818, | |
| "epoch": 0.11013659237958304, | |
| "kl_loss": 0.09400838613510132, | |
| "loss_ib": 0.008353020995855331, | |
| "step": 766 | |
| }, | |
| { | |
| "ce_ib": 4.747533798217773, | |
| "ce_orig": 0.5122455954551697, | |
| "epoch": 0.1102803738317757, | |
| "kl_loss": 0.074123814702034, | |
| "loss_ib": 0.0060799578204751015, | |
| "step": 767 | |
| }, | |
| { | |
| "ce_ib": 6.877668857574463, | |
| "ce_orig": 0.5530845522880554, | |
| "epoch": 0.11042415528396837, | |
| "kl_loss": 0.11388804018497467, | |
| "loss_ib": 0.009133236482739449, | |
| "step": 768 | |
| }, | |
| { | |
| "ce_ib": 11.079581260681152, | |
| "ce_orig": 1.1505775451660156, | |
| "epoch": 0.11056793673616104, | |
| "kl_loss": 0.2215118706226349, | |
| "loss_ib": 0.016615385189652443, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.11071171818835371, | |
| "grad_norm": 0.13919848203659058, | |
| "learning_rate": 4.999459213805272e-05, | |
| "loss": 0.8708, | |
| "step": 770 | |
| }, | |
| { | |
| "ce_ib": 8.570969581604004, | |
| "ce_orig": 1.2589601278305054, | |
| "epoch": 0.11071171818835371, | |
| "kl_loss": 0.08060797303915024, | |
| "loss_ib": 0.008315883576869965, | |
| "step": 770 | |
| }, | |
| { | |
| "ce_ib": 7.168095588684082, | |
| "ce_orig": 0.8928415775299072, | |
| "epoch": 0.11085549964054638, | |
| "kl_loss": 0.10986876487731934, | |
| "loss_ib": 0.009077486582100391, | |
| "step": 771 | |
| }, | |
| { | |
| "ce_ib": 8.855610847473145, | |
| "ce_orig": 0.733345091342926, | |
| "epoch": 0.11099928109273903, | |
| "kl_loss": 0.12182736396789551, | |
| "loss_ib": 0.010519173927605152, | |
| "step": 772 | |
| }, | |
| { | |
| "ce_ib": 3.5407509803771973, | |
| "ce_orig": 0.3610730469226837, | |
| "epoch": 0.1111430625449317, | |
| "kl_loss": 0.14145609736442566, | |
| "loss_ib": 0.00884318072348833, | |
| "step": 773 | |
| }, | |
| { | |
| "ce_ib": 8.146163940429688, | |
| "ce_orig": 0.6188514828681946, | |
| "epoch": 0.11128684399712437, | |
| "kl_loss": 0.10034702718257904, | |
| "loss_ib": 0.009090433828532696, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.11143062544931703, | |
| "grad_norm": 0.11237223446369171, | |
| "learning_rate": 4.999418105288978e-05, | |
| "loss": 0.8281, | |
| "step": 775 | |
| }, | |
| { | |
| "ce_ib": 6.921514987945557, | |
| "ce_orig": 0.7439333200454712, | |
| "epoch": 0.11143062544931703, | |
| "kl_loss": 0.08917941153049469, | |
| "loss_ib": 0.007919727824628353, | |
| "step": 775 | |
| }, | |
| { | |
| "ce_ib": 6.831997871398926, | |
| "ce_orig": 0.7869361042976379, | |
| "epoch": 0.1115744069015097, | |
| "kl_loss": 0.10595273226499557, | |
| "loss_ib": 0.008713635616004467, | |
| "step": 776 | |
| }, | |
| { | |
| "ce_ib": 7.639923095703125, | |
| "ce_orig": 0.9338688254356384, | |
| "epoch": 0.11171818835370237, | |
| "kl_loss": 0.10657122731208801, | |
| "loss_ib": 0.009148523211479187, | |
| "step": 777 | |
| }, | |
| { | |
| "ce_ib": 6.877979755401611, | |
| "ce_orig": 0.7685055136680603, | |
| "epoch": 0.11186196980589504, | |
| "kl_loss": 0.09290573745965958, | |
| "loss_ib": 0.00808427669107914, | |
| "step": 778 | |
| }, | |
| { | |
| "ce_ib": 10.934708595275879, | |
| "ce_orig": 1.0183820724487305, | |
| "epoch": 0.1120057512580877, | |
| "kl_loss": 0.13018369674682617, | |
| "loss_ib": 0.01197653915733099, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.11214953271028037, | |
| "grad_norm": 0.1425534039735794, | |
| "learning_rate": 4.999375491204102e-05, | |
| "loss": 0.9258, | |
| "step": 780 | |
| }, | |
| { | |
| "ce_ib": 8.499329566955566, | |
| "ce_orig": 0.6566687822341919, | |
| "epoch": 0.11214953271028037, | |
| "kl_loss": 0.09959501028060913, | |
| "loss_ib": 0.009229416027665138, | |
| "step": 780 | |
| }, | |
| { | |
| "ce_ib": 8.128525733947754, | |
| "ce_orig": 0.7684027552604675, | |
| "epoch": 0.11229331416247304, | |
| "kl_loss": 0.0928315594792366, | |
| "loss_ib": 0.00870584137737751, | |
| "step": 781 | |
| }, | |
| { | |
| "ce_ib": 6.759765148162842, | |
| "ce_orig": 0.7490063309669495, | |
| "epoch": 0.11243709561466571, | |
| "kl_loss": 0.05619842931628227, | |
| "loss_ib": 0.006189804058521986, | |
| "step": 782 | |
| }, | |
| { | |
| "ce_ib": 7.713232517242432, | |
| "ce_orig": 0.9557498693466187, | |
| "epoch": 0.11258087706685838, | |
| "kl_loss": 0.10368062555789948, | |
| "loss_ib": 0.009040648117661476, | |
| "step": 783 | |
| }, | |
| { | |
| "ce_ib": 7.60745906829834, | |
| "ce_orig": 0.7716460227966309, | |
| "epoch": 0.11272465851905104, | |
| "kl_loss": 0.08124081790447235, | |
| "loss_ib": 0.00786577071994543, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.11286843997124371, | |
| "grad_norm": 0.13931146264076233, | |
| "learning_rate": 4.9993313715763166e-05, | |
| "loss": 0.8111, | |
| "step": 785 | |
| }, | |
| { | |
| "ce_ib": 9.313846588134766, | |
| "ce_orig": 1.1762357950210571, | |
| "epoch": 0.11286843997124371, | |
| "kl_loss": 0.12043385207653046, | |
| "loss_ib": 0.010678616352379322, | |
| "step": 785 | |
| }, | |
| { | |
| "ce_ib": 9.520804405212402, | |
| "ce_orig": 1.0862674713134766, | |
| "epoch": 0.11301222142343638, | |
| "kl_loss": 0.13194264471530914, | |
| "loss_ib": 0.011357533745467663, | |
| "step": 786 | |
| }, | |
| { | |
| "ce_ib": 5.819194793701172, | |
| "ce_orig": 0.5117652416229248, | |
| "epoch": 0.11315600287562905, | |
| "kl_loss": 0.08215292543172836, | |
| "loss_ib": 0.007017243653535843, | |
| "step": 787 | |
| }, | |
| { | |
| "ce_ib": 8.07127857208252, | |
| "ce_orig": 1.1504753828048706, | |
| "epoch": 0.11329978432782171, | |
| "kl_loss": 0.09762965887784958, | |
| "loss_ib": 0.008917122147977352, | |
| "step": 788 | |
| }, | |
| { | |
| "ce_ib": 7.960126876831055, | |
| "ce_orig": 0.7059429883956909, | |
| "epoch": 0.11344356578001438, | |
| "kl_loss": 0.14231815934181213, | |
| "loss_ib": 0.011095970869064331, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.11358734723220705, | |
| "grad_norm": 0.12224866449832916, | |
| "learning_rate": 4.9992857464321963e-05, | |
| "loss": 0.862, | |
| "step": 790 | |
| }, | |
| { | |
| "ce_ib": 7.631105899810791, | |
| "ce_orig": 0.8110905289649963, | |
| "epoch": 0.11358734723220705, | |
| "kl_loss": 0.08087232708930969, | |
| "loss_ib": 0.007859169505536556, | |
| "step": 790 | |
| }, | |
| { | |
| "ce_ib": 7.410162448883057, | |
| "ce_orig": 0.6299762725830078, | |
| "epoch": 0.11373112868439972, | |
| "kl_loss": 0.12838461995124817, | |
| "loss_ib": 0.010124312713742256, | |
| "step": 791 | |
| }, | |
| { | |
| "ce_ib": 8.221912384033203, | |
| "ce_orig": 0.9979586005210876, | |
| "epoch": 0.11387491013659239, | |
| "kl_loss": 0.1258949488401413, | |
| "loss_ib": 0.010405703447759151, | |
| "step": 792 | |
| }, | |
| { | |
| "ce_ib": 6.832244873046875, | |
| "ce_orig": 0.7152706384658813, | |
| "epoch": 0.11401869158878504, | |
| "kl_loss": 0.08785121142864227, | |
| "loss_ib": 0.0078086829744279385, | |
| "step": 793 | |
| }, | |
| { | |
| "ce_ib": 7.743945598602295, | |
| "ce_orig": 0.9861294627189636, | |
| "epoch": 0.11416247304097771, | |
| "kl_loss": 0.09959419816732407, | |
| "loss_ib": 0.00885168369859457, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.11430625449317038, | |
| "grad_norm": 0.12471532076597214, | |
| "learning_rate": 4.9992386157992246e-05, | |
| "loss": 0.8318, | |
| "step": 795 | |
| }, | |
| { | |
| "ce_ib": 7.021468162536621, | |
| "ce_orig": 0.7713713049888611, | |
| "epoch": 0.11430625449317038, | |
| "kl_loss": 0.07618668675422668, | |
| "loss_ib": 0.007320068776607513, | |
| "step": 795 | |
| }, | |
| { | |
| "ce_ib": 7.368815898895264, | |
| "ce_orig": 1.1165920495986938, | |
| "epoch": 0.11445003594536304, | |
| "kl_loss": 0.07658970355987549, | |
| "loss_ib": 0.007513892836868763, | |
| "step": 796 | |
| }, | |
| { | |
| "ce_ib": 6.703334808349609, | |
| "ce_orig": 0.5481064915657043, | |
| "epoch": 0.11459381739755571, | |
| "kl_loss": 0.09191010147333145, | |
| "loss_ib": 0.007947172038257122, | |
| "step": 797 | |
| }, | |
| { | |
| "ce_ib": 9.726116180419922, | |
| "ce_orig": 1.2227307558059692, | |
| "epoch": 0.11473759884974838, | |
| "kl_loss": 0.09769535809755325, | |
| "loss_ib": 0.009747825562953949, | |
| "step": 798 | |
| }, | |
| { | |
| "ce_ib": 8.778748512268066, | |
| "ce_orig": 0.9550623893737793, | |
| "epoch": 0.11488138030194105, | |
| "kl_loss": 0.08937544375658035, | |
| "loss_ib": 0.008858147077262402, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.11502516175413371, | |
| "grad_norm": 0.12755703926086426, | |
| "learning_rate": 4.9991899797057904e-05, | |
| "loss": 0.9259, | |
| "step": 800 | |
| }, | |
| { | |
| "ce_ib": 10.388070106506348, | |
| "ce_orig": 1.069626808166504, | |
| "epoch": 0.11502516175413371, | |
| "kl_loss": 0.12364666163921356, | |
| "loss_ib": 0.011376367881894112, | |
| "step": 800 | |
| }, | |
| { | |
| "ce_ib": 6.369958400726318, | |
| "ce_orig": 0.8895975947380066, | |
| "epoch": 0.11516894320632638, | |
| "kl_loss": 0.06259946525096893, | |
| "loss_ib": 0.0063149528577923775, | |
| "step": 801 | |
| }, | |
| { | |
| "ce_ib": 9.586531639099121, | |
| "ce_orig": 1.2012348175048828, | |
| "epoch": 0.11531272465851905, | |
| "kl_loss": 0.09361310303211212, | |
| "loss_ib": 0.009473921731114388, | |
| "step": 802 | |
| }, | |
| { | |
| "ce_ib": 7.590814113616943, | |
| "ce_orig": 1.0715209245681763, | |
| "epoch": 0.11545650611071172, | |
| "kl_loss": 0.10730178654193878, | |
| "loss_ib": 0.009160496294498444, | |
| "step": 803 | |
| }, | |
| { | |
| "ce_ib": 8.784406661987305, | |
| "ce_orig": 1.1157152652740479, | |
| "epoch": 0.11560028756290439, | |
| "kl_loss": 0.10003470629453659, | |
| "loss_ib": 0.009393938817083836, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.11574406901509705, | |
| "grad_norm": 0.13564252853393555, | |
| "learning_rate": 4.9991398381811924e-05, | |
| "loss": 0.892, | |
| "step": 805 | |
| }, | |
| { | |
| "ce_ib": 6.886279106140137, | |
| "ce_orig": 0.6827164888381958, | |
| "epoch": 0.11574406901509705, | |
| "kl_loss": 0.08341995626688004, | |
| "loss_ib": 0.007614137139171362, | |
| "step": 805 | |
| }, | |
| { | |
| "ce_ib": 7.802131175994873, | |
| "ce_orig": 1.107555627822876, | |
| "epoch": 0.11588785046728972, | |
| "kl_loss": 0.08331786096096039, | |
| "loss_ib": 0.008066958747804165, | |
| "step": 806 | |
| }, | |
| { | |
| "ce_ib": 7.465454578399658, | |
| "ce_orig": 0.8235023617744446, | |
| "epoch": 0.11603163191948239, | |
| "kl_loss": 0.0998779833316803, | |
| "loss_ib": 0.008726626634597778, | |
| "step": 807 | |
| }, | |
| { | |
| "ce_ib": 5.956389427185059, | |
| "ce_orig": 0.83247971534729, | |
| "epoch": 0.11617541337167506, | |
| "kl_loss": 0.07732568681240082, | |
| "loss_ib": 0.006844479124993086, | |
| "step": 808 | |
| }, | |
| { | |
| "ce_ib": 5.556079864501953, | |
| "ce_orig": 0.6450206637382507, | |
| "epoch": 0.11631919482386772, | |
| "kl_loss": 0.08965234458446503, | |
| "loss_ib": 0.0072606573812663555, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.11646297627606039, | |
| "grad_norm": 0.11268489807844162, | |
| "learning_rate": 4.999088191255632e-05, | |
| "loss": 0.8579, | |
| "step": 810 | |
| }, | |
| { | |
| "ce_ib": 8.282671928405762, | |
| "ce_orig": 0.7875933051109314, | |
| "epoch": 0.11646297627606039, | |
| "kl_loss": 0.11104097217321396, | |
| "loss_ib": 0.009693384170532227, | |
| "step": 810 | |
| }, | |
| { | |
| "ce_ib": 7.059372901916504, | |
| "ce_orig": 0.8186449408531189, | |
| "epoch": 0.11660675772825306, | |
| "kl_loss": 0.1103602796792984, | |
| "loss_ib": 0.00904770102351904, | |
| "step": 811 | |
| }, | |
| { | |
| "ce_ib": 8.647476196289062, | |
| "ce_orig": 1.0445913076400757, | |
| "epoch": 0.11675053918044573, | |
| "kl_loss": 0.10142830014228821, | |
| "loss_ib": 0.009395153261721134, | |
| "step": 812 | |
| }, | |
| { | |
| "ce_ib": 7.812716007232666, | |
| "ce_orig": 0.9563208222389221, | |
| "epoch": 0.1168943206326384, | |
| "kl_loss": 0.07602293789386749, | |
| "loss_ib": 0.0077075050212442875, | |
| "step": 813 | |
| }, | |
| { | |
| "ce_ib": 7.100307941436768, | |
| "ce_orig": 0.6869685649871826, | |
| "epoch": 0.11703810208483105, | |
| "kl_loss": 0.13484741747379303, | |
| "loss_ib": 0.01029252540320158, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.11718188353702372, | |
| "grad_norm": 0.145658478140831, | |
| "learning_rate": 4.9990350389602214e-05, | |
| "loss": 0.8295, | |
| "step": 815 | |
| }, | |
| { | |
| "ce_ib": 7.6031060218811035, | |
| "ce_orig": 0.7811675667762756, | |
| "epoch": 0.11718188353702372, | |
| "kl_loss": 0.14399005472660065, | |
| "loss_ib": 0.01100105606019497, | |
| "step": 815 | |
| }, | |
| { | |
| "ce_ib": 7.256862640380859, | |
| "ce_orig": 0.7083752751350403, | |
| "epoch": 0.11732566498921639, | |
| "kl_loss": 0.14461134374141693, | |
| "loss_ib": 0.010858998633921146, | |
| "step": 816 | |
| }, | |
| { | |
| "ce_ib": 6.885143280029297, | |
| "ce_orig": 0.8050568699836731, | |
| "epoch": 0.11746944644140905, | |
| "kl_loss": 0.11550889909267426, | |
| "loss_ib": 0.009218016639351845, | |
| "step": 817 | |
| }, | |
| { | |
| "ce_ib": 6.232802391052246, | |
| "ce_orig": 0.5001097321510315, | |
| "epoch": 0.11761322789360172, | |
| "kl_loss": 0.1038711816072464, | |
| "loss_ib": 0.00830996036529541, | |
| "step": 818 | |
| }, | |
| { | |
| "ce_ib": 5.1833367347717285, | |
| "ce_orig": 0.37858033180236816, | |
| "epoch": 0.11775700934579439, | |
| "kl_loss": 0.11628204584121704, | |
| "loss_ib": 0.008405770175158978, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.11790079079798706, | |
| "grad_norm": 0.11669674515724182, | |
| "learning_rate": 4.9989803813269775e-05, | |
| "loss": 0.7666, | |
| "step": 820 | |
| }, | |
| { | |
| "ce_ib": 7.362185478210449, | |
| "ce_orig": 0.9856128692626953, | |
| "epoch": 0.11790079079798706, | |
| "kl_loss": 0.09990614652633667, | |
| "loss_ib": 0.008676400408148766, | |
| "step": 820 | |
| }, | |
| { | |
| "ce_ib": 6.022683620452881, | |
| "ce_orig": 0.6719481348991394, | |
| "epoch": 0.11804457225017972, | |
| "kl_loss": 0.09421147406101227, | |
| "loss_ib": 0.0077219158411026, | |
| "step": 821 | |
| }, | |
| { | |
| "ce_ib": 7.54342794418335, | |
| "ce_orig": 0.9077051877975464, | |
| "epoch": 0.11818835370237239, | |
| "kl_loss": 0.09480626881122589, | |
| "loss_ib": 0.008512027561664581, | |
| "step": 822 | |
| }, | |
| { | |
| "ce_ib": 7.5844597816467285, | |
| "ce_orig": 0.8468344807624817, | |
| "epoch": 0.11833213515456506, | |
| "kl_loss": 0.09350645542144775, | |
| "loss_ib": 0.008467552252113819, | |
| "step": 823 | |
| }, | |
| { | |
| "ce_ib": 9.546996116638184, | |
| "ce_orig": 0.9645229578018188, | |
| "epoch": 0.11847591660675773, | |
| "kl_loss": 0.10462982207536697, | |
| "loss_ib": 0.010004988871514797, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.1186196980589504, | |
| "grad_norm": 0.1411919742822647, | |
| "learning_rate": 4.998924218388824e-05, | |
| "loss": 0.8377, | |
| "step": 825 | |
| }, | |
| { | |
| "ce_ib": 6.855606555938721, | |
| "ce_orig": 0.6925601363182068, | |
| "epoch": 0.1186196980589504, | |
| "kl_loss": 0.11507317423820496, | |
| "loss_ib": 0.009181462228298187, | |
| "step": 825 | |
| }, | |
| { | |
| "ce_ib": 7.184199333190918, | |
| "ce_orig": 0.7112440466880798, | |
| "epoch": 0.11876347951114306, | |
| "kl_loss": 0.11154375225305557, | |
| "loss_ib": 0.009169287048280239, | |
| "step": 826 | |
| }, | |
| { | |
| "ce_ib": 6.221945762634277, | |
| "ce_orig": 0.7550656199455261, | |
| "epoch": 0.11890726096333573, | |
| "kl_loss": 0.08264736086130142, | |
| "loss_ib": 0.00724334130063653, | |
| "step": 827 | |
| }, | |
| { | |
| "ce_ib": 6.1294331550598145, | |
| "ce_orig": 0.8655829429626465, | |
| "epoch": 0.1190510424155284, | |
| "kl_loss": 0.2529202103614807, | |
| "loss_ib": 0.015710726380348206, | |
| "step": 828 | |
| }, | |
| { | |
| "ce_ib": 7.718718528747559, | |
| "ce_orig": 0.9449477195739746, | |
| "epoch": 0.11919482386772107, | |
| "kl_loss": 0.1413031816482544, | |
| "loss_ib": 0.01092451810836792, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.11933860531991373, | |
| "grad_norm": 0.13230349123477936, | |
| "learning_rate": 4.998866550179591e-05, | |
| "loss": 0.8497, | |
| "step": 830 | |
| }, | |
| { | |
| "ce_ib": 8.910257339477539, | |
| "ce_orig": 1.0684385299682617, | |
| "epoch": 0.11933860531991373, | |
| "kl_loss": 0.08060315996408463, | |
| "loss_ib": 0.008485286496579647, | |
| "step": 830 | |
| }, | |
| { | |
| "ce_ib": 6.161970138549805, | |
| "ce_orig": 0.7749412059783936, | |
| "epoch": 0.1194823867721064, | |
| "kl_loss": 0.09046103060245514, | |
| "loss_ib": 0.00760403648018837, | |
| "step": 831 | |
| }, | |
| { | |
| "ce_ib": 8.188762664794922, | |
| "ce_orig": 1.1623848676681519, | |
| "epoch": 0.11962616822429907, | |
| "kl_loss": 0.1053881123661995, | |
| "loss_ib": 0.00936378724873066, | |
| "step": 832 | |
| }, | |
| { | |
| "ce_ib": 8.579145431518555, | |
| "ce_orig": 1.0423914194107056, | |
| "epoch": 0.11976994967649174, | |
| "kl_loss": 0.0925225242972374, | |
| "loss_ib": 0.00891569908708334, | |
| "step": 833 | |
| }, | |
| { | |
| "ce_ib": 6.8061676025390625, | |
| "ce_orig": 0.7699450850486755, | |
| "epoch": 0.1199137311286844, | |
| "kl_loss": 0.1753084808588028, | |
| "loss_ib": 0.01216850709170103, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.12005751258087707, | |
| "grad_norm": 0.17299415171146393, | |
| "learning_rate": 4.9988073767340174e-05, | |
| "loss": 0.8972, | |
| "step": 835 | |
| }, | |
| { | |
| "ce_ib": 11.318516731262207, | |
| "ce_orig": 1.7190030813217163, | |
| "epoch": 0.12005751258087707, | |
| "kl_loss": 0.0838870257139206, | |
| "loss_ib": 0.009853609837591648, | |
| "step": 835 | |
| }, | |
| { | |
| "ce_ib": 6.024441242218018, | |
| "ce_orig": 0.5168812274932861, | |
| "epoch": 0.12020129403306973, | |
| "kl_loss": 0.08710253238677979, | |
| "loss_ib": 0.007367347367107868, | |
| "step": 836 | |
| }, | |
| { | |
| "ce_ib": 9.387094497680664, | |
| "ce_orig": 0.5673519968986511, | |
| "epoch": 0.1203450754852624, | |
| "kl_loss": 0.08531000465154648, | |
| "loss_ib": 0.008959047496318817, | |
| "step": 837 | |
| }, | |
| { | |
| "ce_ib": 3.3088152408599854, | |
| "ce_orig": 0.40009650588035583, | |
| "epoch": 0.12048885693745506, | |
| "kl_loss": 0.14413371682167053, | |
| "loss_ib": 0.008861093781888485, | |
| "step": 838 | |
| }, | |
| { | |
| "ce_ib": 5.766994953155518, | |
| "ce_orig": 0.5750284790992737, | |
| "epoch": 0.12063263838964773, | |
| "kl_loss": 0.0825929120182991, | |
| "loss_ib": 0.007013143040239811, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.1207764198418404, | |
| "grad_norm": 0.15587636828422546, | |
| "learning_rate": 4.998746698087745e-05, | |
| "loss": 0.9364, | |
| "step": 840 | |
| }, | |
| { | |
| "ce_ib": 8.854097366333008, | |
| "ce_orig": 1.37924063205719, | |
| "epoch": 0.1207764198418404, | |
| "kl_loss": 0.12135547399520874, | |
| "loss_ib": 0.010494822636246681, | |
| "step": 840 | |
| }, | |
| { | |
| "ce_ib": 5.431858062744141, | |
| "ce_orig": 0.7637268304824829, | |
| "epoch": 0.12092020129403307, | |
| "kl_loss": 0.07992113381624222, | |
| "loss_ib": 0.006711985915899277, | |
| "step": 841 | |
| }, | |
| { | |
| "ce_ib": 8.615471839904785, | |
| "ce_orig": 1.1368523836135864, | |
| "epoch": 0.12106398274622573, | |
| "kl_loss": 0.10841356217861176, | |
| "loss_ib": 0.009728414006531239, | |
| "step": 842 | |
| }, | |
| { | |
| "ce_ib": 6.431497573852539, | |
| "ce_orig": 0.6517418622970581, | |
| "epoch": 0.1212077641984184, | |
| "kl_loss": 0.12224148958921432, | |
| "loss_ib": 0.009327823296189308, | |
| "step": 843 | |
| }, | |
| { | |
| "ce_ib": 6.216892719268799, | |
| "ce_orig": 0.9238556623458862, | |
| "epoch": 0.12135154565061107, | |
| "kl_loss": 0.07222751528024673, | |
| "loss_ib": 0.006719821598380804, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.12149532710280374, | |
| "grad_norm": 0.11728943139314651, | |
| "learning_rate": 4.9986845142773275e-05, | |
| "loss": 0.97, | |
| "step": 845 | |
| }, | |
| { | |
| "ce_ib": 7.411965847015381, | |
| "ce_orig": 0.9339240193367004, | |
| "epoch": 0.12149532710280374, | |
| "kl_loss": 0.1781131625175476, | |
| "loss_ib": 0.01261164154857397, | |
| "step": 845 | |
| }, | |
| { | |
| "ce_ib": 6.76970100402832, | |
| "ce_orig": 0.8812845945358276, | |
| "epoch": 0.1216391085549964, | |
| "kl_loss": 0.1000911146402359, | |
| "loss_ib": 0.008389405906200409, | |
| "step": 846 | |
| }, | |
| { | |
| "ce_ib": 4.417732238769531, | |
| "ce_orig": 0.5549830198287964, | |
| "epoch": 0.12178289000718907, | |
| "kl_loss": 0.06278645992279053, | |
| "loss_ib": 0.005348189268261194, | |
| "step": 847 | |
| }, | |
| { | |
| "ce_ib": 7.800521373748779, | |
| "ce_orig": 0.6102645993232727, | |
| "epoch": 0.12192667145938174, | |
| "kl_loss": 0.12678933143615723, | |
| "loss_ib": 0.01023972686380148, | |
| "step": 848 | |
| }, | |
| { | |
| "ce_ib": 9.005143165588379, | |
| "ce_orig": 0.9449028968811035, | |
| "epoch": 0.12207045291157441, | |
| "kl_loss": 0.10566647350788116, | |
| "loss_ib": 0.009785895235836506, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.12221423436376708, | |
| "grad_norm": 0.12012098729610443, | |
| "learning_rate": 4.998620825340221e-05, | |
| "loss": 0.7973, | |
| "step": 850 | |
| }, | |
| { | |
| "ce_ib": 5.912694454193115, | |
| "ce_orig": 0.6100848317146301, | |
| "epoch": 0.12221423436376708, | |
| "kl_loss": 0.12130458652973175, | |
| "loss_ib": 0.0090215764939785, | |
| "step": 850 | |
| }, | |
| { | |
| "ce_ib": 6.602824687957764, | |
| "ce_orig": 0.6862348318099976, | |
| "epoch": 0.12235801581595974, | |
| "kl_loss": 0.10095177590847015, | |
| "loss_ib": 0.008349001407623291, | |
| "step": 851 | |
| }, | |
| { | |
| "ce_ib": 8.233159065246582, | |
| "ce_orig": 0.880978524684906, | |
| "epoch": 0.12250179726815241, | |
| "kl_loss": 0.074901282787323, | |
| "loss_ib": 0.00786164402961731, | |
| "step": 852 | |
| }, | |
| { | |
| "ce_ib": 7.939795017242432, | |
| "ce_orig": 0.9793850779533386, | |
| "epoch": 0.12264557872034508, | |
| "kl_loss": 0.10613537579774857, | |
| "loss_ib": 0.00927666574716568, | |
| "step": 853 | |
| }, | |
| { | |
| "ce_ib": 6.5190935134887695, | |
| "ce_orig": 0.9610397815704346, | |
| "epoch": 0.12278936017253775, | |
| "kl_loss": 0.16304026544094086, | |
| "loss_ib": 0.011411559768021107, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.12293314162473042, | |
| "grad_norm": 0.11487976461648941, | |
| "learning_rate": 4.9985556313147895e-05, | |
| "loss": 0.9548, | |
| "step": 855 | |
| }, | |
| { | |
| "ce_ib": 7.369608402252197, | |
| "ce_orig": 0.6912807822227478, | |
| "epoch": 0.12293314162473042, | |
| "kl_loss": 0.12601375579833984, | |
| "loss_ib": 0.009985491633415222, | |
| "step": 855 | |
| }, | |
| { | |
| "ce_ib": 8.943307876586914, | |
| "ce_orig": 1.071799397468567, | |
| "epoch": 0.12307692307692308, | |
| "kl_loss": 0.1637842357158661, | |
| "loss_ib": 0.012660865671932697, | |
| "step": 856 | |
| }, | |
| { | |
| "ce_ib": 7.475729465484619, | |
| "ce_orig": 0.9372177124023438, | |
| "epoch": 0.12322070452911574, | |
| "kl_loss": 0.08353350311517715, | |
| "loss_ib": 0.007914540357887745, | |
| "step": 857 | |
| }, | |
| { | |
| "ce_ib": 4.192291736602783, | |
| "ce_orig": 0.4509224593639374, | |
| "epoch": 0.1233644859813084, | |
| "kl_loss": 0.08783835917711258, | |
| "loss_ib": 0.0064880638383328915, | |
| "step": 858 | |
| }, | |
| { | |
| "ce_ib": 6.574756145477295, | |
| "ce_orig": 0.7229125499725342, | |
| "epoch": 0.12350826743350107, | |
| "kl_loss": 0.12410986423492432, | |
| "loss_ib": 0.009492871351540089, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.12365204888569374, | |
| "grad_norm": 0.10187579691410065, | |
| "learning_rate": 4.998488932240305e-05, | |
| "loss": 0.896, | |
| "step": 860 | |
| }, | |
| { | |
| "ce_ib": 6.708831310272217, | |
| "ce_orig": 0.624555766582489, | |
| "epoch": 0.12365204888569374, | |
| "kl_loss": 0.10983145236968994, | |
| "loss_ib": 0.008845987729728222, | |
| "step": 860 | |
| }, | |
| { | |
| "ce_ib": 11.562445640563965, | |
| "ce_orig": 1.5727458000183105, | |
| "epoch": 0.12379583033788641, | |
| "kl_loss": 0.11283396929502487, | |
| "loss_ib": 0.011422920972108841, | |
| "step": 861 | |
| }, | |
| { | |
| "ce_ib": 6.905985355377197, | |
| "ce_orig": 0.5196636319160461, | |
| "epoch": 0.12393961179007908, | |
| "kl_loss": 0.08311201632022858, | |
| "loss_ib": 0.007608593441545963, | |
| "step": 862 | |
| }, | |
| { | |
| "ce_ib": 6.673101425170898, | |
| "ce_orig": 0.7954714298248291, | |
| "epoch": 0.12408339324227174, | |
| "kl_loss": 0.12156697362661362, | |
| "loss_ib": 0.009414899162948132, | |
| "step": 863 | |
| }, | |
| { | |
| "ce_ib": 9.510821342468262, | |
| "ce_orig": 1.0977836847305298, | |
| "epoch": 0.12422717469446441, | |
| "kl_loss": 0.11292783170938492, | |
| "loss_ib": 0.010401802137494087, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.12437095614665708, | |
| "grad_norm": 0.10984697192907333, | |
| "learning_rate": 4.9984207281569426e-05, | |
| "loss": 0.8947, | |
| "step": 865 | |
| }, | |
| { | |
| "ce_ib": 6.371394634246826, | |
| "ce_orig": 0.8775607347488403, | |
| "epoch": 0.12437095614665708, | |
| "kl_loss": 0.10108557343482971, | |
| "loss_ib": 0.00823997613042593, | |
| "step": 865 | |
| }, | |
| { | |
| "ce_ib": 8.300228118896484, | |
| "ce_orig": 0.8505659103393555, | |
| "epoch": 0.12451473759884975, | |
| "kl_loss": 0.13992467522621155, | |
| "loss_ib": 0.011146347038447857, | |
| "step": 866 | |
| }, | |
| { | |
| "ce_ib": 7.113746166229248, | |
| "ce_orig": 0.6279032826423645, | |
| "epoch": 0.12465851905104242, | |
| "kl_loss": 0.07766470313072205, | |
| "loss_ib": 0.007440108340233564, | |
| "step": 867 | |
| }, | |
| { | |
| "ce_ib": 9.46139907836914, | |
| "ce_orig": 0.982869029045105, | |
| "epoch": 0.12480230050323508, | |
| "kl_loss": 0.13695910573005676, | |
| "loss_ib": 0.011578655801713467, | |
| "step": 868 | |
| }, | |
| { | |
| "ce_ib": 6.4365973472595215, | |
| "ce_orig": 0.7115389108657837, | |
| "epoch": 0.12494608195542775, | |
| "kl_loss": 0.08151569217443466, | |
| "loss_ib": 0.00729408347979188, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.1250898634076204, | |
| "grad_norm": 0.14856663346290588, | |
| "learning_rate": 4.998351019105789e-05, | |
| "loss": 0.8044, | |
| "step": 870 | |
| }, | |
| { | |
| "ce_ib": 7.635079383850098, | |
| "ce_orig": 0.9380092620849609, | |
| "epoch": 0.1250898634076204, | |
| "kl_loss": 0.08349855244159698, | |
| "loss_ib": 0.00799246784299612, | |
| "step": 870 | |
| }, | |
| { | |
| "ce_ib": 6.534551620483398, | |
| "ce_orig": 0.5008100867271423, | |
| "epoch": 0.1252336448598131, | |
| "kl_loss": 0.14466875791549683, | |
| "loss_ib": 0.010500714182853699, | |
| "step": 871 | |
| }, | |
| { | |
| "ce_ib": 7.542534828186035, | |
| "ce_orig": 1.1307094097137451, | |
| "epoch": 0.12537742631200574, | |
| "kl_loss": 0.10606865584850311, | |
| "loss_ib": 0.009074700064957142, | |
| "step": 872 | |
| }, | |
| { | |
| "ce_ib": 5.353913307189941, | |
| "ce_orig": 0.5838197469711304, | |
| "epoch": 0.12552120776419842, | |
| "kl_loss": 0.21842791140079498, | |
| "loss_ib": 0.013598352670669556, | |
| "step": 873 | |
| }, | |
| { | |
| "ce_ib": 6.852460861206055, | |
| "ce_orig": 0.898078978061676, | |
| "epoch": 0.12566498921639108, | |
| "kl_loss": 0.19777730107307434, | |
| "loss_ib": 0.013315095566213131, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.12580877066858376, | |
| "grad_norm": 0.16305744647979736, | |
| "learning_rate": 4.9982798051288326e-05, | |
| "loss": 0.8503, | |
| "step": 875 | |
| }, | |
| { | |
| "ce_ib": 5.873923301696777, | |
| "ce_orig": 0.4752177894115448, | |
| "epoch": 0.12580877066858376, | |
| "kl_loss": 0.06173687055706978, | |
| "loss_ib": 0.006023805122822523, | |
| "step": 875 | |
| }, | |
| { | |
| "ce_ib": 6.247898101806641, | |
| "ce_orig": 0.8174537420272827, | |
| "epoch": 0.1259525521207764, | |
| "kl_loss": 0.0691542774438858, | |
| "loss_ib": 0.006581662688404322, | |
| "step": 876 | |
| }, | |
| { | |
| "ce_ib": 3.595930814743042, | |
| "ce_orig": 0.4554266929626465, | |
| "epoch": 0.1260963335729691, | |
| "kl_loss": 0.1204795241355896, | |
| "loss_ib": 0.007821941748261452, | |
| "step": 877 | |
| }, | |
| { | |
| "ce_ib": 5.832032203674316, | |
| "ce_orig": 0.7023007273674011, | |
| "epoch": 0.12624011502516175, | |
| "kl_loss": 0.12059217691421509, | |
| "loss_ib": 0.008945624344050884, | |
| "step": 878 | |
| }, | |
| { | |
| "ce_ib": 5.493231773376465, | |
| "ce_orig": 0.74711012840271, | |
| "epoch": 0.12638389647735443, | |
| "kl_loss": 0.13393926620483398, | |
| "loss_ib": 0.009443579241633415, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.12652767792954708, | |
| "grad_norm": 0.11151118576526642, | |
| "learning_rate": 4.998207086268971e-05, | |
| "loss": 0.7946, | |
| "step": 880 | |
| }, | |
| { | |
| "ce_ib": 7.743311882019043, | |
| "ce_orig": 1.0406618118286133, | |
| "epoch": 0.12652767792954708, | |
| "kl_loss": 0.07510361075401306, | |
| "loss_ib": 0.007626836188137531, | |
| "step": 880 | |
| }, | |
| { | |
| "ce_ib": 6.917181015014648, | |
| "ce_orig": 0.9735674262046814, | |
| "epoch": 0.12667145938173976, | |
| "kl_loss": 0.0983152836561203, | |
| "loss_ib": 0.008374354802072048, | |
| "step": 881 | |
| }, | |
| { | |
| "ce_ib": 8.613320350646973, | |
| "ce_orig": 1.0136125087738037, | |
| "epoch": 0.12681524083393242, | |
| "kl_loss": 0.12226949632167816, | |
| "loss_ib": 0.010420135222375393, | |
| "step": 882 | |
| }, | |
| { | |
| "ce_ib": 10.181398391723633, | |
| "ce_orig": 1.487206220626831, | |
| "epoch": 0.1269590222861251, | |
| "kl_loss": 0.08668573200702667, | |
| "loss_ib": 0.009424986317753792, | |
| "step": 883 | |
| }, | |
| { | |
| "ce_ib": 6.982693672180176, | |
| "ce_orig": 0.819129228591919, | |
| "epoch": 0.12710280373831775, | |
| "kl_loss": 0.11231732368469238, | |
| "loss_ib": 0.009107212536036968, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.12724658519051044, | |
| "grad_norm": 0.12548822164535522, | |
| "learning_rate": 4.998132862570007e-05, | |
| "loss": 0.9055, | |
| "step": 885 | |
| }, | |
| { | |
| "ce_ib": 10.360311508178711, | |
| "ce_orig": 1.447431206703186, | |
| "epoch": 0.12724658519051044, | |
| "kl_loss": 0.10747027397155762, | |
| "loss_ib": 0.010553669184446335, | |
| "step": 885 | |
| }, | |
| { | |
| "ce_ib": 7.586730003356934, | |
| "ce_orig": 0.9365792870521545, | |
| "epoch": 0.1273903666427031, | |
| "kl_loss": 0.10849650204181671, | |
| "loss_ib": 0.009218189865350723, | |
| "step": 886 | |
| }, | |
| { | |
| "ce_ib": 8.302633285522461, | |
| "ce_orig": 0.9514833688735962, | |
| "epoch": 0.12753414809489577, | |
| "kl_loss": 0.07761082053184509, | |
| "loss_ib": 0.008031858131289482, | |
| "step": 887 | |
| }, | |
| { | |
| "ce_ib": 6.314914703369141, | |
| "ce_orig": 0.6305254101753235, | |
| "epoch": 0.12767792954708843, | |
| "kl_loss": 0.11676283180713654, | |
| "loss_ib": 0.00899559911340475, | |
| "step": 888 | |
| }, | |
| { | |
| "ce_ib": 8.043998718261719, | |
| "ce_orig": 0.9012079238891602, | |
| "epoch": 0.12782171099928108, | |
| "kl_loss": 0.07340681552886963, | |
| "loss_ib": 0.007692340295761824, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.12796549245147376, | |
| "grad_norm": 0.10787968337535858, | |
| "learning_rate": 4.9980571340766526e-05, | |
| "loss": 0.9241, | |
| "step": 890 | |
| }, | |
| { | |
| "ce_ib": 7.90130090713501, | |
| "ce_orig": 1.0600311756134033, | |
| "epoch": 0.12796549245147376, | |
| "kl_loss": 0.12333562225103378, | |
| "loss_ib": 0.010117431171238422, | |
| "step": 890 | |
| }, | |
| { | |
| "ce_ib": 6.691804885864258, | |
| "ce_orig": 0.9607767462730408, | |
| "epoch": 0.12810927390366642, | |
| "kl_loss": 0.07735402882099152, | |
| "loss_ib": 0.00721360370516777, | |
| "step": 891 | |
| }, | |
| { | |
| "ce_ib": 7.60068416595459, | |
| "ce_orig": 0.7663901448249817, | |
| "epoch": 0.1282530553558591, | |
| "kl_loss": 0.08263403922319412, | |
| "loss_ib": 0.007932043634355068, | |
| "step": 892 | |
| }, | |
| { | |
| "ce_ib": 7.247678756713867, | |
| "ce_orig": 1.0724464654922485, | |
| "epoch": 0.12839683680805175, | |
| "kl_loss": 0.11332201212644577, | |
| "loss_ib": 0.009289939887821674, | |
| "step": 893 | |
| }, | |
| { | |
| "ce_ib": 5.542264938354492, | |
| "ce_orig": 0.664827823638916, | |
| "epoch": 0.12854061826024443, | |
| "kl_loss": 0.08512848615646362, | |
| "loss_ib": 0.007027556654065847, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.1286843997124371, | |
| "grad_norm": 0.12153299897909164, | |
| "learning_rate": 4.9979799008345215e-05, | |
| "loss": 0.9543, | |
| "step": 895 | |
| }, | |
| { | |
| "ce_ib": 5.504077911376953, | |
| "ce_orig": 0.6800283193588257, | |
| "epoch": 0.1286843997124371, | |
| "kl_loss": 0.07652309536933899, | |
| "loss_ib": 0.006578193511813879, | |
| "step": 895 | |
| }, | |
| { | |
| "ce_ib": 4.984541416168213, | |
| "ce_orig": 0.6408447623252869, | |
| "epoch": 0.12882818116462977, | |
| "kl_loss": 0.0679374560713768, | |
| "loss_ib": 0.005889143329113722, | |
| "step": 896 | |
| }, | |
| { | |
| "ce_ib": 7.2878217697143555, | |
| "ce_orig": 0.8095806837081909, | |
| "epoch": 0.12897196261682242, | |
| "kl_loss": 0.08664879202842712, | |
| "loss_ib": 0.007976350374519825, | |
| "step": 897 | |
| }, | |
| { | |
| "ce_ib": 4.948668003082275, | |
| "ce_orig": 0.4147416055202484, | |
| "epoch": 0.1291157440690151, | |
| "kl_loss": 0.17131486535072327, | |
| "loss_ib": 0.011040077544748783, | |
| "step": 898 | |
| }, | |
| { | |
| "ce_ib": 6.665126323699951, | |
| "ce_orig": 0.6478716731071472, | |
| "epoch": 0.12925952552120776, | |
| "kl_loss": 0.07731156051158905, | |
| "loss_ib": 0.007198141422122717, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.12940330697340044, | |
| "grad_norm": 0.12399930506944656, | |
| "learning_rate": 4.997901162890139e-05, | |
| "loss": 0.762, | |
| "step": 900 | |
| }, | |
| { | |
| "ce_ib": 7.272383689880371, | |
| "ce_orig": 0.6885168552398682, | |
| "epoch": 0.12940330697340044, | |
| "kl_loss": 0.08325660228729248, | |
| "loss_ib": 0.007799021899700165, | |
| "step": 900 | |
| }, | |
| { | |
| "ce_ib": 5.927865028381348, | |
| "ce_orig": 0.7656009793281555, | |
| "epoch": 0.1295470884255931, | |
| "kl_loss": 0.07506805658340454, | |
| "loss_ib": 0.006717335432767868, | |
| "step": 901 | |
| }, | |
| { | |
| "ce_ib": 5.351132392883301, | |
| "ce_orig": 0.6215174794197083, | |
| "epoch": 0.12969086987778577, | |
| "kl_loss": 0.08371435105800629, | |
| "loss_ib": 0.0068612839095294476, | |
| "step": 902 | |
| }, | |
| { | |
| "ce_ib": 7.883289337158203, | |
| "ce_orig": 0.797773540019989, | |
| "epoch": 0.12983465132997843, | |
| "kl_loss": 0.10347917675971985, | |
| "loss_ib": 0.009115603752434254, | |
| "step": 903 | |
| }, | |
| { | |
| "ce_ib": 5.427731513977051, | |
| "ce_orig": 0.5187583565711975, | |
| "epoch": 0.1299784327821711, | |
| "kl_loss": 0.09261107444763184, | |
| "loss_ib": 0.007344419602304697, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.13012221423436376, | |
| "grad_norm": 0.1253208965063095, | |
| "learning_rate": 4.997820920290933e-05, | |
| "loss": 0.8364, | |
| "step": 905 | |
| }, | |
| { | |
| "ce_ib": 4.651042938232422, | |
| "ce_orig": 0.5111949443817139, | |
| "epoch": 0.13012221423436376, | |
| "kl_loss": 0.06503793597221375, | |
| "loss_ib": 0.005577418487519026, | |
| "step": 905 | |
| }, | |
| { | |
| "ce_ib": 6.314610481262207, | |
| "ce_orig": 0.7297623753547668, | |
| "epoch": 0.13026599568655645, | |
| "kl_loss": 0.058927081525325775, | |
| "loss_ib": 0.006103659514337778, | |
| "step": 906 | |
| }, | |
| { | |
| "ce_ib": 6.989099502563477, | |
| "ce_orig": 0.9666232466697693, | |
| "epoch": 0.1304097771387491, | |
| "kl_loss": 0.05747806280851364, | |
| "loss_ib": 0.006368452217429876, | |
| "step": 907 | |
| }, | |
| { | |
| "ce_ib": 6.891256332397461, | |
| "ce_orig": 0.6751134395599365, | |
| "epoch": 0.13055355859094178, | |
| "kl_loss": 0.09852255135774612, | |
| "loss_ib": 0.008371755480766296, | |
| "step": 908 | |
| }, | |
| { | |
| "ce_ib": 6.981194496154785, | |
| "ce_orig": 0.9139746427536011, | |
| "epoch": 0.13069734004313444, | |
| "kl_loss": 0.08173255622386932, | |
| "loss_ib": 0.007577225100249052, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.1308411214953271, | |
| "grad_norm": 0.12957040965557098, | |
| "learning_rate": 4.9977391730852386e-05, | |
| "loss": 0.8999, | |
| "step": 910 | |
| }, | |
| { | |
| "ce_ib": 5.080508232116699, | |
| "ce_orig": 0.7197664380073547, | |
| "epoch": 0.1308411214953271, | |
| "kl_loss": 0.06574570387601852, | |
| "loss_ib": 0.005827539600431919, | |
| "step": 910 | |
| }, | |
| { | |
| "ce_ib": 5.3453497886657715, | |
| "ce_orig": 0.5241774320602417, | |
| "epoch": 0.13098490294751977, | |
| "kl_loss": 0.08800401538610458, | |
| "loss_ib": 0.007072875741869211, | |
| "step": 911 | |
| }, | |
| { | |
| "ce_ib": 6.245478630065918, | |
| "ce_orig": 0.6996411085128784, | |
| "epoch": 0.13112868439971243, | |
| "kl_loss": 0.09634403884410858, | |
| "loss_ib": 0.007939941249787807, | |
| "step": 912 | |
| }, | |
| { | |
| "ce_ib": 7.945882797241211, | |
| "ce_orig": 1.143088698387146, | |
| "epoch": 0.1312724658519051, | |
| "kl_loss": 0.08361925929784775, | |
| "loss_ib": 0.008153904229402542, | |
| "step": 913 | |
| }, | |
| { | |
| "ce_ib": 8.524740219116211, | |
| "ce_orig": 1.103076696395874, | |
| "epoch": 0.13141624730409776, | |
| "kl_loss": 0.07524412125349045, | |
| "loss_ib": 0.008024576120078564, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.13156002875629044, | |
| "grad_norm": 0.11736583709716797, | |
| "learning_rate": 4.997655921322299e-05, | |
| "loss": 0.7881, | |
| "step": 915 | |
| }, | |
| { | |
| "ce_ib": 7.337964057922363, | |
| "ce_orig": 0.7371410131454468, | |
| "epoch": 0.13156002875629044, | |
| "kl_loss": 0.08646431565284729, | |
| "loss_ib": 0.007992197759449482, | |
| "step": 915 | |
| }, | |
| { | |
| "ce_ib": 8.713349342346191, | |
| "ce_orig": 0.9880151152610779, | |
| "epoch": 0.1317038102084831, | |
| "kl_loss": 0.07716777920722961, | |
| "loss_ib": 0.00821506418287754, | |
| "step": 916 | |
| }, | |
| { | |
| "ce_ib": 6.380884647369385, | |
| "ce_orig": 0.6128709316253662, | |
| "epoch": 0.13184759166067578, | |
| "kl_loss": 0.09613852202892303, | |
| "loss_ib": 0.007997368462383747, | |
| "step": 917 | |
| }, | |
| { | |
| "ce_ib": 5.234755992889404, | |
| "ce_orig": 0.6296612024307251, | |
| "epoch": 0.13199137311286843, | |
| "kl_loss": 0.10441483557224274, | |
| "loss_ib": 0.007838119752705097, | |
| "step": 918 | |
| }, | |
| { | |
| "ce_ib": 5.749408721923828, | |
| "ce_orig": 0.6389755606651306, | |
| "epoch": 0.1321351545650611, | |
| "kl_loss": 0.09272737056016922, | |
| "loss_ib": 0.007511072792112827, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.13227893601725377, | |
| "grad_norm": 0.10284168273210526, | |
| "learning_rate": 4.997571165052262e-05, | |
| "loss": 0.8021, | |
| "step": 920 | |
| }, | |
| { | |
| "ce_ib": 7.687596797943115, | |
| "ce_orig": 0.9395468235015869, | |
| "epoch": 0.13227893601725377, | |
| "kl_loss": 0.10860046744346619, | |
| "loss_ib": 0.009273822419345379, | |
| "step": 920 | |
| }, | |
| { | |
| "ce_ib": 5.143918991088867, | |
| "ce_orig": 0.6956912279129028, | |
| "epoch": 0.13242271746944645, | |
| "kl_loss": 0.05788666009902954, | |
| "loss_ib": 0.00546629261225462, | |
| "step": 921 | |
| }, | |
| { | |
| "ce_ib": 4.9963812828063965, | |
| "ce_orig": 0.641473114490509, | |
| "epoch": 0.1325664989216391, | |
| "kl_loss": 0.076176717877388, | |
| "loss_ib": 0.0063070268370211124, | |
| "step": 922 | |
| }, | |
| { | |
| "ce_ib": 5.956387996673584, | |
| "ce_orig": 0.6581974625587463, | |
| "epoch": 0.13271028037383178, | |
| "kl_loss": 0.08382155001163483, | |
| "loss_ib": 0.007169271353632212, | |
| "step": 923 | |
| }, | |
| { | |
| "ce_ib": 6.518821716308594, | |
| "ce_orig": 0.8877462148666382, | |
| "epoch": 0.13285406182602444, | |
| "kl_loss": 0.07050419598817825, | |
| "loss_ib": 0.0067846211604774, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.13299784327821712, | |
| "grad_norm": 0.1254836916923523, | |
| "learning_rate": 4.99748490432618e-05, | |
| "loss": 0.7649, | |
| "step": 925 | |
| }, | |
| { | |
| "ce_ib": 9.03060531616211, | |
| "ce_orig": 1.0601074695587158, | |
| "epoch": 0.13299784327821712, | |
| "kl_loss": 0.1262633204460144, | |
| "loss_ib": 0.010828468017280102, | |
| "step": 925 | |
| }, | |
| { | |
| "ce_ib": 5.390571594238281, | |
| "ce_orig": 0.6605896353721619, | |
| "epoch": 0.13314162473040977, | |
| "kl_loss": 0.08747322857379913, | |
| "loss_ib": 0.007068946957588196, | |
| "step": 926 | |
| }, | |
| { | |
| "ce_ib": 9.145002365112305, | |
| "ce_orig": 0.7847701907157898, | |
| "epoch": 0.13328540618260246, | |
| "kl_loss": 0.08090417087078094, | |
| "loss_ib": 0.008617709390819073, | |
| "step": 927 | |
| }, | |
| { | |
| "ce_ib": 5.643058776855469, | |
| "ce_orig": 0.7314335107803345, | |
| "epoch": 0.1334291876347951, | |
| "kl_loss": 0.05843006446957588, | |
| "loss_ib": 0.0057430327869951725, | |
| "step": 928 | |
| }, | |
| { | |
| "ce_ib": 6.753859996795654, | |
| "ce_orig": 1.0451164245605469, | |
| "epoch": 0.1335729690869878, | |
| "kl_loss": 0.07054602354764938, | |
| "loss_ib": 0.006904230918735266, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.13371675053918045, | |
| "grad_norm": 0.1414111852645874, | |
| "learning_rate": 4.9973971391960167e-05, | |
| "loss": 0.8961, | |
| "step": 930 | |
| }, | |
| { | |
| "ce_ib": 8.576930046081543, | |
| "ce_orig": 1.1311094760894775, | |
| "epoch": 0.13371675053918045, | |
| "kl_loss": 0.09142087399959564, | |
| "loss_ib": 0.00885950867086649, | |
| "step": 930 | |
| }, | |
| { | |
| "ce_ib": 7.471253871917725, | |
| "ce_orig": 1.0194929838180542, | |
| "epoch": 0.1338605319913731, | |
| "kl_loss": 0.11889418959617615, | |
| "loss_ib": 0.009680337272584438, | |
| "step": 931 | |
| }, | |
| { | |
| "ce_ib": 6.724924564361572, | |
| "ce_orig": 0.864520788192749, | |
| "epoch": 0.13400431344356578, | |
| "kl_loss": 0.09439219534397125, | |
| "loss_ib": 0.008082072250545025, | |
| "step": 932 | |
| }, | |
| { | |
| "ce_ib": 7.768470287322998, | |
| "ce_orig": 1.0322949886322021, | |
| "epoch": 0.13414809489575844, | |
| "kl_loss": 0.16739840805530548, | |
| "loss_ib": 0.012254155240952969, | |
| "step": 933 | |
| }, | |
| { | |
| "ce_ib": 8.237820625305176, | |
| "ce_orig": 1.0064351558685303, | |
| "epoch": 0.13429187634795112, | |
| "kl_loss": 0.10145040601491928, | |
| "loss_ib": 0.009191430173814297, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.13443565780014377, | |
| "grad_norm": 0.1539893001317978, | |
| "learning_rate": 4.997307869714637e-05, | |
| "loss": 0.8736, | |
| "step": 935 | |
| }, | |
| { | |
| "ce_ib": 6.874648571014404, | |
| "ce_orig": 1.1211917400360107, | |
| "epoch": 0.13443565780014377, | |
| "kl_loss": 0.10167817026376724, | |
| "loss_ib": 0.00852123275399208, | |
| "step": 935 | |
| }, | |
| { | |
| "ce_ib": 5.751309871673584, | |
| "ce_orig": 0.8845812082290649, | |
| "epoch": 0.13457943925233645, | |
| "kl_loss": 0.09366403520107269, | |
| "loss_ib": 0.007558857090771198, | |
| "step": 936 | |
| }, | |
| { | |
| "ce_ib": 8.573022842407227, | |
| "ce_orig": 1.0504612922668457, | |
| "epoch": 0.1347232207045291, | |
| "kl_loss": 0.07060796767473221, | |
| "loss_ib": 0.007816909812390804, | |
| "step": 937 | |
| }, | |
| { | |
| "ce_ib": 8.377857208251953, | |
| "ce_orig": 0.9093595147132874, | |
| "epoch": 0.1348670021567218, | |
| "kl_loss": 0.11797395348548889, | |
| "loss_ib": 0.010087626054883003, | |
| "step": 938 | |
| }, | |
| { | |
| "ce_ib": 7.620189189910889, | |
| "ce_orig": 0.9195590019226074, | |
| "epoch": 0.13501078360891444, | |
| "kl_loss": 0.11536361277103424, | |
| "loss_ib": 0.009578275494277477, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.13515456506110712, | |
| "grad_norm": 0.12349986284971237, | |
| "learning_rate": 4.9972170959358156e-05, | |
| "loss": 0.8263, | |
| "step": 940 | |
| }, | |
| { | |
| "ce_ib": 4.433289527893066, | |
| "ce_orig": 0.5269767642021179, | |
| "epoch": 0.13515456506110712, | |
| "kl_loss": 0.06309656798839569, | |
| "loss_ib": 0.005371473263949156, | |
| "step": 940 | |
| }, | |
| { | |
| "ce_ib": 6.865793228149414, | |
| "ce_orig": 0.8600917458534241, | |
| "epoch": 0.13529834651329978, | |
| "kl_loss": 0.268341064453125, | |
| "loss_ib": 0.016849949955940247, | |
| "step": 941 | |
| }, | |
| { | |
| "ce_ib": 5.769837856292725, | |
| "ce_orig": 0.9609159827232361, | |
| "epoch": 0.13544212796549246, | |
| "kl_loss": 0.073099285364151, | |
| "loss_ib": 0.006539882626384497, | |
| "step": 942 | |
| }, | |
| { | |
| "ce_ib": 6.068971157073975, | |
| "ce_orig": 0.8975405097007751, | |
| "epoch": 0.1355859094176851, | |
| "kl_loss": 0.07797665894031525, | |
| "loss_ib": 0.006933317985385656, | |
| "step": 943 | |
| }, | |
| { | |
| "ce_ib": 4.143486976623535, | |
| "ce_orig": 0.5329647660255432, | |
| "epoch": 0.1357296908698778, | |
| "kl_loss": 0.1322351098060608, | |
| "loss_ib": 0.008683498948812485, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.13587347232207045, | |
| "grad_norm": 0.12345600128173828, | |
| "learning_rate": 4.9971248179142296e-05, | |
| "loss": 0.8698, | |
| "step": 945 | |
| }, | |
| { | |
| "ce_ib": 8.5775146484375, | |
| "ce_orig": 1.4092092514038086, | |
| "epoch": 0.13587347232207045, | |
| "kl_loss": 0.07750563323497772, | |
| "loss_ib": 0.008164039812982082, | |
| "step": 945 | |
| }, | |
| { | |
| "ce_ib": 6.79196310043335, | |
| "ce_orig": 1.1046801805496216, | |
| "epoch": 0.13601725377426313, | |
| "kl_loss": 0.08599655330181122, | |
| "loss_ib": 0.007695809006690979, | |
| "step": 946 | |
| }, | |
| { | |
| "ce_ib": 6.365146636962891, | |
| "ce_orig": 0.7971789836883545, | |
| "epoch": 0.13616103522645578, | |
| "kl_loss": 0.08945336192846298, | |
| "loss_ib": 0.007655241526663303, | |
| "step": 947 | |
| }, | |
| { | |
| "ce_ib": 6.125009059906006, | |
| "ce_orig": 0.9030478000640869, | |
| "epoch": 0.13630481667864847, | |
| "kl_loss": 0.08069920539855957, | |
| "loss_ib": 0.007097464986145496, | |
| "step": 948 | |
| }, | |
| { | |
| "ce_ib": 7.449437618255615, | |
| "ce_orig": 0.9372292757034302, | |
| "epoch": 0.13644859813084112, | |
| "kl_loss": 0.07520076632499695, | |
| "loss_ib": 0.007484757341444492, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 0.1365923795830338, | |
| "grad_norm": 0.1214538961648941, | |
| "learning_rate": 4.997031035705466e-05, | |
| "loss": 0.8814, | |
| "step": 950 | |
| }, | |
| { | |
| "ce_ib": 6.2350687980651855, | |
| "ce_orig": 0.8483865857124329, | |
| "epoch": 0.1365923795830338, | |
| "kl_loss": 0.08866537362337112, | |
| "loss_ib": 0.007550803013145924, | |
| "step": 950 | |
| }, | |
| { | |
| "ce_ib": 6.066655158996582, | |
| "ce_orig": 0.6078613996505737, | |
| "epoch": 0.13673616103522646, | |
| "kl_loss": 0.06268885731697083, | |
| "loss_ib": 0.006167770363390446, | |
| "step": 951 | |
| }, | |
| { | |
| "ce_ib": 7.969608783721924, | |
| "ce_orig": 1.1507118940353394, | |
| "epoch": 0.1368799424874191, | |
| "kl_loss": 0.07303150743246078, | |
| "loss_ib": 0.007636380381882191, | |
| "step": 952 | |
| }, | |
| { | |
| "ce_ib": 6.006031036376953, | |
| "ce_orig": 0.7712607383728027, | |
| "epoch": 0.1370237239396118, | |
| "kl_loss": 0.1005963534116745, | |
| "loss_ib": 0.008032833226025105, | |
| "step": 953 | |
| }, | |
| { | |
| "ce_ib": 6.047878265380859, | |
| "ce_orig": 0.8621774315834045, | |
| "epoch": 0.13716750539180445, | |
| "kl_loss": 0.06825101375579834, | |
| "loss_ib": 0.006436489522457123, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.13731128684399713, | |
| "grad_norm": 0.1393401324748993, | |
| "learning_rate": 4.996935749366015e-05, | |
| "loss": 0.9195, | |
| "step": 955 | |
| }, | |
| { | |
| "ce_ib": 5.52618408203125, | |
| "ce_orig": 0.7416598200798035, | |
| "epoch": 0.13731128684399713, | |
| "kl_loss": 0.07697509229183197, | |
| "loss_ib": 0.006611846387386322, | |
| "step": 955 | |
| }, | |
| { | |
| "ce_ib": 5.450565338134766, | |
| "ce_orig": 0.7644197940826416, | |
| "epoch": 0.13745506829618978, | |
| "kl_loss": 0.06023329496383667, | |
| "loss_ib": 0.005736947525292635, | |
| "step": 956 | |
| }, | |
| { | |
| "ce_ib": 7.305458068847656, | |
| "ce_orig": 1.0978727340698242, | |
| "epoch": 0.13759884974838246, | |
| "kl_loss": 0.11561448872089386, | |
| "loss_ib": 0.009433453902602196, | |
| "step": 957 | |
| }, | |
| { | |
| "ce_ib": 6.976890563964844, | |
| "ce_orig": 0.7256874442100525, | |
| "epoch": 0.13774263120057512, | |
| "kl_loss": 0.09455625712871552, | |
| "loss_ib": 0.008216258138418198, | |
| "step": 958 | |
| }, | |
| { | |
| "ce_ib": 6.9282097816467285, | |
| "ce_orig": 0.7956810593605042, | |
| "epoch": 0.1378864126527678, | |
| "kl_loss": 0.087988942861557, | |
| "loss_ib": 0.007863552309572697, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 0.13803019410496045, | |
| "grad_norm": 0.12086982280015945, | |
| "learning_rate": 4.996838958953275e-05, | |
| "loss": 0.8213, | |
| "step": 960 | |
| }, | |
| { | |
| "ce_ib": 6.342545032501221, | |
| "ce_orig": 0.7129842638969421, | |
| "epoch": 0.13803019410496045, | |
| "kl_loss": 0.13139081001281738, | |
| "loss_ib": 0.009740813635289669, | |
| "step": 960 | |
| }, | |
| { | |
| "ce_ib": 6.692712306976318, | |
| "ce_orig": 0.8974847197532654, | |
| "epoch": 0.13817397555715313, | |
| "kl_loss": 0.08609960973262787, | |
| "loss_ib": 0.007651336491107941, | |
| "step": 961 | |
| }, | |
| { | |
| "ce_ib": 8.433094024658203, | |
| "ce_orig": 1.1379026174545288, | |
| "epoch": 0.1383177570093458, | |
| "kl_loss": 0.10287706553936005, | |
| "loss_ib": 0.009360400028526783, | |
| "step": 962 | |
| }, | |
| { | |
| "ce_ib": 8.236790657043457, | |
| "ce_orig": 1.2443400621414185, | |
| "epoch": 0.13846153846153847, | |
| "kl_loss": 0.08817592263221741, | |
| "loss_ib": 0.00852719135582447, | |
| "step": 963 | |
| }, | |
| { | |
| "ce_ib": 5.165767669677734, | |
| "ce_orig": 0.5761904716491699, | |
| "epoch": 0.13860531991373112, | |
| "kl_loss": 0.0794210359454155, | |
| "loss_ib": 0.006553936284035444, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.1387491013659238, | |
| "grad_norm": 0.1046949103474617, | |
| "learning_rate": 4.996740664525549e-05, | |
| "loss": 0.9412, | |
| "step": 965 | |
| }, | |
| { | |
| "ce_ib": 7.795332908630371, | |
| "ce_orig": 1.0906161069869995, | |
| "epoch": 0.1387491013659238, | |
| "kl_loss": 0.07911582291126251, | |
| "loss_ib": 0.00785345770418644, | |
| "step": 965 | |
| }, | |
| { | |
| "ce_ib": 5.823874473571777, | |
| "ce_orig": 0.6993345022201538, | |
| "epoch": 0.13889288281811646, | |
| "kl_loss": 0.08959372341632843, | |
| "loss_ib": 0.0073916236869990826, | |
| "step": 966 | |
| }, | |
| { | |
| "ce_ib": 7.680074214935303, | |
| "ce_orig": 1.1049128770828247, | |
| "epoch": 0.13903666427030914, | |
| "kl_loss": 0.08348916471004486, | |
| "loss_ib": 0.008014495484530926, | |
| "step": 967 | |
| }, | |
| { | |
| "ce_ib": 6.938556671142578, | |
| "ce_orig": 0.6378891468048096, | |
| "epoch": 0.1391804457225018, | |
| "kl_loss": 0.11031496524810791, | |
| "loss_ib": 0.008985026739537716, | |
| "step": 968 | |
| }, | |
| { | |
| "ce_ib": 8.571457862854004, | |
| "ce_orig": 1.1795148849487305, | |
| "epoch": 0.13932422717469448, | |
| "kl_loss": 0.08533471077680588, | |
| "loss_ib": 0.00855246465653181, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 0.13946800862688713, | |
| "grad_norm": 0.1300869584083557, | |
| "learning_rate": 4.996640866142046e-05, | |
| "loss": 0.9504, | |
| "step": 970 | |
| }, | |
| { | |
| "ce_ib": 8.15340805053711, | |
| "ce_orig": 1.0846816301345825, | |
| "epoch": 0.13946800862688713, | |
| "kl_loss": 0.08402827382087708, | |
| "loss_ib": 0.008278118446469307, | |
| "step": 970 | |
| }, | |
| { | |
| "ce_ib": 4.409031391143799, | |
| "ce_orig": 0.46880093216896057, | |
| "epoch": 0.1396117900790798, | |
| "kl_loss": 0.11544293165206909, | |
| "loss_ib": 0.007976662367582321, | |
| "step": 971 | |
| }, | |
| { | |
| "ce_ib": 8.286463737487793, | |
| "ce_orig": 0.7137627601623535, | |
| "epoch": 0.13975557153127247, | |
| "kl_loss": 0.07826922088861465, | |
| "loss_ib": 0.008056692779064178, | |
| "step": 972 | |
| }, | |
| { | |
| "ce_ib": 7.249392986297607, | |
| "ce_orig": 1.0557063817977905, | |
| "epoch": 0.13989935298346512, | |
| "kl_loss": 0.08211711049079895, | |
| "loss_ib": 0.007730551995337009, | |
| "step": 973 | |
| }, | |
| { | |
| "ce_ib": 6.922652721405029, | |
| "ce_orig": 0.7434417009353638, | |
| "epoch": 0.1400431344356578, | |
| "kl_loss": 0.09765265882015228, | |
| "loss_ib": 0.008343959227204323, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.14018691588785046, | |
| "grad_norm": 0.10048684477806091, | |
| "learning_rate": 4.996539563862881e-05, | |
| "loss": 0.9914, | |
| "step": 975 | |
| }, | |
| { | |
| "ce_ib": 5.8394904136657715, | |
| "ce_orig": 0.9871096611022949, | |
| "epoch": 0.14018691588785046, | |
| "kl_loss": 0.07624734193086624, | |
| "loss_ib": 0.0067321122623980045, | |
| "step": 975 | |
| }, | |
| { | |
| "ce_ib": 4.4480671882629395, | |
| "ce_orig": 0.5778042078018188, | |
| "epoch": 0.14033069734004314, | |
| "kl_loss": 0.12898705899715424, | |
| "loss_ib": 0.00867338664829731, | |
| "step": 976 | |
| }, | |
| { | |
| "ce_ib": 4.4938836097717285, | |
| "ce_orig": 0.5107190012931824, | |
| "epoch": 0.1404744787922358, | |
| "kl_loss": 0.060333944857120514, | |
| "loss_ib": 0.005263639148324728, | |
| "step": 977 | |
| }, | |
| { | |
| "ce_ib": 5.551029682159424, | |
| "ce_orig": 0.7901322841644287, | |
| "epoch": 0.14061826024442847, | |
| "kl_loss": 0.1715565174818039, | |
| "loss_ib": 0.011353340931236744, | |
| "step": 978 | |
| }, | |
| { | |
| "ce_ib": 6.682663917541504, | |
| "ce_orig": 0.8593956828117371, | |
| "epoch": 0.14076204169662113, | |
| "kl_loss": 0.08176585286855698, | |
| "loss_ib": 0.007429624442011118, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 0.1409058231488138, | |
| "grad_norm": 0.1225329264998436, | |
| "learning_rate": 4.996436757749077e-05, | |
| "loss": 0.8339, | |
| "step": 980 | |
| }, | |
| { | |
| "ce_ib": 5.105162620544434, | |
| "ce_orig": 0.6780040860176086, | |
| "epoch": 0.1409058231488138, | |
| "kl_loss": 0.06629600375890732, | |
| "loss_ib": 0.005867381580173969, | |
| "step": 980 | |
| }, | |
| { | |
| "ce_ib": 6.971573352813721, | |
| "ce_orig": 1.1731876134872437, | |
| "epoch": 0.14104960460100646, | |
| "kl_loss": 0.09537965059280396, | |
| "loss_ib": 0.008254769258201122, | |
| "step": 981 | |
| }, | |
| { | |
| "ce_ib": 7.419947624206543, | |
| "ce_orig": 1.0379914045333862, | |
| "epoch": 0.14119338605319914, | |
| "kl_loss": 0.09539618343114853, | |
| "loss_ib": 0.008479783311486244, | |
| "step": 982 | |
| }, | |
| { | |
| "ce_ib": 8.027047157287598, | |
| "ce_orig": 1.0013635158538818, | |
| "epoch": 0.1413371675053918, | |
| "kl_loss": 0.11693254113197327, | |
| "loss_ib": 0.009860150516033173, | |
| "step": 983 | |
| }, | |
| { | |
| "ce_ib": 5.866478443145752, | |
| "ce_orig": 0.8373990058898926, | |
| "epoch": 0.14148094895758448, | |
| "kl_loss": 0.056306250393390656, | |
| "loss_ib": 0.005748551804572344, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.14162473040977713, | |
| "grad_norm": 0.12583786249160767, | |
| "learning_rate": 4.99633244786256e-05, | |
| "loss": 0.9591, | |
| "step": 985 | |
| }, | |
| { | |
| "ce_ib": 9.714642524719238, | |
| "ce_orig": 1.50913405418396, | |
| "epoch": 0.14162473040977713, | |
| "kl_loss": 0.11189399659633636, | |
| "loss_ib": 0.010452020913362503, | |
| "step": 985 | |
| }, | |
| { | |
| "ce_ib": 5.668364524841309, | |
| "ce_orig": 0.510465681552887, | |
| "epoch": 0.14176851186196981, | |
| "kl_loss": 0.10138311982154846, | |
| "loss_ib": 0.00790333840996027, | |
| "step": 986 | |
| }, | |
| { | |
| "ce_ib": 6.4766764640808105, | |
| "ce_orig": 0.9878252744674683, | |
| "epoch": 0.14191229331416247, | |
| "kl_loss": 0.05886990204453468, | |
| "loss_ib": 0.0061818333342671394, | |
| "step": 987 | |
| }, | |
| { | |
| "ce_ib": 4.52413272857666, | |
| "ce_orig": 0.6275699734687805, | |
| "epoch": 0.14205607476635515, | |
| "kl_loss": 0.07127900421619415, | |
| "loss_ib": 0.0058260164223611355, | |
| "step": 988 | |
| }, | |
| { | |
| "ce_ib": 7.104588031768799, | |
| "ce_orig": 1.0290782451629639, | |
| "epoch": 0.1421998562185478, | |
| "kl_loss": 0.09330768138170242, | |
| "loss_ib": 0.008217677474021912, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 0.14234363767074049, | |
| "grad_norm": 0.15013280510902405, | |
| "learning_rate": 4.9962266342661624e-05, | |
| "loss": 0.8154, | |
| "step": 990 | |
| }, | |
| { | |
| "ce_ib": 6.073655128479004, | |
| "ce_orig": 0.7287778258323669, | |
| "epoch": 0.14234363767074049, | |
| "kl_loss": 0.08072468638420105, | |
| "loss_ib": 0.007073062006384134, | |
| "step": 990 | |
| }, | |
| { | |
| "ce_ib": 7.8407883644104, | |
| "ce_orig": 0.9141483902931213, | |
| "epoch": 0.14248741912293314, | |
| "kl_loss": 0.07278452068567276, | |
| "loss_ib": 0.007559619843959808, | |
| "step": 991 | |
| }, | |
| { | |
| "ce_ib": 7.545557975769043, | |
| "ce_orig": 1.0704678297042847, | |
| "epoch": 0.14263120057512582, | |
| "kl_loss": 0.10130893439054489, | |
| "loss_ib": 0.008838226087391376, | |
| "step": 992 | |
| }, | |
| { | |
| "ce_ib": 9.583335876464844, | |
| "ce_orig": 1.1344443559646606, | |
| "epoch": 0.14277498202731848, | |
| "kl_loss": 0.0773579329252243, | |
| "loss_ib": 0.008659563958644867, | |
| "step": 993 | |
| }, | |
| { | |
| "ce_ib": 6.852370738983154, | |
| "ce_orig": 1.0198527574539185, | |
| "epoch": 0.14291876347951113, | |
| "kl_loss": 0.09604521840810776, | |
| "loss_ib": 0.00822844635695219, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.1430625449317038, | |
| "grad_norm": 0.140967458486557, | |
| "learning_rate": 4.9961193170236234e-05, | |
| "loss": 0.9539, | |
| "step": 995 | |
| }, | |
| { | |
| "ce_ib": 7.311800956726074, | |
| "ce_orig": 0.8755852580070496, | |
| "epoch": 0.1430625449317038, | |
| "kl_loss": 0.1115192174911499, | |
| "loss_ib": 0.009231860749423504, | |
| "step": 995 | |
| }, | |
| { | |
| "ce_ib": 7.3969621658325195, | |
| "ce_orig": 0.7122746706008911, | |
| "epoch": 0.14320632638389647, | |
| "kl_loss": 0.16122053563594818, | |
| "loss_ib": 0.011759507469832897, | |
| "step": 996 | |
| }, | |
| { | |
| "ce_ib": 6.429070949554443, | |
| "ce_orig": 0.7772039175033569, | |
| "epoch": 0.14335010783608915, | |
| "kl_loss": 0.06662163138389587, | |
| "loss_ib": 0.006545616779476404, | |
| "step": 997 | |
| }, | |
| { | |
| "ce_ib": 6.746508598327637, | |
| "ce_orig": 1.0281816720962524, | |
| "epoch": 0.1434938892882818, | |
| "kl_loss": 0.11056394129991531, | |
| "loss_ib": 0.008901451714336872, | |
| "step": 998 | |
| }, | |
| { | |
| "ce_ib": 5.576045989990234, | |
| "ce_orig": 0.8467837572097778, | |
| "epoch": 0.14363767074047448, | |
| "kl_loss": 0.08954350650310516, | |
| "loss_ib": 0.007265198510140181, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 0.14378145219266714, | |
| "grad_norm": 0.1209883913397789, | |
| "learning_rate": 4.996010496199587e-05, | |
| "loss": 0.8696, | |
| "step": 1000 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 20865, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |