diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -20421,3 +20421,1009 @@ Use FP16 precision: False 02/25/2022 03:28:09 - INFO - codeparrot_training - Step 19998: {'lr': 0.0003457010911585404, 'samples': 10239488, 'steps': 19998, 'loss/train': 1.497208595275879} 02/25/2022 03:28:15 - INFO - codeparrot_training - Step 19999: {'lr': 0.00034568597482985067, 'samples': 10240000, 'steps': 19999, 'loss/train': 1.5917458534240723} 02/25/2022 03:28:15 - INFO - codeparrot_training - Evaluating and saving model checkpoint +02/25/2022 03:28:32 - WARNING - huggingface_hub.repository - Several commits (20) will be pushed upstream. +02/25/2022 03:28:32 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +02/25/2022 03:29:06 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/multi-code-clippy + e9773d2..ad14ca4 floral-grass-11 -> floral-grass-11 + +02/25/2022 03:29:10 - INFO - codeparrot_training - Step 20000: {'lr': 0.0003456708580912725, 'samples': 10240512, 'steps': 20000, 'loss/train': 2.0182840824127197} +02/25/2022 03:29:16 - INFO - codeparrot_training - Step 20001: {'lr': 0.00034565574094287075, 'samples': 10241024, 'steps': 20001, 'loss/train': 2.2768688201904297} +02/25/2022 03:29:19 - INFO - codeparrot_training - Step 20002: {'lr': 0.0003456406233847102, 'samples': 10241536, 'steps': 20002, 'loss/train': 2.4322569370269775} +02/25/2022 03:29:26 - INFO - codeparrot_training - Step 20003: {'lr': 0.00034562550541685557, 'samples': 10242048, 'steps': 20003, 'loss/train': 1.8150594234466553} +02/25/2022 03:29:29 - INFO - codeparrot_training - Step 20004: {'lr': 0.0003456103870393717, 'samples': 10242560, 'steps': 20004, 'loss/train': 2.9891395568847656} +02/25/2022 03:29:35 - INFO - codeparrot_training - Step 20005: {'lr': 0.0003455952682523232, 'samples': 10243072, 'steps': 20005, 'loss/train': 1.8301373720169067} +02/25/2022 03:29:38 - INFO - codeparrot_training - Step 20006: {'lr': 0.00034558014905577506, 'samples': 10243584, 'steps': 20006, 'loss/train': 2.319890022277832} +02/25/2022 03:29:44 - INFO - codeparrot_training - Step 20007: {'lr': 0.00034556502944979177, 'samples': 10244096, 'steps': 20007, 'loss/train': 1.6531423330307007} +02/25/2022 03:29:47 - INFO - codeparrot_training - Step 20008: {'lr': 0.0003455499094344383, 'samples': 10244608, 'steps': 20008, 'loss/train': 1.5679006576538086} +02/25/2022 03:29:53 - INFO - codeparrot_training - Step 20009: {'lr': 0.00034553478900977943, 'samples': 10245120, 'steps': 20009, 'loss/train': 2.504625082015991} +02/25/2022 03:29:56 - INFO - codeparrot_training - Step 20010: {'lr': 0.0003455196681758798, 'samples': 10245632, 'steps': 20010, 'loss/train': 1.9351191520690918} +02/25/2022 03:30:02 - INFO - codeparrot_training - Step 20011: {'lr': 0.00034550454693280417, 'samples': 10246144, 'steps': 20011, 'loss/train': 2.481072425842285} +02/25/2022 03:30:05 - INFO - codeparrot_training - Step 20012: {'lr': 0.0003454894252806175, 'samples': 10246656, 'steps': 20012, 'loss/train': 1.2526456117630005} +02/25/2022 03:30:11 - INFO - codeparrot_training - Step 20013: {'lr': 0.0003454743032193844, 'samples': 10247168, 'steps': 20013, 'loss/train': 1.3796939849853516} +02/25/2022 03:30:15 - INFO - codeparrot_training - Step 20014: {'lr': 0.00034545918074916965, 'samples': 10247680, 'steps': 20014, 'loss/train': 3.00093674659729} +02/25/2022 03:30:20 - INFO - codeparrot_training - Step 20015: {'lr': 0.00034544405787003817, 'samples': 10248192, 'steps': 20015, 'loss/train': 1.7029036283493042} +02/25/2022 03:30:24 - INFO - codeparrot_training - Step 20016: {'lr': 0.0003454289345820546, 'samples': 10248704, 'steps': 20016, 'loss/train': 1.8101145029067993} +02/25/2022 03:30:29 - INFO - codeparrot_training - Step 20017: {'lr': 0.00034541381088528376, 'samples': 10249216, 'steps': 20017, 'loss/train': 1.9798136949539185} +02/25/2022 03:30:33 - INFO - codeparrot_training - Step 20018: {'lr': 0.00034539868677979055, 'samples': 10249728, 'steps': 20018, 'loss/train': 1.6781688928604126} +02/25/2022 03:30:39 - INFO - codeparrot_training - Step 20019: {'lr': 0.0003453835622656396, 'samples': 10250240, 'steps': 20019, 'loss/train': 1.1882104873657227} +02/25/2022 03:30:42 - INFO - codeparrot_training - Step 20020: {'lr': 0.0003453684373428957, 'samples': 10250752, 'steps': 20020, 'loss/train': 1.8127082586288452} +02/25/2022 03:30:48 - INFO - codeparrot_training - Step 20021: {'lr': 0.0003453533120116238, 'samples': 10251264, 'steps': 20021, 'loss/train': 1.358256459236145} +02/25/2022 03:30:51 - INFO - codeparrot_training - Step 20022: {'lr': 0.0003453381862718886, 'samples': 10251776, 'steps': 20022, 'loss/train': 1.9331316947937012} +02/25/2022 03:30:57 - INFO - codeparrot_training - Step 20023: {'lr': 0.00034532306012375474, 'samples': 10252288, 'steps': 20023, 'loss/train': 1.6333743333816528} +02/25/2022 03:31:00 - INFO - codeparrot_training - Step 20024: {'lr': 0.00034530793356728727, 'samples': 10252800, 'steps': 20024, 'loss/train': 2.4872119426727295} +02/25/2022 03:31:06 - INFO - codeparrot_training - Step 20025: {'lr': 0.00034529280660255084, 'samples': 10253312, 'steps': 20025, 'loss/train': 2.5141241550445557} +02/25/2022 03:31:09 - INFO - codeparrot_training - Step 20026: {'lr': 0.00034527767922961034, 'samples': 10253824, 'steps': 20026, 'loss/train': 1.9492285251617432} +02/25/2022 03:31:15 - INFO - codeparrot_training - Step 20027: {'lr': 0.0003452625514485305, 'samples': 10254336, 'steps': 20027, 'loss/train': 1.420654535293579} +02/25/2022 03:31:18 - INFO - codeparrot_training - Step 20028: {'lr': 0.0003452474232593761, 'samples': 10254848, 'steps': 20028, 'loss/train': 2.066033363342285} +02/25/2022 03:31:24 - INFO - codeparrot_training - Step 20029: {'lr': 0.00034523229466221195, 'samples': 10255360, 'steps': 20029, 'loss/train': 1.3243342638015747} +02/25/2022 03:31:28 - INFO - codeparrot_training - Step 20030: {'lr': 0.00034521716565710293, 'samples': 10255872, 'steps': 20030, 'loss/train': 0.8642081022262573} +02/25/2022 03:31:34 - INFO - codeparrot_training - Step 20031: {'lr': 0.00034520203624411385, 'samples': 10256384, 'steps': 20031, 'loss/train': 1.956555962562561} +02/25/2022 03:31:37 - INFO - codeparrot_training - Step 20032: {'lr': 0.0003451869064233094, 'samples': 10256896, 'steps': 20032, 'loss/train': 2.77828049659729} +02/25/2022 03:31:43 - INFO - codeparrot_training - Step 20033: {'lr': 0.0003451717761947545, 'samples': 10257408, 'steps': 20033, 'loss/train': 2.0623152256011963} +02/25/2022 03:31:46 - INFO - codeparrot_training - Step 20034: {'lr': 0.0003451566455585139, 'samples': 10257920, 'steps': 20034, 'loss/train': 1.875267505645752} +02/25/2022 03:31:52 - INFO - codeparrot_training - Step 20035: {'lr': 0.00034514151451465254, 'samples': 10258432, 'steps': 20035, 'loss/train': 2.443436622619629} +02/25/2022 03:31:55 - INFO - codeparrot_training - Step 20036: {'lr': 0.00034512638306323506, 'samples': 10258944, 'steps': 20036, 'loss/train': 1.7062523365020752} +02/25/2022 03:32:01 - INFO - codeparrot_training - Step 20037: {'lr': 0.0003451112512043264, 'samples': 10259456, 'steps': 20037, 'loss/train': 1.2771670818328857} +02/25/2022 03:32:04 - INFO - codeparrot_training - Step 20038: {'lr': 0.0003450961189379913, 'samples': 10259968, 'steps': 20038, 'loss/train': 1.9417378902435303} +02/25/2022 03:32:11 - INFO - codeparrot_training - Step 20039: {'lr': 0.0003450809862642947, 'samples': 10260480, 'steps': 20039, 'loss/train': 2.025756359100342} +02/25/2022 03:32:14 - INFO - codeparrot_training - Step 20040: {'lr': 0.0003450658531833013, 'samples': 10260992, 'steps': 20040, 'loss/train': 1.3042536973953247} +02/25/2022 03:32:20 - INFO - codeparrot_training - Step 20041: {'lr': 0.00034505071969507595, 'samples': 10261504, 'steps': 20041, 'loss/train': 2.375941514968872} +02/25/2022 03:32:25 - INFO - codeparrot_training - Step 20042: {'lr': 0.0003450355857996835, 'samples': 10262016, 'steps': 20042, 'loss/train': 2.3680078983306885} +02/25/2022 03:32:29 - INFO - codeparrot_training - Step 20043: {'lr': 0.0003450204514971888, 'samples': 10262528, 'steps': 20043, 'loss/train': 0.9760146737098694} +02/25/2022 03:32:34 - INFO - codeparrot_training - Step 20044: {'lr': 0.0003450053167876566, 'samples': 10263040, 'steps': 20044, 'loss/train': 1.9572149515151978} +02/25/2022 03:32:38 - INFO - codeparrot_training - Step 20045: {'lr': 0.0003449901816711519, 'samples': 10263552, 'steps': 20045, 'loss/train': 0.6744064092636108} +02/25/2022 03:32:43 - INFO - codeparrot_training - Step 20046: {'lr': 0.00034497504614773935, 'samples': 10264064, 'steps': 20046, 'loss/train': 2.525388240814209} +02/25/2022 03:32:47 - INFO - codeparrot_training - Step 20047: {'lr': 0.0003449599102174839, 'samples': 10264576, 'steps': 20047, 'loss/train': 2.0306506156921387} +02/25/2022 03:32:53 - INFO - codeparrot_training - Step 20048: {'lr': 0.0003449447738804503, 'samples': 10265088, 'steps': 20048, 'loss/train': 1.8904715776443481} +02/25/2022 03:32:57 - INFO - codeparrot_training - Step 20049: {'lr': 0.0003449296371367034, 'samples': 10265600, 'steps': 20049, 'loss/train': 1.5907604694366455} +02/25/2022 03:33:02 - INFO - codeparrot_training - Step 20050: {'lr': 0.0003449144999863082, 'samples': 10266112, 'steps': 20050, 'loss/train': 2.379424571990967} +02/25/2022 03:33:06 - INFO - codeparrot_training - Step 20051: {'lr': 0.00034489936242932935, 'samples': 10266624, 'steps': 20051, 'loss/train': 0.9318121075630188} +02/25/2022 03:33:11 - INFO - codeparrot_training - Step 20052: {'lr': 0.00034488422446583177, 'samples': 10267136, 'steps': 20052, 'loss/train': 2.723862886428833} +02/25/2022 03:33:15 - INFO - codeparrot_training - Step 20053: {'lr': 0.0003448690860958803, 'samples': 10267648, 'steps': 20053, 'loss/train': 1.6078450679779053} +02/25/2022 03:33:20 - INFO - codeparrot_training - Step 20054: {'lr': 0.00034485394731953976, 'samples': 10268160, 'steps': 20054, 'loss/train': 2.144737720489502} +02/25/2022 03:33:24 - INFO - codeparrot_training - Step 20055: {'lr': 0.00034483880813687505, 'samples': 10268672, 'steps': 20055, 'loss/train': 1.9698745012283325} +02/25/2022 03:33:29 - INFO - codeparrot_training - Step 20056: {'lr': 0.0003448236685479511, 'samples': 10269184, 'steps': 20056, 'loss/train': 1.7840856313705444} +02/25/2022 03:33:33 - INFO - codeparrot_training - Step 20057: {'lr': 0.0003448085285528326, 'samples': 10269696, 'steps': 20057, 'loss/train': 1.2451926469802856} +02/25/2022 03:33:38 - INFO - codeparrot_training - Step 20058: {'lr': 0.00034479338815158447, 'samples': 10270208, 'steps': 20058, 'loss/train': 1.1563801765441895} +02/25/2022 03:33:42 - INFO - codeparrot_training - Step 20059: {'lr': 0.0003447782473442715, 'samples': 10270720, 'steps': 20059, 'loss/train': 1.4472498893737793} +02/25/2022 03:33:47 - INFO - codeparrot_training - Step 20060: {'lr': 0.00034476310613095867, 'samples': 10271232, 'steps': 20060, 'loss/train': 1.7619688510894775} +02/25/2022 03:33:51 - INFO - codeparrot_training - Step 20061: {'lr': 0.00034474796451171075, 'samples': 10271744, 'steps': 20061, 'loss/train': 2.4976987838745117} +02/25/2022 03:33:57 - INFO - codeparrot_training - Step 20062: {'lr': 0.00034473282248659266, 'samples': 10272256, 'steps': 20062, 'loss/train': 1.5934425592422485} +02/25/2022 03:34:00 - INFO - codeparrot_training - Step 20063: {'lr': 0.00034471768005566925, 'samples': 10272768, 'steps': 20063, 'loss/train': 0.8754019141197205} +02/25/2022 03:34:06 - INFO - codeparrot_training - Step 20064: {'lr': 0.00034470253721900535, 'samples': 10273280, 'steps': 20064, 'loss/train': 2.1164333820343018} +02/25/2022 03:34:10 - INFO - codeparrot_training - Step 20065: {'lr': 0.0003446873939766659, 'samples': 10273792, 'steps': 20065, 'loss/train': 2.306887149810791} +02/25/2022 03:34:15 - INFO - codeparrot_training - Step 20066: {'lr': 0.0003446722503287157, 'samples': 10274304, 'steps': 20066, 'loss/train': 1.4776066541671753} +02/25/2022 03:34:19 - INFO - codeparrot_training - Step 20067: {'lr': 0.0003446571062752196, 'samples': 10274816, 'steps': 20067, 'loss/train': 1.2129466533660889} +02/25/2022 03:34:24 - INFO - codeparrot_training - Step 20068: {'lr': 0.0003446419618162425, 'samples': 10275328, 'steps': 20068, 'loss/train': 3.2345340251922607} +02/25/2022 03:34:28 - INFO - codeparrot_training - Step 20069: {'lr': 0.0003446268169518494, 'samples': 10275840, 'steps': 20069, 'loss/train': 1.928001046180725} +02/25/2022 03:34:33 - INFO - codeparrot_training - Step 20070: {'lr': 0.00034461167168210494, 'samples': 10276352, 'steps': 20070, 'loss/train': 1.7225770950317383} +02/25/2022 03:34:37 - INFO - codeparrot_training - Step 20071: {'lr': 0.00034459652600707423, 'samples': 10276864, 'steps': 20071, 'loss/train': 2.394929885864258} +02/25/2022 03:34:42 - INFO - codeparrot_training - Step 20072: {'lr': 0.000344581379926822, 'samples': 10277376, 'steps': 20072, 'loss/train': 2.9181714057922363} +02/25/2022 03:34:46 - INFO - codeparrot_training - Step 20073: {'lr': 0.0003445662334414131, 'samples': 10277888, 'steps': 20073, 'loss/train': 1.212462306022644} +02/25/2022 03:34:52 - INFO - codeparrot_training - Step 20074: {'lr': 0.00034455108655091256, 'samples': 10278400, 'steps': 20074, 'loss/train': 1.367550253868103} +02/25/2022 03:34:55 - INFO - codeparrot_training - Step 20075: {'lr': 0.00034453593925538515, 'samples': 10278912, 'steps': 20075, 'loss/train': 2.1880338191986084} +02/25/2022 03:35:01 - INFO - codeparrot_training - Step 20076: {'lr': 0.00034452079155489586, 'samples': 10279424, 'steps': 20076, 'loss/train': 1.9069173336029053} +02/25/2022 03:35:04 - INFO - codeparrot_training - Step 20077: {'lr': 0.00034450564344950944, 'samples': 10279936, 'steps': 20077, 'loss/train': 2.4891440868377686} +02/25/2022 03:35:10 - INFO - codeparrot_training - Step 20078: {'lr': 0.00034449049493929086, 'samples': 10280448, 'steps': 20078, 'loss/train': 1.4974418878555298} +02/25/2022 03:35:13 - INFO - codeparrot_training - Step 20079: {'lr': 0.00034447534602430503, 'samples': 10280960, 'steps': 20079, 'loss/train': 1.76010000705719} +02/25/2022 03:35:19 - INFO - codeparrot_training - Step 20080: {'lr': 0.00034446019670461683, 'samples': 10281472, 'steps': 20080, 'loss/train': 1.767886757850647} +02/25/2022 03:35:22 - INFO - codeparrot_training - Step 20081: {'lr': 0.0003444450469802911, 'samples': 10281984, 'steps': 20081, 'loss/train': 2.418989419937134} +02/25/2022 03:35:28 - INFO - codeparrot_training - Step 20082: {'lr': 0.0003444298968513928, 'samples': 10282496, 'steps': 20082, 'loss/train': 1.963512659072876} +02/25/2022 03:35:31 - INFO - codeparrot_training - Step 20083: {'lr': 0.0003444147463179868, 'samples': 10283008, 'steps': 20083, 'loss/train': 2.308830499649048} +02/25/2022 03:35:38 - INFO - codeparrot_training - Step 20084: {'lr': 0.00034439959538013805, 'samples': 10283520, 'steps': 20084, 'loss/train': 1.3150640726089478} +02/25/2022 03:35:41 - INFO - codeparrot_training - Step 20085: {'lr': 0.00034438444403791135, 'samples': 10284032, 'steps': 20085, 'loss/train': 2.6701104640960693} +02/25/2022 03:35:47 - INFO - codeparrot_training - Step 20086: {'lr': 0.00034436929229137163, 'samples': 10284544, 'steps': 20086, 'loss/train': 0.08370508253574371} +02/25/2022 03:35:50 - INFO - codeparrot_training - Step 20087: {'lr': 0.00034435414014058393, 'samples': 10285056, 'steps': 20087, 'loss/train': 2.3114542961120605} +02/25/2022 03:35:56 - INFO - codeparrot_training - Step 20088: {'lr': 0.000344338987585613, 'samples': 10285568, 'steps': 20088, 'loss/train': 2.2398841381073} +02/25/2022 03:35:59 - INFO - codeparrot_training - Step 20089: {'lr': 0.0003443238346265238, 'samples': 10286080, 'steps': 20089, 'loss/train': 1.71213698387146} +02/25/2022 03:36:05 - INFO - codeparrot_training - Step 20090: {'lr': 0.0003443086812633812, 'samples': 10286592, 'steps': 20090, 'loss/train': 2.1920006275177} +02/25/2022 03:36:08 - INFO - codeparrot_training - Step 20091: {'lr': 0.00034429352749625026, 'samples': 10287104, 'steps': 20091, 'loss/train': 2.539146661758423} +02/25/2022 03:36:14 - INFO - codeparrot_training - Step 20092: {'lr': 0.00034427837332519573, 'samples': 10287616, 'steps': 20092, 'loss/train': 2.2923905849456787} +02/25/2022 03:36:17 - INFO - codeparrot_training - Step 20093: {'lr': 0.0003442632187502826, 'samples': 10288128, 'steps': 20093, 'loss/train': 1.10002863407135} +02/25/2022 03:36:23 - INFO - codeparrot_training - Step 20094: {'lr': 0.00034424806377157576, 'samples': 10288640, 'steps': 20094, 'loss/train': 1.6613848209381104} +02/25/2022 03:36:26 - INFO - codeparrot_training - Step 20095: {'lr': 0.0003442329083891402, 'samples': 10289152, 'steps': 20095, 'loss/train': 0.9307950139045715} +02/25/2022 03:36:32 - INFO - codeparrot_training - Step 20096: {'lr': 0.00034421775260304067, 'samples': 10289664, 'steps': 20096, 'loss/train': 1.4342178106307983} +02/25/2022 03:36:36 - INFO - codeparrot_training - Step 20097: {'lr': 0.0003442025964133422, 'samples': 10290176, 'steps': 20097, 'loss/train': 2.288285732269287} +02/25/2022 03:36:41 - INFO - codeparrot_training - Step 20098: {'lr': 0.0003441874398201099, 'samples': 10290688, 'steps': 20098, 'loss/train': 2.137441396713257} +02/25/2022 03:36:45 - INFO - codeparrot_training - Step 20099: {'lr': 0.00034417228282340837, 'samples': 10291200, 'steps': 20099, 'loss/train': 1.7621381282806396} +02/25/2022 03:36:52 - INFO - codeparrot_training - Step 20100: {'lr': 0.0003441571254233027, 'samples': 10291712, 'steps': 20100, 'loss/train': 1.829404592514038} +02/25/2022 03:36:55 - INFO - codeparrot_training - Step 20101: {'lr': 0.00034414196761985784, 'samples': 10292224, 'steps': 20101, 'loss/train': 2.217130422592163} +02/25/2022 03:37:01 - INFO - codeparrot_training - Step 20102: {'lr': 0.00034412680941313866, 'samples': 10292736, 'steps': 20102, 'loss/train': 1.9657161235809326} +02/25/2022 03:37:04 - INFO - codeparrot_training - Step 20103: {'lr': 0.00034411165080321007, 'samples': 10293248, 'steps': 20103, 'loss/train': 2.4528114795684814} +02/25/2022 03:37:10 - INFO - codeparrot_training - Step 20104: {'lr': 0.00034409649179013716, 'samples': 10293760, 'steps': 20104, 'loss/train': 2.3077969551086426} +02/25/2022 03:37:13 - INFO - codeparrot_training - Step 20105: {'lr': 0.00034408133237398466, 'samples': 10294272, 'steps': 20105, 'loss/train': 1.5373939275741577} +02/25/2022 03:37:19 - INFO - codeparrot_training - Step 20106: {'lr': 0.0003440661725548176, 'samples': 10294784, 'steps': 20106, 'loss/train': 0.37341976165771484} +02/25/2022 03:37:22 - INFO - codeparrot_training - Step 20107: {'lr': 0.00034405101233270105, 'samples': 10295296, 'steps': 20107, 'loss/train': 1.6152325868606567} +02/25/2022 03:37:28 - INFO - codeparrot_training - Step 20108: {'lr': 0.0003440358517076997, 'samples': 10295808, 'steps': 20108, 'loss/train': 2.2198026180267334} +02/25/2022 03:37:31 - INFO - codeparrot_training - Step 20109: {'lr': 0.00034402069067987874, 'samples': 10296320, 'steps': 20109, 'loss/train': 2.169184684753418} +02/25/2022 03:37:37 - INFO - codeparrot_training - Step 20110: {'lr': 0.0003440055292493029, 'samples': 10296832, 'steps': 20110, 'loss/train': 1.4872303009033203} +02/25/2022 03:37:40 - INFO - codeparrot_training - Step 20111: {'lr': 0.0003439903674160373, 'samples': 10297344, 'steps': 20111, 'loss/train': 2.643341064453125} +02/25/2022 03:37:47 - INFO - codeparrot_training - Step 20112: {'lr': 0.0003439752051801467, 'samples': 10297856, 'steps': 20112, 'loss/train': 2.191432237625122} +02/25/2022 03:37:50 - INFO - codeparrot_training - Step 20113: {'lr': 0.0003439600425416963, 'samples': 10298368, 'steps': 20113, 'loss/train': 3.3072595596313477} +02/25/2022 03:37:56 - INFO - codeparrot_training - Step 20114: {'lr': 0.00034394487950075076, 'samples': 10298880, 'steps': 20114, 'loss/train': 2.21701717376709} +02/25/2022 03:37:59 - INFO - codeparrot_training - Step 20115: {'lr': 0.0003439297160573753, 'samples': 10299392, 'steps': 20115, 'loss/train': 2.4579732418060303} +02/25/2022 03:38:05 - INFO - codeparrot_training - Step 20116: {'lr': 0.0003439145522116347, 'samples': 10299904, 'steps': 20116, 'loss/train': 1.6692771911621094} +02/25/2022 03:38:08 - INFO - codeparrot_training - Step 20117: {'lr': 0.000343899387963594, 'samples': 10300416, 'steps': 20117, 'loss/train': 2.643324613571167} +02/25/2022 03:38:14 - INFO - codeparrot_training - Step 20118: {'lr': 0.00034388422331331817, 'samples': 10300928, 'steps': 20118, 'loss/train': 3.2394938468933105} +02/25/2022 03:38:18 - INFO - codeparrot_training - Step 20119: {'lr': 0.0003438690582608721, 'samples': 10301440, 'steps': 20119, 'loss/train': 1.951217770576477} +02/25/2022 03:38:23 - INFO - codeparrot_training - Step 20120: {'lr': 0.00034385389280632077, 'samples': 10301952, 'steps': 20120, 'loss/train': 2.131113052368164} +02/25/2022 03:38:27 - INFO - codeparrot_training - Step 20121: {'lr': 0.00034383872694972916, 'samples': 10302464, 'steps': 20121, 'loss/train': 1.0880255699157715} +02/25/2022 03:38:33 - INFO - codeparrot_training - Step 20122: {'lr': 0.0003438235606911623, 'samples': 10302976, 'steps': 20122, 'loss/train': 0.5588898658752441} +02/25/2022 03:38:37 - INFO - codeparrot_training - Step 20123: {'lr': 0.0003438083940306851, 'samples': 10303488, 'steps': 20123, 'loss/train': 1.6169017553329468} +02/25/2022 03:38:42 - INFO - codeparrot_training - Step 20124: {'lr': 0.0003437932269683625, 'samples': 10304000, 'steps': 20124, 'loss/train': 0.9220380783081055} +02/25/2022 03:38:46 - INFO - codeparrot_training - Step 20125: {'lr': 0.0003437780595042595, 'samples': 10304512, 'steps': 20125, 'loss/train': 3.7603652477264404} +02/25/2022 03:38:52 - INFO - codeparrot_training - Step 20126: {'lr': 0.0003437628916384411, 'samples': 10305024, 'steps': 20126, 'loss/train': 2.475961685180664} +02/25/2022 03:38:55 - INFO - codeparrot_training - Step 20127: {'lr': 0.0003437477233709722, 'samples': 10305536, 'steps': 20127, 'loss/train': 2.18354868888855} +02/25/2022 03:39:01 - INFO - codeparrot_training - Step 20128: {'lr': 0.0003437325547019179, 'samples': 10306048, 'steps': 20128, 'loss/train': 1.8523273468017578} +02/25/2022 03:39:04 - INFO - codeparrot_training - Step 20129: {'lr': 0.000343717385631343, 'samples': 10306560, 'steps': 20129, 'loss/train': 0.9113268256187439} +02/25/2022 03:39:10 - INFO - codeparrot_training - Step 20130: {'lr': 0.00034370221615931265, 'samples': 10307072, 'steps': 20130, 'loss/train': 2.594956874847412} +02/25/2022 03:39:13 - INFO - codeparrot_training - Step 20131: {'lr': 0.0003436870462858917, 'samples': 10307584, 'steps': 20131, 'loss/train': 2.4714298248291016} +02/25/2022 03:39:19 - INFO - codeparrot_training - Step 20132: {'lr': 0.0003436718760111452, 'samples': 10308096, 'steps': 20132, 'loss/train': 2.5058271884918213} +02/25/2022 03:39:23 - INFO - codeparrot_training - Step 20133: {'lr': 0.00034365670533513813, 'samples': 10308608, 'steps': 20133, 'loss/train': 1.8360416889190674} +02/25/2022 03:39:28 - INFO - codeparrot_training - Step 20134: {'lr': 0.00034364153425793547, 'samples': 10309120, 'steps': 20134, 'loss/train': 1.4394134283065796} +02/25/2022 03:39:32 - INFO - codeparrot_training - Step 20135: {'lr': 0.0003436263627796023, 'samples': 10309632, 'steps': 20135, 'loss/train': 2.3129072189331055} +02/25/2022 03:39:38 - INFO - codeparrot_training - Step 20136: {'lr': 0.00034361119090020343, 'samples': 10310144, 'steps': 20136, 'loss/train': 1.7954285144805908} +02/25/2022 03:39:41 - INFO - codeparrot_training - Step 20137: {'lr': 0.000343596018619804, 'samples': 10310656, 'steps': 20137, 'loss/train': 2.490025281906128} +02/25/2022 03:39:47 - INFO - codeparrot_training - Step 20138: {'lr': 0.00034358084593846886, 'samples': 10311168, 'steps': 20138, 'loss/train': 2.0469114780426025} +02/25/2022 03:39:50 - INFO - codeparrot_training - Step 20139: {'lr': 0.00034356567285626316, 'samples': 10311680, 'steps': 20139, 'loss/train': 2.0434672832489014} +02/25/2022 03:39:56 - INFO - codeparrot_training - Step 20140: {'lr': 0.0003435504993732518, 'samples': 10312192, 'steps': 20140, 'loss/train': 1.5098412036895752} +02/25/2022 03:39:59 - INFO - codeparrot_training - Step 20141: {'lr': 0.00034353532548949984, 'samples': 10312704, 'steps': 20141, 'loss/train': 2.270737648010254} +02/25/2022 03:40:05 - INFO - codeparrot_training - Step 20142: {'lr': 0.0003435201512050722, 'samples': 10313216, 'steps': 20142, 'loss/train': 1.4375869035720825} +02/25/2022 03:40:08 - INFO - codeparrot_training - Step 20143: {'lr': 0.000343504976520034, 'samples': 10313728, 'steps': 20143, 'loss/train': 1.4480029344558716} +02/25/2022 03:40:14 - INFO - codeparrot_training - Step 20144: {'lr': 0.0003434898014344501, 'samples': 10314240, 'steps': 20144, 'loss/train': 1.4094799757003784} +02/25/2022 03:40:17 - INFO - codeparrot_training - Step 20145: {'lr': 0.00034347462594838565, 'samples': 10314752, 'steps': 20145, 'loss/train': 1.9889744520187378} +02/25/2022 03:40:23 - INFO - codeparrot_training - Step 20146: {'lr': 0.0003434594500619055, 'samples': 10315264, 'steps': 20146, 'loss/train': 0.23489277064800262} +02/25/2022 03:40:26 - INFO - codeparrot_training - Step 20147: {'lr': 0.00034344427377507475, 'samples': 10315776, 'steps': 20147, 'loss/train': 1.9060548543930054} +02/25/2022 03:40:33 - INFO - codeparrot_training - Step 20148: {'lr': 0.00034342909708795846, 'samples': 10316288, 'steps': 20148, 'loss/train': 2.104743003845215} +02/25/2022 03:40:36 - INFO - codeparrot_training - Step 20149: {'lr': 0.0003434139200006216, 'samples': 10316800, 'steps': 20149, 'loss/train': 2.5730905532836914} +02/25/2022 03:40:40 - INFO - codeparrot_training - Step 20150: {'lr': 0.0003433987425131291, 'samples': 10317312, 'steps': 20150, 'loss/train': 0.40831297636032104} +02/25/2022 03:40:45 - INFO - codeparrot_training - Step 20151: {'lr': 0.0003433835646255461, 'samples': 10317824, 'steps': 20151, 'loss/train': 2.559032440185547} +02/25/2022 03:40:49 - INFO - codeparrot_training - Step 20152: {'lr': 0.0003433683863379375, 'samples': 10318336, 'steps': 20152, 'loss/train': 2.9120593070983887} +02/25/2022 03:40:54 - INFO - codeparrot_training - Step 20153: {'lr': 0.00034335320765036843, 'samples': 10318848, 'steps': 20153, 'loss/train': 1.8782655000686646} +02/25/2022 03:40:58 - INFO - codeparrot_training - Step 20154: {'lr': 0.0003433380285629039, 'samples': 10319360, 'steps': 20154, 'loss/train': 2.706453323364258} +02/25/2022 03:41:03 - INFO - codeparrot_training - Step 20155: {'lr': 0.0003433228490756088, 'samples': 10319872, 'steps': 20155, 'loss/train': 1.754799246788025} +02/25/2022 03:41:06 - INFO - codeparrot_training - Step 20156: {'lr': 0.00034330766918854827, 'samples': 10320384, 'steps': 20156, 'loss/train': 3.3620426654815674} +02/25/2022 03:41:14 - INFO - codeparrot_training - Step 20157: {'lr': 0.0003432924889017873, 'samples': 10320896, 'steps': 20157, 'loss/train': 1.7797236442565918} +02/25/2022 03:41:18 - INFO - codeparrot_training - Step 20158: {'lr': 0.000343277308215391, 'samples': 10321408, 'steps': 20158, 'loss/train': 1.5693120956420898} +02/25/2022 03:41:23 - INFO - codeparrot_training - Step 20159: {'lr': 0.0003432621271294243, 'samples': 10321920, 'steps': 20159, 'loss/train': 2.9955568313598633} +02/25/2022 03:41:27 - INFO - codeparrot_training - Step 20160: {'lr': 0.00034324694564395226, 'samples': 10322432, 'steps': 20160, 'loss/train': 2.8591437339782715} +02/25/2022 03:41:32 - INFO - codeparrot_training - Step 20161: {'lr': 0.0003432317637590399, 'samples': 10322944, 'steps': 20161, 'loss/train': 2.9914727210998535} +02/25/2022 03:41:36 - INFO - codeparrot_training - Step 20162: {'lr': 0.0003432165814747523, 'samples': 10323456, 'steps': 20162, 'loss/train': 0.8629205226898193} +02/25/2022 03:41:41 - INFO - codeparrot_training - Step 20163: {'lr': 0.0003432013987911544, 'samples': 10323968, 'steps': 20163, 'loss/train': 2.3496475219726562} +02/25/2022 03:41:45 - INFO - codeparrot_training - Step 20164: {'lr': 0.0003431862157083114, 'samples': 10324480, 'steps': 20164, 'loss/train': 3.192779541015625} +02/25/2022 03:41:50 - INFO - codeparrot_training - Step 20165: {'lr': 0.0003431710322262882, 'samples': 10324992, 'steps': 20165, 'loss/train': 1.9700579643249512} +02/25/2022 03:41:58 - INFO - codeparrot_training - Step 20166: {'lr': 0.0003431558483451498, 'samples': 10325504, 'steps': 20166, 'loss/train': 1.953985571861267} +02/25/2022 03:42:01 - INFO - codeparrot_training - Step 20167: {'lr': 0.00034314066406496146, 'samples': 10326016, 'steps': 20167, 'loss/train': 2.1392829418182373} +02/25/2022 03:42:07 - INFO - codeparrot_training - Step 20168: {'lr': 0.00034312547938578796, 'samples': 10326528, 'steps': 20168, 'loss/train': 1.6107432842254639} +02/25/2022 03:42:10 - INFO - codeparrot_training - Step 20169: {'lr': 0.0003431102943076946, 'samples': 10327040, 'steps': 20169, 'loss/train': 2.2947282791137695} +02/25/2022 03:42:16 - INFO - codeparrot_training - Step 20170: {'lr': 0.00034309510883074625, 'samples': 10327552, 'steps': 20170, 'loss/train': 2.008180856704712} +02/25/2022 03:42:19 - INFO - codeparrot_training - Step 20171: {'lr': 0.0003430799229550079, 'samples': 10328064, 'steps': 20171, 'loss/train': 1.8287910223007202} +02/25/2022 03:42:25 - INFO - codeparrot_training - Step 20172: {'lr': 0.0003430647366805449, 'samples': 10328576, 'steps': 20172, 'loss/train': 0.12379389256238937} +02/25/2022 03:42:28 - INFO - codeparrot_training - Step 20173: {'lr': 0.000343049550007422, 'samples': 10329088, 'steps': 20173, 'loss/train': 2.1541004180908203} +02/25/2022 03:42:34 - INFO - codeparrot_training - Step 20174: {'lr': 0.0003430343629357044, 'samples': 10329600, 'steps': 20174, 'loss/train': 0.17658719420433044} +02/25/2022 03:42:37 - INFO - codeparrot_training - Step 20175: {'lr': 0.0003430191754654572, 'samples': 10330112, 'steps': 20175, 'loss/train': 2.2133290767669678} +02/25/2022 03:42:43 - INFO - codeparrot_training - Step 20176: {'lr': 0.0003430039875967454, 'samples': 10330624, 'steps': 20176, 'loss/train': 2.2580955028533936} +02/25/2022 03:42:46 - INFO - codeparrot_training - Step 20177: {'lr': 0.00034298879932963397, 'samples': 10331136, 'steps': 20177, 'loss/train': 5.419597148895264} +02/25/2022 03:42:53 - INFO - codeparrot_training - Step 20178: {'lr': 0.0003429736106641881, 'samples': 10331648, 'steps': 20178, 'loss/train': 1.6949840784072876} +02/25/2022 03:42:57 - INFO - codeparrot_training - Step 20179: {'lr': 0.0003429584216004728, 'samples': 10332160, 'steps': 20179, 'loss/train': 2.1994357109069824} +02/25/2022 03:43:02 - INFO - codeparrot_training - Step 20180: {'lr': 0.0003429432321385531, 'samples': 10332672, 'steps': 20180, 'loss/train': 1.9700783491134644} +02/25/2022 03:43:06 - INFO - codeparrot_training - Step 20181: {'lr': 0.00034292804227849407, 'samples': 10333184, 'steps': 20181, 'loss/train': 2.1304335594177246} +02/25/2022 03:43:11 - INFO - codeparrot_training - Step 20182: {'lr': 0.0003429128520203608, 'samples': 10333696, 'steps': 20182, 'loss/train': 1.1833218336105347} +02/25/2022 03:43:15 - INFO - codeparrot_training - Step 20183: {'lr': 0.00034289766136421854, 'samples': 10334208, 'steps': 20183, 'loss/train': 2.419492244720459} +02/25/2022 03:43:20 - INFO - codeparrot_training - Step 20184: {'lr': 0.000342882470310132, 'samples': 10334720, 'steps': 20184, 'loss/train': 2.2310307025909424} +02/25/2022 03:43:24 - INFO - codeparrot_training - Step 20185: {'lr': 0.0003428672788581666, 'samples': 10335232, 'steps': 20185, 'loss/train': 2.2726104259490967} +02/25/2022 03:43:29 - INFO - codeparrot_training - Step 20186: {'lr': 0.0003428520870083872, 'samples': 10335744, 'steps': 20186, 'loss/train': 1.7123125791549683} +02/25/2022 03:43:33 - INFO - codeparrot_training - Step 20187: {'lr': 0.000342836894760859, 'samples': 10336256, 'steps': 20187, 'loss/train': 1.6534618139266968} +02/25/2022 03:43:40 - INFO - codeparrot_training - Step 20188: {'lr': 0.00034282170211564697, 'samples': 10336768, 'steps': 20188, 'loss/train': 1.998224139213562} +02/25/2022 03:43:44 - INFO - codeparrot_training - Step 20189: {'lr': 0.0003428065090728163, 'samples': 10337280, 'steps': 20189, 'loss/train': 2.199709415435791} +02/25/2022 03:43:49 - INFO - codeparrot_training - Step 20190: {'lr': 0.0003427913156324319, 'samples': 10337792, 'steps': 20190, 'loss/train': 1.9417345523834229} +02/25/2022 03:43:53 - INFO - codeparrot_training - Step 20191: {'lr': 0.00034277612179455907, 'samples': 10338304, 'steps': 20191, 'loss/train': 1.9399032592773438} +02/25/2022 03:43:58 - INFO - codeparrot_training - Step 20192: {'lr': 0.00034276092755926275, 'samples': 10338816, 'steps': 20192, 'loss/train': 0.8594242930412292} +02/25/2022 03:44:02 - INFO - codeparrot_training - Step 20193: {'lr': 0.0003427457329266081, 'samples': 10339328, 'steps': 20193, 'loss/train': 0.14682406187057495} +02/25/2022 03:44:07 - INFO - codeparrot_training - Step 20194: {'lr': 0.0003427305378966601, 'samples': 10339840, 'steps': 20194, 'loss/train': 1.7434056997299194} +02/25/2022 03:44:11 - INFO - codeparrot_training - Step 20195: {'lr': 0.00034271534246948403, 'samples': 10340352, 'steps': 20195, 'loss/train': 2.130207061767578} +02/25/2022 03:44:16 - INFO - codeparrot_training - Step 20196: {'lr': 0.0003427001466451448, 'samples': 10340864, 'steps': 20196, 'loss/train': 1.4441578388214111} +02/25/2022 03:44:20 - INFO - codeparrot_training - Step 20197: {'lr': 0.00034268495042370767, 'samples': 10341376, 'steps': 20197, 'loss/train': 2.3099284172058105} +02/25/2022 03:44:25 - INFO - codeparrot_training - Step 20198: {'lr': 0.00034266975380523756, 'samples': 10341888, 'steps': 20198, 'loss/train': 2.0511014461517334} +02/25/2022 03:44:29 - INFO - codeparrot_training - Step 20199: {'lr': 0.00034265455678979967, 'samples': 10342400, 'steps': 20199, 'loss/train': 1.2763123512268066} +02/25/2022 03:44:34 - INFO - codeparrot_training - Step 20200: {'lr': 0.0003426393593774591, 'samples': 10342912, 'steps': 20200, 'loss/train': 2.256762742996216} +02/25/2022 03:44:38 - INFO - codeparrot_training - Step 20201: {'lr': 0.0003426241615682809, 'samples': 10343424, 'steps': 20201, 'loss/train': 1.6343567371368408} +02/25/2022 03:44:43 - INFO - codeparrot_training - Step 20202: {'lr': 0.0003426089633623302, 'samples': 10343936, 'steps': 20202, 'loss/train': 1.9044687747955322} +02/25/2022 03:44:47 - INFO - codeparrot_training - Step 20203: {'lr': 0.0003425937647596721, 'samples': 10344448, 'steps': 20203, 'loss/train': 2.21189546585083} +02/25/2022 03:44:54 - INFO - codeparrot_training - Step 20204: {'lr': 0.0003425785657603718, 'samples': 10344960, 'steps': 20204, 'loss/train': 2.301870107650757} +02/25/2022 03:44:57 - INFO - codeparrot_training - Step 20205: {'lr': 0.0003425633663644942, 'samples': 10345472, 'steps': 20205, 'loss/train': 1.391209602355957} +02/25/2022 03:45:03 - INFO - codeparrot_training - Step 20206: {'lr': 0.00034254816657210455, 'samples': 10345984, 'steps': 20206, 'loss/train': 3.39180850982666} +02/25/2022 03:45:06 - INFO - codeparrot_training - Step 20207: {'lr': 0.00034253296638326805, 'samples': 10346496, 'steps': 20207, 'loss/train': 2.0950658321380615} +02/25/2022 03:45:12 - INFO - codeparrot_training - Step 20208: {'lr': 0.0003425177657980496, 'samples': 10347008, 'steps': 20208, 'loss/train': 0.19738437235355377} +02/25/2022 03:45:16 - INFO - codeparrot_training - Step 20209: {'lr': 0.0003425025648165145, 'samples': 10347520, 'steps': 20209, 'loss/train': 2.274714708328247} +02/25/2022 03:45:21 - INFO - codeparrot_training - Step 20210: {'lr': 0.00034248736343872767, 'samples': 10348032, 'steps': 20210, 'loss/train': 1.759999394416809} +02/25/2022 03:45:25 - INFO - codeparrot_training - Step 20211: {'lr': 0.0003424721616647544, 'samples': 10348544, 'steps': 20211, 'loss/train': 2.694755792617798} +02/25/2022 03:45:30 - INFO - codeparrot_training - Step 20212: {'lr': 0.00034245695949465977, 'samples': 10349056, 'steps': 20212, 'loss/train': 2.084501028060913} +02/25/2022 03:45:34 - INFO - codeparrot_training - Step 20213: {'lr': 0.00034244175692850894, 'samples': 10349568, 'steps': 20213, 'loss/train': 1.7472513914108276} +02/25/2022 03:45:41 - INFO - codeparrot_training - Step 20214: {'lr': 0.00034242655396636687, 'samples': 10350080, 'steps': 20214, 'loss/train': 1.5567471981048584} +02/25/2022 03:45:44 - INFO - codeparrot_training - Step 20215: {'lr': 0.0003424113506082989, 'samples': 10350592, 'steps': 20215, 'loss/train': 1.5938544273376465} +02/25/2022 03:45:50 - INFO - codeparrot_training - Step 20216: {'lr': 0.00034239614685436994, 'samples': 10351104, 'steps': 20216, 'loss/train': 1.9916096925735474} +02/25/2022 03:45:53 - INFO - codeparrot_training - Step 20217: {'lr': 0.00034238094270464523, 'samples': 10351616, 'steps': 20217, 'loss/train': 1.6987683773040771} +02/25/2022 03:45:59 - INFO - codeparrot_training - Step 20218: {'lr': 0.00034236573815918993, 'samples': 10352128, 'steps': 20218, 'loss/train': 1.4841560125350952} +02/25/2022 03:46:02 - INFO - codeparrot_training - Step 20219: {'lr': 0.00034235053321806915, 'samples': 10352640, 'steps': 20219, 'loss/train': 1.3522506952285767} +02/25/2022 03:46:08 - INFO - codeparrot_training - Step 20220: {'lr': 0.00034233532788134803, 'samples': 10353152, 'steps': 20220, 'loss/train': 4.422814846038818} +02/25/2022 03:46:11 - INFO - codeparrot_training - Step 20221: {'lr': 0.0003423201221490916, 'samples': 10353664, 'steps': 20221, 'loss/train': 1.8960996866226196} +02/25/2022 03:46:17 - INFO - codeparrot_training - Step 20222: {'lr': 0.00034230491602136513, 'samples': 10354176, 'steps': 20222, 'loss/train': 2.271332263946533} +02/25/2022 03:46:20 - INFO - codeparrot_training - Step 20223: {'lr': 0.0003422897094982337, 'samples': 10354688, 'steps': 20223, 'loss/train': 2.104485511779785} +02/25/2022 03:46:28 - INFO - codeparrot_training - Step 20224: {'lr': 0.0003422745025797626, 'samples': 10355200, 'steps': 20224, 'loss/train': 2.424830436706543} +02/25/2022 03:46:31 - INFO - codeparrot_training - Step 20225: {'lr': 0.00034225929526601664, 'samples': 10355712, 'steps': 20225, 'loss/train': 1.6067159175872803} +02/25/2022 03:46:37 - INFO - codeparrot_training - Step 20226: {'lr': 0.0003422440875570612, 'samples': 10356224, 'steps': 20226, 'loss/train': 1.3527050018310547} +02/25/2022 03:46:40 - INFO - codeparrot_training - Step 20227: {'lr': 0.00034222887945296144, 'samples': 10356736, 'steps': 20227, 'loss/train': 2.335644245147705} +02/25/2022 03:46:45 - INFO - codeparrot_training - Step 20228: {'lr': 0.0003422136709537824, 'samples': 10357248, 'steps': 20228, 'loss/train': 1.4278925657272339} +02/25/2022 03:46:49 - INFO - codeparrot_training - Step 20229: {'lr': 0.00034219846205958926, 'samples': 10357760, 'steps': 20229, 'loss/train': 1.9828650951385498} +02/25/2022 03:46:54 - INFO - codeparrot_training - Step 20230: {'lr': 0.0003421832527704471, 'samples': 10358272, 'steps': 20230, 'loss/train': 0.9953842163085938} +02/25/2022 03:46:58 - INFO - codeparrot_training - Step 20231: {'lr': 0.0003421680430864214, 'samples': 10358784, 'steps': 20231, 'loss/train': 1.9220088720321655} +02/25/2022 03:47:03 - INFO - codeparrot_training - Step 20232: {'lr': 0.0003421528330075769, 'samples': 10359296, 'steps': 20232, 'loss/train': 2.1465003490448} +02/25/2022 03:47:07 - INFO - codeparrot_training - Step 20233: {'lr': 0.00034213762253397896, 'samples': 10359808, 'steps': 20233, 'loss/train': 2.023115873336792} +02/25/2022 03:47:14 - INFO - codeparrot_training - Step 20234: {'lr': 0.0003421224116656927, 'samples': 10360320, 'steps': 20234, 'loss/train': 1.7697272300720215} +02/25/2022 03:47:18 - INFO - codeparrot_training - Step 20235: {'lr': 0.0003421072004027833, 'samples': 10360832, 'steps': 20235, 'loss/train': 0.3598443865776062} +02/25/2022 03:47:23 - INFO - codeparrot_training - Step 20236: {'lr': 0.00034209198874531586, 'samples': 10361344, 'steps': 20236, 'loss/train': 1.957742691040039} +02/25/2022 03:47:27 - INFO - codeparrot_training - Step 20237: {'lr': 0.00034207677669335565, 'samples': 10361856, 'steps': 20237, 'loss/train': 2.067368268966675} +02/25/2022 03:47:32 - INFO - codeparrot_training - Step 20238: {'lr': 0.0003420615642469678, 'samples': 10362368, 'steps': 20238, 'loss/train': 2.1140098571777344} +02/25/2022 03:47:36 - INFO - codeparrot_training - Step 20239: {'lr': 0.00034204635140621726, 'samples': 10362880, 'steps': 20239, 'loss/train': 1.9461902379989624} +02/25/2022 03:47:41 - INFO - codeparrot_training - Step 20240: {'lr': 0.00034203113817116957, 'samples': 10363392, 'steps': 20240, 'loss/train': 1.085320234298706} +02/25/2022 03:47:47 - INFO - codeparrot_training - Step 20241: {'lr': 0.0003420159245418896, 'samples': 10363904, 'steps': 20241, 'loss/train': 0.9491880536079407} +02/25/2022 03:47:50 - INFO - codeparrot_training - Step 20242: {'lr': 0.0003420007105184426, 'samples': 10364416, 'steps': 20242, 'loss/train': 1.3015691041946411} +02/25/2022 03:47:57 - INFO - codeparrot_training - Step 20243: {'lr': 0.0003419854961008938, 'samples': 10364928, 'steps': 20243, 'loss/train': 1.5790596008300781} +02/25/2022 03:48:01 - INFO - codeparrot_training - Step 20244: {'lr': 0.0003419702812893084, 'samples': 10365440, 'steps': 20244, 'loss/train': 1.3433915376663208} +02/25/2022 03:48:04 - INFO - codeparrot_training - Step 20245: {'lr': 0.0003419550660837515, 'samples': 10365952, 'steps': 20245, 'loss/train': 2.1597776412963867} +02/25/2022 03:48:10 - INFO - codeparrot_training - Step 20246: {'lr': 0.0003419398504842883, 'samples': 10366464, 'steps': 20246, 'loss/train': 3.4824254512786865} +02/25/2022 03:48:13 - INFO - codeparrot_training - Step 20247: {'lr': 0.00034192463449098386, 'samples': 10366976, 'steps': 20247, 'loss/train': 3.133544445037842} +02/25/2022 03:48:19 - INFO - codeparrot_training - Step 20248: {'lr': 0.00034190941810390365, 'samples': 10367488, 'steps': 20248, 'loss/train': 1.955594539642334} +02/25/2022 03:48:22 - INFO - codeparrot_training - Step 20249: {'lr': 0.00034189420132311256, 'samples': 10368000, 'steps': 20249, 'loss/train': 2.202923536300659} +02/25/2022 03:48:28 - INFO - codeparrot_training - Step 20250: {'lr': 0.0003418789841486759, 'samples': 10368512, 'steps': 20250, 'loss/train': 2.6444976329803467} +02/25/2022 03:48:31 - INFO - codeparrot_training - Step 20251: {'lr': 0.0003418637665806589, 'samples': 10369024, 'steps': 20251, 'loss/train': 2.010192632675171} +02/25/2022 03:48:38 - INFO - codeparrot_training - Step 20252: {'lr': 0.0003418485486191267, 'samples': 10369536, 'steps': 20252, 'loss/train': 2.4391818046569824} +02/25/2022 03:48:41 - INFO - codeparrot_training - Step 20253: {'lr': 0.0003418333302641444, 'samples': 10370048, 'steps': 20253, 'loss/train': 2.3971216678619385} +02/25/2022 03:48:47 - INFO - codeparrot_training - Step 20254: {'lr': 0.0003418181115157774, 'samples': 10370560, 'steps': 20254, 'loss/train': 0.6539112329483032} +02/25/2022 03:48:50 - INFO - codeparrot_training - Step 20255: {'lr': 0.00034180289237409063, 'samples': 10371072, 'steps': 20255, 'loss/train': 1.6873406171798706} +02/25/2022 03:48:56 - INFO - codeparrot_training - Step 20256: {'lr': 0.00034178767283914944, 'samples': 10371584, 'steps': 20256, 'loss/train': 1.764180064201355} +02/25/2022 03:48:59 - INFO - codeparrot_training - Step 20257: {'lr': 0.000341772452911019, 'samples': 10372096, 'steps': 20257, 'loss/train': 1.1096210479736328} +02/25/2022 03:49:05 - INFO - codeparrot_training - Step 20258: {'lr': 0.0003417572325897646, 'samples': 10372608, 'steps': 20258, 'loss/train': 1.8763506412506104} +02/25/2022 03:49:08 - INFO - codeparrot_training - Step 20259: {'lr': 0.00034174201187545133, 'samples': 10373120, 'steps': 20259, 'loss/train': 1.6311677694320679} +02/25/2022 03:49:16 - INFO - codeparrot_training - Step 20260: {'lr': 0.00034172679076814437, 'samples': 10373632, 'steps': 20260, 'loss/train': 2.2753138542175293} +02/25/2022 03:49:19 - INFO - codeparrot_training - Step 20261: {'lr': 0.00034171156926790904, 'samples': 10374144, 'steps': 20261, 'loss/train': 2.1644623279571533} +02/25/2022 03:49:25 - INFO - codeparrot_training - Step 20262: {'lr': 0.00034169634737481034, 'samples': 10374656, 'steps': 20262, 'loss/train': 2.2250335216522217} +02/25/2022 03:49:28 - INFO - codeparrot_training - Step 20263: {'lr': 0.0003416811250889137, 'samples': 10375168, 'steps': 20263, 'loss/train': 1.5773553848266602} +02/25/2022 03:49:34 - INFO - codeparrot_training - Step 20264: {'lr': 0.00034166590241028425, 'samples': 10375680, 'steps': 20264, 'loss/train': 1.0202758312225342} +02/25/2022 03:49:37 - INFO - codeparrot_training - Step 20265: {'lr': 0.0003416506793389871, 'samples': 10376192, 'steps': 20265, 'loss/train': 2.670820474624634} +02/25/2022 03:49:43 - INFO - codeparrot_training - Step 20266: {'lr': 0.0003416354558750876, 'samples': 10376704, 'steps': 20266, 'loss/train': 2.014662981033325} +02/25/2022 03:49:46 - INFO - codeparrot_training - Step 20267: {'lr': 0.0003416202320186508, 'samples': 10377216, 'steps': 20267, 'loss/train': 2.6723196506500244} +02/25/2022 03:49:52 - INFO - codeparrot_training - Step 20268: {'lr': 0.0003416050077697422, 'samples': 10377728, 'steps': 20268, 'loss/train': 1.0188957452774048} +02/25/2022 03:49:56 - INFO - codeparrot_training - Step 20269: {'lr': 0.0003415897831284267, 'samples': 10378240, 'steps': 20269, 'loss/train': 2.786364793777466} +02/25/2022 03:50:03 - INFO - codeparrot_training - Step 20270: {'lr': 0.0003415745580947697, 'samples': 10378752, 'steps': 20270, 'loss/train': 2.278608798980713} +02/25/2022 03:50:06 - INFO - codeparrot_training - Step 20271: {'lr': 0.0003415593326688364, 'samples': 10379264, 'steps': 20271, 'loss/train': 1.9659326076507568} +02/25/2022 03:50:12 - INFO - codeparrot_training - Step 20272: {'lr': 0.0003415441068506919, 'samples': 10379776, 'steps': 20272, 'loss/train': 0.8071721196174622} +02/25/2022 03:50:15 - INFO - codeparrot_training - Step 20273: {'lr': 0.0003415288806404016, 'samples': 10380288, 'steps': 20273, 'loss/train': 4.123531341552734} +02/25/2022 03:50:21 - INFO - codeparrot_training - Step 20274: {'lr': 0.00034151365403803065, 'samples': 10380800, 'steps': 20274, 'loss/train': 1.7842228412628174} +02/25/2022 03:50:24 - INFO - codeparrot_training - Step 20275: {'lr': 0.0003414984270436442, 'samples': 10381312, 'steps': 20275, 'loss/train': 2.567279577255249} +02/25/2022 03:50:30 - INFO - codeparrot_training - Step 20276: {'lr': 0.00034148319965730757, 'samples': 10381824, 'steps': 20276, 'loss/train': 2.0263853073120117} +02/25/2022 03:50:33 - INFO - codeparrot_training - Step 20277: {'lr': 0.000341467971879086, 'samples': 10382336, 'steps': 20277, 'loss/train': 1.390875220298767} +02/25/2022 03:50:39 - INFO - codeparrot_training - Step 20278: {'lr': 0.0003414527437090446, 'samples': 10382848, 'steps': 20278, 'loss/train': 1.2159847021102905} +02/25/2022 03:50:42 - INFO - codeparrot_training - Step 20279: {'lr': 0.00034143751514724874, 'samples': 10383360, 'steps': 20279, 'loss/train': 2.333939552307129} +02/25/2022 03:50:49 - INFO - codeparrot_training - Step 20280: {'lr': 0.0003414222861937636, 'samples': 10383872, 'steps': 20280, 'loss/train': 1.8507686853408813} +02/25/2022 03:50:53 - INFO - codeparrot_training - Step 20281: {'lr': 0.00034140705684865437, 'samples': 10384384, 'steps': 20281, 'loss/train': 1.3636988401412964} +02/25/2022 03:50:58 - INFO - codeparrot_training - Step 20282: {'lr': 0.0003413918271119864, 'samples': 10384896, 'steps': 20282, 'loss/train': 2.3370273113250732} +02/25/2022 03:51:02 - INFO - codeparrot_training - Step 20283: {'lr': 0.00034137659698382485, 'samples': 10385408, 'steps': 20283, 'loss/train': 2.8440072536468506} +02/25/2022 03:51:07 - INFO - codeparrot_training - Step 20284: {'lr': 0.0003413613664642349, 'samples': 10385920, 'steps': 20284, 'loss/train': 1.3650819063186646} +02/25/2022 03:51:11 - INFO - codeparrot_training - Step 20285: {'lr': 0.00034134613555328195, 'samples': 10386432, 'steps': 20285, 'loss/train': 1.5565146207809448} +02/25/2022 03:51:16 - INFO - codeparrot_training - Step 20286: {'lr': 0.00034133090425103114, 'samples': 10386944, 'steps': 20286, 'loss/train': 1.8609282970428467} +02/25/2022 03:51:20 - INFO - codeparrot_training - Step 20287: {'lr': 0.00034131567255754776, 'samples': 10387456, 'steps': 20287, 'loss/train': 1.7913402318954468} +02/25/2022 03:51:25 - INFO - codeparrot_training - Step 20288: {'lr': 0.000341300440472897, 'samples': 10387968, 'steps': 20288, 'loss/train': 2.0067458152770996} +02/25/2022 03:51:29 - INFO - codeparrot_training - Step 20289: {'lr': 0.0003412852079971441, 'samples': 10388480, 'steps': 20289, 'loss/train': 2.3061718940734863} +02/25/2022 03:51:34 - INFO - codeparrot_training - Step 20290: {'lr': 0.0003412699751303544, 'samples': 10388992, 'steps': 20290, 'loss/train': 2.2157087326049805} +02/25/2022 03:51:38 - INFO - codeparrot_training - Step 20291: {'lr': 0.00034125474187259307, 'samples': 10389504, 'steps': 20291, 'loss/train': 2.6220147609710693} +02/25/2022 03:51:44 - INFO - codeparrot_training - Step 20292: {'lr': 0.0003412395082239255, 'samples': 10390016, 'steps': 20292, 'loss/train': 2.666356325149536} +02/25/2022 03:51:47 - INFO - codeparrot_training - Step 20293: {'lr': 0.0003412242741844168, 'samples': 10390528, 'steps': 20293, 'loss/train': 2.0440683364868164} +02/25/2022 03:51:53 - INFO - codeparrot_training - Step 20294: {'lr': 0.0003412090397541323, 'samples': 10391040, 'steps': 20294, 'loss/train': 1.1251710653305054} +02/25/2022 03:51:56 - INFO - codeparrot_training - Step 20295: {'lr': 0.0003411938049331372, 'samples': 10391552, 'steps': 20295, 'loss/train': 1.8887560367584229} +02/25/2022 03:52:04 - INFO - codeparrot_training - Step 20296: {'lr': 0.0003411785697214968, 'samples': 10392064, 'steps': 20296, 'loss/train': 3.546196222305298} +02/25/2022 03:52:07 - INFO - codeparrot_training - Step 20297: {'lr': 0.00034116333411927637, 'samples': 10392576, 'steps': 20297, 'loss/train': 1.8653291463851929} +02/25/2022 03:52:13 - INFO - codeparrot_training - Step 20298: {'lr': 0.0003411480981265411, 'samples': 10393088, 'steps': 20298, 'loss/train': 0.7127207517623901} +02/25/2022 03:52:16 - INFO - codeparrot_training - Step 20299: {'lr': 0.0003411328617433564, 'samples': 10393600, 'steps': 20299, 'loss/train': 2.2414886951446533} +02/25/2022 03:52:22 - INFO - codeparrot_training - Step 20300: {'lr': 0.0003411176249697875, 'samples': 10394112, 'steps': 20300, 'loss/train': 2.163728952407837} +02/25/2022 03:52:25 - INFO - codeparrot_training - Step 20301: {'lr': 0.0003411023878058995, 'samples': 10394624, 'steps': 20301, 'loss/train': 2.2823126316070557} +02/25/2022 03:52:31 - INFO - codeparrot_training - Step 20302: {'lr': 0.0003410871502517579, 'samples': 10395136, 'steps': 20302, 'loss/train': 3.952664613723755} +02/25/2022 03:52:34 - INFO - codeparrot_training - Step 20303: {'lr': 0.00034107191230742776, 'samples': 10395648, 'steps': 20303, 'loss/train': 1.834477186203003} +02/25/2022 03:52:40 - INFO - codeparrot_training - Step 20304: {'lr': 0.00034105667397297455, 'samples': 10396160, 'steps': 20304, 'loss/train': 1.780929684638977} +02/25/2022 03:52:43 - INFO - codeparrot_training - Step 20305: {'lr': 0.0003410414352484635, 'samples': 10396672, 'steps': 20305, 'loss/train': 1.5590553283691406} +02/25/2022 03:52:51 - INFO - codeparrot_training - Step 20306: {'lr': 0.00034102619613395974, 'samples': 10397184, 'steps': 20306, 'loss/train': 2.6603586673736572} +02/25/2022 03:52:54 - INFO - codeparrot_training - Step 20307: {'lr': 0.00034101095662952873, 'samples': 10397696, 'steps': 20307, 'loss/train': 1.601564884185791} +02/25/2022 03:53:00 - INFO - codeparrot_training - Step 20308: {'lr': 0.00034099571673523564, 'samples': 10398208, 'steps': 20308, 'loss/train': 2.099993944168091} +02/25/2022 03:53:03 - INFO - codeparrot_training - Step 20309: {'lr': 0.0003409804764511459, 'samples': 10398720, 'steps': 20309, 'loss/train': 0.9904488325119019} +02/25/2022 03:53:09 - INFO - codeparrot_training - Step 20310: {'lr': 0.00034096523577732457, 'samples': 10399232, 'steps': 20310, 'loss/train': 1.1795721054077148} +02/25/2022 03:53:12 - INFO - codeparrot_training - Step 20311: {'lr': 0.00034094999471383713, 'samples': 10399744, 'steps': 20311, 'loss/train': 2.3907694816589355} +02/25/2022 03:53:18 - INFO - codeparrot_training - Step 20312: {'lr': 0.00034093475326074874, 'samples': 10400256, 'steps': 20312, 'loss/train': 2.467639207839966} +02/25/2022 03:53:21 - INFO - codeparrot_training - Step 20313: {'lr': 0.00034091951141812483, 'samples': 10400768, 'steps': 20313, 'loss/train': 1.0897190570831299} +02/25/2022 03:53:27 - INFO - codeparrot_training - Step 20314: {'lr': 0.00034090426918603045, 'samples': 10401280, 'steps': 20314, 'loss/train': 0.9841932058334351} +02/25/2022 03:53:30 - INFO - codeparrot_training - Step 20315: {'lr': 0.00034088902656453116, 'samples': 10401792, 'steps': 20315, 'loss/train': 2.4677000045776367} +02/25/2022 03:53:37 - INFO - codeparrot_training - Step 20316: {'lr': 0.00034087378355369215, 'samples': 10402304, 'steps': 20316, 'loss/train': 2.2717084884643555} +02/25/2022 03:53:41 - INFO - codeparrot_training - Step 20317: {'lr': 0.00034085854015357864, 'samples': 10402816, 'steps': 20317, 'loss/train': 0.8332167863845825} +02/25/2022 03:53:46 - INFO - codeparrot_training - Step 20318: {'lr': 0.000340843296364256, 'samples': 10403328, 'steps': 20318, 'loss/train': 1.9185445308685303} +02/25/2022 03:53:50 - INFO - codeparrot_training - Step 20319: {'lr': 0.00034082805218578954, 'samples': 10403840, 'steps': 20319, 'loss/train': 2.3251028060913086} +02/25/2022 03:53:55 - INFO - codeparrot_training - Step 20320: {'lr': 0.00034081280761824465, 'samples': 10404352, 'steps': 20320, 'loss/train': 2.1127960681915283} +02/25/2022 03:53:59 - INFO - codeparrot_training - Step 20321: {'lr': 0.0003407975626616864, 'samples': 10404864, 'steps': 20321, 'loss/train': 2.142221689224243} +02/25/2022 03:54:04 - INFO - codeparrot_training - Step 20322: {'lr': 0.00034078231731618025, 'samples': 10405376, 'steps': 20322, 'loss/train': 2.7477948665618896} +02/25/2022 03:54:08 - INFO - codeparrot_training - Step 20323: {'lr': 0.00034076707158179145, 'samples': 10405888, 'steps': 20323, 'loss/train': 1.6565980911254883} +02/25/2022 03:54:13 - INFO - codeparrot_training - Step 20324: {'lr': 0.0003407518254585854, 'samples': 10406400, 'steps': 20324, 'loss/train': 2.412675380706787} +02/25/2022 03:54:17 - INFO - codeparrot_training - Step 20325: {'lr': 0.0003407365789466273, 'samples': 10406912, 'steps': 20325, 'loss/train': 2.3295187950134277} +02/25/2022 03:54:23 - INFO - codeparrot_training - Step 20326: {'lr': 0.00034072133204598255, 'samples': 10407424, 'steps': 20326, 'loss/train': 1.271855115890503} +02/25/2022 03:54:26 - INFO - codeparrot_training - Step 20327: {'lr': 0.00034070608475671643, 'samples': 10407936, 'steps': 20327, 'loss/train': 1.4647499322891235} +02/25/2022 03:54:32 - INFO - codeparrot_training - Step 20328: {'lr': 0.0003406908370788942, 'samples': 10408448, 'steps': 20328, 'loss/train': 0.8187311887741089} +02/25/2022 03:54:35 - INFO - codeparrot_training - Step 20329: {'lr': 0.0003406755890125813, 'samples': 10408960, 'steps': 20329, 'loss/train': 2.774731397628784} +02/25/2022 03:54:41 - INFO - codeparrot_training - Step 20330: {'lr': 0.00034066034055784284, 'samples': 10409472, 'steps': 20330, 'loss/train': 1.4913020133972168} +02/25/2022 03:54:44 - INFO - codeparrot_training - Step 20331: {'lr': 0.00034064509171474435, 'samples': 10409984, 'steps': 20331, 'loss/train': 3.659651756286621} +02/25/2022 03:54:50 - INFO - codeparrot_training - Step 20332: {'lr': 0.00034062984248335105, 'samples': 10410496, 'steps': 20332, 'loss/train': 1.6191192865371704} +02/25/2022 03:54:53 - INFO - codeparrot_training - Step 20333: {'lr': 0.0003406145928637283, 'samples': 10411008, 'steps': 20333, 'loss/train': 1.7046446800231934} +02/25/2022 03:54:59 - INFO - codeparrot_training - Step 20334: {'lr': 0.0003405993428559414, 'samples': 10411520, 'steps': 20334, 'loss/train': 0.5736855268478394} +02/25/2022 03:55:02 - INFO - codeparrot_training - Step 20335: {'lr': 0.0003405840924600557, 'samples': 10412032, 'steps': 20335, 'loss/train': 3.1948325634002686} +02/25/2022 03:55:08 - INFO - codeparrot_training - Step 20336: {'lr': 0.0003405688416761364, 'samples': 10412544, 'steps': 20336, 'loss/train': 1.9165589809417725} +02/25/2022 03:55:11 - INFO - codeparrot_training - Step 20337: {'lr': 0.0003405535905042491, 'samples': 10413056, 'steps': 20337, 'loss/train': 2.527679204940796} +02/25/2022 03:55:17 - INFO - codeparrot_training - Step 20338: {'lr': 0.0003405383389444589, 'samples': 10413568, 'steps': 20338, 'loss/train': 1.2761796712875366} +02/25/2022 03:55:20 - INFO - codeparrot_training - Step 20339: {'lr': 0.00034052308699683114, 'samples': 10414080, 'steps': 20339, 'loss/train': 1.6958613395690918} +02/25/2022 03:55:27 - INFO - codeparrot_training - Step 20340: {'lr': 0.0003405078346614313, 'samples': 10414592, 'steps': 20340, 'loss/train': 2.393470287322998} +02/25/2022 03:55:31 - INFO - codeparrot_training - Step 20341: {'lr': 0.00034049258193832464, 'samples': 10415104, 'steps': 20341, 'loss/train': 2.105696678161621} +02/25/2022 03:55:36 - INFO - codeparrot_training - Step 20342: {'lr': 0.00034047732882757655, 'samples': 10415616, 'steps': 20342, 'loss/train': 2.420139789581299} +02/25/2022 03:55:40 - INFO - codeparrot_training - Step 20343: {'lr': 0.00034046207532925215, 'samples': 10416128, 'steps': 20343, 'loss/train': 1.5979433059692383} +02/25/2022 03:55:45 - INFO - codeparrot_training - Step 20344: {'lr': 0.0003404468214434171, 'samples': 10416640, 'steps': 20344, 'loss/train': 1.823500633239746} +02/25/2022 03:55:49 - INFO - codeparrot_training - Step 20345: {'lr': 0.00034043156717013647, 'samples': 10417152, 'steps': 20345, 'loss/train': 2.3961737155914307} +02/25/2022 03:55:55 - INFO - codeparrot_training - Step 20346: {'lr': 0.0003404163125094758, 'samples': 10417664, 'steps': 20346, 'loss/train': 1.849566102027893} +02/25/2022 03:56:00 - INFO - codeparrot_training - Step 20347: {'lr': 0.0003404010574615003, 'samples': 10418176, 'steps': 20347, 'loss/train': 2.186707019805908} +02/25/2022 03:56:03 - INFO - codeparrot_training - Step 20348: {'lr': 0.00034038580202627543, 'samples': 10418688, 'steps': 20348, 'loss/train': 3.459792137145996} +02/25/2022 03:56:09 - INFO - codeparrot_training - Step 20349: {'lr': 0.0003403705462038665, 'samples': 10419200, 'steps': 20349, 'loss/train': 0.9187582731246948} +02/25/2022 03:56:13 - INFO - codeparrot_training - Step 20350: {'lr': 0.0003403552899943388, 'samples': 10419712, 'steps': 20350, 'loss/train': 2.584151029586792} +02/25/2022 03:56:19 - INFO - codeparrot_training - Step 20351: {'lr': 0.0003403400333977577, 'samples': 10420224, 'steps': 20351, 'loss/train': 1.807735562324524} +02/25/2022 03:56:22 - INFO - codeparrot_training - Step 20352: {'lr': 0.00034032477641418856, 'samples': 10420736, 'steps': 20352, 'loss/train': 1.9062429666519165} +02/25/2022 03:56:28 - INFO - codeparrot_training - Step 20353: {'lr': 0.00034030951904369687, 'samples': 10421248, 'steps': 20353, 'loss/train': 1.157932996749878} +02/25/2022 03:56:31 - INFO - codeparrot_training - Step 20354: {'lr': 0.0003402942612863478, 'samples': 10421760, 'steps': 20354, 'loss/train': 3.330751657485962} +02/25/2022 03:56:37 - INFO - codeparrot_training - Step 20355: {'lr': 0.00034027900314220684, 'samples': 10422272, 'steps': 20355, 'loss/train': 0.6594069004058838} +02/25/2022 03:56:40 - INFO - codeparrot_training - Step 20356: {'lr': 0.00034026374461133927, 'samples': 10422784, 'steps': 20356, 'loss/train': 0.8760718107223511} +02/25/2022 03:56:46 - INFO - codeparrot_training - Step 20357: {'lr': 0.0003402484856938105, 'samples': 10423296, 'steps': 20357, 'loss/train': 2.344481945037842} +02/25/2022 03:56:50 - INFO - codeparrot_training - Step 20358: {'lr': 0.00034023322638968587, 'samples': 10423808, 'steps': 20358, 'loss/train': 1.5700024366378784} +02/25/2022 03:56:55 - INFO - codeparrot_training - Step 20359: {'lr': 0.0003402179666990307, 'samples': 10424320, 'steps': 20359, 'loss/train': 1.1055718660354614} +02/25/2022 03:56:59 - INFO - codeparrot_training - Step 20360: {'lr': 0.00034020270662191046, 'samples': 10424832, 'steps': 20360, 'loss/train': 2.3525965213775635} +02/25/2022 03:57:04 - INFO - codeparrot_training - Step 20361: {'lr': 0.0003401874461583905, 'samples': 10425344, 'steps': 20361, 'loss/train': 1.6585325002670288} +02/25/2022 03:57:08 - INFO - codeparrot_training - Step 20362: {'lr': 0.00034017218530853606, 'samples': 10425856, 'steps': 20362, 'loss/train': 1.6461007595062256} +02/25/2022 03:57:14 - INFO - codeparrot_training - Step 20363: {'lr': 0.0003401569240724126, 'samples': 10426368, 'steps': 20363, 'loss/train': 2.7128148078918457} +02/25/2022 03:57:17 - INFO - codeparrot_training - Step 20364: {'lr': 0.0003401416624500856, 'samples': 10426880, 'steps': 20364, 'loss/train': 1.3656822443008423} +02/25/2022 03:57:23 - INFO - codeparrot_training - Step 20365: {'lr': 0.0003401264004416203, 'samples': 10427392, 'steps': 20365, 'loss/train': 2.0054006576538086} +02/25/2022 03:57:26 - INFO - codeparrot_training - Step 20366: {'lr': 0.00034011113804708216, 'samples': 10427904, 'steps': 20366, 'loss/train': 1.8891280889511108} +02/25/2022 03:57:32 - INFO - codeparrot_training - Step 20367: {'lr': 0.0003400958752665365, 'samples': 10428416, 'steps': 20367, 'loss/train': 1.4771785736083984} +02/25/2022 03:57:36 - INFO - codeparrot_training - Step 20368: {'lr': 0.0003400806121000487, 'samples': 10428928, 'steps': 20368, 'loss/train': 2.1553425788879395} +02/25/2022 03:57:41 - INFO - codeparrot_training - Step 20369: {'lr': 0.00034006534854768417, 'samples': 10429440, 'steps': 20369, 'loss/train': 2.0339150428771973} +02/25/2022 03:57:45 - INFO - codeparrot_training - Step 20370: {'lr': 0.00034005008460950825, 'samples': 10429952, 'steps': 20370, 'loss/train': 1.69804048538208} +02/25/2022 03:57:52 - INFO - codeparrot_training - Step 20371: {'lr': 0.00034003482028558644, 'samples': 10430464, 'steps': 20371, 'loss/train': 1.744829773902893} +02/25/2022 03:57:55 - INFO - codeparrot_training - Step 20372: {'lr': 0.000340019555575984, 'samples': 10430976, 'steps': 20372, 'loss/train': 1.6603659391403198} +02/25/2022 03:58:00 - INFO - codeparrot_training - Step 20373: {'lr': 0.00034000429048076637, 'samples': 10431488, 'steps': 20373, 'loss/train': 1.9485360383987427} +02/25/2022 03:58:04 - INFO - codeparrot_training - Step 20374: {'lr': 0.00033998902499999894, 'samples': 10432000, 'steps': 20374, 'loss/train': 0.7774336934089661} +02/25/2022 03:58:09 - INFO - codeparrot_training - Step 20375: {'lr': 0.00033997375913374705, 'samples': 10432512, 'steps': 20375, 'loss/train': 0.4261077344417572} +02/25/2022 03:58:13 - INFO - codeparrot_training - Step 20376: {'lr': 0.0003399584928820762, 'samples': 10433024, 'steps': 20376, 'loss/train': 2.7587804794311523} +02/25/2022 03:58:18 - INFO - codeparrot_training - Step 20377: {'lr': 0.00033994322624505177, 'samples': 10433536, 'steps': 20377, 'loss/train': 1.6619898080825806} +02/25/2022 03:58:22 - INFO - codeparrot_training - Step 20378: {'lr': 0.000339927959222739, 'samples': 10434048, 'steps': 20378, 'loss/train': 1.8428360223770142} +02/25/2022 03:58:27 - INFO - codeparrot_training - Step 20379: {'lr': 0.0003399126918152036, 'samples': 10434560, 'steps': 20379, 'loss/train': 1.369884967803955} +02/25/2022 03:58:31 - INFO - codeparrot_training - Step 20380: {'lr': 0.00033989742402251055, 'samples': 10435072, 'steps': 20380, 'loss/train': 0.5787926912307739} +02/25/2022 03:58:36 - INFO - codeparrot_training - Step 20381: {'lr': 0.00033988215584472564, 'samples': 10435584, 'steps': 20381, 'loss/train': 1.7602312564849854} +02/25/2022 03:58:40 - INFO - codeparrot_training - Step 20382: {'lr': 0.00033986688728191404, 'samples': 10436096, 'steps': 20382, 'loss/train': 2.696516275405884} +02/25/2022 03:58:45 - INFO - codeparrot_training - Step 20383: {'lr': 0.0003398516183341413, 'samples': 10436608, 'steps': 20383, 'loss/train': 1.0951000452041626} +02/25/2022 03:58:49 - INFO - codeparrot_training - Step 20384: {'lr': 0.0003398363490014727, 'samples': 10437120, 'steps': 20384, 'loss/train': 0.23739130795001984} +02/25/2022 03:58:54 - INFO - codeparrot_training - Step 20385: {'lr': 0.0003398210792839738, 'samples': 10437632, 'steps': 20385, 'loss/train': 2.0905277729034424} +02/25/2022 03:58:58 - INFO - codeparrot_training - Step 20386: {'lr': 0.0003398058091817098, 'samples': 10438144, 'steps': 20386, 'loss/train': 1.4892735481262207} +02/25/2022 03:59:04 - INFO - codeparrot_training - Step 20387: {'lr': 0.0003397905386947462, 'samples': 10438656, 'steps': 20387, 'loss/train': 3.576022148132324} +02/25/2022 03:59:08 - INFO - codeparrot_training - Step 20388: {'lr': 0.00033977526782314854, 'samples': 10439168, 'steps': 20388, 'loss/train': 2.563326120376587} +02/25/2022 03:59:13 - INFO - codeparrot_training - Step 20389: {'lr': 0.00033975999656698206, 'samples': 10439680, 'steps': 20389, 'loss/train': 2.2927839756011963} +02/25/2022 03:59:16 - INFO - codeparrot_training - Step 20390: {'lr': 0.00033974472492631234, 'samples': 10440192, 'steps': 20390, 'loss/train': 2.5966854095458984} +02/25/2022 03:59:22 - INFO - codeparrot_training - Step 20391: {'lr': 0.0003397294529012047, 'samples': 10440704, 'steps': 20391, 'loss/train': 0.21449001133441925} +02/25/2022 03:59:25 - INFO - codeparrot_training - Step 20392: {'lr': 0.0003397141804917246, 'samples': 10441216, 'steps': 20392, 'loss/train': 1.3566102981567383} +02/25/2022 03:59:31 - INFO - codeparrot_training - Step 20393: {'lr': 0.00033969890769793736, 'samples': 10441728, 'steps': 20393, 'loss/train': 1.2585475444793701} +02/25/2022 03:59:34 - INFO - codeparrot_training - Step 20394: {'lr': 0.0003396836345199086, 'samples': 10442240, 'steps': 20394, 'loss/train': 2.7500460147857666} +02/25/2022 03:59:40 - INFO - codeparrot_training - Step 20395: {'lr': 0.0003396683609577035, 'samples': 10442752, 'steps': 20395, 'loss/train': 0.9440882205963135} +02/25/2022 03:59:43 - INFO - codeparrot_training - Step 20396: {'lr': 0.0003396530870113877, 'samples': 10443264, 'steps': 20396, 'loss/train': 3.391152858734131} +02/25/2022 03:59:49 - INFO - codeparrot_training - Step 20397: {'lr': 0.0003396378126810264, 'samples': 10443776, 'steps': 20397, 'loss/train': 1.4591679573059082} +02/25/2022 03:59:53 - INFO - codeparrot_training - Step 20398: {'lr': 0.0003396225379666854, 'samples': 10444288, 'steps': 20398, 'loss/train': 1.8293768167495728} +02/25/2022 03:59:58 - INFO - codeparrot_training - Step 20399: {'lr': 0.00033960726286842973, 'samples': 10444800, 'steps': 20399, 'loss/train': 3.2655420303344727} +02/25/2022 04:00:02 - INFO - codeparrot_training - Step 20400: {'lr': 0.00033959198738632503, 'samples': 10445312, 'steps': 20400, 'loss/train': 1.8054386377334595} +02/25/2022 04:00:07 - INFO - codeparrot_training - Step 20401: {'lr': 0.00033957671152043677, 'samples': 10445824, 'steps': 20401, 'loss/train': 1.1139875650405884} +02/25/2022 04:00:11 - INFO - codeparrot_training - Step 20402: {'lr': 0.0003395614352708303, 'samples': 10446336, 'steps': 20402, 'loss/train': 2.0459933280944824} +02/25/2022 04:00:16 - INFO - codeparrot_training - Step 20403: {'lr': 0.00033954615863757105, 'samples': 10446848, 'steps': 20403, 'loss/train': 0.432099848985672} +02/25/2022 04:00:20 - INFO - codeparrot_training - Step 20404: {'lr': 0.0003395308816207245, 'samples': 10447360, 'steps': 20404, 'loss/train': 1.4184174537658691} +02/25/2022 04:00:25 - INFO - codeparrot_training - Step 20405: {'lr': 0.0003395156042203561, 'samples': 10447872, 'steps': 20405, 'loss/train': 2.241032361984253} +02/25/2022 04:00:29 - INFO - codeparrot_training - Step 20406: {'lr': 0.0003395003264365313, 'samples': 10448384, 'steps': 20406, 'loss/train': 2.641526699066162} +02/25/2022 04:00:34 - INFO - codeparrot_training - Step 20407: {'lr': 0.00033948504826931546, 'samples': 10448896, 'steps': 20407, 'loss/train': 0.43966493010520935} +02/25/2022 04:00:38 - INFO - codeparrot_training - Step 20408: {'lr': 0.0003394697697187741, 'samples': 10449408, 'steps': 20408, 'loss/train': 2.6849939823150635} +02/25/2022 04:00:44 - INFO - codeparrot_training - Step 20409: {'lr': 0.0003394544907849727, 'samples': 10449920, 'steps': 20409, 'loss/train': 1.4712785482406616} +02/25/2022 04:00:48 - INFO - codeparrot_training - Step 20410: {'lr': 0.0003394392114679766, 'samples': 10450432, 'steps': 20410, 'loss/train': 3.2722394466400146} +02/25/2022 04:00:53 - INFO - codeparrot_training - Step 20411: {'lr': 0.00033942393176785134, 'samples': 10450944, 'steps': 20411, 'loss/train': 1.9418766498565674} +02/25/2022 04:00:56 - INFO - codeparrot_training - Step 20412: {'lr': 0.00033940865168466237, 'samples': 10451456, 'steps': 20412, 'loss/train': 1.7702662944793701} +02/25/2022 04:01:02 - INFO - codeparrot_training - Step 20413: {'lr': 0.00033939337121847513, 'samples': 10451968, 'steps': 20413, 'loss/train': 2.305882215499878} +02/25/2022 04:01:05 - INFO - codeparrot_training - Step 20414: {'lr': 0.00033937809036935505, 'samples': 10452480, 'steps': 20414, 'loss/train': 2.1831846237182617} +02/25/2022 04:01:11 - INFO - codeparrot_training - Step 20415: {'lr': 0.0003393628091373677, 'samples': 10452992, 'steps': 20415, 'loss/train': 2.453648567199707} +02/25/2022 04:01:14 - INFO - codeparrot_training - Step 20416: {'lr': 0.00033934752752257834, 'samples': 10453504, 'steps': 20416, 'loss/train': 1.6907323598861694} +02/25/2022 04:01:20 - INFO - codeparrot_training - Step 20417: {'lr': 0.00033933224552505257, 'samples': 10454016, 'steps': 20417, 'loss/train': 2.2379722595214844} +02/25/2022 04:01:23 - INFO - codeparrot_training - Step 20418: {'lr': 0.0003393169631448559, 'samples': 10454528, 'steps': 20418, 'loss/train': 1.8829885721206665} +02/25/2022 04:01:32 - INFO - codeparrot_training - Step 20419: {'lr': 0.0003393016803820536, 'samples': 10455040, 'steps': 20419, 'loss/train': 1.9877028465270996} +02/25/2022 04:01:37 - INFO - codeparrot_training - Step 20420: {'lr': 0.0003392863972367114, 'samples': 10455552, 'steps': 20420, 'loss/train': 1.5622564554214478} +02/25/2022 04:01:41 - INFO - codeparrot_training - Step 20421: {'lr': 0.0003392711137088945, 'samples': 10456064, 'steps': 20421, 'loss/train': 1.0281120538711548} +02/25/2022 04:01:46 - INFO - codeparrot_training - Step 20422: {'lr': 0.00033925582979866853, 'samples': 10456576, 'steps': 20422, 'loss/train': 1.0365203619003296} +02/25/2022 04:01:50 - INFO - codeparrot_training - Step 20423: {'lr': 0.00033924054550609894, 'samples': 10457088, 'steps': 20423, 'loss/train': 1.7628027200698853} +02/25/2022 04:01:55 - INFO - codeparrot_training - Step 20424: {'lr': 0.00033922526083125115, 'samples': 10457600, 'steps': 20424, 'loss/train': 0.5534242391586304} +02/25/2022 04:01:59 - INFO - codeparrot_training - Step 20425: {'lr': 0.00033920997577419076, 'samples': 10458112, 'steps': 20425, 'loss/train': 1.0568207502365112} +02/25/2022 04:02:04 - INFO - codeparrot_training - Step 20426: {'lr': 0.00033919469033498304, 'samples': 10458624, 'steps': 20426, 'loss/train': 2.7125191688537598} +02/25/2022 04:02:08 - INFO - codeparrot_training - Step 20427: {'lr': 0.0003391794045136937, 'samples': 10459136, 'steps': 20427, 'loss/train': 2.322615623474121} +02/25/2022 04:02:13 - INFO - codeparrot_training - Step 20428: {'lr': 0.00033916411831038805, 'samples': 10459648, 'steps': 20428, 'loss/train': 2.0178771018981934} +02/25/2022 04:02:17 - INFO - codeparrot_training - Step 20429: {'lr': 0.0003391488317251317, 'samples': 10460160, 'steps': 20429, 'loss/train': 2.501131772994995} +02/25/2022 04:02:22 - INFO - codeparrot_training - Step 20430: {'lr': 0.00033913354475798995, 'samples': 10460672, 'steps': 20430, 'loss/train': 2.1958131790161133} +02/25/2022 04:02:26 - INFO - codeparrot_training - Step 20431: {'lr': 0.0003391182574090285, 'samples': 10461184, 'steps': 20431, 'loss/train': 2.0659499168395996} +02/25/2022 04:02:31 - INFO - codeparrot_training - Step 20432: {'lr': 0.00033910296967831267, 'samples': 10461696, 'steps': 20432, 'loss/train': 2.09885573387146} +02/25/2022 04:02:35 - INFO - codeparrot_training - Step 20433: {'lr': 0.00033908768156590806, 'samples': 10462208, 'steps': 20433, 'loss/train': 1.8103362321853638} +02/25/2022 04:02:42 - INFO - codeparrot_training - Step 20434: {'lr': 0.0003390723930718801, 'samples': 10462720, 'steps': 20434, 'loss/train': 0.6538064479827881} +02/25/2022 04:02:45 - INFO - codeparrot_training - Step 20435: {'lr': 0.0003390571041962943, 'samples': 10463232, 'steps': 20435, 'loss/train': 1.8231159448623657} +02/25/2022 04:02:51 - INFO - codeparrot_training - Step 20436: {'lr': 0.0003390418149392161, 'samples': 10463744, 'steps': 20436, 'loss/train': 1.4817583560943604} +02/25/2022 04:02:54 - INFO - codeparrot_training - Step 20437: {'lr': 0.000339026525300711, 'samples': 10464256, 'steps': 20437, 'loss/train': 0.34716546535491943} +02/25/2022 04:03:00 - INFO - codeparrot_training - Step 20438: {'lr': 0.0003390112352808447, 'samples': 10464768, 'steps': 20438, 'loss/train': 1.7235552072525024} +02/25/2022 04:03:03 - INFO - codeparrot_training - Step 20439: {'lr': 0.00033899594487968234, 'samples': 10465280, 'steps': 20439, 'loss/train': 1.886596441268921} +02/25/2022 04:03:09 - INFO - codeparrot_training - Step 20440: {'lr': 0.00033898065409728974, 'samples': 10465792, 'steps': 20440, 'loss/train': 2.01859712600708} +02/25/2022 04:03:12 - INFO - codeparrot_training - Step 20441: {'lr': 0.0003389653629337322, 'samples': 10466304, 'steps': 20441, 'loss/train': 1.9983035326004028} +02/25/2022 04:03:20 - INFO - codeparrot_training - Step 20442: {'lr': 0.00033895007138907534, 'samples': 10466816, 'steps': 20442, 'loss/train': 1.7958946228027344} +02/25/2022 04:03:23 - INFO - codeparrot_training - Step 20443: {'lr': 0.00033893477946338456, 'samples': 10467328, 'steps': 20443, 'loss/train': 2.8162474632263184} +02/25/2022 04:03:29 - INFO - codeparrot_training - Step 20444: {'lr': 0.0003389194871567255, 'samples': 10467840, 'steps': 20444, 'loss/train': 0.8238244652748108} +02/25/2022 04:03:32 - INFO - codeparrot_training - Step 20445: {'lr': 0.00033890419446916354, 'samples': 10468352, 'steps': 20445, 'loss/train': 2.4720194339752197} +02/25/2022 04:03:36 - INFO - codeparrot_training - Step 20446: {'lr': 0.00033888890140076433, 'samples': 10468864, 'steps': 20446, 'loss/train': 8.994832992553711} +02/25/2022 04:03:41 - INFO - codeparrot_training - Step 20447: {'lr': 0.00033887360795159315, 'samples': 10469376, 'steps': 20447, 'loss/train': 1.3861021995544434} +02/25/2022 04:03:45 - INFO - codeparrot_training - Step 20448: {'lr': 0.00033885831412171577, 'samples': 10469888, 'steps': 20448, 'loss/train': 2.0955758094787598} +02/25/2022 04:03:50 - INFO - codeparrot_training - Step 20449: {'lr': 0.00033884301991119755, 'samples': 10470400, 'steps': 20449, 'loss/train': 2.7549216747283936} +02/25/2022 04:03:54 - INFO - codeparrot_training - Step 20450: {'lr': 0.00033882772532010404, 'samples': 10470912, 'steps': 20450, 'loss/train': 1.8993308544158936} +02/25/2022 04:03:59 - INFO - codeparrot_training - Step 20451: {'lr': 0.0003388124303485007, 'samples': 10471424, 'steps': 20451, 'loss/train': 2.9791548252105713} +02/25/2022 04:04:03 - INFO - codeparrot_training - Step 20452: {'lr': 0.0003387971349964532, 'samples': 10471936, 'steps': 20452, 'loss/train': 0.8059250116348267} +02/25/2022 04:04:09 - INFO - codeparrot_training - Step 20453: {'lr': 0.00033878183926402695, 'samples': 10472448, 'steps': 20453, 'loss/train': 2.2308528423309326} +02/25/2022 04:04:12 - INFO - codeparrot_training - Step 20454: {'lr': 0.00033876654315128746, 'samples': 10472960, 'steps': 20454, 'loss/train': 2.264249801635742} +02/25/2022 04:04:19 - INFO - codeparrot_training - Step 20455: {'lr': 0.0003387512466583004, 'samples': 10473472, 'steps': 20455, 'loss/train': 1.5208839178085327} +02/25/2022 04:04:23 - INFO - codeparrot_training - Step 20456: {'lr': 0.0003387359497851311, 'samples': 10473984, 'steps': 20456, 'loss/train': 0.6904934048652649} +02/25/2022 04:04:28 - INFO - codeparrot_training - Step 20457: {'lr': 0.0003387206525318451, 'samples': 10474496, 'steps': 20457, 'loss/train': 0.7754421234130859} +02/25/2022 04:04:32 - INFO - codeparrot_training - Step 20458: {'lr': 0.00033870535489850806, 'samples': 10475008, 'steps': 20458, 'loss/train': 2.2456939220428467} +02/25/2022 04:04:37 - INFO - codeparrot_training - Step 20459: {'lr': 0.00033869005688518547, 'samples': 10475520, 'steps': 20459, 'loss/train': 1.1401348114013672} +02/25/2022 04:04:41 - INFO - codeparrot_training - Step 20460: {'lr': 0.0003386747584919428, 'samples': 10476032, 'steps': 20460, 'loss/train': 1.9550672769546509} +02/25/2022 04:04:46 - INFO - codeparrot_training - Step 20461: {'lr': 0.0003386594597188456, 'samples': 10476544, 'steps': 20461, 'loss/train': 2.4731264114379883} +02/25/2022 04:04:50 - INFO - codeparrot_training - Step 20462: {'lr': 0.0003386441605659595, 'samples': 10477056, 'steps': 20462, 'loss/train': 2.4738430976867676} +02/25/2022 04:04:55 - INFO - codeparrot_training - Step 20463: {'lr': 0.00033862886103334987, 'samples': 10477568, 'steps': 20463, 'loss/train': 1.706005334854126} +02/25/2022 04:04:59 - INFO - codeparrot_training - Step 20464: {'lr': 0.0003386135611210824, 'samples': 10478080, 'steps': 20464, 'loss/train': 1.3999069929122925} +02/25/2022 04:05:05 - INFO - codeparrot_training - Step 20465: {'lr': 0.0003385982608292226, 'samples': 10478592, 'steps': 20465, 'loss/train': 1.4723130464553833} +02/25/2022 04:05:09 - INFO - codeparrot_training - Step 20466: {'lr': 0.000338582960157836, 'samples': 10479104, 'steps': 20466, 'loss/train': 2.181034803390503} +02/25/2022 04:05:14 - INFO - codeparrot_training - Step 20467: {'lr': 0.000338567659106988, 'samples': 10479616, 'steps': 20467, 'loss/train': 1.3326058387756348} +02/25/2022 04:05:18 - INFO - codeparrot_training - Step 20468: {'lr': 0.0003385523576767444, 'samples': 10480128, 'steps': 20468, 'loss/train': 1.0458500385284424} +02/25/2022 04:05:23 - INFO - codeparrot_training - Step 20469: {'lr': 0.00033853705586717056, 'samples': 10480640, 'steps': 20469, 'loss/train': 1.040419340133667} +02/25/2022 04:05:27 - INFO - codeparrot_training - Step 20470: {'lr': 0.0003385217536783321, 'samples': 10481152, 'steps': 20470, 'loss/train': 2.120584011077881} +02/25/2022 04:05:32 - INFO - codeparrot_training - Step 20471: {'lr': 0.00033850645111029455, 'samples': 10481664, 'steps': 20471, 'loss/train': 1.3446996212005615} +02/25/2022 04:05:36 - INFO - codeparrot_training - Step 20472: {'lr': 0.00033849114816312343, 'samples': 10482176, 'steps': 20472, 'loss/train': 1.733984351158142} +02/25/2022 04:05:41 - INFO - codeparrot_training - Step 20473: {'lr': 0.0003384758448368844, 'samples': 10482688, 'steps': 20473, 'loss/train': 2.818788528442383} +02/25/2022 04:05:45 - INFO - codeparrot_training - Step 20474: {'lr': 0.00033846054113164287, 'samples': 10483200, 'steps': 20474, 'loss/train': 1.5297060012817383} +02/25/2022 04:05:50 - INFO - codeparrot_training - Step 20475: {'lr': 0.00033844523704746455, 'samples': 10483712, 'steps': 20475, 'loss/train': 2.1493990421295166} +02/25/2022 04:05:54 - INFO - codeparrot_training - Step 20476: {'lr': 0.00033842993258441487, 'samples': 10484224, 'steps': 20476, 'loss/train': 2.7441694736480713} +02/25/2022 04:05:59 - INFO - codeparrot_training - Step 20477: {'lr': 0.0003384146277425595, 'samples': 10484736, 'steps': 20477, 'loss/train': 1.7860933542251587} +02/25/2022 04:06:03 - INFO - codeparrot_training - Step 20478: {'lr': 0.0003383993225219638, 'samples': 10485248, 'steps': 20478, 'loss/train': 1.4116884469985962} +02/25/2022 04:06:08 - INFO - codeparrot_training - Step 20479: {'lr': 0.0003383840169226936, 'samples': 10485760, 'steps': 20479, 'loss/train': 2.0376620292663574} +02/25/2022 04:06:12 - INFO - codeparrot_training - Step 20480: {'lr': 0.00033836871094481433, 'samples': 10486272, 'steps': 20480, 'loss/train': 2.797713279724121} +02/25/2022 04:06:19 - INFO - codeparrot_training - Step 20481: {'lr': 0.00033835340458839155, 'samples': 10486784, 'steps': 20481, 'loss/train': 2.2535343170166016} +02/25/2022 04:06:22 - INFO - codeparrot_training - Step 20482: {'lr': 0.0003383380978534908, 'samples': 10487296, 'steps': 20482, 'loss/train': 2.2196648120880127} +02/25/2022 04:06:28 - INFO - codeparrot_training - Step 20483: {'lr': 0.0003383227907401777, 'samples': 10487808, 'steps': 20483, 'loss/train': 2.666508913040161} +02/25/2022 04:06:31 - INFO - codeparrot_training - Step 20484: {'lr': 0.0003383074832485178, 'samples': 10488320, 'steps': 20484, 'loss/train': 1.3228881359100342} +02/25/2022 04:06:37 - INFO - codeparrot_training - Step 20485: {'lr': 0.0003382921753785767, 'samples': 10488832, 'steps': 20485, 'loss/train': 1.1112384796142578} +02/25/2022 04:06:40 - INFO - codeparrot_training - Step 20486: {'lr': 0.0003382768671304199, 'samples': 10489344, 'steps': 20486, 'loss/train': 2.3861844539642334} +02/25/2022 04:06:46 - INFO - codeparrot_training - Step 20487: {'lr': 0.0003382615585041131, 'samples': 10489856, 'steps': 20487, 'loss/train': 2.106295347213745} +02/25/2022 04:06:50 - INFO - codeparrot_training - Step 20488: {'lr': 0.0003382462494997219, 'samples': 10490368, 'steps': 20488, 'loss/train': 2.3959200382232666} +02/25/2022 04:06:55 - INFO - codeparrot_training - Step 20489: {'lr': 0.00033823094011731165, 'samples': 10490880, 'steps': 20489, 'loss/train': 1.983829379081726} +02/25/2022 04:06:59 - INFO - codeparrot_training - Step 20490: {'lr': 0.0003382156303569481, 'samples': 10491392, 'steps': 20490, 'loss/train': 2.0471601486206055} +02/25/2022 04:07:06 - INFO - codeparrot_training - Step 20491: {'lr': 0.0003382003202186968, 'samples': 10491904, 'steps': 20491, 'loss/train': 1.9037071466445923} +02/25/2022 04:07:09 - INFO - codeparrot_training - Step 20492: {'lr': 0.0003381850097026234, 'samples': 10492416, 'steps': 20492, 'loss/train': 2.2065999507904053} +02/25/2022 04:07:15 - INFO - codeparrot_training - Step 20493: {'lr': 0.0003381696988087933, 'samples': 10492928, 'steps': 20493, 'loss/train': 2.1461875438690186} +02/25/2022 04:07:18 - INFO - codeparrot_training - Step 20494: {'lr': 0.00033815438753727236, 'samples': 10493440, 'steps': 20494, 'loss/train': 1.7670491933822632} +02/25/2022 04:07:24 - INFO - codeparrot_training - Step 20495: {'lr': 0.00033813907588812586, 'samples': 10493952, 'steps': 20495, 'loss/train': 2.1959335803985596} +02/25/2022 04:07:27 - INFO - codeparrot_training - Step 20496: {'lr': 0.0003381237638614196, 'samples': 10494464, 'steps': 20496, 'loss/train': 0.7912722826004028} +02/25/2022 04:07:33 - INFO - codeparrot_training - Step 20497: {'lr': 0.00033810845145721915, 'samples': 10494976, 'steps': 20497, 'loss/train': 2.4614434242248535} +02/25/2022 04:07:36 - INFO - codeparrot_training - Step 20498: {'lr': 0.0003380931386755901, 'samples': 10495488, 'steps': 20498, 'loss/train': 1.9480470418930054} +02/25/2022 04:07:42 - INFO - codeparrot_training - Step 20499: {'lr': 0.00033807782551659795, 'samples': 10496000, 'steps': 20499, 'loss/train': 1.1926898956298828} +02/25/2022 04:07:45 - INFO - codeparrot_training - Step 20500: {'lr': 0.0003380625119803084, 'samples': 10496512, 'steps': 20500, 'loss/train': 2.2023305892944336} +02/25/2022 04:07:51 - INFO - codeparrot_training - Step 20501: {'lr': 0.00033804719806678694, 'samples': 10497024, 'steps': 20501, 'loss/train': 1.3211919069290161} +02/25/2022 04:07:55 - INFO - codeparrot_training - Step 20502: {'lr': 0.00033803188377609937, 'samples': 10497536, 'steps': 20502, 'loss/train': 2.2933125495910645} +02/25/2022 04:08:00 - INFO - codeparrot_training - Step 20503: {'lr': 0.00033801656910831116, 'samples': 10498048, 'steps': 20503, 'loss/train': 0.5051615238189697} +02/25/2022 04:08:04 - INFO - codeparrot_training - Step 20504: {'lr': 0.0003380012540634878, 'samples': 10498560, 'steps': 20504, 'loss/train': 0.7014504671096802} +02/25/2022 04:08:09 - INFO - codeparrot_training - Step 20505: {'lr': 0.0003379859386416951, 'samples': 10499072, 'steps': 20505, 'loss/train': 1.2564024925231934} +02/25/2022 04:08:13 - INFO - codeparrot_training - Step 20506: {'lr': 0.00033797062284299847, 'samples': 10499584, 'steps': 20506, 'loss/train': 1.6168928146362305} +02/25/2022 04:08:18 - INFO - codeparrot_training - Step 20507: {'lr': 0.0003379553066674637, 'samples': 10500096, 'steps': 20507, 'loss/train': 0.8878385424613953} +02/25/2022 04:08:22 - INFO - codeparrot_training - Step 20508: {'lr': 0.0003379399901151563, 'samples': 10500608, 'steps': 20508, 'loss/train': 1.9881776571273804} +02/25/2022 04:08:28 - INFO - codeparrot_training - Step 20509: {'lr': 0.00033792467318614195, 'samples': 10501120, 'steps': 20509, 'loss/train': 2.4221129417419434} +02/25/2022 04:08:31 - INFO - codeparrot_training - Step 20510: {'lr': 0.0003379093558804862, 'samples': 10501632, 'steps': 20510, 'loss/train': 1.0279136896133423} +02/25/2022 04:08:37 - INFO - codeparrot_training - Step 20511: {'lr': 0.00033789403819825466, 'samples': 10502144, 'steps': 20511, 'loss/train': 1.7528560161590576} +02/25/2022 04:08:41 - INFO - codeparrot_training - Step 20512: {'lr': 0.00033787872013951297, 'samples': 10502656, 'steps': 20512, 'loss/train': 1.3501797914505005} +02/25/2022 04:08:46 - INFO - codeparrot_training - Step 20513: {'lr': 0.0003378634017043268, 'samples': 10503168, 'steps': 20513, 'loss/train': 1.4831734895706177} +02/25/2022 04:08:50 - INFO - codeparrot_training - Step 20514: {'lr': 0.0003378480828927617, 'samples': 10503680, 'steps': 20514, 'loss/train': 3.4787306785583496} +02/25/2022 04:08:55 - INFO - codeparrot_training - Step 20515: {'lr': 0.00033783276370488323, 'samples': 10504192, 'steps': 20515, 'loss/train': 1.2933127880096436} +02/25/2022 04:08:59 - INFO - codeparrot_training - Step 20516: {'lr': 0.00033781744414075723, 'samples': 10504704, 'steps': 20516, 'loss/train': 1.977235198020935} +02/25/2022 04:09:04 - INFO - codeparrot_training - Step 20517: {'lr': 0.00033780212420044903, 'samples': 10505216, 'steps': 20517, 'loss/train': 2.3855020999908447} +02/25/2022 04:09:08 - INFO - codeparrot_training - Step 20518: {'lr': 0.0003377868038840245, 'samples': 10505728, 'steps': 20518, 'loss/train': 1.3925323486328125} +02/25/2022 04:09:13 - INFO - codeparrot_training - Step 20519: {'lr': 0.00033777148319154917, 'samples': 10506240, 'steps': 20519, 'loss/train': 1.3762468099594116} +02/25/2022 04:09:17 - INFO - codeparrot_training - Step 20520: {'lr': 0.0003377561621230887, 'samples': 10506752, 'steps': 20520, 'loss/train': 1.8767329454421997} +02/25/2022 04:09:22 - INFO - codeparrot_training - Step 20521: {'lr': 0.0003377408406787086, 'samples': 10507264, 'steps': 20521, 'loss/train': 1.657964825630188} +02/25/2022 04:09:26 - INFO - codeparrot_training - Step 20522: {'lr': 0.00033772551885847467, 'samples': 10507776, 'steps': 20522, 'loss/train': 2.1231024265289307} +02/25/2022 04:09:31 - INFO - codeparrot_training - Step 20523: {'lr': 0.0003377101966624525, 'samples': 10508288, 'steps': 20523, 'loss/train': 1.6197705268859863} +02/25/2022 04:09:35 - INFO - codeparrot_training - Step 20524: {'lr': 0.0003376948740907077, 'samples': 10508800, 'steps': 20524, 'loss/train': 0.15179258584976196} +02/25/2022 04:09:40 - INFO - codeparrot_training - Step 20525: {'lr': 0.0003376795511433058, 'samples': 10509312, 'steps': 20525, 'loss/train': 2.460319995880127} +02/25/2022 04:09:44 - INFO - codeparrot_training - Step 20526: {'lr': 0.00033766422782031263, 'samples': 10509824, 'steps': 20526, 'loss/train': 1.6952224969863892} +02/25/2022 04:09:50 - INFO - codeparrot_training - Step 20527: {'lr': 0.0003376489041217937, 'samples': 10510336, 'steps': 20527, 'loss/train': 1.53373122215271} +02/25/2022 04:09:54 - INFO - codeparrot_training - Step 20528: {'lr': 0.00033763358004781474, 'samples': 10510848, 'steps': 20528, 'loss/train': 2.0647308826446533} +02/25/2022 04:09:59 - INFO - codeparrot_training - Step 20529: {'lr': 0.00033761825559844137, 'samples': 10511360, 'steps': 20529, 'loss/train': 1.2797430753707886} +02/25/2022 04:10:03 - INFO - codeparrot_training - Step 20530: {'lr': 0.00033760293077373917, 'samples': 10511872, 'steps': 20530, 'loss/train': 2.235442638397217} +02/25/2022 04:10:08 - INFO - codeparrot_training - Step 20531: {'lr': 0.0003375876055737739, 'samples': 10512384, 'steps': 20531, 'loss/train': 1.8197238445281982} +02/25/2022 04:10:11 - INFO - codeparrot_training - Step 20532: {'lr': 0.000337572279998611, 'samples': 10512896, 'steps': 20532, 'loss/train': 0.5820606350898743} +02/25/2022 04:10:17 - INFO - codeparrot_training - Step 20533: {'lr': 0.00033755695404831635, 'samples': 10513408, 'steps': 20533, 'loss/train': 2.4444525241851807} +02/25/2022 04:10:21 - INFO - codeparrot_training - Step 20534: {'lr': 0.00033754162772295555, 'samples': 10513920, 'steps': 20534, 'loss/train': 2.326087713241577} +02/25/2022 04:10:26 - INFO - codeparrot_training - Step 20535: {'lr': 0.0003375263010225941, 'samples': 10514432, 'steps': 20535, 'loss/train': 2.1294429302215576} +02/25/2022 04:10:30 - INFO - codeparrot_training - Step 20536: {'lr': 0.00033751097394729793, 'samples': 10514944, 'steps': 20536, 'loss/train': 1.9030009508132935} +02/25/2022 04:10:36 - INFO - codeparrot_training - Step 20537: {'lr': 0.0003374956464971324, 'samples': 10515456, 'steps': 20537, 'loss/train': 2.6954758167266846} +02/25/2022 04:10:39 - INFO - codeparrot_training - Step 20538: {'lr': 0.00033748031867216334, 'samples': 10515968, 'steps': 20538, 'loss/train': 0.29608097672462463} +02/25/2022 04:10:45 - INFO - codeparrot_training - Step 20539: {'lr': 0.0003374649904724564, 'samples': 10516480, 'steps': 20539, 'loss/train': 1.9044148921966553} +02/25/2022 04:10:48 - INFO - codeparrot_training - Step 20540: {'lr': 0.0003374496618980772, 'samples': 10516992, 'steps': 20540, 'loss/train': 2.0163896083831787} +02/25/2022 04:10:54 - INFO - codeparrot_training - Step 20541: {'lr': 0.0003374343329490914, 'samples': 10517504, 'steps': 20541, 'loss/train': 1.5897040367126465} +02/25/2022 04:10:58 - INFO - codeparrot_training - Step 20542: {'lr': 0.00033741900362556474, 'samples': 10518016, 'steps': 20542, 'loss/train': 2.1951777935028076} +02/25/2022 04:11:03 - INFO - codeparrot_training - Step 20543: {'lr': 0.00033740367392756274, 'samples': 10518528, 'steps': 20543, 'loss/train': 2.0441653728485107} +02/25/2022 04:11:07 - INFO - codeparrot_training - Step 20544: {'lr': 0.0003373883438551512, 'samples': 10519040, 'steps': 20544, 'loss/train': 1.952451467514038} +02/25/2022 04:11:12 - INFO - codeparrot_training - Step 20545: {'lr': 0.0003373730134083958, 'samples': 10519552, 'steps': 20545, 'loss/train': 1.6012649536132812} +02/25/2022 04:11:16 - INFO - codeparrot_training - Step 20546: {'lr': 0.00033735768258736206, 'samples': 10520064, 'steps': 20546, 'loss/train': 3.1981472969055176} +02/25/2022 04:11:23 - INFO - codeparrot_training - Step 20547: {'lr': 0.0003373423513921158, 'samples': 10520576, 'steps': 20547, 'loss/train': 2.0102579593658447} +02/25/2022 04:11:26 - INFO - codeparrot_training - Step 20548: {'lr': 0.00033732701982272257, 'samples': 10521088, 'steps': 20548, 'loss/train': 2.3970634937286377} +02/25/2022 04:11:32 - INFO - codeparrot_training - Step 20549: {'lr': 0.0003373116878792482, 'samples': 10521600, 'steps': 20549, 'loss/train': 2.4481210708618164} +02/25/2022 04:11:35 - INFO - codeparrot_training - Step 20550: {'lr': 0.00033729635556175826, 'samples': 10522112, 'steps': 20550, 'loss/train': 1.7564014196395874} +02/25/2022 04:11:41 - INFO - codeparrot_training - Step 20551: {'lr': 0.0003372810228703184, 'samples': 10522624, 'steps': 20551, 'loss/train': 1.8961580991744995} +02/25/2022 04:11:44 - INFO - codeparrot_training - Step 20552: {'lr': 0.0003372656898049944, 'samples': 10523136, 'steps': 20552, 'loss/train': 2.156228542327881} +02/25/2022 04:11:50 - INFO - codeparrot_training - Step 20553: {'lr': 0.00033725035636585196, 'samples': 10523648, 'steps': 20553, 'loss/train': 2.398817777633667} +02/25/2022 04:11:54 - INFO - codeparrot_training - Step 20554: {'lr': 0.0003372350225529566, 'samples': 10524160, 'steps': 20554, 'loss/train': 1.324874997138977} +02/25/2022 04:11:59 - INFO - codeparrot_training - Step 20555: {'lr': 0.00033721968836637414, 'samples': 10524672, 'steps': 20555, 'loss/train': 0.8835898041725159} +02/25/2022 04:12:03 - INFO - codeparrot_training - Step 20556: {'lr': 0.0003372043538061702, 'samples': 10525184, 'steps': 20556, 'loss/train': 2.0057356357574463} +02/25/2022 04:12:08 - INFO - codeparrot_training - Step 20557: {'lr': 0.00033718901887241047, 'samples': 10525696, 'steps': 20557, 'loss/train': 2.072836399078369} +02/25/2022 04:12:12 - INFO - codeparrot_training - Step 20558: {'lr': 0.00033717368356516077, 'samples': 10526208, 'steps': 20558, 'loss/train': 1.0213122367858887} +02/25/2022 04:12:19 - INFO - codeparrot_training - Step 20559: {'lr': 0.0003371583478844866, 'samples': 10526720, 'steps': 20559, 'loss/train': 1.5481805801391602} +02/25/2022 04:12:22 - INFO - codeparrot_training - Step 20560: {'lr': 0.0003371430118304538, 'samples': 10527232, 'steps': 20560, 'loss/train': 2.0219507217407227} +02/25/2022 04:12:28 - INFO - codeparrot_training - Step 20561: {'lr': 0.000337127675403128, 'samples': 10527744, 'steps': 20561, 'loss/train': 2.5098512172698975} +02/25/2022 04:12:31 - INFO - codeparrot_training - Step 20562: {'lr': 0.00033711233860257494, 'samples': 10528256, 'steps': 20562, 'loss/train': 1.6744624376296997} +02/25/2022 04:12:37 - INFO - codeparrot_training - Step 20563: {'lr': 0.00033709700142886025, 'samples': 10528768, 'steps': 20563, 'loss/train': 2.8262696266174316} +02/25/2022 04:12:40 - INFO - codeparrot_training - Step 20564: {'lr': 0.00033708166388204963, 'samples': 10529280, 'steps': 20564, 'loss/train': 2.3715226650238037} +02/25/2022 04:12:46 - INFO - codeparrot_training - Step 20565: {'lr': 0.00033706632596220885, 'samples': 10529792, 'steps': 20565, 'loss/train': 2.365448474884033} +02/25/2022 04:12:49 - INFO - codeparrot_training - Step 20566: {'lr': 0.00033705098766940354, 'samples': 10530304, 'steps': 20566, 'loss/train': 1.470035433769226} +02/25/2022 04:12:55 - INFO - codeparrot_training - Step 20567: {'lr': 0.00033703564900369943, 'samples': 10530816, 'steps': 20567, 'loss/train': 2.040496826171875} +02/25/2022 04:12:58 - INFO - codeparrot_training - Step 20568: {'lr': 0.0003370203099651623, 'samples': 10531328, 'steps': 20568, 'loss/train': 1.470913052558899} +02/25/2022 04:13:04 - INFO - codeparrot_training - Step 20569: {'lr': 0.0003370049705538578, 'samples': 10531840, 'steps': 20569, 'loss/train': 2.430475950241089} +02/25/2022 04:13:07 - INFO - codeparrot_training - Step 20570: {'lr': 0.00033698963076985155, 'samples': 10532352, 'steps': 20570, 'loss/train': 1.7727771997451782} +02/25/2022 04:13:13 - INFO - codeparrot_training - Step 20571: {'lr': 0.00033697429061320936, 'samples': 10532864, 'steps': 20571, 'loss/train': 1.7067110538482666} +02/25/2022 04:13:16 - INFO - codeparrot_training - Step 20572: {'lr': 0.00033695895008399704, 'samples': 10533376, 'steps': 20572, 'loss/train': 2.1659817695617676} +02/25/2022 04:13:23 - INFO - codeparrot_training - Step 20573: {'lr': 0.00033694360918228006, 'samples': 10533888, 'steps': 20573, 'loss/train': 1.9703102111816406} +02/25/2022 04:13:27 - INFO - codeparrot_training - Step 20574: {'lr': 0.0003369282679081243, 'samples': 10534400, 'steps': 20574, 'loss/train': 1.6992661952972412} +02/25/2022 04:13:32 - INFO - codeparrot_training - Step 20575: {'lr': 0.00033691292626159556, 'samples': 10534912, 'steps': 20575, 'loss/train': 1.243703007698059} +02/25/2022 04:13:36 - INFO - codeparrot_training - Step 20576: {'lr': 0.0003368975842427592, 'samples': 10535424, 'steps': 20576, 'loss/train': 1.986406683921814} +02/25/2022 04:13:41 - INFO - codeparrot_training - Step 20577: {'lr': 0.00033688224185168136, 'samples': 10535936, 'steps': 20577, 'loss/train': 1.7609435319900513} +02/25/2022 04:13:45 - INFO - codeparrot_training - Step 20578: {'lr': 0.0003368668990884275, 'samples': 10536448, 'steps': 20578, 'loss/train': 3.246948719024658} +02/25/2022 04:13:50 - INFO - codeparrot_training - Step 20579: {'lr': 0.0003368515559530635, 'samples': 10536960, 'steps': 20579, 'loss/train': 1.6103166341781616} +02/25/2022 04:13:54 - INFO - codeparrot_training - Step 20580: {'lr': 0.00033683621244565506, 'samples': 10537472, 'steps': 20580, 'loss/train': 2.2472264766693115} +02/25/2022 04:13:59 - INFO - codeparrot_training - Step 20581: {'lr': 0.00033682086856626773, 'samples': 10537984, 'steps': 20581, 'loss/train': 1.9969643354415894} +02/25/2022 04:14:03 - INFO - codeparrot_training - Step 20582: {'lr': 0.00033680552431496744, 'samples': 10538496, 'steps': 20582, 'loss/train': 1.7149379253387451} +02/25/2022 04:14:10 - INFO - codeparrot_training - Step 20583: {'lr': 0.0003367901796918198, 'samples': 10539008, 'steps': 20583, 'loss/train': 2.339693784713745} +02/25/2022 04:14:13 - INFO - codeparrot_training - Step 20584: {'lr': 0.0003367748346968907, 'samples': 10539520, 'steps': 20584, 'loss/train': 2.123481512069702} +02/25/2022 04:14:19 - INFO - codeparrot_training - Step 20585: {'lr': 0.0003367594893302457, 'samples': 10540032, 'steps': 20585, 'loss/train': 1.932574987411499} +02/25/2022 04:14:22 - INFO - codeparrot_training - Step 20586: {'lr': 0.00033674414359195055, 'samples': 10540544, 'steps': 20586, 'loss/train': 1.6969020366668701} +02/25/2022 04:14:28 - INFO - codeparrot_training - Step 20587: {'lr': 0.00033672879748207105, 'samples': 10541056, 'steps': 20587, 'loss/train': 3.1708507537841797} +02/25/2022 04:14:31 - INFO - codeparrot_training - Step 20588: {'lr': 0.000336713451000673, 'samples': 10541568, 'steps': 20588, 'loss/train': 1.717882513999939} +02/25/2022 04:14:37 - INFO - codeparrot_training - Step 20589: {'lr': 0.000336698104147822, 'samples': 10542080, 'steps': 20589, 'loss/train': 1.6928091049194336} +02/25/2022 04:14:40 - INFO - codeparrot_training - Step 20590: {'lr': 0.0003366827569235838, 'samples': 10542592, 'steps': 20590, 'loss/train': 2.0823044776916504} +02/25/2022 04:14:46 - INFO - codeparrot_training - Step 20591: {'lr': 0.00033666740932802424, 'samples': 10543104, 'steps': 20591, 'loss/train': 1.821470022201538} +02/25/2022 04:14:49 - INFO - codeparrot_training - Step 20592: {'lr': 0.0003366520613612091, 'samples': 10543616, 'steps': 20592, 'loss/train': 0.9725879430770874} +02/25/2022 04:14:56 - INFO - codeparrot_training - Step 20593: {'lr': 0.00033663671302320387, 'samples': 10544128, 'steps': 20593, 'loss/train': 2.4706289768218994} +02/25/2022 04:14:59 - INFO - codeparrot_training - Step 20594: {'lr': 0.0003366213643140745, 'samples': 10544640, 'steps': 20594, 'loss/train': 0.7303314805030823} +02/25/2022 04:15:05 - INFO - codeparrot_training - Step 20595: {'lr': 0.0003366060152338869, 'samples': 10545152, 'steps': 20595, 'loss/train': 1.5464123487472534} +02/25/2022 04:15:08 - INFO - codeparrot_training - Step 20596: {'lr': 0.0003365906657827065, 'samples': 10545664, 'steps': 20596, 'loss/train': 1.7940410375595093} +02/25/2022 04:15:14 - INFO - codeparrot_training - Step 20597: {'lr': 0.00033657531596059914, 'samples': 10546176, 'steps': 20597, 'loss/train': 0.9959035515785217} +02/25/2022 04:15:17 - INFO - codeparrot_training - Step 20598: {'lr': 0.00033655996576763064, 'samples': 10546688, 'steps': 20598, 'loss/train': 1.9002448320388794} +02/25/2022 04:15:23 - INFO - codeparrot_training - Step 20599: {'lr': 0.00033654461520386683, 'samples': 10547200, 'steps': 20599, 'loss/train': 1.2800064086914062} +02/25/2022 04:15:26 - INFO - codeparrot_training - Step 20600: {'lr': 0.0003365292642693733, 'samples': 10547712, 'steps': 20600, 'loss/train': 1.8706591129302979} +02/25/2022 04:15:32 - INFO - codeparrot_training - Step 20601: {'lr': 0.00033651391296421587, 'samples': 10548224, 'steps': 20601, 'loss/train': 2.04245924949646} +02/25/2022 04:15:35 - INFO - codeparrot_training - Step 20602: {'lr': 0.0003364985612884603, 'samples': 10548736, 'steps': 20602, 'loss/train': 1.962165355682373} +02/25/2022 04:15:41 - INFO - codeparrot_training - Step 20603: {'lr': 0.0003364832092421724, 'samples': 10549248, 'steps': 20603, 'loss/train': 2.599982261657715} +02/25/2022 04:15:44 - INFO - codeparrot_training - Step 20604: {'lr': 0.0003364678568254178, 'samples': 10549760, 'steps': 20604, 'loss/train': 2.2657630443573} +02/25/2022 04:15:50 - INFO - codeparrot_training - Step 20605: {'lr': 0.00033645250403826246, 'samples': 10550272, 'steps': 20605, 'loss/train': 2.3945748805999756} +02/25/2022 04:15:54 - INFO - codeparrot_training - Step 20606: {'lr': 0.00033643715088077205, 'samples': 10550784, 'steps': 20606, 'loss/train': 1.7973207235336304} +02/25/2022 04:15:59 - INFO - codeparrot_training - Step 20607: {'lr': 0.0003364217973530123, 'samples': 10551296, 'steps': 20607, 'loss/train': 1.2333757877349854} +02/25/2022 04:16:03 - INFO - codeparrot_training - Step 20608: {'lr': 0.00033640644345504897, 'samples': 10551808, 'steps': 20608, 'loss/train': 2.023709535598755} +02/25/2022 04:16:08 - INFO - codeparrot_training - Step 20609: {'lr': 0.0003363910891869479, 'samples': 10552320, 'steps': 20609, 'loss/train': 1.9283435344696045} +02/25/2022 04:16:12 - INFO - codeparrot_training - Step 20610: {'lr': 0.00033637573454877486, 'samples': 10552832, 'steps': 20610, 'loss/train': 2.3888638019561768} +02/25/2022 04:16:17 - INFO - codeparrot_training - Step 20611: {'lr': 0.0003363603795405956, 'samples': 10553344, 'steps': 20611, 'loss/train': 1.7015565633773804} +02/25/2022 04:16:24 - INFO - codeparrot_training - Step 20612: {'lr': 0.0003363450241624759, 'samples': 10553856, 'steps': 20612, 'loss/train': 1.7890573740005493} +02/25/2022 04:16:27 - INFO - codeparrot_training - Step 20613: {'lr': 0.00033632966841448144, 'samples': 10554368, 'steps': 20613, 'loss/train': 1.6532474756240845} +02/25/2022 04:16:30 - INFO - codeparrot_training - Step 20614: {'lr': 0.0003363143122966782, 'samples': 10554880, 'steps': 20614, 'loss/train': 0.4228179454803467} +02/25/2022 04:16:38 - INFO - codeparrot_training - Step 20615: {'lr': 0.00033629895580913183, 'samples': 10555392, 'steps': 20615, 'loss/train': 1.9388201236724854} +02/25/2022 04:16:41 - INFO - codeparrot_training - Step 20616: {'lr': 0.00033628359895190814, 'samples': 10555904, 'steps': 20616, 'loss/train': 5.145500183105469} +02/25/2022 04:16:47 - INFO - codeparrot_training - Step 20617: {'lr': 0.0003362682417250729, 'samples': 10556416, 'steps': 20617, 'loss/train': 1.5483806133270264} +02/25/2022 04:16:50 - INFO - codeparrot_training - Step 20618: {'lr': 0.00033625288412869185, 'samples': 10556928, 'steps': 20618, 'loss/train': 1.572220802307129} +02/25/2022 04:16:56 - INFO - codeparrot_training - Step 20619: {'lr': 0.00033623752616283094, 'samples': 10557440, 'steps': 20619, 'loss/train': 3.715501546859741} +02/25/2022 04:16:59 - INFO - codeparrot_training - Step 20620: {'lr': 0.0003362221678275558, 'samples': 10557952, 'steps': 20620, 'loss/train': 2.956888198852539} +02/25/2022 04:17:05 - INFO - codeparrot_training - Step 20621: {'lr': 0.00033620680912293226, 'samples': 10558464, 'steps': 20621, 'loss/train': 1.7934989929199219} +02/25/2022 04:17:08 - INFO - codeparrot_training - Step 20622: {'lr': 0.0003361914500490261, 'samples': 10558976, 'steps': 20622, 'loss/train': 1.4772794246673584} +02/25/2022 04:17:14 - INFO - codeparrot_training - Step 20623: {'lr': 0.0003361760906059032, 'samples': 10559488, 'steps': 20623, 'loss/train': 1.341872215270996} +02/25/2022 04:17:17 - INFO - codeparrot_training - Step 20624: {'lr': 0.00033616073079362923, 'samples': 10560000, 'steps': 20624, 'loss/train': 0.7913236021995544} +02/25/2022 04:17:23 - INFO - codeparrot_training - Step 20625: {'lr': 0.00033614537061227014, 'samples': 10560512, 'steps': 20625, 'loss/train': 1.3911631107330322} +02/25/2022 04:17:26 - INFO - codeparrot_training - Step 20626: {'lr': 0.00033613001006189156, 'samples': 10561024, 'steps': 20626, 'loss/train': 2.240656852722168} +02/25/2022 04:17:32 - INFO - codeparrot_training - Step 20627: {'lr': 0.0003361146491425594, 'samples': 10561536, 'steps': 20627, 'loss/train': 2.7742202281951904} +02/25/2022 04:17:35 - INFO - codeparrot_training - Step 20628: {'lr': 0.0003360992878543394, 'samples': 10562048, 'steps': 20628, 'loss/train': 2.2700459957122803} +02/25/2022 04:17:42 - INFO - codeparrot_training - Step 20629: {'lr': 0.00033608392619729745, 'samples': 10562560, 'steps': 20629, 'loss/train': 2.4273712635040283} +02/25/2022 04:17:46 - INFO - codeparrot_training - Step 20630: {'lr': 0.0003360685641714992, 'samples': 10563072, 'steps': 20630, 'loss/train': 2.5046215057373047} +02/25/2022 04:17:51 - INFO - codeparrot_training - Step 20631: {'lr': 0.0003360532017770106, 'samples': 10563584, 'steps': 20631, 'loss/train': 1.9252820014953613} +02/25/2022 04:17:55 - INFO - codeparrot_training - Step 20632: {'lr': 0.00033603783901389744, 'samples': 10564096, 'steps': 20632, 'loss/train': 2.424588918685913} +02/25/2022 04:18:00 - INFO - codeparrot_training - Step 20633: {'lr': 0.00033602247588222545, 'samples': 10564608, 'steps': 20633, 'loss/train': 1.0199012756347656} +02/25/2022 04:18:04 - INFO - codeparrot_training - Step 20634: {'lr': 0.00033600711238206056, 'samples': 10565120, 'steps': 20634, 'loss/train': 2.2843220233917236} +02/25/2022 04:18:09 - INFO - codeparrot_training - Step 20635: {'lr': 0.00033599174851346844, 'samples': 10565632, 'steps': 20635, 'loss/train': 2.708117723464966} +02/25/2022 04:18:13 - INFO - codeparrot_training - Step 20636: {'lr': 0.000335976384276515, 'samples': 10566144, 'steps': 20636, 'loss/train': 0.8021120429039001} +02/25/2022 04:18:18 - INFO - codeparrot_training - Step 20637: {'lr': 0.000335961019671266, 'samples': 10566656, 'steps': 20637, 'loss/train': 2.1264638900756836} +02/25/2022 04:18:22 - INFO - codeparrot_training - Step 20638: {'lr': 0.00033594565469778734, 'samples': 10567168, 'steps': 20638, 'loss/train': 1.562037706375122} +02/25/2022 04:18:29 - INFO - codeparrot_training - Step 20639: {'lr': 0.00033593028935614475, 'samples': 10567680, 'steps': 20639, 'loss/train': 2.5130465030670166} +02/25/2022 04:18:32 - INFO - codeparrot_training - Step 20640: {'lr': 0.0003359149236464041, 'samples': 10568192, 'steps': 20640, 'loss/train': 2.2507386207580566} +02/25/2022 04:18:38 - INFO - codeparrot_training - Step 20641: {'lr': 0.0003358995575686311, 'samples': 10568704, 'steps': 20641, 'loss/train': 3.4444360733032227} +02/25/2022 04:18:41 - INFO - codeparrot_training - Step 20642: {'lr': 0.00033588419112289176, 'samples': 10569216, 'steps': 20642, 'loss/train': 1.8604896068572998} +02/25/2022 04:18:47 - INFO - codeparrot_training - Step 20643: {'lr': 0.00033586882430925184, 'samples': 10569728, 'steps': 20643, 'loss/train': 1.009259819984436} +02/25/2022 04:18:50 - INFO - codeparrot_training - Step 20644: {'lr': 0.0003358534571277771, 'samples': 10570240, 'steps': 20644, 'loss/train': 1.8278828859329224} +02/25/2022 04:18:56 - INFO - codeparrot_training - Step 20645: {'lr': 0.0003358380895785335, 'samples': 10570752, 'steps': 20645, 'loss/train': 0.7926627397537231} +02/25/2022 04:19:00 - INFO - codeparrot_training - Step 20646: {'lr': 0.00033582272166158666, 'samples': 10571264, 'steps': 20646, 'loss/train': 2.104950428009033} +02/25/2022 04:19:05 - INFO - codeparrot_training - Step 20647: {'lr': 0.00033580735337700266, 'samples': 10571776, 'steps': 20647, 'loss/train': 1.9371141195297241} +02/25/2022 04:19:09 - INFO - codeparrot_training - Step 20648: {'lr': 0.00033579198472484707, 'samples': 10572288, 'steps': 20648, 'loss/train': 2.2995235919952393} +02/25/2022 04:19:14 - INFO - codeparrot_training - Step 20649: {'lr': 0.0003357766157051859, 'samples': 10572800, 'steps': 20649, 'loss/train': 1.6594722270965576} +02/25/2022 04:19:18 - INFO - codeparrot_training - Step 20650: {'lr': 0.000335761246318085, 'samples': 10573312, 'steps': 20650, 'loss/train': 2.056654930114746} +02/25/2022 04:19:24 - INFO - codeparrot_training - Step 20651: {'lr': 0.0003357458765636101, 'samples': 10573824, 'steps': 20651, 'loss/train': 1.4294756650924683} +02/25/2022 04:19:28 - INFO - codeparrot_training - Step 20652: {'lr': 0.00033573050644182713, 'samples': 10574336, 'steps': 20652, 'loss/train': 2.060487985610962} +02/25/2022 04:19:33 - INFO - codeparrot_training - Step 20653: {'lr': 0.00033571513595280185, 'samples': 10574848, 'steps': 20653, 'loss/train': 2.9714717864990234} +02/25/2022 04:19:37 - INFO - codeparrot_training - Step 20654: {'lr': 0.0003356997650966002, 'samples': 10575360, 'steps': 20654, 'loss/train': 2.1979050636291504} +02/25/2022 04:19:40 - INFO - codeparrot_training - Step 20655: {'lr': 0.0003356843938732879, 'samples': 10575872, 'steps': 20655, 'loss/train': 1.7492620944976807} +02/25/2022 04:19:46 - INFO - codeparrot_training - Step 20656: {'lr': 0.0003356690222829309, 'samples': 10576384, 'steps': 20656, 'loss/train': 1.6251932382583618} +02/25/2022 04:19:50 - INFO - codeparrot_training - Step 20657: {'lr': 0.00033565365032559496, 'samples': 10576896, 'steps': 20657, 'loss/train': 1.6758636236190796} +02/25/2022 04:19:55 - INFO - codeparrot_training - Step 20658: {'lr': 0.00033563827800134604, 'samples': 10577408, 'steps': 20658, 'loss/train': 1.3154969215393066} +02/25/2022 04:19:58 - INFO - codeparrot_training - Step 20659: {'lr': 0.00033562290531025, 'samples': 10577920, 'steps': 20659, 'loss/train': 0.9799546003341675} +02/25/2022 04:20:06 - INFO - codeparrot_training - Step 20660: {'lr': 0.0003356075322523725, 'samples': 10578432, 'steps': 20660, 'loss/train': 2.0282552242279053} +02/25/2022 04:20:09 - INFO - codeparrot_training - Step 20661: {'lr': 0.00033559215882777955, 'samples': 10578944, 'steps': 20661, 'loss/train': 8.620622634887695} +02/25/2022 04:20:14 - INFO - codeparrot_training - Step 20662: {'lr': 0.000335576785036537, 'samples': 10579456, 'steps': 20662, 'loss/train': 2.532790184020996} +02/25/2022 04:20:18 - INFO - codeparrot_training - Step 20663: {'lr': 0.0003355614108787106, 'samples': 10579968, 'steps': 20663, 'loss/train': 2.788933753967285} +02/25/2022 04:20:23 - INFO - codeparrot_training - Step 20664: {'lr': 0.00033554603635436645, 'samples': 10580480, 'steps': 20664, 'loss/train': 2.4055988788604736} +02/25/2022 04:20:29 - INFO - codeparrot_training - Step 20665: {'lr': 0.00033553066146357, 'samples': 10580992, 'steps': 20665, 'loss/train': 2.019536256790161} +02/25/2022 04:20:33 - INFO - codeparrot_training - Step 20666: {'lr': 0.0003355152862063875, 'samples': 10581504, 'steps': 20666, 'loss/train': 2.2609477043151855} +02/25/2022 04:20:38 - INFO - codeparrot_training - Step 20667: {'lr': 0.0003354999105828847, 'samples': 10582016, 'steps': 20667, 'loss/train': 2.2286131381988525} +02/25/2022 04:20:42 - INFO - codeparrot_training - Step 20668: {'lr': 0.0003354845345931274, 'samples': 10582528, 'steps': 20668, 'loss/train': 1.742092490196228} +02/25/2022 04:20:49 - INFO - codeparrot_training - Step 20669: {'lr': 0.0003354691582371815, 'samples': 10583040, 'steps': 20669, 'loss/train': 2.0620768070220947} +02/25/2022 04:20:53 - INFO - codeparrot_training - Step 20670: {'lr': 0.00033545378151511287, 'samples': 10583552, 'steps': 20670, 'loss/train': 1.5710407495498657} +02/25/2022 04:20:58 - INFO - codeparrot_training - Step 20671: {'lr': 0.00033543840442698737, 'samples': 10584064, 'steps': 20671, 'loss/train': 1.565824270248413} +02/25/2022 04:21:02 - INFO - codeparrot_training - Step 20672: {'lr': 0.0003354230269728709, 'samples': 10584576, 'steps': 20672, 'loss/train': 1.8649389743804932} +02/25/2022 04:21:07 - INFO - codeparrot_training - Step 20673: {'lr': 0.0003354076491528292, 'samples': 10585088, 'steps': 20673, 'loss/train': 1.282605528831482} +02/25/2022 04:21:11 - INFO - codeparrot_training - Step 20674: {'lr': 0.00033539227096692837, 'samples': 10585600, 'steps': 20674, 'loss/train': 2.198899269104004} +02/25/2022 04:21:16 - INFO - codeparrot_training - Step 20675: {'lr': 0.00033537689241523407, 'samples': 10586112, 'steps': 20675, 'loss/train': 1.7681479454040527} +02/25/2022 04:21:20 - INFO - codeparrot_training - Step 20676: {'lr': 0.00033536151349781236, 'samples': 10586624, 'steps': 20676, 'loss/train': 2.2473134994506836} +02/25/2022 04:21:25 - INFO - codeparrot_training - Step 20677: {'lr': 0.000335346134214729, 'samples': 10587136, 'steps': 20677, 'loss/train': 2.4820892810821533} +02/25/2022 04:21:29 - INFO - codeparrot_training - Step 20678: {'lr': 0.0003353307545660499, 'samples': 10587648, 'steps': 20678, 'loss/train': 1.0811653137207031} +02/25/2022 04:21:36 - INFO - codeparrot_training - Step 20679: {'lr': 0.0003353153745518409, 'samples': 10588160, 'steps': 20679, 'loss/train': 2.558427095413208} +02/25/2022 04:21:40 - INFO - codeparrot_training - Step 20680: {'lr': 0.000335299994172168, 'samples': 10588672, 'steps': 20680, 'loss/train': 1.893054723739624} +02/25/2022 04:21:45 - INFO - codeparrot_training - Step 20681: {'lr': 0.0003352846134270969, 'samples': 10589184, 'steps': 20681, 'loss/train': 2.0280046463012695} +02/25/2022 04:21:49 - INFO - codeparrot_training - Step 20682: {'lr': 0.0003352692323166938, 'samples': 10589696, 'steps': 20682, 'loss/train': 1.0919030904769897} +02/25/2022 04:21:52 - INFO - codeparrot_training - Step 20683: {'lr': 0.0003352538508410242, 'samples': 10590208, 'steps': 20683, 'loss/train': 0.13893944025039673} +02/25/2022 04:21:58 - INFO - codeparrot_training - Step 20684: {'lr': 0.00033523846900015427, 'samples': 10590720, 'steps': 20684, 'loss/train': 1.4504388570785522} +02/25/2022 04:22:03 - INFO - codeparrot_training - Step 20685: {'lr': 0.0003352230867941497, 'samples': 10591232, 'steps': 20685, 'loss/train': 1.847858190536499} +02/25/2022 04:22:07 - INFO - codeparrot_training - Step 20686: {'lr': 0.0003352077042230766, 'samples': 10591744, 'steps': 20686, 'loss/train': 2.4221556186676025} +02/25/2022 04:22:12 - INFO - codeparrot_training - Step 20687: {'lr': 0.0003351923212870007, 'samples': 10592256, 'steps': 20687, 'loss/train': 1.736351728439331} +02/25/2022 04:22:16 - INFO - codeparrot_training - Step 20688: {'lr': 0.000335176937985988, 'samples': 10592768, 'steps': 20688, 'loss/train': 2.496405839920044} +02/25/2022 04:22:21 - INFO - codeparrot_training - Step 20689: {'lr': 0.0003351615543201042, 'samples': 10593280, 'steps': 20689, 'loss/train': 1.488968014717102} +02/25/2022 04:22:25 - INFO - codeparrot_training - Step 20690: {'lr': 0.00033514617028941547, 'samples': 10593792, 'steps': 20690, 'loss/train': 1.6677732467651367} +02/25/2022 04:22:32 - INFO - codeparrot_training - Step 20691: {'lr': 0.0003351307858939875, 'samples': 10594304, 'steps': 20691, 'loss/train': 1.5482953786849976} +02/25/2022 04:22:35 - INFO - codeparrot_training - Step 20692: {'lr': 0.0003351154011338864, 'samples': 10594816, 'steps': 20692, 'loss/train': 1.4778696298599243} +02/25/2022 04:22:41 - INFO - codeparrot_training - Step 20693: {'lr': 0.00033510001600917783, 'samples': 10595328, 'steps': 20693, 'loss/train': 2.131817579269409} +02/25/2022 04:22:44 - INFO - codeparrot_training - Step 20694: {'lr': 0.00033508463051992786, 'samples': 10595840, 'steps': 20694, 'loss/train': 0.7110695242881775} +02/25/2022 04:22:50 - INFO - codeparrot_training - Step 20695: {'lr': 0.00033506924466620235, 'samples': 10596352, 'steps': 20695, 'loss/train': 0.950127363204956} +02/25/2022 04:22:53 - INFO - codeparrot_training - Step 20696: {'lr': 0.0003350538584480672, 'samples': 10596864, 'steps': 20696, 'loss/train': 0.5178962349891663} +02/25/2022 04:22:59 - INFO - codeparrot_training - Step 20697: {'lr': 0.0003350384718655884, 'samples': 10597376, 'steps': 20697, 'loss/train': 2.0694234371185303} +02/25/2022 04:23:03 - INFO - codeparrot_training - Step 20698: {'lr': 0.0003350230849188317, 'samples': 10597888, 'steps': 20698, 'loss/train': 2.4739716053009033} +02/25/2022 04:23:08 - INFO - codeparrot_training - Step 20699: {'lr': 0.00033500769760786314, 'samples': 10598400, 'steps': 20699, 'loss/train': 3.235154867172241} +02/25/2022 04:23:12 - INFO - codeparrot_training - Step 20700: {'lr': 0.0003349923099327485, 'samples': 10598912, 'steps': 20700, 'loss/train': 1.994461178779602} +02/25/2022 04:23:19 - INFO - codeparrot_training - Step 20701: {'lr': 0.000334976921893554, 'samples': 10599424, 'steps': 20701, 'loss/train': 1.927176594734192} +02/25/2022 04:23:22 - INFO - codeparrot_training - Step 20702: {'lr': 0.0003349615334903452, 'samples': 10599936, 'steps': 20702, 'loss/train': 2.4826650619506836} +02/25/2022 04:23:28 - INFO - codeparrot_training - Step 20703: {'lr': 0.00033494614472318816, 'samples': 10600448, 'steps': 20703, 'loss/train': 1.3483468294143677} +02/25/2022 04:23:31 - INFO - codeparrot_training - Step 20704: {'lr': 0.00033493075559214885, 'samples': 10600960, 'steps': 20704, 'loss/train': 1.7731810808181763} +02/25/2022 04:23:37 - INFO - codeparrot_training - Step 20705: {'lr': 0.00033491536609729313, 'samples': 10601472, 'steps': 20705, 'loss/train': 1.9997228384017944} +02/25/2022 04:23:40 - INFO - codeparrot_training - Step 20706: {'lr': 0.000334899976238687, 'samples': 10601984, 'steps': 20706, 'loss/train': 1.9523364305496216} +02/25/2022 04:23:46 - INFO - codeparrot_training - Step 20707: {'lr': 0.00033488458601639624, 'samples': 10602496, 'steps': 20707, 'loss/train': 2.0029444694519043} +02/25/2022 04:23:49 - INFO - codeparrot_training - Step 20708: {'lr': 0.000334869195430487, 'samples': 10603008, 'steps': 20708, 'loss/train': 1.8645951747894287} +02/25/2022 04:23:55 - INFO - codeparrot_training - Step 20709: {'lr': 0.00033485380448102496, 'samples': 10603520, 'steps': 20709, 'loss/train': 2.003495931625366} +02/25/2022 04:23:58 - INFO - codeparrot_training - Step 20710: {'lr': 0.0003348384131680762, 'samples': 10604032, 'steps': 20710, 'loss/train': 1.5402758121490479} +02/25/2022 04:24:04 - INFO - codeparrot_training - Step 20711: {'lr': 0.0003348230214917066, 'samples': 10604544, 'steps': 20711, 'loss/train': 2.003594160079956} +02/25/2022 04:24:07 - INFO - codeparrot_training - Step 20712: {'lr': 0.0003348076294519822, 'samples': 10605056, 'steps': 20712, 'loss/train': 2.126431465148926} +02/25/2022 04:24:13 - INFO - codeparrot_training - Step 20713: {'lr': 0.0003347922370489687, 'samples': 10605568, 'steps': 20713, 'loss/train': 2.2130186557769775} +02/25/2022 04:24:16 - INFO - codeparrot_training - Step 20714: {'lr': 0.00033477684428273233, 'samples': 10606080, 'steps': 20714, 'loss/train': 0.691471517086029} +02/25/2022 04:24:22 - INFO - codeparrot_training - Step 20715: {'lr': 0.0003347614511533388, 'samples': 10606592, 'steps': 20715, 'loss/train': 3.0492777824401855} +02/25/2022 04:24:25 - INFO - codeparrot_training - Step 20716: {'lr': 0.0003347460576608541, 'samples': 10607104, 'steps': 20716, 'loss/train': 2.4379959106445312} +02/25/2022 04:24:32 - INFO - codeparrot_training - Step 20717: {'lr': 0.00033473066380534423, 'samples': 10607616, 'steps': 20717, 'loss/train': 2.1472010612487793} +02/25/2022 04:24:36 - INFO - codeparrot_training - Step 20718: {'lr': 0.00033471526958687514, 'samples': 10608128, 'steps': 20718, 'loss/train': 2.1485254764556885} +02/25/2022 04:24:41 - INFO - codeparrot_training - Step 20719: {'lr': 0.0003346998750055127, 'samples': 10608640, 'steps': 20719, 'loss/train': 0.8854549527168274} +02/25/2022 04:24:45 - INFO - codeparrot_training - Step 20720: {'lr': 0.0003346844800613229, 'samples': 10609152, 'steps': 20720, 'loss/train': 2.275102138519287} +02/25/2022 04:24:50 - INFO - codeparrot_training - Step 20721: {'lr': 0.0003346690847543717, 'samples': 10609664, 'steps': 20721, 'loss/train': 1.079146385192871} +02/25/2022 04:24:54 - INFO - codeparrot_training - Step 20722: {'lr': 0.00033465368908472496, 'samples': 10610176, 'steps': 20722, 'loss/train': 1.4751988649368286} +02/25/2022 04:24:59 - INFO - codeparrot_training - Step 20723: {'lr': 0.00033463829305244874, 'samples': 10610688, 'steps': 20723, 'loss/train': 3.0920238494873047} +02/25/2022 04:25:03 - INFO - codeparrot_training - Step 20724: {'lr': 0.0003346228966576089, 'samples': 10611200, 'steps': 20724, 'loss/train': 0.6148373484611511} +02/25/2022 04:25:08 - INFO - codeparrot_training - Step 20725: {'lr': 0.0003346074999002715, 'samples': 10611712, 'steps': 20725, 'loss/train': 2.074270248413086} +02/25/2022 04:25:12 - INFO - codeparrot_training - Step 20726: {'lr': 0.0003345921027805024, 'samples': 10612224, 'steps': 20726, 'loss/train': 1.8014246225357056} +02/25/2022 04:25:19 - INFO - codeparrot_training - Step 20727: {'lr': 0.00033457670529836756, 'samples': 10612736, 'steps': 20727, 'loss/train': 1.2105252742767334} +02/25/2022 04:25:22 - INFO - codeparrot_training - Step 20728: {'lr': 0.0003345613074539331, 'samples': 10613248, 'steps': 20728, 'loss/train': 2.1932897567749023} +02/25/2022 04:25:28 - INFO - codeparrot_training - Step 20729: {'lr': 0.00033454590924726467, 'samples': 10613760, 'steps': 20729, 'loss/train': 1.5145426988601685} +02/25/2022 04:25:31 - INFO - codeparrot_training - Step 20730: {'lr': 0.0003345305106784286, 'samples': 10614272, 'steps': 20730, 'loss/train': 2.0614726543426514} +02/25/2022 04:25:37 - INFO - codeparrot_training - Step 20731: {'lr': 0.00033451511174749057, 'samples': 10614784, 'steps': 20731, 'loss/train': 1.3937602043151855} +02/25/2022 04:25:40 - INFO - codeparrot_training - Step 20732: {'lr': 0.0003344997124545166, 'samples': 10615296, 'steps': 20732, 'loss/train': 1.9721789360046387} +02/25/2022 04:25:46 - INFO - codeparrot_training - Step 20733: {'lr': 0.0003344843127995728, 'samples': 10615808, 'steps': 20733, 'loss/train': 2.206768035888672} +02/25/2022 04:25:50 - INFO - codeparrot_training - Step 20734: {'lr': 0.00033446891278272493, 'samples': 10616320, 'steps': 20734, 'loss/train': 3.2114272117614746} +02/25/2022 04:25:55 - INFO - codeparrot_training - Step 20735: {'lr': 0.0003344535124040391, 'samples': 10616832, 'steps': 20735, 'loss/train': 1.652105689048767} +02/25/2022 04:25:59 - INFO - codeparrot_training - Step 20736: {'lr': 0.0003344381116635812, 'samples': 10617344, 'steps': 20736, 'loss/train': 2.0425808429718018} +02/25/2022 04:26:06 - INFO - codeparrot_training - Step 20737: {'lr': 0.0003344227105614173, 'samples': 10617856, 'steps': 20737, 'loss/train': 1.4232568740844727} +02/25/2022 04:26:09 - INFO - codeparrot_training - Step 20738: {'lr': 0.0003344073090976132, 'samples': 10618368, 'steps': 20738, 'loss/train': 1.394872784614563} +02/25/2022 04:26:15 - INFO - codeparrot_training - Step 20739: {'lr': 0.00033439190727223517, 'samples': 10618880, 'steps': 20739, 'loss/train': 2.1726560592651367} +02/25/2022 04:26:18 - INFO - codeparrot_training - Step 20740: {'lr': 0.00033437650508534887, 'samples': 10619392, 'steps': 20740, 'loss/train': 1.9124764204025269} +02/25/2022 04:26:25 - INFO - codeparrot_training - Step 20741: {'lr': 0.0003343611025370205, 'samples': 10619904, 'steps': 20741, 'loss/train': 0.954076886177063} +02/25/2022 04:26:28 - INFO - codeparrot_training - Step 20742: {'lr': 0.00033434569962731593, 'samples': 10620416, 'steps': 20742, 'loss/train': 2.2607502937316895} +02/25/2022 04:26:31 - INFO - codeparrot_training - Step 20743: {'lr': 0.0003343302963563012, 'samples': 10620928, 'steps': 20743, 'loss/train': 2.2857413291931152} +02/25/2022 04:26:37 - INFO - codeparrot_training - Step 20744: {'lr': 0.00033431489272404215, 'samples': 10621440, 'steps': 20744, 'loss/train': 1.923913836479187} +02/25/2022 04:26:41 - INFO - codeparrot_training - Step 20745: {'lr': 0.00033429948873060496, 'samples': 10621952, 'steps': 20745, 'loss/train': 1.737326741218567} +02/25/2022 04:26:46 - INFO - codeparrot_training - Step 20746: {'lr': 0.0003342840843760555, 'samples': 10622464, 'steps': 20746, 'loss/train': 2.188119411468506} +02/25/2022 04:26:50 - INFO - codeparrot_training - Step 20747: {'lr': 0.00033426867966045984, 'samples': 10622976, 'steps': 20747, 'loss/train': 1.7467223405838013} +02/25/2022 04:26:57 - INFO - codeparrot_training - Step 20748: {'lr': 0.00033425327458388375, 'samples': 10623488, 'steps': 20748, 'loss/train': 2.5961050987243652} +02/25/2022 04:27:00 - INFO - codeparrot_training - Step 20749: {'lr': 0.0003342378691463936, 'samples': 10624000, 'steps': 20749, 'loss/train': 2.309401512145996} +02/25/2022 04:27:06 - INFO - codeparrot_training - Step 20750: {'lr': 0.00033422246334805503, 'samples': 10624512, 'steps': 20750, 'loss/train': 2.1629879474639893} +02/25/2022 04:27:09 - INFO - codeparrot_training - Step 20751: {'lr': 0.0003342070571889342, 'samples': 10625024, 'steps': 20751, 'loss/train': 1.0394816398620605} +02/25/2022 04:27:15 - INFO - codeparrot_training - Step 20752: {'lr': 0.00033419165066909707, 'samples': 10625536, 'steps': 20752, 'loss/train': 2.7774131298065186} +02/25/2022 04:27:18 - INFO - codeparrot_training - Step 20753: {'lr': 0.0003341762437886097, 'samples': 10626048, 'steps': 20753, 'loss/train': 3.2035017013549805} +02/25/2022 04:27:24 - INFO - codeparrot_training - Step 20754: {'lr': 0.0003341608365475379, 'samples': 10626560, 'steps': 20754, 'loss/train': 0.35955071449279785} +02/25/2022 04:27:27 - INFO - codeparrot_training - Step 20755: {'lr': 0.00033414542894594793, 'samples': 10627072, 'steps': 20755, 'loss/train': 1.6717816591262817} +02/25/2022 04:27:33 - INFO - codeparrot_training - Step 20756: {'lr': 0.00033413002098390567, 'samples': 10627584, 'steps': 20756, 'loss/train': 2.539670467376709} +02/25/2022 04:27:36 - INFO - codeparrot_training - Step 20757: {'lr': 0.00033411461266147705, 'samples': 10628096, 'steps': 20757, 'loss/train': 1.463529348373413} +02/25/2022 04:27:42 - INFO - codeparrot_training - Step 20758: {'lr': 0.00033409920397872814, 'samples': 10628608, 'steps': 20758, 'loss/train': 1.6585992574691772} +02/25/2022 04:27:45 - INFO - codeparrot_training - Step 20759: {'lr': 0.00033408379493572493, 'samples': 10629120, 'steps': 20759, 'loss/train': 2.5601906776428223} +02/25/2022 04:27:51 - INFO - codeparrot_training - Step 20760: {'lr': 0.0003340683855325335, 'samples': 10629632, 'steps': 20760, 'loss/train': 1.451882243156433} +02/25/2022 04:27:54 - INFO - codeparrot_training - Step 20761: {'lr': 0.00033405297576921976, 'samples': 10630144, 'steps': 20761, 'loss/train': 2.0742735862731934} +02/25/2022 04:28:00 - INFO - codeparrot_training - Step 20762: {'lr': 0.00033403756564584974, 'samples': 10630656, 'steps': 20762, 'loss/train': 1.939662218093872} +02/25/2022 04:28:03 - INFO - codeparrot_training - Step 20763: {'lr': 0.0003340221551624896, 'samples': 10631168, 'steps': 20763, 'loss/train': 1.9721509218215942} +02/25/2022 04:28:11 - INFO - codeparrot_training - Step 20764: {'lr': 0.0003340067443192051, 'samples': 10631680, 'steps': 20764, 'loss/train': 1.8509892225265503} +02/25/2022 04:28:14 - INFO - codeparrot_training - Step 20765: {'lr': 0.0003339913331160624, 'samples': 10632192, 'steps': 20765, 'loss/train': 1.9136699438095093} +02/25/2022 04:28:20 - INFO - codeparrot_training - Step 20766: {'lr': 0.0003339759215531275, 'samples': 10632704, 'steps': 20766, 'loss/train': 2.413472890853882} +02/25/2022 04:28:23 - INFO - codeparrot_training - Step 20767: {'lr': 0.0003339605096304664, 'samples': 10633216, 'steps': 20767, 'loss/train': 2.148566246032715} +02/25/2022 04:28:29 - INFO - codeparrot_training - Step 20768: {'lr': 0.00033394509734814516, 'samples': 10633728, 'steps': 20768, 'loss/train': 2.2628605365753174} +02/25/2022 04:28:33 - INFO - codeparrot_training - Step 20769: {'lr': 0.00033392968470622987, 'samples': 10634240, 'steps': 20769, 'loss/train': 1.1457905769348145} +02/25/2022 04:28:38 - INFO - codeparrot_training - Step 20770: {'lr': 0.0003339142717047863, 'samples': 10634752, 'steps': 20770, 'loss/train': 2.2070224285125732} +02/25/2022 04:28:42 - INFO - codeparrot_training - Step 20771: {'lr': 0.0003338988583438808, 'samples': 10635264, 'steps': 20771, 'loss/train': 2.3740336894989014} +02/25/2022 04:28:47 - INFO - codeparrot_training - Step 20772: {'lr': 0.0003338834446235791, 'samples': 10635776, 'steps': 20772, 'loss/train': 1.4844857454299927} +02/25/2022 04:28:51 - INFO - codeparrot_training - Step 20773: {'lr': 0.00033386803054394744, 'samples': 10636288, 'steps': 20773, 'loss/train': 1.525451898574829} +02/25/2022 04:28:58 - INFO - codeparrot_training - Step 20774: {'lr': 0.0003338526161050517, 'samples': 10636800, 'steps': 20774, 'loss/train': 2.371129274368286} +02/25/2022 04:29:01 - INFO - codeparrot_training - Step 20775: {'lr': 0.00033383720130695794, 'samples': 10637312, 'steps': 20775, 'loss/train': 1.7599045038223267} +02/25/2022 04:29:07 - INFO - codeparrot_training - Step 20776: {'lr': 0.0003338217861497324, 'samples': 10637824, 'steps': 20776, 'loss/train': 2.037294387817383} +02/25/2022 04:29:10 - INFO - codeparrot_training - Step 20777: {'lr': 0.0003338063706334408, 'samples': 10638336, 'steps': 20777, 'loss/train': 1.5171146392822266} +02/25/2022 04:29:16 - INFO - codeparrot_training - Step 20778: {'lr': 0.00033379095475814937, 'samples': 10638848, 'steps': 20778, 'loss/train': 2.194605827331543} +02/25/2022 04:29:19 - INFO - codeparrot_training - Step 20779: {'lr': 0.00033377553852392404, 'samples': 10639360, 'steps': 20779, 'loss/train': 1.1043100357055664} +02/25/2022 04:29:25 - INFO - codeparrot_training - Step 20780: {'lr': 0.000333760121930831, 'samples': 10639872, 'steps': 20780, 'loss/train': 2.4773428440093994} +02/25/2022 04:29:28 - INFO - codeparrot_training - Step 20781: {'lr': 0.00033374470497893614, 'samples': 10640384, 'steps': 20781, 'loss/train': 2.133458137512207} +02/25/2022 04:29:34 - INFO - codeparrot_training - Step 20782: {'lr': 0.0003337292876683056, 'samples': 10640896, 'steps': 20782, 'loss/train': 3.3295552730560303} +02/25/2022 04:29:37 - INFO - codeparrot_training - Step 20783: {'lr': 0.0003337138699990053, 'samples': 10641408, 'steps': 20783, 'loss/train': 2.7440850734710693} +02/25/2022 04:29:45 - INFO - codeparrot_training - Step 20784: {'lr': 0.00033369845197110144, 'samples': 10641920, 'steps': 20784, 'loss/train': 1.9222973585128784} +02/25/2022 04:29:48 - INFO - codeparrot_training - Step 20785: {'lr': 0.00033368303358465994, 'samples': 10642432, 'steps': 20785, 'loss/train': 1.8609380722045898} +02/25/2022 04:29:54 - INFO - codeparrot_training - Step 20786: {'lr': 0.00033366761483974693, 'samples': 10642944, 'steps': 20786, 'loss/train': 2.008422613143921} +02/25/2022 04:29:57 - INFO - codeparrot_training - Step 20787: {'lr': 0.0003336521957364284, 'samples': 10643456, 'steps': 20787, 'loss/train': 2.1725199222564697} +02/25/2022 04:30:03 - INFO - codeparrot_training - Step 20788: {'lr': 0.0003336367762747704, 'samples': 10643968, 'steps': 20788, 'loss/train': 1.5638988018035889} +02/25/2022 04:30:06 - INFO - codeparrot_training - Step 20789: {'lr': 0.0003336213564548391, 'samples': 10644480, 'steps': 20789, 'loss/train': 1.1934763193130493} +02/25/2022 04:30:12 - INFO - codeparrot_training - Step 20790: {'lr': 0.0003336059362767004, 'samples': 10644992, 'steps': 20790, 'loss/train': 2.389613389968872} +02/25/2022 04:30:15 - INFO - codeparrot_training - Step 20791: {'lr': 0.0003335905157404204, 'samples': 10645504, 'steps': 20791, 'loss/train': 1.5832748413085938} +02/25/2022 04:30:21 - INFO - codeparrot_training - Step 20792: {'lr': 0.0003335750948460652, 'samples': 10646016, 'steps': 20792, 'loss/train': 2.5373685359954834} +02/25/2022 04:30:28 - INFO - codeparrot_training - Step 20793: {'lr': 0.0003335596735937009, 'samples': 10646528, 'steps': 20793, 'loss/train': 2.4140779972076416} +02/25/2022 04:30:31 - INFO - codeparrot_training - Step 20794: {'lr': 0.0003335442519833933, 'samples': 10647040, 'steps': 20794, 'loss/train': 2.4741437435150146} +02/25/2022 04:30:37 - INFO - codeparrot_training - Step 20795: {'lr': 0.00033352883001520884, 'samples': 10647552, 'steps': 20795, 'loss/train': 1.6465550661087036} +02/25/2022 04:30:40 - INFO - codeparrot_training - Step 20796: {'lr': 0.0003335134076892133, 'samples': 10648064, 'steps': 20796, 'loss/train': 2.7344467639923096} +02/25/2022 04:30:46 - INFO - codeparrot_training - Step 20797: {'lr': 0.0003334979850054729, 'samples': 10648576, 'steps': 20797, 'loss/train': 1.6852879524230957} +02/25/2022 04:30:50 - INFO - codeparrot_training - Step 20798: {'lr': 0.0003334825619640536, 'samples': 10649088, 'steps': 20798, 'loss/train': 1.8291069269180298} +02/25/2022 04:30:53 - INFO - codeparrot_training - Step 20799: {'lr': 0.0003334671385650215, 'samples': 10649600, 'steps': 20799, 'loss/train': 1.322557806968689} +02/25/2022 04:31:00 - INFO - codeparrot_training - Step 20800: {'lr': 0.0003334517148084427, 'samples': 10650112, 'steps': 20800, 'loss/train': 2.409087657928467} +02/25/2022 04:31:04 - INFO - codeparrot_training - Step 20801: {'lr': 0.00033343629069438333, 'samples': 10650624, 'steps': 20801, 'loss/train': 3.682776927947998} +02/25/2022 04:31:09 - INFO - codeparrot_training - Step 20802: {'lr': 0.0003334208662229093, 'samples': 10651136, 'steps': 20802, 'loss/train': 1.8637281656265259} +02/25/2022 04:31:13 - INFO - codeparrot_training - Step 20803: {'lr': 0.0003334054413940868, 'samples': 10651648, 'steps': 20803, 'loss/train': 1.6451548337936401} +02/25/2022 04:31:18 - INFO - codeparrot_training - Step 20804: {'lr': 0.0003333900162079818, 'samples': 10652160, 'steps': 20804, 'loss/train': 2.9036614894866943} +02/25/2022 04:31:22 - INFO - codeparrot_training - Step 20805: {'lr': 0.00033337459066466057, 'samples': 10652672, 'steps': 20805, 'loss/train': 2.6945085525512695} +02/25/2022 04:31:27 - INFO - codeparrot_training - Step 20806: {'lr': 0.000333359164764189, 'samples': 10653184, 'steps': 20806, 'loss/train': 2.3912503719329834} +02/25/2022 04:31:31 - INFO - codeparrot_training - Step 20807: {'lr': 0.00033334373850663323, 'samples': 10653696, 'steps': 20807, 'loss/train': 1.7559146881103516} +02/25/2022 04:31:36 - INFO - codeparrot_training - Step 20808: {'lr': 0.00033332831189205936, 'samples': 10654208, 'steps': 20808, 'loss/train': 2.333390951156616} +02/25/2022 04:31:40 - INFO - codeparrot_training - Step 20809: {'lr': 0.00033331288492053344, 'samples': 10654720, 'steps': 20809, 'loss/train': 1.834153175354004} +02/25/2022 04:31:47 - INFO - codeparrot_training - Step 20810: {'lr': 0.0003332974575921217, 'samples': 10655232, 'steps': 20810, 'loss/train': 1.6094986200332642} +02/25/2022 04:31:50 - INFO - codeparrot_training - Step 20811: {'lr': 0.00033328202990688996, 'samples': 10655744, 'steps': 20811, 'loss/train': 2.034541130065918} +02/25/2022 04:31:56 - INFO - codeparrot_training - Step 20812: {'lr': 0.0003332666018649044, 'samples': 10656256, 'steps': 20812, 'loss/train': 0.3137263357639313} +02/25/2022 04:32:00 - INFO - codeparrot_training - Step 20813: {'lr': 0.00033325117346623135, 'samples': 10656768, 'steps': 20813, 'loss/train': 1.6814106702804565} +02/25/2022 04:32:05 - INFO - codeparrot_training - Step 20814: {'lr': 0.00033323574471093656, 'samples': 10657280, 'steps': 20814, 'loss/train': 2.4355385303497314} +02/25/2022 04:32:09 - INFO - codeparrot_training - Step 20815: {'lr': 0.0003332203155990863, 'samples': 10657792, 'steps': 20815, 'loss/train': 1.4117026329040527} +02/25/2022 04:32:14 - INFO - codeparrot_training - Step 20816: {'lr': 0.00033320488613074666, 'samples': 10658304, 'steps': 20816, 'loss/train': 2.3932127952575684} +02/25/2022 04:32:18 - INFO - codeparrot_training - Step 20817: {'lr': 0.00033318945630598373, 'samples': 10658816, 'steps': 20817, 'loss/train': 1.157488226890564} +02/25/2022 04:32:23 - INFO - codeparrot_training - Step 20818: {'lr': 0.00033317402612486355, 'samples': 10659328, 'steps': 20818, 'loss/train': 2.4078705310821533} +02/25/2022 04:32:27 - INFO - codeparrot_training - Step 20819: {'lr': 0.00033315859558745225, 'samples': 10659840, 'steps': 20819, 'loss/train': 1.5141855478286743} +02/25/2022 04:32:34 - INFO - codeparrot_training - Step 20820: {'lr': 0.0003331431646938159, 'samples': 10660352, 'steps': 20820, 'loss/train': 1.9648964405059814} +02/25/2022 04:32:37 - INFO - codeparrot_training - Step 20821: {'lr': 0.00033312773344402075, 'samples': 10660864, 'steps': 20821, 'loss/train': 2.2263917922973633} +02/25/2022 04:32:43 - INFO - codeparrot_training - Step 20822: {'lr': 0.00033311230183813266, 'samples': 10661376, 'steps': 20822, 'loss/train': 2.011302947998047} +02/25/2022 04:32:46 - INFO - codeparrot_training - Step 20823: {'lr': 0.0003330968698762179, 'samples': 10661888, 'steps': 20823, 'loss/train': 1.5979396104812622} +02/25/2022 04:32:52 - INFO - codeparrot_training - Step 20824: {'lr': 0.0003330814375583426, 'samples': 10662400, 'steps': 20824, 'loss/train': 2.817964792251587} +02/25/2022 04:32:55 - INFO - codeparrot_training - Step 20825: {'lr': 0.00033306600488457264, 'samples': 10662912, 'steps': 20825, 'loss/train': 1.537620186805725} +02/25/2022 04:33:01 - INFO - codeparrot_training - Step 20826: {'lr': 0.00033305057185497444, 'samples': 10663424, 'steps': 20826, 'loss/train': 1.2470121383666992} +02/25/2022 04:33:05 - INFO - codeparrot_training - Step 20827: {'lr': 0.0003330351384696139, 'samples': 10663936, 'steps': 20827, 'loss/train': 2.086422920227051} +02/25/2022 04:33:10 - INFO - codeparrot_training - Step 20828: {'lr': 0.00033301970472855724, 'samples': 10664448, 'steps': 20828, 'loss/train': 1.9239081144332886} +02/25/2022 04:33:13 - INFO - codeparrot_training - Step 20829: {'lr': 0.0003330042706318705, 'samples': 10664960, 'steps': 20829, 'loss/train': 2.0555431842803955} +02/25/2022 04:33:21 - INFO - codeparrot_training - Step 20830: {'lr': 0.00033298883617961984, 'samples': 10665472, 'steps': 20830, 'loss/train': 1.4617338180541992} +02/25/2022 04:33:24 - INFO - codeparrot_training - Step 20831: {'lr': 0.0003329734013718713, 'samples': 10665984, 'steps': 20831, 'loss/train': 0.9457871913909912} +02/25/2022 04:33:30 - INFO - codeparrot_training - Step 20832: {'lr': 0.0003329579662086911, 'samples': 10666496, 'steps': 20832, 'loss/train': 1.6988276243209839} +02/25/2022 04:33:33 - INFO - codeparrot_training - Step 20833: {'lr': 0.00033294253069014534, 'samples': 10667008, 'steps': 20833, 'loss/train': 2.575636625289917} +02/25/2022 04:33:39 - INFO - codeparrot_training - Step 20834: {'lr': 0.0003329270948163001, 'samples': 10667520, 'steps': 20834, 'loss/train': 2.3679113388061523} +02/25/2022 04:33:42 - INFO - codeparrot_training - Step 20835: {'lr': 0.0003329116585872215, 'samples': 10668032, 'steps': 20835, 'loss/train': 0.6654316186904907} +02/25/2022 04:33:48 - INFO - codeparrot_training - Step 20836: {'lr': 0.00033289622200297563, 'samples': 10668544, 'steps': 20836, 'loss/train': 3.990220069885254} +02/25/2022 04:33:51 - INFO - codeparrot_training - Step 20837: {'lr': 0.0003328807850636287, 'samples': 10669056, 'steps': 20837, 'loss/train': 1.8589555025100708} +02/25/2022 04:33:58 - INFO - codeparrot_training - Step 20838: {'lr': 0.0003328653477692469, 'samples': 10669568, 'steps': 20838, 'loss/train': 1.8131022453308105} +02/25/2022 04:34:01 - INFO - codeparrot_training - Step 20839: {'lr': 0.0003328499101198962, 'samples': 10670080, 'steps': 20839, 'loss/train': 2.7114906311035156} +02/25/2022 04:34:05 - INFO - codeparrot_training - Step 20840: {'lr': 0.0003328344721156427, 'samples': 10670592, 'steps': 20840, 'loss/train': 2.376781702041626} +02/25/2022 04:34:10 - INFO - codeparrot_training - Step 20841: {'lr': 0.00033281903375655277, 'samples': 10671104, 'steps': 20841, 'loss/train': 1.2486603260040283} +02/25/2022 04:34:13 - INFO - codeparrot_training - Step 20842: {'lr': 0.0003328035950426923, 'samples': 10671616, 'steps': 20842, 'loss/train': 2.7645821571350098} +02/25/2022 04:34:19 - INFO - codeparrot_training - Step 20843: {'lr': 0.0003327881559741276, 'samples': 10672128, 'steps': 20843, 'loss/train': 1.2805066108703613} +02/25/2022 04:34:23 - INFO - codeparrot_training - Step 20844: {'lr': 0.00033277271655092467, 'samples': 10672640, 'steps': 20844, 'loss/train': 2.83009672164917} +02/25/2022 04:34:28 - INFO - codeparrot_training - Step 20845: {'lr': 0.0003327572767731497, 'samples': 10673152, 'steps': 20845, 'loss/train': 2.5204989910125732} +02/25/2022 04:34:31 - INFO - codeparrot_training - Step 20846: {'lr': 0.0003327418366408689, 'samples': 10673664, 'steps': 20846, 'loss/train': 2.482987642288208} +02/25/2022 04:34:39 - INFO - codeparrot_training - Step 20847: {'lr': 0.0003327263961541483, 'samples': 10674176, 'steps': 20847, 'loss/train': 1.3386704921722412} +02/25/2022 04:34:42 - INFO - codeparrot_training - Step 20848: {'lr': 0.0003327109553130541, 'samples': 10674688, 'steps': 20848, 'loss/train': 2.4186148643493652} +02/25/2022 04:34:48 - INFO - codeparrot_training - Step 20849: {'lr': 0.0003326955141176524, 'samples': 10675200, 'steps': 20849, 'loss/train': 2.323169469833374} +02/25/2022 04:34:51 - INFO - codeparrot_training - Step 20850: {'lr': 0.0003326800725680094, 'samples': 10675712, 'steps': 20850, 'loss/train': 2.3529465198516846} +02/25/2022 04:34:57 - INFO - codeparrot_training - Step 20851: {'lr': 0.0003326646306641912, 'samples': 10676224, 'steps': 20851, 'loss/train': 1.5705105066299438} +02/25/2022 04:35:00 - INFO - codeparrot_training - Step 20852: {'lr': 0.000332649188406264, 'samples': 10676736, 'steps': 20852, 'loss/train': 0.962972104549408} +02/25/2022 04:35:06 - INFO - codeparrot_training - Step 20853: {'lr': 0.0003326337457942939, 'samples': 10677248, 'steps': 20853, 'loss/train': 2.0908255577087402} +02/25/2022 04:35:09 - INFO - codeparrot_training - Step 20854: {'lr': 0.00033261830282834716, 'samples': 10677760, 'steps': 20854, 'loss/train': 1.5958484411239624} +02/25/2022 04:35:15 - INFO - codeparrot_training - Step 20855: {'lr': 0.00033260285950848965, 'samples': 10678272, 'steps': 20855, 'loss/train': 1.704235553741455} +02/25/2022 04:35:18 - INFO - codeparrot_training - Step 20856: {'lr': 0.0003325874158347879, 'samples': 10678784, 'steps': 20856, 'loss/train': 1.8372846841812134} +02/25/2022 04:35:25 - INFO - codeparrot_training - Step 20857: {'lr': 0.0003325719718073078, 'samples': 10679296, 'steps': 20857, 'loss/train': 2.416452169418335} +02/25/2022 04:35:29 - INFO - codeparrot_training - Step 20858: {'lr': 0.00033255652742611566, 'samples': 10679808, 'steps': 20858, 'loss/train': 1.8830294609069824} +02/25/2022 04:35:34 - INFO - codeparrot_training - Step 20859: {'lr': 0.0003325410826912775, 'samples': 10680320, 'steps': 20859, 'loss/train': 3.635756015777588} +02/25/2022 04:35:38 - INFO - codeparrot_training - Step 20860: {'lr': 0.0003325256376028595, 'samples': 10680832, 'steps': 20860, 'loss/train': 1.945672869682312} +02/25/2022 04:35:43 - INFO - codeparrot_training - Step 20861: {'lr': 0.000332510192160928, 'samples': 10681344, 'steps': 20861, 'loss/train': 1.0041775703430176} +02/25/2022 04:35:47 - INFO - codeparrot_training - Step 20862: {'lr': 0.000332494746365549, 'samples': 10681856, 'steps': 20862, 'loss/train': 2.1228537559509277} +02/25/2022 04:35:52 - INFO - codeparrot_training - Step 20863: {'lr': 0.00033247930021678866, 'samples': 10682368, 'steps': 20863, 'loss/train': 2.3177340030670166} +02/25/2022 04:35:56 - INFO - codeparrot_training - Step 20864: {'lr': 0.0003324638537147132, 'samples': 10682880, 'steps': 20864, 'loss/train': 1.7031713724136353} +02/25/2022 04:36:01 - INFO - codeparrot_training - Step 20865: {'lr': 0.00033244840685938884, 'samples': 10683392, 'steps': 20865, 'loss/train': 2.3604841232299805} +02/25/2022 04:36:05 - INFO - codeparrot_training - Step 20866: {'lr': 0.0003324329596508816, 'samples': 10683904, 'steps': 20866, 'loss/train': 1.0252118110656738} +02/25/2022 04:36:12 - INFO - codeparrot_training - Step 20867: {'lr': 0.0003324175120892579, 'samples': 10684416, 'steps': 20867, 'loss/train': 1.966038465499878} +02/25/2022 04:36:15 - INFO - codeparrot_training - Step 20868: {'lr': 0.00033240206417458354, 'samples': 10684928, 'steps': 20868, 'loss/train': 1.7910404205322266} +02/25/2022 04:36:21 - INFO - codeparrot_training - Step 20869: {'lr': 0.00033238661590692496, 'samples': 10685440, 'steps': 20869, 'loss/train': 2.265265703201294} +02/25/2022 04:36:24 - INFO - codeparrot_training - Step 20870: {'lr': 0.00033237116728634833, 'samples': 10685952, 'steps': 20870, 'loss/train': 1.8578381538391113} +02/25/2022 04:36:30 - INFO - codeparrot_training - Step 20871: {'lr': 0.0003323557183129197, 'samples': 10686464, 'steps': 20871, 'loss/train': 2.419618606567383} +02/25/2022 04:36:33 - INFO - codeparrot_training - Step 20872: {'lr': 0.0003323402689867054, 'samples': 10686976, 'steps': 20872, 'loss/train': 1.4632823467254639} +02/25/2022 04:36:39 - INFO - codeparrot_training - Step 20873: {'lr': 0.0003323248193077715, 'samples': 10687488, 'steps': 20873, 'loss/train': 1.7312337160110474} +02/25/2022 04:36:42 - INFO - codeparrot_training - Step 20874: {'lr': 0.0003323093692761842, 'samples': 10688000, 'steps': 20874, 'loss/train': 2.6617040634155273} +02/25/2022 04:36:48 - INFO - codeparrot_training - Step 20875: {'lr': 0.00033229391889200974, 'samples': 10688512, 'steps': 20875, 'loss/train': 2.7804622650146484} +02/25/2022 04:36:51 - INFO - codeparrot_training - Step 20876: {'lr': 0.00033227846815531424, 'samples': 10689024, 'steps': 20876, 'loss/train': 2.038386583328247} +02/25/2022 04:36:57 - INFO - codeparrot_training - Step 20877: {'lr': 0.0003322630170661639, 'samples': 10689536, 'steps': 20877, 'loss/train': 1.1443202495574951} +02/25/2022 04:37:00 - INFO - codeparrot_training - Step 20878: {'lr': 0.0003322475656246249, 'samples': 10690048, 'steps': 20878, 'loss/train': 2.632556438446045} +02/25/2022 04:37:08 - INFO - codeparrot_training - Step 20879: {'lr': 0.0003322321138307635, 'samples': 10690560, 'steps': 20879, 'loss/train': 1.0574843883514404} +02/25/2022 04:37:11 - INFO - codeparrot_training - Step 20880: {'lr': 0.0003322166616846458, 'samples': 10691072, 'steps': 20880, 'loss/train': 1.4141308069229126} +02/25/2022 04:37:17 - INFO - codeparrot_training - Step 20881: {'lr': 0.0003322012091863381, 'samples': 10691584, 'steps': 20881, 'loss/train': 1.7375237941741943} +02/25/2022 04:37:20 - INFO - codeparrot_training - Step 20882: {'lr': 0.0003321857563359064, 'samples': 10692096, 'steps': 20882, 'loss/train': 2.5988423824310303} +02/25/2022 04:37:26 - INFO - codeparrot_training - Step 20883: {'lr': 0.00033217030313341704, 'samples': 10692608, 'steps': 20883, 'loss/train': 2.4692912101745605} +02/25/2022 04:37:29 - INFO - codeparrot_training - Step 20884: {'lr': 0.00033215484957893626, 'samples': 10693120, 'steps': 20884, 'loss/train': 1.7564690113067627} +02/25/2022 04:37:35 - INFO - codeparrot_training - Step 20885: {'lr': 0.0003321393956725302, 'samples': 10693632, 'steps': 20885, 'loss/train': 1.5622729063034058} +02/25/2022 04:37:38 - INFO - codeparrot_training - Step 20886: {'lr': 0.00033212394141426493, 'samples': 10694144, 'steps': 20886, 'loss/train': 1.2269775867462158} +02/25/2022 04:37:44 - INFO - codeparrot_training - Step 20887: {'lr': 0.00033210848680420693, 'samples': 10694656, 'steps': 20887, 'loss/train': 2.2369186878204346} +02/25/2022 04:37:47 - INFO - codeparrot_training - Step 20888: {'lr': 0.00033209303184242214, 'samples': 10695168, 'steps': 20888, 'loss/train': 2.591826915740967} +02/25/2022 04:37:55 - INFO - codeparrot_training - Step 20889: {'lr': 0.0003320775765289769, 'samples': 10695680, 'steps': 20889, 'loss/train': 2.1632304191589355} +02/25/2022 04:37:58 - INFO - codeparrot_training - Step 20890: {'lr': 0.0003320621208639374, 'samples': 10696192, 'steps': 20890, 'loss/train': 0.8858595490455627} +02/25/2022 04:38:04 - INFO - codeparrot_training - Step 20891: {'lr': 0.00033204666484736977, 'samples': 10696704, 'steps': 20891, 'loss/train': 1.6609606742858887} +02/25/2022 04:38:07 - INFO - codeparrot_training - Step 20892: {'lr': 0.0003320312084793404, 'samples': 10697216, 'steps': 20892, 'loss/train': 2.259467840194702} +02/25/2022 04:38:13 - INFO - codeparrot_training - Step 20893: {'lr': 0.0003320157517599153, 'samples': 10697728, 'steps': 20893, 'loss/train': 2.0813517570495605} +02/25/2022 04:38:16 - INFO - codeparrot_training - Step 20894: {'lr': 0.00033200029468916076, 'samples': 10698240, 'steps': 20894, 'loss/train': 1.8910647630691528} +02/25/2022 04:38:22 - INFO - codeparrot_training - Step 20895: {'lr': 0.00033198483726714294, 'samples': 10698752, 'steps': 20895, 'loss/train': 1.3603250980377197} +02/25/2022 04:38:25 - INFO - codeparrot_training - Step 20896: {'lr': 0.00033196937949392824, 'samples': 10699264, 'steps': 20896, 'loss/train': 2.342127561569214} +02/25/2022 04:38:31 - INFO - codeparrot_training - Step 20897: {'lr': 0.00033195392136958264, 'samples': 10699776, 'steps': 20897, 'loss/train': 2.5612542629241943} +02/25/2022 04:38:34 - INFO - codeparrot_training - Step 20898: {'lr': 0.00033193846289417253, 'samples': 10700288, 'steps': 20898, 'loss/train': 1.9002939462661743} +02/25/2022 04:38:40 - INFO - codeparrot_training - Step 20899: {'lr': 0.00033192300406776406, 'samples': 10700800, 'steps': 20899, 'loss/train': 2.211519718170166} +02/25/2022 04:38:43 - INFO - codeparrot_training - Step 20900: {'lr': 0.0003319075448904234, 'samples': 10701312, 'steps': 20900, 'loss/train': 2.207306146621704} +02/25/2022 04:38:49 - INFO - codeparrot_training - Step 20901: {'lr': 0.00033189208536221683, 'samples': 10701824, 'steps': 20901, 'loss/train': 2.002225160598755} +02/25/2022 04:38:52 - INFO - codeparrot_training - Step 20902: {'lr': 0.00033187662548321063, 'samples': 10702336, 'steps': 20902, 'loss/train': 1.6608514785766602} +02/25/2022 04:38:59 - INFO - codeparrot_training - Step 20903: {'lr': 0.00033186116525347093, 'samples': 10702848, 'steps': 20903, 'loss/train': 2.467921495437622} +02/25/2022 04:39:03 - INFO - codeparrot_training - Step 20904: {'lr': 0.00033184570467306403, 'samples': 10703360, 'steps': 20904, 'loss/train': 1.6327680349349976} +02/25/2022 04:39:08 - INFO - codeparrot_training - Step 20905: {'lr': 0.0003318302437420561, 'samples': 10703872, 'steps': 20905, 'loss/train': 1.6022164821624756} +02/25/2022 04:39:12 - INFO - codeparrot_training - Step 20906: {'lr': 0.0003318147824605133, 'samples': 10704384, 'steps': 20906, 'loss/train': 1.7568657398223877} +02/25/2022 04:39:18 - INFO - codeparrot_training - Step 20907: {'lr': 0.0003317993208285021, 'samples': 10704896, 'steps': 20907, 'loss/train': 2.583235502243042} +02/25/2022 04:39:21 - INFO - codeparrot_training - Step 20908: {'lr': 0.0003317838588460884, 'samples': 10705408, 'steps': 20908, 'loss/train': 2.3424243927001953} +02/25/2022 04:39:27 - INFO - codeparrot_training - Step 20909: {'lr': 0.0003317683965133388, 'samples': 10705920, 'steps': 20909, 'loss/train': 2.498840093612671} +02/25/2022 04:39:30 - INFO - codeparrot_training - Step 20910: {'lr': 0.0003317529338303192, 'samples': 10706432, 'steps': 20910, 'loss/train': 2.320816993713379} +02/25/2022 04:39:36 - INFO - codeparrot_training - Step 20911: {'lr': 0.00033173747079709616, 'samples': 10706944, 'steps': 20911, 'loss/train': 2.204019069671631} +02/25/2022 04:39:39 - INFO - codeparrot_training - Step 20912: {'lr': 0.0003317220074137356, 'samples': 10707456, 'steps': 20912, 'loss/train': 2.279435634613037} +02/25/2022 04:39:46 - INFO - codeparrot_training - Step 20913: {'lr': 0.000331706543680304, 'samples': 10707968, 'steps': 20913, 'loss/train': 1.2946723699569702} +02/25/2022 04:39:50 - INFO - codeparrot_training - Step 20914: {'lr': 0.0003316910795968675, 'samples': 10708480, 'steps': 20914, 'loss/train': 1.8910942077636719} +02/25/2022 04:39:55 - INFO - codeparrot_training - Step 20915: {'lr': 0.00033167561516349233, 'samples': 10708992, 'steps': 20915, 'loss/train': 1.9200794696807861} +02/25/2022 04:39:59 - INFO - codeparrot_training - Step 20916: {'lr': 0.0003316601503802448, 'samples': 10709504, 'steps': 20916, 'loss/train': 1.2175666093826294} +02/25/2022 04:40:04 - INFO - codeparrot_training - Step 20917: {'lr': 0.00033164468524719105, 'samples': 10710016, 'steps': 20917, 'loss/train': 2.0007216930389404} +02/25/2022 04:40:08 - INFO - codeparrot_training - Step 20918: {'lr': 0.00033162921976439744, 'samples': 10710528, 'steps': 20918, 'loss/train': 2.225008726119995} +02/25/2022 04:40:13 - INFO - codeparrot_training - Step 20919: {'lr': 0.00033161375393193015, 'samples': 10711040, 'steps': 20919, 'loss/train': 1.6899045705795288} +02/25/2022 04:40:17 - INFO - codeparrot_training - Step 20920: {'lr': 0.00033159828774985547, 'samples': 10711552, 'steps': 20920, 'loss/train': 1.5959523916244507} +02/25/2022 04:40:23 - INFO - codeparrot_training - Step 20921: {'lr': 0.0003315828212182396, 'samples': 10712064, 'steps': 20921, 'loss/train': 1.3249160051345825} +02/25/2022 04:40:26 - INFO - codeparrot_training - Step 20922: {'lr': 0.00033156735433714893, 'samples': 10712576, 'steps': 20922, 'loss/train': 1.5032868385314941} +02/25/2022 04:40:32 - INFO - codeparrot_training - Step 20923: {'lr': 0.00033155188710664945, 'samples': 10713088, 'steps': 20923, 'loss/train': 1.9754023551940918} +02/25/2022 04:40:35 - INFO - codeparrot_training - Step 20924: {'lr': 0.00033153641952680767, 'samples': 10713600, 'steps': 20924, 'loss/train': 1.177696943283081} +02/25/2022 04:40:43 - INFO - codeparrot_training - Step 20925: {'lr': 0.0003315209515976898, 'samples': 10714112, 'steps': 20925, 'loss/train': 2.143282413482666} +02/25/2022 04:40:46 - INFO - codeparrot_training - Step 20926: {'lr': 0.000331505483319362, 'samples': 10714624, 'steps': 20926, 'loss/train': 3.3589823246002197} +02/25/2022 04:40:52 - INFO - codeparrot_training - Step 20927: {'lr': 0.0003314900146918906, 'samples': 10715136, 'steps': 20927, 'loss/train': 2.3602800369262695} +02/25/2022 04:40:55 - INFO - codeparrot_training - Step 20928: {'lr': 0.0003314745457153419, 'samples': 10715648, 'steps': 20928, 'loss/train': 0.9475632309913635} +02/25/2022 04:41:01 - INFO - codeparrot_training - Step 20929: {'lr': 0.00033145907638978207, 'samples': 10716160, 'steps': 20929, 'loss/train': 0.13922227919101715} +02/25/2022 04:41:04 - INFO - codeparrot_training - Step 20930: {'lr': 0.00033144360671527747, 'samples': 10716672, 'steps': 20930, 'loss/train': 2.3799941539764404} +02/25/2022 04:41:10 - INFO - codeparrot_training - Step 20931: {'lr': 0.0003314281366918943, 'samples': 10717184, 'steps': 20931, 'loss/train': 1.9544856548309326} +02/25/2022 04:41:13 - INFO - codeparrot_training - Step 20932: {'lr': 0.0003314126663196988, 'samples': 10717696, 'steps': 20932, 'loss/train': 1.8646478652954102} +02/25/2022 04:41:19 - INFO - codeparrot_training - Step 20933: {'lr': 0.0003313971955987573, 'samples': 10718208, 'steps': 20933, 'loss/train': 3.3239803314208984} +02/25/2022 04:41:22 - INFO - codeparrot_training - Step 20934: {'lr': 0.0003313817245291361, 'samples': 10718720, 'steps': 20934, 'loss/train': 0.28450432419776917} +02/25/2022 04:41:30 - INFO - codeparrot_training - Step 20935: {'lr': 0.0003313662531109014, 'samples': 10719232, 'steps': 20935, 'loss/train': 1.54340660572052} +02/25/2022 04:41:33 - INFO - codeparrot_training - Step 20936: {'lr': 0.00033135078134411956, 'samples': 10719744, 'steps': 20936, 'loss/train': 1.963658332824707} +02/25/2022 04:41:39 - INFO - codeparrot_training - Step 20937: {'lr': 0.0003313353092288568, 'samples': 10720256, 'steps': 20937, 'loss/train': 1.5553035736083984} +02/25/2022 04:41:42 - INFO - codeparrot_training - Step 20938: {'lr': 0.00033131983676517934, 'samples': 10720768, 'steps': 20938, 'loss/train': 1.9589520692825317} +02/25/2022 04:41:48 - INFO - codeparrot_training - Step 20939: {'lr': 0.0003313043639531536, 'samples': 10721280, 'steps': 20939, 'loss/train': 1.6424849033355713} +02/25/2022 04:41:51 - INFO - codeparrot_training - Step 20940: {'lr': 0.00033128889079284574, 'samples': 10721792, 'steps': 20940, 'loss/train': 1.3562875986099243} +02/25/2022 04:41:57 - INFO - codeparrot_training - Step 20941: {'lr': 0.0003312734172843221, 'samples': 10722304, 'steps': 20941, 'loss/train': 0.8118757009506226} +02/25/2022 04:42:00 - INFO - codeparrot_training - Step 20942: {'lr': 0.0003312579434276489, 'samples': 10722816, 'steps': 20942, 'loss/train': 1.8427571058273315} +02/25/2022 04:42:06 - INFO - codeparrot_training - Step 20943: {'lr': 0.0003312424692228925, 'samples': 10723328, 'steps': 20943, 'loss/train': 1.7319566011428833} +02/25/2022 04:42:10 - INFO - codeparrot_training - Step 20944: {'lr': 0.0003312269946701191, 'samples': 10723840, 'steps': 20944, 'loss/train': 2.4748294353485107} +02/25/2022 04:42:17 - INFO - codeparrot_training - Step 20945: {'lr': 0.0003312115197693951, 'samples': 10724352, 'steps': 20945, 'loss/train': 2.1030755043029785} +02/25/2022 04:42:20 - INFO - codeparrot_training - Step 20946: {'lr': 0.00033119604452078676, 'samples': 10724864, 'steps': 20946, 'loss/train': 2.4600119590759277} +02/25/2022 04:42:26 - INFO - codeparrot_training - Step 20947: {'lr': 0.00033118056892436035, 'samples': 10725376, 'steps': 20947, 'loss/train': 1.407612919807434} +02/25/2022 04:42:29 - INFO - codeparrot_training - Step 20948: {'lr': 0.00033116509298018217, 'samples': 10725888, 'steps': 20948, 'loss/train': 2.043806552886963} +02/25/2022 04:42:35 - INFO - codeparrot_training - Step 20949: {'lr': 0.00033114961668831845, 'samples': 10726400, 'steps': 20949, 'loss/train': 0.8096098899841309} +02/25/2022 04:42:38 - INFO - codeparrot_training - Step 20950: {'lr': 0.00033113414004883556, 'samples': 10726912, 'steps': 20950, 'loss/train': 1.237052321434021} +02/25/2022 04:42:44 - INFO - codeparrot_training - Step 20951: {'lr': 0.0003311186630617998, 'samples': 10727424, 'steps': 20951, 'loss/train': 2.1905226707458496} +02/25/2022 04:42:47 - INFO - codeparrot_training - Step 20952: {'lr': 0.00033110318572727743, 'samples': 10727936, 'steps': 20952, 'loss/train': 1.2035577297210693} +02/25/2022 04:42:53 - INFO - codeparrot_training - Step 20953: {'lr': 0.0003310877080453348, 'samples': 10728448, 'steps': 20953, 'loss/train': 2.5778281688690186} +02/25/2022 04:42:56 - INFO - codeparrot_training - Step 20954: {'lr': 0.00033107223001603814, 'samples': 10728960, 'steps': 20954, 'loss/train': 1.4431873559951782} +02/25/2022 04:43:02 - INFO - codeparrot_training - Step 20955: {'lr': 0.00033105675163945373, 'samples': 10729472, 'steps': 20955, 'loss/train': 2.041386127471924} +02/25/2022 04:43:05 - INFO - codeparrot_training - Step 20956: {'lr': 0.000331041272915648, 'samples': 10729984, 'steps': 20956, 'loss/train': 1.9621171951293945} +02/25/2022 04:43:11 - INFO - codeparrot_training - Step 20957: {'lr': 0.00033102579384468723, 'samples': 10730496, 'steps': 20957, 'loss/train': 1.3029671907424927} +02/25/2022 04:43:14 - INFO - codeparrot_training - Step 20958: {'lr': 0.0003310103144266376, 'samples': 10731008, 'steps': 20958, 'loss/train': 1.6323598623275757} +02/25/2022 04:43:20 - INFO - codeparrot_training - Step 20959: {'lr': 0.00033099483466156554, 'samples': 10731520, 'steps': 20959, 'loss/train': 2.477983236312866} +02/25/2022 04:43:23 - INFO - codeparrot_training - Step 20960: {'lr': 0.00033097935454953737, 'samples': 10732032, 'steps': 20960, 'loss/train': 0.12386111170053482} +02/25/2022 04:43:30 - INFO - codeparrot_training - Step 20961: {'lr': 0.00033096387409061937, 'samples': 10732544, 'steps': 20961, 'loss/train': 1.303353190422058} +02/25/2022 04:43:34 - INFO - codeparrot_training - Step 20962: {'lr': 0.00033094839328487777, 'samples': 10733056, 'steps': 20962, 'loss/train': 1.988746166229248} +02/25/2022 04:43:39 - INFO - codeparrot_training - Step 20963: {'lr': 0.000330932912132379, 'samples': 10733568, 'steps': 20963, 'loss/train': 2.444192409515381} +02/25/2022 04:43:43 - INFO - codeparrot_training - Step 20964: {'lr': 0.0003309174306331893, 'samples': 10734080, 'steps': 20964, 'loss/train': 2.0982134342193604} +02/25/2022 04:43:48 - INFO - codeparrot_training - Step 20965: {'lr': 0.00033090194878737504, 'samples': 10734592, 'steps': 20965, 'loss/train': 1.7582303285598755} +02/25/2022 04:43:52 - INFO - codeparrot_training - Step 20966: {'lr': 0.0003308864665950025, 'samples': 10735104, 'steps': 20966, 'loss/train': 1.6458404064178467} +02/25/2022 04:43:58 - INFO - codeparrot_training - Step 20967: {'lr': 0.0003308709840561381, 'samples': 10735616, 'steps': 20967, 'loss/train': 0.9111060500144958} +02/25/2022 04:44:01 - INFO - codeparrot_training - Step 20968: {'lr': 0.00033085550117084795, 'samples': 10736128, 'steps': 20968, 'loss/train': 2.4431984424591064} +02/25/2022 04:44:07 - INFO - codeparrot_training - Step 20969: {'lr': 0.0003308400179391986, 'samples': 10736640, 'steps': 20969, 'loss/train': 1.752737045288086} +02/25/2022 04:44:10 - INFO - codeparrot_training - Step 20970: {'lr': 0.00033082453436125627, 'samples': 10737152, 'steps': 20970, 'loss/train': 3.0392465591430664} +02/25/2022 04:44:17 - INFO - codeparrot_training - Step 20971: {'lr': 0.00033080905043708734, 'samples': 10737664, 'steps': 20971, 'loss/train': 1.7490004301071167} +02/25/2022 04:44:21 - INFO - codeparrot_training - Step 20972: {'lr': 0.000330793566166758, 'samples': 10738176, 'steps': 20972, 'loss/train': 2.33172345161438} +02/25/2022 04:44:26 - INFO - codeparrot_training - Step 20973: {'lr': 0.00033077808155033473, 'samples': 10738688, 'steps': 20973, 'loss/train': 3.273465633392334} +02/25/2022 04:44:30 - INFO - codeparrot_training - Step 20974: {'lr': 0.0003307625965878838, 'samples': 10739200, 'steps': 20974, 'loss/train': 1.5874143838882446} +02/25/2022 04:44:35 - INFO - codeparrot_training - Step 20975: {'lr': 0.00033074711127947153, 'samples': 10739712, 'steps': 20975, 'loss/train': 1.7917135953903198} +02/25/2022 04:44:39 - INFO - codeparrot_training - Step 20976: {'lr': 0.0003307316256251644, 'samples': 10740224, 'steps': 20976, 'loss/train': 2.751756191253662} +02/25/2022 04:44:44 - INFO - codeparrot_training - Step 20977: {'lr': 0.0003307161396250285, 'samples': 10740736, 'steps': 20977, 'loss/train': 1.7633999586105347} +02/25/2022 04:44:48 - INFO - codeparrot_training - Step 20978: {'lr': 0.00033070065327913035, 'samples': 10741248, 'steps': 20978, 'loss/train': 2.336181163787842} +02/25/2022 04:44:53 - INFO - codeparrot_training - Step 20979: {'lr': 0.00033068516658753624, 'samples': 10741760, 'steps': 20979, 'loss/train': 1.8295520544052124} +02/25/2022 04:44:57 - INFO - codeparrot_training - Step 20980: {'lr': 0.00033066967955031236, 'samples': 10742272, 'steps': 20980, 'loss/train': 1.3669748306274414} +02/25/2022 04:45:04 - INFO - codeparrot_training - Step 20981: {'lr': 0.0003306541921675253, 'samples': 10742784, 'steps': 20981, 'loss/train': 2.227687120437622} +02/25/2022 04:45:07 - INFO - codeparrot_training - Step 20982: {'lr': 0.0003306387044392413, 'samples': 10743296, 'steps': 20982, 'loss/train': 1.1107168197631836} +02/25/2022 04:45:13 - INFO - codeparrot_training - Step 20983: {'lr': 0.0003306232163655267, 'samples': 10743808, 'steps': 20983, 'loss/train': 1.6783326864242554} +02/25/2022 04:45:17 - INFO - codeparrot_training - Step 20984: {'lr': 0.00033060772794644776, 'samples': 10744320, 'steps': 20984, 'loss/train': 1.659469485282898} +02/25/2022 04:45:22 - INFO - codeparrot_training - Step 20985: {'lr': 0.000330592239182071, 'samples': 10744832, 'steps': 20985, 'loss/train': 2.3079824447631836} +02/25/2022 04:45:25 - INFO - codeparrot_training - Step 20986: {'lr': 0.0003305767500724626, 'samples': 10745344, 'steps': 20986, 'loss/train': 2.3354249000549316} +02/25/2022 04:45:31 - INFO - codeparrot_training - Step 20987: {'lr': 0.00033056126061768905, 'samples': 10745856, 'steps': 20987, 'loss/train': 1.2813560962677002} +02/25/2022 04:45:35 - INFO - codeparrot_training - Step 20988: {'lr': 0.00033054577081781654, 'samples': 10746368, 'steps': 20988, 'loss/train': 2.155677318572998} +02/25/2022 04:45:40 - INFO - codeparrot_training - Step 20989: {'lr': 0.00033053028067291166, 'samples': 10746880, 'steps': 20989, 'loss/train': 2.8354949951171875} +02/25/2022 04:45:44 - INFO - codeparrot_training - Step 20990: {'lr': 0.00033051479018304054, 'samples': 10747392, 'steps': 20990, 'loss/train': 1.4095090627670288} +02/25/2022 04:45:51 - INFO - codeparrot_training - Step 20991: {'lr': 0.0003304992993482697, 'samples': 10747904, 'steps': 20991, 'loss/train': 1.4379342794418335} +02/25/2022 04:45:54 - INFO - codeparrot_training - Step 20992: {'lr': 0.0003304838081686653, 'samples': 10748416, 'steps': 20992, 'loss/train': 2.53092622756958} +02/25/2022 04:46:00 - INFO - codeparrot_training - Step 20993: {'lr': 0.0003304683166442939, 'samples': 10748928, 'steps': 20993, 'loss/train': 1.5771119594573975} +02/25/2022 04:46:03 - INFO - codeparrot_training - Step 20994: {'lr': 0.0003304528247752218, 'samples': 10749440, 'steps': 20994, 'loss/train': 2.071444511413574} +02/25/2022 04:46:09 - INFO - codeparrot_training - Step 20995: {'lr': 0.0003304373325615153, 'samples': 10749952, 'steps': 20995, 'loss/train': 1.9969342947006226} +02/25/2022 04:46:12 - INFO - codeparrot_training - Step 20996: {'lr': 0.00033042184000324086, 'samples': 10750464, 'steps': 20996, 'loss/train': 2.3822269439697266} +02/25/2022 04:46:18 - INFO - codeparrot_training - Step 20997: {'lr': 0.00033040634710046474, 'samples': 10750976, 'steps': 20997, 'loss/train': 2.328132152557373} +02/25/2022 04:46:23 - INFO - codeparrot_training - Step 20998: {'lr': 0.0003303908538532534, 'samples': 10751488, 'steps': 20998, 'loss/train': 2.365709066390991} +02/25/2022 04:46:27 - INFO - codeparrot_training - Step 20999: {'lr': 0.00033037536026167313, 'samples': 10752000, 'steps': 20999, 'loss/train': 2.366279363632202} +02/25/2022 04:46:27 - INFO - codeparrot_training - Evaluating and saving model checkpoint