Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-86 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-86 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="furproxy/9b-86")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)
```

```python
# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("furproxy/9b-86")
model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-86")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-86 with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "furproxy/9b-86"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-86",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
Use Docker
docker model run hf.co/furproxy/9b-86
- SGLang
How to use furproxy/9b-86 with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "furproxy/9b-86" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-86",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "furproxy/9b-86" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-86",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
- Docker Model Runner
How to use furproxy/9b-86 with Docker Model Runner:
docker model run hf.co/furproxy/9b-86
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.0, | |
| "eval_steps": 500, | |
| "global_step": 1804, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.004434589800443459, | |
| "grad_norm": 2.7257320880889893, | |
| "learning_rate": 5.494505494505495e-08, | |
| "loss": 1.8651859760284424, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.008869179600886918, | |
| "grad_norm": 6.67538595199585, | |
| "learning_rate": 1.6483516483516484e-07, | |
| "loss": 2.1317176818847656, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.013303769401330377, | |
| "grad_norm": 6.216647624969482, | |
| "learning_rate": 2.7472527472527475e-07, | |
| "loss": 1.904492735862732, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.017738359201773836, | |
| "grad_norm": 1.1432791948318481, | |
| "learning_rate": 3.846153846153847e-07, | |
| "loss": 1.8241561651229858, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.022172949002217297, | |
| "grad_norm": 2.2455904483795166, | |
| "learning_rate": 4.945054945054946e-07, | |
| "loss": 1.6350065469741821, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.026607538802660754, | |
| "grad_norm": 3.8747806549072266, | |
| "learning_rate": 6.043956043956044e-07, | |
| "loss": 2.1859989166259766, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.031042128603104215, | |
| "grad_norm": 4.237593173980713, | |
| "learning_rate": 7.142857142857143e-07, | |
| "loss": 1.943555235862732, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.03547671840354767, | |
| "grad_norm": 6.446155071258545, | |
| "learning_rate": 8.241758241758242e-07, | |
| "loss": 1.4925756454467773, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.03991130820399113, | |
| "grad_norm": 2.6784210205078125, | |
| "learning_rate": 9.340659340659342e-07, | |
| "loss": 1.4368367195129395, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.04434589800443459, | |
| "grad_norm": 4.474376201629639, | |
| "learning_rate": 1.0439560439560442e-06, | |
| "loss": 1.5969985723495483, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.04878048780487805, | |
| "grad_norm": 5.059698581695557, | |
| "learning_rate": 1.153846153846154e-06, | |
| "loss": 1.462158441543579, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.05321507760532151, | |
| "grad_norm": 1.1916331052780151, | |
| "learning_rate": 1.2637362637362637e-06, | |
| "loss": 1.5942579507827759, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.057649667405764965, | |
| "grad_norm": 1.0105680227279663, | |
| "learning_rate": 1.3736263736263738e-06, | |
| "loss": 1.4908994436264038, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.06208425720620843, | |
| "grad_norm": 1.906994104385376, | |
| "learning_rate": 1.4835164835164837e-06, | |
| "loss": 1.179060935974121, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.06651884700665188, | |
| "grad_norm": 1.2037031650543213, | |
| "learning_rate": 1.5934065934065933e-06, | |
| "loss": 1.3003807067871094, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.07095343680709534, | |
| "grad_norm": 7.113156318664551, | |
| "learning_rate": 1.7032967032967034e-06, | |
| "loss": 1.2082353830337524, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.07538802660753881, | |
| "grad_norm": 0.885746955871582, | |
| "learning_rate": 1.8131868131868133e-06, | |
| "loss": 1.5782675743103027, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.07982261640798226, | |
| "grad_norm": 3.321558713912964, | |
| "learning_rate": 1.9230769230769234e-06, | |
| "loss": 1.342716932296753, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.08425720620842572, | |
| "grad_norm": 1.9045021533966064, | |
| "learning_rate": 2.032967032967033e-06, | |
| "loss": 1.5886274576187134, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.08869179600886919, | |
| "grad_norm": 1.937488079071045, | |
| "learning_rate": 2.1428571428571427e-06, | |
| "loss": 0.8519856929779053, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.09312638580931264, | |
| "grad_norm": 1.281927227973938, | |
| "learning_rate": 2.252747252747253e-06, | |
| "loss": 1.4712433815002441, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.0975609756097561, | |
| "grad_norm": 1.415075421333313, | |
| "learning_rate": 2.362637362637363e-06, | |
| "loss": 1.4157054424285889, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.10199556541019955, | |
| "grad_norm": 1.424558162689209, | |
| "learning_rate": 2.472527472527473e-06, | |
| "loss": 1.428381323814392, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.10643015521064302, | |
| "grad_norm": 2.6216702461242676, | |
| "learning_rate": 2.582417582417583e-06, | |
| "loss": 1.6168309450149536, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.11086474501108648, | |
| "grad_norm": 1.449957013130188, | |
| "learning_rate": 2.6923076923076923e-06, | |
| "loss": 1.502392053604126, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.11529933481152993, | |
| "grad_norm": 1.0936357975006104, | |
| "learning_rate": 2.8021978021978024e-06, | |
| "loss": 1.4243361949920654, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.1197339246119734, | |
| "grad_norm": 1.5021100044250488, | |
| "learning_rate": 2.9120879120879125e-06, | |
| "loss": 1.4172306060791016, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.12416851441241686, | |
| "grad_norm": 2.1787075996398926, | |
| "learning_rate": 3.021978021978022e-06, | |
| "loss": 1.1664988994598389, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.1286031042128603, | |
| "grad_norm": 1.6643332242965698, | |
| "learning_rate": 3.1318681318681323e-06, | |
| "loss": 1.364538550376892, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.13303769401330376, | |
| "grad_norm": 0.6121675968170166, | |
| "learning_rate": 3.2417582417582424e-06, | |
| "loss": 1.415244221687317, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.13747228381374724, | |
| "grad_norm": 1.6771880388259888, | |
| "learning_rate": 3.3516483516483516e-06, | |
| "loss": 1.3695659637451172, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.1419068736141907, | |
| "grad_norm": 1.3066154718399048, | |
| "learning_rate": 3.4615384615384617e-06, | |
| "loss": 1.435889720916748, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.14634146341463414, | |
| "grad_norm": 1.6970858573913574, | |
| "learning_rate": 3.5714285714285718e-06, | |
| "loss": 1.4904870986938477, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.15077605321507762, | |
| "grad_norm": 1.193587303161621, | |
| "learning_rate": 3.681318681318682e-06, | |
| "loss": 1.3552013635635376, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.15521064301552107, | |
| "grad_norm": 1.7035346031188965, | |
| "learning_rate": 3.7912087912087915e-06, | |
| "loss": 1.3096868991851807, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.15964523281596452, | |
| "grad_norm": 1.0295628309249878, | |
| "learning_rate": 3.901098901098901e-06, | |
| "loss": 1.3637210130691528, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.164079822616408, | |
| "grad_norm": 1.0206137895584106, | |
| "learning_rate": 4.010989010989012e-06, | |
| "loss": 1.2611558437347412, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.16851441241685144, | |
| "grad_norm": 3.5194575786590576, | |
| "learning_rate": 4.120879120879121e-06, | |
| "loss": 1.3402124643325806, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.1729490022172949, | |
| "grad_norm": 1.0586298704147339, | |
| "learning_rate": 4.230769230769231e-06, | |
| "loss": 1.3869333267211914, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.17738359201773837, | |
| "grad_norm": 2.9942257404327393, | |
| "learning_rate": 4.340659340659341e-06, | |
| "loss": 1.3390436172485352, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.18181818181818182, | |
| "grad_norm": 1.4550483226776123, | |
| "learning_rate": 4.45054945054945e-06, | |
| "loss": 0.8444686532020569, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.18625277161862527, | |
| "grad_norm": 0.7003264427185059, | |
| "learning_rate": 4.560439560439561e-06, | |
| "loss": 1.0279695987701416, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.19068736141906872, | |
| "grad_norm": 4.848526954650879, | |
| "learning_rate": 4.6703296703296706e-06, | |
| "loss": 0.9698995351791382, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.1951219512195122, | |
| "grad_norm": 1.024747610092163, | |
| "learning_rate": 4.780219780219781e-06, | |
| "loss": 1.2624331712722778, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.19955654101995565, | |
| "grad_norm": 1.7162449359893799, | |
| "learning_rate": 4.890109890109891e-06, | |
| "loss": 1.392942190170288, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.2039911308203991, | |
| "grad_norm": 2.0316786766052246, | |
| "learning_rate": 5e-06, | |
| "loss": 1.229879379272461, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.20842572062084258, | |
| "grad_norm": 1.6212053298950195, | |
| "learning_rate": 4.999984864490455e-06, | |
| "loss": 1.2555553913116455, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.21286031042128603, | |
| "grad_norm": 2.102651357650757, | |
| "learning_rate": 4.999939458165447e-06, | |
| "loss": 1.8031316995620728, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.21729490022172948, | |
| "grad_norm": 1.7087279558181763, | |
| "learning_rate": 4.999863781635863e-06, | |
| "loss": 0.950995922088623, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.22172949002217296, | |
| "grad_norm": 1.2007180452346802, | |
| "learning_rate": 4.999757835919841e-06, | |
| "loss": 1.1569544076919556, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2261640798226164, | |
| "grad_norm": 0.7960771322250366, | |
| "learning_rate": 4.9996216224427495e-06, | |
| "loss": 1.313308596611023, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.23059866962305986, | |
| "grad_norm": 1.0540348291397095, | |
| "learning_rate": 4.999455143037178e-06, | |
| "loss": 1.0472400188446045, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.23503325942350334, | |
| "grad_norm": 1.4287160634994507, | |
| "learning_rate": 4.999258399942903e-06, | |
| "loss": 1.2840181589126587, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.2394678492239468, | |
| "grad_norm": 1.540831208229065, | |
| "learning_rate": 4.9990313958068645e-06, | |
| "loss": 1.48462975025177, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.24390243902439024, | |
| "grad_norm": 1.6057487726211548, | |
| "learning_rate": 4.998774133683127e-06, | |
| "loss": 1.026124119758606, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.24833702882483372, | |
| "grad_norm": 3.355077028274536, | |
| "learning_rate": 4.9984866170328426e-06, | |
| "loss": 1.2828115224838257, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.25277161862527714, | |
| "grad_norm": 1.1793975830078125, | |
| "learning_rate": 4.998168849724196e-06, | |
| "loss": 0.7794591188430786, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.2572062084257206, | |
| "grad_norm": 1.1411471366882324, | |
| "learning_rate": 4.997820836032363e-06, | |
| "loss": 1.3091580867767334, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.2616407982261641, | |
| "grad_norm": 0.4769836962223053, | |
| "learning_rate": 4.997442580639443e-06, | |
| "loss": 1.0651829242706299, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.2660753880266075, | |
| "grad_norm": 0.9096765518188477, | |
| "learning_rate": 4.997034088634404e-06, | |
| "loss": 1.2504687309265137, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.270509977827051, | |
| "grad_norm": 1.190130591392517, | |
| "learning_rate": 4.996595365513012e-06, | |
| "loss": 1.1688843965530396, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.2749445676274945, | |
| "grad_norm": 2.0130395889282227, | |
| "learning_rate": 4.9961264171777515e-06, | |
| "loss": 1.5667338371276855, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.2793791574279379, | |
| "grad_norm": 0.7699439525604248, | |
| "learning_rate": 4.995627249937755e-06, | |
| "loss": 0.9175050258636475, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.2838137472283814, | |
| "grad_norm": 0.866606593132019, | |
| "learning_rate": 4.995097870508711e-06, | |
| "loss": 1.209236741065979, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.28824833702882485, | |
| "grad_norm": 1.518518328666687, | |
| "learning_rate": 4.994538286012777e-06, | |
| "loss": 0.9163856506347656, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.2926829268292683, | |
| "grad_norm": 1.7814369201660156, | |
| "learning_rate": 4.993948503978484e-06, | |
| "loss": 0.974137544631958, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.29711751662971175, | |
| "grad_norm": 3.4997236728668213, | |
| "learning_rate": 4.993328532340633e-06, | |
| "loss": 0.9840149283409119, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.30155210643015523, | |
| "grad_norm": 3.2781076431274414, | |
| "learning_rate": 4.99267837944019e-06, | |
| "loss": 1.110643982887268, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.30598669623059865, | |
| "grad_norm": 10.070029258728027, | |
| "learning_rate": 4.991998054024172e-06, | |
| "loss": 0.7842212319374084, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.31042128603104213, | |
| "grad_norm": 1.092695713043213, | |
| "learning_rate": 4.991287565245534e-06, | |
| "loss": 1.0406193733215332, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.3148558758314856, | |
| "grad_norm": 1.3157380819320679, | |
| "learning_rate": 4.990546922663039e-06, | |
| "loss": 0.9569450616836548, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.31929046563192903, | |
| "grad_norm": 2.488039970397949, | |
| "learning_rate": 4.989776136241134e-06, | |
| "loss": 0.9646241068840027, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.3237250554323725, | |
| "grad_norm": 1.5037761926651, | |
| "learning_rate": 4.988975216349814e-06, | |
| "loss": 0.9507364630699158, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.328159645232816, | |
| "grad_norm": 2.198122501373291, | |
| "learning_rate": 4.988144173764486e-06, | |
| "loss": 1.401615023612976, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.3325942350332594, | |
| "grad_norm": 7.834370136260986, | |
| "learning_rate": 4.987283019665817e-06, | |
| "loss": 1.1528998613357544, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.3370288248337029, | |
| "grad_norm": 3.270291566848755, | |
| "learning_rate": 4.986391765639592e-06, | |
| "loss": 1.060468077659607, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.34146341463414637, | |
| "grad_norm": 1.180214762687683, | |
| "learning_rate": 4.985470423676551e-06, | |
| "loss": 1.2685281038284302, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.3458980044345898, | |
| "grad_norm": 2.8346996307373047, | |
| "learning_rate": 4.984519006172232e-06, | |
| "loss": 1.2129993438720703, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.35033259423503327, | |
| "grad_norm": 0.9781622290611267, | |
| "learning_rate": 4.983537525926804e-06, | |
| "loss": 1.2925925254821777, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.35476718403547675, | |
| "grad_norm": 0.549372136592865, | |
| "learning_rate": 4.982525996144891e-06, | |
| "loss": 1.1071832180023193, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.35920177383592017, | |
| "grad_norm": 1.4813437461853027, | |
| "learning_rate": 4.981484430435399e-06, | |
| "loss": 0.8754007816314697, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.36363636363636365, | |
| "grad_norm": 1.7693350315093994, | |
| "learning_rate": 4.98041284281133e-06, | |
| "loss": 0.9003241062164307, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.36807095343680707, | |
| "grad_norm": 1.363362193107605, | |
| "learning_rate": 4.979311247689596e-06, | |
| "loss": 1.273066759109497, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.37250554323725055, | |
| "grad_norm": 6.419438362121582, | |
| "learning_rate": 4.978179659890821e-06, | |
| "loss": 1.1863045692443848, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.376940133037694, | |
| "grad_norm": 1.189877986907959, | |
| "learning_rate": 4.977018094639146e-06, | |
| "loss": 1.2364041805267334, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.38137472283813745, | |
| "grad_norm": 3.7446060180664062, | |
| "learning_rate": 4.975826567562023e-06, | |
| "loss": 0.7797529101371765, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.3858093126385809, | |
| "grad_norm": 4.418823719024658, | |
| "learning_rate": 4.97460509469e-06, | |
| "loss": 1.6287654638290405, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.3902439024390244, | |
| "grad_norm": 1.5152671337127686, | |
| "learning_rate": 4.973353692456513e-06, | |
| "loss": 1.26861572265625, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.3946784922394678, | |
| "grad_norm": 1.269068717956543, | |
| "learning_rate": 4.972072377697661e-06, | |
| "loss": 1.3224495649337769, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.3991130820399113, | |
| "grad_norm": 0.7425451874732971, | |
| "learning_rate": 4.9707611676519775e-06, | |
| "loss": 1.05917227268219, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.4035476718403548, | |
| "grad_norm": 1.1265835762023926, | |
| "learning_rate": 4.969420079960203e-06, | |
| "loss": 1.2697656154632568, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.4079822616407982, | |
| "grad_norm": 1.1472365856170654, | |
| "learning_rate": 4.968049132665045e-06, | |
| "loss": 0.884378969669342, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.4124168514412417, | |
| "grad_norm": 1.4855486154556274, | |
| "learning_rate": 4.966648344210936e-06, | |
| "loss": 0.9717956781387329, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.41685144124168516, | |
| "grad_norm": 1.1715413331985474, | |
| "learning_rate": 4.965217733443782e-06, | |
| "loss": 0.9312400817871094, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.4212860310421286, | |
| "grad_norm": 2.2187230587005615, | |
| "learning_rate": 4.963757319610716e-06, | |
| "loss": 0.9861539602279663, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.42572062084257206, | |
| "grad_norm": 2.1262733936309814, | |
| "learning_rate": 4.962267122359835e-06, | |
| "loss": 0.8997060656547546, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.43015521064301554, | |
| "grad_norm": 2.3965535163879395, | |
| "learning_rate": 4.960747161739931e-06, | |
| "loss": 1.2791428565979004, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.43458980044345896, | |
| "grad_norm": 1.8069835901260376, | |
| "learning_rate": 4.9591974582002324e-06, | |
| "loss": 1.549714207649231, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.43902439024390244, | |
| "grad_norm": 3.381026029586792, | |
| "learning_rate": 4.957618032590118e-06, | |
| "loss": 1.2765225172042847, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.4434589800443459, | |
| "grad_norm": 0.758112907409668, | |
| "learning_rate": 4.956008906158842e-06, | |
| "loss": 1.1299937963485718, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.44789356984478934, | |
| "grad_norm": 2.647829055786133, | |
| "learning_rate": 4.954370100555249e-06, | |
| "loss": 1.2492018938064575, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.4523281596452328, | |
| "grad_norm": 1.820816159248352, | |
| "learning_rate": 4.952701637827476e-06, | |
| "loss": 1.216017484664917, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.4567627494456763, | |
| "grad_norm": 1.9108037948608398, | |
| "learning_rate": 4.951003540422668e-06, | |
| "loss": 1.0710757970809937, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.4611973392461197, | |
| "grad_norm": 4.884266376495361, | |
| "learning_rate": 4.949275831186663e-06, | |
| "loss": 1.044965147972107, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.4656319290465632, | |
| "grad_norm": 1.51518976688385, | |
| "learning_rate": 4.947518533363691e-06, | |
| "loss": 0.645362377166748, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.4700665188470067, | |
| "grad_norm": 4.527989387512207, | |
| "learning_rate": 4.945731670596062e-06, | |
| "loss": 0.8511308431625366, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.4745011086474501, | |
| "grad_norm": 0.7997156381607056, | |
| "learning_rate": 4.943915266923845e-06, | |
| "loss": 1.0683618783950806, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.4789356984478936, | |
| "grad_norm": 1.7993850708007812, | |
| "learning_rate": 4.942069346784547e-06, | |
| "loss": 1.08345627784729, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.48337028824833705, | |
| "grad_norm": 0.7484229803085327, | |
| "learning_rate": 4.940193935012785e-06, | |
| "loss": 1.1189590692520142, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.4878048780487805, | |
| "grad_norm": 2.9010186195373535, | |
| "learning_rate": 4.938289056839946e-06, | |
| "loss": 1.2146114110946655, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.49223946784922396, | |
| "grad_norm": 1.2498693466186523, | |
| "learning_rate": 4.936354737893854e-06, | |
| "loss": 1.2259554862976074, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.49667405764966743, | |
| "grad_norm": 1.4774630069732666, | |
| "learning_rate": 4.934391004198424e-06, | |
| "loss": 1.2009336948394775, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.5011086474501109, | |
| "grad_norm": 1.1975030899047852, | |
| "learning_rate": 4.932397882173307e-06, | |
| "loss": 1.2059407234191895, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.5055432372505543, | |
| "grad_norm": 0.9775891900062561, | |
| "learning_rate": 4.930375398633543e-06, | |
| "loss": 1.2553625106811523, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.5099778270509978, | |
| "grad_norm": 1.1985933780670166, | |
| "learning_rate": 4.928323580789192e-06, | |
| "loss": 1.8459392786026, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.5144124168514412, | |
| "grad_norm": 2.5975992679595947, | |
| "learning_rate": 4.926242456244973e-06, | |
| "loss": 0.8564022779464722, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.5188470066518847, | |
| "grad_norm": 1.2151294946670532, | |
| "learning_rate": 4.924132052999892e-06, | |
| "loss": 1.2610244750976562, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.5232815964523282, | |
| "grad_norm": 1.6694788932800293, | |
| "learning_rate": 4.921992399446861e-06, | |
| "loss": 0.9529590010643005, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.5277161862527716, | |
| "grad_norm": 1.3985271453857422, | |
| "learning_rate": 4.919823524372323e-06, | |
| "loss": 0.9686543941497803, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.532150776053215, | |
| "grad_norm": 0.8372143507003784, | |
| "learning_rate": 4.91762545695586e-06, | |
| "loss": 1.258904218673706, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.5365853658536586, | |
| "grad_norm": 1.2533243894577026, | |
| "learning_rate": 4.9153982267698e-06, | |
| "loss": 1.3248710632324219, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.541019955654102, | |
| "grad_norm": 7.963499069213867, | |
| "learning_rate": 4.913141863778822e-06, | |
| "loss": 0.9705762267112732, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.5454545454545454, | |
| "grad_norm": 1.3693453073501587, | |
| "learning_rate": 4.910856398339553e-06, | |
| "loss": 1.2580170631408691, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.549889135254989, | |
| "grad_norm": 1.1870553493499756, | |
| "learning_rate": 4.9085418612001545e-06, | |
| "loss": 1.4803242683410645, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.5543237250554324, | |
| "grad_norm": 2.50886607170105, | |
| "learning_rate": 4.906198283499916e-06, | |
| "loss": 1.2252423763275146, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.5587583148558758, | |
| "grad_norm": 3.0687942504882812, | |
| "learning_rate": 4.903825696768829e-06, | |
| "loss": 0.6616644263267517, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.5631929046563193, | |
| "grad_norm": 2.0669586658477783, | |
| "learning_rate": 4.901424132927172e-06, | |
| "loss": 1.474183440208435, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.5676274944567627, | |
| "grad_norm": 4.0111403465271, | |
| "learning_rate": 4.898993624285069e-06, | |
| "loss": 1.316019892692566, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.5720620842572062, | |
| "grad_norm": 0.6927316784858704, | |
| "learning_rate": 4.896534203542062e-06, | |
| "loss": 1.2566733360290527, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.5764966740576497, | |
| "grad_norm": 1.2319213151931763, | |
| "learning_rate": 4.894045903786675e-06, | |
| "loss": 1.2865486145019531, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5809312638580931, | |
| "grad_norm": 2.781428337097168, | |
| "learning_rate": 4.891528758495961e-06, | |
| "loss": 0.7123095989227295, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.5853658536585366, | |
| "grad_norm": 1.0490000247955322, | |
| "learning_rate": 4.888982801535053e-06, | |
| "loss": 1.360097050666809, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.5898004434589801, | |
| "grad_norm": 1.8262178897857666, | |
| "learning_rate": 4.886408067156712e-06, | |
| "loss": 1.0533033609390259, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.5942350332594235, | |
| "grad_norm": 1.2792811393737793, | |
| "learning_rate": 4.883804590000865e-06, | |
| "loss": 1.550133228302002, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.5986696230598669, | |
| "grad_norm": 2.9849724769592285, | |
| "learning_rate": 4.881172405094138e-06, | |
| "loss": 1.1676595211029053, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.6031042128603105, | |
| "grad_norm": 0.7342039942741394, | |
| "learning_rate": 4.878511547849383e-06, | |
| "loss": 1.2335383892059326, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.6075388026607539, | |
| "grad_norm": 1.2436589002609253, | |
| "learning_rate": 4.875822054065203e-06, | |
| "loss": 1.2187210321426392, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.6119733924611973, | |
| "grad_norm": 1.587964653968811, | |
| "learning_rate": 4.8731039599254754e-06, | |
| "loss": 1.226876974105835, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.6164079822616408, | |
| "grad_norm": 4.160574913024902, | |
| "learning_rate": 4.870357301998856e-06, | |
| "loss": 1.205104112625122, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.6208425720620843, | |
| "grad_norm": 1.1125067472457886, | |
| "learning_rate": 4.867582117238294e-06, | |
| "loss": 1.287103295326233, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.6252771618625277, | |
| "grad_norm": 2.7902486324310303, | |
| "learning_rate": 4.864778442980532e-06, | |
| "loss": 0.8597289323806763, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.6297117516629712, | |
| "grad_norm": 1.8220659494400024, | |
| "learning_rate": 4.861946316945605e-06, | |
| "loss": 1.2538930177688599, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.6341463414634146, | |
| "grad_norm": 1.2487821578979492, | |
| "learning_rate": 4.859085777236331e-06, | |
| "loss": 1.330883264541626, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.6385809312638581, | |
| "grad_norm": 1.1677597761154175, | |
| "learning_rate": 4.8561968623377985e-06, | |
| "loss": 1.252233862876892, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.6430155210643016, | |
| "grad_norm": 1.1160072088241577, | |
| "learning_rate": 4.853279611116852e-06, | |
| "loss": 1.2359025478363037, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.647450110864745, | |
| "grad_norm": 1.4154568910598755, | |
| "learning_rate": 4.850334062821566e-06, | |
| "loss": 1.3491101264953613, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.6518847006651884, | |
| "grad_norm": 1.4015207290649414, | |
| "learning_rate": 4.8473602570807185e-06, | |
| "loss": 0.9202826619148254, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.656319290465632, | |
| "grad_norm": 1.124402403831482, | |
| "learning_rate": 4.844358233903254e-06, | |
| "loss": 0.9892662167549133, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.6607538802660754, | |
| "grad_norm": 1.2850724458694458, | |
| "learning_rate": 4.841328033677753e-06, | |
| "loss": 1.2050740718841553, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.6651884700665188, | |
| "grad_norm": 1.2479948997497559, | |
| "learning_rate": 4.83826969717188e-06, | |
| "loss": 1.2547260522842407, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6696230598669624, | |
| "grad_norm": 1.376888394355774, | |
| "learning_rate": 4.835183265531843e-06, | |
| "loss": 1.1995564699172974, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.6740576496674058, | |
| "grad_norm": 1.6199829578399658, | |
| "learning_rate": 4.832068780281831e-06, | |
| "loss": 1.2650580406188965, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.6784922394678492, | |
| "grad_norm": 2.1889870166778564, | |
| "learning_rate": 4.828926283323464e-06, | |
| "loss": 1.2397799491882324, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.6829268292682927, | |
| "grad_norm": 1.4469552040100098, | |
| "learning_rate": 4.8257558169352254e-06, | |
| "loss": 0.8677200675010681, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.6873614190687362, | |
| "grad_norm": 1.1219711303710938, | |
| "learning_rate": 4.8225574237718906e-06, | |
| "loss": 1.2440086603164673, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.6917960088691796, | |
| "grad_norm": 0.9012990593910217, | |
| "learning_rate": 4.819331146863958e-06, | |
| "loss": 1.2062432765960693, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.6962305986696231, | |
| "grad_norm": 1.1953213214874268, | |
| "learning_rate": 4.8160770296170685e-06, | |
| "loss": 1.2283505201339722, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.7006651884700665, | |
| "grad_norm": 1.6990729570388794, | |
| "learning_rate": 4.812795115811419e-06, | |
| "loss": 1.3305617570877075, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.70509977827051, | |
| "grad_norm": 0.8295086622238159, | |
| "learning_rate": 4.809485449601177e-06, | |
| "loss": 0.9975929856300354, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.7095343680709535, | |
| "grad_norm": 1.4529755115509033, | |
| "learning_rate": 4.806148075513883e-06, | |
| "loss": 0.9766805768013, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.7139689578713969, | |
| "grad_norm": 8.476192474365234, | |
| "learning_rate": 4.802783038449857e-06, | |
| "loss": 0.9863343834877014, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.7184035476718403, | |
| "grad_norm": 1.1818149089813232, | |
| "learning_rate": 4.799390383681587e-06, | |
| "loss": 1.0997380018234253, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.7228381374722838, | |
| "grad_norm": 2.4012951850891113, | |
| "learning_rate": 4.795970156853124e-06, | |
| "loss": 1.077536702156067, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.7272727272727273, | |
| "grad_norm": 0.42787617444992065, | |
| "learning_rate": 4.792522403979471e-06, | |
| "loss": 0.8034789562225342, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.7317073170731707, | |
| "grad_norm": 1.517409324645996, | |
| "learning_rate": 4.789047171445957e-06, | |
| "loss": 0.8247131109237671, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.7361419068736141, | |
| "grad_norm": 3.9636194705963135, | |
| "learning_rate": 4.785544506007619e-06, | |
| "loss": 1.3770612478256226, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.7405764966740577, | |
| "grad_norm": 3.6474459171295166, | |
| "learning_rate": 4.782014454788566e-06, | |
| "loss": 1.1880348920822144, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.7450110864745011, | |
| "grad_norm": 1.1807231903076172, | |
| "learning_rate": 4.778457065281355e-06, | |
| "loss": 1.2278774976730347, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.7494456762749445, | |
| "grad_norm": 3.716444969177246, | |
| "learning_rate": 4.774872385346345e-06, | |
| "loss": 0.6877051591873169, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.753880266075388, | |
| "grad_norm": 10.607100486755371, | |
| "learning_rate": 4.7712604632110524e-06, | |
| "loss": 0.3212733566761017, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.7583148558758315, | |
| "grad_norm": 2.057530164718628, | |
| "learning_rate": 4.767621347469506e-06, | |
| "loss": 0.8571130633354187, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.7627494456762749, | |
| "grad_norm": 0.9771236181259155, | |
| "learning_rate": 4.7639550870815895e-06, | |
| "loss": 1.2813206911087036, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.7671840354767184, | |
| "grad_norm": 0.8891447186470032, | |
| "learning_rate": 4.760261731372388e-06, | |
| "loss": 1.2372010946273804, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.7716186252771619, | |
| "grad_norm": 2.889232873916626, | |
| "learning_rate": 4.75654133003152e-06, | |
| "loss": 1.0866163969039917, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.7760532150776053, | |
| "grad_norm": 1.4878243207931519, | |
| "learning_rate": 4.752793933112469e-06, | |
| "loss": 1.2248578071594238, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.7804878048780488, | |
| "grad_norm": 8.06610107421875, | |
| "learning_rate": 4.749019591031914e-06, | |
| "loss": 1.144696831703186, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.7849223946784922, | |
| "grad_norm": 1.4932175874710083, | |
| "learning_rate": 4.745218354569045e-06, | |
| "loss": 1.0152348279953003, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.7893569844789357, | |
| "grad_norm": 2.44124698638916, | |
| "learning_rate": 4.741390274864885e-06, | |
| "loss": 1.1802353858947754, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.7937915742793792, | |
| "grad_norm": 4.262493133544922, | |
| "learning_rate": 4.737535403421601e-06, | |
| "loss": 1.2564811706542969, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.7982261640798226, | |
| "grad_norm": 10.588669776916504, | |
| "learning_rate": 4.733653792101809e-06, | |
| "loss": 1.2239693403244019, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.802660753880266, | |
| "grad_norm": 1.7806600332260132, | |
| "learning_rate": 4.729745493127878e-06, | |
| "loss": 0.5834329724311829, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.8070953436807096, | |
| "grad_norm": 1.3935792446136475, | |
| "learning_rate": 4.725810559081227e-06, | |
| "loss": 1.324857234954834, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.811529933481153, | |
| "grad_norm": 1.9065128564834595, | |
| "learning_rate": 4.7218490429016175e-06, | |
| "loss": 1.2007935047149658, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.8159645232815964, | |
| "grad_norm": 2.5401406288146973, | |
| "learning_rate": 4.717860997886442e-06, | |
| "loss": 0.9981905817985535, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.8203991130820399, | |
| "grad_norm": 3.2703709602355957, | |
| "learning_rate": 4.713846477690005e-06, | |
| "loss": 0.8222334384918213, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.8248337028824834, | |
| "grad_norm": 1.1709415912628174, | |
| "learning_rate": 4.709805536322804e-06, | |
| "loss": 1.22462797164917, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.8292682926829268, | |
| "grad_norm": 1.1292668581008911, | |
| "learning_rate": 4.7057382281508e-06, | |
| "loss": 1.2273211479187012, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.8337028824833703, | |
| "grad_norm": 0.9493663311004639, | |
| "learning_rate": 4.701644607894687e-06, | |
| "loss": 1.1893556118011475, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.8381374722838137, | |
| "grad_norm": 1.0127923488616943, | |
| "learning_rate": 4.697524730629159e-06, | |
| "loss": 1.215989589691162, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.8425720620842572, | |
| "grad_norm": 8.141073226928711, | |
| "learning_rate": 4.693378651782162e-06, | |
| "loss": 0.7287262082099915, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.8470066518847007, | |
| "grad_norm": 1.4302774667739868, | |
| "learning_rate": 4.689206427134155e-06, | |
| "loss": 1.2984516620635986, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.8514412416851441, | |
| "grad_norm": 0.8617311120033264, | |
| "learning_rate": 4.6850081128173595e-06, | |
| "loss": 1.1311615705490112, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.8558758314855875, | |
| "grad_norm": 0.833918571472168, | |
| "learning_rate": 4.680783765314994e-06, | |
| "loss": 1.246683955192566, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.8603104212860311, | |
| "grad_norm": 0.5764380693435669, | |
| "learning_rate": 4.6765334414605315e-06, | |
| "loss": 1.1455856561660767, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.8647450110864745, | |
| "grad_norm": 2.1335196495056152, | |
| "learning_rate": 4.672257198436918e-06, | |
| "loss": 1.2500605583190918, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.8691796008869179, | |
| "grad_norm": 3.1419546604156494, | |
| "learning_rate": 4.667955093775814e-06, | |
| "loss": 0.9110448360443115, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.8736141906873615, | |
| "grad_norm": 0.8658338189125061, | |
| "learning_rate": 4.663627185356818e-06, | |
| "loss": 1.2182717323303223, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.8780487804878049, | |
| "grad_norm": 1.184532880783081, | |
| "learning_rate": 4.65927353140668e-06, | |
| "loss": 1.2174220085144043, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.8824833702882483, | |
| "grad_norm": 1.8402904272079468, | |
| "learning_rate": 4.654894190498534e-06, | |
| "loss": 1.210001826286316, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.8869179600886918, | |
| "grad_norm": 1.187842607498169, | |
| "learning_rate": 4.650489221551095e-06, | |
| "loss": 0.45290517807006836, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8913525498891353, | |
| "grad_norm": 0.9917973279953003, | |
| "learning_rate": 4.646058683827874e-06, | |
| "loss": 1.045676350593567, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.8957871396895787, | |
| "grad_norm": 0.36595824360847473, | |
| "learning_rate": 4.641602636936378e-06, | |
| "loss": 0.9281713366508484, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.9002217294900222, | |
| "grad_norm": 0.9975008964538574, | |
| "learning_rate": 4.637121140827311e-06, | |
| "loss": 1.2655534744262695, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.9046563192904656, | |
| "grad_norm": 3.180144786834717, | |
| "learning_rate": 4.632614255793762e-06, | |
| "loss": 1.1325139999389648, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.9090909090909091, | |
| "grad_norm": 5.177031517028809, | |
| "learning_rate": 4.6280820424704e-06, | |
| "loss": 1.069734811782837, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.9135254988913526, | |
| "grad_norm": 1.0717765092849731, | |
| "learning_rate": 4.623524561832653e-06, | |
| "loss": 1.2320454120635986, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.917960088691796, | |
| "grad_norm": 1.0891166925430298, | |
| "learning_rate": 4.618941875195893e-06, | |
| "loss": 1.2557101249694824, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.9223946784922394, | |
| "grad_norm": 6.645715713500977, | |
| "learning_rate": 4.614334044214606e-06, | |
| "loss": 0.9693298935890198, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.926829268292683, | |
| "grad_norm": 0.7747200131416321, | |
| "learning_rate": 4.6097011308815645e-06, | |
| "loss": 1.2573974132537842, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.9312638580931264, | |
| "grad_norm": 14.614143371582031, | |
| "learning_rate": 4.605043197526996e-06, | |
| "loss": 0.7741899490356445, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.9356984478935698, | |
| "grad_norm": 11.045886993408203, | |
| "learning_rate": 4.600360306817738e-06, | |
| "loss": 1.45621657371521, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.9401330376940134, | |
| "grad_norm": 4.732778549194336, | |
| "learning_rate": 4.595652521756403e-06, | |
| "loss": 1.0499638319015503, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.9445676274944568, | |
| "grad_norm": 0.7621893882751465, | |
| "learning_rate": 4.590919905680524e-06, | |
| "loss": 1.202383041381836, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.9490022172949002, | |
| "grad_norm": 0.6728458404541016, | |
| "learning_rate": 4.5861625222617065e-06, | |
| "loss": 1.0747066736221313, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.9534368070953437, | |
| "grad_norm": 4.075941562652588, | |
| "learning_rate": 4.58138043550477e-06, | |
| "loss": 0.6646397709846497, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.9578713968957872, | |
| "grad_norm": 1.059487223625183, | |
| "learning_rate": 4.576573709746887e-06, | |
| "loss": 1.2203632593154907, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.9623059866962306, | |
| "grad_norm": 2.331674575805664, | |
| "learning_rate": 4.5717424096567205e-06, | |
| "loss": 1.0556832551956177, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.9667405764966741, | |
| "grad_norm": 1.2610398530960083, | |
| "learning_rate": 4.566886600233547e-06, | |
| "loss": 1.2815701961517334, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.9711751662971175, | |
| "grad_norm": 1.1328259706497192, | |
| "learning_rate": 4.56200634680639e-06, | |
| "loss": 1.233439564704895, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.975609756097561, | |
| "grad_norm": 2.2309317588806152, | |
| "learning_rate": 4.557101715033136e-06, | |
| "loss": 0.7945879697799683, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.9800443458980045, | |
| "grad_norm": 1.2745883464813232, | |
| "learning_rate": 4.552172770899652e-06, | |
| "loss": 1.0334054231643677, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.9844789356984479, | |
| "grad_norm": 0.977016806602478, | |
| "learning_rate": 4.547219580718899e-06, | |
| "loss": 1.2736181020736694, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.9889135254988913, | |
| "grad_norm": 2.0068366527557373, | |
| "learning_rate": 4.542242211130039e-06, | |
| "loss": 1.2092833518981934, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.9933481152993349, | |
| "grad_norm": 1.753902792930603, | |
| "learning_rate": 4.537240729097539e-06, | |
| "loss": 1.2434141635894775, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.9977827050997783, | |
| "grad_norm": 1.0139368772506714, | |
| "learning_rate": 4.532215201910269e-06, | |
| "loss": 1.0325958728790283, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.0022172949002217, | |
| "grad_norm": 2.0576000213623047, | |
| "learning_rate": 4.527165697180598e-06, | |
| "loss": 1.0579339265823364, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.0066518847006651, | |
| "grad_norm": 1.966776967048645, | |
| "learning_rate": 4.522092282843481e-06, | |
| "loss": 1.3362075090408325, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.0110864745011086, | |
| "grad_norm": 1.2726391553878784, | |
| "learning_rate": 4.516995027155554e-06, | |
| "loss": 1.2156652212142944, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.0155210643015522, | |
| "grad_norm": 0.7326069474220276, | |
| "learning_rate": 4.511873998694204e-06, | |
| "loss": 0.8079202175140381, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.0199556541019956, | |
| "grad_norm": 0.47465115785598755, | |
| "learning_rate": 4.506729266356651e-06, | |
| "loss": 0.8377529978752136, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.024390243902439, | |
| "grad_norm": 1.9195665121078491, | |
| "learning_rate": 4.5015608993590276e-06, | |
| "loss": 0.5483433604240417, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.0288248337028825, | |
| "grad_norm": 1.7749394178390503, | |
| "learning_rate": 4.4963689672354375e-06, | |
| "loss": 0.9224250316619873, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.033259423503326, | |
| "grad_norm": 1.9494274854660034, | |
| "learning_rate": 4.491153539837026e-06, | |
| "loss": 0.7165282964706421, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.0376940133037693, | |
| "grad_norm": 2.415039300918579, | |
| "learning_rate": 4.4859146873310375e-06, | |
| "loss": 1.0422555208206177, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.042128603104213, | |
| "grad_norm": 1.4911917448043823, | |
| "learning_rate": 4.480652480199873e-06, | |
| "loss": 0.6359959244728088, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.0465631929046564, | |
| "grad_norm": 1.3522758483886719, | |
| "learning_rate": 4.475366989240147e-06, | |
| "loss": 0.9994376301765442, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.0509977827050998, | |
| "grad_norm": 1.266831636428833, | |
| "learning_rate": 4.470058285561721e-06, | |
| "loss": 1.0376862287521362, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.0554323725055432, | |
| "grad_norm": 1.151628851890564, | |
| "learning_rate": 4.464726440586761e-06, | |
| "loss": 1.076725959777832, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.0598669623059866, | |
| "grad_norm": 1.4442075490951538, | |
| "learning_rate": 4.45937152604877e-06, | |
| "loss": 1.1006007194519043, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.06430155210643, | |
| "grad_norm": 1.1244226694107056, | |
| "learning_rate": 4.453993613991622e-06, | |
| "loss": 0.6436704397201538, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.0687361419068737, | |
| "grad_norm": 1.8516688346862793, | |
| "learning_rate": 4.4485927767685995e-06, | |
| "loss": 1.153225064277649, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.0731707317073171, | |
| "grad_norm": 0.4945599138736725, | |
| "learning_rate": 4.443169087041409e-06, | |
| "loss": 0.9155857563018799, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.0776053215077606, | |
| "grad_norm": 1.006088137626648, | |
| "learning_rate": 4.4377226177792145e-06, | |
| "loss": 1.0506926774978638, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.082039911308204, | |
| "grad_norm": 1.9565138816833496, | |
| "learning_rate": 4.432253442257649e-06, | |
| "loss": 0.7985799312591553, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.0864745011086474, | |
| "grad_norm": 0.8965359926223755, | |
| "learning_rate": 4.426761634057831e-06, | |
| "loss": 0.9610664248466492, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.0909090909090908, | |
| "grad_norm": 1.760244607925415, | |
| "learning_rate": 4.421247267065375e-06, | |
| "loss": 1.1698113679885864, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.0953436807095343, | |
| "grad_norm": 5.320736885070801, | |
| "learning_rate": 4.415710415469394e-06, | |
| "loss": 1.0748765468597412, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.099778270509978, | |
| "grad_norm": 2.7736940383911133, | |
| "learning_rate": 4.410151153761506e-06, | |
| "loss": 0.8963067531585693, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.1042128603104213, | |
| "grad_norm": 0.9353604912757874, | |
| "learning_rate": 4.404569556734832e-06, | |
| "loss": 1.023295521736145, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.1086474501108647, | |
| "grad_norm": 4.738027572631836, | |
| "learning_rate": 4.398965699482984e-06, | |
| "loss": 0.9625403881072998, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.1130820399113082, | |
| "grad_norm": 0.9695823788642883, | |
| "learning_rate": 4.39333965739906e-06, | |
| "loss": 1.0802518129348755, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.1175166297117516, | |
| "grad_norm": 2.8423125743865967, | |
| "learning_rate": 4.3876915061746275e-06, | |
| "loss": 0.9319751262664795, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.1219512195121952, | |
| "grad_norm": 1.8870809078216553, | |
| "learning_rate": 4.382021321798707e-06, | |
| "loss": 1.0706431865692139, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.1263858093126387, | |
| "grad_norm": 2.326402425765991, | |
| "learning_rate": 4.376329180556745e-06, | |
| "loss": 0.790678858757019, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.130820399113082, | |
| "grad_norm": 1.3830646276474, | |
| "learning_rate": 4.370615159029594e-06, | |
| "loss": 1.1257532835006714, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.1352549889135255, | |
| "grad_norm": 1.6302467584609985, | |
| "learning_rate": 4.36487933409248e-06, | |
| "loss": 0.5582272410392761, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.139689578713969, | |
| "grad_norm": 1.8311710357666016, | |
| "learning_rate": 4.359121782913964e-06, | |
| "loss": 0.541851282119751, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.1441241685144123, | |
| "grad_norm": 1.120679497718811, | |
| "learning_rate": 4.3533425829549085e-06, | |
| "loss": 1.0328795909881592, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.1485587583148558, | |
| "grad_norm": 1.37759530544281, | |
| "learning_rate": 4.347541811967436e-06, | |
| "loss": 1.163663625717163, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.1529933481152994, | |
| "grad_norm": 0.981916606426239, | |
| "learning_rate": 4.341719547993879e-06, | |
| "loss": 1.1469664573669434, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.1574279379157428, | |
| "grad_norm": 1.2698646783828735, | |
| "learning_rate": 4.335875869365732e-06, | |
| "loss": 0.5811082124710083, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.1618625277161863, | |
| "grad_norm": 1.2500545978546143, | |
| "learning_rate": 4.330010854702598e-06, | |
| "loss": 1.0397084951400757, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.1662971175166297, | |
| "grad_norm": 1.7220888137817383, | |
| "learning_rate": 4.3241245829111324e-06, | |
| "loss": 1.1979455947875977, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.170731707317073, | |
| "grad_norm": 1.460283875465393, | |
| "learning_rate": 4.318217133183978e-06, | |
| "loss": 0.6363497376441956, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.1751662971175167, | |
| "grad_norm": 1.075685977935791, | |
| "learning_rate": 4.312288584998697e-06, | |
| "loss": 0.8744889497756958, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.1796008869179602, | |
| "grad_norm": 0.8398450016975403, | |
| "learning_rate": 4.306339018116714e-06, | |
| "loss": 0.96029132604599, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.1840354767184036, | |
| "grad_norm": 1.3492926359176636, | |
| "learning_rate": 4.300368512582227e-06, | |
| "loss": 1.0959749221801758, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.188470066518847, | |
| "grad_norm": 0.3663475215435028, | |
| "learning_rate": 4.294377148721144e-06, | |
| "loss": 0.82485431432724, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.1929046563192904, | |
| "grad_norm": 1.7144395112991333, | |
| "learning_rate": 4.288365007139991e-06, | |
| "loss": 1.1424366235733032, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.1973392461197339, | |
| "grad_norm": 2.5541539192199707, | |
| "learning_rate": 4.2823321687248386e-06, | |
| "loss": 0.6629378795623779, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.2017738359201773, | |
| "grad_norm": 2.608015775680542, | |
| "learning_rate": 4.276278714640203e-06, | |
| "loss": 0.6427868008613586, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.206208425720621, | |
| "grad_norm": 17.38459587097168, | |
| "learning_rate": 4.270204726327963e-06, | |
| "loss": 1.1916182041168213, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.2106430155210643, | |
| "grad_norm": 0.5953897833824158, | |
| "learning_rate": 4.264110285506259e-06, | |
| "loss": 0.8375392556190491, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.2150776053215078, | |
| "grad_norm": 1.1584091186523438, | |
| "learning_rate": 4.257995474168395e-06, | |
| "loss": 1.216051459312439, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.2195121951219512, | |
| "grad_norm": 2.22684645652771, | |
| "learning_rate": 4.251860374581736e-06, | |
| "loss": 0.598793625831604, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.2239467849223946, | |
| "grad_norm": 1.4314756393432617, | |
| "learning_rate": 4.245705069286601e-06, | |
| "loss": 1.076664686203003, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.2283813747228383, | |
| "grad_norm": 1.2437039613723755, | |
| "learning_rate": 4.239529641095149e-06, | |
| "loss": 0.7446794509887695, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.2328159645232817, | |
| "grad_norm": 0.4004473090171814, | |
| "learning_rate": 4.233334173090274e-06, | |
| "loss": 0.7689218521118164, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.237250554323725, | |
| "grad_norm": 3.483046293258667, | |
| "learning_rate": 4.227118748624478e-06, | |
| "loss": 0.6857209205627441, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.2416851441241685, | |
| "grad_norm": 4.495988845825195, | |
| "learning_rate": 4.220883451318753e-06, | |
| "loss": 1.3058849573135376, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.246119733924612, | |
| "grad_norm": 1.3580222129821777, | |
| "learning_rate": 4.2146283650614545e-06, | |
| "loss": 0.9943916201591492, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.2505543237250554, | |
| "grad_norm": 0.6850365400314331, | |
| "learning_rate": 4.208353574007179e-06, | |
| "loss": 0.6252534985542297, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.2549889135254988, | |
| "grad_norm": 1.7630574703216553, | |
| "learning_rate": 4.202059162575622e-06, | |
| "loss": 0.9947891235351562, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.2594235033259422, | |
| "grad_norm": 1.371643304824829, | |
| "learning_rate": 4.195745215450451e-06, | |
| "loss": 0.9397783279418945, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.2638580931263859, | |
| "grad_norm": 4.08069372177124, | |
| "learning_rate": 4.189411817578159e-06, | |
| "loss": 1.136919379234314, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.2682926829268293, | |
| "grad_norm": 0.8603072166442871, | |
| "learning_rate": 4.1830590541669304e-06, | |
| "loss": 0.7842409610748291, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.2727272727272727, | |
| "grad_norm": 2.9320180416107178, | |
| "learning_rate": 4.176687010685484e-06, | |
| "loss": 1.3783859014511108, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.2771618625277161, | |
| "grad_norm": 1.506885290145874, | |
| "learning_rate": 4.170295772861931e-06, | |
| "loss": 1.0433826446533203, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.2815964523281598, | |
| "grad_norm": 1.3161765336990356, | |
| "learning_rate": 4.163885426682619e-06, | |
| "loss": 1.1605374813079834, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.2860310421286032, | |
| "grad_norm": 1.0546302795410156, | |
| "learning_rate": 4.157456058390977e-06, | |
| "loss": 1.121392011642456, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.2904656319290466, | |
| "grad_norm": 2.199747323989868, | |
| "learning_rate": 4.151007754486351e-06, | |
| "loss": 1.3379415273666382, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.29490022172949, | |
| "grad_norm": 2.361356258392334, | |
| "learning_rate": 4.144540601722843e-06, | |
| "loss": 0.7737810015678406, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.2993348115299335, | |
| "grad_norm": 1.0490304231643677, | |
| "learning_rate": 4.138054687108143e-06, | |
| "loss": 0.8205963373184204, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.3037694013303769, | |
| "grad_norm": 3.6670305728912354, | |
| "learning_rate": 4.131550097902361e-06, | |
| "loss": 0.47316789627075195, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.3082039911308203, | |
| "grad_norm": 4.267838478088379, | |
| "learning_rate": 4.125026921616852e-06, | |
| "loss": 0.9959248900413513, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.3126385809312637, | |
| "grad_norm": 3.6597208976745605, | |
| "learning_rate": 4.118485246013031e-06, | |
| "loss": 1.1072802543640137, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.3170731707317074, | |
| "grad_norm": 0.8594958782196045, | |
| "learning_rate": 4.111925159101208e-06, | |
| "loss": 1.0438408851623535, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.3215077605321508, | |
| "grad_norm": 1.6975743770599365, | |
| "learning_rate": 4.1053467491393864e-06, | |
| "loss": 1.0767079591751099, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.3259423503325942, | |
| "grad_norm": 2.8765268325805664, | |
| "learning_rate": 4.098750104632091e-06, | |
| "loss": 1.160161018371582, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.3303769401330376, | |
| "grad_norm": 1.8514677286148071, | |
| "learning_rate": 4.092135314329165e-06, | |
| "loss": 0.47440510988235474, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.3348115299334813, | |
| "grad_norm": 2.5123469829559326, | |
| "learning_rate": 4.085502467224583e-06, | |
| "loss": 1.0348572731018066, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.3392461197339247, | |
| "grad_norm": 2.3508543968200684, | |
| "learning_rate": 4.078851652555254e-06, | |
| "loss": 0.8366844654083252, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.3436807095343681, | |
| "grad_norm": 0.6843059659004211, | |
| "learning_rate": 4.072182959799816e-06, | |
| "loss": 0.782626211643219, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.3481152993348116, | |
| "grad_norm": 1.0305436849594116, | |
| "learning_rate": 4.065496478677436e-06, | |
| "loss": 1.113935112953186, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.352549889135255, | |
| "grad_norm": 6.971372127532959, | |
| "learning_rate": 4.058792299146602e-06, | |
| "loss": 1.1273419857025146, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.3569844789356984, | |
| "grad_norm": 9.397238731384277, | |
| "learning_rate": 4.052070511403912e-06, | |
| "loss": 0.7277010679244995, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.3614190687361418, | |
| "grad_norm": 1.1997414827346802, | |
| "learning_rate": 4.045331205882863e-06, | |
| "loss": 1.1359970569610596, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.3658536585365852, | |
| "grad_norm": 1.7059991359710693, | |
| "learning_rate": 4.038574473252629e-06, | |
| "loss": 0.7449517846107483, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.370288248337029, | |
| "grad_norm": 1.1675894260406494, | |
| "learning_rate": 4.031800404416849e-06, | |
| "loss": 1.0706074237823486, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.3747228381374723, | |
| "grad_norm": 1.6913033723831177, | |
| "learning_rate": 4.025009090512394e-06, | |
| "loss": 0.09774535149335861, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.3791574279379157, | |
| "grad_norm": 0.9009557962417603, | |
| "learning_rate": 4.018200622908153e-06, | |
| "loss": 1.0203994512557983, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.3835920177383592, | |
| "grad_norm": 1.3641494512557983, | |
| "learning_rate": 4.011375093203793e-06, | |
| "loss": 0.949788510799408, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.3880266075388026, | |
| "grad_norm": 1.6519279479980469, | |
| "learning_rate": 4.004532593228531e-06, | |
| "loss": 0.9569694995880127, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.3924611973392462, | |
| "grad_norm": 1.1832157373428345, | |
| "learning_rate": 3.997673215039899e-06, | |
| "loss": 1.0896062850952148, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.3968957871396896, | |
| "grad_norm": 1.915313959121704, | |
| "learning_rate": 3.990797050922506e-06, | |
| "loss": 1.084737777709961, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.401330376940133, | |
| "grad_norm": 2.5589466094970703, | |
| "learning_rate": 3.9839041933867954e-06, | |
| "loss": 0.8085731863975525, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.4057649667405765, | |
| "grad_norm": 2.6854422092437744, | |
| "learning_rate": 3.976994735167796e-06, | |
| "loss": 1.0447653532028198, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.41019955654102, | |
| "grad_norm": 1.3558317422866821, | |
| "learning_rate": 3.970068769223884e-06, | |
| "loss": 1.110194206237793, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.4146341463414633, | |
| "grad_norm": 2.323361873626709, | |
| "learning_rate": 3.963126388735525e-06, | |
| "loss": 0.8597381711006165, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.4190687361419068, | |
| "grad_norm": 0.8953458070755005, | |
| "learning_rate": 3.956167687104021e-06, | |
| "loss": 0.7458611130714417, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.4235033259423504, | |
| "grad_norm": 1.6884546279907227, | |
| "learning_rate": 3.9491927579502584e-06, | |
| "loss": 0.8080941438674927, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.4279379157427938, | |
| "grad_norm": 1.2389963865280151, | |
| "learning_rate": 3.9422016951134415e-06, | |
| "loss": 0.6925735473632812, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.4323725055432373, | |
| "grad_norm": 1.243801236152649, | |
| "learning_rate": 3.935194592649836e-06, | |
| "loss": 1.2162237167358398, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.4368070953436807, | |
| "grad_norm": 0.7290199995040894, | |
| "learning_rate": 3.928171544831501e-06, | |
| "loss": 1.1060457229614258, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.441241685144124, | |
| "grad_norm": 0.8302409052848816, | |
| "learning_rate": 3.921132646145019e-06, | |
| "loss": 1.1656242609024048, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.4456762749445677, | |
| "grad_norm": 3.586696147918701, | |
| "learning_rate": 3.914077991290232e-06, | |
| "loss": 0.9107663035392761, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.4501108647450112, | |
| "grad_norm": 2.574885606765747, | |
| "learning_rate": 3.907007675178956e-06, | |
| "loss": 1.0581393241882324, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.4545454545454546, | |
| "grad_norm": 4.263864517211914, | |
| "learning_rate": 3.899921792933713e-06, | |
| "loss": 0.8163521885871887, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.458980044345898, | |
| "grad_norm": 0.9238935708999634, | |
| "learning_rate": 3.892820439886448e-06, | |
| "loss": 0.8494032621383667, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.4634146341463414, | |
| "grad_norm": 1.1233266592025757, | |
| "learning_rate": 3.885703711577249e-06, | |
| "loss": 1.0738056898117065, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.4678492239467849, | |
| "grad_norm": 0.7168223261833191, | |
| "learning_rate": 3.8785717037530555e-06, | |
| "loss": 1.078277587890625, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.4722838137472283, | |
| "grad_norm": 0.8631088137626648, | |
| "learning_rate": 3.871424512366377e-06, | |
| "loss": 0.8074089288711548, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.476718403547672, | |
| "grad_norm": 0.7211161255836487, | |
| "learning_rate": 3.864262233574e-06, | |
| "loss": 0.5039446949958801, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.4811529933481153, | |
| "grad_norm": 2.315062999725342, | |
| "learning_rate": 3.857084963735689e-06, | |
| "loss": 0.6357030868530273, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.4855875831485588, | |
| "grad_norm": 4.405363082885742, | |
| "learning_rate": 3.849892799412902e-06, | |
| "loss": 1.0553703308105469, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.4900221729490022, | |
| "grad_norm": 1.0128716230392456, | |
| "learning_rate": 3.84268583736748e-06, | |
| "loss": 1.1253726482391357, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.4944567627494456, | |
| "grad_norm": 1.074711799621582, | |
| "learning_rate": 3.835464174560349e-06, | |
| "loss": 0.6612215042114258, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.4988913525498893, | |
| "grad_norm": 0.7255991101264954, | |
| "learning_rate": 3.828227908150217e-06, | |
| "loss": 0.753847599029541, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.5033259423503327, | |
| "grad_norm": 0.9374713897705078, | |
| "learning_rate": 3.820977135492266e-06, | |
| "loss": 1.095574140548706, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.507760532150776, | |
| "grad_norm": 0.7554945349693298, | |
| "learning_rate": 3.8137119541368415e-06, | |
| "loss": 0.6039291620254517, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.5121951219512195, | |
| "grad_norm": 1.8996697664260864, | |
| "learning_rate": 3.80643246182814e-06, | |
| "loss": 0.5733712315559387, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.516629711751663, | |
| "grad_norm": 1.0919080972671509, | |
| "learning_rate": 3.7991387565028963e-06, | |
| "loss": 1.0893672704696655, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.5210643015521064, | |
| "grad_norm": 0.780432939529419, | |
| "learning_rate": 3.791830936289062e-06, | |
| "loss": 1.0852909088134766, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.5254988913525498, | |
| "grad_norm": 91.54979705810547, | |
| "learning_rate": 3.784509099504488e-06, | |
| "loss": 0.6314648985862732, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.5299334811529932, | |
| "grad_norm": 1.411130666732788, | |
| "learning_rate": 3.7771733446556025e-06, | |
| "loss": 0.3787440061569214, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.5343680709534369, | |
| "grad_norm": 1.2953332662582397, | |
| "learning_rate": 3.7698237704360826e-06, | |
| "loss": 0.8831952214241028, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.5388026607538803, | |
| "grad_norm": 2.0927493572235107, | |
| "learning_rate": 3.7624604757255297e-06, | |
| "loss": 0.9063498377799988, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.5432372505543237, | |
| "grad_norm": 8.049708366394043, | |
| "learning_rate": 3.7550835595881365e-06, | |
| "loss": 0.6573284268379211, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.5476718403547673, | |
| "grad_norm": 1.286287784576416, | |
| "learning_rate": 3.747693121271355e-06, | |
| "loss": 1.1525388956069946, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.5521064301552108, | |
| "grad_norm": 0.8270326256752014, | |
| "learning_rate": 3.740289260204565e-06, | |
| "loss": 1.0669758319854736, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.5565410199556542, | |
| "grad_norm": 0.873605489730835, | |
| "learning_rate": 3.732872075997729e-06, | |
| "loss": 1.1033045053482056, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.5609756097560976, | |
| "grad_norm": 1.9848657846450806, | |
| "learning_rate": 3.725441668440058e-06, | |
| "loss": 0.8427386283874512, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.565410199556541, | |
| "grad_norm": 2.9728550910949707, | |
| "learning_rate": 3.7179981374986683e-06, | |
| "loss": 0.4659649431705475, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.5698447893569845, | |
| "grad_norm": 0.9899294376373291, | |
| "learning_rate": 3.710541583317233e-06, | |
| "loss": 1.1070295572280884, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.5742793791574279, | |
| "grad_norm": 1.7865556478500366, | |
| "learning_rate": 3.70307210621464e-06, | |
| "loss": 0.8662485480308533, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.5787139689578713, | |
| "grad_norm": 1.5620512962341309, | |
| "learning_rate": 3.695589806683636e-06, | |
| "loss": 0.6354061365127563, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.5831485587583147, | |
| "grad_norm": 1.4959702491760254, | |
| "learning_rate": 3.68809478538948e-06, | |
| "loss": 1.0189735889434814, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.5875831485587582, | |
| "grad_norm": 1.0991613864898682, | |
| "learning_rate": 3.6805871431685875e-06, | |
| "loss": 1.1094708442687988, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.5920177383592018, | |
| "grad_norm": 1.5194610357284546, | |
| "learning_rate": 3.6730669810271707e-06, | |
| "loss": 1.3121333122253418, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.5964523281596452, | |
| "grad_norm": 1.7078560590744019, | |
| "learning_rate": 3.665534400139885e-06, | |
| "loss": 0.5672956109046936, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.6008869179600886, | |
| "grad_norm": 0.32701054215431213, | |
| "learning_rate": 3.6579895018484635e-06, | |
| "loss": 0.5884336233139038, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.6053215077605323, | |
| "grad_norm": 0.7404405474662781, | |
| "learning_rate": 3.650432387660354e-06, | |
| "loss": 1.0861414670944214, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.6097560975609757, | |
| "grad_norm": 1.0352939367294312, | |
| "learning_rate": 3.6428631592473584e-06, | |
| "loss": 1.0698777437210083, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.6141906873614191, | |
| "grad_norm": 1.0069420337677002, | |
| "learning_rate": 3.6352819184442552e-06, | |
| "loss": 0.7489967346191406, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.6186252771618626, | |
| "grad_norm": 1.1831481456756592, | |
| "learning_rate": 3.6276887672474374e-06, | |
| "loss": 1.0391978025436401, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.623059866962306, | |
| "grad_norm": 1.3561768531799316, | |
| "learning_rate": 3.620083807813541e-06, | |
| "loss": 1.090510368347168, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.6274944567627494, | |
| "grad_norm": 0.9160586595535278, | |
| "learning_rate": 3.6124671424580633e-06, | |
| "loss": 1.0251963138580322, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.6319290465631928, | |
| "grad_norm": 0.39837855100631714, | |
| "learning_rate": 3.604838873653991e-06, | |
| "loss": 0.5789291262626648, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.6363636363636362, | |
| "grad_norm": 0.7905840873718262, | |
| "learning_rate": 3.597199104030424e-06, | |
| "loss": 1.1960307359695435, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.6407982261640797, | |
| "grad_norm": 0.7028928995132446, | |
| "learning_rate": 3.589547936371189e-06, | |
| "loss": 1.0560542345046997, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.6452328159645233, | |
| "grad_norm": 3.9433796405792236, | |
| "learning_rate": 3.58188547361346e-06, | |
| "loss": 1.1990491151809692, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.6496674057649667, | |
| "grad_norm": 5.412987232208252, | |
| "learning_rate": 3.574211818846374e-06, | |
| "loss": 0.8513088226318359, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.6541019955654102, | |
| "grad_norm": 3.399298667907715, | |
| "learning_rate": 3.566527075309641e-06, | |
| "loss": 0.9072363972663879, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.6585365853658538, | |
| "grad_norm": 0.7723504304885864, | |
| "learning_rate": 3.558831346392159e-06, | |
| "loss": 0.8129432201385498, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.6629711751662972, | |
| "grad_norm": 1.6611526012420654, | |
| "learning_rate": 3.5511247356306205e-06, | |
| "loss": 1.1549206972122192, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.6674057649667406, | |
| "grad_norm": 0.962104082107544, | |
| "learning_rate": 3.5434073467081183e-06, | |
| "loss": 0.8654111623764038, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.671840354767184, | |
| "grad_norm": 0.9911019206047058, | |
| "learning_rate": 3.5356792834527533e-06, | |
| "loss": 0.5958826541900635, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.6762749445676275, | |
| "grad_norm": 1.1465089321136475, | |
| "learning_rate": 3.527940649836238e-06, | |
| "loss": 0.9339615106582642, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.680709534368071, | |
| "grad_norm": 1.773185133934021, | |
| "learning_rate": 3.520191549972494e-06, | |
| "loss": 1.0644793510437012, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.6851441241685143, | |
| "grad_norm": 10.430180549621582, | |
| "learning_rate": 3.512432088116255e-06, | |
| "loss": 0.9741522669792175, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.6895787139689578, | |
| "grad_norm": 1.8062833547592163, | |
| "learning_rate": 3.5046623686616627e-06, | |
| "loss": 1.0623693466186523, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.6940133037694012, | |
| "grad_norm": 1.815280795097351, | |
| "learning_rate": 3.496882496140861e-06, | |
| "loss": 1.0941108465194702, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.6984478935698448, | |
| "grad_norm": 1.8432520627975464, | |
| "learning_rate": 3.4890925752225935e-06, | |
| "loss": 0.9967592358589172, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.7028824833702882, | |
| "grad_norm": 2.3747756481170654, | |
| "learning_rate": 3.48129271071079e-06, | |
| "loss": 0.8642722964286804, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.7073170731707317, | |
| "grad_norm": 0.8133605718612671, | |
| "learning_rate": 3.4734830075431605e-06, | |
| "loss": 1.0309913158416748, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.7117516629711753, | |
| "grad_norm": 5.219134330749512, | |
| "learning_rate": 3.4656635707897823e-06, | |
| "loss": 1.0626434087753296, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.7161862527716187, | |
| "grad_norm": 1.766679286956787, | |
| "learning_rate": 3.457834505651687e-06, | |
| "loss": 1.0065494775772095, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.7206208425720622, | |
| "grad_norm": 1.107305884361267, | |
| "learning_rate": 3.449995917459442e-06, | |
| "loss": 1.1634471416473389, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.7250554323725056, | |
| "grad_norm": 0.7361903786659241, | |
| "learning_rate": 3.4421479116717394e-06, | |
| "loss": 1.0542564392089844, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.729490022172949, | |
| "grad_norm": 1.2649286985397339, | |
| "learning_rate": 3.4342905938739707e-06, | |
| "loss": 0.71857750415802, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.7339246119733924, | |
| "grad_norm": 0.9642868041992188, | |
| "learning_rate": 3.4264240697768096e-06, | |
| "loss": 1.039180040359497, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.7383592017738358, | |
| "grad_norm": 0.8986912369728088, | |
| "learning_rate": 3.418548445214791e-06, | |
| "loss": 0.7089306712150574, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.7427937915742793, | |
| "grad_norm": 0.8579238057136536, | |
| "learning_rate": 3.410663826144884e-06, | |
| "loss": 0.8006396293640137, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.7472283813747227, | |
| "grad_norm": 1.4266620874404907, | |
| "learning_rate": 3.4027703186450672e-06, | |
| "loss": 0.6931395530700684, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.7516629711751663, | |
| "grad_norm": 4.200963973999023, | |
| "learning_rate": 3.394868028912906e-06, | |
| "loss": 0.6739537119865417, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.7560975609756098, | |
| "grad_norm": 1.6932109594345093, | |
| "learning_rate": 3.386957063264115e-06, | |
| "loss": 0.7071681618690491, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.7605321507760532, | |
| "grad_norm": 1.0618504285812378, | |
| "learning_rate": 3.3790375281311355e-06, | |
| "loss": 0.8816713094711304, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.7649667405764968, | |
| "grad_norm": 6.342792987823486, | |
| "learning_rate": 3.3711095300617015e-06, | |
| "loss": 0.8279204368591309, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.7694013303769403, | |
| "grad_norm": 0.9115824699401855, | |
| "learning_rate": 3.3631731757174048e-06, | |
| "loss": 1.0542290210723877, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.7738359201773837, | |
| "grad_norm": 0.9469823837280273, | |
| "learning_rate": 3.3552285718722593e-06, | |
| "loss": 1.0976489782333374, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.778270509977827, | |
| "grad_norm": 0.9568173289299011, | |
| "learning_rate": 3.3472758254112662e-06, | |
| "loss": 1.0607342720031738, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.7827050997782705, | |
| "grad_norm": 1.9793434143066406, | |
| "learning_rate": 3.3393150433289795e-06, | |
| "loss": 1.1424230337142944, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.787139689578714, | |
| "grad_norm": 1.001698613166809, | |
| "learning_rate": 3.3313463327280576e-06, | |
| "loss": 0.7997353672981262, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.7915742793791574, | |
| "grad_norm": 1.129724383354187, | |
| "learning_rate": 3.3233698008178306e-06, | |
| "loss": 1.1001020669937134, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.7960088691796008, | |
| "grad_norm": 2.6598269939422607, | |
| "learning_rate": 3.3153855549128537e-06, | |
| "loss": 0.60479736328125, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.8004434589800442, | |
| "grad_norm": 3.824202537536621, | |
| "learning_rate": 3.3073937024314647e-06, | |
| "loss": 0.6236993074417114, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.8048780487804879, | |
| "grad_norm": 1.750210165977478, | |
| "learning_rate": 3.2993943508943386e-06, | |
| "loss": 1.2215160131454468, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.8093126385809313, | |
| "grad_norm": 2.1696269512176514, | |
| "learning_rate": 3.291387607923041e-06, | |
| "loss": 0.9674443006515503, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.8137472283813747, | |
| "grad_norm": 0.8863484263420105, | |
| "learning_rate": 3.283373581238582e-06, | |
| "loss": 0.49516186118125916, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.8181818181818183, | |
| "grad_norm": 1.207751750946045, | |
| "learning_rate": 3.2753523786599618e-06, | |
| "loss": 1.1064543724060059, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.8226164079822618, | |
| "grad_norm": 1.975991129875183, | |
| "learning_rate": 3.2673241081027263e-06, | |
| "loss": 1.086673617362976, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.8270509977827052, | |
| "grad_norm": 0.8671886920928955, | |
| "learning_rate": 3.259288877577512e-06, | |
| "loss": 1.1535236835479736, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.8314855875831486, | |
| "grad_norm": 1.9907917976379395, | |
| "learning_rate": 3.251246795188592e-06, | |
| "loss": 0.9606926441192627, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.835920177383592, | |
| "grad_norm": 2.376696825027466, | |
| "learning_rate": 3.243197969132425e-06, | |
| "loss": 0.8494828343391418, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.8403547671840355, | |
| "grad_norm": 0.9770940542221069, | |
| "learning_rate": 3.2351425076961957e-06, | |
| "loss": 1.0482726097106934, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.8447893569844789, | |
| "grad_norm": 1.570204496383667, | |
| "learning_rate": 3.22708051925636e-06, | |
| "loss": 1.0887196063995361, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.8492239467849223, | |
| "grad_norm": 0.7769954800605774, | |
| "learning_rate": 3.219012112277189e-06, | |
| "loss": 1.0047810077667236, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.8536585365853657, | |
| "grad_norm": 0.8842262029647827, | |
| "learning_rate": 3.210937395309304e-06, | |
| "loss": 1.151458501815796, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.8580931263858091, | |
| "grad_norm": 0.9991164803504944, | |
| "learning_rate": 3.202856476988222e-06, | |
| "loss": 1.0137677192687988, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.8625277161862528, | |
| "grad_norm": 2.2849833965301514, | |
| "learning_rate": 3.1947694660328914e-06, | |
| "loss": 1.1807574033737183, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.8669623059866962, | |
| "grad_norm": 0.8035255670547485, | |
| "learning_rate": 3.1866764712442273e-06, | |
| "loss": 0.6505411863327026, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.8713968957871396, | |
| "grad_norm": 1.0458487272262573, | |
| "learning_rate": 3.1785776015036533e-06, | |
| "loss": 0.771373987197876, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.8758314855875833, | |
| "grad_norm": 11.2003812789917, | |
| "learning_rate": 3.1704729657716314e-06, | |
| "loss": 0.8910792469978333, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.8802660753880267, | |
| "grad_norm": 1.6752492189407349, | |
| "learning_rate": 3.1623626730861996e-06, | |
| "loss": 1.0148093700408936, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.8847006651884701, | |
| "grad_norm": 0.9156674742698669, | |
| "learning_rate": 3.1542468325615e-06, | |
| "loss": 1.359726071357727, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.8891352549889135, | |
| "grad_norm": 1.454277515411377, | |
| "learning_rate": 3.1461255533863183e-06, | |
| "loss": 0.7917972207069397, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.893569844789357, | |
| "grad_norm": 0.9512672424316406, | |
| "learning_rate": 3.1379989448226077e-06, | |
| "loss": 1.073644757270813, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.8980044345898004, | |
| "grad_norm": 1.3854492902755737, | |
| "learning_rate": 3.1298671162040236e-06, | |
| "loss": 0.9803899526596069, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.9024390243902438, | |
| "grad_norm": 0.645820677280426, | |
| "learning_rate": 3.1217301769344488e-06, | |
| "loss": 0.7556897401809692, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.9068736141906872, | |
| "grad_norm": 4.2880635261535645, | |
| "learning_rate": 3.1135882364865262e-06, | |
| "loss": 0.8003832101821899, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.9113082039911307, | |
| "grad_norm": 1.7977222204208374, | |
| "learning_rate": 3.105441404400183e-06, | |
| "loss": 1.0547810792922974, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.9157427937915743, | |
| "grad_norm": 0.9238870739936829, | |
| "learning_rate": 3.097289790281155e-06, | |
| "loss": 0.8129977583885193, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.9201773835920177, | |
| "grad_norm": 3.8049755096435547, | |
| "learning_rate": 3.089133503799517e-06, | |
| "loss": 0.6351861953735352, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.9246119733924612, | |
| "grad_norm": 0.8284581899642944, | |
| "learning_rate": 3.0809726546882045e-06, | |
| "loss": 1.1457592248916626, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.9290465631929048, | |
| "grad_norm": 1.308618426322937, | |
| "learning_rate": 3.0728073527415376e-06, | |
| "loss": 1.0072696208953857, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.9334811529933482, | |
| "grad_norm": 0.38561350107192993, | |
| "learning_rate": 3.0646377078137424e-06, | |
| "loss": 0.7055673599243164, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.9379157427937916, | |
| "grad_norm": 4.75135612487793, | |
| "learning_rate": 3.056463829817475e-06, | |
| "loss": 0.6644902229309082, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.942350332594235, | |
| "grad_norm": 2.237863540649414, | |
| "learning_rate": 3.048285828722345e-06, | |
| "loss": 0.7358293533325195, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.9467849223946785, | |
| "grad_norm": 3.476473569869995, | |
| "learning_rate": 3.0401038145534297e-06, | |
| "loss": 0.8098105788230896, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.951219512195122, | |
| "grad_norm": 1.5437142848968506, | |
| "learning_rate": 3.031917897389799e-06, | |
| "loss": 0.8312487602233887, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.9556541019955653, | |
| "grad_norm": 3.081554412841797, | |
| "learning_rate": 3.0237281873630335e-06, | |
| "loss": 0.6966821551322937, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.9600886917960088, | |
| "grad_norm": 1.036048173904419, | |
| "learning_rate": 3.0155347946557407e-06, | |
| "loss": 0.8802693486213684, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.9645232815964522, | |
| "grad_norm": 0.683119535446167, | |
| "learning_rate": 3.007337829500075e-06, | |
| "loss": 1.0593414306640625, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.9689578713968958, | |
| "grad_norm": 0.7239016890525818, | |
| "learning_rate": 2.999137402176255e-06, | |
| "loss": 1.0278575420379639, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.9733924611973392, | |
| "grad_norm": 1.6972147226333618, | |
| "learning_rate": 2.9909336230110747e-06, | |
| "loss": 1.0861477851867676, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.9778270509977827, | |
| "grad_norm": 1.00822114944458, | |
| "learning_rate": 2.9827266023764274e-06, | |
| "loss": 1.1251873970031738, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.9822616407982263, | |
| "grad_norm": 0.9262673854827881, | |
| "learning_rate": 2.9745164506878134e-06, | |
| "loss": 1.1135941743850708, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.9866962305986697, | |
| "grad_norm": 1.560065746307373, | |
| "learning_rate": 2.9663032784028596e-06, | |
| "loss": 0.8289718627929688, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.9911308203991132, | |
| "grad_norm": 1.1300979852676392, | |
| "learning_rate": 2.9580871960198297e-06, | |
| "loss": 1.0270378589630127, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.9955654101995566, | |
| "grad_norm": 1.9531309604644775, | |
| "learning_rate": 2.949868314076142e-06, | |
| "loss": 0.792127251625061, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.6045248508453369, | |
| "learning_rate": 2.941646743146875e-06, | |
| "loss": 1.0664583444595337, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 2.0044345898004434, | |
| "grad_norm": 0.8053255081176758, | |
| "learning_rate": 2.9334225938432868e-06, | |
| "loss": 0.8489485383033752, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 2.008869179600887, | |
| "grad_norm": 1.6182339191436768, | |
| "learning_rate": 2.925195976811326e-06, | |
| "loss": 0.9491725564002991, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 2.0133037694013303, | |
| "grad_norm": 0.8146681189537048, | |
| "learning_rate": 2.9169670027301387e-06, | |
| "loss": 1.0454566478729248, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 2.0177383592017737, | |
| "grad_norm": 0.9472193717956543, | |
| "learning_rate": 2.9087357823105843e-06, | |
| "loss": 0.9005047082901001, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 2.022172949002217, | |
| "grad_norm": 1.0615944862365723, | |
| "learning_rate": 2.9005024262937427e-06, | |
| "loss": 0.6029907464981079, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 2.0266075388026605, | |
| "grad_norm": 1.9702504873275757, | |
| "learning_rate": 2.8922670454494247e-06, | |
| "loss": 0.656877875328064, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 2.0310421286031044, | |
| "grad_norm": 0.9048966765403748, | |
| "learning_rate": 2.8840297505746843e-06, | |
| "loss": 0.6500725746154785, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 2.035476718403548, | |
| "grad_norm": 0.9751022458076477, | |
| "learning_rate": 2.8757906524923286e-06, | |
| "loss": 0.8578327298164368, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 2.0399113082039912, | |
| "grad_norm": 0.8147334456443787, | |
| "learning_rate": 2.867549862049419e-06, | |
| "loss": 0.6973807215690613, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.0443458980044347, | |
| "grad_norm": 0.9924507141113281, | |
| "learning_rate": 2.859307490115791e-06, | |
| "loss": 0.7475817799568176, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 2.048780487804878, | |
| "grad_norm": 0.5117158889770508, | |
| "learning_rate": 2.8510636475825533e-06, | |
| "loss": 0.24903425574302673, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 2.0532150776053215, | |
| "grad_norm": 1.5849206447601318, | |
| "learning_rate": 2.8428184453606027e-06, | |
| "loss": 0.849932074546814, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 2.057649667405765, | |
| "grad_norm": 2.9684829711914062, | |
| "learning_rate": 2.8345719943791266e-06, | |
| "loss": 0.6786062121391296, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 2.0620842572062084, | |
| "grad_norm": 0.6647122502326965, | |
| "learning_rate": 2.826324405584114e-06, | |
| "loss": 0.6096989512443542, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.066518847006652, | |
| "grad_norm": 3.739225387573242, | |
| "learning_rate": 2.818075789936863e-06, | |
| "loss": 0.7550086379051208, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 2.070953436807095, | |
| "grad_norm": 1.7136473655700684, | |
| "learning_rate": 2.8098262584124834e-06, | |
| "loss": 0.8016495704650879, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 2.0753880266075386, | |
| "grad_norm": 0.6798487305641174, | |
| "learning_rate": 2.801575921998411e-06, | |
| "loss": 0.9375801682472229, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 2.079822616407982, | |
| "grad_norm": 0.6735354661941528, | |
| "learning_rate": 2.7933248916929066e-06, | |
| "loss": 0.6009020805358887, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 2.084257206208426, | |
| "grad_norm": 0.593377947807312, | |
| "learning_rate": 2.7850732785035705e-06, | |
| "loss": 0.6308031678199768, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.0886917960088693, | |
| "grad_norm": 1.553142786026001, | |
| "learning_rate": 2.7768211934458417e-06, | |
| "loss": 0.8890527486801147, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 2.0931263858093128, | |
| "grad_norm": 0.8441118001937866, | |
| "learning_rate": 2.768568747541509e-06, | |
| "loss": 0.5294168591499329, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 2.097560975609756, | |
| "grad_norm": 1.1496201753616333, | |
| "learning_rate": 2.7603160518172152e-06, | |
| "loss": 0.8766027688980103, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 2.1019955654101996, | |
| "grad_norm": 1.2092889547348022, | |
| "learning_rate": 2.752063217302966e-06, | |
| "loss": 0.9853121042251587, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 2.106430155210643, | |
| "grad_norm": 2.6365861892700195, | |
| "learning_rate": 2.743810355030631e-06, | |
| "loss": 0.6440744996070862, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.1108647450110865, | |
| "grad_norm": 1.4584583044052124, | |
| "learning_rate": 2.735557576032458e-06, | |
| "loss": 0.7647169828414917, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 2.11529933481153, | |
| "grad_norm": 3.336189031600952, | |
| "learning_rate": 2.727304991339569e-06, | |
| "loss": 0.8036750555038452, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 2.1197339246119733, | |
| "grad_norm": 0.6153157353401184, | |
| "learning_rate": 2.7190527119804762e-06, | |
| "loss": 0.8988800644874573, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 2.1241685144124167, | |
| "grad_norm": 9.374985694885254, | |
| "learning_rate": 2.710800848979582e-06, | |
| "loss": 0.7791066765785217, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 2.12860310421286, | |
| "grad_norm": 1.6065351963043213, | |
| "learning_rate": 2.702549513355687e-06, | |
| "loss": 0.7849658131599426, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.1330376940133036, | |
| "grad_norm": 4.621856689453125, | |
| "learning_rate": 2.694298816120497e-06, | |
| "loss": 0.5346379280090332, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 2.1374722838137474, | |
| "grad_norm": 1.9854819774627686, | |
| "learning_rate": 2.6860488682771306e-06, | |
| "loss": 1.0068566799163818, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 2.141906873614191, | |
| "grad_norm": 0.6731406450271606, | |
| "learning_rate": 2.67779978081862e-06, | |
| "loss": 0.9128319025039673, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 2.1463414634146343, | |
| "grad_norm": 0.8809360861778259, | |
| "learning_rate": 2.669551664726428e-06, | |
| "loss": 0.6138067245483398, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 2.1507760532150777, | |
| "grad_norm": 0.7070305943489075, | |
| "learning_rate": 2.6613046309689433e-06, | |
| "loss": 0.7260922193527222, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.155210643015521, | |
| "grad_norm": 0.9328792691230774, | |
| "learning_rate": 2.6530587904999966e-06, | |
| "loss": 0.5952677726745605, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 2.1596452328159645, | |
| "grad_norm": 1.6889206171035767, | |
| "learning_rate": 2.6448142542573624e-06, | |
| "loss": 0.933496356010437, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 2.164079822616408, | |
| "grad_norm": 1.7583260536193848, | |
| "learning_rate": 2.6365711331612692e-06, | |
| "loss": 0.906917929649353, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 2.1685144124168514, | |
| "grad_norm": 2.437592029571533, | |
| "learning_rate": 2.6283295381129066e-06, | |
| "loss": 0.6723505258560181, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 2.172949002217295, | |
| "grad_norm": 1.2844873666763306, | |
| "learning_rate": 2.620089579992933e-06, | |
| "loss": 0.9519558548927307, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.1773835920177382, | |
| "grad_norm": 0.7902489900588989, | |
| "learning_rate": 2.6118513696599823e-06, | |
| "loss": 0.5118272304534912, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 2.1818181818181817, | |
| "grad_norm": 1.085485577583313, | |
| "learning_rate": 2.603615017949178e-06, | |
| "loss": 0.31727081537246704, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 2.186252771618625, | |
| "grad_norm": 2.681396722793579, | |
| "learning_rate": 2.595380635670634e-06, | |
| "loss": 0.6958884596824646, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 2.1906873614190685, | |
| "grad_norm": 2.31773042678833, | |
| "learning_rate": 2.5871483336079694e-06, | |
| "loss": 0.4852255582809448, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 2.1951219512195124, | |
| "grad_norm": 0.9662159085273743, | |
| "learning_rate": 2.578918222516818e-06, | |
| "loss": 0.8706328868865967, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.199556541019956, | |
| "grad_norm": 1.4837191104888916, | |
| "learning_rate": 2.5706904131233336e-06, | |
| "loss": 1.0283949375152588, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 2.203991130820399, | |
| "grad_norm": 0.7699248194694519, | |
| "learning_rate": 2.5624650161227073e-06, | |
| "loss": 0.8138683438301086, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 2.2084257206208426, | |
| "grad_norm": 2.567706346511841, | |
| "learning_rate": 2.5542421421776696e-06, | |
| "loss": 0.9206845760345459, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 2.212860310421286, | |
| "grad_norm": 1.1012285947799683, | |
| "learning_rate": 2.5460219019170097e-06, | |
| "loss": 0.8530703783035278, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 2.2172949002217295, | |
| "grad_norm": 0.972668468952179, | |
| "learning_rate": 2.5378044059340845e-06, | |
| "loss": 0.7263464331626892, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.221729490022173, | |
| "grad_norm": 29.759498596191406, | |
| "learning_rate": 2.5295897647853283e-06, | |
| "loss": 0.42510873079299927, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 2.2261640798226163, | |
| "grad_norm": 0.734957218170166, | |
| "learning_rate": 2.521378088988767e-06, | |
| "loss": 0.9547646641731262, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 2.2305986696230597, | |
| "grad_norm": 1.0839177370071411, | |
| "learning_rate": 2.513169489022531e-06, | |
| "loss": 0.8016040921211243, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 2.235033259423503, | |
| "grad_norm": 0.6618872880935669, | |
| "learning_rate": 2.5049640753233705e-06, | |
| "loss": 0.8940417766571045, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 2.2394678492239466, | |
| "grad_norm": 1.0671074390411377, | |
| "learning_rate": 2.496761958285167e-06, | |
| "loss": 0.5136449933052063, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.2439024390243905, | |
| "grad_norm": 0.7268696427345276, | |
| "learning_rate": 2.488563248257451e-06, | |
| "loss": 0.8877573609352112, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 2.248337028824834, | |
| "grad_norm": 4.092066764831543, | |
| "learning_rate": 2.4803680555439136e-06, | |
| "loss": 0.769023060798645, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 2.2527716186252773, | |
| "grad_norm": 1.659173846244812, | |
| "learning_rate": 2.4721764904009272e-06, | |
| "loss": 0.8959583640098572, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 2.2572062084257207, | |
| "grad_norm": 2.1315901279449463, | |
| "learning_rate": 2.4639886630360574e-06, | |
| "loss": 0.5399938821792603, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 2.261640798226164, | |
| "grad_norm": 0.9298582673072815, | |
| "learning_rate": 2.455804683606584e-06, | |
| "loss": 0.9447622895240784, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.2660753880266076, | |
| "grad_norm": 0.33580493927001953, | |
| "learning_rate": 2.4476246622180174e-06, | |
| "loss": 0.4857233166694641, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 2.270509977827051, | |
| "grad_norm": 1.8351166248321533, | |
| "learning_rate": 2.4394487089226158e-06, | |
| "loss": 1.283814787864685, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 2.2749445676274944, | |
| "grad_norm": 0.8674602508544922, | |
| "learning_rate": 2.43127693371791e-06, | |
| "loss": 0.6131373643875122, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 2.279379157427938, | |
| "grad_norm": 1.49549400806427, | |
| "learning_rate": 2.423109446545213e-06, | |
| "loss": 0.9470140337944031, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 2.2838137472283813, | |
| "grad_norm": 0.5943393707275391, | |
| "learning_rate": 2.4149463572881537e-06, | |
| "loss": 0.9135305881500244, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.2882483370288247, | |
| "grad_norm": 1.793879508972168, | |
| "learning_rate": 2.4067877757711907e-06, | |
| "loss": 0.6614438891410828, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 2.292682926829268, | |
| "grad_norm": 1.0091503858566284, | |
| "learning_rate": 2.3986338117581357e-06, | |
| "loss": 0.7243059277534485, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 2.2971175166297115, | |
| "grad_norm": 1.3116031885147095, | |
| "learning_rate": 2.390484574950677e-06, | |
| "loss": 0.9390950798988342, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 2.3015521064301554, | |
| "grad_norm": 2.461923837661743, | |
| "learning_rate": 2.382340174986906e-06, | |
| "loss": 0.6997823715209961, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 2.305986696230599, | |
| "grad_norm": 1.6579372882843018, | |
| "learning_rate": 2.374200721439837e-06, | |
| "loss": 0.9242331385612488, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.3104212860310422, | |
| "grad_norm": 1.2097886800765991, | |
| "learning_rate": 2.3660663238159405e-06, | |
| "loss": 1.012819766998291, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 2.3148558758314857, | |
| "grad_norm": 1.4017579555511475, | |
| "learning_rate": 2.357937091553662e-06, | |
| "loss": 0.6412765383720398, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 2.319290465631929, | |
| "grad_norm": 1.0496717691421509, | |
| "learning_rate": 2.3498131340219554e-06, | |
| "loss": 0.9358582496643066, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 2.3237250554323725, | |
| "grad_norm": 0.7849573493003845, | |
| "learning_rate": 2.341694560518809e-06, | |
| "loss": 0.8715764880180359, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 2.328159645232816, | |
| "grad_norm": 0.8973456621170044, | |
| "learning_rate": 2.333581480269776e-06, | |
| "loss": 0.9279825687408447, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.3325942350332594, | |
| "grad_norm": 0.914661169052124, | |
| "learning_rate": 2.325474002426503e-06, | |
| "loss": 0.8819960951805115, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 2.337028824833703, | |
| "grad_norm": 0.6689122915267944, | |
| "learning_rate": 2.3173722360652644e-06, | |
| "loss": 0.6645777821540833, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 2.341463414634146, | |
| "grad_norm": 0.7918397188186646, | |
| "learning_rate": 2.309276290185494e-06, | |
| "loss": 0.8606789708137512, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 2.3458980044345896, | |
| "grad_norm": 0.44345715641975403, | |
| "learning_rate": 2.3011862737083162e-06, | |
| "loss": 0.5443306565284729, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 2.3503325942350335, | |
| "grad_norm": 0.6253224015235901, | |
| "learning_rate": 2.2931022954750843e-06, | |
| "loss": 1.009533405303955, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.354767184035477, | |
| "grad_norm": 2.641712188720703, | |
| "learning_rate": 2.285024464245912e-06, | |
| "loss": 0.4540158808231354, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 2.3592017738359203, | |
| "grad_norm": 0.5843250751495361, | |
| "learning_rate": 2.2769528886982158e-06, | |
| "loss": 0.49174901843070984, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 2.3636363636363638, | |
| "grad_norm": 3.842958927154541, | |
| "learning_rate": 2.268887677425248e-06, | |
| "loss": 0.7375195026397705, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 2.368070953436807, | |
| "grad_norm": 1.1907048225402832, | |
| "learning_rate": 2.2608289389346362e-06, | |
| "loss": 1.0150161981582642, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 2.3725055432372506, | |
| "grad_norm": 0.834156334400177, | |
| "learning_rate": 2.2527767816469263e-06, | |
| "loss": 0.8399034142494202, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.376940133037694, | |
| "grad_norm": 0.8549678325653076, | |
| "learning_rate": 2.244731313894121e-06, | |
| "loss": 0.87836754322052, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 2.3813747228381374, | |
| "grad_norm": 0.5392049551010132, | |
| "learning_rate": 2.236692643918224e-06, | |
| "loss": 0.040761929005384445, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 2.385809312638581, | |
| "grad_norm": 2.3668556213378906, | |
| "learning_rate": 2.2286608798697834e-06, | |
| "loss": 0.5826558470726013, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 2.3902439024390243, | |
| "grad_norm": 0.8297257423400879, | |
| "learning_rate": 2.2206361298064343e-06, | |
| "loss": 0.29340535402297974, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 2.3946784922394677, | |
| "grad_norm": 11.721139907836914, | |
| "learning_rate": 2.2126185016914515e-06, | |
| "loss": 0.8513185381889343, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.399113082039911, | |
| "grad_norm": 0.919727623462677, | |
| "learning_rate": 2.2046081033922884e-06, | |
| "loss": 0.5718480348587036, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 2.4035476718403546, | |
| "grad_norm": 2.098891019821167, | |
| "learning_rate": 2.1966050426791325e-06, | |
| "loss": 0.7035835385322571, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 2.4079822616407984, | |
| "grad_norm": 0.9428548216819763, | |
| "learning_rate": 2.1886094272234508e-06, | |
| "loss": 0.906488299369812, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 2.412416851441242, | |
| "grad_norm": 0.5806995630264282, | |
| "learning_rate": 2.1806213645965457e-06, | |
| "loss": 0.29125481843948364, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 2.4168514412416853, | |
| "grad_norm": 0.8353445529937744, | |
| "learning_rate": 2.172640962268104e-06, | |
| "loss": 0.9137746691703796, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.4212860310421287, | |
| "grad_norm": 1.1309226751327515, | |
| "learning_rate": 2.1646683276047525e-06, | |
| "loss": 0.9136351943016052, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 2.425720620842572, | |
| "grad_norm": 1.4746575355529785, | |
| "learning_rate": 2.156703567868615e-06, | |
| "loss": 0.4814295470714569, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 2.4301552106430155, | |
| "grad_norm": 4.6678032875061035, | |
| "learning_rate": 2.148746790215866e-06, | |
| "loss": 0.5724620819091797, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 2.434589800443459, | |
| "grad_norm": 0.5765445828437805, | |
| "learning_rate": 2.140798101695291e-06, | |
| "loss": 0.45596760511398315, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 2.4390243902439024, | |
| "grad_norm": 3.441075086593628, | |
| "learning_rate": 2.1328576092468476e-06, | |
| "loss": 1.0661569833755493, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.443458980044346, | |
| "grad_norm": 1.3114399909973145, | |
| "learning_rate": 2.124925419700223e-06, | |
| "loss": 0.9525973796844482, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 2.4478935698447892, | |
| "grad_norm": 1.7259883880615234, | |
| "learning_rate": 2.1170016397734e-06, | |
| "loss": 0.45727112889289856, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 2.4523281596452327, | |
| "grad_norm": 0.6469590067863464, | |
| "learning_rate": 2.109086376071221e-06, | |
| "loss": 0.940199077129364, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 2.4567627494456765, | |
| "grad_norm": 3.1953182220458984, | |
| "learning_rate": 2.1011797350839513e-06, | |
| "loss": 0.8561551570892334, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 2.4611973392461195, | |
| "grad_norm": 1.3489811420440674, | |
| "learning_rate": 2.093281823185848e-06, | |
| "loss": 0.9693219661712646, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.4656319290465634, | |
| "grad_norm": 0.7777767777442932, | |
| "learning_rate": 2.0853927466337315e-06, | |
| "loss": 0.6467586755752563, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 2.470066518847007, | |
| "grad_norm": 1.0480725765228271, | |
| "learning_rate": 2.077512611565551e-06, | |
| "loss": 0.870927095413208, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 2.47450110864745, | |
| "grad_norm": 1.7313536405563354, | |
| "learning_rate": 2.0696415239989593e-06, | |
| "loss": 0.325950562953949, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 2.4789356984478936, | |
| "grad_norm": 0.7683355212211609, | |
| "learning_rate": 2.0617795898298855e-06, | |
| "loss": 0.8618558645248413, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 2.483370288248337, | |
| "grad_norm": 0.7160274386405945, | |
| "learning_rate": 2.053926914831112e-06, | |
| "loss": 0.8484733700752258, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.4878048780487805, | |
| "grad_norm": 0.9705782532691956, | |
| "learning_rate": 2.04608360465085e-06, | |
| "loss": 0.9692363142967224, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 2.492239467849224, | |
| "grad_norm": 4.628700256347656, | |
| "learning_rate": 2.038249764811318e-06, | |
| "loss": 0.9982001781463623, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 2.4966740576496673, | |
| "grad_norm": 0.741965115070343, | |
| "learning_rate": 2.0304255007073227e-06, | |
| "loss": 0.9597415328025818, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 2.5011086474501107, | |
| "grad_norm": 1.0579674243927002, | |
| "learning_rate": 2.022610917604842e-06, | |
| "loss": 0.6873862147331238, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 2.505543237250554, | |
| "grad_norm": 3.054508686065674, | |
| "learning_rate": 2.014806120639605e-06, | |
| "loss": 0.6469390392303467, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.5099778270509976, | |
| "grad_norm": 0.8802086114883423, | |
| "learning_rate": 2.007011214815684e-06, | |
| "loss": 0.7718120813369751, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 2.5144124168514415, | |
| "grad_norm": 0.6990912556648254, | |
| "learning_rate": 1.9992263050040737e-06, | |
| "loss": 0.5093148350715637, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 2.5188470066518844, | |
| "grad_norm": 1.247750997543335, | |
| "learning_rate": 1.991451495941289e-06, | |
| "loss": 1.002577543258667, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 2.5232815964523283, | |
| "grad_norm": 0.7929804921150208, | |
| "learning_rate": 1.983686892227948e-06, | |
| "loss": 0.7795249223709106, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 2.5277161862527717, | |
| "grad_norm": 1.0441555976867676, | |
| "learning_rate": 1.975932598327369e-06, | |
| "loss": 0.9324785470962524, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.532150776053215, | |
| "grad_norm": 1.305307149887085, | |
| "learning_rate": 1.9681887185641646e-06, | |
| "loss": 0.4589383602142334, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 2.5365853658536586, | |
| "grad_norm": 0.3339233696460724, | |
| "learning_rate": 1.9604553571228395e-06, | |
| "loss": 0.5734773874282837, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 2.541019955654102, | |
| "grad_norm": 0.8641642332077026, | |
| "learning_rate": 1.9527326180463855e-06, | |
| "loss": 0.8844019770622253, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 2.5454545454545454, | |
| "grad_norm": 0.8415119647979736, | |
| "learning_rate": 1.9450206052348823e-06, | |
| "loss": 0.8842139840126038, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 2.549889135254989, | |
| "grad_norm": 0.9703028798103333, | |
| "learning_rate": 1.9373194224441028e-06, | |
| "loss": 0.8934570550918579, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.5543237250554323, | |
| "grad_norm": 1.8766459226608276, | |
| "learning_rate": 1.929629173284114e-06, | |
| "loss": 0.63346928358078, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 2.5587583148558757, | |
| "grad_norm": 0.8151181936264038, | |
| "learning_rate": 1.9219499612178836e-06, | |
| "loss": 0.30481529235839844, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 2.5631929046563195, | |
| "grad_norm": 0.7087282538414001, | |
| "learning_rate": 1.9142818895598908e-06, | |
| "loss": 0.5541834831237793, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 2.5676274944567625, | |
| "grad_norm": 2.2988929748535156, | |
| "learning_rate": 1.9066250614747317e-06, | |
| "loss": 0.6360606551170349, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 2.5720620842572064, | |
| "grad_norm": 1.320077896118164, | |
| "learning_rate": 1.8989795799757348e-06, | |
| "loss": 0.824614405632019, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.57649667405765, | |
| "grad_norm": 0.7024682760238647, | |
| "learning_rate": 1.8913455479235754e-06, | |
| "loss": 0.8806930184364319, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 2.5809312638580932, | |
| "grad_norm": 0.8074402809143066, | |
| "learning_rate": 1.8837230680248874e-06, | |
| "loss": 0.8695907592773438, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 2.5853658536585367, | |
| "grad_norm": 0.8083245158195496, | |
| "learning_rate": 1.8761122428308875e-06, | |
| "loss": 0.6029998660087585, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 2.58980044345898, | |
| "grad_norm": 0.8537569642066956, | |
| "learning_rate": 1.8685131747359902e-06, | |
| "loss": 0.9235411882400513, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 2.5942350332594235, | |
| "grad_norm": 1.1320568323135376, | |
| "learning_rate": 1.8609259659764345e-06, | |
| "loss": 0.8941707611083984, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.598669623059867, | |
| "grad_norm": 2.4210267066955566, | |
| "learning_rate": 1.853350718628904e-06, | |
| "loss": 0.3670371174812317, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 2.6031042128603104, | |
| "grad_norm": 1.3128033876419067, | |
| "learning_rate": 1.845787534609157e-06, | |
| "loss": 0.6117712259292603, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 2.6075388026607538, | |
| "grad_norm": 0.8735086917877197, | |
| "learning_rate": 1.8382365156706566e-06, | |
| "loss": 0.6652023196220398, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 2.611973392461197, | |
| "grad_norm": 0.6818370223045349, | |
| "learning_rate": 1.8306977634031976e-06, | |
| "loss": 0.5446036458015442, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 2.6164079822616406, | |
| "grad_norm": 3.452026844024658, | |
| "learning_rate": 1.8231713792315403e-06, | |
| "loss": 0.7938367128372192, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.6208425720620845, | |
| "grad_norm": 0.9159925580024719, | |
| "learning_rate": 1.8156574644140495e-06, | |
| "loss": 0.8983339667320251, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 2.6252771618625275, | |
| "grad_norm": 0.8673039674758911, | |
| "learning_rate": 1.8081561200413295e-06, | |
| "loss": 0.8714417815208435, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 2.6297117516629713, | |
| "grad_norm": 1.4530643224716187, | |
| "learning_rate": 1.800667447034864e-06, | |
| "loss": 0.874497652053833, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 2.6341463414634148, | |
| "grad_norm": 2.0185797214508057, | |
| "learning_rate": 1.7931915461456573e-06, | |
| "loss": 1.0262384414672852, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 2.638580931263858, | |
| "grad_norm": 0.7627118229866028, | |
| "learning_rate": 1.7857285179528838e-06, | |
| "loss": 0.5378797054290771, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.6430155210643016, | |
| "grad_norm": 1.8396339416503906, | |
| "learning_rate": 1.7782784628625305e-06, | |
| "loss": 0.7439752221107483, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 2.647450110864745, | |
| "grad_norm": 1.5848437547683716, | |
| "learning_rate": 1.7708414811060437e-06, | |
| "loss": 0.5809391736984253, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 2.6518847006651884, | |
| "grad_norm": 0.9342997074127197, | |
| "learning_rate": 1.763417672738989e-06, | |
| "loss": 0.7553034424781799, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 2.656319290465632, | |
| "grad_norm": 0.5442282557487488, | |
| "learning_rate": 1.7560071376396953e-06, | |
| "loss": 0.25871163606643677, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 2.6607538802660753, | |
| "grad_norm": 0.8709027767181396, | |
| "learning_rate": 1.7486099755079197e-06, | |
| "loss": 0.9024768471717834, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.6651884700665187, | |
| "grad_norm": 0.8886547088623047, | |
| "learning_rate": 1.7412262858634987e-06, | |
| "loss": 0.9686548113822937, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 2.6696230598669626, | |
| "grad_norm": 0.6681715250015259, | |
| "learning_rate": 1.7338561680450171e-06, | |
| "loss": 0.46400630474090576, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 2.6740576496674056, | |
| "grad_norm": 1.2975910902023315, | |
| "learning_rate": 1.7264997212084616e-06, | |
| "loss": 0.7697018384933472, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 2.6784922394678494, | |
| "grad_norm": 2.711646556854248, | |
| "learning_rate": 1.7191570443258976e-06, | |
| "loss": 1.056341290473938, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 2.682926829268293, | |
| "grad_norm": 1.6570303440093994, | |
| "learning_rate": 1.711828236184131e-06, | |
| "loss": 0.5748533606529236, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.6873614190687363, | |
| "grad_norm": 1.9034491777420044, | |
| "learning_rate": 1.704513395383378e-06, | |
| "loss": 0.31726494431495667, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 2.6917960088691797, | |
| "grad_norm": 1.501547932624817, | |
| "learning_rate": 1.6972126203359454e-06, | |
| "loss": 0.6147481203079224, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 2.696230598669623, | |
| "grad_norm": 3.692929744720459, | |
| "learning_rate": 1.6899260092648995e-06, | |
| "loss": 0.6757416129112244, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 2.7006651884700665, | |
| "grad_norm": 1.5111517906188965, | |
| "learning_rate": 1.6826536602027471e-06, | |
| "loss": 0.6192297339439392, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 2.70509977827051, | |
| "grad_norm": 1.2896053791046143, | |
| "learning_rate": 1.6753956709901202e-06, | |
| "loss": 1.0000026226043701, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.7095343680709534, | |
| "grad_norm": 2.9973676204681396, | |
| "learning_rate": 1.6681521392744515e-06, | |
| "loss": 0.7289214730262756, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 2.713968957871397, | |
| "grad_norm": 2.971303939819336, | |
| "learning_rate": 1.660923162508671e-06, | |
| "loss": 0.8310694694519043, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 2.7184035476718402, | |
| "grad_norm": 2.4088239669799805, | |
| "learning_rate": 1.6537088379498872e-06, | |
| "loss": 0.47293317317962646, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 2.7228381374722836, | |
| "grad_norm": 0.6797178387641907, | |
| "learning_rate": 1.6465092626580787e-06, | |
| "loss": 0.983069896697998, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 2.7272727272727275, | |
| "grad_norm": 1.6718101501464844, | |
| "learning_rate": 1.6393245334947942e-06, | |
| "loss": 0.7170325517654419, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.7317073170731705, | |
| "grad_norm": 0.7277650833129883, | |
| "learning_rate": 1.6321547471218432e-06, | |
| "loss": 0.882874608039856, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 2.7361419068736144, | |
| "grad_norm": 1.7578508853912354, | |
| "learning_rate": 1.6250000000000007e-06, | |
| "loss": 0.5405250787734985, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 2.740576496674058, | |
| "grad_norm": 0.9388027191162109, | |
| "learning_rate": 1.6178603883877032e-06, | |
| "loss": 0.8325910568237305, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 2.745011086474501, | |
| "grad_norm": 0.7891237735748291, | |
| "learning_rate": 1.6107360083397604e-06, | |
| "loss": 0.5409272313117981, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 2.7494456762749446, | |
| "grad_norm": 0.7106161713600159, | |
| "learning_rate": 1.6036269557060594e-06, | |
| "loss": 0.6126099824905396, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.753880266075388, | |
| "grad_norm": 0.8075646162033081, | |
| "learning_rate": 1.5965333261302735e-06, | |
| "loss": 0.9320923089981079, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 2.7583148558758315, | |
| "grad_norm": 0.8899918794631958, | |
| "learning_rate": 1.5894552150485801e-06, | |
| "loss": 1.0979969501495361, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 2.762749445676275, | |
| "grad_norm": 3.5077619552612305, | |
| "learning_rate": 1.5823927176883725e-06, | |
| "loss": 0.4379834234714508, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 2.7671840354767183, | |
| "grad_norm": 3.83512544631958, | |
| "learning_rate": 1.5753459290669792e-06, | |
| "loss": 0.6199414134025574, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 2.7716186252771617, | |
| "grad_norm": 3.1204421520233154, | |
| "learning_rate": 1.5683149439903905e-06, | |
| "loss": 0.6263423562049866, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.776053215077605, | |
| "grad_norm": 1.005743145942688, | |
| "learning_rate": 1.5612998570519746e-06, | |
| "loss": 0.9981138706207275, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 2.7804878048780486, | |
| "grad_norm": 0.676094651222229, | |
| "learning_rate": 1.5543007626312129e-06, | |
| "loss": 1.003664255142212, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 2.7849223946784925, | |
| "grad_norm": 0.5613422989845276, | |
| "learning_rate": 1.5473177548924267e-06, | |
| "loss": 0.9180096983909607, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 2.7893569844789354, | |
| "grad_norm": 0.6209838390350342, | |
| "learning_rate": 1.5403509277835077e-06, | |
| "loss": 0.18954633176326752, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 2.7937915742793793, | |
| "grad_norm": 0.7056342959403992, | |
| "learning_rate": 1.5334003750346608e-06, | |
| "loss": 0.8927637338638306, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.7982261640798227, | |
| "grad_norm": 1.2110857963562012, | |
| "learning_rate": 1.5264661901571349e-06, | |
| "loss": 0.6852344870567322, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 2.802660753880266, | |
| "grad_norm": 0.8435057997703552, | |
| "learning_rate": 1.5195484664419732e-06, | |
| "loss": 0.5791198015213013, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 2.8070953436807096, | |
| "grad_norm": 0.7665271162986755, | |
| "learning_rate": 1.5126472969587502e-06, | |
| "loss": 1.0053340196609497, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 2.811529933481153, | |
| "grad_norm": 1.9054136276245117, | |
| "learning_rate": 1.5057627745543269e-06, | |
| "loss": 0.9256460666656494, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 2.8159645232815964, | |
| "grad_norm": 0.928837239742279, | |
| "learning_rate": 1.4988949918515947e-06, | |
| "loss": 0.872265100479126, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.82039911308204, | |
| "grad_norm": 1.4256490468978882, | |
| "learning_rate": 1.4920440412482345e-06, | |
| "loss": 0.38903388381004333, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 2.8248337028824833, | |
| "grad_norm": 1.0866613388061523, | |
| "learning_rate": 1.485210014915473e-06, | |
| "loss": 0.5502209663391113, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 2.8292682926829267, | |
| "grad_norm": 0.8656668066978455, | |
| "learning_rate": 1.4783930047968388e-06, | |
| "loss": 0.9950301051139832, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 2.8337028824833705, | |
| "grad_norm": 2.2869789600372314, | |
| "learning_rate": 1.4715931026069273e-06, | |
| "loss": 0.9035691618919373, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 2.8381374722838135, | |
| "grad_norm": 0.892152726650238, | |
| "learning_rate": 1.4648103998301716e-06, | |
| "loss": 0.5569464564323425, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.8425720620842574, | |
| "grad_norm": 2.0605618953704834, | |
| "learning_rate": 1.4580449877196035e-06, | |
| "loss": 0.6820242404937744, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 2.847006651884701, | |
| "grad_norm": 0.8823329210281372, | |
| "learning_rate": 1.4512969572956328e-06, | |
| "loss": 0.6583420038223267, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 2.8514412416851442, | |
| "grad_norm": 0.7509365677833557, | |
| "learning_rate": 1.4445663993448173e-06, | |
| "loss": 0.9154214262962341, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 2.8558758314855877, | |
| "grad_norm": 2.52254319190979, | |
| "learning_rate": 1.437853404418646e-06, | |
| "loss": 0.31365761160850525, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 2.860310421286031, | |
| "grad_norm": 0.8587144613265991, | |
| "learning_rate": 1.431158062832318e-06, | |
| "loss": 0.9376072883605957, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.8647450110864745, | |
| "grad_norm": 1.4515576362609863, | |
| "learning_rate": 1.4244804646635266e-06, | |
| "loss": 1.0736974477767944, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 2.869179600886918, | |
| "grad_norm": 0.9386347532272339, | |
| "learning_rate": 1.4178206997512522e-06, | |
| "loss": 0.8537707924842834, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 2.8736141906873613, | |
| "grad_norm": 0.8694044947624207, | |
| "learning_rate": 1.4111788576945467e-06, | |
| "loss": 0.9072995781898499, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 2.8780487804878048, | |
| "grad_norm": 1.4627509117126465, | |
| "learning_rate": 1.4045550278513351e-06, | |
| "loss": 0.5681540966033936, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 2.882483370288248, | |
| "grad_norm": 1.8899142742156982, | |
| "learning_rate": 1.3979492993372074e-06, | |
| "loss": 0.9094551205635071, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.8869179600886916, | |
| "grad_norm": 2.1445658206939697, | |
| "learning_rate": 1.391361761024222e-06, | |
| "loss": 0.3109537363052368, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 2.8913525498891355, | |
| "grad_norm": 0.7595453858375549, | |
| "learning_rate": 1.3847925015397146e-06, | |
| "loss": 0.8516042828559875, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 2.8957871396895785, | |
| "grad_norm": 0.9547167420387268, | |
| "learning_rate": 1.3782416092650957e-06, | |
| "loss": 0.7857693433761597, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 2.9002217294900223, | |
| "grad_norm": 0.8833897113800049, | |
| "learning_rate": 1.3717091723346699e-06, | |
| "loss": 0.40065449476242065, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 2.9046563192904657, | |
| "grad_norm": 0.7363049983978271, | |
| "learning_rate": 1.3651952786344485e-06, | |
| "loss": 0.518020749092102, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.909090909090909, | |
| "grad_norm": 0.7660597562789917, | |
| "learning_rate": 1.3587000158009638e-06, | |
| "loss": 0.8922036290168762, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 2.9135254988913526, | |
| "grad_norm": 0.7471197843551636, | |
| "learning_rate": 1.3522234712200954e-06, | |
| "loss": 0.9531146883964539, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 2.917960088691796, | |
| "grad_norm": 2.3324310779571533, | |
| "learning_rate": 1.3457657320258878e-06, | |
| "loss": 0.6479524374008179, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 2.9223946784922394, | |
| "grad_norm": 0.8263819813728333, | |
| "learning_rate": 1.3393268850993852e-06, | |
| "loss": 0.9447596073150635, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 2.926829268292683, | |
| "grad_norm": 1.0561071634292603, | |
| "learning_rate": 1.332907017067458e-06, | |
| "loss": 0.7807310819625854, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.9312638580931263, | |
| "grad_norm": 0.7978630065917969, | |
| "learning_rate": 1.3265062143016378e-06, | |
| "loss": 0.8912954926490784, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 2.9356984478935697, | |
| "grad_norm": 1.006141185760498, | |
| "learning_rate": 1.3201245629169574e-06, | |
| "loss": 0.9309762716293335, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 2.9401330376940136, | |
| "grad_norm": 0.6220606565475464, | |
| "learning_rate": 1.3137621487707902e-06, | |
| "loss": 0.8276110887527466, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 2.9445676274944566, | |
| "grad_norm": 0.7150996923446655, | |
| "learning_rate": 1.307419057461697e-06, | |
| "loss": 0.9218543171882629, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 2.9490022172949004, | |
| "grad_norm": 0.7995206713676453, | |
| "learning_rate": 1.3010953743282724e-06, | |
| "loss": 0.8906182050704956, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.953436807095344, | |
| "grad_norm": 1.6357812881469727, | |
| "learning_rate": 1.294791184447996e-06, | |
| "loss": 0.907779335975647, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 2.9578713968957873, | |
| "grad_norm": 0.705119788646698, | |
| "learning_rate": 1.2885065726360925e-06, | |
| "loss": 0.45520275831222534, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 2.9623059866962307, | |
| "grad_norm": 0.8450793027877808, | |
| "learning_rate": 1.282241623444386e-06, | |
| "loss": 0.9082697629928589, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 2.966740576496674, | |
| "grad_norm": 0.715003252029419, | |
| "learning_rate": 1.2759964211601633e-06, | |
| "loss": 0.8805263042449951, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 2.9711751662971175, | |
| "grad_norm": 1.0061311721801758, | |
| "learning_rate": 1.269771049805042e-06, | |
| "loss": 0.7469978928565979, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.975609756097561, | |
| "grad_norm": 2.486464262008667, | |
| "learning_rate": 1.2635655931338364e-06, | |
| "loss": 0.6190311312675476, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 2.9800443458980044, | |
| "grad_norm": 0.5506015419960022, | |
| "learning_rate": 1.2573801346334355e-06, | |
| "loss": 0.19588389992713928, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 2.984478935698448, | |
| "grad_norm": 0.7587252259254456, | |
| "learning_rate": 1.251214757521675e-06, | |
| "loss": 0.6986634731292725, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 2.988913525498891, | |
| "grad_norm": 0.548460841178894, | |
| "learning_rate": 1.2450695447462214e-06, | |
| "loss": 0.5783390998840332, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 2.9933481152993346, | |
| "grad_norm": 4.170194149017334, | |
| "learning_rate": 1.2389445789834534e-06, | |
| "loss": 0.6150118112564087, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.9977827050997785, | |
| "grad_norm": 0.9792019128799438, | |
| "learning_rate": 1.2328399426373511e-06, | |
| "loss": 0.6677907109260559, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 3.002217294900222, | |
| "grad_norm": 0.8542647361755371, | |
| "learning_rate": 1.2267557178383886e-06, | |
| "loss": 0.8499741554260254, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 3.0066518847006654, | |
| "grad_norm": 0.7518671154975891, | |
| "learning_rate": 1.220691986442424e-06, | |
| "loss": 0.42442217469215393, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 3.011086474501109, | |
| "grad_norm": 1.2137625217437744, | |
| "learning_rate": 1.2146488300296047e-06, | |
| "loss": 0.6506487727165222, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 3.015521064301552, | |
| "grad_norm": 0.703611433506012, | |
| "learning_rate": 1.2086263299032652e-06, | |
| "loss": 0.8044725656509399, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 3.0199556541019956, | |
| "grad_norm": 0.8881542682647705, | |
| "learning_rate": 1.2026245670888343e-06, | |
| "loss": 0.7103544473648071, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 3.024390243902439, | |
| "grad_norm": 0.7568899393081665, | |
| "learning_rate": 1.196643622332747e-06, | |
| "loss": 0.8767702579498291, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 3.0288248337028825, | |
| "grad_norm": 0.25662195682525635, | |
| "learning_rate": 1.1906835761013547e-06, | |
| "loss": 0.30692797899246216, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 3.033259423503326, | |
| "grad_norm": 0.7841421961784363, | |
| "learning_rate": 1.184744508579846e-06, | |
| "loss": 0.46566513180732727, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 3.0376940133037693, | |
| "grad_norm": 1.7681467533111572, | |
| "learning_rate": 1.178826499671167e-06, | |
| "loss": 0.5476133227348328, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 3.0421286031042127, | |
| "grad_norm": 0.914473295211792, | |
| "learning_rate": 1.172929628994943e-06, | |
| "loss": 0.7700155377388, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 3.046563192904656, | |
| "grad_norm": 1.8283075094223022, | |
| "learning_rate": 1.167053975886413e-06, | |
| "loss": 0.29538294672966003, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 3.0509977827050996, | |
| "grad_norm": 2.4396283626556396, | |
| "learning_rate": 1.1611996193953569e-06, | |
| "loss": 0.4983513057231903, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 3.0554323725055434, | |
| "grad_norm": 1.0929163694381714, | |
| "learning_rate": 1.1553666382850366e-06, | |
| "loss": 0.3278832733631134, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 3.059866962305987, | |
| "grad_norm": 0.1607833206653595, | |
| "learning_rate": 1.1495551110311324e-06, | |
| "loss": 0.319016695022583, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 3.0643015521064303, | |
| "grad_norm": 2.2611818313598633, | |
| "learning_rate": 1.1437651158206904e-06, | |
| "loss": 0.2882533073425293, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 3.0687361419068737, | |
| "grad_norm": 1.358695387840271, | |
| "learning_rate": 1.137996730551069e-06, | |
| "loss": 0.5356056690216064, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 3.073170731707317, | |
| "grad_norm": 0.34475207328796387, | |
| "learning_rate": 1.1322500328288897e-06, | |
| "loss": 0.31114962697029114, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 3.0776053215077606, | |
| "grad_norm": 0.14836685359477997, | |
| "learning_rate": 1.1265250999689966e-06, | |
| "loss": 0.4476943016052246, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 3.082039911308204, | |
| "grad_norm": 0.9479687809944153, | |
| "learning_rate": 1.1208220089934118e-06, | |
| "loss": 0.6670686602592468, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 3.0864745011086474, | |
| "grad_norm": 2.1511411666870117, | |
| "learning_rate": 1.1151408366303024e-06, | |
| "loss": 0.31225770711898804, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 3.090909090909091, | |
| "grad_norm": 3.655069589614868, | |
| "learning_rate": 1.1094816593129475e-06, | |
| "loss": 0.513544499874115, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 3.0953436807095343, | |
| "grad_norm": 0.8022751212120056, | |
| "learning_rate": 1.1038445531787083e-06, | |
| "loss": 0.7358729243278503, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 3.0997782705099777, | |
| "grad_norm": 0.9091879725456238, | |
| "learning_rate": 1.098229594068007e-06, | |
| "loss": 0.739376425743103, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 3.104212860310421, | |
| "grad_norm": 0.7697157859802246, | |
| "learning_rate": 1.0926368575233032e-06, | |
| "loss": 0.562745213508606, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 3.1086474501108645, | |
| "grad_norm": 0.6193404197692871, | |
| "learning_rate": 1.087066418788078e-06, | |
| "loss": 0.2242104858160019, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 3.1130820399113084, | |
| "grad_norm": 1.587528944015503, | |
| "learning_rate": 1.0815183528058248e-06, | |
| "loss": 0.47392910718917847, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 3.117516629711752, | |
| "grad_norm": 0.8710333108901978, | |
| "learning_rate": 1.0759927342190362e-06, | |
| "loss": 0.6808566451072693, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 3.1219512195121952, | |
| "grad_norm": 0.8722860217094421, | |
| "learning_rate": 1.0704896373682052e-06, | |
| "loss": 0.8002766966819763, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 3.1263858093126387, | |
| "grad_norm": 0.8693475723266602, | |
| "learning_rate": 1.0650091362908189e-06, | |
| "loss": 0.6717104315757751, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 3.130820399113082, | |
| "grad_norm": 0.8810287714004517, | |
| "learning_rate": 1.0595513047203693e-06, | |
| "loss": 0.6879040598869324, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 3.1352549889135255, | |
| "grad_norm": 1.0683605670928955, | |
| "learning_rate": 1.0541162160853538e-06, | |
| "loss": 0.4185694754123688, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 3.139689578713969, | |
| "grad_norm": 0.7451454401016235, | |
| "learning_rate": 1.0487039435082941e-06, | |
| "loss": 0.057561103254556656, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 3.1441241685144123, | |
| "grad_norm": 1.1728911399841309, | |
| "learning_rate": 1.0433145598047495e-06, | |
| "loss": 0.6490657925605774, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 3.1485587583148558, | |
| "grad_norm": 0.90044105052948, | |
| "learning_rate": 1.0379481374823358e-06, | |
| "loss": 0.7569578886032104, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 3.152993348115299, | |
| "grad_norm": 1.4448527097702026, | |
| "learning_rate": 1.032604748739751e-06, | |
| "loss": 0.6902734041213989, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 3.1574279379157426, | |
| "grad_norm": 0.5912182927131653, | |
| "learning_rate": 1.0272844654658069e-06, | |
| "loss": 0.12579640746116638, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 3.1618625277161865, | |
| "grad_norm": 1.5718578100204468, | |
| "learning_rate": 1.0219873592384556e-06, | |
| "loss": 0.6934728026390076, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 3.16629711751663, | |
| "grad_norm": 1.5502285957336426, | |
| "learning_rate": 1.016713501323834e-06, | |
| "loss": 0.7339826822280884, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 3.1707317073170733, | |
| "grad_norm": 3.2946419715881348, | |
| "learning_rate": 1.0114629626752973e-06, | |
| "loss": 0.4983486831188202, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 3.1751662971175167, | |
| "grad_norm": 1.9185384511947632, | |
| "learning_rate": 1.0062358139324715e-06, | |
| "loss": 0.39363744854927063, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 3.17960088691796, | |
| "grad_norm": 0.8971115946769714, | |
| "learning_rate": 1.0010321254202992e-06, | |
| "loss": 0.5175668597221375, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 3.1840354767184036, | |
| "grad_norm": 2.1288814544677734, | |
| "learning_rate": 9.958519671480919e-07, | |
| "loss": 0.17975324392318726, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 3.188470066518847, | |
| "grad_norm": 0.8228841423988342, | |
| "learning_rate": 9.906954088085929e-07, | |
| "loss": 0.42570653557777405, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 3.1929046563192904, | |
| "grad_norm": 2.6906468868255615, | |
| "learning_rate": 9.85562519777035e-07, | |
| "loss": 0.585996150970459, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 3.197339246119734, | |
| "grad_norm": 1.0107629299163818, | |
| "learning_rate": 9.804533691102112e-07, | |
| "loss": 0.7266984581947327, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 3.2017738359201773, | |
| "grad_norm": 0.8374084830284119, | |
| "learning_rate": 9.75368025545542e-07, | |
| "loss": 0.7630691528320312, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 3.2062084257206207, | |
| "grad_norm": 0.8851264119148254, | |
| "learning_rate": 9.703065575001518e-07, | |
| "loss": 0.6669173240661621, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 3.210643015521064, | |
| "grad_norm": 4.757336139678955, | |
| "learning_rate": 9.65269033069952e-07, | |
| "loss": 0.6446712613105774, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 3.2150776053215075, | |
| "grad_norm": 0.8732156753540039, | |
| "learning_rate": 9.602555200287184e-07, | |
| "loss": 0.8526778817176819, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 3.2195121951219514, | |
| "grad_norm": 1.501761794090271, | |
| "learning_rate": 9.552660858271835e-07, | |
| "loss": 0.4225665330886841, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 3.223946784922395, | |
| "grad_norm": 0.790913999080658, | |
| "learning_rate": 9.503007975921294e-07, | |
| "loss": 0.5034030675888062, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 3.2283813747228383, | |
| "grad_norm": 2.5785648822784424, | |
| "learning_rate": 9.453597221254821e-07, | |
| "loss": 0.7649686336517334, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 3.2328159645232817, | |
| "grad_norm": 5.038004398345947, | |
| "learning_rate": 9.404429259034156e-07, | |
| "loss": 0.45995619893074036, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 3.237250554323725, | |
| "grad_norm": 0.8205391764640808, | |
| "learning_rate": 9.355504750754543e-07, | |
| "loss": 0.8184725046157837, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 3.2416851441241685, | |
| "grad_norm": 0.9232129454612732, | |
| "learning_rate": 9.306824354635866e-07, | |
| "loss": 0.5478826761245728, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 3.246119733924612, | |
| "grad_norm": 2.364551067352295, | |
| "learning_rate": 9.258388725613776e-07, | |
| "loss": 0.69716876745224, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 3.2505543237250554, | |
| "grad_norm": 1.0339198112487793, | |
| "learning_rate": 9.21019851533086e-07, | |
| "loss": 0.8049119710922241, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 3.254988913525499, | |
| "grad_norm": 0.22390811145305634, | |
| "learning_rate": 9.162254372127921e-07, | |
| "loss": 0.12469253689050674, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 3.259423503325942, | |
| "grad_norm": 1.6547586917877197, | |
| "learning_rate": 9.114556941035199e-07, | |
| "loss": 0.8042660355567932, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 3.2638580931263856, | |
| "grad_norm": 1.0059466361999512, | |
| "learning_rate": 9.067106863763752e-07, | |
| "loss": 0.8942850828170776, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 3.2682926829268295, | |
| "grad_norm": 3.251643657684326, | |
| "learning_rate": 9.01990477869677e-07, | |
| "loss": 0.3387027978897095, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 3.2727272727272725, | |
| "grad_norm": 0.6991317272186279, | |
| "learning_rate": 8.972951320881014e-07, | |
| "loss": 0.5067039728164673, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 3.2771618625277164, | |
| "grad_norm": 0.7981709241867065, | |
| "learning_rate": 8.92624712201827e-07, | |
| "loss": 0.4391624331474304, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 3.2815964523281598, | |
| "grad_norm": 0.6799890398979187, | |
| "learning_rate": 8.879792810456861e-07, | |
| "loss": 0.5848779678344727, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 3.286031042128603, | |
| "grad_norm": 0.9311388731002808, | |
| "learning_rate": 8.833589011183147e-07, | |
| "loss": 0.6908966898918152, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 3.2904656319290466, | |
| "grad_norm": 0.7512919306755066, | |
| "learning_rate": 8.78763634581318e-07, | |
| "loss": 0.541054904460907, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 3.29490022172949, | |
| "grad_norm": 0.7206840515136719, | |
| "learning_rate": 8.741935432584292e-07, | |
| "loss": 0.4378011226654053, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 3.2993348115299335, | |
| "grad_norm": 26.240053176879883, | |
| "learning_rate": 8.696486886346805e-07, | |
| "loss": 0.4610789716243744, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 3.303769401330377, | |
| "grad_norm": 2.8554205894470215, | |
| "learning_rate": 8.651291318555745e-07, | |
| "loss": 0.26280251145362854, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 3.3082039911308203, | |
| "grad_norm": 0.8796905875205994, | |
| "learning_rate": 8.606349337262623e-07, | |
| "loss": 0.7376017570495605, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 3.3126385809312637, | |
| "grad_norm": 1.916146993637085, | |
| "learning_rate": 8.561661547107243e-07, | |
| "loss": 0.5012311935424805, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 3.317073170731707, | |
| "grad_norm": 2.0767107009887695, | |
| "learning_rate": 8.517228549309588e-07, | |
| "loss": 0.5897710919380188, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 3.3215077605321506, | |
| "grad_norm": 2.7314648628234863, | |
| "learning_rate": 8.473050941661717e-07, | |
| "loss": 0.7489433288574219, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 3.3259423503325944, | |
| "grad_norm": 0.9693230986595154, | |
| "learning_rate": 8.429129318519711e-07, | |
| "loss": 0.40758612751960754, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 3.330376940133038, | |
| "grad_norm": 1.1836127042770386, | |
| "learning_rate": 8.38546427079571e-07, | |
| "loss": 0.749980092048645, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 3.3348115299334813, | |
| "grad_norm": 1.2712533473968506, | |
| "learning_rate": 8.342056385949929e-07, | |
| "loss": 0.7348231673240662, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 3.3392461197339247, | |
| "grad_norm": 0.25616952776908875, | |
| "learning_rate": 8.298906247982768e-07, | |
| "loss": 0.32258349657058716, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 3.343680709534368, | |
| "grad_norm": 1.4267375469207764, | |
| "learning_rate": 8.25601443742697e-07, | |
| "loss": 0.4655570089817047, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 3.3481152993348116, | |
| "grad_norm": 1.8292971849441528, | |
| "learning_rate": 8.213381531339776e-07, | |
| "loss": 0.8381154537200928, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 3.352549889135255, | |
| "grad_norm": 0.254131019115448, | |
| "learning_rate": 8.1710081032952e-07, | |
| "loss": 0.40903714299201965, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 3.3569844789356984, | |
| "grad_norm": 1.1773041486740112, | |
| "learning_rate": 8.128894723376285e-07, | |
| "loss": 0.78590327501297, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 3.361419068736142, | |
| "grad_norm": 2.625889539718628, | |
| "learning_rate": 8.087041958167438e-07, | |
| "loss": 0.5213326215744019, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 3.3658536585365852, | |
| "grad_norm": 0.7647179365158081, | |
| "learning_rate": 8.04545037074683e-07, | |
| "loss": 0.5064011812210083, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 3.3702882483370287, | |
| "grad_norm": 0.8953445553779602, | |
| "learning_rate": 8.004120520678768e-07, | |
| "loss": 0.7650377154350281, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 3.374722838137472, | |
| "grad_norm": 0.39387643337249756, | |
| "learning_rate": 7.963052964006243e-07, | |
| "loss": 0.45021387934684753, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 3.3791574279379155, | |
| "grad_norm": 3.076669692993164, | |
| "learning_rate": 7.922248253243367e-07, | |
| "loss": 0.6630456447601318, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 3.3835920177383594, | |
| "grad_norm": 1.8935463428497314, | |
| "learning_rate": 7.881706937368005e-07, | |
| "loss": 0.6750819683074951, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 3.388026607538803, | |
| "grad_norm": 0.7205215692520142, | |
| "learning_rate": 7.84142956181436e-07, | |
| "loss": 0.37881627678871155, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 3.3924611973392462, | |
| "grad_norm": 0.5228025317192078, | |
| "learning_rate": 7.801416668465621e-07, | |
| "loss": 0.3390671908855438, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 3.3968957871396896, | |
| "grad_norm": 2.084331512451172, | |
| "learning_rate": 7.76166879564672e-07, | |
| "loss": 0.2297561913728714, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 3.401330376940133, | |
| "grad_norm": 0.8670969605445862, | |
| "learning_rate": 7.722186478117031e-07, | |
| "loss": 0.6209254860877991, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 3.4057649667405765, | |
| "grad_norm": 0.9295784831047058, | |
| "learning_rate": 7.682970247063212e-07, | |
| "loss": 0.6978744864463806, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 3.41019955654102, | |
| "grad_norm": 1.0750006437301636, | |
| "learning_rate": 7.644020630092066e-07, | |
| "loss": 0.6786344647407532, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 3.4146341463414633, | |
| "grad_norm": 1.2284328937530518, | |
| "learning_rate": 7.605338151223401e-07, | |
| "loss": 0.7001453042030334, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 3.4190687361419068, | |
| "grad_norm": 1.4736789464950562, | |
| "learning_rate": 7.566923330883029e-07, | |
| "loss": 0.5472738742828369, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 3.42350332594235, | |
| "grad_norm": 1.1253695487976074, | |
| "learning_rate": 7.528776685895731e-07, | |
| "loss": 0.6272318363189697, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 3.4279379157427936, | |
| "grad_norm": 3.8921940326690674, | |
| "learning_rate": 7.490898729478312e-07, | |
| "loss": 0.25309649109840393, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 3.4323725055432375, | |
| "grad_norm": 0.1467740684747696, | |
| "learning_rate": 7.45328997123271e-07, | |
| "loss": 0.1823056936264038, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 3.436807095343681, | |
| "grad_norm": 1.1179344654083252, | |
| "learning_rate": 7.415950917139106e-07, | |
| "loss": 0.8248109221458435, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 3.4412416851441243, | |
| "grad_norm": 0.9201335906982422, | |
| "learning_rate": 7.378882069549166e-07, | |
| "loss": 0.6940004229545593, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 3.4456762749445677, | |
| "grad_norm": 0.1357531100511551, | |
| "learning_rate": 7.342083927179235e-07, | |
| "loss": 0.3549342155456543, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 3.450110864745011, | |
| "grad_norm": 1.1405051946640015, | |
| "learning_rate": 7.30555698510366e-07, | |
| "loss": 0.808836817741394, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 3.4545454545454546, | |
| "grad_norm": 2.6760106086730957, | |
| "learning_rate": 7.269301734748107e-07, | |
| "loss": 0.7992438077926636, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 3.458980044345898, | |
| "grad_norm": 0.6596611738204956, | |
| "learning_rate": 7.233318663882968e-07, | |
| "loss": 0.7546770572662354, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 3.4634146341463414, | |
| "grad_norm": 2.3677260875701904, | |
| "learning_rate": 7.197608256616792e-07, | |
| "loss": 0.2441236525774002, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 3.467849223946785, | |
| "grad_norm": 2.9253904819488525, | |
| "learning_rate": 7.162170993389763e-07, | |
| "loss": 0.72310870885849, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 3.4722838137472283, | |
| "grad_norm": 0.8974405527114868, | |
| "learning_rate": 7.127007350967241e-07, | |
| "loss": 0.38693636655807495, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 3.4767184035476717, | |
| "grad_norm": 1.2987812757492065, | |
| "learning_rate": 7.092117802433362e-07, | |
| "loss": 0.6902230381965637, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 3.481152993348115, | |
| "grad_norm": 0.820105254650116, | |
| "learning_rate": 7.057502817184648e-07, | |
| "loss": 0.4683529734611511, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 3.4855875831485585, | |
| "grad_norm": 0.34536460041999817, | |
| "learning_rate": 7.023162860923722e-07, | |
| "loss": 0.4866012930870056, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 3.4900221729490024, | |
| "grad_norm": 0.8184029459953308, | |
| "learning_rate": 6.989098395653005e-07, | |
| "loss": 0.878727912902832, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 3.494456762749446, | |
| "grad_norm": 2.2575831413269043, | |
| "learning_rate": 6.955309879668537e-07, | |
| "loss": 0.4023507833480835, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 3.4988913525498893, | |
| "grad_norm": 0.8604305982589722, | |
| "learning_rate": 6.921797767553794e-07, | |
| "loss": 0.47066283226013184, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 3.5033259423503327, | |
| "grad_norm": 0.9628890752792358, | |
| "learning_rate": 6.88856251017356e-07, | |
| "loss": 0.7100802659988403, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 3.507760532150776, | |
| "grad_norm": 0.574712336063385, | |
| "learning_rate": 6.855604554667897e-07, | |
| "loss": 0.8129547834396362, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 3.5121951219512195, | |
| "grad_norm": 0.8234847784042358, | |
| "learning_rate": 6.822924344446081e-07, | |
| "loss": 0.45433831214904785, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 3.516629711751663, | |
| "grad_norm": 0.8851921558380127, | |
| "learning_rate": 6.790522319180687e-07, | |
| "loss": 0.38872238993644714, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 3.5210643015521064, | |
| "grad_norm": 0.7976731657981873, | |
| "learning_rate": 6.758398914801628e-07, | |
| "loss": 0.7368452548980713, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 3.52549889135255, | |
| "grad_norm": 1.8044689893722534, | |
| "learning_rate": 6.726554563490321e-07, | |
| "loss": 0.48450690507888794, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 3.529933481152993, | |
| "grad_norm": 9.239014625549316, | |
| "learning_rate": 6.694989693673872e-07, | |
| "loss": 0.4795666038990021, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 3.5343680709534366, | |
| "grad_norm": 1.6922059059143066, | |
| "learning_rate": 6.663704730019285e-07, | |
| "loss": 0.8343867063522339, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 3.5388026607538805, | |
| "grad_norm": 3.8835480213165283, | |
| "learning_rate": 6.632700093427774e-07, | |
| "loss": 0.2447872906923294, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 3.5432372505543235, | |
| "grad_norm": 1.5197457075119019, | |
| "learning_rate": 6.601976201029095e-07, | |
| "loss": 0.4911421537399292, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 3.5476718403547673, | |
| "grad_norm": 0.9553492665290833, | |
| "learning_rate": 6.571533466175928e-07, | |
| "loss": 0.5422607064247131, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 3.5521064301552108, | |
| "grad_norm": 0.8694108724594116, | |
| "learning_rate": 6.541372298438325e-07, | |
| "loss": 0.7197349667549133, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 3.556541019955654, | |
| "grad_norm": 0.8564309477806091, | |
| "learning_rate": 6.511493103598184e-07, | |
| "loss": 0.8826733231544495, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 3.5609756097560976, | |
| "grad_norm": 0.7103037238121033, | |
| "learning_rate": 6.481896283643808e-07, | |
| "loss": 0.7101774215698242, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 3.565410199556541, | |
| "grad_norm": 0.16576789319515228, | |
| "learning_rate": 6.452582236764495e-07, | |
| "loss": 0.0969938412308693, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 3.5698447893569845, | |
| "grad_norm": 3.15248966217041, | |
| "learning_rate": 6.423551357345154e-07, | |
| "loss": 0.5980420112609863, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 3.574279379157428, | |
| "grad_norm": 1.84687077999115, | |
| "learning_rate": 6.394804035961038e-07, | |
| "loss": 0.15914049744606018, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 3.5787139689578713, | |
| "grad_norm": 3.980046272277832, | |
| "learning_rate": 6.366340659372462e-07, | |
| "loss": 0.5454177260398865, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 3.5831485587583147, | |
| "grad_norm": 0.8702554702758789, | |
| "learning_rate": 6.338161610519618e-07, | |
| "loss": 0.7745500206947327, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 3.587583148558758, | |
| "grad_norm": 0.7443963885307312, | |
| "learning_rate": 6.310267268517397e-07, | |
| "loss": 0.46495673060417175, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 3.5920177383592016, | |
| "grad_norm": 9.520201683044434, | |
| "learning_rate": 6.282658008650318e-07, | |
| "loss": 0.6363322138786316, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 3.5964523281596454, | |
| "grad_norm": 1.0178289413452148, | |
| "learning_rate": 6.255334202367462e-07, | |
| "loss": 0.7026889324188232, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 3.6008869179600884, | |
| "grad_norm": 1.213256597518921, | |
| "learning_rate": 6.228296217277481e-07, | |
| "loss": 0.6059472560882568, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 3.6053215077605323, | |
| "grad_norm": 0.5287774801254272, | |
| "learning_rate": 6.201544417143641e-07, | |
| "loss": 0.23650091886520386, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 3.6097560975609757, | |
| "grad_norm": 0.9942798614501953, | |
| "learning_rate": 6.175079161878951e-07, | |
| "loss": 0.7636827826499939, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 3.614190687361419, | |
| "grad_norm": 3.0106701850891113, | |
| "learning_rate": 6.148900807541295e-07, | |
| "loss": 0.6264622807502747, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 3.6186252771618626, | |
| "grad_norm": 0.984876275062561, | |
| "learning_rate": 6.123009706328659e-07, | |
| "loss": 0.6594371795654297, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 3.623059866962306, | |
| "grad_norm": 5.279505729675293, | |
| "learning_rate": 6.097406206574378e-07, | |
| "loss": 0.7096071839332581, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 3.6274944567627494, | |
| "grad_norm": 0.8422771096229553, | |
| "learning_rate": 6.072090652742475e-07, | |
| "loss": 0.4414654076099396, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 3.631929046563193, | |
| "grad_norm": 6.341533184051514, | |
| "learning_rate": 6.047063385422993e-07, | |
| "loss": 0.6763387322425842, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 3.6363636363636362, | |
| "grad_norm": 0.9789008498191833, | |
| "learning_rate": 6.022324741327438e-07, | |
| "loss": 0.7263614535331726, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 3.6407982261640797, | |
| "grad_norm": 1.6151984930038452, | |
| "learning_rate": 5.997875053284248e-07, | |
| "loss": 0.3981097936630249, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 3.6452328159645235, | |
| "grad_norm": 1.091586947441101, | |
| "learning_rate": 5.973714650234287e-07, | |
| "loss": 0.5544517636299133, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 3.6496674057649665, | |
| "grad_norm": 0.3249668776988983, | |
| "learning_rate": 5.949843857226466e-07, | |
| "loss": 0.33355870842933655, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 3.6541019955654104, | |
| "grad_norm": 1.1351978778839111, | |
| "learning_rate": 5.926262995413329e-07, | |
| "loss": 0.3144383728504181, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 3.658536585365854, | |
| "grad_norm": 1.1642546653747559, | |
| "learning_rate": 5.902972382046742e-07, | |
| "loss": 0.47315874695777893, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 3.662971175166297, | |
| "grad_norm": 1.5506455898284912, | |
| "learning_rate": 5.879972330473651e-07, | |
| "loss": 0.6661707758903503, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 3.6674057649667406, | |
| "grad_norm": 0.875584065914154, | |
| "learning_rate": 5.857263150131825e-07, | |
| "loss": 0.3526417315006256, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 3.671840354767184, | |
| "grad_norm": 0.9616864323616028, | |
| "learning_rate": 5.834845146545726e-07, | |
| "loss": 0.787284791469574, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 3.6762749445676275, | |
| "grad_norm": 1.849548101425171, | |
| "learning_rate": 5.812718621322386e-07, | |
| "loss": 0.5197221636772156, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 3.680709534368071, | |
| "grad_norm": 1.266353726387024, | |
| "learning_rate": 5.790883872147341e-07, | |
| "loss": 0.4882799983024597, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 3.6851441241685143, | |
| "grad_norm": 0.2810562551021576, | |
| "learning_rate": 5.769341192780643e-07, | |
| "loss": 0.38302525877952576, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 3.6895787139689578, | |
| "grad_norm": 0.9332227110862732, | |
| "learning_rate": 5.748090873052892e-07, | |
| "loss": 0.3155737817287445, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 3.694013303769401, | |
| "grad_norm": 1.3223484754562378, | |
| "learning_rate": 5.727133198861353e-07, | |
| "loss": 0.6590555906295776, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 3.6984478935698446, | |
| "grad_norm": 0.7587100863456726, | |
| "learning_rate": 5.706468452166091e-07, | |
| "loss": 0.6262103915214539, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 3.7028824833702885, | |
| "grad_norm": 0.962223470211029, | |
| "learning_rate": 5.686096910986189e-07, | |
| "loss": 0.6349048018455505, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 3.7073170731707314, | |
| "grad_norm": 1.7819786071777344, | |
| "learning_rate": 5.666018849396016e-07, | |
| "loss": 0.6529886722564697, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 3.7117516629711753, | |
| "grad_norm": 0.7751228213310242, | |
| "learning_rate": 5.646234537521513e-07, | |
| "loss": 0.7602441310882568, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 3.7161862527716187, | |
| "grad_norm": 0.8239582777023315, | |
| "learning_rate": 5.626744241536589e-07, | |
| "loss": 0.7868402004241943, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 3.720620842572062, | |
| "grad_norm": 1.011643648147583, | |
| "learning_rate": 5.607548223659519e-07, | |
| "loss": 0.7444002628326416, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 3.7250554323725056, | |
| "grad_norm": 1.968907117843628, | |
| "learning_rate": 5.58864674214942e-07, | |
| "loss": 0.3951123058795929, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 3.729490022172949, | |
| "grad_norm": 0.7832499742507935, | |
| "learning_rate": 5.57004005130279e-07, | |
| "loss": 0.5796975493431091, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 3.7339246119733924, | |
| "grad_norm": 0.818016529083252, | |
| "learning_rate": 5.551728401450067e-07, | |
| "loss": 0.43335816264152527, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 3.738359201773836, | |
| "grad_norm": 2.979412078857422, | |
| "learning_rate": 5.533712038952278e-07, | |
| "loss": 0.6772918105125427, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 3.7427937915742793, | |
| "grad_norm": 0.39508089423179626, | |
| "learning_rate": 5.51599120619771e-07, | |
| "loss": 0.2403266280889511, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 3.7472283813747227, | |
| "grad_norm": 1.1755974292755127, | |
| "learning_rate": 5.498566141598662e-07, | |
| "loss": 0.7278658747673035, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 3.7516629711751666, | |
| "grad_norm": 0.7855948209762573, | |
| "learning_rate": 5.481437079588227e-07, | |
| "loss": 0.6658391952514648, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 3.7560975609756095, | |
| "grad_norm": 0.7915768027305603, | |
| "learning_rate": 5.464604250617143e-07, | |
| "loss": 0.8541386127471924, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 3.7605321507760534, | |
| "grad_norm": 0.7992685437202454, | |
| "learning_rate": 5.448067881150697e-07, | |
| "loss": 0.8131424784660339, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 3.764966740576497, | |
| "grad_norm": 0.9431101679801941, | |
| "learning_rate": 5.431828193665664e-07, | |
| "loss": 0.4288075268268585, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 3.7694013303769403, | |
| "grad_norm": 2.6646132469177246, | |
| "learning_rate": 5.415885406647334e-07, | |
| "loss": 0.5178676843643188, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 3.7738359201773837, | |
| "grad_norm": 1.1520713567733765, | |
| "learning_rate": 5.400239734586551e-07, | |
| "loss": 0.912034273147583, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 3.778270509977827, | |
| "grad_norm": 0.39294496178627014, | |
| "learning_rate": 5.384891387976845e-07, | |
| "loss": 0.11222898960113525, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 3.7827050997782705, | |
| "grad_norm": 0.22877201437950134, | |
| "learning_rate": 5.369840573311593e-07, | |
| "loss": 0.4198029041290283, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 3.787139689578714, | |
| "grad_norm": 0.7760717272758484, | |
| "learning_rate": 5.355087493081236e-07, | |
| "loss": 0.5717388391494751, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 3.7915742793791574, | |
| "grad_norm": 0.8549593687057495, | |
| "learning_rate": 5.340632345770564e-07, | |
| "loss": 0.8026604652404785, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 3.796008869179601, | |
| "grad_norm": 1.970601201057434, | |
| "learning_rate": 5.326475325856036e-07, | |
| "loss": 0.5300987958908081, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 3.800443458980044, | |
| "grad_norm": 8.506375312805176, | |
| "learning_rate": 5.312616623803174e-07, | |
| "loss": 0.5400223731994629, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 3.8048780487804876, | |
| "grad_norm": 1.6792744398117065, | |
| "learning_rate": 5.299056426063995e-07, | |
| "loss": 0.676804780960083, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 3.8093126385809315, | |
| "grad_norm": 1.1426763534545898, | |
| "learning_rate": 5.2857949150745e-07, | |
| "loss": 0.5867292881011963, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 3.8137472283813745, | |
| "grad_norm": 0.8977690935134888, | |
| "learning_rate": 5.27283226925222e-07, | |
| "loss": 0.5383524894714355, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 3.8181818181818183, | |
| "grad_norm": 0.8802960515022278, | |
| "learning_rate": 5.260168662993824e-07, | |
| "loss": 0.7805699110031128, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 3.8226164079822618, | |
| "grad_norm": 1.9600553512573242, | |
| "learning_rate": 5.247804266672765e-07, | |
| "loss": 0.6948915719985962, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 3.827050997782705, | |
| "grad_norm": 0.7845137119293213, | |
| "learning_rate": 5.235739246636988e-07, | |
| "loss": 0.6090778708457947, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 3.8314855875831486, | |
| "grad_norm": 0.8858228921890259, | |
| "learning_rate": 5.223973765206694e-07, | |
| "loss": 0.38666534423828125, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 3.835920177383592, | |
| "grad_norm": 2.2224628925323486, | |
| "learning_rate": 5.212507980672155e-07, | |
| "loss": 0.5477333664894104, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 3.8403547671840355, | |
| "grad_norm": 0.6720737814903259, | |
| "learning_rate": 5.201342047291587e-07, | |
| "loss": 0.7528340816497803, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 3.844789356984479, | |
| "grad_norm": 9.535623550415039, | |
| "learning_rate": 5.190476115289063e-07, | |
| "loss": 0.7895328402519226, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 3.8492239467849223, | |
| "grad_norm": 0.94792240858078, | |
| "learning_rate": 5.179910330852521e-07, | |
| "loss": 0.8134070634841919, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 3.8536585365853657, | |
| "grad_norm": 1.6452993154525757, | |
| "learning_rate": 5.169644836131759e-07, | |
| "loss": 0.8293890953063965, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 3.858093126385809, | |
| "grad_norm": 0.7658231258392334, | |
| "learning_rate": 5.159679769236553e-07, | |
| "loss": 0.4353466331958771, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 3.8625277161862526, | |
| "grad_norm": 1.3875316381454468, | |
| "learning_rate": 5.150015264234782e-07, | |
| "loss": 0.42465826869010925, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 3.8669623059866964, | |
| "grad_norm": 0.9564653038978577, | |
| "learning_rate": 5.140651451150627e-07, | |
| "loss": 0.7305734157562256, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 3.8713968957871394, | |
| "grad_norm": 0.9205288290977478, | |
| "learning_rate": 5.131588455962835e-07, | |
| "loss": 0.6675954461097717, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 3.8758314855875833, | |
| "grad_norm": 1.4623075723648071, | |
| "learning_rate": 5.122826400602999e-07, | |
| "loss": 0.43095389008522034, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 3.8802660753880267, | |
| "grad_norm": 0.8612291812896729, | |
| "learning_rate": 5.114365402953946e-07, | |
| "loss": 0.5411372184753418, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 3.88470066518847, | |
| "grad_norm": 1.1240525245666504, | |
| "learning_rate": 5.106205576848123e-07, | |
| "loss": 0.6075332760810852, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 3.8891352549889135, | |
| "grad_norm": 1.0948525667190552, | |
| "learning_rate": 5.09834703206609e-07, | |
| "loss": 0.7073659896850586, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 3.893569844789357, | |
| "grad_norm": 6.487936496734619, | |
| "learning_rate": 5.090789874335027e-07, | |
| "loss": 0.5417055487632751, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 3.8980044345898004, | |
| "grad_norm": 0.8754919171333313, | |
| "learning_rate": 5.083534205327321e-07, | |
| "loss": 0.796424150466919, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 3.902439024390244, | |
| "grad_norm": 0.22185245156288147, | |
| "learning_rate": 5.076580122659192e-07, | |
| "loss": 0.006255284883081913, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 3.9068736141906872, | |
| "grad_norm": 3.0637013912200928, | |
| "learning_rate": 5.069927719889383e-07, | |
| "loss": 0.36486220359802246, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 3.9113082039911307, | |
| "grad_norm": 0.33201995491981506, | |
| "learning_rate": 5.063577086517894e-07, | |
| "loss": 0.3559632897377014, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 3.9157427937915745, | |
| "grad_norm": 1.5193932056427002, | |
| "learning_rate": 5.057528307984792e-07, | |
| "loss": 0.538555920124054, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 3.9201773835920175, | |
| "grad_norm": 1.2443639039993286, | |
| "learning_rate": 5.051781465669053e-07, | |
| "loss": 0.7477619647979736, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 3.9246119733924614, | |
| "grad_norm": 0.8854790925979614, | |
| "learning_rate": 5.04633663688746e-07, | |
| "loss": 0.7172592878341675, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 3.929046563192905, | |
| "grad_norm": 1.6672402620315552, | |
| "learning_rate": 5.04119389489358e-07, | |
| "loss": 0.27976056933403015, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 3.933481152993348, | |
| "grad_norm": 0.8740100264549255, | |
| "learning_rate": 5.036353308876764e-07, | |
| "loss": 0.6978716254234314, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 3.9379157427937916, | |
| "grad_norm": 0.8391640186309814, | |
| "learning_rate": 5.031814943961221e-07, | |
| "loss": 0.8046401143074036, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 3.942350332594235, | |
| "grad_norm": 0.3492567241191864, | |
| "learning_rate": 5.027578861205139e-07, | |
| "loss": 0.13979971408843994, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 3.9467849223946785, | |
| "grad_norm": 2.362764596939087, | |
| "learning_rate": 5.023645117599877e-07, | |
| "loss": 0.48992088437080383, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 3.951219512195122, | |
| "grad_norm": 0.279731422662735, | |
| "learning_rate": 5.020013766069176e-07, | |
| "loss": 0.4223584532737732, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 3.9556541019955653, | |
| "grad_norm": 0.77850341796875, | |
| "learning_rate": 5.016684855468464e-07, | |
| "loss": 0.477926105260849, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 3.9600886917960088, | |
| "grad_norm": 1.2909936904907227, | |
| "learning_rate": 5.013658430584194e-07, | |
| "loss": 0.7137855887413025, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 3.964523281596452, | |
| "grad_norm": 0.8325835466384888, | |
| "learning_rate": 5.010934532133236e-07, | |
| "loss": 0.4540127217769623, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 3.9689578713968956, | |
| "grad_norm": 0.9248032569885254, | |
| "learning_rate": 5.008513196762342e-07, | |
| "loss": 0.5620056390762329, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 3.9733924611973395, | |
| "grad_norm": 0.9865884780883789, | |
| "learning_rate": 5.006394457047638e-07, | |
| "loss": 0.6748104095458984, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 3.9778270509977824, | |
| "grad_norm": 1.5635875463485718, | |
| "learning_rate": 5.004578341494197e-07, | |
| "loss": 0.38833579421043396, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 3.9822616407982263, | |
| "grad_norm": 0.9720349907875061, | |
| "learning_rate": 5.003064874535649e-07, | |
| "loss": 0.680509626865387, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 3.9866962305986697, | |
| "grad_norm": 1.0985702276229858, | |
| "learning_rate": 5.00185407653385e-07, | |
| "loss": 0.7808913588523865, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 3.991130820399113, | |
| "grad_norm": 4.035973072052002, | |
| "learning_rate": 5.000945963778627e-07, | |
| "loss": 0.6849638223648071, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 3.9955654101995566, | |
| "grad_norm": 0.8696609735488892, | |
| "learning_rate": 5.000340548487528e-07, | |
| "loss": 0.7774142622947693, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 1.2884798049926758, | |
| "learning_rate": 5.000037838805682e-07, | |
| "loss": 0.4334436058998108, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "step": 1804, | |
| "total_flos": 3.4175049861232067e+18, | |
| "train_loss": 0.8579487466323551, | |
| "train_runtime": 9192.0887, | |
| "train_samples_per_second": 5.888, | |
| "train_steps_per_second": 0.196 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 1804, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 99999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.4175049861232067e+18, | |
| "train_batch_size": 3, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |