Image-Text-to-Text
Transformers
Safetensors
English
qwen2_5_vl
chart-to-code
multimodal
vision-language
sft
cold-start
matplotlib
conversational
text-generation-inference
Instructions to use cwbc/MM-ReCoder-SFT-Cold-Start with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use cwbc/MM-ReCoder-SFT-Cold-Start with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="cwbc/MM-ReCoder-SFT-Cold-Start")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)
```

```python
# Load model directly
from transformers import AutoProcessor, AutoModelForMultimodalLM

processor = AutoProcessor.from_pretrained("cwbc/MM-ReCoder-SFT-Cold-Start")
model = AutoModelForMultimodalLM.from_pretrained("cwbc/MM-ReCoder-SFT-Cold-Start")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use cwbc/MM-ReCoder-SFT-Cold-Start with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "cwbc/MM-ReCoder-SFT-Cold-Start"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "cwbc/MM-ReCoder-SFT-Cold-Start",
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": "Describe this image in one sentence."
          },
          {
            "type": "image_url",
            "image_url": {
              "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
            }
          }
        ]
      }
    ]
  }'
```
Use Docker
docker model run hf.co/cwbc/MM-ReCoder-SFT-Cold-Start
- SGLang
How to use cwbc/MM-ReCoder-SFT-Cold-Start with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "cwbc/MM-ReCoder-SFT-Cold-Start" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "cwbc/MM-ReCoder-SFT-Cold-Start",
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": "Describe this image in one sentence."
          },
          {
            "type": "image_url",
            "image_url": {
              "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
            }
          }
        ]
      }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "cwbc/MM-ReCoder-SFT-Cold-Start" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "cwbc/MM-ReCoder-SFT-Cold-Start",
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": "Describe this image in one sentence."
          },
          {
            "type": "image_url",
            "image_url": {
              "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
            }
          }
        ]
      }
    ]
  }'
```
- Docker Model Runner
How to use cwbc/MM-ReCoder-SFT-Cold-Start with Docker Model Runner:
docker model run hf.co/cwbc/MM-ReCoder-SFT-Cold-Start
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "eval_steps": 500, | |
| "global_step": 112, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.018018018018018018, | |
| "grad_norm": 4.300365447998047, | |
| "learning_rate": 1e-05, | |
| "loss": 0.6205, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.036036036036036036, | |
| "grad_norm": 2.4697682857513428, | |
| "learning_rate": 9.999921320324328e-06, | |
| "loss": 0.4825, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.05405405405405406, | |
| "grad_norm": 1.6707440614700317, | |
| "learning_rate": 9.999685283773504e-06, | |
| "loss": 0.4016, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.07207207207207207, | |
| "grad_norm": 1.063056468963623, | |
| "learning_rate": 9.999291897776043e-06, | |
| "loss": 0.3491, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.09009009009009009, | |
| "grad_norm": 1.9208970069885254, | |
| "learning_rate": 9.998741174712534e-06, | |
| "loss": 0.3424, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.10810810810810811, | |
| "grad_norm": 1.2284053564071655, | |
| "learning_rate": 9.998033131915266e-06, | |
| "loss": 0.3004, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.12612612612612611, | |
| "grad_norm": 0.9112282395362854, | |
| "learning_rate": 9.997167791667668e-06, | |
| "loss": 0.2775, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.14414414414414414, | |
| "grad_norm": 0.9811397790908813, | |
| "learning_rate": 9.996145181203616e-06, | |
| "loss": 0.2732, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.16216216216216217, | |
| "grad_norm": 0.813277542591095, | |
| "learning_rate": 9.994965332706574e-06, | |
| "loss": 0.2549, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.18018018018018017, | |
| "grad_norm": 0.7180718183517456, | |
| "learning_rate": 9.993628283308582e-06, | |
| "loss": 0.2494, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.1981981981981982, | |
| "grad_norm": 0.6469175815582275, | |
| "learning_rate": 9.992134075089085e-06, | |
| "loss": 0.2428, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.21621621621621623, | |
| "grad_norm": 0.6054768562316895, | |
| "learning_rate": 9.990482755073607e-06, | |
| "loss": 0.2335, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.23423423423423423, | |
| "grad_norm": 0.6253430843353271, | |
| "learning_rate": 9.98867437523228e-06, | |
| "loss": 0.2473, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.25225225225225223, | |
| "grad_norm": 0.5254931449890137, | |
| "learning_rate": 9.986708992478202e-06, | |
| "loss": 0.2345, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.2702702702702703, | |
| "grad_norm": 0.47010546922683716, | |
| "learning_rate": 9.984586668665641e-06, | |
| "loss": 0.2191, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.2882882882882883, | |
| "grad_norm": 0.5132431983947754, | |
| "learning_rate": 9.982307470588097e-06, | |
| "loss": 0.2323, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.3063063063063063, | |
| "grad_norm": 0.48005616664886475, | |
| "learning_rate": 9.979871469976197e-06, | |
| "loss": 0.206, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.32432432432432434, | |
| "grad_norm": 0.49143457412719727, | |
| "learning_rate": 9.977278743495434e-06, | |
| "loss": 0.205, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.34234234234234234, | |
| "grad_norm": 0.429962158203125, | |
| "learning_rate": 9.974529372743762e-06, | |
| "loss": 0.2136, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.36036036036036034, | |
| "grad_norm": 0.43287697434425354, | |
| "learning_rate": 9.97162344424902e-06, | |
| "loss": 0.1981, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.3783783783783784, | |
| "grad_norm": 0.4126758873462677, | |
| "learning_rate": 9.968561049466214e-06, | |
| "loss": 0.2005, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.3963963963963964, | |
| "grad_norm": 0.41148409247398376, | |
| "learning_rate": 9.965342284774633e-06, | |
| "loss": 0.1896, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.4144144144144144, | |
| "grad_norm": 0.40187039971351624, | |
| "learning_rate": 9.961967251474823e-06, | |
| "loss": 0.1869, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.43243243243243246, | |
| "grad_norm": 0.4011826813220978, | |
| "learning_rate": 9.958436055785391e-06, | |
| "loss": 0.1954, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.45045045045045046, | |
| "grad_norm": 0.3922724723815918, | |
| "learning_rate": 9.954748808839675e-06, | |
| "loss": 0.196, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.46846846846846846, | |
| "grad_norm": 0.3767087459564209, | |
| "learning_rate": 9.950905626682229e-06, | |
| "loss": 0.1885, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.4864864864864865, | |
| "grad_norm": 0.38024166226387024, | |
| "learning_rate": 9.946906630265184e-06, | |
| "loss": 0.2059, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.5045045045045045, | |
| "grad_norm": 0.35347816348075867, | |
| "learning_rate": 9.942751945444437e-06, | |
| "loss": 0.1933, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.5225225225225225, | |
| "grad_norm": 0.36196964979171753, | |
| "learning_rate": 9.938441702975689e-06, | |
| "loss": 0.184, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.5405405405405406, | |
| "grad_norm": 0.3916337192058563, | |
| "learning_rate": 9.933976038510334e-06, | |
| "loss": 0.1876, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.5585585585585585, | |
| "grad_norm": 0.35248634219169617, | |
| "learning_rate": 9.92935509259118e-06, | |
| "loss": 0.1831, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.5765765765765766, | |
| "grad_norm": 0.3620535433292389, | |
| "learning_rate": 9.924579010648042e-06, | |
| "loss": 0.1817, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.5945945945945946, | |
| "grad_norm": 0.36167603731155396, | |
| "learning_rate": 9.91964794299315e-06, | |
| "loss": 0.1769, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.6126126126126126, | |
| "grad_norm": 0.35638344287872314, | |
| "learning_rate": 9.914562044816424e-06, | |
| "loss": 0.1765, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.6306306306306306, | |
| "grad_norm": 0.35270994901657104, | |
| "learning_rate": 9.909321476180594e-06, | |
| "loss": 0.1786, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.6486486486486487, | |
| "grad_norm": 0.362835168838501, | |
| "learning_rate": 9.903926402016153e-06, | |
| "loss": 0.1784, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 0.3634340465068817, | |
| "learning_rate": 9.898376992116179e-06, | |
| "loss": 0.1794, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.6846846846846847, | |
| "grad_norm": 0.3652496337890625, | |
| "learning_rate": 9.892673421130979e-06, | |
| "loss": 0.1714, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.7027027027027027, | |
| "grad_norm": 0.34574374556541443, | |
| "learning_rate": 9.886815868562596e-06, | |
| "loss": 0.1658, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.7207207207207207, | |
| "grad_norm": 0.3675323724746704, | |
| "learning_rate": 9.88080451875917e-06, | |
| "loss": 0.1766, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.7387387387387387, | |
| "grad_norm": 0.33044207096099854, | |
| "learning_rate": 9.874639560909118e-06, | |
| "loss": 0.1814, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.7567567567567568, | |
| "grad_norm": 0.3569028079509735, | |
| "learning_rate": 9.868321189035196e-06, | |
| "loss": 0.1727, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.7747747747747747, | |
| "grad_norm": 0.34842026233673096, | |
| "learning_rate": 9.861849601988384e-06, | |
| "loss": 0.1683, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.7927927927927928, | |
| "grad_norm": 0.35948798060417175, | |
| "learning_rate": 9.855225003441629e-06, | |
| "loss": 0.1719, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.8108108108108109, | |
| "grad_norm": 0.3589368164539337, | |
| "learning_rate": 9.848447601883436e-06, | |
| "loss": 0.1659, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.8288288288288288, | |
| "grad_norm": 0.3669843077659607, | |
| "learning_rate": 9.841517610611309e-06, | |
| "loss": 0.1703, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.8468468468468469, | |
| "grad_norm": 0.34229257702827454, | |
| "learning_rate": 9.834435247725032e-06, | |
| "loss": 0.1766, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.8648648648648649, | |
| "grad_norm": 0.3512667417526245, | |
| "learning_rate": 9.827200736119815e-06, | |
| "loss": 0.1643, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.8828828828828829, | |
| "grad_norm": 0.3597641885280609, | |
| "learning_rate": 9.819814303479268e-06, | |
| "loss": 0.169, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.9009009009009009, | |
| "grad_norm": 0.4169813096523285, | |
| "learning_rate": 9.812276182268236e-06, | |
| "loss": 0.1706, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.918918918918919, | |
| "grad_norm": 0.35649940371513367, | |
| "learning_rate": 9.804586609725499e-06, | |
| "loss": 0.172, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.9369369369369369, | |
| "grad_norm": 0.38562002778053284, | |
| "learning_rate": 9.79674582785628e-06, | |
| "loss": 0.1554, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.954954954954955, | |
| "grad_norm": 0.3706550896167755, | |
| "learning_rate": 9.788754083424654e-06, | |
| "loss": 0.1619, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.972972972972973, | |
| "grad_norm": 0.3817874789237976, | |
| "learning_rate": 9.78061162794576e-06, | |
| "loss": 0.1563, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.990990990990991, | |
| "grad_norm": 0.3631809949874878, | |
| "learning_rate": 9.772318717677905e-06, | |
| "loss": 0.1686, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.3631809949874878, | |
| "learning_rate": 9.763875613614482e-06, | |
| "loss": 0.0795, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 1.018018018018018, | |
| "grad_norm": 0.37705934047698975, | |
| "learning_rate": 9.755282581475769e-06, | |
| "loss": 0.1467, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 1.0360360360360361, | |
| "grad_norm": 0.3958112895488739, | |
| "learning_rate": 9.746539891700558e-06, | |
| "loss": 0.1395, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 1.054054054054054, | |
| "grad_norm": 0.3740142285823822, | |
| "learning_rate": 9.737647819437645e-06, | |
| "loss": 0.1407, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 1.072072072072072, | |
| "grad_norm": 0.3496512174606323, | |
| "learning_rate": 9.728606644537177e-06, | |
| "loss": 0.1369, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 1.09009009009009, | |
| "grad_norm": 0.3684415817260742, | |
| "learning_rate": 9.719416651541839e-06, | |
| "loss": 0.1307, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 1.1081081081081081, | |
| "grad_norm": 0.3743540048599243, | |
| "learning_rate": 9.710078129677895e-06, | |
| "loss": 0.1378, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 1.1261261261261262, | |
| "grad_norm": 0.39405685663223267, | |
| "learning_rate": 9.700591372846096e-06, | |
| "loss": 0.1315, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 1.1441441441441442, | |
| "grad_norm": 0.38246047496795654, | |
| "learning_rate": 9.690956679612422e-06, | |
| "loss": 0.1385, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 1.1621621621621623, | |
| "grad_norm": 0.38582369685173035, | |
| "learning_rate": 9.681174353198687e-06, | |
| "loss": 0.1295, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 1.1801801801801801, | |
| "grad_norm": 0.3975503742694855, | |
| "learning_rate": 9.671244701472999e-06, | |
| "loss": 0.1326, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 1.1981981981981982, | |
| "grad_norm": 0.3592880964279175, | |
| "learning_rate": 9.661168036940071e-06, | |
| "loss": 0.1336, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 1.2162162162162162, | |
| "grad_norm": 0.362184077501297, | |
| "learning_rate": 9.650944676731383e-06, | |
| "loss": 0.1363, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 1.2342342342342343, | |
| "grad_norm": 0.35390111804008484, | |
| "learning_rate": 9.640574942595195e-06, | |
| "loss": 0.1385, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 1.2522522522522523, | |
| "grad_norm": 0.3839528262615204, | |
| "learning_rate": 9.63005916088644e-06, | |
| "loss": 0.133, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 1.2702702702702702, | |
| "grad_norm": 0.3693341612815857, | |
| "learning_rate": 9.619397662556434e-06, | |
| "loss": 0.1395, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 1.2882882882882882, | |
| "grad_norm": 0.36481085419654846, | |
| "learning_rate": 9.608590783142471e-06, | |
| "loss": 0.1392, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 1.3063063063063063, | |
| "grad_norm": 0.3655868172645569, | |
| "learning_rate": 9.597638862757255e-06, | |
| "loss": 0.1294, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 1.3243243243243243, | |
| "grad_norm": 0.35944369435310364, | |
| "learning_rate": 9.586542246078203e-06, | |
| "loss": 0.1275, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 1.3423423423423424, | |
| "grad_norm": 0.3815653622150421, | |
| "learning_rate": 9.5753012823366e-06, | |
| "loss": 0.1316, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 1.3603603603603602, | |
| "grad_norm": 0.3833068907260895, | |
| "learning_rate": 9.563916325306595e-06, | |
| "loss": 0.1271, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 1.3783783783783785, | |
| "grad_norm": 0.369124174118042, | |
| "learning_rate": 9.552387733294081e-06, | |
| "loss": 0.1352, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 1.3963963963963963, | |
| "grad_norm": 0.35860103368759155, | |
| "learning_rate": 9.540715869125407e-06, | |
| "loss": 0.1342, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 1.4144144144144144, | |
| "grad_norm": 0.39357277750968933, | |
| "learning_rate": 9.528901100135971e-06, | |
| "loss": 0.1323, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 1.4324324324324325, | |
| "grad_norm": 0.3725232779979706, | |
| "learning_rate": 9.51694379815865e-06, | |
| "loss": 0.1309, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.4504504504504505, | |
| "grad_norm": 0.3814895749092102, | |
| "learning_rate": 9.504844339512096e-06, | |
| "loss": 0.1277, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 1.4684684684684686, | |
| "grad_norm": 0.4078717529773712, | |
| "learning_rate": 9.492603104988907e-06, | |
| "loss": 0.1342, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 1.4864864864864864, | |
| "grad_norm": 0.360506534576416, | |
| "learning_rate": 9.480220479843627e-06, | |
| "loss": 0.1332, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 1.5045045045045045, | |
| "grad_norm": 0.37059858441352844, | |
| "learning_rate": 9.467696853780625e-06, | |
| "loss": 0.1303, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 1.5225225225225225, | |
| "grad_norm": 0.38473525643348694, | |
| "learning_rate": 9.45503262094184e-06, | |
| "loss": 0.1355, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 1.5405405405405406, | |
| "grad_norm": 0.39016297459602356, | |
| "learning_rate": 9.442228179894362e-06, | |
| "loss": 0.1415, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 1.5585585585585586, | |
| "grad_norm": 0.372662752866745, | |
| "learning_rate": 9.4292839336179e-06, | |
| "loss": 0.1334, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 1.5765765765765765, | |
| "grad_norm": 0.3725448250770569, | |
| "learning_rate": 9.416200289492092e-06, | |
| "loss": 0.1285, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 1.5945945945945947, | |
| "grad_norm": 0.3796994090080261, | |
| "learning_rate": 9.40297765928369e-06, | |
| "loss": 0.1274, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 1.6126126126126126, | |
| "grad_norm": 0.36067870259284973, | |
| "learning_rate": 9.389616459133597e-06, | |
| "loss": 0.1301, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.6306306306306306, | |
| "grad_norm": 0.36582452058792114, | |
| "learning_rate": 9.376117109543769e-06, | |
| "loss": 0.1319, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 1.6486486486486487, | |
| "grad_norm": 0.38476914167404175, | |
| "learning_rate": 9.362480035363987e-06, | |
| "loss": 0.1259, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 0.3849290907382965, | |
| "learning_rate": 9.348705665778479e-06, | |
| "loss": 0.1302, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 1.6846846846846848, | |
| "grad_norm": 0.3458653688430786, | |
| "learning_rate": 9.334794434292416e-06, | |
| "loss": 0.1229, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 1.7027027027027026, | |
| "grad_norm": 0.369865357875824, | |
| "learning_rate": 9.320746778718274e-06, | |
| "loss": 0.1348, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 1.7207207207207207, | |
| "grad_norm": 0.34437230229377747, | |
| "learning_rate": 9.306563141162046e-06, | |
| "loss": 0.1268, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 1.7387387387387387, | |
| "grad_norm": 0.37380245327949524, | |
| "learning_rate": 9.292243968009332e-06, | |
| "loss": 0.1331, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 1.7567567567567568, | |
| "grad_norm": 0.3576115369796753, | |
| "learning_rate": 9.27778970991129e-06, | |
| "loss": 0.1327, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 1.7747747747747749, | |
| "grad_norm": 0.3359750211238861, | |
| "learning_rate": 9.263200821770462e-06, | |
| "loss": 0.1312, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 1.7927927927927927, | |
| "grad_norm": 0.4232477843761444, | |
| "learning_rate": 9.248477762726438e-06, | |
| "loss": 0.1262, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.810810810810811, | |
| "grad_norm": 0.34946057200431824, | |
| "learning_rate": 9.233620996141421e-06, | |
| "loss": 0.125, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 1.8288288288288288, | |
| "grad_norm": 0.37758418917655945, | |
| "learning_rate": 9.218630989585647e-06, | |
| "loss": 0.1259, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 1.8468468468468469, | |
| "grad_norm": 0.38684237003326416, | |
| "learning_rate": 9.203508214822652e-06, | |
| "loss": 0.1282, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 1.864864864864865, | |
| "grad_norm": 0.3739815652370453, | |
| "learning_rate": 9.188253147794443e-06, | |
| "loss": 0.1238, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 1.8828828828828827, | |
| "grad_norm": 0.3532610237598419, | |
| "learning_rate": 9.172866268606514e-06, | |
| "loss": 0.1272, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 1.900900900900901, | |
| "grad_norm": 0.37537145614624023, | |
| "learning_rate": 9.157348061512728e-06, | |
| "loss": 0.1287, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 1.9189189189189189, | |
| "grad_norm": 0.3723607659339905, | |
| "learning_rate": 9.141699014900084e-06, | |
| "loss": 0.1237, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 1.936936936936937, | |
| "grad_norm": 0.33924514055252075, | |
| "learning_rate": 9.125919621273348e-06, | |
| "loss": 0.1277, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 1.954954954954955, | |
| "grad_norm": 0.39682239294052124, | |
| "learning_rate": 9.110010377239552e-06, | |
| "loss": 0.1276, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 1.972972972972973, | |
| "grad_norm": 0.39324480295181274, | |
| "learning_rate": 9.093971783492354e-06, | |
| "loss": 0.1301, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.990990990990991, | |
| "grad_norm": 0.3867541253566742, | |
| "learning_rate": 9.077804344796302e-06, | |
| "loss": 0.1327, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.39770832657814026, | |
| "learning_rate": 9.061508569970926e-06, | |
| "loss": 0.0666, | |
| "step": 112 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 560, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 56, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.424337771526357e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |