Text Generation
Transformers
Safetensors
Spanish
llama_longbel
biomedical-entity-linking
entity-linking
entity-disambiguation
named-entity-linking
biomedical
healthcare
snomed
spaccc
medprocner
symptemist
distemist
constrained-decoding
causal-lm
llm
conversational
custom_code
Eval Results (legacy)
Instructions to use AnonymousARR42/LongBEL_8B_SPACCC with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use AnonymousARR42/LongBEL_8B_SPACCC with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="AnonymousARR42/LongBEL_8B_SPACCC", trust_remote_code=True)
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```

```python
# Load model directly
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("AnonymousARR42/LongBEL_8B_SPACCC", trust_remote_code=True, dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use AnonymousARR42/LongBEL_8B_SPACCC with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "AnonymousARR42/LongBEL_8B_SPACCC"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "AnonymousARR42/LongBEL_8B_SPACCC",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
Use Docker
docker model run hf.co/AnonymousARR42/LongBEL_8B_SPACCC
- SGLang
How to use AnonymousARR42/LongBEL_8B_SPACCC with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "AnonymousARR42/LongBEL_8B_SPACCC" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "AnonymousARR42/LongBEL_8B_SPACCC",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "AnonymousARR42/LongBEL_8B_SPACCC" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "AnonymousARR42/LongBEL_8B_SPACCC",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
- Docker Model Runner
How to use AnonymousARR42/LongBEL_8B_SPACCC with Docker Model Runner:
docker model run hf.co/AnonymousARR42/LongBEL_8B_SPACCC
| { | |
| "best_global_step": 18252, | |
| "best_metric": 0.8571, | |
| "best_model_checkpoint": "models/NED/SPACCC_human_only_tfidf_hybrid_long_v2_addheaders/Llama-3.1-8B-Instruct/checkpoint-18252", | |
| "epoch": 50.0, | |
| "eval_steps": 500, | |
| "global_step": 304200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "entropy": 1.6452295544859141, | |
| "epoch": 1.0, | |
| "grad_norm": 36.5, | |
| "learning_rate": 1.999671268902038e-05, | |
| "loss": 0.8315, | |
| "mean_token_accuracy": 0.8217970482187001, | |
| "num_tokens": 23190253.0, | |
| "step": 6084 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_entropy": 1.7185479640960692, | |
| "eval_loss": 0.4542328417301178, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 23190253.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.3763, | |
| "eval_samples_per_second": 37.199, | |
| "eval_steps_per_second": 13.286, | |
| "step": 6084 | |
| }, | |
| { | |
| "entropy": 1.6400107836190374, | |
| "epoch": 2.0, | |
| "grad_norm": 24.0, | |
| "learning_rate": 2.9690823318896277e-05, | |
| "loss": 0.5539, | |
| "mean_token_accuracy": 0.869478269118294, | |
| "num_tokens": 46380506.0, | |
| "step": 12168 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_entropy": 1.826171350479126, | |
| "eval_loss": 0.3982131779193878, | |
| "eval_mean_token_accuracy": 0.9231559276580811, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 46380506.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3572, | |
| "eval_samples_per_second": 39.19, | |
| "eval_steps_per_second": 13.996, | |
| "step": 12168 | |
| }, | |
| { | |
| "entropy": 1.369841670310082, | |
| "epoch": 3.0, | |
| "grad_norm": 48.5, | |
| "learning_rate": 2.9072266617865347e-05, | |
| "loss": 0.2993, | |
| "mean_token_accuracy": 0.9245726638164354, | |
| "num_tokens": 69570759.0, | |
| "step": 18252 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_entropy": 1.2688756227493285, | |
| "eval_loss": 0.4362812042236328, | |
| "eval_mean_token_accuracy": 0.9365079402923584, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 69570759.0, | |
| "eval_recall": 0.8571, | |
| "eval_runtime": 0.3611, | |
| "eval_samples_per_second": 38.773, | |
| "eval_steps_per_second": 13.848, | |
| "step": 18252 | |
| }, | |
| { | |
| "entropy": 1.019543299896007, | |
| "epoch": 4.0, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 2.845370991683442e-05, | |
| "loss": 0.1426, | |
| "mean_token_accuracy": 0.9636940412482958, | |
| "num_tokens": 92761012.0, | |
| "step": 24336 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_entropy": 1.04158456325531, | |
| "eval_loss": 0.5508492588996887, | |
| "eval_mean_token_accuracy": 0.9152194261550903, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 92761012.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.3625, | |
| "eval_samples_per_second": 38.617, | |
| "eval_steps_per_second": 13.792, | |
| "step": 24336 | |
| }, | |
| { | |
| "entropy": 0.8108051640179343, | |
| "epoch": 5.0, | |
| "grad_norm": 0.0081787109375, | |
| "learning_rate": 2.7835153215803492e-05, | |
| "loss": 0.0607, | |
| "mean_token_accuracy": 0.9852782511283974, | |
| "num_tokens": 115951265.0, | |
| "step": 30420 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_entropy": 0.8801020622253418, | |
| "eval_loss": 0.6922857165336609, | |
| "eval_mean_token_accuracy": 0.9152194261550903, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 115951265.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3885, | |
| "eval_samples_per_second": 36.034, | |
| "eval_steps_per_second": 12.869, | |
| "step": 30420 | |
| }, | |
| { | |
| "entropy": 0.718129879927886, | |
| "epoch": 6.0, | |
| "grad_norm": 0.0277099609375, | |
| "learning_rate": 2.7216596514772566e-05, | |
| "loss": 0.0218, | |
| "mean_token_accuracy": 0.9949554541466034, | |
| "num_tokens": 139141518.0, | |
| "step": 36504 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_entropy": 0.7900081038475036, | |
| "eval_loss": 0.6174029111862183, | |
| "eval_mean_token_accuracy": 0.9247432351112366, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 139141518.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3581, | |
| "eval_samples_per_second": 39.098, | |
| "eval_steps_per_second": 13.964, | |
| "step": 36504 | |
| }, | |
| { | |
| "entropy": 0.707064643982286, | |
| "epoch": 7.0, | |
| "grad_norm": 0.00396728515625, | |
| "learning_rate": 2.659803981374164e-05, | |
| "loss": 0.0071, | |
| "mean_token_accuracy": 0.9984249214566436, | |
| "num_tokens": 162331771.0, | |
| "step": 42588 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_entropy": 0.8332192063331604, | |
| "eval_loss": 0.6586180925369263, | |
| "eval_mean_token_accuracy": 0.9476190447807312, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 162331771.0, | |
| "eval_recall": 0.8571, | |
| "eval_runtime": 0.3996, | |
| "eval_samples_per_second": 35.034, | |
| "eval_steps_per_second": 12.512, | |
| "step": 42588 | |
| }, | |
| { | |
| "entropy": 0.7012911128574575, | |
| "epoch": 8.0, | |
| "grad_norm": 6.628036499023438e-05, | |
| "learning_rate": 2.597948311271071e-05, | |
| "loss": 0.0021, | |
| "mean_token_accuracy": 0.9994771537578553, | |
| "num_tokens": 185522024.0, | |
| "step": 48672 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_entropy": 0.8370911121368408, | |
| "eval_loss": 0.695911705493927, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 185522024.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.4123, | |
| "eval_samples_per_second": 33.959, | |
| "eval_steps_per_second": 12.128, | |
| "step": 48672 | |
| }, | |
| { | |
| "entropy": 0.7100966135621619, | |
| "epoch": 9.0, | |
| "grad_norm": 0.0034332275390625, | |
| "learning_rate": 2.5360926411679782e-05, | |
| "loss": 0.0006, | |
| "mean_token_accuracy": 0.9998389675889965, | |
| "num_tokens": 208712277.0, | |
| "step": 54756 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_entropy": 0.848201584815979, | |
| "eval_loss": 0.6835612654685974, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 208712277.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.4367, | |
| "eval_samples_per_second": 32.06, | |
| "eval_steps_per_second": 11.45, | |
| "step": 54756 | |
| }, | |
| { | |
| "entropy": 0.7072051375869228, | |
| "epoch": 10.0, | |
| "grad_norm": 0.01507568359375, | |
| "learning_rate": 2.4742369710648856e-05, | |
| "loss": 0.0001, | |
| "mean_token_accuracy": 0.9999864275501716, | |
| "num_tokens": 231902530.0, | |
| "step": 60840 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_entropy": 0.8428204655647278, | |
| "eval_loss": 0.6963585615158081, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 231902530.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3772, | |
| "eval_samples_per_second": 37.119, | |
| "eval_steps_per_second": 13.257, | |
| "step": 60840 | |
| }, | |
| { | |
| "entropy": 0.7021450536628054, | |
| "epoch": 11.0, | |
| "grad_norm": 0.04638671875, | |
| "learning_rate": 2.4123813009617927e-05, | |
| "loss": 0.0001, | |
| "mean_token_accuracy": 0.9999853084543517, | |
| "num_tokens": 255092783.0, | |
| "step": 66924 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_entropy": 0.8535857558250427, | |
| "eval_loss": 0.703579306602478, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 255092783.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3599, | |
| "eval_samples_per_second": 38.9, | |
| "eval_steps_per_second": 13.893, | |
| "step": 66924 | |
| }, | |
| { | |
| "entropy": 0.7062483392484403, | |
| "epoch": 12.0, | |
| "grad_norm": 0.0003795623779296875, | |
| "learning_rate": 2.3505256308586998e-05, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 278283036.0, | |
| "step": 73008 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_entropy": 0.84964017868042, | |
| "eval_loss": 0.7106609344482422, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 278283036.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3587, | |
| "eval_samples_per_second": 39.032, | |
| "eval_steps_per_second": 13.94, | |
| "step": 73008 | |
| }, | |
| { | |
| "entropy": 0.7034359549041801, | |
| "epoch": 13.0, | |
| "grad_norm": 0.004058837890625, | |
| "learning_rate": 2.2886699607556072e-05, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 0.9999969561953517, | |
| "num_tokens": 301473289.0, | |
| "step": 79092 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_entropy": 0.846993887424469, | |
| "eval_loss": 0.7097910642623901, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 301473289.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3594, | |
| "eval_samples_per_second": 38.956, | |
| "eval_steps_per_second": 13.913, | |
| "step": 79092 | |
| }, | |
| { | |
| "entropy": 0.7021650211110387, | |
| "epoch": 14.0, | |
| "grad_norm": 0.0126953125, | |
| "learning_rate": 2.2268142906525143e-05, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 324663542.0, | |
| "step": 85176 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_entropy": 0.8468610405921936, | |
| "eval_loss": 0.7046443223953247, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 324663542.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3627, | |
| "eval_samples_per_second": 38.595, | |
| "eval_steps_per_second": 13.784, | |
| "step": 85176 | |
| }, | |
| { | |
| "entropy": 0.7020127680115916, | |
| "epoch": 15.0, | |
| "grad_norm": 0.01336669921875, | |
| "learning_rate": 2.1649586205494213e-05, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 347853795.0, | |
| "step": 91260 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_entropy": 0.8471502661705017, | |
| "eval_loss": 0.7057820558547974, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 347853795.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3597, | |
| "eval_samples_per_second": 38.921, | |
| "eval_steps_per_second": 13.9, | |
| "step": 91260 | |
| }, | |
| { | |
| "entropy": 0.7020189780925623, | |
| "epoch": 16.0, | |
| "grad_norm": 0.002655029296875, | |
| "learning_rate": 2.1031029504463287e-05, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 23190253.0, | |
| "step": 97344 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_entropy": 0.8467319965362549, | |
| "eval_loss": 0.7018277049064636, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 23190253.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3922, | |
| "eval_samples_per_second": 35.698, | |
| "eval_steps_per_second": 12.749, | |
| "step": 97344 | |
| }, | |
| { | |
| "entropy": 0.7017412878901139, | |
| "epoch": 17.0, | |
| "grad_norm": 0.00823974609375, | |
| "learning_rate": 2.0412472803432358e-05, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 46380506.0, | |
| "step": 103428 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_entropy": 0.8470642566680908, | |
| "eval_loss": 0.7061834335327148, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 46380506.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3627, | |
| "eval_samples_per_second": 38.601, | |
| "eval_steps_per_second": 13.786, | |
| "step": 103428 | |
| }, | |
| { | |
| "entropy": 0.7014865135280495, | |
| "epoch": 18.0, | |
| "grad_norm": 0.0004730224609375, | |
| "learning_rate": 1.979391610240143e-05, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 69570759.0, | |
| "step": 109512 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_entropy": 0.8469521641731262, | |
| "eval_loss": 0.705422043800354, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 69570759.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3685, | |
| "eval_samples_per_second": 37.988, | |
| "eval_steps_per_second": 13.567, | |
| "step": 109512 | |
| }, | |
| { | |
| "entropy": 0.7014951153384628, | |
| "epoch": 19.0, | |
| "grad_norm": 0.0012359619140625, | |
| "learning_rate": 1.9175359401370503e-05, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 92761012.0, | |
| "step": 115596 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_entropy": 0.8468135356903076, | |
| "eval_loss": 0.7019368410110474, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 92761012.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3664, | |
| "eval_samples_per_second": 38.213, | |
| "eval_steps_per_second": 13.648, | |
| "step": 115596 | |
| }, | |
| { | |
| "entropy": 0.7013456000359257, | |
| "epoch": 20.0, | |
| "grad_norm": 0.0032958984375, | |
| "learning_rate": 1.8556802700339577e-05, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 115951265.0, | |
| "step": 121680 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_entropy": 0.8471927762031555, | |
| "eval_loss": 0.7029876708984375, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 115951265.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3699, | |
| "eval_samples_per_second": 37.852, | |
| "eval_steps_per_second": 13.519, | |
| "step": 121680 | |
| }, | |
| { | |
| "entropy": 0.701600033552297, | |
| "epoch": 21.0, | |
| "grad_norm": 0.007232666015625, | |
| "learning_rate": 1.793824599930865e-05, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 139141518.0, | |
| "step": 127764 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_entropy": 0.8469080567359925, | |
| "eval_loss": 0.6987797617912292, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 139141518.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3683, | |
| "eval_samples_per_second": 38.011, | |
| "eval_steps_per_second": 13.575, | |
| "step": 127764 | |
| }, | |
| { | |
| "entropy": 0.7013353118568875, | |
| "epoch": 22.0, | |
| "grad_norm": 0.005035400390625, | |
| "learning_rate": 1.7319689298277722e-05, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 162331771.0, | |
| "step": 133848 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_entropy": 0.8477846264839173, | |
| "eval_loss": 0.7038993835449219, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 162331771.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3637, | |
| "eval_samples_per_second": 38.496, | |
| "eval_steps_per_second": 13.749, | |
| "step": 133848 | |
| }, | |
| { | |
| "entropy": 0.701190030512098, | |
| "epoch": 23.0, | |
| "grad_norm": 0.0034942626953125, | |
| "learning_rate": 1.6701132597246793e-05, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 185522024.0, | |
| "step": 139932 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_entropy": 0.8471533417701721, | |
| "eval_loss": 0.7041516900062561, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 185522024.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3647, | |
| "eval_samples_per_second": 38.392, | |
| "eval_steps_per_second": 13.711, | |
| "step": 139932 | |
| }, | |
| { | |
| "entropy": 0.7012511798518022, | |
| "epoch": 24.0, | |
| "grad_norm": 0.033447265625, | |
| "learning_rate": 1.6082575896215867e-05, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 208712277.0, | |
| "step": 146016 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_entropy": 0.8475937247276306, | |
| "eval_loss": 0.7037870287895203, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 208712277.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3629, | |
| "eval_samples_per_second": 38.578, | |
| "eval_steps_per_second": 13.778, | |
| "step": 146016 | |
| }, | |
| { | |
| "entropy": 0.7011014092799318, | |
| "epoch": 25.0, | |
| "grad_norm": 0.0004138946533203125, | |
| "learning_rate": 1.5464019195184938e-05, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 231902530.0, | |
| "step": 152100 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_entropy": 0.8474008202552795, | |
| "eval_loss": 0.7015256881713867, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 231902530.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3667, | |
| "eval_samples_per_second": 38.176, | |
| "eval_steps_per_second": 13.634, | |
| "step": 152100 | |
| }, | |
| { | |
| "entropy": 0.7010043011849684, | |
| "epoch": 26.0, | |
| "grad_norm": 0.005462646484375, | |
| "learning_rate": 1.4845462494154008e-05, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 255092783.0, | |
| "step": 158184 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_entropy": 0.8452415347099305, | |
| "eval_loss": 0.7029375433921814, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 255092783.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3822, | |
| "eval_samples_per_second": 36.63, | |
| "eval_steps_per_second": 13.082, | |
| "step": 158184 | |
| }, | |
| { | |
| "entropy": 0.7014232551303057, | |
| "epoch": 27.0, | |
| "grad_norm": 0.0093994140625, | |
| "learning_rate": 1.4226905793123081e-05, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 278283036.0, | |
| "step": 164268 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_entropy": 0.8463044404983521, | |
| "eval_loss": 0.7052632570266724, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 278283036.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.388, | |
| "eval_samples_per_second": 36.085, | |
| "eval_steps_per_second": 12.888, | |
| "step": 164268 | |
| }, | |
| { | |
| "entropy": 0.701196315226235, | |
| "epoch": 28.0, | |
| "grad_norm": 0.00665283203125, | |
| "learning_rate": 1.3608349092092153e-05, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 301473289.0, | |
| "step": 170352 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_entropy": 0.8478581547737122, | |
| "eval_loss": 0.7016162872314453, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 301473289.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3628, | |
| "eval_samples_per_second": 38.588, | |
| "eval_steps_per_second": 13.781, | |
| "step": 170352 | |
| }, | |
| { | |
| "entropy": 0.7013136386910526, | |
| "epoch": 29.0, | |
| "grad_norm": 0.002960205078125, | |
| "learning_rate": 1.2989792391061224e-05, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 324663542.0, | |
| "step": 176436 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_entropy": 0.848319697380066, | |
| "eval_loss": 0.7054678797721863, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 324663542.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.4297, | |
| "eval_samples_per_second": 32.584, | |
| "eval_steps_per_second": 11.637, | |
| "step": 176436 | |
| }, | |
| { | |
| "entropy": 0.7013322007080037, | |
| "epoch": 30.0, | |
| "grad_norm": 0.023193359375, | |
| "learning_rate": 1.2371235690030298e-05, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 347853795.0, | |
| "step": 182520 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_entropy": 0.8468625545501709, | |
| "eval_loss": 0.7053791284561157, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 347853795.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3794, | |
| "eval_samples_per_second": 36.904, | |
| "eval_steps_per_second": 13.18, | |
| "step": 182520 | |
| }, | |
| { | |
| "entropy": 0.7009657870618079, | |
| "epoch": 31.0, | |
| "grad_norm": 0.00531005859375, | |
| "learning_rate": 1.175267898899937e-05, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 23190253.0, | |
| "step": 188604 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_entropy": 0.8471902132034301, | |
| "eval_loss": 0.7000441551208496, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 23190253.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3469, | |
| "eval_samples_per_second": 40.36, | |
| "eval_steps_per_second": 14.414, | |
| "step": 188604 | |
| }, | |
| { | |
| "entropy": 0.7013605682964312, | |
| "epoch": 32.0, | |
| "grad_norm": 0.004486083984375, | |
| "learning_rate": 1.1134122287968443e-05, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 46380506.0, | |
| "step": 194688 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_entropy": 0.8475266933441162, | |
| "eval_loss": 0.7029227018356323, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 46380506.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3352, | |
| "eval_samples_per_second": 41.764, | |
| "eval_steps_per_second": 14.916, | |
| "step": 194688 | |
| }, | |
| { | |
| "entropy": 0.7009823948223288, | |
| "epoch": 33.0, | |
| "grad_norm": 0.00072479248046875, | |
| "learning_rate": 1.0515565586937514e-05, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 69570759.0, | |
| "step": 200772 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_entropy": 0.8483369827270508, | |
| "eval_loss": 0.6992912292480469, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 69570759.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3344, | |
| "eval_samples_per_second": 41.86, | |
| "eval_steps_per_second": 14.95, | |
| "step": 200772 | |
| }, | |
| { | |
| "entropy": 0.7011937351406287, | |
| "epoch": 34.0, | |
| "grad_norm": 0.0191650390625, | |
| "learning_rate": 9.897008885906586e-06, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 92761012.0, | |
| "step": 206856 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_entropy": 0.8469909429550171, | |
| "eval_loss": 0.7026051878929138, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 92761012.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3375, | |
| "eval_samples_per_second": 41.484, | |
| "eval_steps_per_second": 14.816, | |
| "step": 206856 | |
| }, | |
| { | |
| "entropy": 0.7014018389580674, | |
| "epoch": 35.0, | |
| "grad_norm": 0.0016021728515625, | |
| "learning_rate": 9.278452184875659e-06, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 115951265.0, | |
| "step": 212940 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_entropy": 0.8470338106155395, | |
| "eval_loss": 0.701595664024353, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 115951265.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3346, | |
| "eval_samples_per_second": 41.838, | |
| "eval_steps_per_second": 14.942, | |
| "step": 212940 | |
| }, | |
| { | |
| "entropy": 0.7011160486862112, | |
| "epoch": 36.0, | |
| "grad_norm": 0.00010251998901367188, | |
| "learning_rate": 8.65989548384473e-06, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 139141518.0, | |
| "step": 219024 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_entropy": 0.8465482711791992, | |
| "eval_loss": 0.7029346823692322, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 139141518.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3364, | |
| "eval_samples_per_second": 41.619, | |
| "eval_steps_per_second": 14.864, | |
| "step": 219024 | |
| }, | |
| { | |
| "entropy": 0.7011719772899566, | |
| "epoch": 37.0, | |
| "grad_norm": 0.0274658203125, | |
| "learning_rate": 8.041338782813804e-06, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 162331771.0, | |
| "step": 225108 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "eval_entropy": 0.8483202934265137, | |
| "eval_loss": 0.7015349268913269, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 162331771.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3421, | |
| "eval_samples_per_second": 40.92, | |
| "eval_steps_per_second": 14.614, | |
| "step": 225108 | |
| }, | |
| { | |
| "entropy": 0.7014031262087399, | |
| "epoch": 38.0, | |
| "grad_norm": 0.00131988525390625, | |
| "learning_rate": 7.4227820817828744e-06, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 185522024.0, | |
| "step": 231192 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_entropy": 0.8476453304290772, | |
| "eval_loss": 0.7029452323913574, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 185522024.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.365, | |
| "eval_samples_per_second": 38.357, | |
| "eval_steps_per_second": 13.699, | |
| "step": 231192 | |
| }, | |
| { | |
| "entropy": 0.7013709572664928, | |
| "epoch": 39.0, | |
| "grad_norm": 0.00299072265625, | |
| "learning_rate": 6.804225380751948e-06, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 208712277.0, | |
| "step": 237276 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "eval_entropy": 0.8475553870201111, | |
| "eval_loss": 0.7111210227012634, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 208712277.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3606, | |
| "eval_samples_per_second": 38.824, | |
| "eval_steps_per_second": 13.866, | |
| "step": 237276 | |
| }, | |
| { | |
| "entropy": 0.701420904293333, | |
| "epoch": 40.0, | |
| "grad_norm": 0.0274658203125, | |
| "learning_rate": 6.185668679721019e-06, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 231902530.0, | |
| "step": 243360 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_entropy": 0.8471273303031921, | |
| "eval_loss": 0.7025880813598633, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 231902530.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3628, | |
| "eval_samples_per_second": 38.594, | |
| "eval_steps_per_second": 13.783, | |
| "step": 243360 | |
| }, | |
| { | |
| "entropy": 0.7012593725219829, | |
| "epoch": 41.0, | |
| "grad_norm": 0.00136566162109375, | |
| "learning_rate": 5.567111978690091e-06, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 255092783.0, | |
| "step": 249444 | |
| }, | |
| { | |
| "epoch": 41.0, | |
| "eval_entropy": 0.8475102543830871, | |
| "eval_loss": 0.701640784740448, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 255092783.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3585, | |
| "eval_samples_per_second": 39.047, | |
| "eval_steps_per_second": 13.945, | |
| "step": 249444 | |
| }, | |
| { | |
| "entropy": 0.7012932761612102, | |
| "epoch": 42.0, | |
| "grad_norm": 0.004669189453125, | |
| "learning_rate": 4.948555277659164e-06, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 278283036.0, | |
| "step": 255528 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "eval_entropy": 0.8476259231567382, | |
| "eval_loss": 0.7012701034545898, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 278283036.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3591, | |
| "eval_samples_per_second": 38.982, | |
| "eval_steps_per_second": 13.922, | |
| "step": 255528 | |
| }, | |
| { | |
| "entropy": 0.7010069375638505, | |
| "epoch": 43.0, | |
| "grad_norm": 0.0091552734375, | |
| "learning_rate": 4.329998576628236e-06, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 301473289.0, | |
| "step": 261612 | |
| }, | |
| { | |
| "epoch": 43.0, | |
| "eval_entropy": 0.8467528820037842, | |
| "eval_loss": 0.701405942440033, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 301473289.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3595, | |
| "eval_samples_per_second": 38.939, | |
| "eval_steps_per_second": 13.907, | |
| "step": 261612 | |
| }, | |
| { | |
| "entropy": 0.701388671474971, | |
| "epoch": 44.0, | |
| "grad_norm": 0.01031494140625, | |
| "learning_rate": 3.711441875597308e-06, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 324663542.0, | |
| "step": 267696 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_entropy": 0.847226333618164, | |
| "eval_loss": 0.7051442265510559, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 324663542.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3574, | |
| "eval_samples_per_second": 39.168, | |
| "eval_steps_per_second": 13.989, | |
| "step": 267696 | |
| }, | |
| { | |
| "entropy": 0.7011981701121998, | |
| "epoch": 45.0, | |
| "grad_norm": 0.0189208984375, | |
| "learning_rate": 3.09288517456638e-06, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 347853795.0, | |
| "step": 273780 | |
| }, | |
| { | |
| "epoch": 45.0, | |
| "eval_entropy": 0.8469288349151611, | |
| "eval_loss": 0.699046790599823, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 347853795.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3618, | |
| "eval_samples_per_second": 38.7, | |
| "eval_steps_per_second": 13.821, | |
| "step": 273780 | |
| }, | |
| { | |
| "entropy": 0.7014733728004239, | |
| "epoch": 46.0, | |
| "grad_norm": 0.007354736328125, | |
| "learning_rate": 2.474328473535452e-06, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 23190253.0, | |
| "step": 279864 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "eval_entropy": 0.8493005037307739, | |
| "eval_loss": 0.7080731391906738, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 23190253.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.7195, | |
| "eval_samples_per_second": 19.457, | |
| "eval_steps_per_second": 6.949, | |
| "step": 279864 | |
| }, | |
| { | |
| "entropy": 0.7013301944983468, | |
| "epoch": 47.0, | |
| "grad_norm": 0.039306640625, | |
| "learning_rate": 1.8557717725045243e-06, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 46380506.0, | |
| "step": 285948 | |
| }, | |
| { | |
| "epoch": 47.0, | |
| "eval_entropy": 0.8469571113586426, | |
| "eval_loss": 0.7055781483650208, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 46380506.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3604, | |
| "eval_samples_per_second": 38.845, | |
| "eval_steps_per_second": 13.873, | |
| "step": 285948 | |
| }, | |
| { | |
| "entropy": 0.7012139727887474, | |
| "epoch": 48.0, | |
| "grad_norm": 0.004791259765625, | |
| "learning_rate": 1.2372150714735964e-06, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 69570759.0, | |
| "step": 292032 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_entropy": 0.8478953242301941, | |
| "eval_loss": 0.7026681900024414, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 69570759.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.3695, | |
| "eval_samples_per_second": 37.885, | |
| "eval_steps_per_second": 13.53, | |
| "step": 292032 | |
| }, | |
| { | |
| "entropy": 0.7014247281409501, | |
| "epoch": 49.0, | |
| "grad_norm": 0.00099945068359375, | |
| "learning_rate": 6.186583704426686e-07, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 92761012.0, | |
| "step": 298116 | |
| }, | |
| { | |
| "epoch": 49.0, | |
| "eval_entropy": 0.8473766565322876, | |
| "eval_loss": 0.7014277577400208, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 92761012.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.4396, | |
| "eval_samples_per_second": 31.845, | |
| "eval_steps_per_second": 11.373, | |
| "step": 298116 | |
| }, | |
| { | |
| "entropy": 0.7012487682443477, | |
| "epoch": 50.0, | |
| "grad_norm": 0.01092529296875, | |
| "learning_rate": 1.0166941174078366e-10, | |
| "loss": 0.0, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 115951265.0, | |
| "step": 304200 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "eval_entropy": 0.8481771588325501, | |
| "eval_loss": 0.7044681906700134, | |
| "eval_mean_token_accuracy": 0.9358543395996094, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 115951265.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.9148, | |
| "eval_samples_per_second": 15.304, | |
| "eval_steps_per_second": 5.466, | |
| "step": 304200 | |
| } | |
| ], | |
| "logging_steps": 0, | |
| "max_steps": 304200, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 50, | |
| "save_steps": 0, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.221235748535009e+19, | |
| "train_batch_size": 3, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |