Instructions to use AIcell/guava-05-14 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use AIcell/guava-05-14 with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("/workspace/.hf_home/hub/models--Qwen--Qwen3.5-4B/snapshots/851bf6e806efd8d0a36b00ddf55e13ccb7b8cd0a")
model = PeftModel.from_pretrained(base_model, "AIcell/guava-05-14")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_global_step": 105, | |
| "best_metric": 0.53931481, | |
| "best_model_checkpoint": "/workspace/xiruili_temporary/guava/runs/qwen35-4b-train-v6-full-r16-lr2e-5/v1-20260513-234931/checkpoint-105", | |
| "epoch": 3.0, | |
| "eval_steps": 25, | |
| "global_step": 105, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.029197080291970802, | |
| "grad_norm": 1.995608925819397, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 1.0870462656021118, | |
| "step": 1, | |
| "token_acc": 0.7222708272577039 | |
| }, | |
| { | |
| "epoch": 0.145985401459854, | |
| "grad_norm": 1.630089521408081, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 1.0861663818359375, | |
| "step": 5, | |
| "token_acc": 0.7234353454485584 | |
| }, | |
| { | |
| "epoch": 0.291970802919708, | |
| "grad_norm": 0.810945987701416, | |
| "learning_rate": 1.9919548128307954e-05, | |
| "loss": 1.0100549697875976, | |
| "step": 10, | |
| "token_acc": 0.7320401812893188 | |
| }, | |
| { | |
| "epoch": 0.43795620437956206, | |
| "grad_norm": 0.6168099641799927, | |
| "learning_rate": 1.9594929736144978e-05, | |
| "loss": 0.8932361602783203, | |
| "step": 15, | |
| "token_acc": 0.752801364142539 | |
| }, | |
| { | |
| "epoch": 0.583941605839416, | |
| "grad_norm": 0.5895264744758606, | |
| "learning_rate": 1.9029265382866216e-05, | |
| "loss": 0.8415294647216797, | |
| "step": 20, | |
| "token_acc": 0.7655036474215341 | |
| }, | |
| { | |
| "epoch": 0.7299270072992701, | |
| "grad_norm": 0.5221685767173767, | |
| "learning_rate": 1.8236765814298328e-05, | |
| "loss": 0.7946267127990723, | |
| "step": 25, | |
| "token_acc": 0.7786007141912445 | |
| }, | |
| { | |
| "epoch": 0.7299270072992701, | |
| "eval_loss": 0.7223935127258301, | |
| "eval_runtime": 19.6664, | |
| "eval_samples_per_second": 1.424, | |
| "eval_steps_per_second": 0.712, | |
| "eval_token_acc": 0.7937243620478398, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.8759124087591241, | |
| "grad_norm": 0.505389392375946, | |
| "learning_rate": 1.72373403810507e-05, | |
| "loss": 0.727280855178833, | |
| "step": 30, | |
| "token_acc": 0.7940088439236739 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.8667115569114685, | |
| "learning_rate": 1.6056096871376667e-05, | |
| "loss": 0.6953943252563477, | |
| "step": 35, | |
| "token_acc": 0.8008854009457692 | |
| }, | |
| { | |
| "epoch": 1.145985401459854, | |
| "grad_norm": 0.4732973575592041, | |
| "learning_rate": 1.472271074772683e-05, | |
| "loss": 0.6909239768981934, | |
| "step": 40, | |
| "token_acc": 0.802294099742074 | |
| }, | |
| { | |
| "epoch": 1.2919708029197081, | |
| "grad_norm": 0.4912143349647522, | |
| "learning_rate": 1.3270679633174219e-05, | |
| "loss": 0.6232150554656982, | |
| "step": 45, | |
| "token_acc": 0.8173081972378302 | |
| }, | |
| { | |
| "epoch": 1.437956204379562, | |
| "grad_norm": 0.4965362846851349, | |
| "learning_rate": 1.1736481776669307e-05, | |
| "loss": 0.6203184127807617, | |
| "step": 50, | |
| "token_acc": 0.817360592270597 | |
| }, | |
| { | |
| "epoch": 1.437956204379562, | |
| "eval_loss": 0.5858569741249084, | |
| "eval_runtime": 19.4944, | |
| "eval_samples_per_second": 1.436, | |
| "eval_steps_per_second": 0.718, | |
| "eval_token_acc": 0.8254754674764264, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 1.583941605839416, | |
| "grad_norm": 0.5230202078819275, | |
| "learning_rate": 1.015865963834808e-05, | |
| "loss": 0.6270622253417969, | |
| "step": 55, | |
| "token_acc": 0.8175454142667258 | |
| }, | |
| { | |
| "epoch": 1.72992700729927, | |
| "grad_norm": 0.5222014784812927, | |
| "learning_rate": 8.576851617267151e-06, | |
| "loss": 0.5988405227661133, | |
| "step": 60, | |
| "token_acc": 0.8216454622561493 | |
| }, | |
| { | |
| "epoch": 1.8759124087591241, | |
| "grad_norm": 0.5679988265037537, | |
| "learning_rate": 7.0307962467172555e-06, | |
| "loss": 0.6027673721313477, | |
| "step": 65, | |
| "token_acc": 0.8212479748637636 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.9797302484512329, | |
| "learning_rate": 5.559333873942259e-06, | |
| "loss": 0.5929806709289551, | |
| "step": 70, | |
| "token_acc": 0.828269567391848 | |
| }, | |
| { | |
| "epoch": 2.145985401459854, | |
| "grad_norm": 0.5088374018669128, | |
| "learning_rate": 4.19943090428802e-06, | |
| "loss": 0.5630727767944336, | |
| "step": 75, | |
| "token_acc": 0.830645705417988 | |
| }, | |
| { | |
| "epoch": 2.145985401459854, | |
| "eval_loss": 0.5463234782218933, | |
| "eval_runtime": 19.6522, | |
| "eval_samples_per_second": 1.425, | |
| "eval_steps_per_second": 0.712, | |
| "eval_token_acc": 0.8345319908369293, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 2.291970802919708, | |
| "grad_norm": 0.49748751521110535, | |
| "learning_rate": 2.9852511229367862e-06, | |
| "loss": 0.5831465721130371, | |
| "step": 80, | |
| "token_acc": 0.8253881640954412 | |
| }, | |
| { | |
| "epoch": 2.437956204379562, | |
| "grad_norm": 0.5103219747543335, | |
| "learning_rate": 1.947297424689414e-06, | |
| "loss": 0.5736560821533203, | |
| "step": 85, | |
| "token_acc": 0.8275780189959294 | |
| }, | |
| { | |
| "epoch": 2.5839416058394162, | |
| "grad_norm": 0.5354544520378113, | |
| "learning_rate": 1.1116455134507665e-06, | |
| "loss": 0.5565204620361328, | |
| "step": 90, | |
| "token_acc": 0.8317895383059783 | |
| }, | |
| { | |
| "epoch": 2.72992700729927, | |
| "grad_norm": 0.5385181903839111, | |
| "learning_rate": 4.992888225905467e-07, | |
| "loss": 0.5768057823181152, | |
| "step": 95, | |
| "token_acc": 0.8267348161960575 | |
| }, | |
| { | |
| "epoch": 2.875912408759124, | |
| "grad_norm": 0.5486910343170166, | |
| "learning_rate": 1.2561111323605714e-07, | |
| "loss": 0.5681517601013184, | |
| "step": 100, | |
| "token_acc": 0.8298371468398327 | |
| }, | |
| { | |
| "epoch": 2.875912408759124, | |
| "eval_loss": 0.5395660400390625, | |
| "eval_runtime": 19.736, | |
| "eval_samples_per_second": 1.419, | |
| "eval_steps_per_second": 0.709, | |
| "eval_token_acc": 0.8356507378402855, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 1.13607656955719, | |
| "learning_rate": 0.0, | |
| "loss": 0.5709176063537598, | |
| "step": 105, | |
| "token_acc": 0.8310829760807724 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 0.5393148064613342, | |
| "eval_runtime": 19.8847, | |
| "eval_samples_per_second": 1.408, | |
| "eval_steps_per_second": 0.704, | |
| "eval_token_acc": 0.835810558840765, | |
| "step": 105 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 105, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 25, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.994522975314903e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |