Instructions to use TomPanda/LLM-Restate-Discllaw with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use TomPanda/LLM-Restate-Discllaw with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("/data/oss_bucket_0/mushuang/disc/")
model = PeftModel.from_pretrained(base_model, "TomPanda/LLM-Restate-Discllaw")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.9972396871645453, | |
| "eval_steps": 200.0, | |
| "global_step": 814, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 9.996276622795847e-06, | |
| "loss": 0.1401, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 9.98511203659851e-06, | |
| "loss": 0.0684, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 9.966522869394282e-06, | |
| "loss": 0.051, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 9.940536806975732e-06, | |
| "loss": 0.0444, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 9.907192551707831e-06, | |
| "loss": 0.0436, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 9.866539764886562e-06, | |
| "loss": 0.038, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 9.818638992775822e-06, | |
| "loss": 0.0421, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 9.763561576432781e-06, | |
| "loss": 0.0415, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 9.70138954545603e-06, | |
| "loss": 0.0402, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 9.632215495814724e-06, | |
| "loss": 0.0375, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 9.55614245194068e-06, | |
| "loss": 0.0362, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 9.473283713288862e-06, | |
| "loss": 0.0373, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 9.383762685594736e-06, | |
| "loss": 0.0375, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 9.287712697079827e-06, | |
| "loss": 0.0374, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 9.185276799879212e-06, | |
| "loss": 0.0358, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 9.076607556986699e-06, | |
| "loss": 0.0365, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 8.961866815035e-06, | |
| "loss": 0.0378, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 8.841225463249305e-06, | |
| "loss": 0.0367, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 8.714863178933258e-06, | |
| "loss": 0.0359, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 8.582968159866416e-06, | |
| "loss": 0.0346, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 8.445736844011712e-06, | |
| "loss": 0.0361, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 8.303373616950408e-06, | |
| "loss": 0.0352, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 8.156090507480242e-06, | |
| "loss": 0.0351, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 8.004106871830155e-06, | |
| "loss": 0.0345, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 7.847649066961905e-06, | |
| "loss": 0.0364, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 7.68695011344511e-06, | |
| "loss": 0.0313, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 7.52224934840788e-06, | |
| "loss": 0.0322, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 7.353792069079826e-06, | |
| "loss": 0.0348, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 7.181829167458441e-06, | |
| "loss": 0.0374, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 7.006616756642867e-06, | |
| "loss": 0.0348, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 6.828415789391632e-06, | |
| "loss": 0.0318, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 6.647491669472421e-06, | |
| "loss": 0.0345, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 6.464113856382752e-06, | |
| "loss": 0.034, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 6.278555464030228e-06, | |
| "loss": 0.0305, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 6.091092853970098e-06, | |
| "loss": 0.0337, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 5.902005223805931e-06, | |
| "loss": 0.0326, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 5.711574191366427e-06, | |
| "loss": 0.0344, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 5.520083375277644e-06, | |
| "loss": 0.032, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 5.3278179725553525e-06, | |
| "loss": 0.0329, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 5.135064333846612e-06, | |
| "loss": 0.0337, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 4.942109536953177e-06, | |
| "loss": 0.033, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 4.749240959271918e-06, | |
| "loss": 0.0298, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 4.556745849789055e-06, | |
| "loss": 0.0338, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 4.364910901265607e-06, | |
| "loss": 0.0332, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 4.174021823251294e-06, | |
| "loss": 0.0328, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.984362916562753e-06, | |
| "loss": 0.0329, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 3.7962166498598785e-06, | |
| "loss": 0.0315, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.6098632389508637e-06, | |
| "loss": 0.0299, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.4255802294525464e-06, | |
| "loss": 0.0319, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 3.2436420834276013e-06, | |
| "loss": 0.0319, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 3.0643197706142136e-06, | |
| "loss": 0.0333, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 2.8878803648570773e-06, | |
| "loss": 0.0306, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 2.7145866463407163e-06, | |
| "loss": 0.0309, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 2.544696710217588e-06, | |
| "loss": 0.0325, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 2.3784635822138424e-06, | |
| "loss": 0.032, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 2.2161348417852346e-06, | |
| "loss": 0.034, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 2.05795225338444e-06, | |
| "loss": 0.0322, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 1.9041514063889571e-06, | |
| "loss": 0.0328, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 1.7549613642258573e-06, | |
| "loss": 0.0336, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 1.6106043232159745e-06, | |
| "loss": 0.0339, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 1.4712952816456095e-06, | |
| "loss": 0.0325, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 1.337241719558648e-06, | |
| "loss": 0.0327, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 1.2086432897459738e-06, | |
| "loss": 0.0304, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 1.0856915203924096e-06, | |
| "loss": 0.0329, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 9.685695298240432e-07, | |
| "loss": 0.0322, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 8.574517537807897e-07, | |
| "loss": 0.0343, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 7.525036856203677e-07, | |
| "loss": 0.0328, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 6.538816298406203e-07, | |
| "loss": 0.0334, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 5.617324692872744e-07, | |
| "loss": 0.0334, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 4.7619344639384447e-07, | |
| "loss": 0.0337, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 3.9739195877949223e-07, | |
| "loss": 0.0306, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 3.254453695092752e-07, | |
| "loss": 0.032, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.604608322993518e-07, | |
| "loss": 0.0325, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 2.0253513192751374e-07, | |
| "loss": 0.0351, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 1.5175454008667712e-07, | |
| "loss": 0.0309, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 1.0819468689607426e-07, | |
| "loss": 0.0305, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 7.192044826145772e-08, | |
| "loss": 0.0284, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 4.298584925212068e-08, | |
| "loss": 0.0305, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 2.143398363860738e-08, | |
| "loss": 0.0303, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 7.2969497109715016e-09, | |
| "loss": 0.0313, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 5.958024731567147e-10, | |
| "loss": 0.0331, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "step": 814, | |
| "tflops": 490.2053045046936, | |
| "token/s": 1736.022262942593, | |
| "total_flos": 1.5851362853622645e+19, | |
| "train_loss": 0.035910474946516446, | |
| "train_runtime": 31598.4886, | |
| "train_samples_per_second": 6.603, | |
| "train_steps_per_second": 0.026 | |
| } | |
| ], | |
| "log_save_evaluate_time": 2188.9126505851746, | |
| "logging_steps": 10, | |
| "max_steps": 814, | |
| "num_train_epochs": 2, | |
| "save_steps": 200, | |
| "total_flos": 1.5851362853622645e+19, | |
| "total_tokens": 51055680.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |