Instructions to use crhysc/diffractgpt_jarvis_alex123_cod_element_list with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use crhysc/diffractgpt_jarvis_alex123_cod_element_list with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("unsloth/mistral-7b-bnb-4bit")
model = PeftModel.from_pretrained(base_model, "crhysc/diffractgpt_jarvis_alex123_cod_element_list")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0035124532433173, | |
| "eval_steps": 500, | |
| "global_step": 4000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.012544475868990056, | |
| "grad_norm": 0.13098102807998657, | |
| "learning_rate": 4.6226415094339625e-05, | |
| "loss": 0.9457, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.025088951737980113, | |
| "grad_norm": 0.09060654789209366, | |
| "learning_rate": 4.980675516719879e-05, | |
| "loss": 0.3123, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.037633427606970166, | |
| "grad_norm": 0.1386842280626297, | |
| "learning_rate": 4.959670643589313e-05, | |
| "loss": 0.2832, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.050177903475960225, | |
| "grad_norm": 0.18612022697925568, | |
| "learning_rate": 4.9386657704587466e-05, | |
| "loss": 0.2708, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.06272237934495027, | |
| "grad_norm": 0.17801949381828308, | |
| "learning_rate": 4.91766089732818e-05, | |
| "loss": 0.2662, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.07526685521394033, | |
| "grad_norm": 0.10166127979755402, | |
| "learning_rate": 4.896656024197614e-05, | |
| "loss": 0.2599, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.08781133108293039, | |
| "grad_norm": 0.21376383304595947, | |
| "learning_rate": 4.8756511510670475e-05, | |
| "loss": 0.2549, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.10035580695192045, | |
| "grad_norm": 0.18035802245140076, | |
| "learning_rate": 4.8546462779364816e-05, | |
| "loss": 0.2515, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.1129002828209105, | |
| "grad_norm": 0.17301003634929657, | |
| "learning_rate": 4.833641404805915e-05, | |
| "loss": 0.2492, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.12544475868990054, | |
| "grad_norm": 0.15648286044597626, | |
| "learning_rate": 4.812636531675349e-05, | |
| "loss": 0.2476, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.12544475868990054, | |
| "eval_loss": 0.40483859181404114, | |
| "eval_runtime": 304.1505, | |
| "eval_samples_per_second": 65.034, | |
| "eval_steps_per_second": 1.019, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.13798923455889062, | |
| "grad_norm": 0.16616199910640717, | |
| "learning_rate": 4.7916316585447826e-05, | |
| "loss": 0.245, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.15053371042788066, | |
| "grad_norm": 0.6291008591651917, | |
| "learning_rate": 4.770626785414216e-05, | |
| "loss": 0.2434, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.16307818629687074, | |
| "grad_norm": 0.27643585205078125, | |
| "learning_rate": 4.74962191228365e-05, | |
| "loss": 0.2414, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.17562266216586078, | |
| "grad_norm": 0.27261775732040405, | |
| "learning_rate": 4.7286170391530835e-05, | |
| "loss": 0.2407, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.18816713803485083, | |
| "grad_norm": 0.1961769014596939, | |
| "learning_rate": 4.7076121660225176e-05, | |
| "loss": 0.2389, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.2007116139038409, | |
| "grad_norm": 0.13879896700382233, | |
| "learning_rate": 4.686607292891951e-05, | |
| "loss": 0.237, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.21325608977283095, | |
| "grad_norm": 0.20926761627197266, | |
| "learning_rate": 4.665602419761385e-05, | |
| "loss": 0.2372, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.225800565641821, | |
| "grad_norm": 0.22533361613750458, | |
| "learning_rate": 4.6445975466308186e-05, | |
| "loss": 0.2351, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.23834504151081107, | |
| "grad_norm": 0.1977718323469162, | |
| "learning_rate": 4.623592673500252e-05, | |
| "loss": 0.2348, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.2508895173798011, | |
| "grad_norm": 0.26971426606178284, | |
| "learning_rate": 4.602587800369686e-05, | |
| "loss": 0.2334, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.2508895173798011, | |
| "eval_loss": 0.4072725474834442, | |
| "eval_runtime": 304.8492, | |
| "eval_samples_per_second": 64.885, | |
| "eval_steps_per_second": 1.017, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.2634339932487912, | |
| "grad_norm": 0.2769719958305359, | |
| "learning_rate": 4.5815829272391195e-05, | |
| "loss": 0.2334, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.27597846911778123, | |
| "grad_norm": 0.28794065117836, | |
| "learning_rate": 4.5605780541085536e-05, | |
| "loss": 0.2318, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.2885229449867713, | |
| "grad_norm": 0.37538444995880127, | |
| "learning_rate": 4.539573180977987e-05, | |
| "loss": 0.231, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.3010674208557613, | |
| "grad_norm": 0.17134840786457062, | |
| "learning_rate": 4.5185683078474204e-05, | |
| "loss": 0.231, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.31361189672475137, | |
| "grad_norm": 0.28663370013237, | |
| "learning_rate": 4.4975634347168545e-05, | |
| "loss": 0.2294, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.3261563725937415, | |
| "grad_norm": 0.7121312022209167, | |
| "learning_rate": 4.476558561586288e-05, | |
| "loss": 0.2297, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.3387008484627315, | |
| "grad_norm": 0.2550923526287079, | |
| "learning_rate": 4.455553688455722e-05, | |
| "loss": 0.2283, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.35124532433172156, | |
| "grad_norm": 1.0167971849441528, | |
| "learning_rate": 4.4345488153251555e-05, | |
| "loss": 0.2263, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.3637898002007116, | |
| "grad_norm": 0.6048764586448669, | |
| "learning_rate": 4.4135439421945896e-05, | |
| "loss": 0.2277, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.37633427606970166, | |
| "grad_norm": 0.31545552611351013, | |
| "learning_rate": 4.392539069064023e-05, | |
| "loss": 0.2269, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.37633427606970166, | |
| "eval_loss": 0.4088518023490906, | |
| "eval_runtime": 305.124, | |
| "eval_samples_per_second": 64.826, | |
| "eval_steps_per_second": 1.016, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.3888787519386917, | |
| "grad_norm": 0.37552791833877563, | |
| "learning_rate": 4.3715341959334564e-05, | |
| "loss": 0.2246, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.4014232278076818, | |
| "grad_norm": 0.2993505299091339, | |
| "learning_rate": 4.3505293228028905e-05, | |
| "loss": 0.2261, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.41396770367667185, | |
| "grad_norm": 0.15790335834026337, | |
| "learning_rate": 4.329524449672324e-05, | |
| "loss": 0.2251, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.4265121795456619, | |
| "grad_norm": 0.47013625502586365, | |
| "learning_rate": 4.308519576541758e-05, | |
| "loss": 0.2243, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.43905665541465194, | |
| "grad_norm": 0.2053990662097931, | |
| "learning_rate": 4.2875147034111915e-05, | |
| "loss": 0.2237, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.451601131283642, | |
| "grad_norm": 0.17550259828567505, | |
| "learning_rate": 4.2665098302806256e-05, | |
| "loss": 0.2228, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.46414560715263203, | |
| "grad_norm": 0.5729805827140808, | |
| "learning_rate": 4.245504957150059e-05, | |
| "loss": 0.2228, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.47669008302162214, | |
| "grad_norm": 0.3008301854133606, | |
| "learning_rate": 4.2245000840194924e-05, | |
| "loss": 0.2217, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.4892345588906122, | |
| "grad_norm": 0.2061658799648285, | |
| "learning_rate": 4.2034952108889265e-05, | |
| "loss": 0.223, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.5017790347596022, | |
| "grad_norm": 0.2295321226119995, | |
| "learning_rate": 4.18249033775836e-05, | |
| "loss": 0.2219, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.5017790347596022, | |
| "eval_loss": 0.4091717004776001, | |
| "eval_runtime": 304.785, | |
| "eval_samples_per_second": 64.898, | |
| "eval_steps_per_second": 1.017, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.5143235106285923, | |
| "grad_norm": 0.22435450553894043, | |
| "learning_rate": 4.161485464627794e-05, | |
| "loss": 0.2215, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.5268679864975824, | |
| "grad_norm": 0.185350701212883, | |
| "learning_rate": 4.1404805914972275e-05, | |
| "loss": 0.2207, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.5394124623665724, | |
| "grad_norm": 0.46742141246795654, | |
| "learning_rate": 4.119475718366661e-05, | |
| "loss": 0.2197, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.5519569382355625, | |
| "grad_norm": 0.20891498029232025, | |
| "learning_rate": 4.098470845236095e-05, | |
| "loss": 0.2194, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.5645014141045525, | |
| "grad_norm": 0.4283987581729889, | |
| "learning_rate": 4.0774659721055284e-05, | |
| "loss": 0.2192, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.5770458899735426, | |
| "grad_norm": 0.32103636860847473, | |
| "learning_rate": 4.0564610989749625e-05, | |
| "loss": 0.2185, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.5895903658425327, | |
| "grad_norm": 0.20490871369838715, | |
| "learning_rate": 4.035456225844396e-05, | |
| "loss": 0.2183, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.6021348417115227, | |
| "grad_norm": 0.3914024233818054, | |
| "learning_rate": 4.01445135271383e-05, | |
| "loss": 0.2184, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.6146793175805128, | |
| "grad_norm": 0.18293343484401703, | |
| "learning_rate": 3.9934464795832635e-05, | |
| "loss": 0.2186, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.6272237934495027, | |
| "grad_norm": 0.20402023196220398, | |
| "learning_rate": 3.972441606452697e-05, | |
| "loss": 0.2179, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.6272237934495027, | |
| "eval_loss": 0.40875929594039917, | |
| "eval_runtime": 304.5578, | |
| "eval_samples_per_second": 64.947, | |
| "eval_steps_per_second": 1.018, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.6397682693184928, | |
| "grad_norm": 0.48965466022491455, | |
| "learning_rate": 3.951436733322131e-05, | |
| "loss": 0.2168, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.652312745187483, | |
| "grad_norm": 0.5581162571907043, | |
| "learning_rate": 3.9304318601915644e-05, | |
| "loss": 0.2175, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.6648572210564729, | |
| "grad_norm": 0.23750029504299164, | |
| "learning_rate": 3.9094269870609985e-05, | |
| "loss": 0.217, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.677401696925463, | |
| "grad_norm": 0.5061260461807251, | |
| "learning_rate": 3.888422113930432e-05, | |
| "loss": 0.2151, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.689946172794453, | |
| "grad_norm": 0.1854904741048813, | |
| "learning_rate": 3.867417240799866e-05, | |
| "loss": 0.216, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.7024906486634431, | |
| "grad_norm": 0.22555580735206604, | |
| "learning_rate": 3.8464123676692995e-05, | |
| "loss": 0.2157, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.7150351245324331, | |
| "grad_norm": 0.4870660901069641, | |
| "learning_rate": 3.825407494538733e-05, | |
| "loss": 0.2151, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.7275796004014232, | |
| "grad_norm": 0.37115806341171265, | |
| "learning_rate": 3.804402621408167e-05, | |
| "loss": 0.2146, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.7401240762704133, | |
| "grad_norm": 0.34767332673072815, | |
| "learning_rate": 3.7833977482776004e-05, | |
| "loss": 0.2139, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.7526685521394033, | |
| "grad_norm": 0.2617909610271454, | |
| "learning_rate": 3.7623928751470345e-05, | |
| "loss": 0.2149, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.7526685521394033, | |
| "eval_loss": 0.40570953488349915, | |
| "eval_runtime": 304.7965, | |
| "eval_samples_per_second": 64.896, | |
| "eval_steps_per_second": 1.017, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.7652130280083934, | |
| "grad_norm": 0.6052380204200745, | |
| "learning_rate": 3.741388002016468e-05, | |
| "loss": 0.2141, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.7777575038773834, | |
| "grad_norm": 0.3745960295200348, | |
| "learning_rate": 3.7203831288859014e-05, | |
| "loss": 0.213, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.7903019797463735, | |
| "grad_norm": 0.24974456429481506, | |
| "learning_rate": 3.6993782557553355e-05, | |
| "loss": 0.2142, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.8028464556153636, | |
| "grad_norm": 0.4550504684448242, | |
| "learning_rate": 3.678373382624769e-05, | |
| "loss": 0.2133, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.8153909314843536, | |
| "grad_norm": 0.8576037287712097, | |
| "learning_rate": 3.657368509494203e-05, | |
| "loss": 0.2121, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.8279354073533437, | |
| "grad_norm": 0.4864007532596588, | |
| "learning_rate": 3.6363636363636364e-05, | |
| "loss": 0.2131, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.8404798832223337, | |
| "grad_norm": 0.6007568836212158, | |
| "learning_rate": 3.6153587632330705e-05, | |
| "loss": 0.2134, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.8530243590913238, | |
| "grad_norm": 0.2667822241783142, | |
| "learning_rate": 3.594353890102504e-05, | |
| "loss": 0.2123, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.8655688349603139, | |
| "grad_norm": 0.16192808747291565, | |
| "learning_rate": 3.5733490169719374e-05, | |
| "loss": 0.2102, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.8781133108293039, | |
| "grad_norm": 0.4632836580276489, | |
| "learning_rate": 3.5523441438413715e-05, | |
| "loss": 0.2127, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.8781133108293039, | |
| "eval_loss": 0.40804293751716614, | |
| "eval_runtime": 305.2396, | |
| "eval_samples_per_second": 64.802, | |
| "eval_steps_per_second": 1.016, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.890657786698294, | |
| "grad_norm": 0.1812131106853485, | |
| "learning_rate": 3.531339270710805e-05, | |
| "loss": 0.2124, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.903202262567284, | |
| "grad_norm": 0.29924267530441284, | |
| "learning_rate": 3.510334397580239e-05, | |
| "loss": 0.2116, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.9157467384362741, | |
| "grad_norm": 0.30432143807411194, | |
| "learning_rate": 3.4893295244496724e-05, | |
| "loss": 0.2096, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.9282912143052641, | |
| "grad_norm": 0.17945361137390137, | |
| "learning_rate": 3.4683246513191065e-05, | |
| "loss": 0.211, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.9408356901742542, | |
| "grad_norm": 0.2670902907848358, | |
| "learning_rate": 3.44731977818854e-05, | |
| "loss": 0.2118, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.9533801660432443, | |
| "grad_norm": 0.350669801235199, | |
| "learning_rate": 3.4263149050579734e-05, | |
| "loss": 0.2094, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.9659246419122343, | |
| "grad_norm": 0.3146061599254608, | |
| "learning_rate": 3.4053100319274075e-05, | |
| "loss": 0.2092, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.9784691177812244, | |
| "grad_norm": 0.16551902890205383, | |
| "learning_rate": 3.384305158796841e-05, | |
| "loss": 0.2103, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.9910135936502144, | |
| "grad_norm": 0.34425023198127747, | |
| "learning_rate": 3.363300285666275e-05, | |
| "loss": 0.2095, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 1.0035124532433173, | |
| "grad_norm": 0.44446665048599243, | |
| "learning_rate": 3.3422954125357084e-05, | |
| "loss": 0.2098, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.0035124532433173, | |
| "eval_loss": 0.40903258323669434, | |
| "eval_runtime": 305.036, | |
| "eval_samples_per_second": 64.845, | |
| "eval_steps_per_second": 1.016, | |
| "step": 4000 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 11955, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.237451667319271e+20, | |
| "train_batch_size": 128, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |