PEFT
Safetensors
English
cybersecurity
malware-analysis
att&ck
threat-intelligence
mixtral
lora
expert-adapters
cape-sandbox
digital-forensics
Instructions to use umer07/fathom-mixtral with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use umer07/fathom-mixtral with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")
model = PeftModel.from_pretrained(base_model, "umer07/fathom-mixtral")
```
- Notebooks
- Google Colab
- Kaggle
| [ | |
| { | |
| "loss": 1.9181, | |
| "grad_norm": 0.8695156574249268, | |
| "learning_rate": 1.8e-05, | |
| "entropy": 1.457271361351013, | |
| "num_tokens": 223301.0, | |
| "mean_token_accuracy": 0.5781058162450791, | |
| "epoch": 0.016406890894175553, | |
| "step": 10 | |
| }, | |
| { | |
| "loss": 1.7798, | |
| "grad_norm": 0.6769833564758301, | |
| "learning_rate": 3.8e-05, | |
| "entropy": 1.6721824526786804, | |
| "num_tokens": 449106.0, | |
| "mean_token_accuracy": 0.585400715470314, | |
| "epoch": 0.03281378178835111, | |
| "step": 20 | |
| }, | |
| { | |
| "loss": 1.5073, | |
| "grad_norm": 0.903017520904541, | |
| "learning_rate": 5.8e-05, | |
| "entropy": 1.568133682012558, | |
| "num_tokens": 681766.0, | |
| "mean_token_accuracy": 0.6311331987380981, | |
| "epoch": 0.04922067268252666, | |
| "step": 30 | |
| }, | |
| { | |
| "loss": 1.1072, | |
| "grad_norm": 0.4333101511001587, | |
| "learning_rate": 7.800000000000001e-05, | |
| "entropy": 1.0811064839363098, | |
| "num_tokens": 919022.0, | |
| "mean_token_accuracy": 0.7193854421377182, | |
| "epoch": 0.06562756357670221, | |
| "step": 40 | |
| }, | |
| { | |
| "loss": 0.9973, | |
| "grad_norm": 0.33554309606552124, | |
| "learning_rate": 9.8e-05, | |
| "entropy": 0.9973530560731888, | |
| "num_tokens": 1148282.0, | |
| "mean_token_accuracy": 0.7452630162239074, | |
| "epoch": 0.08203445447087777, | |
| "step": 50 | |
| }, | |
| { | |
| "loss": 0.8928, | |
| "grad_norm": 0.338527649641037, | |
| "learning_rate": 9.993628283308581e-05, | |
| "entropy": 0.8918034493923187, | |
| "num_tokens": 1372211.0, | |
| "mean_token_accuracy": 0.7673784762620925, | |
| "epoch": 0.09844134536505332, | |
| "step": 60 | |
| }, | |
| { | |
| "loss": 0.8782, | |
| "grad_norm": 0.32297801971435547, | |
| "learning_rate": 9.971623444249021e-05, | |
| "entropy": 0.8743007063865662, | |
| "num_tokens": 1604226.0, | |
| "mean_token_accuracy": 0.7686779618263244, | |
| "epoch": 0.11484823625922888, | |
| "step": 70 | |
| }, | |
| { | |
| "loss": 0.8258, | |
| "grad_norm": 0.33724743127822876, | |
| "learning_rate": 9.933976038510333e-05, | |
| "entropy": 0.8295134097337723, | |
| "num_tokens": 1838081.0, | |
| "mean_token_accuracy": 0.7803715646266938, | |
| "epoch": 0.13125512715340443, | |
| "step": 80 | |
| }, | |
| { | |
| "loss": 0.7891, | |
| "grad_norm": 0.3447152078151703, | |
| "learning_rate": 9.88080451875917e-05, | |
| "entropy": 0.788675120472908, | |
| "num_tokens": 2071855.0, | |
| "mean_token_accuracy": 0.7867679089307785, | |
| "epoch": 0.14766201804757997, | |
| "step": 90 | |
| }, | |
| { | |
| "loss": 0.8272, | |
| "grad_norm": 0.3010920286178589, | |
| "learning_rate": 9.812276182268236e-05, | |
| "entropy": 0.8329822063446045, | |
| "num_tokens": 2299745.0, | |
| "mean_token_accuracy": 0.7769433975219726, | |
| "epoch": 0.16406890894175555, | |
| "step": 100 | |
| }, | |
| { | |
| "loss": 0.797, | |
| "grad_norm": 0.3699570298194885, | |
| "learning_rate": 9.728606644537178e-05, | |
| "entropy": 0.8039814531803131, | |
| "num_tokens": 2528567.0, | |
| "mean_token_accuracy": 0.784355628490448, | |
| "epoch": 0.1804757998359311, | |
| "step": 110 | |
| }, | |
| { | |
| "loss": 0.7743, | |
| "grad_norm": 0.3595486581325531, | |
| "learning_rate": 9.63005916088644e-05, | |
| "entropy": 0.7768057614564896, | |
| "num_tokens": 2751275.0, | |
| "mean_token_accuracy": 0.7889596104621888, | |
| "epoch": 0.19688269073010664, | |
| "step": 120 | |
| }, | |
| { | |
| "loss": 0.7735, | |
| "grad_norm": 0.3556647002696991, | |
| "learning_rate": 9.516943798158649e-05, | |
| "entropy": 0.776089146733284, | |
| "num_tokens": 2978023.0, | |
| "mean_token_accuracy": 0.7897650897502899, | |
| "epoch": 0.2132895816242822, | |
| "step": 130 | |
| }, | |
| { | |
| "loss": 0.7622, | |
| "grad_norm": 0.3334764242172241, | |
| "learning_rate": 9.389616459133597e-05, | |
| "entropy": 0.7803491950035095, | |
| "num_tokens": 3204517.0, | |
| "mean_token_accuracy": 0.7920954555273056, | |
| "epoch": 0.22969647251845776, | |
| "step": 140 | |
| }, | |
| { | |
| "loss": 0.7576, | |
| "grad_norm": 0.4009985029697418, | |
| "learning_rate": 9.248477762726437e-05, | |
| "entropy": 0.7722930639982224, | |
| "num_tokens": 3434323.0, | |
| "mean_token_accuracy": 0.792155721783638, | |
| "epoch": 0.2461033634126333, | |
| "step": 150 | |
| }, | |
| { | |
| "loss": 0.7741, | |
| "grad_norm": 0.3408016264438629, | |
| "learning_rate": 9.093971783492355e-05, | |
| "entropy": 0.7827848076820374, | |
| "num_tokens": 3661534.0, | |
| "mean_token_accuracy": 0.7909984678030014, | |
| "epoch": 0.26251025430680885, | |
| "step": 160 | |
| }, | |
| { | |
| "loss": 0.7409, | |
| "grad_norm": 0.3399813175201416, | |
| "learning_rate": 8.926584654403724e-05, | |
| "entropy": 0.7509049952030182, | |
| "num_tokens": 3890921.0, | |
| "mean_token_accuracy": 0.7963089287281037, | |
| "epoch": 0.27891714520098443, | |
| "step": 170 | |
| }, | |
| { | |
| "loss": 0.7478, | |
| "grad_norm": 0.30348479747772217, | |
| "learning_rate": 8.746843037295937e-05, | |
| "entropy": 0.7562560260295867, | |
| "num_tokens": 4119537.0, | |
| "mean_token_accuracy": 0.794661870598793, | |
| "epoch": 0.29532403609515995, | |
| "step": 180 | |
| }, | |
| { | |
| "loss": 0.737, | |
| "grad_norm": 0.3403272330760956, | |
| "learning_rate": 8.555312465794403e-05, | |
| "entropy": 0.7475169450044632, | |
| "num_tokens": 4357977.0, | |
| "mean_token_accuracy": 0.796341797709465, | |
| "epoch": 0.3117309269893355, | |
| "step": 190 | |
| }, | |
| { | |
| "loss": 0.7512, | |
| "grad_norm": 0.3219321072101593, | |
| "learning_rate": 8.352595565936554e-05, | |
| "entropy": 0.7560538798570633, | |
| "num_tokens": 4581046.0, | |
| "mean_token_accuracy": 0.7952380329370499, | |
| "epoch": 0.3281378178835111, | |
| "step": 200 | |
| }, | |
| { | |
| "loss": 0.7317, | |
| "grad_norm": 0.3038958013057709, | |
| "learning_rate": 8.139330160087374e-05, | |
| "entropy": 0.7322431743144989, | |
| "num_tokens": 4814227.0, | |
| "mean_token_accuracy": 0.7974914610385895, | |
| "epoch": 0.3445447087776866, | |
| "step": 210 | |
| }, | |
| { | |
| "loss": 0.7205, | |
| "grad_norm": 0.32955309748649597, | |
| "learning_rate": 7.916187260114263e-05, | |
| "entropy": 0.7275226473808288, | |
| "num_tokens": 5048157.0, | |
| "mean_token_accuracy": 0.8014717370271682, | |
| "epoch": 0.3609515996718622, | |
| "step": 220 | |
| }, | |
| { | |
| "loss": 0.7282, | |
| "grad_norm": 0.4224933683872223, | |
| "learning_rate": 7.68386895613546e-05, | |
| "entropy": 0.7310106873512268, | |
| "num_tokens": 5274071.0, | |
| "mean_token_accuracy": 0.7992997527122497, | |
| "epoch": 0.37735849056603776, | |
| "step": 230 | |
| }, | |
| { | |
| "loss": 0.6971, | |
| "grad_norm": 0.33381229639053345, | |
| "learning_rate": 7.443106207484776e-05, | |
| "entropy": 0.6995288044214248, | |
| "num_tokens": 5509360.0, | |
| "mean_token_accuracy": 0.8060027480125427, | |
| "epoch": 0.3937653814602133, | |
| "step": 240 | |
| }, | |
| { | |
| "loss": 0.711, | |
| "grad_norm": 0.31851011514663696, | |
| "learning_rate": 7.194656542843102e-05, | |
| "entropy": 0.7142476379871369, | |
| "num_tokens": 5738285.0, | |
| "mean_token_accuracy": 0.8031993210315704, | |
| "epoch": 0.41017227235438886, | |
| "step": 250 | |
| }, | |
| { | |
| "loss": 0.7237, | |
| "grad_norm": 0.33157357573509216, | |
| "learning_rate": 6.939301676772927e-05, | |
| "entropy": 0.7256091266870499, | |
| "num_tokens": 5962580.0, | |
| "mean_token_accuracy": 0.8013624370098114, | |
| "epoch": 0.4265791632485644, | |
| "step": 260 | |
| }, | |
| { | |
| "loss": 0.721, | |
| "grad_norm": 0.3370811641216278, | |
| "learning_rate": 6.677845050155107e-05, | |
| "entropy": 0.7265744864940643, | |
| "num_tokens": 6196094.0, | |
| "mean_token_accuracy": 0.7991349190473557, | |
| "epoch": 0.44298605414273995, | |
| "step": 270 | |
| }, | |
| { | |
| "loss": 0.6843, | |
| "grad_norm": 0.38120874762535095, | |
| "learning_rate": 6.411109302266616e-05, | |
| "entropy": 0.6908316820859909, | |
| "num_tokens": 6420601.0, | |
| "mean_token_accuracy": 0.8087756901979446, | |
| "epoch": 0.4593929450369155, | |
| "step": 280 | |
| }, | |
| { | |
| "loss": 0.7072, | |
| "grad_norm": 0.38430851697921753, | |
| "learning_rate": 6.139933682453036e-05, | |
| "entropy": 0.7136244118213654, | |
| "num_tokens": 6655119.0, | |
| "mean_token_accuracy": 0.8047497570514679, | |
| "epoch": 0.47579983593109104, | |
| "step": 290 | |
| }, | |
| { | |
| "loss": 0.7252, | |
| "grad_norm": 0.3509667217731476, | |
| "learning_rate": 5.8651714095396135e-05, | |
| "entropy": 0.7334865719079972, | |
| "num_tokens": 6876910.0, | |
| "mean_token_accuracy": 0.799770200252533, | |
| "epoch": 0.4922067268252666, | |
| "step": 300 | |
| }, | |
| { | |
| "loss": 0.6821, | |
| "grad_norm": 0.3153151571750641, | |
| "learning_rate": 5.587686987289189e-05, | |
| "entropy": 0.6873683601617813, | |
| "num_tokens": 7112299.0, | |
| "mean_token_accuracy": 0.8088241666555405, | |
| "epoch": 0.5086136177194421, | |
| "step": 310 | |
| }, | |
| { | |
| "loss": 0.7196, | |
| "grad_norm": 0.34774187207221985, | |
| "learning_rate": 5.3083534843535074e-05, | |
| "entropy": 0.7214434593915939, | |
| "num_tokens": 7346455.0, | |
| "mean_token_accuracy": 0.8039845436811447, | |
| "epoch": 0.5250205086136177, | |
| "step": 320 | |
| }, | |
| { | |
| "loss": 0.6638, | |
| "grad_norm": 0.387768030166626, | |
| "learning_rate": 5.028049787276249e-05, | |
| "entropy": 0.6638175457715988, | |
| "num_tokens": 7571791.0, | |
| "mean_token_accuracy": 0.812444058060646, | |
| "epoch": 0.5414273995077933, | |
| "step": 330 | |
| }, | |
| { | |
| "loss": 0.6766, | |
| "grad_norm": 0.3517005741596222, | |
| "learning_rate": 4.7476578351907954e-05, | |
| "entropy": 0.6799941658973694, | |
| "num_tokens": 7801695.0, | |
| "mean_token_accuracy": 0.811230742931366, | |
| "epoch": 0.5578342904019689, | |
| "step": 340 | |
| }, | |
| { | |
| "loss": 0.6779, | |
| "grad_norm": 0.32577675580978394, | |
| "learning_rate": 4.468059844913444e-05, | |
| "entropy": 0.6814499109983444, | |
| "num_tokens": 8039821.0, | |
| "mean_token_accuracy": 0.8104382246732712, | |
| "epoch": 0.5742411812961444, | |
| "step": 350 | |
| }, | |
| { | |
| "loss": 0.6539, | |
| "grad_norm": 0.35933127999305725, | |
| "learning_rate": 4.1901355351628945e-05, | |
| "entropy": 0.6585495263338089, | |
| "num_tokens": 8273149.0, | |
| "mean_token_accuracy": 0.8166852772235871, | |
| "epoch": 0.5906480721903199, | |
| "step": 360 | |
| }, | |
| { | |
| "loss": 0.6843, | |
| "grad_norm": 0.31598055362701416, | |
| "learning_rate": 3.914759358639719e-05, | |
| "entropy": 0.6861207246780395, | |
| "num_tokens": 8503164.0, | |
| "mean_token_accuracy": 0.8086160510778427, | |
| "epoch": 0.6070549630844955, | |
| "step": 370 | |
| }, | |
| { | |
| "loss": 0.7094, | |
| "grad_norm": 0.3427006006240845, | |
| "learning_rate": 3.642797750674629e-05, | |
| "entropy": 0.7133786290884018, | |
| "num_tokens": 8726435.0, | |
| "mean_token_accuracy": 0.8027824640274048, | |
| "epoch": 0.623461853978671, | |
| "step": 380 | |
| }, | |
| { | |
| "loss": 0.6877, | |
| "grad_norm": 0.34877264499664307, | |
| "learning_rate": 3.375106403102389e-05, | |
| "entropy": 0.6881168276071549, | |
| "num_tokens": 8954291.0, | |
| "mean_token_accuracy": 0.8073496133089065, | |
| "epoch": 0.6398687448728466, | |
| "step": 390 | |
| }, | |
| { | |
| "loss": 0.6835, | |
| "grad_norm": 0.3225726783275604, | |
| "learning_rate": 3.112527571938717e-05, | |
| "entropy": 0.6862167656421662, | |
| "num_tokens": 9177163.0, | |
| "mean_token_accuracy": 0.8089945495128632, | |
| "epoch": 0.6562756357670222, | |
| "step": 400 | |
| }, | |
| { | |
| "loss": 0.7008, | |
| "grad_norm": 0.329756498336792, | |
| "learning_rate": 2.8558874273312674e-05, | |
| "entropy": 0.7071986079216004, | |
| "num_tokens": 9404151.0, | |
| "mean_token_accuracy": 0.8044474363327027, | |
| "epoch": 0.6726825266611977, | |
| "step": 410 | |
| }, | |
| { | |
| "loss": 0.6947, | |
| "grad_norm": 0.3715651035308838, | |
| "learning_rate": 2.605993454122687e-05, | |
| "entropy": 0.69432153403759, | |
| "num_tokens": 9639400.0, | |
| "mean_token_accuracy": 0.8064981371164321, | |
| "epoch": 0.6890894175553732, | |
| "step": 420 | |
| }, | |
| { | |
| "loss": 0.7066, | |
| "grad_norm": 0.3599180281162262, | |
| "learning_rate": 2.3636319112045496e-05, | |
| "entropy": 0.7111173301935196, | |
| "num_tokens": 9867668.0, | |
| "mean_token_accuracy": 0.8044642627239227, | |
| "epoch": 0.7054963084495488, | |
| "step": 430 | |
| }, | |
| { | |
| "loss": 0.7259, | |
| "grad_norm": 0.2912443280220032, | |
| "learning_rate": 2.1295653576560163e-05, | |
| "entropy": 0.7254415988922119, | |
| "num_tokens": 10100826.0, | |
| "mean_token_accuracy": 0.8003069430589675, | |
| "epoch": 0.7219031993437244, | |
| "step": 440 | |
| }, | |
| { | |
| "loss": 0.6761, | |
| "grad_norm": 0.30693626403808594, | |
| "learning_rate": 1.9045302534508297e-05, | |
| "entropy": 0.6833124309778214, | |
| "num_tokens": 10332359.0, | |
| "mean_token_accuracy": 0.8109049916267395, | |
| "epoch": 0.7383100902379, | |
| "step": 450 | |
| }, | |
| { | |
| "loss": 0.736, | |
| "grad_norm": 0.3155220150947571, | |
| "learning_rate": 1.6892346422817946e-05, | |
| "entropy": 0.736938726902008, | |
| "num_tokens": 10563841.0, | |
| "mean_token_accuracy": 0.7979681819677353, | |
| "epoch": 0.7547169811320755, | |
| "step": 460 | |
| }, | |
| { | |
| "loss": 0.6945, | |
| "grad_norm": 0.3748078942298889, | |
| "learning_rate": 1.4843559237933473e-05, | |
| "entropy": 0.7031238079071045, | |
| "num_tokens": 10788876.0, | |
| "mean_token_accuracy": 0.8057133972644805, | |
| "epoch": 0.771123872026251, | |
| "step": 470 | |
| }, | |
| { | |
| "loss": 0.6776, | |
| "grad_norm": 0.3635546565055847, | |
| "learning_rate": 1.2905387222316822e-05, | |
| "entropy": 0.6805126667022705, | |
| "num_tokens": 11015156.0, | |
| "mean_token_accuracy": 0.8101104766130447, | |
| "epoch": 0.7875307629204266, | |
| "step": 480 | |
| }, | |
| { | |
| "loss": 0.676, | |
| "grad_norm": 0.3111382722854614, | |
| "learning_rate": 1.1083928582183711e-05, | |
| "entropy": 0.6774959295988083, | |
| "num_tokens": 11245860.0, | |
| "mean_token_accuracy": 0.8107922226190567, | |
| "epoch": 0.8039376538146021, | |
| "step": 490 | |
| }, | |
| { | |
| "loss": 0.6742, | |
| "grad_norm": 0.32188844680786133, | |
| "learning_rate": 9.384914300290748e-06, | |
| "entropy": 0.6842435419559478, | |
| "num_tokens": 11476241.0, | |
| "mean_token_accuracy": 0.8111602008342743, | |
| "epoch": 0.8203445447087777, | |
| "step": 500 | |
| }, | |
| { | |
| "loss": 0.6544, | |
| "grad_norm": 0.36185422539711, | |
| "learning_rate": 7.813690104143557e-06, | |
| "entropy": 0.6514311820268631, | |
| "num_tokens": 11708112.0, | |
| "mean_token_accuracy": 0.8149820327758789, | |
| "epoch": 0.8367514356029533, | |
| "step": 510 | |
| }, | |
| { | |
| "loss": 0.6765, | |
| "grad_norm": 0.3183876574039459, | |
| "learning_rate": 6.375199646360142e-06, | |
| "entropy": 0.6856429934501648, | |
| "num_tokens": 11939337.0, | |
| "mean_token_accuracy": 0.8090052843093872, | |
| "epoch": 0.8531583264971287, | |
| "step": 520 | |
| }, | |
| { | |
| "loss": 0.6761, | |
| "grad_norm": 0.3287002742290497, | |
| "learning_rate": 5.073968950110941e-06, | |
| "entropy": 0.6834310472011567, | |
| "num_tokens": 12174723.0, | |
| "mean_token_accuracy": 0.8104397505521774, | |
| "epoch": 0.8695652173913043, | |
| "step": 530 | |
| }, | |
| { | |
| "loss": 0.6751, | |
| "grad_norm": 0.35229238867759705, | |
| "learning_rate": 3.914092168575306e-06, | |
| "entropy": 0.6824660181999207, | |
| "num_tokens": 12398555.0, | |
| "mean_token_accuracy": 0.8104325562715531, | |
| "epoch": 0.8859721082854799, | |
| "step": 540 | |
| }, | |
| { | |
| "loss": 0.6834, | |
| "grad_norm": 0.38912639021873474, | |
| "learning_rate": 2.8992187032210518e-06, | |
| "entropy": 0.682240468263626, | |
| "num_tokens": 12624846.0, | |
| "mean_token_accuracy": 0.8091065347194671, | |
| "epoch": 0.9023789991796555, | |
| "step": 550 | |
| }, | |
| { | |
| "loss": 0.696, | |
| "grad_norm": 0.306355744600296, | |
| "learning_rate": 2.032541721437209e-06, | |
| "entropy": 0.7058492481708527, | |
| "num_tokens": 12859015.0, | |
| "mean_token_accuracy": 0.8039765357971191, | |
| "epoch": 0.918785890073831, | |
| "step": 560 | |
| }, | |
| { | |
| "loss": 0.6727, | |
| "grad_norm": 0.38508960604667664, | |
| "learning_rate": 1.3167881096480372e-06, | |
| "entropy": 0.681548210978508, | |
| "num_tokens": 13083551.0, | |
| "mean_token_accuracy": 0.8100948423147202, | |
| "epoch": 0.9351927809680065, | |
| "step": 570 | |
| }, | |
| { | |
| "loss": 0.7208, | |
| "grad_norm": 0.33893731236457825, | |
| "learning_rate": 7.542098935195918e-07, | |
| "entropy": 0.7220237284898758, | |
| "num_tokens": 13308857.0, | |
| "mean_token_accuracy": 0.8005945891141891, | |
| "epoch": 0.9515996718621821, | |
| "step": 580 | |
| }, | |
| { | |
| "loss": 0.6759, | |
| "grad_norm": 0.3534739911556244, | |
| "learning_rate": 3.465771522536854e-07, | |
| "entropy": 0.6739370882511139, | |
| "num_tokens": 13543857.0, | |
| "mean_token_accuracy": 0.8097480118274689, | |
| "epoch": 0.9680065627563577, | |
| "step": 590 | |
| }, | |
| { | |
| "loss": 0.6865, | |
| "grad_norm": 0.3553875982761383, | |
| "learning_rate": 9.517244926393609e-08, | |
| "entropy": 0.6908959478139878, | |
| "num_tokens": 13769574.0, | |
| "mean_token_accuracy": 0.806584045290947, | |
| "epoch": 0.9844134536505332, | |
| "step": 600 | |
| }, | |
| { | |
| "loss": 0.6525, | |
| "grad_norm": 0.5078703761100769, | |
| "learning_rate": 7.867967567354306e-10, | |
| "entropy": 0.6598060852602908, | |
| "num_tokens": 13978118.0, | |
| "mean_token_accuracy": 0.8165042933664823, | |
| "epoch": 1.0, | |
| "step": 610 | |
| }, | |
| { | |
| "train_runtime": 6449.4338, | |
| "train_samples_per_second": 3.024, | |
| "train_steps_per_second": 0.095, | |
| "total_flos": 7.156995496917074e+18, | |
| "train_loss": 0.7796625786140317, | |
| "epoch": 1.0, | |
| "step": 610 | |
| } | |
| ] |