PEFT
Safetensors
English
cybersecurity
malware-analysis
att&ck
threat-intelligence
mixtral
lora
expert-adapters
cape-sandbox
digital-forensics
Instructions for using umer07/fathom-mixtral with libraries, inference providers, notebooks, and local apps. Follow the links below to get started.
- Libraries
- PEFT
How to use umer07/fathom-mixtral with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

# Load the Mixtral base model, then attach the fathom-mixtral LoRA adapter.
base_model = AutoModelForCausalLM.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")
model = PeftModel.from_pretrained(base_model, "umer07/fathom-mixtral")
```

A fuller inference sketch follows the notebook links below.

- Notebooks
- Google Colab
- Kaggle
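
For context, here is a minimal end-to-end inference sketch that builds on the PEFT snippet above. The dtype, device placement, prompt, and generation settings are illustrative assumptions; the card does not document a prompt format.

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

BASE = "mistralai/Mixtral-8x7B-Instruct-v0.1"

# Load the base model (device_map="auto" assumes accelerate is installed)
# and the tokenizer that ships with it.
base_model = AutoModelForCausalLM.from_pretrained(
    BASE, torch_dtype=torch.float16, device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(BASE)

# Attach the LoRA adapter on top of the frozen base weights.
model = PeftModel.from_pretrained(base_model, "umer07/fathom-mixtral")
model.eval()

# Hypothetical prompt in the Mixtral-Instruct [INST] format; the adapter's
# actual training prompt format is not documented in this card.
prompt = "[INST] Map the behaviors in this sandbox report to ATT&CK techniques. [/INST]"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=256)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```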
Training log (metrics recorded every 10 optimizer steps):

| Step | Epoch | Loss | Grad norm | Learning rate | Entropy | Tokens seen | Mean token accuracy |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 10 | 0.016 | 1.6756 | 0.9338 | 1.80e-05 | 1.2653 | 211,745 | 0.6263 |
| 20 | 0.032 | 1.6675 | 0.4600 | 3.80e-05 | 1.5156 | 413,783 | 0.6156 |
| 30 | 0.048 | 1.4313 | 0.5679 | 5.80e-05 | 1.5118 | 618,078 | 0.6563 |
| 40 | 0.064 | 1.1182 | 0.5529 | 7.80e-05 | 1.1427 | 824,579 | 0.7137 |
| 50 | 0.08 | 0.9574 | 0.4291 | 9.80e-05 | 0.9685 | 1,022,359 | 0.7489 |
| 60 | 0.096 | 0.8724 | 0.3401 | 9.994e-05 | 0.8786 | 1,237,697 | 0.7662 |
| 70 | 0.112 | 0.8526 | 0.4123 | 9.973e-05 | 0.8519 | 1,438,438 | 0.7754 |
| 80 | 0.128 | 0.7437 | 0.3229 | 9.937e-05 | 0.7489 | 1,631,589 | 0.7942 |
| 90 | 0.144 | 0.7492 | 0.3370 | 9.887e-05 | 0.7552 | 1,836,258 | 0.7946 |
| 100 | 0.16 | 0.7372 | 0.2882 | 9.822e-05 | 0.7465 | 2,046,902 | 0.7958 |
| 110 | 0.176 | 0.7528 | 0.3487 | 9.742e-05 | 0.7529 | 2,254,427 | 0.7928 |
| 120 | 0.192 | 0.6854 | 0.3445 | 9.649e-05 | 0.6884 | 2,452,097 | 0.8084 |
| 130 | 0.208 | 0.7035 | 0.2791 | 9.541e-05 | 0.7121 | 2,647,959 | 0.8048 |
| 140 | 0.224 | 0.7175 | 0.3251 | 9.420e-05 | 0.7226 | 2,851,644 | 0.8004 |
| 150 | 0.24 | 0.6633 | 0.3178 | 9.286e-05 | 0.6730 | 3,053,179 | 0.8133 |
| 160 | 0.256 | 0.7078 | 0.3230 | 9.139e-05 | 0.7160 | 3,244,569 | 0.8037 |
| 170 | 0.272 | 0.7011 | 0.3123 | 8.980e-05 | 0.6998 | 3,448,693 | 0.8041 |
| 180 | 0.288 | 0.6887 | 0.3589 | 8.809e-05 | 0.6894 | 3,650,299 | 0.8076 |
| 190 | 0.304 | 0.7397 | 0.3437 | 8.626e-05 | 0.7491 | 3,860,706 | 0.7961 |
| 200 | 0.32 | 0.7104 | 0.3109 | 8.433e-05 | 0.7115 | 4,057,498 | 0.8007 |
| 210 | 0.336 | 0.6628 | 0.3589 | 8.229e-05 | 0.6690 | 4,251,824 | 0.8121 |
| 220 | 0.352 | 0.6760 | 0.3111 | 8.016e-05 | 0.6726 | 4,465,389 | 0.8104 |
| 230 | 0.368 | 0.6668 | 0.3314 | 7.793e-05 | 0.6736 | 4,685,518 | 0.8112 |
| 240 | 0.384 | 0.6414 | 0.3195 | 7.563e-05 | 0.6442 | 4,886,383 | 0.8161 |
| 250 | 0.4 | 0.6817 | 0.3405 | 7.325e-05 | 0.6819 | 5,086,529 | 0.8092 |
| 260 | 0.416 | 0.6843 | 0.3092 | 7.079e-05 | 0.6939 | 5,298,416 | 0.8072 |
| 270 | 0.432 | 0.6805 | 0.3196 | 6.828e-05 | 0.6853 | 5,502,693 | 0.8069 |
| 280 | 0.448 | 0.6497 | 0.3364 | 6.571e-05 | 0.6550 | 5,697,863 | 0.8139 |
| 290 | 0.464 | 0.6697 | 0.3320 | 6.309e-05 | 0.6748 | 5,910,487 | 0.8128 |
| 300 | 0.48 | 0.6535 | 0.3042 | 6.044e-05 | 0.6566 | 6,127,477 | 0.8122 |
| 310 | 0.496 | 0.6518 | 0.3487 | 5.775e-05 | 0.6594 | 6,328,197 | 0.8149 |
| 320 | 0.512 | 0.6931 | 0.3142 | 5.505e-05 | 0.7008 | 6,536,365 | 0.8070 |
| 330 | 0.528 | 0.6785 | 0.3409 | 5.232e-05 | 0.6777 | 6,742,650 | 0.8091 |
| 340 | 0.544 | 0.6438 | 0.3379 | 4.959e-05 | 0.6491 | 6,946,868 | 0.8178 |
| 350 | 0.56 | 0.6481 | 0.3258 | 4.686e-05 | 0.6563 | 7,138,705 | 0.8174 |
| 360 | 0.576 | 0.6622 | 0.3231 | 4.414e-05 | 0.6633 | 7,348,846 | 0.8129 |
| 370 | 0.592 | 0.6436 | 0.3571 | 4.144e-05 | 0.6440 | 7,556,952 | 0.8173 |
| 380 | 0.608 | 0.6299 | 0.3042 | 3.876e-05 | 0.6329 | 7,758,339 | 0.8194 |
| 390 | 0.624 | 0.6559 | 0.3536 | 3.612e-05 | 0.6581 | 7,961,705 | 0.8145 |
| 400 | 0.64 | 0.6394 | 0.3507 | 3.351e-05 | 0.6358 | 8,160,317 | 0.8178 |
| 410 | 0.656 | 0.6545 | 0.3170 | 3.096e-05 | 0.6499 | 8,372,896 | 0.8161 |
| 420 | 0.672 | 0.6560 | 0.3056 | 2.846e-05 | 0.6630 | 8,582,698 | 0.8152 |
| 430 | 0.688 | 0.6869 | 0.3303 | 2.603e-05 | 0.6843 | 8,794,332 | 0.8058 |
| 440 | 0.704 | 0.6602 | 0.3386 | 2.367e-05 | 0.6721 | 9,008,268 | 0.8118 |
| 450 | 0.72 | 0.6425 | 0.3051 | 2.139e-05 | 0.6423 | 9,216,146 | 0.8176 |
| 460 | 0.736 | 0.6262 | 0.3302 | 1.919e-05 | 0.6282 | 9,418,629 | 0.8203 |
| 470 | 0.752 | 0.6204 | 0.2998 | 1.709e-05 | 0.6201 | 9,624,430 | 0.8231 |
| 480 | 0.768 | 0.6133 | 0.3248 | 1.508e-05 | 0.6176 | 9,824,038 | 0.8229 |
| 490 | 0.784 | 0.6471 | 0.3508 | 1.318e-05 | 0.6428 | 10,029,090 | 0.8153 |
| 500 | 0.8 | 0.6494 | 0.3246 | 1.139e-05 | 0.6498 | 10,233,813 | 0.8152 |
| 510 | 0.816 | 0.6210 | 0.3172 | 9.710e-06 | 0.6245 | 10,435,960 | 0.8211 |
| 520 | 0.832 | 0.5986 | 0.3206 | 8.153e-06 | 0.5975 | 10,636,030 | 0.8272 |
| 530 | 0.848 | 0.6372 | 0.3553 | 6.722e-06 | 0.6376 | 10,838,906 | 0.8188 |
| 540 | 0.864 | 0.6304 | 0.3360 | 5.419e-06 | 0.6320 | 11,026,457 | 0.8198 |
| 550 | 0.88 | 0.6235 | 0.4141 | 4.249e-06 | 0.6274 | 11,211,997 | 0.8220 |
| 560 | 0.896 | 0.6511 | 0.3375 | 3.216e-06 | 0.6516 | 11,421,486 | 0.8150 |
| 570 | 0.912 | 0.6209 | 0.3296 | 2.322e-06 | 0.6219 | 11,621,269 | 0.8230 |
| 580 | 0.928 | 0.6340 | 0.3412 | 1.571e-06 | 0.6355 | 11,830,087 | 0.8180 |
| 590 | 0.944 | 0.6276 | 0.2921 | 9.641e-07 | 0.6308 | 12,030,300 | 0.8204 |
| 600 | 0.96 | 0.6230 | 0.3348 | 5.036e-07 | 0.6215 | 12,226,666 | 0.8229 |
| 610 | 0.976 | 0.6305 | 0.3399 | 1.909e-07 | 0.6308 | 12,433,050 | 0.8196 |
| 620 | 0.992 | 0.6685 | 0.3576 | 2.686e-08 | 0.6649 | 12,635,860 | 0.8116 |
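
The learning-rate column traces a familiar shape: near-linear warmup to a peak of about 1e-4 over the first ~50 steps, then cosine decay to roughly zero by the final step. Below is a minimal sketch reconstructing that schedule; the peak value, warmup length, and the one-step offset in the logged values are inferred from the table rather than stated anywhere in the card.

```python
import math

# Inferred schedule parameters (assumptions read off the table above).
PEAK_LR = 1e-4
WARMUP_STEPS = 50
TOTAL_STEPS = 625

def lr_at(step: int) -> float:
    """Linear warmup followed by cosine decay to zero."""
    if step < WARMUP_STEPS:
        return PEAK_LR * step / WARMUP_STEPS
    progress = (step - WARMUP_STEPS) / (TOTAL_STEPS - WARMUP_STEPS)
    return PEAK_LR * 0.5 * (1.0 + math.cos(math.pi * progress))

# Spot-check: the table appears to log the rate computed at step - 1.
for step, logged in [(10, 1.8e-05), (60, 9.993956318446873e-05), (320, 5.5045265260100234e-05)]:
    print(step, lr_at(step - 1), logged)
```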
Final training summary (step 625, epoch 1.0):

| Metric | Value |
|---|---:|
| Train runtime | 6,933.23 s (~1 h 56 min) |
| Train samples per second | 2.883 |
| Train steps per second | 0.09 |
| Total FLOPs | 7.776e+18 |
| Train loss (mean) | 0.7266 |
| Entropy | 0.6023 |
| Tokens seen | 12,730,954 |
| Mean token accuracy | 0.8288 |
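
A few throughput figures follow directly from the summary row. Treating the reported values as exact, the implied effective batch size below is an inference, not something the card documents.

```python
# Derived from the summary row above (input values copied verbatim).
train_runtime = 6933.2338        # seconds (~1 h 56 min)
num_tokens = 12_730_954
total_steps = 625
samples_per_second = 2.883

tokens_per_second = num_tokens / train_runtime       # ~1,836 tokens/s
total_samples = samples_per_second * train_runtime   # ~19,989 samples
effective_batch = total_samples / total_steps        # ~32 samples per optimizer step
print(f"{tokens_per_second:.0f} tok/s, ~{effective_batch:.0f} samples/step")
```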