PEFT
Safetensors
English
cybersecurity
malware-analysis
att&ck
threat-intelligence
mixtral
lora
expert-adapters
cape-sandbox
digital-forensics
Instructions to use umer07/fathom-mixtral with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use umer07/fathom-mixtral with PEFT:
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")
model = PeftModel.from_pretrained(base_model, "umer07/fathom-mixtral")

- Notebooks
- Google Colab
- Kaggle
| [ | |
| { | |
| "loss": 1.4019, | |
| "grad_norm": 0.6986930966377258, | |
| "learning_rate": 2.25e-05, | |
| "entropy": 1.199985957145691, | |
| "num_tokens": 647929.0, | |
| "mean_token_accuracy": 0.6914159089326859, | |
| "epoch": 0.19607843137254902, | |
| "step": 10 | |
| }, | |
| { | |
| "loss": 1.3734, | |
| "grad_norm": 0.18671388924121857, | |
| "learning_rate": 4.75e-05, | |
| "entropy": 1.325679862499237, | |
| "num_tokens": 1300854.0, | |
| "mean_token_accuracy": 0.6935183644294739, | |
| "epoch": 0.39215686274509803, | |
| "step": 20 | |
| }, | |
| { | |
| "loss": 1.3317, | |
| "grad_norm": 0.18022498488426208, | |
| "learning_rate": 4.9819267987317665e-05, | |
| "entropy": 1.4015559554100037, | |
| "num_tokens": 1947357.0, | |
| "mean_token_accuracy": 0.6976928591728211, | |
| "epoch": 0.5882352941176471, | |
| "step": 30 | |
| }, | |
| { | |
| "loss": 1.2446, | |
| "grad_norm": 0.18924492597579956, | |
| "learning_rate": 4.9197869469162815e-05, | |
| "entropy": 1.219199287891388, | |
| "num_tokens": 2595628.0, | |
| "mean_token_accuracy": 0.7132954180240632, | |
| "epoch": 0.7843137254901961, | |
| "step": 40 | |
| }, | |
| { | |
| "loss": 1.2139, | |
| "grad_norm": 0.23499619960784912, | |
| "learning_rate": 4.814465939707259e-05, | |
| "entropy": 1.2155689418315887, | |
| "num_tokens": 3245496.0, | |
| "mean_token_accuracy": 0.7180316507816314, | |
| "epoch": 0.9803921568627451, | |
| "step": 50 | |
| }, | |
| { | |
| "loss": 1.1282, | |
| "grad_norm": 0.25000059604644775, | |
| "learning_rate": 4.6678432329734434e-05, | |
| "entropy": 1.1309684544801712, | |
| "num_tokens": 3876930.0, | |
| "mean_token_accuracy": 0.734208858013153, | |
| "epoch": 1.1764705882352942, | |
| "step": 60 | |
| }, | |
| { | |
| "loss": 1.1264, | |
| "grad_norm": 0.3251831531524658, | |
| "learning_rate": 4.482535312390058e-05, | |
| "entropy": 1.1345468521118165, | |
| "num_tokens": 4525171.0, | |
| "mean_token_accuracy": 0.7357972204685211, | |
| "epoch": 1.3725490196078431, | |
| "step": 70 | |
| }, | |
| { | |
| "loss": 1.0226, | |
| "grad_norm": 0.2027675360441208, | |
| "learning_rate": 4.2618490021899384e-05, | |
| "entropy": 1.0273457378149033, | |
| "num_tokens": 5172588.0, | |
| "mean_token_accuracy": 0.7597126334905624, | |
| "epoch": 1.5686274509803921, | |
| "step": 80 | |
| }, | |
| { | |
| "loss": 1.0134, | |
| "grad_norm": 0.2039007991552353, | |
| "learning_rate": 4.009722454806761e-05, | |
| "entropy": 1.0195463865995407, | |
| "num_tokens": 5819977.0, | |
| "mean_token_accuracy": 0.761520317196846, | |
| "epoch": 1.7647058823529411, | |
| "step": 90 | |
| }, | |
| { | |
| "loss": 0.9796, | |
| "grad_norm": 0.1867203265428543, | |
| "learning_rate": 3.730654874451569e-05, | |
| "entropy": 0.9862680763006211, | |
| "num_tokens": 6472798.0, | |
| "mean_token_accuracy": 0.7683205276727676, | |
| "epoch": 1.9607843137254903, | |
| "step": 100 | |
| }, | |
| { | |
| "loss": 0.9679, | |
| "grad_norm": 0.21607249975204468, | |
| "learning_rate": 3.429626228707034e-05, | |
| "entropy": 0.9721104234457016, | |
| "num_tokens": 7112529.0, | |
| "mean_token_accuracy": 0.7711095601320267, | |
| "epoch": 2.156862745098039, | |
| "step": 110 | |
| }, | |
| { | |
| "loss": 0.9666, | |
| "grad_norm": 0.23092280328273773, | |
| "learning_rate": 3.112008380887966e-05, | |
| "entropy": 0.9724053025245667, | |
| "num_tokens": 7758907.0, | |
| "mean_token_accuracy": 0.7709219127893447, | |
| "epoch": 2.3529411764705883, | |
| "step": 120 | |
| }, | |
| { | |
| "loss": 0.9683, | |
| "grad_norm": 0.20999926328659058, | |
| "learning_rate": 2.7834692290132052e-05, | |
| "entropy": 0.9730658024549484, | |
| "num_tokens": 8405368.0, | |
| "mean_token_accuracy": 0.769457995891571, | |
| "epoch": 2.549019607843137, | |
| "step": 130 | |
| }, | |
| { | |
| "loss": 0.9385, | |
| "grad_norm": 0.7688829898834229, | |
| "learning_rate": 2.449871562031194e-05, | |
| "entropy": 0.9452048629522324, | |
| "num_tokens": 9053347.0, | |
| "mean_token_accuracy": 0.7750110507011414, | |
| "epoch": 2.7450980392156863, | |
| "step": 140 | |
| }, | |
| { | |
| "loss": 0.9832, | |
| "grad_norm": 0.26651284098625183, | |
| "learning_rate": 2.1171684382123e-05, | |
| "entropy": 0.9904936224222183, | |
| "num_tokens": 9703053.0, | |
| "mean_token_accuracy": 0.7652157843112946, | |
| "epoch": 2.9411764705882355, | |
| "step": 150 | |
| }, | |
| { | |
| "loss": 0.9586, | |
| "grad_norm": 0.22833150625228882, | |
| "learning_rate": 1.7912969526829558e-05, | |
| "entropy": 0.9607909858226776, | |
| "num_tokens": 10340639.0, | |
| "mean_token_accuracy": 0.7710874438285827, | |
| "epoch": 3.1372549019607843, | |
| "step": 160 | |
| }, | |
| { | |
| "loss": 0.9382, | |
| "grad_norm": 0.2782645523548126, | |
| "learning_rate": 1.4780722898224707e-05, | |
| "entropy": 0.9437746345996857, | |
| "num_tokens": 10987284.0, | |
| "mean_token_accuracy": 0.7756666958332061, | |
| "epoch": 3.3333333333333335, | |
| "step": 170 | |
| }, | |
| { | |
| "loss": 0.9441, | |
| "grad_norm": 0.2571350038051605, | |
| "learning_rate": 1.1830839511600211e-05, | |
| "entropy": 0.9467583298683167, | |
| "num_tokens": 11638213.0, | |
| "mean_token_accuracy": 0.7747708618640899, | |
| "epoch": 3.5294117647058822, | |
| "step": 180 | |
| }, | |
| { | |
| "loss": 0.9175, | |
| "grad_norm": 0.26882949471473694, | |
| "learning_rate": 9.11596010587441e-06, | |
| "entropy": 0.923730057477951, | |
| "num_tokens": 12288333.0, | |
| "mean_token_accuracy": 0.7791785061359405, | |
| "epoch": 3.7254901960784315, | |
| "step": 190 | |
| }, | |
| { | |
| "loss": 0.91, | |
| "grad_norm": 0.22646215558052063, | |
| "learning_rate": 6.684531768359173e-06, | |
| "entropy": 0.9165982186794281, | |
| "num_tokens": 12936318.0, | |
| "mean_token_accuracy": 0.7812086254358291, | |
| "epoch": 3.9215686274509802, | |
| "step": 200 | |
| }, | |
| { | |
| "loss": 0.9193, | |
| "grad_norm": 0.2461111843585968, | |
| "learning_rate": 4.579943395339062e-06, | |
| "entropy": 0.9269091933965683, | |
| "num_tokens": 13570476.0, | |
| "mean_token_accuracy": 0.7785391122102737, | |
| "epoch": 4.117647058823529, | |
| "step": 210 | |
| }, | |
| { | |
| "loss": 0.9259, | |
| "grad_norm": 0.30823367834091187, | |
| "learning_rate": 2.8397514161892486e-06, | |
| "entropy": 0.9287486761808396, | |
| "num_tokens": 14217790.0, | |
| "mean_token_accuracy": 0.7776476472616196, | |
| "epoch": 4.313725490196078, | |
| "step": 220 | |
| }, | |
| { | |
| "loss": 0.8904, | |
| "grad_norm": 0.29259830713272095, | |
| "learning_rate": 1.4950095980035772e-06, | |
| "entropy": 0.8954251229763031, | |
| "num_tokens": 14867755.0, | |
| "mean_token_accuracy": 0.7863523244857789, | |
| "epoch": 4.509803921568627, | |
| "step": 230 | |
| }, | |
| { | |
| "loss": 0.9222, | |
| "grad_norm": 0.2476108819246292, | |
| "learning_rate": 5.697148903850868e-07, | |
| "entropy": 0.92759590446949, | |
| "num_tokens": 15512923.0, | |
| "mean_token_accuracy": 0.7787990540266037, | |
| "epoch": 4.705882352941177, | |
| "step": 240 | |
| }, | |
| { | |
| "loss": 0.9369, | |
| "grad_norm": 0.2094818353652954, | |
| "learning_rate": 8.037919931187244e-08, | |
| "entropy": 0.9443553179502487, | |
| "num_tokens": 16165639.0, | |
| "mean_token_accuracy": 0.7741728782653808, | |
| "epoch": 4.901960784313726, | |
| "step": 250 | |
| }, | |
| { | |
| "train_runtime": 4077.9021, | |
| "train_samples_per_second": 1.994, | |
| "train_steps_per_second": 0.063, | |
| "total_flos": 4.655305572168499e+18, | |
| "train_loss": 1.0347469572927437, | |
| "entropy": 0.9348823964595795, | |
| "num_tokens": 16478955.0, | |
| "mean_token_accuracy": 0.7756926536560058, | |
| "epoch": 5.0, | |
| "step": 255 | |
| } | |
| ] |