PEFT
Safetensors
English
cybersecurity
malware-analysis
att&ck
threat-intelligence
mixtral
lora
expert-adapters
cape-sandbox
digital-forensics
Instructions to use umer07/fathom-mixtral with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use umer07/fathom-mixtral with PEFT:
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")
model = PeftModel.from_pretrained(base_model, "umer07/fathom-mixtral")
- Notebooks
- Google Colab
- Kaggle
[
  {
    "loss": 1.4307,
    "grad_norm": 0.6940367817878723,
    "learning_rate": 3e-05,
    "entropy": 1.1788243889808654,
    "num_tokens": 155850.0,
    "mean_token_accuracy": 0.6935877948999405,
    "epoch": 0.11764705882352941,
    "step": 10
  },
  {
    "loss": 1.1806,
    "grad_norm": 0.7020200490951538,
    "learning_rate": 6.333333333333333e-05,
    "entropy": 1.2202600359916687,
    "num_tokens": 312325.0,
    "mean_token_accuracy": 0.7312566488981247,
    "epoch": 0.23529411764705882,
    "step": 20
  },
  {
    "loss": 0.7607,
    "grad_norm": 0.47733989357948303,
    "learning_rate": 9.666666666666667e-05,
    "entropy": 0.8012344032526016,
    "num_tokens": 469039.0,
    "mean_token_accuracy": 0.8055870264768601,
    "epoch": 0.35294117647058826,
    "step": 30
  },
  {
    "loss": 0.5203,
    "grad_norm": 0.45993906259536743,
    "learning_rate": 9.96057350657239e-05,
    "entropy": 0.5153448149561882,
    "num_tokens": 623975.0,
    "mean_token_accuracy": 0.8535495191812515,
    "epoch": 0.47058823529411764,
    "step": 40
  },
  {
    "loss": 0.4475,
    "grad_norm": 0.3403550088405609,
    "learning_rate": 9.825082472361557e-05,
    "entropy": 0.44883628338575365,
    "num_tokens": 779685.0,
    "mean_token_accuracy": 0.8701241254806519,
    "epoch": 0.5882352941176471,
    "step": 50
  },
  {
    "loss": 0.4282,
    "grad_norm": 0.2295123040676117,
    "learning_rate": 9.595676696276172e-05,
    "entropy": 0.43300508707761765,
    "num_tokens": 936272.0,
    "mean_token_accuracy": 0.874285313487053,
    "epoch": 0.7058823529411765,
    "step": 60
  },
  {
    "loss": 0.4267,
    "grad_norm": 0.17979678511619568,
    "learning_rate": 9.276821300802534e-05,
    "entropy": 0.4283462271094322,
    "num_tokens": 1091178.0,
    "mean_token_accuracy": 0.8739971458911896,
    "epoch": 0.8235294117647058,
    "step": 70
  },
  {
    "loss": 0.4321,
    "grad_norm": 0.14834725856781006,
    "learning_rate": 8.874722443520899e-05,
    "entropy": 0.4309688463807106,
    "num_tokens": 1247181.0,
    "mean_token_accuracy": 0.87317014336586,
    "epoch": 0.9411764705882353,
    "step": 80
  },
  {
    "loss": 0.4183,
    "grad_norm": 0.1518690288066864,
    "learning_rate": 8.397206521307584e-05,
    "entropy": 0.4187887296080589,
    "num_tokens": 1399415.0,
    "mean_token_accuracy": 0.8748669505119324,
    "epoch": 1.0588235294117647,
    "step": 90
  },
  {
    "loss": 0.4285,
    "grad_norm": 0.16970738768577576,
    "learning_rate": 7.85356783842216e-05,
    "entropy": 0.42983078211545944,
    "num_tokens": 1552473.0,
    "mean_token_accuracy": 0.8730897456407547,
    "epoch": 1.1764705882352942,
    "step": 100
  },
  {
    "loss": 0.4141,
    "grad_norm": 0.1731289178133011,
    "learning_rate": 7.254387703447154e-05,
    "entropy": 0.4201514065265656,
    "num_tokens": 1709461.0,
    "mean_token_accuracy": 0.875280225276947,
    "epoch": 1.2941176470588236,
    "step": 110
  },
  {
    "loss": 0.4096,
    "grad_norm": 0.1622275412082672,
    "learning_rate": 6.611328476152557e-05,
    "entropy": 0.41087806075811384,
    "num_tokens": 1867531.0,
    "mean_token_accuracy": 0.8777904808521271,
    "epoch": 1.4117647058823528,
    "step": 120
  },
  {
    "loss": 0.4074,
    "grad_norm": 0.15023751556873322,
    "learning_rate": 5.9369065729286245e-05,
    "entropy": 0.40929861813783647,
    "num_tokens": 2023122.0,
    "mean_token_accuracy": 0.8778889566659928,
    "epoch": 1.5294117647058822,
    "step": 130
  },
  {
    "loss": 0.4091,
    "grad_norm": 0.13255436718463898,
    "learning_rate": 5.244248848978067e-05,
    "entropy": 0.41203168481588365,
    "num_tokens": 2178280.0,
    "mean_token_accuracy": 0.8771576941013336,
    "epoch": 1.6470588235294117,
    "step": 140
  },
  {
    "loss": 0.4027,
    "grad_norm": 0.156217560172081,
    "learning_rate": 4.5468370990111006e-05,
    "entropy": 0.4050563558936119,
    "num_tokens": 2335590.0,
    "mean_token_accuracy": 0.8797280818223954,
    "epoch": 1.7647058823529411,
    "step": 150
  },
  {
    "loss": 0.4089,
    "grad_norm": 0.1703469306230545,
    "learning_rate": 3.858245649446721e-05,
    "entropy": 0.4095383077859879,
    "num_tokens": 2490848.0,
    "mean_token_accuracy": 0.8773213714361191,
    "epoch": 1.8823529411764706,
    "step": 160
  },
  {
    "loss": 0.4052,
    "grad_norm": 0.19920362532138824,
    "learning_rate": 3.1918771495895396e-05,
    "entropy": 0.4070162191987038,
    "num_tokens": 2644022.0,
    "mean_token_accuracy": 0.8788870364427567,
    "epoch": 2.0,
    "step": 170
  },
  {
    "loss": 0.4031,
    "grad_norm": 0.13015073537826538,
    "learning_rate": 2.560701704306336e-05,
    "entropy": 0.40551580488681793,
    "num_tokens": 2800120.0,
    "mean_token_accuracy": 0.8789848744869232,
    "epoch": 2.1176470588235294,
    "step": 180
  },
  {
    "loss": 0.4067,
    "grad_norm": 0.14793093502521515,
    "learning_rate": 1.977004425688126e-05,
    "entropy": 0.41181707233190534,
    "num_tokens": 2955965.0,
    "mean_token_accuracy": 0.877298504114151,
    "epoch": 2.235294117647059,
    "step": 190
  },
  {
    "loss": 0.4079,
    "grad_norm": 0.1416511833667755,
    "learning_rate": 1.4521463173173965e-05,
    "entropy": 0.41118668466806413,
    "num_tokens": 3111112.0,
    "mean_token_accuracy": 0.8776616156101227,
    "epoch": 2.3529411764705883,
    "step": 200
  },
  {
    "loss": 0.3992,
    "grad_norm": 0.14714457094669342,
    "learning_rate": 9.963431452563332e-06,
    "entropy": 0.4004165202379227,
    "num_tokens": 3268703.0,
    "mean_token_accuracy": 0.8799054056406022,
    "epoch": 2.4705882352941178,
    "step": 210
  },
  {
    "loss": 0.4058,
    "grad_norm": 0.14480003714561462,
    "learning_rate": 6.184665997806832e-06,
    "entropy": 0.4070618197321892,
    "num_tokens": 3423830.0,
    "mean_token_accuracy": 0.8782488256692886,
    "epoch": 2.588235294117647,
    "step": 220
  },
  {
    "loss": 0.406,
    "grad_norm": 0.14828461408615112,
    "learning_rate": 3.258716180199278e-06,
    "entropy": 0.4072370767593384,
    "num_tokens": 3579594.0,
    "mean_token_accuracy": 0.8782227575778961,
    "epoch": 2.7058823529411766,
    "step": 230
  },
  {
    "loss": 0.4079,
    "grad_norm": 0.16037017107009888,
    "learning_rate": 1.2425322847218368e-06,
    "entropy": 0.40918404459953306,
    "num_tokens": 3734972.0,
    "mean_token_accuracy": 0.8776972621679306,
    "epoch": 2.8235294117647056,
    "step": 240
  },
  {
    "loss": 0.4035,
    "grad_norm": 0.1524023711681366,
    "learning_rate": 1.753570375247815e-07,
    "entropy": 0.4051833271980286,
    "num_tokens": 3891447.0,
    "mean_token_accuracy": 0.8786362946033478,
    "epoch": 2.9411764705882355,
    "step": 250
  },
  {
    "train_runtime": 1295.9489,
    "train_samples_per_second": 6.28,
    "train_steps_per_second": 0.197,
    "total_flos": 1.3072916160774144e+18,
    "train_loss": 0.5008774252498851,
    "entropy": 0.406230491399765,
    "num_tokens": 3966033.0,
    "mean_token_accuracy": 0.8788680493831634,
    "epoch": 3.0,
    "step": 255
  }
]