npc-fin-prm-7b / training_metadata.json
ramankrishna10's picture
NPC Fin-PRM 7B - Financial Process Reward Model (open-source release)
f5266b4 verified
raw
history blame contribute delete
859 Bytes
{
"base_model": "Qwen/Qwen2.5-7B-Instruct",
"quantization": "4bit",
"lora_config": {
"r": 32,
"alpha": 64,
"dropout": 0.05,
"target_modules": [
"q_proj",
"k_proj",
"v_proj",
"o_proj",
"gate_proj",
"up_proj",
"down_proj"
]
},
"training_config": {
"epochs": 3,
"batch_size": 2,
"gradient_accumulation_steps": 8,
"learning_rate": 0.0002,
"lr_scheduler": "cosine",
"warmup_ratio": 0.05,
"max_seq_length": 2048,
"bf16": true
},
"dataset_stats": {
"train_examples": 37542,
"val_examples": 4171
},
"results": {
"train_loss": 0.25181230885952716,
"eval_loss": 0.21678349375724792,
"elapsed_seconds": 62600.10093379021,
"peak_vram_gb": 23.689548015594482
},
"dry_run": false,
"timestamp": "2026-03-21T21:06:36.174888+00:00"
}