PEFT
Safetensors
English
reasoning
mathematical-reasoning
reward-model
preference-model
prefix-evaluation
utility-model
lora
qwen
Instructions to use zhiqix/PUM with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use zhiqix/PUM with PEFT:
Task type is invalid.
- Notebooks
- Google Colab
- Kaggle
```json
{
  "output_dir": "./output/pum_qwen3_4b_lora_newdata_0420",
  "per_device_train_batch_size": 8,
  "per_device_eval_batch_size": 8,
  "gradient_accumulation_steps": 4,
  "num_train_epochs": 2,
  "learning_rate": 1e-05,
  "weight_decay": 0.01,
  "warmup_ratio": 0.08,
  "max_grad_norm": 1.0,
  "logging_steps": 5,
  "eval_steps": 20,
  "save_steps": 200,
  "bf16": true,
  "fp16": false,
  "gradient_checkpointing": true,
  "gradient_checkpointing_use_reentrant": false,
  "ddp_static_graph_for_lora": true,
  "num_workers": 4,
  "seed": 42,
  "log_jsonl_name": "training_log.jsonl",
  "save_png_curves": true,
  "plot_dpi": 160,
  "tie_eval_margin": 0.25,
  "train_random_swap_prob": 0.0,
  "eval_random_swap_prob": 0.0
}
```