Text Generation
PEFT
Safetensors
English
code-generation
grpo
lora
qlora
spark
co-evolution
python
conversational
Instructions to use amarsaikhan/spark-code-A-3b with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use amarsaikhan/spark-code-A-3b with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-Coder-3B-Instruct")
model = PeftModel.from_pretrained(base_model, "amarsaikhan/spark-code-A-3b")
```
- Notebooks
- Google Colab
- Kaggle
| [ | |
| { | |
| "iteration": 0, | |
| "condition": "A", | |
| "eval/pass@1": 0.7963414634146343, | |
| "eval/pass@5": 0.8536585365853658, | |
| "eval_mbpp/pass@1": 0.6339999999999999, | |
| "eval_mbpp/pass@5": 0.68 | |
| }, | |
| { | |
| "iteration": 1, | |
| "condition": "A", | |
| "time_min": 793.5286037087441, | |
| "train/pass_rate": 0.6029668411867365, | |
| "train/mean_reward": 0.630802792321117, | |
| "train/reward_std": 0.4778792950119141, | |
| "train/informative_groups": 132, | |
| "train/num_groups": 200, | |
| "train/num_rollouts": 1146, | |
| "train/mean_group_size": 5.73, | |
| "train/error_counts": { | |
| "none": 691, | |
| "wrong_answer": 335, | |
| "runtime": 112, | |
| "syntax": 6, | |
| "timeout": 2 | |
| }, | |
| "train/mean_test_pass_frac": 0.6421465968586387, | |
| "grpo/loss": -6.980106434566063e-05, | |
| "grpo/policy_loss": -7.18922148014875e-05, | |
| "grpo/kl": 0.0002088015155534978, | |
| "grpo/n_seq": 596, | |
| "grpo/n_tokens": 108403, | |
| "grpo/mean_abs_adv": 0.8494695751934044, | |
| "eval/pass@1": 0.7975609756097561, | |
| "eval/pass@5": 0.8597560975609756, | |
| "eval_mbpp/pass@1": 0.624, | |
| "eval_mbpp/pass@5": 0.69 | |
| }, | |
| { | |
| "iteration": 2, | |
| "condition": "A", | |
| "time_min": 813.6403118530909, | |
| "train/pass_rate": 0.6399317406143344, | |
| "train/mean_reward": 0.6740045506257111, | |
| "train/reward_std": 0.45920328522941584, | |
| "train/informative_groups": 124, | |
| "train/num_groups": 200, | |
| "train/num_rollouts": 1172, | |
| "train/mean_group_size": 5.86, | |
| "train/error_counts": { | |
| "none": 750, | |
| "runtime": 99, | |
| "wrong_answer": 315, | |
| "syntax": 3, | |
| "timeout": 5 | |
| }, | |
| "train/mean_test_pass_frac": 0.6842434584755405, | |
| "grpo/loss": -0.00011221848882273987, | |
| "grpo/policy_loss": -0.00011718302083088311, | |
| "grpo/kl": 0.0004965064841258027, | |
| "grpo/n_seq": 560, | |
| "grpo/n_tokens": 99371, | |
| "grpo/mean_abs_adv": 0.8516633146460711, | |
| "eval/pass@1": 0.7987804878048781, | |
| "eval/pass@5": 0.8475609756097561, | |
| "eval_mbpp/pass@1": 0.632, | |
| "eval_mbpp/pass@5": 0.71 | |
| }, | |
| { | |
| "iteration": 3, | |
| "condition": "A", | |
| "time_min": 787.4754820307096, | |
| "train/pass_rate": 0.6385135135135135, | |
| "train/mean_reward": 0.6712978603603603, | |
| "train/reward_std": 0.4618140711956283, | |
| "train/informative_groups": 123, | |
| "train/num_groups": 200, | |
| "train/num_rollouts": 1184, | |
| "train/mean_group_size": 5.92, | |
| "train/error_counts": { | |
| "none": 756, | |
| "wrong_answer": 317, | |
| "runtime": 107, | |
| "timeout": 2, | |
| "syntax": 2 | |
| }, | |
| "train/mean_test_pass_frac": 0.681179617117117, | |
| "grpo/loss": -2.1914203446325122e-05, | |
| "grpo/policy_loss": -3.2540650668425725e-05, | |
| "grpo/kl": 0.001062764957962002, | |
| "grpo/n_seq": 559, | |
| "grpo/n_tokens": 100376, | |
| "grpo/mean_abs_adv": 0.8552671855178949, | |
| "eval/pass@1": 0.8048780487804879, | |
| "eval/pass@5": 0.8536585365853658, | |
| "eval_mbpp/pass@1": 0.636, | |
| "eval_mbpp/pass@5": 0.69 | |
| } | |
| ] |