CyberCoder-7B-v1 / project_config.json
moro72842's picture
Upload project_config.json
34c4a38 verified
{
"project": "CyberCoder-7B-v1",
"description": "Cybersecurity-focused code model with structured JSON output",
"architecture_decisions": {
"base_model": {
"choice": "Qwen/Qwen2.5-Coder-7B-Instruct",
"rationale": "State-of-the-art open code model at the 7B scale, with strong results on HumanEval, MBPP, and LiveCodeBench. Released under the Apache 2.0 license."
},
"training_method": {
"choice": "SFT with LoRA (r=64, alpha=128)",
"rationale": "Follows the CyberPal 2.0 recipe. LoRA allows training on a single A10G or A100 GPU.",
"hyperparameters": {
"learning_rate": 4e-5,
"warmup_ratio": 0.15,
"num_epochs": 2,
"max_seq_length": 4096,
"batch_size_effective": 16,
"optimizer": "AdamW",
"scheduler": "cosine"
}
}
},
"scaling_roadmap": {
"phase_1": "7B LoRA SFT (current) - $4-16, 2-4hrs on A10G",
"phase_2": "7B full SFT with 200K+ examples - $32-64, 12-24hrs on A100",
"phase_3": "32B LoRA SFT - $192-768, 24-48hrs on 8xA100",
"phase_4": "100B+ sparse MoE (frontier) - $5M-$50M, 2-4 months on 1000+ H100s"
},
"research_references": [
{"paper": "CyberPal 2.0", "arxiv": "2510.14113"},
{"paper": "Foundation-Sec-8B", "arxiv": "2504.21039"},
{"paper": "SWE-Master", "arxiv": "2602.03411"},
{"paper": "RL-Struct (JSON output)", "arxiv": "2512.00319"},
{"paper": "DeepSeek-V3", "arxiv": "2412.19437"}
]
}