moro72842
/

CyberCoder-7B-v1

Model card Files Files and versions

CyberCoder-7B-v1 / project_config.json

moro72842's picture

Upload project_config.json

34c4a38 verified 28 days ago

history blame contribute delete

1.39 kB

	{
	"project": "CyberCoder-7B-v1",
	"description": "Cybersecurity-focused code model with structured JSON output",
	"architecture_decisions": {
	"base_model": {
	"choice": "Qwen/Qwen2.5-Coder-7B-Instruct",
	"rationale": "SOTA open code model at 7B scale. Strong on HumanEval, MBPP, LiveCodeBench. Apache 2.0 license."
	},
	"training_method": {
	"choice": "SFT with LoRA (r=64, alpha=128)",
	"rationale": "CyberPal 2.0 recipe. LoRA allows training on a single A10G/A100.",
	"hyperparameters": {
	"learning_rate": 4e-5,
	"warmup_ratio": 0.15,
	"num_epochs": 2,
	"max_seq_length": 4096,
	"batch_size_effective": 16,
	"optimizer": "AdamW",
	"scheduler": "cosine"
	}
	}
	},
	"scaling_roadmap": {
	"phase_1": "7B LoRA SFT (current) - $4-16, 2-4hrs on A10G",
	"phase_2": "7B full SFT with 200K+ examples - $32-64, 12-24hrs on A100",
	"phase_3": "32B LoRA SFT - $192-768, 24-48hrs on 8xA100",
	"phase_4": "100B+ sparse MoE (frontier) - $5M-50M, 2-4 months on 1000+ H100s"
	},
	"research_references": [
	{"paper": "CyberPal 2.0", "arxiv": "2510.14113"},
	{"paper": "Foundation-Sec-8B", "arxiv": "2504.21039"},
	{"paper": "SWE-Master", "arxiv": "2602.03411"},
	{"paper": "RL-Struct (JSON output)", "arxiv": "2512.00319"},
	{"paper": "DeepSeek-V3", "arxiv": "2412.19437"}
	]
	}