Add model config metadata for programmatic access
Browse files- swarm_atlas_config.json +91 -0
swarm_atlas_config.json
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_name": "SwarmAtlas-27B",
|
| 3 |
+
"model_version": "v1",
|
| 4 |
+
"base_model": "Qwen/Qwen3.5-27B",
|
| 5 |
+
"organization": "SwarmandBee",
|
| 6 |
+
"architecture": {
|
| 7 |
+
"type": "dense_transformer",
|
| 8 |
+
"attention": "75% Gated Delta Networks + 25% Standard Attention",
|
| 9 |
+
"parameters": "27B (all active)",
|
| 10 |
+
"hidden_dim": 5120,
|
| 11 |
+
"layers": 64,
|
| 12 |
+
"vocab_size": 248320,
|
| 13 |
+
"context_window": {
|
| 14 |
+
"training": 16384,
|
| 15 |
+
"native": 262144,
|
| 16 |
+
"yarn_extended": 1048576
|
| 17 |
+
}
|
| 18 |
+
},
|
| 19 |
+
"training": {
|
| 20 |
+
"method": "bf16 LoRA",
|
| 21 |
+
"lora_r": 64,
|
| 22 |
+
"lora_alpha": 32,
|
| 23 |
+
"learning_rate": 1e-05,
|
| 24 |
+
"scheduler": "cosine",
|
| 25 |
+
"warmup_ratio": 0.05,
|
| 26 |
+
"weight_decay": 0.01,
|
| 27 |
+
"effective_batch_size": 32,
|
| 28 |
+
"max_seq_len": 4096,
|
| 29 |
+
"epoch_fraction": 0.6,
|
| 30 |
+
"steps": 844,
|
| 31 |
+
"training_loss": 0.4186,
|
| 32 |
+
"eval_loss": 0.2238,
|
| 33 |
+
"training_hours": 29.32,
|
| 34 |
+
"gpu": "NVIDIA RTX PRO 6000 Blackwell 96GB",
|
| 35 |
+
"framework": "Unsloth + TRL SFTTrainer",
|
| 36 |
+
"packing": true
|
| 37 |
+
},
|
| 38 |
+
"data": {
|
| 39 |
+
"total_pairs": 45039,
|
| 40 |
+
"pools": {
|
| 41 |
+
"diversified": {
|
| 42 |
+
"share": 0.6,
|
| 43 |
+
"pairs": 27000
|
| 44 |
+
},
|
| 45 |
+
"rpa_risk": {
|
| 46 |
+
"share": 0.25,
|
| 47 |
+
"pairs": 11200
|
| 48 |
+
},
|
| 49 |
+
"macro_graph": {
|
| 50 |
+
"share": 0.08,
|
| 51 |
+
"pairs": 3600
|
| 52 |
+
},
|
| 53 |
+
"golden": {
|
| 54 |
+
"share": 0.04,
|
| 55 |
+
"pairs": 1800
|
| 56 |
+
},
|
| 57 |
+
"mutations": {
|
| 58 |
+
"share": 0.03,
|
| 59 |
+
"pairs": 1400
|
| 60 |
+
}
|
| 61 |
+
},
|
| 62 |
+
"cook_streams": [
|
| 63 |
+
"debt_maturity",
|
| 64 |
+
"cmbs_distress",
|
| 65 |
+
"rate_advisory",
|
| 66 |
+
"equity_advisory",
|
| 67 |
+
"valuation",
|
| 68 |
+
"deal_origination",
|
| 69 |
+
"macro_causality",
|
| 70 |
+
"deal_graph"
|
| 71 |
+
]
|
| 72 |
+
},
|
| 73 |
+
"validation": {
|
| 74 |
+
"memphis_ic_test": {
|
| 75 |
+
"math_accuracy": "12/12",
|
| 76 |
+
"verdict": "NUKED (correct kill)",
|
| 77 |
+
"output_tokens": 10220
|
| 78 |
+
}
|
| 79 |
+
},
|
| 80 |
+
"serving": {
|
| 81 |
+
"engine": "vLLM 0.17.0",
|
| 82 |
+
"throughput": "88 tok/s @ 4 concurrent",
|
| 83 |
+
"api_endpoint": "https://api.swarmandbee.ai/v1",
|
| 84 |
+
"model_name": "swarm/atlas-27b"
|
| 85 |
+
},
|
| 86 |
+
"license": "apache-2.0",
|
| 87 |
+
"contact": {
|
| 88 |
+
"website": "https://swarmandbee.ai",
|
| 89 |
+
"email": "build@swarmandbee.com"
|
| 90 |
+
}
|
| 91 |
+
}
|