{
  "domain": "ai_systems",
  "num_questions": 5,
  "questions": [
    {
      "id": "ai_001",
      "question": "What is the primary advantage of Grouped-Query Attention?",
      "correct_answer": "Reduces KV cache size and memory bandwidth by sharing KV heads",
      "difficulty": "medium"
    },
    {
      "id": "ai_002",
      "question": "For 850M param model, estimate training memory with AdamW (mixed precision)",
      "correct_answer": "~15-20 GB (2 bytes model + 12 bytes optimizer + activations)",
      "difficulty": "medium"
    },
    {
      "id": "ai_003",
      "question": "Why is Flash Attention 2 faster than standard attention?",
      "correct_answer": "Fuses operations and optimizes memory access to minimize HBM reads/writes",
      "difficulty": "hard"
    },
    {
      "id": "ai_004",
      "question": "When to use MoE vs dense transformer?",
      "correct_answer": "MoE when: larger capacity needed, clear domains, sufficient data. Dense when: limited data, simpler deployment",
      "difficulty": "hard"
    },
    {
      "id": "ai_005",
      "question": "How does CrowLogic achieve 740x communication efficiency?",
      "correct_answer": "Hierarchical message passing with domain-specific routing eliminates broadcast overhead",
      "difficulty": "expert"
    }
  ]
}