Upload evaluation/ai_systems_benchmark.json with huggingface_hub
evaluation/ai_systems_benchmark.json
ADDED
@@ -0,0 +1,36 @@
+{
+  "domain": "ai_systems",
+  "num_questions": 5,
+  "questions": [
+    {
+      "id": "ai_001",
+      "question": "What is the primary advantage of Grouped-Query Attention?",
+      "correct_answer": "Reduces KV cache size and memory bandwidth by sharing KV heads",
+      "difficulty": "medium"
+    },
+    {
+      "id": "ai_002",
+      "question": "For 850M param model, estimate training memory with AdamW (mixed precision)",
+      "correct_answer": "~15-20 GB (2 bytes model + 12 bytes optimizer + activations)",
+      "difficulty": "medium"
+    },
+    {
+      "id": "ai_003",
+      "question": "Why is Flash Attention 2 faster than standard attention?",
+      "correct_answer": "Fuses operations and optimizes memory access to minimize HBM reads/writes",
+      "difficulty": "hard"
+    },
+    {
+      "id": "ai_004",
+      "question": "When to use MoE vs dense transformer?",
+      "correct_answer": "MoE when: larger capacity needed, clear domains, sufficient data. Dense when: limited data, simpler deployment",
+      "difficulty": "hard"
+    },
+    {
+      "id": "ai_005",
+      "question": "How does CrowLogic achieve 740x communication efficiency?",
+      "correct_answer": "Hierarchical message passing with domain-specific routing eliminates broadcast overhead",
+      "difficulty": "expert"
+    }
+  ]
+}
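The commit title matches the default message `huggingface_hub` generates when a file is pushed through its upload API. For reference, a minimal sketch of how this kind of upload is typically performed is shown below; the `repo_id` value and `repo_type="dataset"` are placeholder assumptions, not details taken from this commit.

```python
from huggingface_hub import HfApi

# Push the benchmark file to the Hub. Requires a write token,
# e.g. via `huggingface-cli login` or the HF_TOKEN env var.
api = HfApi()
api.upload_file(
    path_or_fileobj="evaluation/ai_systems_benchmark.json",  # local file to upload
    path_in_repo="evaluation/ai_systems_benchmark.json",     # destination path in the repo
    repo_id="your-username/your-repo",                       # placeholder: actual repo not shown in this commit
    repo_type="dataset",                                     # assumption: could also be "model" or "space"
)
```

When no `commit_message` is passed, `upload_file` defaults to "Upload <path_in_repo> with huggingface_hub", which is consistent with the commit title above.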