mike1210 committed on
Commit
8488a69
·
verified ·
1 Parent(s): 36e1a63

Upload evaluation/ai_systems_benchmark.json with huggingface_hub

Browse files
evaluation/ai_systems_benchmark.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{
  "domain": "ai_systems",
  "num_questions": 5,
  "questions": [
    {
      "id": "ai_001",
      "question": "What is the primary advantage of Grouped-Query Attention?",
      "correct_answer": "Reduces KV cache size and memory bandwidth by sharing KV heads",
      "difficulty": "medium"
    },
    {
      "id": "ai_002",
      "question": "For 850M param model, estimate training memory with AdamW (mixed precision)",
      "correct_answer": "~15-20 GB (2 bytes model + 12 bytes optimizer + activations)",
      "difficulty": "medium"
    },
    {
      "id": "ai_003",
      "question": "Why is Flash Attention 2 faster than standard attention?",
      "correct_answer": "Fuses operations and optimizes memory access to minimize HBM reads/writes",
      "difficulty": "hard"
    },
    {
      "id": "ai_004",
      "question": "When to use MoE vs dense transformer?",
      "correct_answer": "MoE when: larger capacity needed, clear domains, sufficient data. Dense when: limited data, simpler deployment",
      "difficulty": "hard"
    },
    {
      "id": "ai_005",
      "question": "How does CrowLogic achieve 740x communication efficiency?",
      "correct_answer": "Hierarchical message passing with domain-specific routing eliminates broadcast overhead",
      "difficulty": "expert"
    }
  ]
}