File size: 5,977 Bytes
27efa47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#!/usr/bin/env python3
"""Create initial benchmark datasets for Crowe Logic Mini"""

import json
from pathlib import Path

def create_mycology_benchmark():
    """Return the mycology benchmark dataset as a JSON-serializable dict.

    The result has three keys, in this order: ``domain`` (the string
    ``"mycology"``), ``num_questions`` (the number of entries in
    ``questions``) and ``questions`` (a list of dicts, each with ``id``,
    ``question``, ``correct_answer`` and ``difficulty``). A fresh structure
    is built on every call.
    """
    fields = ("id", "question", "correct_answer", "difficulty")
    # One row per question, columns in the same order as ``fields``.
    rows = [
        (
            "myco_001",
            "What is the optimal temperature range for Pleurotus ostreatus fruiting?",
            "55-65°F (13-18°C)",
            "easy",
        ),
        (
            "myco_002",
            "Why does supplementing sawdust substrate with bran increase yields?",
            "Bran provides additional nitrogen and nutrients for mycelial growth",
            "medium",
        ),
        (
            "myco_003",
            "Calculate biological efficiency: 5 lbs mushrooms from 10 lbs dry substrate",
            "50% biological efficiency (5/10 * 100)",
            "easy",
        ),
        (
            "myco_004",
            "What causes Trichoderma contamination and how to prevent it?",
            "Incomplete sterilization. Prevent with proper autoclave cycles (15 PSI, 90+ min) and sterile technique",
            "medium",
        ),
        (
            "myco_005",
            "When scaling from 100 to 1000 lbs/week, what is the primary bottleneck?",
            "Fresh air exchange (FAE) capacity - requires proportional HEPA filtration scaling",
            "hard",
        ),
    ]
    questions = [dict(zip(fields, row)) for row in rows]
    return {
        "domain": "mycology",
        # Counted from the table so it always matches the list below.
        "num_questions": len(questions),
        "questions": questions,
    }

def create_drug_discovery_benchmark():
    """Return the drug-discovery benchmark dataset as a JSON-serializable dict.

    The result has three keys, in this order: ``domain`` (the string
    ``"drug_discovery"``), ``num_questions`` (the number of entries in
    ``questions``) and ``questions`` (a list of dicts, each with ``id``,
    ``question``, ``correct_answer`` and ``difficulty``). A fresh structure
    is built on every call.
    """
    fields = ("id", "question", "correct_answer", "difficulty")
    # One row per question, columns in the same order as ``fields``.
    rows = [
        (
            "drug_001",
            "What does IC50 represent in drug screening?",
            "Concentration of inhibitor required to reduce activity by 50%",
            "easy",
        ),
        (
            "drug_002",
            "Why is high lipophilicity (logP) problematic for oral drugs?",
            "Poor aqueous solubility leads to low bioavailability",
            "medium",
        ),
        (
            "drug_003",
            "IC50 = 10 nM vs target, 5 μM vs homolog. Calculate selectivity.",
            "500-fold selectivity (5000 nM / 10 nM)",
            "medium",
        ),
        (
            "drug_004",
            "Lead has good potency but fails ADME. Propose 3 strategies.",
            "1) Add polar groups for solubility 2) Reduce MW for permeability 3) Modify labile sites for stability",
            "hard",
        ),
        (
            "drug_005",
            "How does CriOS Nova achieve 98.5% time compression?",
            "Parallel assay execution, automated hypothesis testing, AI experimental design eliminate sequential bottlenecks",
            "expert",
        ),
    ]
    questions = [dict(zip(fields, row)) for row in rows]
    return {
        "domain": "drug_discovery",
        # Counted from the table so it always matches the list below.
        "num_questions": len(questions),
        "questions": questions,
    }

def create_ai_systems_benchmark():
    """Return the AI-systems benchmark dataset as a JSON-serializable dict.

    The result has three keys, in this order: ``domain`` (the string
    ``"ai_systems"``), ``num_questions`` (the number of entries in
    ``questions``) and ``questions`` (a list of dicts, each with ``id``,
    ``question``, ``correct_answer`` and ``difficulty``). A fresh structure
    is built on every call.
    """
    fields = ("id", "question", "correct_answer", "difficulty")
    # One row per question, columns in the same order as ``fields``.
    rows = [
        (
            "ai_001",
            "What is the primary advantage of Grouped-Query Attention?",
            "Reduces KV cache size and memory bandwidth by sharing KV heads",
            "medium",
        ),
        (
            "ai_002",
            "For 850M param model, estimate training memory with AdamW (mixed precision)",
            "~15-20 GB (2 bytes model + 12 bytes optimizer + activations)",
            "medium",
        ),
        (
            "ai_003",
            "Why is Flash Attention 2 faster than standard attention?",
            "Fuses operations and optimizes memory access to minimize HBM reads/writes",
            "hard",
        ),
        (
            "ai_004",
            "When to use MoE vs dense transformer?",
            "MoE when: larger capacity needed, clear domains, sufficient data. Dense when: limited data, simpler deployment",
            "hard",
        ),
        (
            "ai_005",
            "How does CrowLogic achieve 740x communication efficiency?",
            "Hierarchical message passing with domain-specific routing eliminates broadcast overhead",
            "expert",
        ),
    ]
    questions = [dict(zip(fields, row)) for row in rows]
    return {
        "domain": "ai_systems",
        # Counted from the table so it always matches the list below.
        "num_questions": len(questions),
        "questions": questions,
    }

def main(output_dir="evaluation/benchmarks"):
    """Write every benchmark dataset to ``<output_dir>/<name>_benchmark.json``.

    Args:
        output_dir: Directory (``str`` or ``Path``) that receives the JSON
            files. It is created, together with any missing parents, if it
            does not exist. Defaults to ``evaluation/benchmarks`` relative
            to the current working directory, the location this script used
            before the directory became configurable.

    Prints one progress line per dataset followed by a summary.
    """
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    benchmarks = {
        "mycology": create_mycology_benchmark(),
        "drug_discovery": create_drug_discovery_benchmark(),
        "ai_systems": create_ai_systems_benchmark()
    }

    print("=" * 70)
    print("Creating Benchmark Datasets")
    print("=" * 70)

    for name, data in benchmarks.items():
        filepath = output_dir / f"{name}_benchmark.json"
        # Pin the encoding so the result never depends on the platform's
        # locale default. json.dump escapes non-ASCII characters by default,
        # so the bytes written are the same as before.
        with open(filepath, 'w', encoding="utf-8") as f:
            json.dump(data, f, indent=2)
        print(f"✓ {name}: {data['num_questions']} questions -> {filepath}")

    total = sum(b['num_questions'] for b in benchmarks.values())
    print(f"\n✅ Created {total} benchmark questions across {len(benchmarks)} domains")
    print(f"📁 Saved to: {output_dir}")

# Generate the benchmark files only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()