#!/usr/bin/env python3
"""Create initial benchmark datasets for Crowe Logic Mini"""
import json
from pathlib import Path
def create_mycology_benchmark() -> dict:
    """Build the mycology benchmark dataset.

    Returns:
        A freshly built dict with three keys, in this order:
        ``"domain"`` (the string ``"mycology"``), ``"num_questions"``
        (the number of entries in ``"questions"``) and ``"questions"``
        (a list of dicts, each with ``"id"``, ``"question"``,
        ``"correct_answer"`` and ``"difficulty"``).
    """
    questions = [
        {
            "id": "myco_001",
            "question": "What is the optimal temperature range for Pleurotus ostreatus fruiting?",
            "correct_answer": "55-65°F (13-18°C)",
            "difficulty": "easy",
        },
        {
            "id": "myco_002",
            "question": "Why does supplementing sawdust substrate with bran increase yields?",
            "correct_answer": "Bran provides additional nitrogen and nutrients for mycelial growth",
            "difficulty": "medium",
        },
        {
            "id": "myco_003",
            "question": "Calculate biological efficiency: 5 lbs mushrooms from 10 lbs dry substrate",
            "correct_answer": "50% biological efficiency (5/10 * 100)",
            "difficulty": "easy",
        },
        {
            "id": "myco_004",
            "question": "What causes Trichoderma contamination and how to prevent it?",
            "correct_answer": "Incomplete sterilization. Prevent with proper autoclave cycles (15 PSI, 90+ min) and sterile technique",
            "difficulty": "medium",
        },
        {
            "id": "myco_005",
            "question": "When scaling from 100 to 1000 lbs/week, what is the primary bottleneck?",
            "correct_answer": "Fresh air exchange (FAE) capacity - requires proportional HEPA filtration scaling",
            "difficulty": "hard",
        },
    ]
    return {
        "domain": "mycology",
        # Derived rather than hard-coded so the count cannot drift from the
        # list when questions are added or removed.
        "num_questions": len(questions),
        "questions": questions,
    }
def create_drug_discovery_benchmark() -> dict:
    """Build the drug-discovery benchmark dataset.

    Returns:
        A freshly built dict with three keys, in this order:
        ``"domain"`` (the string ``"drug_discovery"``), ``"num_questions"``
        (the number of entries in ``"questions"``) and ``"questions"``
        (a list of dicts, each with ``"id"``, ``"question"``,
        ``"correct_answer"`` and ``"difficulty"``).
    """
    questions = [
        {
            "id": "drug_001",
            "question": "What does IC50 represent in drug screening?",
            "correct_answer": "Concentration of inhibitor required to reduce activity by 50%",
            "difficulty": "easy",
        },
        {
            "id": "drug_002",
            "question": "Why is high lipophilicity (logP) problematic for oral drugs?",
            "correct_answer": "Poor aqueous solubility leads to low bioavailability",
            "difficulty": "medium",
        },
        {
            "id": "drug_003",
            "question": "IC50 = 10 nM vs target, 5 μM vs homolog. Calculate selectivity.",
            "correct_answer": "500-fold selectivity (5000 nM / 10 nM)",
            "difficulty": "medium",
        },
        {
            "id": "drug_004",
            "question": "Lead has good potency but fails ADME. Propose 3 strategies.",
            "correct_answer": "1) Add polar groups for solubility 2) Reduce MW for permeability 3) Modify labile sites for stability",
            "difficulty": "hard",
        },
        {
            "id": "drug_005",
            "question": "How does CriOS Nova achieve 98.5% time compression?",
            "correct_answer": "Parallel assay execution, automated hypothesis testing, AI experimental design eliminate sequential bottlenecks",
            "difficulty": "expert",
        },
    ]
    return {
        "domain": "drug_discovery",
        # Derived rather than hard-coded so the count cannot drift from the
        # list when questions are added or removed.
        "num_questions": len(questions),
        "questions": questions,
    }
def create_ai_systems_benchmark() -> dict:
    """Build the AI-systems benchmark dataset.

    Returns:
        A freshly built dict with three keys, in this order:
        ``"domain"`` (the string ``"ai_systems"``), ``"num_questions"``
        (the number of entries in ``"questions"``) and ``"questions"``
        (a list of dicts, each with ``"id"``, ``"question"``,
        ``"correct_answer"`` and ``"difficulty"``).
    """
    questions = [
        {
            "id": "ai_001",
            "question": "What is the primary advantage of Grouped-Query Attention?",
            "correct_answer": "Reduces KV cache size and memory bandwidth by sharing KV heads",
            "difficulty": "medium",
        },
        {
            "id": "ai_002",
            "question": "For 850M param model, estimate training memory with AdamW (mixed precision)",
            "correct_answer": "~15-20 GB (2 bytes model + 12 bytes optimizer + activations)",
            "difficulty": "medium",
        },
        {
            "id": "ai_003",
            "question": "Why is Flash Attention 2 faster than standard attention?",
            "correct_answer": "Fuses operations and optimizes memory access to minimize HBM reads/writes",
            "difficulty": "hard",
        },
        {
            "id": "ai_004",
            "question": "When to use MoE vs dense transformer?",
            "correct_answer": "MoE when: larger capacity needed, clear domains, sufficient data. Dense when: limited data, simpler deployment",
            "difficulty": "hard",
        },
        {
            "id": "ai_005",
            "question": "How does CrowLogic achieve 740x communication efficiency?",
            "correct_answer": "Hierarchical message passing with domain-specific routing eliminates broadcast overhead",
            "difficulty": "expert",
        },
    ]
    return {
        "domain": "ai_systems",
        # Derived rather than hard-coded so the count cannot drift from the
        # list when questions are added or removed.
        "num_questions": len(questions),
        "questions": questions,
    }
def main(output_dir: Path = Path("evaluation/benchmarks")) -> None:
    """Generate every benchmark dataset and write each one to a JSON file.

    One file named ``<domain>_benchmark.json`` is written per domain, and a
    progress line per file plus a final summary are printed to stdout.

    Args:
        output_dir: Directory that receives the JSON files; a ``str`` is
            accepted as well. It is created (including missing parents) if
            it does not exist. Defaults to ``evaluation/benchmarks``
            relative to the current working directory.

    Raises:
        OSError: If the directory cannot be created or a file cannot be
            written.
    """
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    benchmarks = {
        "mycology": create_mycology_benchmark(),
        "drug_discovery": create_drug_discovery_benchmark(),
        "ai_systems": create_ai_systems_benchmark(),
    }

    banner = "=" * 70
    print(banner)
    print("Creating Benchmark Datasets")
    print(banner)

    for name, data in benchmarks.items():
        filepath = output_dir / f"{name}_benchmark.json"
        # Explicit encoding so the result does not depend on the platform's
        # default locale encoding.
        with open(filepath, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)
        print(f"✓ {name}: {data['num_questions']} questions -> {filepath}")

    total = sum(b["num_questions"] for b in benchmarks.values())
    print(f"\n✅ Created {total} benchmark questions across {len(benchmarks)} domains")
    print(f"📁 Saved to: {output_dir}")
# Generate the benchmark files only when this file is executed as a script,
# so importing the module has no side effects.
if __name__ == "__main__":
    main()
|