| |
| """Create initial benchmark datasets for Crowe Logic Mini""" |
|
|
| import json |
| from pathlib import Path |
|
|
def create_mycology_benchmark():
    """Build the mycology benchmark dataset.

    Returns:
        dict: A JSON-serializable mapping with the keys ``domain``
        (always ``"mycology"``), ``num_questions`` and ``questions``, in
        that order. Every entry of ``questions`` is a dict with the keys
        ``id``, ``question``, ``correct_answer`` and ``difficulty``.
        A fresh structure is built on every call.
    """
    questions = [
        {
            "id": "myco_001",
            "question": "What is the optimal temperature range for Pleurotus ostreatus fruiting?",
            "correct_answer": "55-65°F (13-18°C)",
            "difficulty": "easy",
        },
        {
            "id": "myco_002",
            "question": "Why does supplementing sawdust substrate with bran increase yields?",
            "correct_answer": "Bran provides additional nitrogen and nutrients for mycelial growth",
            "difficulty": "medium",
        },
        {
            "id": "myco_003",
            "question": "Calculate biological efficiency: 5 lbs mushrooms from 10 lbs dry substrate",
            "correct_answer": "50% biological efficiency (5/10 * 100)",
            "difficulty": "easy",
        },
        {
            "id": "myco_004",
            "question": "What causes Trichoderma contamination and how to prevent it?",
            "correct_answer": "Incomplete sterilization. Prevent with proper autoclave cycles (15 PSI, 90+ min) and sterile technique",
            "difficulty": "medium",
        },
        {
            "id": "myco_005",
            "question": "When scaling from 100 to 1000 lbs/week, what is the primary bottleneck?",
            "correct_answer": "Fresh air exchange (FAE) capacity - requires proportional HEPA filtration scaling",
            "difficulty": "hard",
        },
    ]
    return {
        "domain": "mycology",
        # Derived from the list so the count cannot drift out of sync when
        # questions are added or removed.
        "num_questions": len(questions),
        "questions": questions,
    }
|
|
def create_drug_discovery_benchmark():
    """Build the drug-discovery benchmark dataset.

    Returns:
        dict: A JSON-serializable mapping with the keys ``domain``
        (always ``"drug_discovery"``), ``num_questions`` and ``questions``,
        in that order. Every entry of ``questions`` is a dict with the keys
        ``id``, ``question``, ``correct_answer`` and ``difficulty``.
        A fresh structure is built on every call.
    """
    questions = [
        {
            "id": "drug_001",
            "question": "What does IC50 represent in drug screening?",
            "correct_answer": "Concentration of inhibitor required to reduce activity by 50%",
            "difficulty": "easy",
        },
        {
            "id": "drug_002",
            "question": "Why is high lipophilicity (logP) problematic for oral drugs?",
            "correct_answer": "Poor aqueous solubility leads to low bioavailability",
            "difficulty": "medium",
        },
        {
            "id": "drug_003",
            "question": "IC50 = 10 nM vs target, 5 μM vs homolog. Calculate selectivity.",
            "correct_answer": "500-fold selectivity (5000 nM / 10 nM)",
            "difficulty": "medium",
        },
        {
            "id": "drug_004",
            "question": "Lead has good potency but fails ADME. Propose 3 strategies.",
            "correct_answer": "1) Add polar groups for solubility 2) Reduce MW for permeability 3) Modify labile sites for stability",
            "difficulty": "hard",
        },
        {
            "id": "drug_005",
            "question": "How does CriOS Nova achieve 98.5% time compression?",
            "correct_answer": "Parallel assay execution, automated hypothesis testing, AI experimental design eliminate sequential bottlenecks",
            "difficulty": "expert",
        },
    ]
    return {
        "domain": "drug_discovery",
        # Derived from the list so the count cannot drift out of sync when
        # questions are added or removed.
        "num_questions": len(questions),
        "questions": questions,
    }
|
|
def create_ai_systems_benchmark():
    """Build the AI-systems benchmark dataset.

    Returns:
        dict: A JSON-serializable mapping with the keys ``domain``
        (always ``"ai_systems"``), ``num_questions`` and ``questions``, in
        that order. Every entry of ``questions`` is a dict with the keys
        ``id``, ``question``, ``correct_answer`` and ``difficulty``.
        A fresh structure is built on every call.
    """
    questions = [
        {
            "id": "ai_001",
            "question": "What is the primary advantage of Grouped-Query Attention?",
            "correct_answer": "Reduces KV cache size and memory bandwidth by sharing KV heads",
            "difficulty": "medium",
        },
        {
            "id": "ai_002",
            "question": "For 850M param model, estimate training memory with AdamW (mixed precision)",
            "correct_answer": "~15-20 GB (2 bytes model + 12 bytes optimizer + activations)",
            "difficulty": "medium",
        },
        {
            "id": "ai_003",
            "question": "Why is Flash Attention 2 faster than standard attention?",
            "correct_answer": "Fuses operations and optimizes memory access to minimize HBM reads/writes",
            "difficulty": "hard",
        },
        {
            "id": "ai_004",
            "question": "When to use MoE vs dense transformer?",
            "correct_answer": "MoE when: larger capacity needed, clear domains, sufficient data. Dense when: limited data, simpler deployment",
            "difficulty": "hard",
        },
        {
            "id": "ai_005",
            "question": "How does CrowLogic achieve 740x communication efficiency?",
            "correct_answer": "Hierarchical message passing with domain-specific routing eliminates broadcast overhead",
            "difficulty": "expert",
        },
    ]
    return {
        "domain": "ai_systems",
        # Derived from the list so the count cannot drift out of sync when
        # questions are added or removed.
        "num_questions": len(questions),
        "questions": questions,
    }
|
|
def main(output_dir="evaluation/benchmarks"):
    """Write every benchmark dataset to disk as pretty-printed JSON.

    One file named ``<domain>_benchmark.json`` is written per domain, and a
    short progress report is printed to stdout.

    Args:
        output_dir: Directory to write into (``str`` or ``Path``). It is
            created, including missing parents, if it does not exist.
            Defaults to ``evaluation/benchmarks`` relative to the current
            working directory.
    """
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    benchmarks = {
        "mycology": create_mycology_benchmark(),
        "drug_discovery": create_drug_discovery_benchmark(),
        "ai_systems": create_ai_systems_benchmark(),
    }

    print("=" * 70)
    print("Creating Benchmark Datasets")
    print("=" * 70)

    for name, data in benchmarks.items():
        filepath = output_dir / f"{name}_benchmark.json"
        # Explicit encoding keeps the written bytes independent of the
        # platform's locale default.
        with open(filepath, 'w', encoding="utf-8") as f:
            json.dump(data, f, indent=2)
        print(f"✓ {name}: {data['num_questions']} questions -> {filepath}")

    total = sum(data['num_questions'] for data in benchmarks.values())
    print(f"\n✅ Created {total} benchmark questions across {len(benchmarks)} domains")
    print(f"📁 Saved to: {output_dir}")
|
|
# Generate the benchmark files only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|