#!/usr/bin/env python3 """Create initial benchmark datasets for Crowe Logic Mini""" import json from pathlib import Path def create_mycology_benchmark(): return { "domain": "mycology", "num_questions": 5, "questions": [ { "id": "myco_001", "question": "What is the optimal temperature range for Pleurotus ostreatus fruiting?", "correct_answer": "55-65°F (13-18°C)", "difficulty": "easy" }, { "id": "myco_002", "question": "Why does supplementing sawdust substrate with bran increase yields?", "correct_answer": "Bran provides additional nitrogen and nutrients for mycelial growth", "difficulty": "medium" }, { "id": "myco_003", "question": "Calculate biological efficiency: 5 lbs mushrooms from 10 lbs dry substrate", "correct_answer": "50% biological efficiency (5/10 * 100)", "difficulty": "easy" }, { "id": "myco_004", "question": "What causes Trichoderma contamination and how to prevent it?", "correct_answer": "Incomplete sterilization. Prevent with proper autoclave cycles (15 PSI, 90+ min) and sterile technique", "difficulty": "medium" }, { "id": "myco_005", "question": "When scaling from 100 to 1000 lbs/week, what is the primary bottleneck?", "correct_answer": "Fresh air exchange (FAE) capacity - requires proportional HEPA filtration scaling", "difficulty": "hard" } ] } def create_drug_discovery_benchmark(): return { "domain": "drug_discovery", "num_questions": 5, "questions": [ { "id": "drug_001", "question": "What does IC50 represent in drug screening?", "correct_answer": "Concentration of inhibitor required to reduce activity by 50%", "difficulty": "easy" }, { "id": "drug_002", "question": "Why is high lipophilicity (logP) problematic for oral drugs?", "correct_answer": "Poor aqueous solubility leads to low bioavailability", "difficulty": "medium" }, { "id": "drug_003", "question": "IC50 = 10 nM vs target, 5 μM vs homolog. Calculate selectivity.", "correct_answer": "500-fold selectivity (5000 nM / 10 nM)", "difficulty": "medium" }, { "id": "drug_004", "question": "Lead has good potency but fails ADME. Propose 3 strategies.", "correct_answer": "1) Add polar groups for solubility 2) Reduce MW for permeability 3) Modify labile sites for stability", "difficulty": "hard" }, { "id": "drug_005", "question": "How does CriOS Nova achieve 98.5% time compression?", "correct_answer": "Parallel assay execution, automated hypothesis testing, AI experimental design eliminate sequential bottlenecks", "difficulty": "expert" } ] } def create_ai_systems_benchmark(): return { "domain": "ai_systems", "num_questions": 5, "questions": [ { "id": "ai_001", "question": "What is the primary advantage of Grouped-Query Attention?", "correct_answer": "Reduces KV cache size and memory bandwidth by sharing KV heads", "difficulty": "medium" }, { "id": "ai_002", "question": "For 850M param model, estimate training memory with AdamW (mixed precision)", "correct_answer": "~15-20 GB (2 bytes model + 12 bytes optimizer + activations)", "difficulty": "medium" }, { "id": "ai_003", "question": "Why is Flash Attention 2 faster than standard attention?", "correct_answer": "Fuses operations and optimizes memory access to minimize HBM reads/writes", "difficulty": "hard" }, { "id": "ai_004", "question": "When to use MoE vs dense transformer?", "correct_answer": "MoE when: larger capacity needed, clear domains, sufficient data. Dense when: limited data, simpler deployment", "difficulty": "hard" }, { "id": "ai_005", "question": "How does CrowLogic achieve 740x communication efficiency?", "correct_answer": "Hierarchical message passing with domain-specific routing eliminates broadcast overhead", "difficulty": "expert" } ] } def main(): output_dir = Path("evaluation/benchmarks") output_dir.mkdir(parents=True, exist_ok=True) benchmarks = { "mycology": create_mycology_benchmark(), "drug_discovery": create_drug_discovery_benchmark(), "ai_systems": create_ai_systems_benchmark() } print("=" * 70) print("Creating Benchmark Datasets") print("=" * 70) for name, data in benchmarks.items(): filepath = output_dir / f"{name}_benchmark.json" with open(filepath, 'w') as f: json.dump(data, f, indent=2) print(f"✓ {name}: {data['num_questions']} questions -> {filepath}") total = sum(b['num_questions'] for b in benchmarks.values()) print(f"\n✅ Created {total} benchmark questions across {len(benchmarks)} domains") print(f"📁 Saved to: {output_dir}") if __name__ == "__main__": main()