File size: 3,365 Bytes

275f774

[
  {
    "id": "simpson_paradox",
    "prompt": "A hospital notices that its mortality rate for cardiac surgery is 2x the national average, but every individual surgeon's mortality rate is at or below the national average within each patient risk category. Explain this paradox and recommend what the hospital should do.",
    "domain": "reasoning",
    "difficulty": "hard",
    "ground_truth_concepts": ["Simpson's paradox", "case mix", "referral patterns", "risk adjustment"]
  },
  {
    "id": "lru_cache_ttl",
    "prompt": "Implement an LRU cache with O(1) get/put that also supports TTL (time-to-live) expiration on individual entries. Write it in Python with full test cases.",
    "domain": "code",
    "difficulty": "medium",
    "ground_truth_concepts": ["OrderedDict", "doubly-linked list", "lazy expiration", "time complexity"]
  },
  {
    "id": "microservice_crossover",
    "prompt": "You observe that teams using microservices ship features 40% faster than monolith teams in year 1, but 20% slower by year 3. What explains this crossover pattern and what does it imply for architecture decisions?",
    "domain": "analysis",
    "difficulty": "hard",
    "ground_truth_concepts": ["coordination cost", "Conway's law", "distributed complexity", "organizational coupling"]
  },
  {
    "id": "consensus_swarm",
    "prompt": "Design a distributed consensus protocol inspired by biological swarm intelligence. It should handle network partitions and Byzantine faults. Define the protocol formally with safety and liveness proofs.",
    "domain": "creative",
    "difficulty": "hard",
    "ground_truth_concepts": ["pheromone trails", "quorum sensing", "Byzantine fault tolerance", "convergence"]
  },
  {
    "id": "mlr_paradox",
    "prompt": "Medicare Advantage plans are seeing medical loss ratios (MLRs) increase by 200-400 basis points year over year while membership grows 8-12% annually. Is this a structural or cyclical phenomenon? What does it imply for healthcare technology vendors?",
    "domain": "analysis",
    "difficulty": "hard",
    "ground_truth_concepts": ["adverse selection", "risk corridor", "V28 transition", "supplemental benefits", "Star Ratings economics"]
  },
  {
    "id": "rate_limiter",
    "prompt": "Design and implement a rate limiter in Python that supports sliding window, token bucket, and leaky bucket algorithms through a unified interface. Include thread safety.",
    "domain": "code",
    "difficulty": "medium",
    "ground_truth_concepts": ["strategy pattern", "thread safety", "Redis-compatible", "time windowing"]
  },
  {
    "id": "understanding_test",
    "prompt": "Create an operationally testable framework for evaluating whether an AI system has developed genuine understanding vs. sophisticated pattern matching. The framework must distinguish between these in practice.",
    "domain": "creative",
    "difficulty": "hard",
    "ground_truth_concepts": ["transfer learning", "compositional generalization", "causal reasoning", "abstraction"]
  },
  {
    "id": "saas_retention",
    "prompt": "A SaaS company's logo retention is 95% but net revenue retention is 78%. Diagnose the likely dynamics, propose a measurement framework to identify root causes, and recommend interventions.",
    "domain": "analysis",
    "difficulty": "medium",
    "ground_truth_concepts": ["seat contraction", "downgrade patterns", "product-market fit", "expansion revenue"]
  }
]