{ "metadata": { "description": "Distribution of QA pairs across domains and problem types", "total_episodes": 208, "total_qa_pairs": 2496, "problem_type_mapping": { "A": "Recall", "B": "Causal Inference", "C": "State Updating", "D": "State Abstraction" } }, "overall_distribution": { "total_qa_pairs": 2496, "problem_types": { "A": { "name": "Recall", "count": 839, "ratio": 0.336138 }, "B": { "name": "Causal Inference", "count": 596, "ratio": 0.238782 }, "C": { "name": "State Updating", "count": 647, "ratio": 0.259215 }, "D": { "name": "State Abstraction", "count": 414, "ratio": 0.165865 } } }, "domain_distribution": { "TEXT2SQL": { "total_episodes": 51, "episode_ratio": 0.245192, "total_qa_pairs": 612, "qa_ratio": 0.245192, "problem_types": { "A": { "name": "Recall", "count": 223, "ratio_in_domain": 0.364379, "ratio_overall": 0.089343 }, "B": { "name": "Causal Inference", "count": 153, "ratio_in_domain": 0.250000, "ratio_overall": 0.061298 }, "C": { "name": "State Updating", "count": 134, "ratio_in_domain": 0.218954, "ratio_overall": 0.053686 }, "D": { "name": "State Abstraction", "count": 102, "ratio_in_domain": 0.166667, "ratio_overall": 0.040865 } } }, "SOFTWARE": { "total_episodes": 36, "episode_ratio": 0.173077, "total_qa_pairs": 432, "qa_ratio": 0.173077, "problem_types": { "A": { "name": "Recall", "count": 212, "ratio_in_domain": 0.490741, "ratio_overall": 0.084936 }, "B": { "name": "Causal Inference", "count": 75, "ratio_in_domain": 0.173611, "ratio_overall": 0.030048 }, "C": { "name": "State Updating", "count": 73, "ratio_in_domain": 0.168981, "ratio_overall": 0.029247 }, "D": { "name": "State Abstraction", "count": 72, "ratio_in_domain": 0.166667, "ratio_overall": 0.028846 } } }, "WEB": { "total_episodes": 31, "episode_ratio": 0.149038, "total_qa_pairs": 372, "qa_ratio": 0.149038, "problem_types": { "A": { "name": "Recall", "count": 125, "ratio_in_domain": 0.336022, "ratio_overall": 0.050080 }, "B": { "name": "Causal Inference", "count": 93,
"ratio_in_domain": 0.250000, "ratio_overall": 0.037260 }, "C": { "name": "State Updating", "count": 93, "ratio_in_domain": 0.250000, "ratio_overall": 0.037260 }, "D": { "name": "State Abstraction", "count": 61, "ratio_in_domain": 0.163978, "ratio_overall": 0.024439 } } }, "GAME": { "total_episodes": 30, "episode_ratio": 0.144231, "total_qa_pairs": 360, "qa_ratio": 0.144231, "problem_types": { "A": { "name": "Recall", "count": 120, "ratio_in_domain": 0.333333, "ratio_overall": 0.048077 }, "B": { "name": "Causal Inference", "count": 90, "ratio_in_domain": 0.250000, "ratio_overall": 0.036058 }, "C": { "name": "State Updating", "count": 90, "ratio_in_domain": 0.250000, "ratio_overall": 0.036058 }, "D": { "name": "State Abstraction", "count": 60, "ratio_in_domain": 0.166667, "ratio_overall": 0.024038 } } }, "EMBODIED_AI": { "total_episodes": 30, "episode_ratio": 0.144231, "total_qa_pairs": 360, "qa_ratio": 0.144231, "problem_types": { "A": { "name": "Recall", "count": 61, "ratio_in_domain": 0.169444, "ratio_overall": 0.024439 }, "B": { "name": "Causal Inference", "count": 90, "ratio_in_domain": 0.250000, "ratio_overall": 0.036058 }, "C": { "name": "State Updating", "count": 150, "ratio_in_domain": 0.416667, "ratio_overall": 0.060096 }, "D": { "name": "State Abstraction", "count": 59, "ratio_in_domain": 0.163889, "ratio_overall": 0.023638 } } }, "OPENWORLD_QA": { "total_episodes": 30, "episode_ratio": 0.144231, "total_qa_pairs": 360, "qa_ratio": 0.144231, "problem_types": { "A": { "name": "Recall", "count": 98, "ratio_in_domain": 0.272222, "ratio_overall": 0.039263 }, "B": { "name": "Causal Inference", "count": 95, "ratio_in_domain": 0.263889, "ratio_overall": 0.038061 }, "C": { "name": "State Updating", "count": 107, "ratio_in_domain": 0.297222, "ratio_overall": 0.042869 }, "D": { "name": "State Abstraction", "count": 60, "ratio_in_domain": 0.166667, "ratio_overall": 0.024038 } } } } }