Spaces:
Running
Running
| { | |
| "metadata": { | |
| "description": "Distribution of QA pairs across domains and problem types", | |
| "total_episodes": 208, | |
| "total_qa_pairs": 2496, | |
| "problem_type_mapping": { | |
| "A": "Recall", | |
| "B": "Causal Inference", | |
| "C": "State Updating", | |
| "D": "State Abstraction" | |
| } | |
| }, | |
| "overall_distribution": { | |
| "total_qa_pairs": 2496, | |
| "problem_types": { | |
| "A": { | |
| "name": "Recall", | |
| "count": 839, | |
| "ratio": 0.336138 | |
| }, | |
| "B": { | |
| "name": "Causal Inference", | |
| "count": 596, | |
| "ratio": 0.238782 | |
| }, | |
| "C": { | |
| "name": "State Updating", | |
| "count": 647, | |
| "ratio": 0.259215 | |
| }, | |
| "D": { | |
| "name": "State Abstraction", | |
| "count": 414, | |
| "ratio": 0.165865 | |
| } | |
| } | |
| }, | |
| "domain_distribution": { | |
| "TEXT2SQL": { | |
| "total_episodes": 51, | |
| "episode_ratio": 0.245192, | |
| "total_qa_pairs": 612, | |
| "qa_ratio": 0.245192, | |
| "problem_types": { | |
| "A": { | |
| "name": "Recall", | |
| "count": 223, | |
| "ratio_in_domain": 0.364379, | |
| "ratio_overall": 0.089343 | |
| }, | |
| "B": { | |
| "name": "Causal Inference", | |
| "count": 153, | |
| "ratio_in_domain": 0.250000, | |
| "ratio_overall": 0.061298 | |
| }, | |
| "C": { | |
| "name": "State Updating", | |
| "count": 134, | |
| "ratio_in_domain": 0.218954, | |
| "ratio_overall": 0.053686 | |
| }, | |
| "D": { | |
| "name": "State Abstraction", | |
| "count": 102, | |
| "ratio_in_domain": 0.166667, | |
| "ratio_overall": 0.040865 | |
| } | |
| } | |
| }, | |
| "SOFTWARE": { | |
| "total_episodes": 36, | |
| "episode_ratio": 0.173077, | |
| "total_qa_pairs": 432, | |
| "qa_ratio": 0.173077, | |
| "problem_types": { | |
| "A": { | |
| "name": "Recall", | |
| "count": 212, | |
| "ratio_in_domain": 0.490741, | |
| "ratio_overall": 0.084936 | |
| }, | |
| "B": { | |
| "name": "Causal Inference", | |
| "count": 75, | |
| "ratio_in_domain": 0.173611, | |
| "ratio_overall": 0.030048 | |
| }, | |
| "C": { | |
| "name": "State Updating", | |
| "count": 73, | |
| "ratio_in_domain": 0.168981, | |
| "ratio_overall": 0.029247 | |
| }, | |
| "D": { | |
| "name": "State Abstraction", | |
| "count": 72, | |
| "ratio_in_domain": 0.166667, | |
| "ratio_overall": 0.028846 | |
| } | |
| } | |
| }, | |
| "WEB": { | |
| "total_episodes": 31, | |
| "episode_ratio": 0.149038, | |
| "total_qa_pairs": 372, | |
| "qa_ratio": 0.149038, | |
| "problem_types": { | |
| "A": { | |
| "name": "Recall", | |
| "count": 125, | |
| "ratio_in_domain": 0.336022, | |
| "ratio_overall": 0.050080 | |
| }, | |
| "B": { | |
| "name": "Causal Inference", | |
| "count": 93, | |
| "ratio_in_domain": 0.250000, | |
| "ratio_overall": 0.037260 | |
| }, | |
| "C": { | |
| "name": "State Updating", | |
| "count": 93, | |
| "ratio_in_domain": 0.250000, | |
| "ratio_overall": 0.037260 | |
| }, | |
| "D": { | |
| "name": "State Abstraction", | |
| "count": 61, | |
| "ratio_in_domain": 0.163978, | |
| "ratio_overall": 0.024439 | |
| } | |
| } | |
| }, | |
| "GAME": { | |
| "total_episodes": 30, | |
| "episode_ratio": 0.144231, | |
| "total_qa_pairs": 360, | |
| "qa_ratio": 0.144231, | |
| "problem_types": { | |
| "A": { | |
| "name": "Recall", | |
| "count": 120, | |
| "ratio_in_domain": 0.333333, | |
| "ratio_overall": 0.048077 | |
| }, | |
| "B": { | |
| "name": "Causal Inference", | |
| "count": 90, | |
| "ratio_in_domain": 0.250000, | |
| "ratio_overall": 0.036058 | |
| }, | |
| "C": { | |
| "name": "State Updating", | |
| "count": 90, | |
| "ratio_in_domain": 0.250000, | |
| "ratio_overall": 0.036058 | |
| }, | |
| "D": { | |
| "name": "State Abstraction", | |
| "count": 60, | |
| "ratio_in_domain": 0.166667, | |
| "ratio_overall": 0.024038 | |
| } | |
| } | |
| }, | |
| "EMBODIED_AI": { | |
| "total_episodes": 30, | |
| "episode_ratio": 0.144231, | |
| "total_qa_pairs": 360, | |
| "qa_ratio": 0.144231, | |
| "problem_types": { | |
| "A": { | |
| "name": "Recall", | |
| "count": 61, | |
| "ratio_in_domain": 0.169444, | |
| "ratio_overall": 0.024439 | |
| }, | |
| "B": { | |
| "name": "Causal Inference", | |
| "count": 90, | |
| "ratio_in_domain": 0.250000, | |
| "ratio_overall": 0.036058 | |
| }, | |
| "C": { | |
| "name": "State Updating", | |
| "count": 150, | |
| "ratio_in_domain": 0.416667, | |
| "ratio_overall": 0.060096 | |
| }, | |
| "D": { | |
| "name": "State Abstraction", | |
| "count": 59, | |
| "ratio_in_domain": 0.163889, | |
| "ratio_overall": 0.023638 | |
| } | |
| } | |
| }, | |
| "OPENWORLD_QA": { | |
| "total_episodes": 30, | |
| "episode_ratio": 0.144231, | |
| "total_qa_pairs": 360, | |
| "qa_ratio": 0.144231, | |
| "problem_types": { | |
| "A": { | |
| "name": "Recall", | |
| "count": 98, | |
| "ratio_in_domain": 0.272222, | |
| "ratio_overall": 0.039263 | |
| }, | |
| "B": { | |
| "name": "Causal Inference", | |
| "count": 95, | |
| "ratio_in_domain": 0.263889, | |
| "ratio_overall": 0.038061 | |
| }, | |
| "C": { | |
| "name": "State Updating", | |
| "count": 107, | |
| "ratio_in_domain": 0.297222, | |
| "ratio_overall": 0.042869 | |
| }, | |
| "D": { | |
| "name": "State Abstraction", | |
| "count": 60, | |
| "ratio_in_domain": 0.166667, | |
| "ratio_overall": 0.024038 | |
| } | |
| } | |
| } | |
| } | |
| } | |