suanlab committed on
Commit
1a3ff44
·
1 Parent(s): 0213c87

point Space tasks to uploaded images

Browse files
Files changed (2) hide show
  1. data/task_a_items.jsonl +19 -19
  2. data/task_b_pairs.jsonl +10 -10
data/task_a_items.jsonl CHANGED
@@ -1,19 +1,19 @@
1
- {"question_id": "tabular_000075::difficulty=1-hop", "question": "What is the value of 'clinic' in row 0?", "answer": "south", "modality": "tabular", "difficulty": "1-hop", "source": "synthetic", "viz_type": "bar_chart", "image_path": "data/images/tabular/tabular_000075__difficulty-1-hop_bar_chart.png"}
2
- {"question_id": "tabular_000180::difficulty=1-hop", "question": "Which 'category' has the highest 'backorder_units' value?", "answer": "apparel", "modality": "tabular", "difficulty": "1-hop", "source": "synthetic", "viz_type": "bar_chart", "image_path": "data/images/tabular/tabular_000180__difficulty-1-hop_bar_chart.png"}
3
- {"question_id": "tabular_000050::difficulty=1-hop", "question": "What is the value of 'patient_group' in row 0?", "answer": "A", "modality": "tabular", "difficulty": "1-hop", "source": "synthetic", "viz_type": "heatmap", "image_path": "data/images/tabular/tabular_000050__difficulty-1-hop_heatmap.png"}
4
- {"question_id": "tabular_000450::difficulty=1-hop", "question": "What is the value of 'hotel_class' in row 0?", "answer": "luxury", "modality": "tabular", "difficulty": "1-hop", "source": "synthetic", "viz_type": "scatter_plot", "image_path": "data/images/tabular/tabular_000450__difficulty-1-hop_scatter_plot.png"}
5
- {"question_id": "tabular_000251::difficulty=1-hop", "question": "What is the value of 'game_date' in row 0?", "answer": "2024-01-03 00:00:00", "modality": "tabular", "difficulty": "1-hop", "source": "synthetic", "viz_type": "bar_chart", "image_path": "data/images/tabular/tabular_000251__difficulty-1-hop_bar_chart.png"}
6
- {"question_id": "tabular_000139::difficulty=2-hop", "question": "How many rows have 'disciplinary_events' strictly greater than 8.45?", "answer": "12", "modality": "tabular", "difficulty": "2-hop", "source": "synthetic", "viz_type": "scatter_plot", "image_path": "data/images/tabular/tabular_000139__difficulty-2-hop_scatter_plot.png"}
7
- {"question_id": "tabular_000086::difficulty=2-hop", "question": "What is the average of 'ldl_mg_dl' across all rows? Round to 2 decimals.", "answer": "105.55", "modality": "tabular", "difficulty": "2-hop", "source": "synthetic", "viz_type": "text_only", "image_path": "data/images/tabular/tabular_000086__difficulty-2-hop_text_only.png"}
8
- {"question_id": "tabular_000111::difficulty=2-hop", "question": "How many rows have 'math_score' strictly greater than 76.03?", "answer": "14", "modality": "tabular", "difficulty": "2-hop", "source": "synthetic", "viz_type": "scatter_plot", "image_path": "data/images/tabular/tabular_000111__difficulty-2-hop_scatter_plot.png"}
9
- {"question_id": "tabular_000164::difficulty=2-hop", "question": "What is the median of 'discount_usd' across all rows? Round to 2 decimals.", "answer": "757.26", "modality": "tabular", "difficulty": "2-hop", "source": "synthetic", "viz_type": "table_image", "image_path": "data/images/tabular/tabular_000164__difficulty-2-hop_table_image.png"}
10
- {"question_id": "tabular_000434::difficulty=2-hop", "question": "What is the sum of 'units_produced' across all rows? Round to 2 decimals.", "answer": "38159.0", "modality": "tabular", "difficulty": "2-hop", "source": "synthetic", "viz_type": "heatmap", "image_path": "data/images/tabular/tabular_000434__difficulty-2-hop_heatmap.png"}
11
- {"question_id": "tabular_000141::difficulty=3-hop", "question": "Which 'school' entry deviates most from the mean of 'attendance_pct'?", "answer": "north_high", "modality": "tabular", "difficulty": "3-hop", "source": "synthetic", "viz_type": "table_image", "image_path": "data/images/tabular/tabular_000141__difficulty-3-hop_table_image.png"}
12
- {"question_id": "tabular_000339::difficulty=3-hop", "question": "Which column has the higher average value: 'median_income_usd' or 'unemployment_pct'?", "answer": "median_income_usd", "modality": "tabular", "difficulty": "3-hop", "source": "synthetic", "viz_type": "text_only", "image_path": "data/images/tabular/tabular_000339__difficulty-3-hop_text_only.png"}
13
- {"question_id": "tabular_000294::difficulty=3-hop", "question": "Which 'team' entry deviates most from the mean of 'possession_pct'?", "answer": "rangers", "modality": "tabular", "difficulty": "3-hop", "source": "synthetic", "viz_type": "table_image", "image_path": "data/images/tabular/tabular_000294__difficulty-3-hop_table_image.png"}
14
- {"question_id": "tabular_000167::difficulty=3-hop", "question": "Do 'transactions' and 'gross_sales_usd' appear positively or negatively correlated?", "answer": "positive", "modality": "tabular", "difficulty": "3-hop", "source": "synthetic", "viz_type": "text_only", "image_path": "data/images/tabular/tabular_000167__difficulty-3-hop_text_only.png"}
15
- {"question_id": "tabular_000373::difficulty=counterfactual", "question": "If all values in 'vehicle_count' were multiplied by 1.2, would 'vehicle_count' remain above 'avg_speed_kmh' on average?", "answer": "yes", "modality": "tabular", "difficulty": "counterfactual", "source": "synthetic", "viz_type": "text_only", "image_path": "data/images/tabular/tabular_000373__difficulty-counterfactual_text_only.png"}
16
- {"question_id": "tabular_000048::difficulty=counterfactual", "question": "If all values in 'operating_margin_pct' were multiplied by 0.85, would 'operating_margin_pct' remain above 'opex_musd' on average?", "answer": "yes", "modality": "tabular", "difficulty": "counterfactual", "source": "synthetic", "viz_type": "scatter_plot", "image_path": "data/images/tabular/tabular_000048__difficulty-counterfactual_scatter_plot.png"}
17
- {"question_id": "tabular_000122::difficulty=counterfactual", "question": "If all values in 'math_score' were multiplied by 1.2, would 'math_score' remain above 'science_score' on average?", "answer": "no", "modality": "tabular", "difficulty": "counterfactual", "source": "synthetic", "viz_type": "text_only", "image_path": "data/images/tabular/tabular_000122__difficulty-counterfactual_text_only.png"}
18
- {"question_id": "tabular_000224::difficulty=counterfactual", "question": "If all values in 'temperature_c' were multiplied by 1.2, would 'temperature_c' remain above 'rainfall_mm' on average?", "answer": "yes", "modality": "tabular", "difficulty": "counterfactual", "source": "synthetic", "viz_type": "bar_chart", "image_path": "data/images/tabular/tabular_000224__difficulty-counterfactual_bar_chart.png"}
19
- {"question_id": "timeseries_000024::difficulty=counterfactual", "question": "If we add 5 only to second half, would second-half mean exceed first-half mean?", "answer": "no", "modality": "timeseries", "difficulty": "counterfactual", "source": "synthetic", "viz_type": "heatmap", "image_path": "data/images/timeseries/timeseries_000024__difficulty-counterfactual_heatmap.png"}
 
1
+ {"question_id": "tabular_000075::difficulty=1-hop", "question": "What is the value of 'clinic' in row 0?", "answer": "south", "modality": "tabular", "difficulty": "1-hop", "source": "synthetic", "viz_type": "bar_chart", "image_path": "images/tabular_000075__difficulty-1-hop_bar_chart.png"}
2
+ {"question_id": "tabular_000180::difficulty=1-hop", "question": "Which 'category' has the highest 'backorder_units' value?", "answer": "apparel", "modality": "tabular", "difficulty": "1-hop", "source": "synthetic", "viz_type": "bar_chart", "image_path": "images/tabular_000180__difficulty-1-hop_bar_chart.png"}
3
+ {"question_id": "tabular_000050::difficulty=1-hop", "question": "What is the value of 'patient_group' in row 0?", "answer": "A", "modality": "tabular", "difficulty": "1-hop", "source": "synthetic", "viz_type": "heatmap", "image_path": "images/tabular_000050__difficulty-1-hop_heatmap.png"}
4
+ {"question_id": "tabular_000450::difficulty=1-hop", "question": "What is the value of 'hotel_class' in row 0?", "answer": "luxury", "modality": "tabular", "difficulty": "1-hop", "source": "synthetic", "viz_type": "scatter_plot", "image_path": "images/tabular_000450__difficulty-1-hop_scatter_plot.png"}
5
+ {"question_id": "tabular_000251::difficulty=1-hop", "question": "What is the value of 'game_date' in row 0?", "answer": "2024-01-03 00:00:00", "modality": "tabular", "difficulty": "1-hop", "source": "synthetic", "viz_type": "bar_chart", "image_path": "images/tabular_000251__difficulty-1-hop_bar_chart.png"}
6
+ {"question_id": "tabular_000139::difficulty=2-hop", "question": "How many rows have 'disciplinary_events' strictly greater than 8.45?", "answer": "12", "modality": "tabular", "difficulty": "2-hop", "source": "synthetic", "viz_type": "scatter_plot", "image_path": "images/tabular_000139__difficulty-2-hop_scatter_plot.png"}
7
+ {"question_id": "tabular_000086::difficulty=2-hop", "question": "What is the average of 'ldl_mg_dl' across all rows? Round to 2 decimals.", "answer": "105.55", "modality": "tabular", "difficulty": "2-hop", "source": "synthetic", "viz_type": "text_only", "image_path": "images/tabular_000086__difficulty-2-hop_text_only.png"}
8
+ {"question_id": "tabular_000111::difficulty=2-hop", "question": "How many rows have 'math_score' strictly greater than 76.03?", "answer": "14", "modality": "tabular", "difficulty": "2-hop", "source": "synthetic", "viz_type": "scatter_plot", "image_path": "images/tabular_000111__difficulty-2-hop_scatter_plot.png"}
9
+ {"question_id": "tabular_000164::difficulty=2-hop", "question": "What is the median of 'discount_usd' across all rows? Round to 2 decimals.", "answer": "757.26", "modality": "tabular", "difficulty": "2-hop", "source": "synthetic", "viz_type": "table_image", "image_path": "images/tabular_000164__difficulty-2-hop_table_image.png"}
10
+ {"question_id": "tabular_000434::difficulty=2-hop", "question": "What is the sum of 'units_produced' across all rows? Round to 2 decimals.", "answer": "38159.0", "modality": "tabular", "difficulty": "2-hop", "source": "synthetic", "viz_type": "heatmap", "image_path": "images/tabular_000434__difficulty-2-hop_heatmap.png"}
11
+ {"question_id": "tabular_000141::difficulty=3-hop", "question": "Which 'school' entry deviates most from the mean of 'attendance_pct'?", "answer": "north_high", "modality": "tabular", "difficulty": "3-hop", "source": "synthetic", "viz_type": "table_image", "image_path": "images/tabular_000141__difficulty-3-hop_table_image.png"}
12
+ {"question_id": "tabular_000339::difficulty=3-hop", "question": "Which column has the higher average value: 'median_income_usd' or 'unemployment_pct'?", "answer": "median_income_usd", "modality": "tabular", "difficulty": "3-hop", "source": "synthetic", "viz_type": "text_only", "image_path": "images/tabular_000339__difficulty-3-hop_text_only.png"}
13
+ {"question_id": "tabular_000294::difficulty=3-hop", "question": "Which 'team' entry deviates most from the mean of 'possession_pct'?", "answer": "rangers", "modality": "tabular", "difficulty": "3-hop", "source": "synthetic", "viz_type": "table_image", "image_path": "images/tabular_000294__difficulty-3-hop_table_image.png"}
14
+ {"question_id": "tabular_000167::difficulty=3-hop", "question": "Do 'transactions' and 'gross_sales_usd' appear positively or negatively correlated?", "answer": "positive", "modality": "tabular", "difficulty": "3-hop", "source": "synthetic", "viz_type": "text_only", "image_path": "images/tabular_000167__difficulty-3-hop_text_only.png"}
15
+ {"question_id": "tabular_000373::difficulty=counterfactual", "question": "If all values in 'vehicle_count' were multiplied by 1.2, would 'vehicle_count' remain above 'avg_speed_kmh' on average?", "answer": "yes", "modality": "tabular", "difficulty": "counterfactual", "source": "synthetic", "viz_type": "text_only", "image_path": "images/tabular_000373__difficulty-counterfactual_text_only.png"}
16
+ {"question_id": "tabular_000048::difficulty=counterfactual", "question": "If all values in 'operating_margin_pct' were multiplied by 0.85, would 'operating_margin_pct' remain above 'opex_musd' on average?", "answer": "yes", "modality": "tabular", "difficulty": "counterfactual", "source": "synthetic", "viz_type": "scatter_plot", "image_path": "images/tabular_000048__difficulty-counterfactual_scatter_plot.png"}
17
+ {"question_id": "tabular_000122::difficulty=counterfactual", "question": "If all values in 'math_score' were multiplied by 1.2, would 'math_score' remain above 'science_score' on average?", "answer": "no", "modality": "tabular", "difficulty": "counterfactual", "source": "synthetic", "viz_type": "text_only", "image_path": "images/tabular_000122__difficulty-counterfactual_text_only.png"}
18
+ {"question_id": "tabular_000224::difficulty=counterfactual", "question": "If all values in 'temperature_c' were multiplied by 1.2, would 'temperature_c' remain above 'rainfall_mm' on average?", "answer": "yes", "modality": "tabular", "difficulty": "counterfactual", "source": "synthetic", "viz_type": "bar_chart", "image_path": "images/tabular_000224__difficulty-counterfactual_bar_chart.png"}
19
+ {"question_id": "timeseries_000024::difficulty=counterfactual", "question": "If we add 5 only to second half, would second-half mean exceed first-half mean?", "answer": "no", "modality": "timeseries", "difficulty": "counterfactual", "source": "synthetic", "viz_type": "heatmap", "image_path": "images/timeseries_000024__difficulty-counterfactual_heatmap.png"}
data/task_b_pairs.jsonl CHANGED
@@ -1,10 +1,10 @@
1
- {"question_id": "tabular_000006::difficulty=1-hop", "question": "Which 'ticker' has the lowest 'close_price' value?", "answer": "BET", "viz_a": "table_image", "viz_b": "bar_chart", "em_a": 1.0, "em_b": 0.0, "image_a_path": "data/images/tabular/tabular_000006__difficulty-1-hop_table_image.png", "image_b_path": "data/images/tabular/tabular_000006__difficulty-1-hop_bar_chart.png"}
2
- {"question_id": "tabular_000077::difficulty=1-hop", "question": "What is the value of 'hemoglobin_g_dl' in row 0?", "answer": "14.81", "viz_a": "heatmap", "viz_b": "bar_chart", "em_a": 1.0, "em_b": 0.0, "image_a_path": "data/images/tabular/tabular_000077__difficulty-1-hop_heatmap.png", "image_b_path": "data/images/tabular/tabular_000077__difficulty-1-hop_bar_chart.png"}
3
- {"question_id": "tabular_000031::difficulty=1-hop", "question": "Which 'business_unit' has the lowest 'revenue_musd' value?", "answer": "cloud", "viz_a": "table_image", "viz_b": "bar_chart", "em_a": 1.0, "em_b": 0.0, "image_a_path": "data/images/tabular/tabular_000031__difficulty-1-hop_table_image.png", "image_b_path": "data/images/tabular/tabular_000031__difficulty-1-hop_bar_chart.png"}
4
- {"question_id": "tabular_000012::difficulty=2-hop", "question": "What is the average of 'trade_volume_k' across all rows? Round to 2 decimals.", "answer": "2854.92", "viz_a": "bar_chart", "viz_b": "heatmap", "em_a": 1.0, "em_b": 0.0, "image_a_path": "data/images/tabular/tabular_000012__difficulty-2-hop_bar_chart.png", "image_b_path": "data/images/tabular/tabular_000012__difficulty-2-hop_heatmap.png"}
5
- {"question_id": "tabular_000435::difficulty=2-hop", "question": "How many rows have 'units_produced' strictly greater than 1301.5?", "answer": "15", "viz_a": "scatter_plot", "viz_b": "bar_chart", "em_a": 1.0, "em_b": 0.0, "image_a_path": "data/images/tabular/tabular_000435__difficulty-2-hop_scatter_plot.png", "image_b_path": "data/images/tabular/tabular_000435__difficulty-2-hop_bar_chart.png"}
6
- {"question_id": "tabular_000220::difficulty=3-hop", "question": "Which 'city' entry deviates most from the mean of 'temperature_c'?", "answer": "lakeside", "viz_a": "table_image", "viz_b": "bar_chart", "em_a": 1.0, "em_b": 0.0, "image_a_path": "data/images/tabular/tabular_000220__difficulty-3-hop_table_image.png", "image_b_path": "data/images/tabular/tabular_000220__difficulty-3-hop_bar_chart.png"}
7
- {"question_id": "tabular_000115::difficulty=3-hop", "question": "Which column has the higher average value: 'math_score' or 'science_score'?", "answer": "science_score", "viz_a": "bar_chart", "viz_b": "table_image", "em_a": 1.0, "em_b": 0.0, "image_a_path": "data/images/tabular/tabular_000115__difficulty-3-hop_bar_chart.png", "image_b_path": "data/images/tabular/tabular_000115__difficulty-3-hop_table_image.png"}
8
- {"question_id": "tabular_000124::difficulty=counterfactual", "question": "If all values in 'reading_score' were multiplied by 1.2, would 'reading_score' remain above 'math_score' on average?", "answer": "yes", "viz_a": "bar_chart", "viz_b": "scatter_plot", "em_a": 1.0, "em_b": 0.0, "image_a_path": "data/images/tabular/tabular_000124__difficulty-counterfactual_bar_chart.png", "image_b_path": "data/images/tabular/tabular_000124__difficulty-counterfactual_scatter_plot.png"}
9
- {"question_id": "tabular_000099::difficulty=counterfactual", "question": "If all values in 'creatinine_mg_dl' were multiplied by 1.2, would 'creatinine_mg_dl' remain above 'hemoglobin_g_dl' on average?", "answer": "yes", "viz_a": "text_only", "viz_b": "bar_chart", "em_a": 1.0, "em_b": 0.0, "image_a_path": "data/images/tabular/tabular_000099__difficulty-counterfactual_text_only.png", "image_b_path": "data/images/tabular/tabular_000099__difficulty-counterfactual_bar_chart.png"}
10
- {"question_id": "timeseries_000024::difficulty=counterfactual", "question": "If we add 5 only to second half, would second-half mean exceed first-half mean?", "answer": "no", "viz_a": "gaf", "viz_b": "heatmap", "em_a": 1.0, "em_b": 0.0, "image_a_path": "data/images/timeseries/timeseries_000024__difficulty-counterfactual_gaf.png", "image_b_path": "data/images/timeseries/timeseries_000024__difficulty-counterfactual_heatmap.png"}
 
1
+ {"question_id": "tabular_000006::difficulty=1-hop", "question": "Which 'ticker' has the lowest 'close_price' value?", "answer": "BET", "viz_a": "table_image", "viz_b": "bar_chart", "em_a": 1.0, "em_b": 0.0, "image_a_path": "images/tabular_000006__difficulty-1-hop_table_image.png", "image_b_path": "images/tabular_000006__difficulty-1-hop_bar_chart.png"}
2
+ {"question_id": "tabular_000077::difficulty=1-hop", "question": "What is the value of 'hemoglobin_g_dl' in row 0?", "answer": "14.81", "viz_a": "heatmap", "viz_b": "bar_chart", "em_a": 1.0, "em_b": 0.0, "image_a_path": "images/tabular_000077__difficulty-1-hop_heatmap.png", "image_b_path": "images/tabular_000077__difficulty-1-hop_bar_chart.png"}
3
+ {"question_id": "tabular_000031::difficulty=1-hop", "question": "Which 'business_unit' has the lowest 'revenue_musd' value?", "answer": "cloud", "viz_a": "table_image", "viz_b": "bar_chart", "em_a": 1.0, "em_b": 0.0, "image_a_path": "images/tabular_000031__difficulty-1-hop_table_image.png", "image_b_path": "images/tabular_000031__difficulty-1-hop_bar_chart.png"}
4
+ {"question_id": "tabular_000012::difficulty=2-hop", "question": "What is the average of 'trade_volume_k' across all rows? Round to 2 decimals.", "answer": "2854.92", "viz_a": "bar_chart", "viz_b": "heatmap", "em_a": 1.0, "em_b": 0.0, "image_a_path": "images/tabular_000012__difficulty-2-hop_bar_chart.png", "image_b_path": "images/tabular_000012__difficulty-2-hop_heatmap.png"}
5
+ {"question_id": "tabular_000435::difficulty=2-hop", "question": "How many rows have 'units_produced' strictly greater than 1301.5?", "answer": "15", "viz_a": "scatter_plot", "viz_b": "bar_chart", "em_a": 1.0, "em_b": 0.0, "image_a_path": "images/tabular_000435__difficulty-2-hop_scatter_plot.png", "image_b_path": "images/tabular_000435__difficulty-2-hop_bar_chart.png"}
6
+ {"question_id": "tabular_000220::difficulty=3-hop", "question": "Which 'city' entry deviates most from the mean of 'temperature_c'?", "answer": "lakeside", "viz_a": "table_image", "viz_b": "bar_chart", "em_a": 1.0, "em_b": 0.0, "image_a_path": "images/tabular_000220__difficulty-3-hop_table_image.png", "image_b_path": "images/tabular_000220__difficulty-3-hop_bar_chart.png"}
7
+ {"question_id": "tabular_000115::difficulty=3-hop", "question": "Which column has the higher average value: 'math_score' or 'science_score'?", "answer": "science_score", "viz_a": "bar_chart", "viz_b": "table_image", "em_a": 1.0, "em_b": 0.0, "image_a_path": "images/tabular_000115__difficulty-3-hop_bar_chart.png", "image_b_path": "images/tabular_000115__difficulty-3-hop_table_image.png"}
8
+ {"question_id": "tabular_000124::difficulty=counterfactual", "question": "If all values in 'reading_score' were multiplied by 1.2, would 'reading_score' remain above 'math_score' on average?", "answer": "yes", "viz_a": "bar_chart", "viz_b": "scatter_plot", "em_a": 1.0, "em_b": 0.0, "image_a_path": "images/tabular_000124__difficulty-counterfactual_bar_chart.png", "image_b_path": "images/tabular_000124__difficulty-counterfactual_scatter_plot.png"}
9
+ {"question_id": "tabular_000099::difficulty=counterfactual", "question": "If all values in 'creatinine_mg_dl' were multiplied by 1.2, would 'creatinine_mg_dl' remain above 'hemoglobin_g_dl' on average?", "answer": "yes", "viz_a": "text_only", "viz_b": "bar_chart", "em_a": 1.0, "em_b": 0.0, "image_a_path": "images/tabular_000099__difficulty-counterfactual_text_only.png", "image_b_path": "images/tabular_000099__difficulty-counterfactual_bar_chart.png"}
10
+ {"question_id": "timeseries_000024::difficulty=counterfactual", "question": "If we add 5 only to second half, would second-half mean exceed first-half mean?", "answer": "no", "viz_a": "gaf", "viz_b": "heatmap", "em_a": 1.0, "em_b": 0.0, "image_a_path": "images/timeseries_000024__difficulty-counterfactual_gaf.png", "image_b_path": "images/timeseries_000024__difficulty-counterfactual_heatmap.png"}