Spaces:
Running
Running
| """ | |
| SQL template definitions for synthetic data generation. | |
| Geometry output convention | |
| -------------------------- | |
| Every final SELECT wraps geometry with ST_AsGeoJSON(): | |
| ST_AsGeoJSON(geometry) AS geometry | |
| This returns a GeoJSON string instead of raw WKB bytes, which is directly | |
| JSON-serialisable and matches what the serving stack expects. | |
| CTEs that compute intermediate geometries (used only for spatial predicates | |
| or ST_Area) keep the column as raw GEOMETRY so DuckDB spatial functions work. | |
| Buffer distance convention | |
| -------------------------- | |
| All buffer templates use {buffer_m} (metres) in SQL: geometry / 111_320. | |
| Question hints use {buffer_label} β a human-readable string like "50 km" or | |
| "500 m" β generated by the handler from a combined (metres, label) choice list. | |
| This keeps km and metre queries in a single template instead of separate ones. | |
| Mixed-source candidates | |
| ----------------------- | |
| generate_samples.py pads every candidate list with 50 % cross-source | |
| distractors so the model always sees both source values and learns the | |
| correct parquet path from the candidates table. | |
| Template families | |
| ----------------- | |
| direct_lookup Simple single-feature fetch by ID. | |
| disambiguation "Place, Container" queries like "Puri, Odisha" β lookup by | |
| ID after resolving an ambiguous name via containing region | |
| or country mentioned in the query. | |
| adjacency ST_Touches β features sharing a border. | |
| multi_adjacency Features that simultaneously touch TWO anchors. | |
| containment ST_Within / ST_Contains β hierarchical nesting. | |
| intersection ST_Intersects β overlapping or crossing features. | |
| buffer ST_Buffer β proximity zones in km or metres. | |
| chained Containment + EXISTS/NOT EXISTS sea predicate. | |
| difference ST_Difference β geometry subtraction. | |
| border_corridor Buffered ST_Intersection of a shared border. | |
| set_operations ST_Union_Agg β merging multiple geometries. | |
| partial_selection Bbox clipping β directional halves or feature clips. | |
| aggregation TOP-N by area with ORDER BY. | |
| window_function ROW_NUMBER() OVER (PARTITION BY) β per-group ranking. | |
| attribute_filter Pure attribute predicates: is_land, country, etc. | |
| """ | |
| from dataclasses import dataclass | |
| from typing import List, Literal | |
| _SUBTYPE_ALIASES = { | |
| "country": ["countries", "nations"], | |
| "region": ["states", "provinces", "oblasts", "departments", "prefectures", "cantons"], | |
| "county": ["districts", "municipalities", "boroughs", "communes", "parishes"], | |
| } | |
| def _join_anchor_exprs(num_anchors: int) -> str: | |
| """Join anchor placeholders into a natural-language list.""" | |
| names = [f"{{anchor_{i}_name}}" for i in range(1, num_anchors + 1)] | |
| if num_anchors == 1: | |
| return names[0] | |
| if num_anchors == 2: | |
| return f"{names[0]} or {names[1]}" | |
| return ", ".join(names[:-1]) + f", or {names[-1]}" | |
| def _buffer_question_hints(target_subtype: str, anchor_name_expr: str) -> List[str]: | |
| """Build varied buffer question hints for one target subtype.""" | |
| aliases = _SUBTYPE_ALIASES[target_subtype] | |
| return [ | |
| f"which {aliases[0]} are within {{buffer_label}} of {anchor_name_expr}?", | |
| f"{aliases[0]} within {{buffer_label}} of {anchor_name_expr}", | |
| f"find {aliases[0]} within a {{buffer_label}} radius of {anchor_name_expr}", | |
| f"which {aliases[1]} are within {{buffer_label}} of {anchor_name_expr}?", | |
| f"show {aliases[0]} within {{buffer_label}} of {anchor_name_expr}", | |
| f"what {aliases[0]} fall within {{buffer_label}} of {anchor_name_expr}?", | |
| f"areas close to {anchor_name_expr} within {{buffer_label}}", | |
| ] | |
| def _make_buffer_templates() -> List["SQLTemplate"]: | |
| """Generate subtype-specific and multi-anchor buffer templates.""" | |
| templates: List[SQLTemplate] = [] | |
| for template_id, target_subtype in ( | |
| ("buffer_04", "country"), | |
| ("buffer_05", "region"), | |
| ("buffer_06", "county"), | |
| ): | |
| templates.append( | |
| SQLTemplate( | |
| template_id=template_id, | |
| family="buffer", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype=target_subtype, | |
| requires_buffer=True, | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT ST_Buffer(geometry, {buffer_m} / 111320.0) AS geom" | |
| " FROM read_parquet('divisions_area')" | |
| " WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, a" | |
| " WHERE b.id != '{anchor_id}'" | |
| " AND b.subtype = '{target_subtype}'" | |
| " AND ST_Intersects(b.geometry, a.geom)" | |
| ), | |
| question_hints=_buffer_question_hints(target_subtype, "{anchor_name}"), | |
| ) | |
| ) | |
| for template_id, num_anchors, target_subtype in ( | |
| ("buffer_07", 2, "region"), | |
| ("buffer_08", 3, "country"), | |
| ("buffer_09", 4, "region"), | |
| ("buffer_10", 5, "county"), | |
| ): | |
| anchor_ids = ", ".join(f"'{{anchor_id_{i}}}'" for i in range(1, num_anchors + 1)) | |
| name_expr = _join_anchor_exprs(num_anchors) | |
| templates.append( | |
| SQLTemplate( | |
| template_id=template_id, | |
| family="buffer", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=num_anchors, | |
| target_subtype=target_subtype, | |
| requires_buffer=True, | |
| sql_template=( | |
| "WITH buf AS (" | |
| " SELECT ST_Union_Agg(ST_Buffer(geometry, {buffer_m} / 111320.0)) AS geom" | |
| " FROM read_parquet('divisions_area')" | |
| f" WHERE id IN ({anchor_ids})" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, buf" | |
| f" WHERE b.id NOT IN ({anchor_ids})" | |
| " AND b.subtype = '{target_subtype}'" | |
| " AND ST_Intersects(b.geometry, buf.geom)" | |
| ), | |
| question_hints=_buffer_question_hints(target_subtype, name_expr), | |
| ) | |
| ) | |
| return templates | |
| def _make_country_in_templates() -> List["SQLTemplate"]: | |
| """Generate repeated country-IN set-operation templates.""" | |
| specs = ( | |
| ("contain_multi_01", 2, "region"), | |
| ("contain_multi_02", 3, "region"), | |
| ("contain_multi_03", 2, "county"), | |
| ) | |
| templates: List[SQLTemplate] = [] | |
| for template_id, num_anchors, target_subtype in specs: | |
| countries = ", ".join(f"'{{country_{i}}}'" for i in range(1, num_anchors + 1)) | |
| anchor_names = _join_anchor_exprs(num_anchors).replace(" or ", " and ") | |
| aliases = _SUBTYPE_ALIASES[target_subtype] | |
| question_hints = [ | |
| f"{aliases[0]} of {anchor_names}", | |
| f"all {aliases[0]} in {anchor_names}", | |
| f"show {aliases[0]} across {anchor_names}", | |
| f"find {aliases[0]} across {anchor_names}", | |
| f"list {aliases[0]} in {anchor_names}", | |
| f"areas of {anchor_names}", | |
| ] | |
| if len(aliases) > 1: | |
| question_hints.extend( | |
| [ | |
| f"{aliases[1]} of {anchor_names}", | |
| f"all {aliases[1]} in {anchor_names}", | |
| ] | |
| ) | |
| templates.append( | |
| SQLTemplate( | |
| template_id=template_id, | |
| family="set_operations", | |
| sql_difficulty="medium-hard", | |
| anchor_source="divisions_area", | |
| num_anchors=num_anchors, | |
| target_subtype=target_subtype, | |
| sql_template=( | |
| "SELECT id, names.\"primary\" AS name, subtype, country," | |
| " ST_AsGeoJSON(geometry) AS geometry" | |
| " FROM read_parquet('divisions_area')" | |
| f" WHERE country IN ({countries})" | |
| " AND subtype = '{target_subtype}'" | |
| ), | |
| question_hints=question_hints, | |
| ) | |
| ) | |
| return templates | |
| class SQLTemplate: | |
| """SQL template for synthetic data generation.""" | |
| template_id: str | |
| family: str | |
| sql_difficulty: Literal["easy", "medium", "medium-hard", "hard"] | |
| anchor_source: Literal["divisions_area", "natural_earth", "mixed"] | |
| num_anchors: int | |
| sql_template: str | |
| question_hints: List[str] | |
| target_subtype: str = None | |
| requires_buffer: bool = False | |
| requires_aggregation: bool = False | |
| # --------------------------------------------------------------------------- | |
| # Template catalog | |
| # --------------------------------------------------------------------------- | |
| TEMPLATES = [ | |
| # ββ DIRECT LOOKUP ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| SQLTemplate( | |
| template_id="lookup_01", | |
| family="direct_lookup", | |
| sql_difficulty="easy", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| sql_template=( | |
| "SELECT ST_AsGeoJSON(geometry) AS geometry," | |
| " names.\"primary\" AS name, id, subtype, country" | |
| " FROM read_parquet('divisions_area')" | |
| " WHERE id = '{anchor_id}'" | |
| ), | |
| question_hints=[ | |
| "show me {anchor_name}", | |
| "get the boundary of {anchor_name}", | |
| "find {anchor_name}", | |
| "where is {anchor_name}?", | |
| "outline of {anchor_name}", | |
| "map {anchor_name}", | |
| "what does {anchor_name} look like", | |
| "i need the shape of {anchor_name}", | |
| "pull up {anchor_name}", | |
| "can you show {anchor_name}", | |
| "map of {anchor_name}", | |
| "{anchor_name} boundary", | |
| "locate {anchor_name}", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="lookup_02", | |
| family="direct_lookup", | |
| sql_difficulty="easy", | |
| anchor_source="natural_earth", | |
| num_anchors=1, | |
| sql_template=( | |
| "SELECT ST_AsGeoJSON(geometry) AS geometry," | |
| " names.\"primary\" AS name, id, subtype" | |
| " FROM read_parquet('natural_earth')" | |
| " WHERE id = '{anchor_id}'" | |
| ), | |
| question_hints=[ | |
| "show me the {anchor_name}", | |
| "get the {anchor_name}", | |
| "find the {anchor_name}", | |
| "where is the {anchor_name}?", | |
| "extent of the {anchor_name}", | |
| "geometry of the {anchor_name}", | |
| "display the {anchor_name}", | |
| "pull up the {anchor_name}", | |
| "i want to see the {anchor_name}", | |
| "map the {anchor_name}", | |
| "how big is the {anchor_name}?", | |
| "outline of the {anchor_name}", | |
| "show the shape of the {anchor_name}", | |
| "trace the {anchor_name}", | |
| "map out the {anchor_name}", | |
| "where exactly is the {anchor_name}", | |
| ], | |
| ), | |
| # ββ DISAMBIGUATION ββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # "Puri, Odisha", "Lisbon, Portugal", "Goa, India" β a common real-world | |
| # query pattern where users give a place plus its containing region or | |
| # country to disambiguate same-name localities. | |
| # SQL is a plain lookup by id (disambiguation happens at candidate-pick | |
| # time). Candidates include same-name localities in other regions plus | |
| # the container, so the model must read the CSV to choose correctly. | |
| # | |
| # disambiguate_01: county scoped by its region / country | |
| # disambiguate_02: county scoped by its country | |
| # disambiguate_03: region scoped by its country | |
| SQLTemplate( | |
| template_id="disambiguate_01", | |
| family="disambiguation", | |
| sql_difficulty="easy", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| sql_template=( | |
| "SELECT ST_AsGeoJSON(geometry) AS geometry," | |
| " names.\"primary\" AS name, id, subtype, country, region" | |
| " FROM read_parquet('divisions_area')" | |
| " WHERE id = '{anchor_id}'" | |
| ), | |
| question_hints=[ | |
| "{anchor_name}, {container_name}", | |
| "{anchor_name} in {container_name}", | |
| "the {anchor_name} that's in {container_name}", | |
| "show me {anchor_name}, {container_name}", | |
| "where is {anchor_name}, {container_name}?", | |
| "map of {anchor_name} ({container_name})", | |
| "{anchor_name} ({container_name})", | |
| "{anchor_name} {container_name}", | |
| "pull up {anchor_name} in {container_name}", | |
| "find {anchor_name} in {container_name}", | |
| "locate {anchor_name} in {container_name}", | |
| "need {anchor_name} from {container_name}", | |
| "show {anchor_name} under {container_name}", | |
| "{anchor_name} near {container_name}", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="disambiguate_02", | |
| family="disambiguation", | |
| sql_difficulty="easy", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| sql_template=( | |
| "SELECT ST_AsGeoJSON(geometry) AS geometry," | |
| " names.\"primary\" AS name, id, subtype, country" | |
| " FROM read_parquet('divisions_area')" | |
| " WHERE id = '{anchor_id}'" | |
| ), | |
| question_hints=[ | |
| "{anchor_name}, {container_name}", | |
| "{anchor_name} in {container_name}", | |
| "{anchor_name}, {container_name}.", | |
| "show me {anchor_name}, {container_name}", | |
| "where is {anchor_name} in {container_name}?", | |
| "the {anchor_name} that's in {container_name}", | |
| "map of {anchor_name}, {container_name}", | |
| "pull up {anchor_name} ({container_name})", | |
| "find {anchor_name} in {container_name}", | |
| "{anchor_name} {container_name}", | |
| "locate {anchor_name} in {container_name}", | |
| "need the {anchor_name} in {container_name}", | |
| "show {anchor_name} from {container_name}", | |
| "bring up {anchor_name}, {container_name}", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="disambiguate_03", | |
| family="disambiguation", | |
| sql_difficulty="easy", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| sql_template=( | |
| "SELECT ST_AsGeoJSON(geometry) AS geometry," | |
| " names.\"primary\" AS name, id, subtype, country" | |
| " FROM read_parquet('divisions_area')" | |
| " WHERE id = '{anchor_id}'" | |
| ), | |
| question_hints=[ | |
| "{anchor_name}, {container_name}", | |
| "{anchor_name} state of {container_name}", | |
| "the {anchor_name} region in {container_name}", | |
| "show me {anchor_name}, {container_name}", | |
| "where is {anchor_name} in {container_name}?", | |
| "map of {anchor_name}, {container_name}", | |
| "{anchor_name} ({container_name})", | |
| "{anchor_name} province of {container_name}", | |
| "pull up {anchor_name} in {container_name}", | |
| "find {anchor_name} {container_name}", | |
| "locate {anchor_name} within {container_name}", | |
| "show the {anchor_name} part of {container_name}", | |
| "need {anchor_name} from {container_name}", | |
| "bring up {anchor_name} in {container_name}", | |
| "{anchor_name} prefecture of {container_name}", | |
| "{anchor_name} oblast of {container_name}", | |
| "{anchor_name} department of {container_name}", | |
| "{anchor_name} canton in {container_name}", | |
| ], | |
| ), | |
| # ββ ADJACENCY ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| SQLTemplate( | |
| template_id="adj_01", | |
| family="adjacency", | |
| sql_difficulty="medium", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, a" | |
| " WHERE b.id != '{anchor_id}'" | |
| " AND ST_Touches(a.geometry, b.geometry)" | |
| ), | |
| question_hints=[ | |
| "what places touch {anchor_name}", | |
| "list everything adjacent to {anchor_name}", | |
| "what shares a border with {anchor_name}", | |
| "neighbours of {anchor_name}", | |
| "what's next to {anchor_name}", | |
| "what surrounds {anchor_name}?", | |
| "places next to {anchor_name}", | |
| "everything bordering {anchor_name}", | |
| "show adjacent places to {anchor_name}", | |
| "areas touching {anchor_name}", | |
| "find the neighbours of {anchor_name}", | |
| "bordering places for {anchor_name}", | |
| "places that meet {anchor_name}", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="adj_02", | |
| family="adjacency", | |
| sql_difficulty="medium", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="region", | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, a" | |
| " WHERE b.id != '{anchor_id}'" | |
| " AND b.subtype = '{target_subtype}'" | |
| " AND ST_Touches(a.geometry, b.geometry)" | |
| ), | |
| question_hints=[ | |
| "which {target_subtype}s border {anchor_name}?", | |
| "what {target_subtype}s share a border with {anchor_name}", | |
| "{target_subtype}s that touch {anchor_name}", | |
| "neighbouring {target_subtype}s of {anchor_name}", | |
| "which {target_subtype}s are adjacent to {anchor_name}?", | |
| "{target_subtype}s along the {anchor_name} border", | |
| "find {target_subtype}s next to {anchor_name}", | |
| "show {target_subtype}s bordering {anchor_name}", | |
| "{target_subtype}s beside {anchor_name}", | |
| "all {target_subtype}s touching {anchor_name}", | |
| "{target_subtype}s meeting {anchor_name}", | |
| "which states border {anchor_name}?", | |
| "which provinces share a border with {anchor_name}?", | |
| "states adjacent to {anchor_name}", | |
| "neighbouring provinces of {anchor_name}", | |
| "which prefectures border {anchor_name}?", | |
| "oblasts bordering {anchor_name}", | |
| "departments that touch {anchor_name}", | |
| "cantons adjacent to {anchor_name}", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="adj_03", | |
| family="adjacency", | |
| sql_difficulty="medium", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="sea", | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT n.id, n.names.\"primary\" AS name, n.subtype," | |
| " ST_AsGeoJSON(n.geometry) AS geometry" | |
| " FROM read_parquet('natural_earth') AS n, a" | |
| " WHERE n.subtype IN ('ocean', 'sea')" | |
| " AND ST_Intersects(a.geometry, n.geometry)" | |
| ), | |
| question_hints=[ | |
| "which seas touch {anchor_name}?", | |
| "what seas border {anchor_name}?", | |
| "which bodies of water is {anchor_name} next to?", | |
| "what ocean or sea borders {anchor_name}", | |
| "which oceans touch {anchor_name}?", | |
| "what coastline does {anchor_name} have?", | |
| "which water bodies does {anchor_name} border?", | |
| "does {anchor_name} have sea access?", | |
| "what ocean is {anchor_name} on?", | |
| "is {anchor_name} on the coast?", | |
| "what sea is off the coast of {anchor_name}?", | |
| "which ocean lies off {anchor_name}?", | |
| "what water is {anchor_name} on the shore of?", | |
| "which sea or ocean is {anchor_name} along?", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="adj_04", | |
| family="adjacency", | |
| sql_difficulty="medium", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="county", | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, a" | |
| " WHERE b.id != '{anchor_id}'" | |
| " AND b.subtype = '{target_subtype}'" | |
| " AND ST_Touches(a.geometry, b.geometry)" | |
| ), | |
| question_hints=[ | |
| "neighbouring counties of {anchor_name}", | |
| "neighbouring districts of {anchor_name}", | |
| "which counties border {anchor_name}?", | |
| "which districts border {anchor_name}?", | |
| "counties adjacent to {anchor_name}", | |
| "districts next to {anchor_name}", | |
| "counties sharing a border with {anchor_name}", | |
| "what counties touch {anchor_name}?", | |
| "nearby counties of {anchor_name}", | |
| "counties along the {anchor_name} boundary", | |
| "municipalities bordering {anchor_name}", | |
| "which boroughs are adjacent to {anchor_name}?", | |
| "communes that touch {anchor_name}", | |
| "parishes next to {anchor_name}", | |
| "neighbouring municipalities of {anchor_name}", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="adj_05", | |
| family="adjacency", | |
| sql_difficulty="medium", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="country", | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, a" | |
| " WHERE b.id != '{anchor_id}'" | |
| " AND b.subtype = '{target_subtype}'" | |
| " AND ST_Touches(a.geometry, b.geometry)" | |
| ), | |
| question_hints=[ | |
| "which countries border {anchor_name}?", | |
| "what countries share a border with {anchor_name}?", | |
| "countries adjacent to {anchor_name}", | |
| "neighbouring countries of {anchor_name}", | |
| "which nations border {anchor_name}?", | |
| "what nations are next to {anchor_name}?", | |
| "nations sharing a border with {anchor_name}", | |
| "countries that touch {anchor_name}", | |
| "find countries bordering {anchor_name}", | |
| "show countries adjacent to {anchor_name}", | |
| "all countries touching {anchor_name}", | |
| "which nations are {anchor_name}'s neighbours?", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="adj_06", | |
| family="adjacency", | |
| sql_difficulty="medium", | |
| anchor_source="natural_earth", | |
| num_anchors=1, | |
| target_subtype="country", | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('natural_earth') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, a" | |
| " WHERE b.subtype = '{target_subtype}'" | |
| " AND ST_Intersects(b.geometry, a.geometry)" | |
| ), | |
| question_hints=[ | |
| "which countries border the {anchor_name}?", | |
| "what countries are next to the {anchor_name}?", | |
| "countries along the {anchor_name}", | |
| "which nations touch the {anchor_name}?", | |
| "countries adjacent to the {anchor_name}", | |
| "which countries lie on the {anchor_name}?", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="adj_07", | |
| family="adjacency", | |
| sql_difficulty="medium", | |
| anchor_source="natural_earth", | |
| num_anchors=1, | |
| target_subtype="region", | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('natural_earth') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, a" | |
| " WHERE b.subtype = '{target_subtype}'" | |
| " AND ST_Intersects(b.geometry, a.geometry)" | |
| ), | |
| question_hints=[ | |
| "which regions border the {anchor_name}?", | |
| "what regions are next to the {anchor_name}?", | |
| "regions along the {anchor_name}", | |
| "which states touch the {anchor_name}?", | |
| "provinces adjacent to the {anchor_name}", | |
| "which regions lie on the {anchor_name}?", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="adj_08", | |
| family="adjacency", | |
| sql_difficulty="medium", | |
| anchor_source="natural_earth", | |
| num_anchors=1, | |
| target_subtype="county", | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('natural_earth') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, a" | |
| " WHERE b.subtype = '{target_subtype}'" | |
| " AND ST_Intersects(b.geometry, a.geometry)" | |
| ), | |
| question_hints=[ | |
| "which counties border the {anchor_name}?", | |
| "what districts are next to the {anchor_name}?", | |
| "counties along the {anchor_name}", | |
| "which districts touch the {anchor_name}?", | |
| "municipalities adjacent to the {anchor_name}", | |
| "which counties lie on the {anchor_name}?", | |
| ], | |
| ), | |
| # ββ NATURAL EARTH ADJACENCY βββββββββββββββββββββββββββββββββββββββββββββ | |
| # Division anchor, natural_earth targets. Handler formats anchor_id and | |
| # target_subtype but the SQL hardcodes NE subtypes (like adj_03). | |
| SQLTemplate( | |
| template_id="adj_09", | |
| family="adjacency", | |
| sql_difficulty="medium", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="river", | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT n.id, n.names.\"primary\" AS name, n.subtype," | |
| " ST_AsGeoJSON(n.geometry) AS geometry" | |
| " FROM read_parquet('natural_earth') AS n, a" | |
| " WHERE n.subtype IN ('river', 'lake', 'basin')" | |
| " AND ST_Intersects(a.geometry, n.geometry)" | |
| ), | |
| question_hints=[ | |
| "what rivers or lakes are in {anchor_name}?", | |
| "natural water features of {anchor_name}", | |
| "which rivers flow through {anchor_name}?", | |
| "lakes and rivers within {anchor_name}", | |
| "water features inside {anchor_name}", | |
| "what bodies of water cross {anchor_name}?", | |
| "rivers of {anchor_name}", | |
| "show me the lakes in {anchor_name}", | |
| "what rivers run through {anchor_name}?", | |
| "which lakes lie in {anchor_name}?", | |
| "what waterways are in {anchor_name}?", | |
| "which basins, lakes, or rivers are in {anchor_name}?", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="adj_10", | |
| family="adjacency", | |
| sql_difficulty="medium", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="range/mtn", | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT n.id, n.names.\"primary\" AS name, n.subtype," | |
| " ST_AsGeoJSON(n.geometry) AS geometry" | |
| " FROM read_parquet('natural_earth') AS n, a" | |
| " WHERE n.subtype = 'range/mtn'" | |
| " AND ST_Intersects(a.geometry, n.geometry)" | |
| ), | |
| question_hints=[ | |
| "what mountain ranges are in {anchor_name}?", | |
| "which mountain ranges cross {anchor_name}?", | |
| "mountains of {anchor_name}", | |
| "mountain regions in {anchor_name}", | |
| "what hills are in {anchor_name}?", | |
| "which hills cross {anchor_name}?", | |
| "ghats of {anchor_name}", | |
| "what ghats are in {anchor_name}?", | |
| "highlands in {anchor_name}", | |
| "mountain belts within {anchor_name}", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="adj_11", | |
| family="adjacency", | |
| sql_difficulty="medium", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="plateau", | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT n.id, n.names.\"primary\" AS name, n.subtype," | |
| " ST_AsGeoJSON(n.geometry) AS geometry" | |
| " FROM read_parquet('natural_earth') AS n, a" | |
| " WHERE n.subtype = 'plateau'" | |
| " AND ST_Intersects(a.geometry, n.geometry)" | |
| ), | |
| question_hints=[ | |
| "what plateaus are in {anchor_name}?", | |
| "which plateaus cross {anchor_name}?", | |
| "uplands in {anchor_name}", | |
| "what uplands are in {anchor_name}?", | |
| "tablelands of {anchor_name}", | |
| "plateau regions within {anchor_name}", | |
| "show plateaus in {anchor_name}", | |
| "find uplands of {anchor_name}", | |
| "give me plateau areas in {anchor_name}", | |
| "{anchor_name} plateaus and uplands", | |
| "what tablelands are there in {anchor_name}?", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="adj_12", | |
| family="adjacency", | |
| sql_difficulty="medium", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="landform", | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT n.id, n.names.\"primary\" AS name, n.subtype," | |
| " ST_AsGeoJSON(n.geometry) AS geometry" | |
| " FROM read_parquet('natural_earth') AS n, a" | |
| " WHERE n.subtype IN ('plain', 'lowland', 'basin', 'valley', 'depression', 'gorge')" | |
| " AND ST_Intersects(a.geometry, n.geometry)" | |
| ), | |
| question_hints=[ | |
| "what plains are in {anchor_name}?", | |
| "basins and valleys of {anchor_name}", | |
| "which basins are in {anchor_name}?", | |
| "what valleys cross {anchor_name}?", | |
| "lowlands in {anchor_name}", | |
| "major landforms in {anchor_name}", | |
| "plains, basins, and valleys within {anchor_name}", | |
| "show me the main landforms in {anchor_name}", | |
| "landforms of {anchor_name}", | |
| "find plains and basins in {anchor_name}", | |
| "{anchor_name} valleys and lowlands", | |
| ], | |
| ), | |
| # ββ MULTI-ADJACENCY ββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| SQLTemplate( | |
| template_id="multi_adj_01", | |
| family="multi_adjacency", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=2, | |
| target_subtype="region", | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id_1}'" | |
| ")," | |
| " b AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id_2}'" | |
| ")" | |
| " SELECT c.id, c.names.\"primary\" AS name, c.subtype, c.country," | |
| " ST_AsGeoJSON(c.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS c, a, b" | |
| " WHERE c.id NOT IN ('{anchor_id_1}', '{anchor_id_2}')" | |
| " AND c.subtype = '{target_subtype}'" | |
| " AND ST_Touches(c.geometry, a.geometry)" | |
| " AND ST_Touches(c.geometry, b.geometry)" | |
| ), | |
| question_hints=[ | |
| "which {target_subtype}s border both {anchor_1_name} and {anchor_2_name}?", | |
| "what {target_subtype}s touch both {anchor_1_name} and {anchor_2_name}?", | |
| "{target_subtype}s adjacent to both {anchor_1_name} and {anchor_2_name}", | |
| "common {target_subtype} neighbours of {anchor_1_name} and {anchor_2_name}", | |
| "show {target_subtype}s touching both {anchor_1_name} and {anchor_2_name}", | |
| "{target_subtype}s bordering both {anchor_1_name} and {anchor_2_name}", | |
| "shared {target_subtype} neighbours of {anchor_1_name} and {anchor_2_name}", | |
| "find {target_subtype}s adjacent to both {anchor_1_name} and {anchor_2_name}", | |
| "which states border both {anchor_1_name} and {anchor_2_name}?", | |
| "which provinces are adjacent to both {anchor_1_name} and {anchor_2_name}?", | |
| "states touching both {anchor_1_name} and {anchor_2_name}", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="multi_adj_02", | |
| family="multi_adjacency", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=2, | |
| target_subtype="country", | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id_1}'" | |
| ")," | |
| " b AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id_2}'" | |
| ")" | |
| " SELECT c.id, c.names.\"primary\" AS name, c.subtype, c.country," | |
| " ST_AsGeoJSON(c.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS c, a, b" | |
| " WHERE c.id NOT IN ('{anchor_id_1}', '{anchor_id_2}')" | |
| " AND c.subtype = '{target_subtype}'" | |
| " AND ST_Touches(c.geometry, a.geometry)" | |
| " AND ST_Touches(c.geometry, b.geometry)" | |
| ), | |
| question_hints=[ | |
| "which countries border both {anchor_1_name} and {anchor_2_name}?", | |
| "what nations share a border with both {anchor_1_name} and {anchor_2_name}?", | |
| "countries adjacent to both {anchor_1_name} and {anchor_2_name}", | |
| "which country touches both {anchor_1_name} and {anchor_2_name}?", | |
| "nations bordering both {anchor_1_name} and {anchor_2_name}", | |
| "show countries neighbouring both {anchor_1_name} and {anchor_2_name}", | |
| "find countries that border both {anchor_1_name} and {anchor_2_name}", | |
| "common country neighbours of {anchor_1_name} and {anchor_2_name}", | |
| "which nation lies between {anchor_1_name} and {anchor_2_name}?", | |
| "countries sharing a border with both {anchor_1_name} and {anchor_2_name}", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="multi_adj_03", | |
| family="multi_adjacency", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=2, | |
| target_subtype="county", | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id_1}'" | |
| ")," | |
| " b AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id_2}'" | |
| ")" | |
| " SELECT c.id, c.names.\"primary\" AS name, c.subtype, c.country," | |
| " ST_AsGeoJSON(c.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS c, a, b" | |
| " WHERE c.id NOT IN ('{anchor_id_1}', '{anchor_id_2}')" | |
| " AND c.subtype = '{target_subtype}'" | |
| " AND ST_Touches(c.geometry, a.geometry)" | |
| " AND ST_Touches(c.geometry, b.geometry)" | |
| ), | |
| question_hints=[ | |
| "which {target_subtype}s border both {anchor_1_name} and {anchor_2_name}?", | |
| "what {target_subtype}s touch both {anchor_1_name} and {anchor_2_name}?", | |
| "{target_subtype}s adjacent to both {anchor_1_name} and {anchor_2_name}", | |
| "common {target_subtype} neighbours of {anchor_1_name} and {anchor_2_name}", | |
| "show {target_subtype}s touching both {anchor_1_name} and {anchor_2_name}", | |
| "which districts border both {anchor_1_name} and {anchor_2_name}?", | |
| "districts lying between {anchor_1_name} and {anchor_2_name}", | |
| "find districts adjacent to both {anchor_1_name} and {anchor_2_name}", | |
| "municipalities bordering both {anchor_1_name} and {anchor_2_name}", | |
| "which boroughs touch both {anchor_1_name} and {anchor_2_name}?", | |
| ], | |
| ), | |
| # ββ CONTAINMENT ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| SQLTemplate( | |
| template_id="contain_01", | |
| family="containment", | |
| sql_difficulty="medium", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="county", | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, a" | |
| " WHERE b.id != '{anchor_id}'" | |
| " AND b.subtype = '{target_subtype}'" | |
| " AND ST_Within(b.geometry, a.geometry)" | |
| ), | |
| question_hints=[ | |
| "what {target_subtype}s are in {anchor_name}?", | |
| "which {target_subtype}s fall within {anchor_name}?", | |
| "list all {target_subtype}s inside {anchor_name}", | |
| "{target_subtype}s contained by {anchor_name}", | |
| "all {target_subtype}s within {anchor_name}", | |
| "{target_subtype}s of {anchor_name}", | |
| "show every {target_subtype} in {anchor_name}", | |
| "show {target_subtype}s inside {anchor_name}", | |
| "find {target_subtype}s in {anchor_name}", | |
| "give me the {target_subtype}s in {anchor_name}", | |
| "{anchor_name} {target_subtype}s", | |
| "which {target_subtype}s does {anchor_name} contain?", | |
| "what all {target_subtype}s are there in {anchor_name}?", | |
| "{target_subtype}s under {anchor_name}", | |
| "what districts are in {anchor_name}?", | |
| "which districts fall within {anchor_name}?", | |
| "districts of {anchor_name}", | |
| "show all districts inside {anchor_name}", | |
| "list municipalities in {anchor_name}", | |
| "boroughs of {anchor_name}", | |
| "communes inside {anchor_name}", | |
| "parishes in {anchor_name}", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="contain_02", | |
| family="containment", | |
| sql_difficulty="medium", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="country", | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, a" | |
| " WHERE b.id != '{anchor_id}'" | |
| " AND b.subtype = '{target_subtype}'" | |
| " AND ST_Contains(b.geometry, a.geometry)" | |
| ), | |
| question_hints=[ | |
| "what country contains {anchor_name}?", | |
| "which country is {anchor_name} in?", | |
| "what country does {anchor_name} belong to?", | |
| "which nation contains {anchor_name}?", | |
| "{anchor_name} is part of which country?", | |
| "where is {anchor_name}", | |
| "what country is {anchor_name} in", | |
| "{anchor_name} belongs to which country?", | |
| "show country for {anchor_name}", | |
| "find the country of {anchor_name}", | |
| "which country does {anchor_name} fall in?", | |
| "{anchor_name} under which country", | |
| "tell me the country for {anchor_name}", | |
| "which nation does {anchor_name} belong to?", | |
| "what nation is {anchor_name} in?", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="contain_03", | |
| family="containment", | |
| sql_difficulty="medium", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="region", | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, a" | |
| " WHERE b.subtype = '{target_subtype}'" | |
| " AND ST_Within(b.geometry, a.geometry)" | |
| ), | |
| question_hints=[ | |
| "what {target_subtype}s are in {anchor_name}?", | |
| "list all {target_subtype}s in {anchor_name}", | |
| "which {target_subtype}s does {anchor_name} have?", | |
| "show {target_subtype}s of {anchor_name}", | |
| "all {target_subtype}s within {anchor_name}", | |
| "{anchor_name} {target_subtype}s", | |
| "give me the {target_subtype}s of {anchor_name}", | |
| "what states are in {anchor_name}?", | |
| "list all states of {anchor_name}", | |
| "which provinces does {anchor_name} have?", | |
| "show the provinces of {anchor_name}", | |
| "all states within {anchor_name}", | |
| "provinces of {anchor_name}", | |
| "what oblasts are in {anchor_name}?", | |
| "departments of {anchor_name}", | |
| "prefectures in {anchor_name}", | |
| "cantons of {anchor_name}", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="contain_04", | |
| family="containment", | |
| sql_difficulty="medium", | |
| anchor_source="natural_earth", | |
| num_anchors=1, | |
| target_subtype="region", | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('natural_earth') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, a" | |
| " WHERE b.subtype = '{target_subtype}'" | |
| " AND ST_Within(b.geometry, a.geometry)" | |
| ), | |
| question_hints=[ | |
| "which {target_subtype}s are in the {anchor_name}?", | |
| "what {target_subtype}s fall within the {anchor_name}?", | |
| "{target_subtype}s inside the {anchor_name}", | |
| "admin {target_subtype}s within the {anchor_name}", | |
| "all regions inside the {anchor_name}", | |
| "what {target_subtype}s does the {anchor_name} contain?", | |
| "{target_subtype}s covered by the {anchor_name}", | |
| "which regions are in the {anchor_name} basin?", | |
| "what admin regions lie within the {anchor_name}?", | |
| "which provinces are inside the {anchor_name}?", | |
| "show the regions in the {anchor_name}", | |
| "find provinces inside the {anchor_name}", | |
| "give me admin regions within the {anchor_name}", | |
| "regions belonging to the {anchor_name}", | |
| "areas contained in the {anchor_name}", | |
| "which states are in the {anchor_name}?", | |
| "states inside the {anchor_name}", | |
| "which prefectures lie within the {anchor_name}?", | |
| "oblasts in the {anchor_name}", | |
| "departments inside the {anchor_name}", | |
| ], | |
| ), | |
| # ββ NATURAL EARTH CONTAINMENT βββββββββββββββββββββββββββββββββββββββββββ | |
| # contain_05: NE anchor (sea/gulf/bay), find countries that touch it. | |
| # Uses containment handler via containment_pairs. | |
| SQLTemplate( | |
| template_id="contain_05", | |
| family="containment", | |
| sql_difficulty="medium", | |
| anchor_source="natural_earth", | |
| num_anchors=1, | |
| target_subtype="country", | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('natural_earth') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, a" | |
| " WHERE b.subtype = '{target_subtype}'" | |
| " AND ST_Intersects(b.geometry, a.geometry)" | |
| ), | |
| question_hints=[ | |
| "which countries border the {anchor_name}?", | |
| "what countries are along the {anchor_name}?", | |
| "countries surrounding the {anchor_name}", | |
| "nations on the {anchor_name}", | |
| "which countries touch the {anchor_name}?", | |
| "countries with coastline on the {anchor_name}", | |
| "what nations lie on the {anchor_name}?", | |
| "which countries are on the coast of the {anchor_name}?", | |
| "what countries lie around the {anchor_name}?", | |
| "which nations have shores on the {anchor_name}?", | |
| "what countries front the {anchor_name}?", | |
| ], | |
| ), | |
| # ββ INTERSECTION βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| SQLTemplate( | |
| template_id="intersect_01", | |
| family="intersection", | |
| sql_difficulty="medium-hard", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="region", | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, a" | |
| " WHERE b.id != '{anchor_id}'" | |
| " AND b.subtype = '{target_subtype}'" | |
| " AND ST_Intersects(b.geometry, a.geometry)" | |
| ), | |
| question_hints=[ | |
| "which {target_subtype}s intersect {anchor_name}?", | |
| "what {target_subtype}s overlap with {anchor_name}?", | |
| "{target_subtype}s that cross into {anchor_name}", | |
| "which {target_subtype}s overlap {anchor_name}?", | |
| "{target_subtype}s partially inside {anchor_name}", | |
| "what {target_subtype}s extend into {anchor_name}?", | |
| "show {target_subtype}s overlapping {anchor_name}", | |
| "find {target_subtype}s crossing {anchor_name}", | |
| "{target_subtype}s meeting {anchor_name}", | |
| "areas intersecting {anchor_name}", | |
| "{anchor_name} overlapping {target_subtype}s", | |
| "which {target_subtype}s are partly in {anchor_name}?", | |
| "which states intersect {anchor_name}?", | |
| "which provinces overlap {anchor_name}?", | |
| "states crossing into {anchor_name}", | |
| "provinces partly inside {anchor_name}", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="intersect_02", | |
| family="intersection", | |
| sql_difficulty="medium-hard", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="county", | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, a" | |
| " WHERE b.id != '{anchor_id}'" | |
| " AND b.subtype = '{target_subtype}'" | |
| " AND ST_Intersects(b.geometry, a.geometry)" | |
| ), | |
| question_hints=[ | |
| "which {target_subtype}s intersect {anchor_name}?", | |
| "what {target_subtype}s overlap with {anchor_name}?", | |
| "{target_subtype}s crossing into {anchor_name}", | |
| "which {target_subtype}s are partly in {anchor_name}?", | |
| "show {target_subtype}s overlapping {anchor_name}", | |
| "find {target_subtype}s crossing {anchor_name}", | |
| "{target_subtype}s that overlap {anchor_name}", | |
| "which districts intersect {anchor_name}?", | |
| "what districts overlap with {anchor_name}?", | |
| "districts crossing into {anchor_name}", | |
| "which municipalities are partly in {anchor_name}?", | |
| "boroughs overlapping {anchor_name}", | |
| "communes crossing {anchor_name}", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="intersect_03", | |
| family="intersection", | |
| sql_difficulty="medium-hard", | |
| anchor_source="natural_earth", | |
| num_anchors=1, | |
| target_subtype="country", | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('natural_earth') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, a" | |
| " WHERE b.subtype = '{target_subtype}'" | |
| " AND ST_Intersects(b.geometry, a.geometry)" | |
| ), | |
| question_hints=[ | |
| "which countries does the {anchor_name} pass through?", | |
| "what countries does the {anchor_name} cross?", | |
| "countries that overlap the {anchor_name}", | |
| "which countries touch the {anchor_name}?", | |
| "nations intersected by the {anchor_name}", | |
| "which nations does the {anchor_name} cross?", | |
| "countries along the {anchor_name}", | |
| "what countries does the {anchor_name} cover?", | |
| "countries the {anchor_name} spans across", | |
| "what countries is the {anchor_name} in?", | |
| "which countries lie along the {anchor_name}?", | |
| "what countries does the {anchor_name} run through?", | |
| "which countries border the {anchor_name}?", | |
| ], | |
| ), | |
| # ββ NATURAL EARTH INTERSECTION ββββββββββββββββββββββββββββββββββββββββββ | |
| # intersect_04: NE anchor, finding overlapping regions (vs countries in | |
| # intersect_03). Uses cross_source_relations handler. | |
| # intersect_05: division anchor, finding NE features that overlap it. | |
| # Uses intersection_pairs handler (extra NE subtypes ignored in SQL). | |
| SQLTemplate( | |
| template_id="intersect_04", | |
| family="intersection", | |
| sql_difficulty="medium-hard", | |
| anchor_source="natural_earth", | |
| num_anchors=1, | |
| target_subtype="region", | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('natural_earth') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, a" | |
| " WHERE b.subtype = '{target_subtype}'" | |
| " AND ST_Intersects(b.geometry, a.geometry)" | |
| ), | |
| question_hints=[ | |
| "which regions does the {anchor_name} pass through?", | |
| "what admin regions overlap with the {anchor_name}?", | |
| "regions that the {anchor_name} crosses", | |
| "admin areas intersected by the {anchor_name}", | |
| "what provinces does the {anchor_name} span?", | |
| "regions along the {anchor_name}", | |
| "which provinces overlap the {anchor_name}?", | |
| "which regions is the {anchor_name} in?", | |
| "what states does the {anchor_name} run through?", | |
| "which provinces lie along the {anchor_name}?", | |
| "show regions crossed by the {anchor_name}", | |
| "find administrative regions along the {anchor_name}", | |
| "give me the regions touched by the {anchor_name}", | |
| "regions of the {anchor_name}", | |
| "which states does the {anchor_name} pass through?", | |
| "what oblasts does the {anchor_name} cross?", | |
| "which departments does the {anchor_name} run through?", | |
| "prefectures along the {anchor_name}", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="intersect_05", | |
| family="intersection", | |
| sql_difficulty="medium-hard", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="region", | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT n.id, n.names.\"primary\" AS name, n.subtype," | |
| " ST_AsGeoJSON(n.geometry) AS geometry" | |
| " FROM read_parquet('natural_earth') AS n, a" | |
| " WHERE ST_Intersects(n.geometry, a.geometry)" | |
| ), | |
| question_hints=[ | |
| "what natural features intersect {anchor_name}?", | |
| "natural features that overlap {anchor_name}", | |
| "which geographic features cross {anchor_name}?", | |
| "everything natural that touches {anchor_name}", | |
| "what geographic features does {anchor_name} contain?", | |
| "natural features within or crossing {anchor_name}", | |
| "show natural features overlapping {anchor_name}", | |
| "find the natural features in or across {anchor_name}", | |
| "{anchor_name} intersecting natural features", | |
| "what physical features are associated with {anchor_name}?", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="intersect_06", | |
| family="intersection", | |
| sql_difficulty="medium-hard", | |
| anchor_source="natural_earth", | |
| num_anchors=1, | |
| target_subtype="county", | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('natural_earth') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, a" | |
| " WHERE b.subtype = '{target_subtype}'" | |
| " AND ST_Intersects(b.geometry, a.geometry)" | |
| ), | |
| question_hints=[ | |
| "which districts does the {anchor_name} pass through?", | |
| "what districts does the {anchor_name} cross?", | |
| "districts intersected by the {anchor_name}", | |
| "which counties does the {anchor_name} flow through?", | |
| "what counties overlap the {anchor_name}?", | |
| "districts along the {anchor_name}", | |
| "which districts are crossed by the {anchor_name}?", | |
| "what districts is the {anchor_name} in?", | |
| "which counties lie along the {anchor_name}?", | |
| "what districts does the {anchor_name} run through?", | |
| "show districts crossed by the {anchor_name}", | |
| "find counties along the {anchor_name}", | |
| "give me the districts touched by the {anchor_name}", | |
| "districts of the {anchor_name}", | |
| "which municipalities does the {anchor_name} pass through?", | |
| "boroughs along the {anchor_name}", | |
| "communes crossed by the {anchor_name}", | |
| "parishes the {anchor_name} runs through", | |
| ], | |
| ), | |
| # ββ BUFFER βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # CTE computes the buffered geometry (raw) for the spatial join. | |
| # Final SELECT wraps the result features with ST_AsGeoJSON. | |
| SQLTemplate( | |
| template_id="buffer_01", | |
| family="buffer", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| requires_buffer=True, | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT ST_Buffer(geometry, {buffer_m} / 111320.0) AS geom" | |
| " FROM read_parquet('divisions_area')" | |
| " WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, a" | |
| " WHERE b.id != '{anchor_id}'" | |
| " AND ST_Intersects(b.geometry, a.geom)" | |
| ), | |
| question_hints=[ | |
| "what's within {buffer_label} of {anchor_name}?", | |
| "admin units within {buffer_label} of {anchor_name}", | |
| "features within a {buffer_label} radius of {anchor_name}", | |
| "places within {buffer_label} of {anchor_name}", | |
| "{buffer_label} buffer around {anchor_name}", | |
| "what falls within {buffer_label} of {anchor_name}?", | |
| "everything within {buffer_label} of {anchor_name}", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="buffer_02", | |
| family="buffer", | |
| sql_difficulty="hard", | |
| anchor_source="natural_earth", | |
| num_anchors=1, | |
| requires_buffer=True, | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT ST_Buffer(geometry, {buffer_m} / 111320.0) AS geom" | |
| " FROM read_parquet('natural_earth')" | |
| " WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, a" | |
| " WHERE ST_Intersects(b.geometry, a.geom)" | |
| ), | |
| question_hints=[ | |
| "what admin units are within {buffer_label} of the {anchor_name}?", | |
| "countries within {buffer_label} of the {anchor_name}", | |
| "regions within {buffer_label} of the {anchor_name}", | |
| "what falls within {buffer_label} of the {anchor_name}?", | |
| "admin divisions within a {buffer_label} radius of the {anchor_name}", | |
| "places within {buffer_label} of the {anchor_name}", | |
| "what places are near the {anchor_name}?", | |
| "what admin areas are close to the {anchor_name}?", | |
| "which regions are around the {anchor_name}?", | |
| "what lies within {buffer_label} of the shoreline of the {anchor_name}?", | |
| "show places around the {anchor_name}", | |
| "find areas near the {anchor_name}", | |
| "admin units close to the {anchor_name}", | |
| "what is around the {anchor_name} within {buffer_label}", | |
| "give me nearby admin regions for the {anchor_name}", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="buffer_03", | |
| family="buffer", | |
| sql_difficulty="medium", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| requires_buffer=True, | |
| sql_template=( | |
| "SELECT ST_AsGeoJSON(ST_Buffer(geometry, {buffer_m} / 111320.0)) AS geometry" | |
| " FROM read_parquet('divisions_area')" | |
| " WHERE id = '{anchor_id}'" | |
| ), | |
| question_hints=[ | |
| "{buffer_label} buffer around {anchor_name}", | |
| "draw a {buffer_label} buffer around {anchor_name}", | |
| "show the {buffer_label} buffer around {anchor_name}", | |
| "create a {buffer_label} radius around {anchor_name}", | |
| "map a {buffer_label} zone around {anchor_name}", | |
| "outline the {buffer_label} buffer around {anchor_name}", | |
| "buffer {anchor_name} by {buffer_label}", | |
| "show radius {buffer_label} from {anchor_name}", | |
| "{anchor_name} with a {buffer_label} buffer", | |
| ], | |
| ), | |
| # Subtype-specific and multi-anchor buffer templates are generated by helper | |
| # functions to keep the catalog compact while preserving template IDs. | |
| *_make_buffer_templates(), | |
| # ββ NATURAL EARTH BUFFER ββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # buffer_11: NE anchor, find other NE features within a buffer distance. | |
| # Uses buffer handler for natural_earth. | |
| SQLTemplate( | |
| template_id="buffer_11", | |
| family="buffer", | |
| sql_difficulty="hard", | |
| anchor_source="natural_earth", | |
| num_anchors=1, | |
| requires_buffer=True, | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT ST_Buffer(geometry, {buffer_m} / 111320.0) AS geom" | |
| " FROM read_parquet('natural_earth')" | |
| " WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT n.id, n.names.\"primary\" AS name, n.subtype," | |
| " ST_AsGeoJSON(n.geometry) AS geometry" | |
| " FROM read_parquet('natural_earth') AS n, a" | |
| " WHERE ST_Intersects(n.geometry, a.geom)" | |
| ), | |
| question_hints=[ | |
| "natural features within {buffer_label} of the {anchor_name}", | |
| "what's within {buffer_label} of the {anchor_name}?", | |
| "geographic features near the {anchor_name} within {buffer_label}", | |
| "everything within {buffer_label} of the {anchor_name}", | |
| "what natural features are close to the {anchor_name}?", | |
| "{buffer_label} radius around the {anchor_name}", | |
| "what natural features are around the {anchor_name}?", | |
| "what lies near the {anchor_name}?", | |
| "which features are close to the {anchor_name}?", | |
| "what natural features are near the shoreline of the {anchor_name}?", | |
| "show nearby natural features for the {anchor_name}", | |
| "find features around the {anchor_name}", | |
| "give me natural features near the {anchor_name}", | |
| "features around the {anchor_name}", | |
| "what is close to the {anchor_name} within {buffer_label}", | |
| ], | |
| ), | |
| # ββ CHAINED ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Containment + EXISTS/NOT EXISTS ocean/sea. | |
| # CTE holds raw geometry for ST_Within; final SELECT wraps with ST_AsGeoJSON. | |
| SQLTemplate( | |
| template_id="chained_01", | |
| family="chained", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="county", | |
| sql_template=( | |
| "WITH region AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, region" | |
| " WHERE b.subtype = '{target_subtype}'" | |
| " AND ST_Within(b.geometry, region.geometry)" | |
| " AND EXISTS (" | |
| " SELECT 1 FROM read_parquet('natural_earth') AS n" | |
| " WHERE n.subtype IN ('ocean', 'sea')" | |
| " AND ST_Intersects(b.geometry, n.geometry)" | |
| " )" | |
| ), | |
| question_hints=[ | |
| "coastal {target_subtype}s of {anchor_name}", | |
| "{target_subtype}s in {anchor_name} with sea access", | |
| "which {target_subtype}s in {anchor_name} are on the coast?", | |
| "seaside {target_subtype}s within {anchor_name}", | |
| "{target_subtype}s in {anchor_name} bordering the sea", | |
| "oceanfront {target_subtype}s in {anchor_name}", | |
| "which {target_subtype}s in {anchor_name} have a coastline?", | |
| "show coastal {target_subtype}s in {anchor_name}", | |
| "find {target_subtype}s of {anchor_name} on the shore", | |
| "{target_subtype}s of {anchor_name} by the sea", | |
| "which {target_subtype}s of {anchor_name} touch the ocean?", | |
| "coastline {target_subtype}s in {anchor_name}", | |
| "coastal districts of {anchor_name}", | |
| "which districts of {anchor_name} are on the coast?", | |
| "seaside municipalities of {anchor_name}", | |
| "coastal boroughs in {anchor_name}", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="chained_02", | |
| family="chained", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="county", | |
| sql_template=( | |
| "WITH region AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, region" | |
| " WHERE b.subtype = '{target_subtype}'" | |
| " AND ST_Intersects(b.geometry, region.geometry)" | |
| " AND NOT EXISTS (" | |
| " SELECT 1 FROM read_parquet('natural_earth') AS n" | |
| " WHERE n.subtype IN ('ocean', 'sea')" | |
| " AND ST_Intersects(b.geometry, n.geometry)" | |
| " )" | |
| ), | |
| question_hints=[ | |
| "landlocked {target_subtype}s in {anchor_name}", | |
| "which {target_subtype}s in {anchor_name} have no sea access?", | |
| "{target_subtype}s in {anchor_name} that are landlocked", | |
| "{target_subtype}s in {anchor_name} with no coastline", | |
| "which {target_subtype}s within {anchor_name} are landlocked?", | |
| "interior {target_subtype}s of {anchor_name} with no ocean border", | |
| "show inland {target_subtype}s in {anchor_name}", | |
| "find non-coastal {target_subtype}s of {anchor_name}", | |
| "{target_subtype}s of {anchor_name} away from the sea", | |
| "which {target_subtype}s in {anchor_name} are not coastal?", | |
| "inner {target_subtype}s of {anchor_name}", | |
| "inland districts of {anchor_name}", | |
| "which districts in {anchor_name} are not on the coast?", | |
| "non-coastal municipalities of {anchor_name}", | |
| "interior boroughs of {anchor_name}", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="chained_03", | |
| family="chained", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="region", | |
| sql_template=( | |
| "WITH country AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, country" | |
| " WHERE b.subtype = '{target_subtype}'" | |
| " AND ST_Within(b.geometry, country.geometry)" | |
| " AND EXISTS (" | |
| " SELECT 1 FROM read_parquet('natural_earth') AS n" | |
| " WHERE n.subtype IN ('range/mtn', 'island group', 'peninsula', 'depression')" | |
| " AND ST_Intersects(b.geometry, n.geometry)" | |
| " )" | |
| ), | |
| question_hints=[ | |
| "{target_subtype}s in {anchor_name} on a terrain feature or island", | |
| "{target_subtype}s of {anchor_name} on a peninsula or island group", | |
| "{target_subtype}s within {anchor_name} on notable landforms", | |
| "island and peninsula {target_subtype}s of {anchor_name}", | |
| "show {target_subtype}s in {anchor_name} on major landforms", | |
| "find {target_subtype}s of {anchor_name} on islands or peninsulas", | |
| "{target_subtype}s in {anchor_name} on terrain regions", | |
| "states of {anchor_name} on a peninsula or island", | |
| "island states of {anchor_name}", | |
| "provinces of {anchor_name} on major landforms", | |
| "which regions of {anchor_name} are on a peninsula?", | |
| ], | |
| ), | |
| # ββ DIFFERENCE βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # CTEs hold raw geometry; ST_Difference result wrapped with ST_AsGeoJSON. | |
| SQLTemplate( | |
| template_id="diff_01", | |
| family="difference", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=2, | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id_1}'" | |
| ")," | |
| " b AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id_2}'" | |
| ")" | |
| " SELECT ST_AsGeoJSON(ST_Difference(a.geometry, b.geometry)) AS geometry" | |
| " FROM a, b" | |
| " WHERE ST_Intersects(a.geometry, b.geometry)" | |
| ), | |
| question_hints=[ | |
| "{anchor_1_name} excluding {anchor_2_name}", | |
| "{anchor_1_name} minus {anchor_2_name}", | |
| "the part of {anchor_1_name} that is not in {anchor_2_name}", | |
| "{anchor_1_name} without the {anchor_2_name} area", | |
| "remove {anchor_2_name} from {anchor_1_name}", | |
| "{anchor_1_name} with {anchor_2_name} cut out", | |
| "subtract {anchor_2_name} from {anchor_1_name}", | |
| "what's left of {anchor_1_name} after removing {anchor_2_name}?", | |
| "difference between {anchor_1_name} and {anchor_2_name}", | |
| "keep only {anchor_1_name} outside {anchor_2_name}", | |
| "cut {anchor_2_name} out of {anchor_1_name}", | |
| "show {anchor_1_name} without {anchor_2_name}", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="diff_02", | |
| family="difference", | |
| sql_difficulty="hard", | |
| anchor_source="mixed", | |
| num_anchors=2, | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")," | |
| " b AS (" | |
| " SELECT geometry FROM read_parquet('natural_earth') WHERE id = '{clip_feature_id}'" | |
| ")" | |
| " SELECT ST_AsGeoJSON(ST_Difference(a.geometry, b.geometry)) AS geometry" | |
| " FROM a, b" | |
| " WHERE ST_Intersects(a.geometry, b.geometry)" | |
| ), | |
| question_hints=[ | |
| "the part of {anchor_name} outside the {clip_feature_name}", | |
| "{anchor_name} excluding the {clip_feature_name}", | |
| "{anchor_name} minus the {clip_feature_name}", | |
| "parts of {anchor_name} not covered by the {clip_feature_name}", | |
| "{anchor_name} with the {clip_feature_name} removed", | |
| "what's left of {anchor_name} after removing the {clip_feature_name}?", | |
| "show me {anchor_name} excluding the {clip_feature_name}", | |
| "keep only the part of {anchor_name} outside the {clip_feature_name}", | |
| "cut the {clip_feature_name} out of {anchor_name}", | |
| "difference of {anchor_name} and the {clip_feature_name}", | |
| "{anchor_name} after subtracting the {clip_feature_name}", | |
| ], | |
| ), | |
| # ββ BORDER CORRIDOR ββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Intermediate intersection kept raw; final buffer wrapped with ST_AsGeoJSON. | |
| SQLTemplate( | |
| template_id="corridor_01", | |
| family="border_corridor", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=2, | |
| requires_buffer=True, | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id_1}'" | |
| ")," | |
| " b AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id_2}'" | |
| ")," | |
| " border AS (" | |
| " SELECT ST_Intersection(a.geometry, b.geometry) AS line" | |
| " FROM a, b" | |
| " WHERE ST_Intersects(a.geometry, b.geometry)" | |
| ")" | |
| " SELECT ST_AsGeoJSON(ST_Buffer(border.line, {buffer_km} * 1000.0 / 111320.0)) AS geometry" | |
| " FROM border" | |
| " WHERE border.line IS NOT NULL" | |
| ), | |
| question_hints=[ | |
| "{buffer_km} km zone along the border between {anchor_1_name} and {anchor_2_name}", | |
| "the {buffer_km} km border corridor between {anchor_1_name} and {anchor_2_name}", | |
| "area within {buffer_km} km of the {anchor_1_name}-{anchor_2_name} border", | |
| "the region straddling the border of {anchor_1_name} and {anchor_2_name} within {buffer_km} km", | |
| "{buffer_km} km on either side of the {anchor_1_name} and {anchor_2_name} border", | |
| "buffer the {anchor_1_name}-{anchor_2_name} boundary by {buffer_km} km", | |
| "show the border zone between {anchor_1_name} and {anchor_2_name}", | |
| "map the corridor along the {anchor_1_name}-{anchor_2_name} border", | |
| "find the area near the border of {anchor_1_name} and {anchor_2_name}", | |
| "give me the border buffer for {anchor_1_name} and {anchor_2_name}", | |
| "border area of {anchor_1_name} and {anchor_2_name} within {buffer_km} km", | |
| ], | |
| ), | |
| # ββ SET OPERATIONS βββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # union_01 / union_02: 2-anchor and filtered-containment unions. | |
| # union_03: 3-anchor union β trains the model on IN-clause with 3 IDs. | |
| # contain_multi: subtype within multiple countries via country IN clause. | |
| SQLTemplate( | |
| template_id="union_01", | |
| family="set_operations", | |
| sql_difficulty="medium-hard", | |
| anchor_source="divisions_area", | |
| num_anchors=2, | |
| sql_template=( | |
| "SELECT ST_AsGeoJSON(ST_Union_Agg(geometry)) AS geometry," | |
| " array_agg(names.\"primary\") AS names" | |
| " FROM read_parquet('divisions_area')" | |
| " WHERE id IN ('{anchor_id_1}', '{anchor_id_2}')" | |
| ), | |
| question_hints=[ | |
| "the combined area of {anchor_1_name} and {anchor_2_name}", | |
| "union of {anchor_1_name} and {anchor_2_name}", | |
| "merge {anchor_1_name} and {anchor_2_name}", | |
| "{anchor_1_name} and {anchor_2_name} together", | |
| "combined geometry of {anchor_1_name} and {anchor_2_name}", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="union_02", | |
| family="set_operations", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="county", | |
| requires_aggregation=True, | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT ST_AsGeoJSON(ST_Union_Agg(b.geometry)) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, a" | |
| " WHERE b.subtype = '{target_subtype}'" | |
| " AND ST_Within(b.geometry, a.geometry)" | |
| ), | |
| question_hints=[ | |
| "merge all {target_subtype}s in {anchor_name} into one geometry", | |
| "combined geometry of all {target_subtype}s in {anchor_name}", | |
| "union of all {target_subtype}s within {anchor_name}", | |
| "all {target_subtype}s of {anchor_name} merged together", | |
| "the overall extent of {target_subtype}s in {anchor_name}", | |
| "show one merged shape for all {target_subtype}s in {anchor_name}", | |
| "dissolve all {target_subtype}s in {anchor_name}", | |
| "make one geometry from all {target_subtype}s in {anchor_name}", | |
| "single outline of all {target_subtype}s in {anchor_name}", | |
| "combine the {target_subtype}s of {anchor_name} into one area", | |
| "merge all districts in {anchor_name} into one shape", | |
| "dissolve all municipalities in {anchor_name}", | |
| "combine all boroughs of {anchor_name} into one geometry", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="union_03", | |
| family="set_operations", | |
| sql_difficulty="medium-hard", | |
| anchor_source="divisions_area", | |
| num_anchors=3, | |
| sql_template=( | |
| "SELECT ST_AsGeoJSON(ST_Union_Agg(geometry)) AS geometry," | |
| " array_agg(names.\"primary\") AS names" | |
| " FROM read_parquet('divisions_area')" | |
| " WHERE id IN ('{anchor_id_1}', '{anchor_id_2}', '{anchor_id_3}')" | |
| ), | |
| question_hints=[ | |
| "show me {anchor_1_name}, {anchor_2_name} and {anchor_3_name}", | |
| "the combined area of {anchor_1_name}, {anchor_2_name} and {anchor_3_name}", | |
| "union of {anchor_1_name}, {anchor_2_name} and {anchor_3_name}", | |
| "merge {anchor_1_name}, {anchor_2_name} and {anchor_3_name}", | |
| "{anchor_1_name}, {anchor_2_name} and {anchor_3_name} together", | |
| "display {anchor_1_name}, {anchor_2_name} and {anchor_3_name}", | |
| ], | |
| ), | |
| *_make_country_in_templates(), | |
| # ββ PARTIAL SELECTION ββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Bbox clip CTEs use raw geometry; ST_Intersection result wrapped. | |
| SQLTemplate( | |
| template_id="partial_01", | |
| family="partial_selection", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")," | |
| " bbox AS (" | |
| " SELECT ST_XMin(geometry) AS xmin, ST_XMax(geometry) AS xmax," | |
| " ST_YMin(geometry) AS ymin, ST_YMax(geometry) AS ymax FROM a" | |
| ")," | |
| " clip AS (" | |
| " SELECT ST_MakeEnvelope(xmin, (ymin + ymax) / 2.0, xmax, ymax) AS half_geom FROM bbox" | |
| ")" | |
| " SELECT ST_AsGeoJSON(ST_Intersection(a.geometry, clip.half_geom)) AS geometry" | |
| " FROM a, clip" | |
| ), | |
| question_hints=[ | |
| "the northern half of {anchor_name}", | |
| "northern part of {anchor_name}", | |
| "the top half of {anchor_name}", | |
| "northern portion of {anchor_name}", | |
| "upper half of {anchor_name}", | |
| "top side of {anchor_name}", | |
| "show north half of {anchor_name}", | |
| "cut {anchor_name} to the north half", | |
| "only the northern side of {anchor_name}", | |
| "north part of {anchor_name}", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="partial_02", | |
| family="partial_selection", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")," | |
| " bbox AS (" | |
| " SELECT ST_XMin(geometry) AS xmin, ST_XMax(geometry) AS xmax," | |
| " ST_YMin(geometry) AS ymin, ST_YMax(geometry) AS ymax FROM a" | |
| ")," | |
| " clip AS (" | |
| " SELECT ST_MakeEnvelope(xmin, ymin, xmax, (ymin + ymax) / 2.0) AS half_geom FROM bbox" | |
| ")" | |
| " SELECT ST_AsGeoJSON(ST_Intersection(a.geometry, clip.half_geom)) AS geometry" | |
| " FROM a, clip" | |
| ), | |
| question_hints=[ | |
| "the southern half of {anchor_name}", | |
| "southern part of {anchor_name}", | |
| "the bottom half of {anchor_name}", | |
| "southern portion of {anchor_name}", | |
| "lower half of {anchor_name}", | |
| "bottom side of {anchor_name}", | |
| "show south half of {anchor_name}", | |
| "cut {anchor_name} to the south half", | |
| "only the southern side of {anchor_name}", | |
| "south part of {anchor_name}", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="partial_03", | |
| family="partial_selection", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")," | |
| " bbox AS (" | |
| " SELECT ST_XMin(geometry) AS xmin, ST_XMax(geometry) AS xmax," | |
| " ST_YMin(geometry) AS ymin, ST_YMax(geometry) AS ymax FROM a" | |
| ")," | |
| " clip AS (" | |
| " SELECT ST_MakeEnvelope((xmin + xmax) / 2.0, ymin, xmax, ymax) AS half_geom FROM bbox" | |
| ")" | |
| " SELECT ST_AsGeoJSON(ST_Intersection(a.geometry, clip.half_geom)) AS geometry" | |
| " FROM a, clip" | |
| ), | |
| question_hints=[ | |
| "the eastern half of {anchor_name}", | |
| "eastern part of {anchor_name}", | |
| "the right half of {anchor_name}", | |
| "eastern portion of {anchor_name}", | |
| "east side of {anchor_name}", | |
| "show east half of {anchor_name}", | |
| "cut {anchor_name} to the east half", | |
| "only the eastern side of {anchor_name}", | |
| "right side of {anchor_name}", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="partial_04", | |
| family="partial_selection", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")," | |
| " bbox AS (" | |
| " SELECT ST_XMin(geometry) AS xmin, ST_XMax(geometry) AS xmax," | |
| " ST_YMin(geometry) AS ymin, ST_YMax(geometry) AS ymax FROM a" | |
| ")," | |
| " clip AS (" | |
| " SELECT ST_MakeEnvelope(xmin, ymin, (xmin + xmax) / 2.0, ymax) AS half_geom FROM bbox" | |
| ")" | |
| " SELECT ST_AsGeoJSON(ST_Intersection(a.geometry, clip.half_geom)) AS geometry" | |
| " FROM a, clip" | |
| ), | |
| question_hints=[ | |
| "the western half of {anchor_name}", | |
| "western part of {anchor_name}", | |
| "the left half of {anchor_name}", | |
| "western portion of {anchor_name}", | |
| "west side of {anchor_name}", | |
| "show west half of {anchor_name}", | |
| "cut {anchor_name} to the west half", | |
| "only the western side of {anchor_name}", | |
| "left side of {anchor_name}", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="partial_05", | |
| family="partial_selection", | |
| sql_difficulty="hard", | |
| anchor_source="mixed", | |
| num_anchors=2, | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry AS g1 FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")," | |
| " b AS (" | |
| " SELECT geometry AS g2 FROM read_parquet('natural_earth') WHERE id = '{clip_feature_id}'" | |
| ")" | |
| " SELECT ST_AsGeoJSON(ST_Intersection(a.g1, b.g2)) AS geometry" | |
| " FROM a, b" | |
| " WHERE ST_Intersects(a.g1, b.g2)" | |
| ), | |
| question_hints=[ | |
| "the part of {anchor_name} that overlaps the {clip_feature_name}", | |
| "{anchor_name} within the {clip_feature_name}", | |
| "the portion of {anchor_name} inside the {clip_feature_name}", | |
| "part of the {clip_feature_name} in {anchor_name}", | |
| "part of {anchor_name} in the {clip_feature_name}", | |
| "clip {anchor_name} to the {clip_feature_name}", | |
| "{anchor_name} clipped to the {clip_feature_name}", | |
| "{clip_feature_name} inside {anchor_name}", | |
| "parts of {anchor_name} covered by the {clip_feature_name}", | |
| "show me where {anchor_name} and the {clip_feature_name} overlap", | |
| "keep only the part of {anchor_name} in the {clip_feature_name}", | |
| "intersection of {anchor_name} and the {clip_feature_name}", | |
| "give me the overlap between {anchor_name} and the {clip_feature_name}", | |
| "only the shared area of {anchor_name} and the {clip_feature_name}", | |
| ], | |
| ), | |
| # ββ AGGREGATION ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # ST_Area uses raw geometry in the ORDER BY; final SELECT wraps output. | |
| SQLTemplate( | |
| template_id="agg_01", | |
| family="aggregation", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype=None, # filled at generation time: county or region | |
| requires_aggregation=True, | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name," | |
| " ST_AsGeoJSON(b.geometry) AS geometry," | |
| " ST_Area(b.geometry) AS area" | |
| " FROM read_parquet('divisions_area') AS b, a" | |
| " WHERE ST_Within(b.geometry, a.geometry)" | |
| " AND b.subtype = '{target_subtype}'" | |
| " ORDER BY area DESC" | |
| " LIMIT {top_n}" | |
| ), | |
| question_hints=[ | |
| "top {top_n} largest {target_subtype}s in {anchor_name}", | |
| "biggest {top_n} {target_subtype}s in {anchor_name}", | |
| "{top_n} largest {target_subtype}s inside {anchor_name}", | |
| "the {top_n} biggest {target_subtype}s within {anchor_name}", | |
| "largest {target_subtype} in {anchor_name}", | |
| "which {target_subtype} in {anchor_name} has the most area?", | |
| "show the biggest {target_subtype}s in {anchor_name}", | |
| "list the largest {target_subtype}s for {anchor_name}", | |
| "give me the biggest {target_subtype}s in {anchor_name}", | |
| "{anchor_name} largest {target_subtype}s", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="agg_02", | |
| family="aggregation", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype=None, # filled at generation time: county or region | |
| requires_aggregation=True, | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name," | |
| " ST_AsGeoJSON(b.geometry) AS geometry," | |
| " ST_Area(b.geometry) AS area" | |
| " FROM read_parquet('divisions_area') AS b, a" | |
| " WHERE ST_Within(b.geometry, a.geometry)" | |
| " AND b.subtype = '{target_subtype}'" | |
| " ORDER BY area ASC" | |
| " LIMIT {top_n}" | |
| ), | |
| question_hints=[ | |
| "top {top_n} smallest {target_subtype}s in {anchor_name}", | |
| "smallest {top_n} {target_subtype}s in {anchor_name}", | |
| "{top_n} smallest {target_subtype}s inside {anchor_name}", | |
| "the {top_n} tiniest {target_subtype}s within {anchor_name}", | |
| "smallest {target_subtype} in {anchor_name}", | |
| "which {target_subtype} in {anchor_name} has the least area?", | |
| "show the smallest {target_subtype}s in {anchor_name}", | |
| "list the smallest {target_subtype}s for {anchor_name}", | |
| "give me the tiniest {target_subtype}s in {anchor_name}", | |
| "{anchor_name} smallest {target_subtype}s", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="agg_03", | |
| family="aggregation", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype=None, # filled at generation time: county or region | |
| requires_aggregation=True, | |
| sql_template=( | |
| "SELECT id, names.\"primary\" AS name," | |
| " ST_AsGeoJSON(geometry) AS geometry," | |
| " ST_Area(geometry) AS area" | |
| " FROM read_parquet('divisions_area')" | |
| " WHERE country = '{country}'" | |
| " AND subtype = '{target_subtype}'" | |
| " ORDER BY area DESC" | |
| " LIMIT {top_n}" | |
| ), | |
| question_hints=[ | |
| "top {top_n} largest {target_subtype}s in {anchor_name}", | |
| "{top_n} biggest {target_subtype}s in {anchor_name}", | |
| "largest {top_n} {target_subtype}s in {anchor_name}", | |
| "the {top_n} largest {target_subtype}s in {anchor_name}", | |
| "biggest {target_subtype} in {anchor_name}", | |
| "which {target_subtype} in {anchor_name} is the largest?", | |
| "show the largest {target_subtype}s in {anchor_name}", | |
| "list the biggest {target_subtype}s in {anchor_name}", | |
| "give me the largest {target_subtype}s for {anchor_name}", | |
| "{anchor_name} biggest {target_subtype}s", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="agg_04", | |
| family="aggregation", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype=None, # filled at generation time: county or region | |
| requires_aggregation=True, | |
| sql_template=( | |
| "SELECT id, names.\"primary\" AS name," | |
| " ST_AsGeoJSON(geometry) AS geometry," | |
| " ST_Area(geometry) AS area" | |
| " FROM read_parquet('divisions_area')" | |
| " WHERE country = '{country}'" | |
| " AND subtype = '{target_subtype}'" | |
| " ORDER BY area ASC" | |
| " LIMIT {top_n}" | |
| ), | |
| question_hints=[ | |
| "top {top_n} smallest {target_subtype}s in {anchor_name}", | |
| "{top_n} smallest {target_subtype}s in {anchor_name}", | |
| "smallest {top_n} {target_subtype}s in {anchor_name}", | |
| "the {top_n} smallest {target_subtype}s in {anchor_name}", | |
| "smallest {target_subtype} in {anchor_name}", | |
| "which {target_subtype} in {anchor_name} is the smallest?", | |
| "show the smallest {target_subtype}s in {anchor_name}", | |
| "list the smallest {target_subtype}s in {anchor_name}", | |
| "give me the smallest {target_subtype}s for {anchor_name}", | |
| "{anchor_name} tiniest {target_subtype}s", | |
| ], | |
| ), | |
| # ββ WINDOW FUNCTION ββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # CTE keeps raw geometry for ST_Area; final SELECT wraps with ST_AsGeoJSON. | |
| SQLTemplate( | |
| template_id="window_01", | |
| family="window_function", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="county", | |
| requires_aggregation=True, | |
| sql_template=( | |
| "WITH ranked AS (" | |
| " SELECT id, names.\"primary\" AS name, subtype, country, region, geometry," | |
| " ST_Area(geometry) AS area," | |
| " ROW_NUMBER() OVER (PARTITION BY region ORDER BY ST_Area(geometry) DESC) AS rn" | |
| " FROM read_parquet('divisions_area')" | |
| " WHERE country = '{country}'" | |
| " AND subtype = '{target_subtype}'" | |
| ")" | |
| " SELECT id, name, subtype, country, region," | |
| " ST_AsGeoJSON(geometry) AS geometry, area" | |
| " FROM ranked" | |
| " WHERE rn = 1" | |
| ), | |
| question_hints=[ | |
| "the largest {target_subtype} in each region of {anchor_name}", | |
| "biggest {target_subtype} per region in {anchor_name}", | |
| "largest {target_subtype} for every region of {anchor_name}", | |
| "the biggest {target_subtype} in each province of {anchor_name}", | |
| "show the largest {target_subtype} in every region of {anchor_name}", | |
| "list biggest {target_subtype} by region in {anchor_name}", | |
| "for each region in {anchor_name}, give the largest {target_subtype}", | |
| "largest {target_subtype}s grouped by region in {anchor_name}", | |
| "one biggest {target_subtype} for each region of {anchor_name}", | |
| "the largest district in each state of {anchor_name}", | |
| "biggest district per province in {anchor_name}", | |
| "the largest municipality in each region of {anchor_name}", | |
| "biggest borough per state in {anchor_name}", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="window_02", | |
| family="window_function", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="county", | |
| requires_aggregation=True, | |
| sql_template=( | |
| "WITH ranked AS (" | |
| " SELECT id, names.\"primary\" AS name, subtype, country, region, geometry," | |
| " ST_Area(geometry) AS area," | |
| " ROW_NUMBER() OVER (PARTITION BY region ORDER BY ST_Area(geometry) ASC) AS rn" | |
| " FROM read_parquet('divisions_area')" | |
| " WHERE country = '{country}'" | |
| " AND subtype = '{target_subtype}'" | |
| ")" | |
| " SELECT id, name, subtype, country, region," | |
| " ST_AsGeoJSON(geometry) AS geometry, area" | |
| " FROM ranked" | |
| " WHERE rn = 1" | |
| ), | |
| question_hints=[ | |
| "the smallest {target_subtype} in each region of {anchor_name}", | |
| "smallest {target_subtype} per region in {anchor_name}", | |
| "tiniest {target_subtype} for every region of {anchor_name}", | |
| "the smallest {target_subtype} in each province of {anchor_name}", | |
| "show the smallest {target_subtype} in every region of {anchor_name}", | |
| "list smallest {target_subtype} by region in {anchor_name}", | |
| "for each region in {anchor_name}, give the smallest {target_subtype}", | |
| "smallest {target_subtype}s grouped by region in {anchor_name}", | |
| "one tiniest {target_subtype} for each region of {anchor_name}", | |
| "the smallest district in each state of {anchor_name}", | |
| "smallest district per province in {anchor_name}", | |
| "the smallest municipality in each region of {anchor_name}", | |
| "tiniest borough per state in {anchor_name}", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="window_03", | |
| family="window_function", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="region", | |
| requires_aggregation=True, | |
| sql_template=( | |
| "WITH ranked AS (" | |
| " SELECT id, names.\"primary\" AS name, subtype, country, geometry," | |
| " ST_Area(geometry) AS area," | |
| " ROW_NUMBER() OVER (PARTITION BY country ORDER BY ST_Area(geometry) DESC) AS rn" | |
| " FROM read_parquet('divisions_area')" | |
| " WHERE country = '{country}'" | |
| " AND subtype = '{target_subtype}'" | |
| ")" | |
| " SELECT id, name, subtype, country," | |
| " ST_AsGeoJSON(geometry) AS geometry, area" | |
| " FROM ranked" | |
| " WHERE rn = 1" | |
| ), | |
| question_hints=[ | |
| "the largest {target_subtype} in {anchor_name}", | |
| "which {target_subtype} of {anchor_name} has the most area?", | |
| "biggest {target_subtype} in {anchor_name}", | |
| "what is the largest {target_subtype} of {anchor_name}?", | |
| "show the biggest {target_subtype} of {anchor_name}", | |
| "the largest state in {anchor_name}", | |
| "which state of {anchor_name} is the biggest?", | |
| "the largest province in {anchor_name}", | |
| "biggest state of {anchor_name}", | |
| "what is the largest province of {anchor_name}?", | |
| "the largest oblast in {anchor_name}", | |
| "biggest department of {anchor_name}", | |
| "which prefecture of {anchor_name} has the most area?", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="window_04", | |
| family="window_function", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="region", | |
| requires_aggregation=True, | |
| sql_template=( | |
| "WITH ranked AS (" | |
| " SELECT id, names.\"primary\" AS name, subtype, country, geometry," | |
| " ST_Area(geometry) AS area," | |
| " ROW_NUMBER() OVER (PARTITION BY country ORDER BY ST_Area(geometry) ASC) AS rn" | |
| " FROM read_parquet('divisions_area')" | |
| " WHERE country = '{country}'" | |
| " AND subtype = '{target_subtype}'" | |
| ")" | |
| " SELECT id, name, subtype, country," | |
| " ST_AsGeoJSON(geometry) AS geometry, area" | |
| " FROM ranked" | |
| " WHERE rn = 1" | |
| ), | |
| question_hints=[ | |
| "the smallest {target_subtype} in {anchor_name}", | |
| "which {target_subtype} of {anchor_name} has the least area?", | |
| "tiniest {target_subtype} in {anchor_name}", | |
| "what is the smallest {target_subtype} of {anchor_name}?", | |
| "show the smallest {target_subtype} of {anchor_name}", | |
| "the smallest state in {anchor_name}", | |
| "which state of {anchor_name} is the smallest?", | |
| "the smallest province in {anchor_name}", | |
| "tiniest state of {anchor_name}", | |
| "what is the smallest province of {anchor_name}?", | |
| "the smallest oblast in {anchor_name}", | |
| "smallest department of {anchor_name}", | |
| "which prefecture of {anchor_name} has the least area?", | |
| ], | |
| ), | |
| # ββ ATTRIBUTE FILTER βββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # No spatial op β pure WHERE on is_land / is_territorial / country. | |
| SQLTemplate( | |
| template_id="attr_01", | |
| family="attribute_filter", | |
| sql_difficulty="medium", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="county", | |
| sql_template=( | |
| "SELECT id, names.\"primary\" AS name, subtype, country," | |
| " ST_AsGeoJSON(geometry) AS geometry" | |
| " FROM read_parquet('divisions_area')" | |
| " WHERE country = '{country}'" | |
| " AND is_land = TRUE" | |
| " AND subtype = '{target_subtype}'" | |
| ), | |
| question_hints=[ | |
| "land {target_subtype}s of {anchor_name}", | |
| "which {target_subtype}s of {anchor_name} are on land?", | |
| "{anchor_name}'s land-based {target_subtype}s", | |
| "show the land {target_subtype}s of {anchor_name}", | |
| "find land {target_subtype}s of {anchor_name}", | |
| "give me land-based {target_subtype}s in {anchor_name}", | |
| "which {target_subtype}s of {anchor_name} are land-based?", | |
| "non-island {target_subtype}s of {anchor_name}", | |
| "land districts of {anchor_name}", | |
| "which districts of {anchor_name} are on land?", | |
| "land-based municipalities of {anchor_name}", | |
| "mainland boroughs of {anchor_name}", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="attr_02", | |
| family="attribute_filter", | |
| sql_difficulty="medium", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="region", | |
| sql_template=( | |
| "SELECT id, names.\"primary\" AS name, subtype, country," | |
| " ST_AsGeoJSON(geometry) AS geometry" | |
| " FROM read_parquet('divisions_area')" | |
| " WHERE country = '{country}'" | |
| " AND is_territorial = TRUE" | |
| " AND subtype = '{target_subtype}'" | |
| ), | |
| question_hints=[ | |
| "territorial {target_subtype}s of {anchor_name}", | |
| "official territorial divisions of {anchor_name}", | |
| "recognised territorial {target_subtype}s belonging to {anchor_name}", | |
| "which territorial regions does {anchor_name} have?", | |
| "show territorial {target_subtype}s of {anchor_name}", | |
| "find official territorial regions of {anchor_name}", | |
| "give me recognised territorial {target_subtype}s of {anchor_name}", | |
| "territorial regions under {anchor_name}", | |
| "{anchor_name} official territorial {target_subtype}s", | |
| "official states of {anchor_name}", | |
| "recognised provinces of {anchor_name}", | |
| "territorial provinces of {anchor_name}", | |
| "official oblasts of {anchor_name}", | |
| "recognised departments of {anchor_name}", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="attr_03", | |
| family="attribute_filter", | |
| sql_difficulty="medium", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="region", | |
| sql_template=( | |
| "SELECT id, names.\"primary\" AS name, subtype, country," | |
| " ST_AsGeoJSON(geometry) AS geometry" | |
| " FROM read_parquet('divisions_area')" | |
| " WHERE country = '{country}'" | |
| " AND subtype = '{target_subtype}'" | |
| " AND is_land = FALSE" | |
| ), | |
| question_hints=[ | |
| "offshore {target_subtype}s of {anchor_name}", | |
| "{target_subtype}s of {anchor_name} that are not on land", | |
| "water-associated {target_subtype}s of {anchor_name}", | |
| "marine or offshore {target_subtype}s of {anchor_name}", | |
| "show offshore {target_subtype}s of {anchor_name}", | |
| "find {target_subtype}s of {anchor_name} in water", | |
| "give me non-land {target_subtype}s of {anchor_name}", | |
| "water-side {target_subtype}s of {anchor_name}", | |
| "{anchor_name} {target_subtype}s not on land", | |
| "offshore states of {anchor_name}", | |
| "island provinces of {anchor_name}", | |
| "non-land regions of {anchor_name}", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="attr_04", | |
| family="attribute_filter", | |
| sql_difficulty="medium", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="country", | |
| sql_template=( | |
| "SELECT id, names.\"primary\" AS name, subtype, country," | |
| " ST_AsGeoJSON(geometry) AS geometry" | |
| " FROM read_parquet('divisions_area')" | |
| " WHERE country = '{country}'" | |
| " AND subtype = '{target_subtype}'" | |
| " AND is_territorial = TRUE" | |
| ), | |
| question_hints=[ | |
| "show the recognised boundary of {anchor_name}", | |
| "find the official territorial extent of {anchor_name}", | |
| "get the internationally recognised boundary of {anchor_name}", | |
| "map the sovereign boundary of {anchor_name}", | |
| "show the official country geometry for {anchor_name}", | |
| "display the territorial country entry for {anchor_name}", | |
| "give me the recognised country outline of {anchor_name}", | |
| "official boundary of {anchor_name}", | |
| ], | |
| ), | |
| # ββ NATURAL EARTH CHAINED βββββββββββββββββββββββββββββββββββββββββββββββ | |
| # chained_04: localities in a region that intersect a river or lake. | |
| # chained_05: localities in a region that lie on a mountain range. | |
| SQLTemplate( | |
| template_id="chained_04", | |
| family="chained", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="region", | |
| sql_template=( | |
| "WITH region AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, region" | |
| " WHERE b.subtype = '{target_subtype}'" | |
| " AND ST_Within(b.geometry, region.geometry)" | |
| " AND EXISTS (" | |
| " SELECT 1 FROM read_parquet('natural_earth') AS n" | |
| " WHERE n.subtype IN ('river', 'lake', 'basin')" | |
| " AND ST_Intersects(b.geometry, n.geometry)" | |
| " )" | |
| ), | |
| question_hints=[ | |
| "riverside {target_subtype}s in {anchor_name}", | |
| "{target_subtype}s in {anchor_name} near a river or lake", | |
| "which {target_subtype}s in {anchor_name} are on a waterway?", | |
| "lakeside or riverside {target_subtype}s within {anchor_name}", | |
| "{target_subtype}s in {anchor_name} that touch a river", | |
| "which {target_subtype}s in {anchor_name} are on a lake?", | |
| "waterfront {target_subtype}s of {anchor_name}", | |
| "show water-side {target_subtype}s in {anchor_name}", | |
| "find {target_subtype}s of {anchor_name} by rivers or lakes", | |
| "{target_subtype}s of {anchor_name} on the water", | |
| "riverside states of {anchor_name}", | |
| "states of {anchor_name} near a river or lake", | |
| "provinces of {anchor_name} along a waterway", | |
| "which regions of {anchor_name} have a major river?", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="chained_05", | |
| family="chained", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="region", | |
| sql_template=( | |
| "WITH region AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, region" | |
| " WHERE b.subtype = '{target_subtype}'" | |
| " AND ST_Within(b.geometry, region.geometry)" | |
| " AND EXISTS (" | |
| " SELECT 1 FROM read_parquet('natural_earth') AS n" | |
| " WHERE n.subtype IN ('range/mtn', 'depression')" | |
| " AND ST_Intersects(b.geometry, n.geometry)" | |
| " )" | |
| ), | |
| question_hints=[ | |
| "mountain {target_subtype}s in {anchor_name}", | |
| "{target_subtype}s in {anchor_name} on a mountain range", | |
| "which {target_subtype}s in {anchor_name} are in the mountains?", | |
| "highland {target_subtype}s within {anchor_name}", | |
| "{target_subtype}s of {anchor_name} in mountainous terrain", | |
| "{target_subtype}s in {anchor_name} near a mountain range", | |
| "show mountain {target_subtype}s in {anchor_name}", | |
| "find {target_subtype}s of {anchor_name} in hilly terrain", | |
| "{target_subtype}s of {anchor_name} by the mountains", | |
| "mountain states of {anchor_name}", | |
| "states of {anchor_name} in mountainous terrain", | |
| "highland provinces of {anchor_name}", | |
| "which regions of {anchor_name} have a mountain range?", | |
| ], | |
| ), | |
| # ββ CHAINED (county-level) ββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Same spatial patterns as chained_01..05 but targeting counties/districts | |
| # so the model learns "coastal districts of X", "riverside counties", etc. | |
| SQLTemplate( | |
| template_id="chained_06", | |
| family="chained", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="county", | |
| sql_template=( | |
| "WITH region AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, region" | |
| " WHERE b.subtype = '{target_subtype}'" | |
| " AND ST_Within(b.geometry, region.geometry)" | |
| " AND EXISTS (" | |
| " SELECT 1 FROM read_parquet('natural_earth') AS n" | |
| " WHERE n.subtype IN ('ocean', 'sea')" | |
| " AND ST_Intersects(b.geometry, n.geometry)" | |
| " )" | |
| ), | |
| question_hints=[ | |
| "coastal {target_subtype}s of {anchor_name}", | |
| "which districts of {anchor_name} are on the coast?", | |
| "{target_subtype}s in {anchor_name} that border the sea", | |
| "seaside {target_subtype}s within {anchor_name}", | |
| "{target_subtype}s of {anchor_name} with ocean access", | |
| "which {target_subtype}s in {anchor_name} touch the sea?", | |
| "maritime {target_subtype}s of {anchor_name}", | |
| "show coastal districts of {anchor_name}", | |
| "find {target_subtype}s of {anchor_name} by the sea", | |
| "give me shoreline {target_subtype}s in {anchor_name}", | |
| "{target_subtype}s of {anchor_name} on the coast", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="chained_07", | |
| family="chained", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="county", | |
| sql_template=( | |
| "WITH region AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, region" | |
| " WHERE b.subtype = '{target_subtype}'" | |
| " AND ST_Within(b.geometry, region.geometry)" | |
| " AND NOT EXISTS (" | |
| " SELECT 1 FROM read_parquet('natural_earth') AS n" | |
| " WHERE n.subtype IN ('ocean', 'sea')" | |
| " AND ST_Intersects(b.geometry, n.geometry)" | |
| " )" | |
| ), | |
| question_hints=[ | |
| "landlocked {target_subtype}s of {anchor_name}", | |
| "which districts of {anchor_name} have no coastline?", | |
| "interior {target_subtype}s within {anchor_name}", | |
| "{target_subtype}s in {anchor_name} with no sea access", | |
| "non-coastal {target_subtype}s of {anchor_name}", | |
| "inland {target_subtype}s of {anchor_name}", | |
| "show inland districts of {anchor_name}", | |
| "find non-coastal {target_subtype}s in {anchor_name}", | |
| "give me inner {target_subtype}s of {anchor_name}", | |
| "{target_subtype}s of {anchor_name} away from the coast", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="chained_08", | |
| family="chained", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="county", | |
| sql_template=( | |
| "WITH region AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, region" | |
| " WHERE b.subtype = '{target_subtype}'" | |
| " AND ST_Within(b.geometry, region.geometry)" | |
| " AND EXISTS (" | |
| " SELECT 1 FROM read_parquet('natural_earth') AS n" | |
| " WHERE n.subtype IN ('river', 'lake', 'basin')" | |
| " AND ST_Intersects(b.geometry, n.geometry)" | |
| " )" | |
| ), | |
| question_hints=[ | |
| "riverside {target_subtype}s of {anchor_name}", | |
| "which districts of {anchor_name} have a river or lake?", | |
| "{target_subtype}s in {anchor_name} on a waterway", | |
| "lakeside {target_subtype}s within {anchor_name}", | |
| "{target_subtype}s of {anchor_name} along a river", | |
| "which {target_subtype}s in {anchor_name} border a lake?", | |
| "show water-side {target_subtype}s of {anchor_name}", | |
| "find {target_subtype}s in {anchor_name} near rivers and lakes", | |
| "give me riverside districts of {anchor_name}", | |
| "{target_subtype}s of {anchor_name} by the water", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="chained_09", | |
| family="chained", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="county", | |
| sql_template=( | |
| "WITH region AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, region" | |
| " WHERE b.subtype = '{target_subtype}'" | |
| " AND ST_Within(b.geometry, region.geometry)" | |
| " AND EXISTS (" | |
| " SELECT 1 FROM read_parquet('natural_earth') AS n" | |
| " WHERE n.subtype IN ('range/mtn', 'depression')" | |
| " AND ST_Intersects(b.geometry, n.geometry)" | |
| " )" | |
| ), | |
| question_hints=[ | |
| "mountain {target_subtype}s of {anchor_name}", | |
| "which districts of {anchor_name} are in the mountains?", | |
| "{target_subtype}s in {anchor_name} on a mountain range", | |
| "highland {target_subtype}s within {anchor_name}", | |
| "{target_subtype}s of {anchor_name} in mountainous terrain", | |
| "which {target_subtype}s in {anchor_name} have mountain ranges?", | |
| "show mountain districts of {anchor_name}", | |
| "find {target_subtype}s of {anchor_name} in hilly areas", | |
| "give me highland {target_subtype}s in {anchor_name}", | |
| "{target_subtype}s of {anchor_name} by the mountains", | |
| ], | |
| ), | |
| # chained_10 / chained_11: coastal and inland REGIONS of a country. | |
| # Same pattern as chained_06/07 but with target_subtype='region' and | |
| # container forced to a country so phrasings like "coastal states of | |
| # India" / "inland provinces of Kenya" work correctly. | |
| SQLTemplate( | |
| template_id="chained_10", | |
| family="chained", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="region", | |
| sql_template=( | |
| "WITH country AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, country" | |
| " WHERE b.subtype = '{target_subtype}'" | |
| " AND ST_Within(b.geometry, country.geometry)" | |
| " AND EXISTS (" | |
| " SELECT 1 FROM read_parquet('natural_earth') AS n" | |
| " WHERE n.subtype IN ('ocean', 'sea')" | |
| " AND ST_Intersects(b.geometry, n.geometry)" | |
| " )" | |
| ), | |
| question_hints=[ | |
| "coastal states of {anchor_name}", | |
| "coastal regions of {anchor_name}", | |
| "coastal provinces of {anchor_name}", | |
| "which states of {anchor_name} are on the coast?", | |
| "regions of {anchor_name} with sea access", | |
| "states of {anchor_name} that border the ocean", | |
| "maritime states of {anchor_name}", | |
| "seaside regions of {anchor_name}", | |
| "which provinces of {anchor_name} touch the sea?", | |
| "states of {anchor_name} along the coast", | |
| "show coastal regions of {anchor_name}", | |
| "find seaside states of {anchor_name}", | |
| "give me provinces of {anchor_name} on the coast", | |
| "{anchor_name} regions by the sea", | |
| "which oblasts of {anchor_name} are on the coast?", | |
| "coastal departments of {anchor_name}", | |
| "coastal prefectures of {anchor_name}", | |
| "which cantons of {anchor_name} have sea access?", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="chained_11", | |
| family="chained", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="region", | |
| sql_template=( | |
| "WITH country AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, country" | |
| " WHERE b.subtype = '{target_subtype}'" | |
| " AND ST_Within(b.geometry, country.geometry)" | |
| " AND NOT EXISTS (" | |
| " SELECT 1 FROM read_parquet('natural_earth') AS n" | |
| " WHERE n.subtype IN ('ocean', 'sea')" | |
| " AND ST_Intersects(b.geometry, n.geometry)" | |
| " )" | |
| ), | |
| question_hints=[ | |
| "landlocked states of {anchor_name}", | |
| "inland regions of {anchor_name}", | |
| "non-coastal states of {anchor_name}", | |
| "which states of {anchor_name} have no coast?", | |
| "inland provinces of {anchor_name}", | |
| "regions of {anchor_name} without sea access", | |
| "interior states of {anchor_name}", | |
| "states of {anchor_name} that don't border the ocean", | |
| "show inland regions of {anchor_name}", | |
| "find non-coastal states of {anchor_name}", | |
| "give me inner provinces of {anchor_name}", | |
| "{anchor_name} regions away from the sea", | |
| "landlocked oblasts of {anchor_name}", | |
| "inland departments of {anchor_name}", | |
| "interior prefectures of {anchor_name}", | |
| "inland cantons of {anchor_name}", | |
| ], | |
| ), | |
| # chained_12 / chained_13: coastal and landlocked COUNTRIES adjacent to anchor. | |
| # Anchor is any country; finds its neighbours that are coastal / landlocked. | |
| SQLTemplate( | |
| template_id="chained_12", | |
| family="chained", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="country", | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, a" | |
| " WHERE b.subtype = 'country'" | |
| " AND b.id != '{anchor_id}'" | |
| " AND ST_Touches(a.geometry, b.geometry)" | |
| " AND EXISTS (" | |
| " SELECT 1 FROM read_parquet('natural_earth') AS n" | |
| " WHERE n.subtype IN ('ocean', 'sea')" | |
| " AND ST_Intersects(b.geometry, n.geometry)" | |
| " )" | |
| ), | |
| question_hints=[ | |
| "which countries bordering {anchor_name} are coastal?", | |
| "coastal countries adjacent to {anchor_name}", | |
| "which neighboring countries of {anchor_name} have sea access?", | |
| "countries that touch {anchor_name} and border the ocean", | |
| "which nations next to {anchor_name} are on the coast?", | |
| "find coastal nations bordering {anchor_name}", | |
| "countries neighboring {anchor_name} with a coastline", | |
| "which countries near {anchor_name} touch the sea?", | |
| ], | |
| ), | |
| SQLTemplate( | |
| template_id="chained_13", | |
| family="chained", | |
| sql_difficulty="hard", | |
| anchor_source="divisions_area", | |
| num_anchors=1, | |
| target_subtype="country", | |
| sql_template=( | |
| "WITH a AS (" | |
| " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" | |
| ")" | |
| " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," | |
| " ST_AsGeoJSON(b.geometry) AS geometry" | |
| " FROM read_parquet('divisions_area') AS b, a" | |
| " WHERE b.subtype = 'country'" | |
| " AND b.id != '{anchor_id}'" | |
| " AND ST_Touches(a.geometry, b.geometry)" | |
| " AND NOT EXISTS (" | |
| " SELECT 1 FROM read_parquet('natural_earth') AS n" | |
| " WHERE n.subtype IN ('ocean', 'sea')" | |
| " AND ST_Intersects(b.geometry, n.geometry)" | |
| " )" | |
| ), | |
| question_hints=[ | |
| "which countries bordering {anchor_name} are landlocked?", | |
| "landlocked countries adjacent to {anchor_name}", | |
| "which neighboring countries of {anchor_name} have no sea access?", | |
| "countries that touch {anchor_name} and are landlocked", | |
| "which nations next to {anchor_name} have no coastline?", | |
| "find landlocked nations bordering {anchor_name}", | |
| "countries neighboring {anchor_name} with no ocean access", | |
| "which countries near {anchor_name} are inland?", | |
| ], | |
| ), | |
| ] | |
| # --------------------------------------------------------------------------- | |
| # Helpers | |
| # --------------------------------------------------------------------------- | |
| def get_templates_by_family(family: str) -> List[SQLTemplate]: | |
| """Return all templates for a specific task family.""" | |
| return [t for t in TEMPLATES if t.family == family] | |
| def get_template_by_id(template_id: str) -> SQLTemplate: | |
| """Return a template by its ID, raising ValueError if not found.""" | |
| for t in TEMPLATES: | |
| if t.template_id == template_id: | |
| return t | |
| raise ValueError(f"Template '{template_id}' not found") | |
| if __name__ == "__main__": | |
| families: dict = {} | |
| for t in TEMPLATES: | |
| families[t.family] = families.get(t.family, 0) + 1 | |
| print("SQL Template Catalog") | |
| print("=" * 60) | |
| for family, count in sorted(families.items()): | |
| print(f"{family:20s}: {count:2d} templates") | |
| print(f"{'TOTAL':20s}: {len(TEMPLATES):2d} templates") | |
| # Verify every template's final SELECT wraps geometry with ST_AsGeoJSON | |
| print() | |
| print("Geometry output check (all should show ST_AsGeoJSON)") | |
| print("=" * 60) | |
| for t in TEMPLATES: | |
| has_geojson = "ST_AsGeoJSON" in t.sql_template | |
| status = "OK" if has_geojson else "MISSING" | |
| print(f" {t.template_id:20s}: {status}") | |