""" SQL template definitions for synthetic data generation. Geometry output convention -------------------------- Every final SELECT wraps geometry with ST_AsGeoJSON(): ST_AsGeoJSON(geometry) AS geometry This returns a GeoJSON string instead of raw WKB bytes, which is directly JSON-serialisable and matches what the serving stack expects. CTEs that compute intermediate geometries (used only for spatial predicates or ST_Area) keep the column as raw GEOMETRY so DuckDB spatial functions work. Buffer distance convention -------------------------- All buffer templates use {buffer_m} (metres) in SQL: geometry / 111_320. Question hints use {buffer_label} — a human-readable string like "50 km" or "500 m" — generated by the handler from a combined (metres, label) choice list. This keeps km and metre queries in a single template instead of separate ones. Mixed-source candidates ----------------------- generate_samples.py pads every candidate list with 50 % cross-source distractors so the model always sees both source values and learns the correct parquet path from the candidates table. Template families ----------------- direct_lookup Simple single-feature fetch by ID. disambiguation "Place, Container" queries like "Puri, Odisha" — lookup by ID after resolving an ambiguous name via containing region or country mentioned in the query. adjacency ST_Touches — features sharing a border. multi_adjacency Features that simultaneously touch TWO anchors. containment ST_Within / ST_Contains — hierarchical nesting. intersection ST_Intersects — overlapping or crossing features. buffer ST_Buffer — proximity zones in km or metres. chained Containment + EXISTS/NOT EXISTS sea predicate. difference ST_Difference — geometry subtraction. border_corridor Buffered ST_Intersection of a shared border. set_operations ST_Union_Agg — merging multiple geometries. partial_selection Bbox clipping — directional halves or feature clips. aggregation TOP-N by area with ORDER BY. window_function ROW_NUMBER() OVER (PARTITION BY) — per-group ranking. attribute_filter Pure attribute predicates: is_land, country, etc. """ from dataclasses import dataclass from typing import List, Literal _SUBTYPE_ALIASES = { "country": ["countries", "nations"], "region": ["states", "provinces", "oblasts", "departments", "prefectures", "cantons"], "county": ["districts", "municipalities", "boroughs", "communes", "parishes"], } def _join_anchor_exprs(num_anchors: int) -> str: """Join anchor placeholders into a natural-language list.""" names = [f"{{anchor_{i}_name}}" for i in range(1, num_anchors + 1)] if num_anchors == 1: return names[0] if num_anchors == 2: return f"{names[0]} or {names[1]}" return ", ".join(names[:-1]) + f", or {names[-1]}" def _buffer_question_hints(target_subtype: str, anchor_name_expr: str) -> List[str]: """Build varied buffer question hints for one target subtype.""" aliases = _SUBTYPE_ALIASES[target_subtype] return [ f"which {aliases[0]} are within {{buffer_label}} of {anchor_name_expr}?", f"{aliases[0]} within {{buffer_label}} of {anchor_name_expr}", f"find {aliases[0]} within a {{buffer_label}} radius of {anchor_name_expr}", f"which {aliases[1]} are within {{buffer_label}} of {anchor_name_expr}?", f"show {aliases[0]} within {{buffer_label}} of {anchor_name_expr}", f"what {aliases[0]} fall within {{buffer_label}} of {anchor_name_expr}?", f"areas close to {anchor_name_expr} within {{buffer_label}}", ] def _make_buffer_templates() -> List["SQLTemplate"]: """Generate subtype-specific and multi-anchor buffer templates.""" templates: List[SQLTemplate] = [] for template_id, target_subtype in ( ("buffer_04", "country"), ("buffer_05", "region"), ("buffer_06", "county"), ): templates.append( SQLTemplate( template_id=template_id, family="buffer", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=1, target_subtype=target_subtype, requires_buffer=True, sql_template=( "WITH a AS (" " SELECT ST_Buffer(geometry, {buffer_m} / 111320.0) AS geom" " FROM read_parquet('divisions_area')" " WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, a" " WHERE b.id != '{anchor_id}'" " AND b.subtype = '{target_subtype}'" " AND ST_Intersects(b.geometry, a.geom)" ), question_hints=_buffer_question_hints(target_subtype, "{anchor_name}"), ) ) for template_id, num_anchors, target_subtype in ( ("buffer_07", 2, "region"), ("buffer_08", 3, "country"), ("buffer_09", 4, "region"), ("buffer_10", 5, "county"), ): anchor_ids = ", ".join(f"'{{anchor_id_{i}}}'" for i in range(1, num_anchors + 1)) name_expr = _join_anchor_exprs(num_anchors) templates.append( SQLTemplate( template_id=template_id, family="buffer", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=num_anchors, target_subtype=target_subtype, requires_buffer=True, sql_template=( "WITH buf AS (" " SELECT ST_Union_Agg(ST_Buffer(geometry, {buffer_m} / 111320.0)) AS geom" " FROM read_parquet('divisions_area')" f" WHERE id IN ({anchor_ids})" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, buf" f" WHERE b.id NOT IN ({anchor_ids})" " AND b.subtype = '{target_subtype}'" " AND ST_Intersects(b.geometry, buf.geom)" ), question_hints=_buffer_question_hints(target_subtype, name_expr), ) ) return templates def _make_country_in_templates() -> List["SQLTemplate"]: """Generate repeated country-IN set-operation templates.""" specs = ( ("contain_multi_01", 2, "region"), ("contain_multi_02", 3, "region"), ("contain_multi_03", 2, "county"), ) templates: List[SQLTemplate] = [] for template_id, num_anchors, target_subtype in specs: countries = ", ".join(f"'{{country_{i}}}'" for i in range(1, num_anchors + 1)) anchor_names = _join_anchor_exprs(num_anchors).replace(" or ", " and ") aliases = _SUBTYPE_ALIASES[target_subtype] question_hints = [ f"{aliases[0]} of {anchor_names}", f"all {aliases[0]} in {anchor_names}", f"show {aliases[0]} across {anchor_names}", f"find {aliases[0]} across {anchor_names}", f"list {aliases[0]} in {anchor_names}", f"areas of {anchor_names}", ] if len(aliases) > 1: question_hints.extend( [ f"{aliases[1]} of {anchor_names}", f"all {aliases[1]} in {anchor_names}", ] ) templates.append( SQLTemplate( template_id=template_id, family="set_operations", sql_difficulty="medium-hard", anchor_source="divisions_area", num_anchors=num_anchors, target_subtype=target_subtype, sql_template=( "SELECT id, names.\"primary\" AS name, subtype, country," " ST_AsGeoJSON(geometry) AS geometry" " FROM read_parquet('divisions_area')" f" WHERE country IN ({countries})" " AND subtype = '{target_subtype}'" ), question_hints=question_hints, ) ) return templates @dataclass class SQLTemplate: """SQL template for synthetic data generation.""" template_id: str family: str sql_difficulty: Literal["easy", "medium", "medium-hard", "hard"] anchor_source: Literal["divisions_area", "natural_earth", "mixed"] num_anchors: int sql_template: str question_hints: List[str] target_subtype: str = None requires_buffer: bool = False requires_aggregation: bool = False # --------------------------------------------------------------------------- # Template catalog # --------------------------------------------------------------------------- TEMPLATES = [ # ── DIRECT LOOKUP ──────────────────────────────────────────────────────── SQLTemplate( template_id="lookup_01", family="direct_lookup", sql_difficulty="easy", anchor_source="divisions_area", num_anchors=1, sql_template=( "SELECT ST_AsGeoJSON(geometry) AS geometry," " names.\"primary\" AS name, id, subtype, country" " FROM read_parquet('divisions_area')" " WHERE id = '{anchor_id}'" ), question_hints=[ "show me {anchor_name}", "get the boundary of {anchor_name}", "find {anchor_name}", "where is {anchor_name}?", "outline of {anchor_name}", "map {anchor_name}", "what does {anchor_name} look like", "i need the shape of {anchor_name}", "pull up {anchor_name}", "can you show {anchor_name}", "map of {anchor_name}", "{anchor_name} boundary", "locate {anchor_name}", ], ), SQLTemplate( template_id="lookup_02", family="direct_lookup", sql_difficulty="easy", anchor_source="natural_earth", num_anchors=1, sql_template=( "SELECT ST_AsGeoJSON(geometry) AS geometry," " names.\"primary\" AS name, id, subtype" " FROM read_parquet('natural_earth')" " WHERE id = '{anchor_id}'" ), question_hints=[ "show me the {anchor_name}", "get the {anchor_name}", "find the {anchor_name}", "where is the {anchor_name}?", "extent of the {anchor_name}", "geometry of the {anchor_name}", "display the {anchor_name}", "pull up the {anchor_name}", "i want to see the {anchor_name}", "map the {anchor_name}", "how big is the {anchor_name}?", "outline of the {anchor_name}", "show the shape of the {anchor_name}", "trace the {anchor_name}", "map out the {anchor_name}", "where exactly is the {anchor_name}", ], ), # ── DISAMBIGUATION ────────────────────────────────────────────────────── # "Puri, Odisha", "Lisbon, Portugal", "Goa, India" — a common real-world # query pattern where users give a place plus its containing region or # country to disambiguate same-name localities. # SQL is a plain lookup by id (disambiguation happens at candidate-pick # time). Candidates include same-name localities in other regions plus # the container, so the model must read the CSV to choose correctly. # # disambiguate_01: county scoped by its region / country # disambiguate_02: county scoped by its country # disambiguate_03: region scoped by its country SQLTemplate( template_id="disambiguate_01", family="disambiguation", sql_difficulty="easy", anchor_source="divisions_area", num_anchors=1, sql_template=( "SELECT ST_AsGeoJSON(geometry) AS geometry," " names.\"primary\" AS name, id, subtype, country, region" " FROM read_parquet('divisions_area')" " WHERE id = '{anchor_id}'" ), question_hints=[ "{anchor_name}, {container_name}", "{anchor_name} in {container_name}", "the {anchor_name} that's in {container_name}", "show me {anchor_name}, {container_name}", "where is {anchor_name}, {container_name}?", "map of {anchor_name} ({container_name})", "{anchor_name} ({container_name})", "{anchor_name} {container_name}", "pull up {anchor_name} in {container_name}", "find {anchor_name} in {container_name}", "locate {anchor_name} in {container_name}", "need {anchor_name} from {container_name}", "show {anchor_name} under {container_name}", "{anchor_name} near {container_name}", ], ), SQLTemplate( template_id="disambiguate_02", family="disambiguation", sql_difficulty="easy", anchor_source="divisions_area", num_anchors=1, sql_template=( "SELECT ST_AsGeoJSON(geometry) AS geometry," " names.\"primary\" AS name, id, subtype, country" " FROM read_parquet('divisions_area')" " WHERE id = '{anchor_id}'" ), question_hints=[ "{anchor_name}, {container_name}", "{anchor_name} in {container_name}", "{anchor_name}, {container_name}.", "show me {anchor_name}, {container_name}", "where is {anchor_name} in {container_name}?", "the {anchor_name} that's in {container_name}", "map of {anchor_name}, {container_name}", "pull up {anchor_name} ({container_name})", "find {anchor_name} in {container_name}", "{anchor_name} {container_name}", "locate {anchor_name} in {container_name}", "need the {anchor_name} in {container_name}", "show {anchor_name} from {container_name}", "bring up {anchor_name}, {container_name}", ], ), SQLTemplate( template_id="disambiguate_03", family="disambiguation", sql_difficulty="easy", anchor_source="divisions_area", num_anchors=1, sql_template=( "SELECT ST_AsGeoJSON(geometry) AS geometry," " names.\"primary\" AS name, id, subtype, country" " FROM read_parquet('divisions_area')" " WHERE id = '{anchor_id}'" ), question_hints=[ "{anchor_name}, {container_name}", "{anchor_name} state of {container_name}", "the {anchor_name} region in {container_name}", "show me {anchor_name}, {container_name}", "where is {anchor_name} in {container_name}?", "map of {anchor_name}, {container_name}", "{anchor_name} ({container_name})", "{anchor_name} province of {container_name}", "pull up {anchor_name} in {container_name}", "find {anchor_name} {container_name}", "locate {anchor_name} within {container_name}", "show the {anchor_name} part of {container_name}", "need {anchor_name} from {container_name}", "bring up {anchor_name} in {container_name}", "{anchor_name} prefecture of {container_name}", "{anchor_name} oblast of {container_name}", "{anchor_name} department of {container_name}", "{anchor_name} canton in {container_name}", ], ), # ── ADJACENCY ──────────────────────────────────────────────────────────── SQLTemplate( template_id="adj_01", family="adjacency", sql_difficulty="medium", anchor_source="divisions_area", num_anchors=1, sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, a" " WHERE b.id != '{anchor_id}'" " AND ST_Touches(a.geometry, b.geometry)" ), question_hints=[ "what places touch {anchor_name}", "list everything adjacent to {anchor_name}", "what shares a border with {anchor_name}", "neighbours of {anchor_name}", "what's next to {anchor_name}", "what surrounds {anchor_name}?", "places next to {anchor_name}", "everything bordering {anchor_name}", "show adjacent places to {anchor_name}", "areas touching {anchor_name}", "find the neighbours of {anchor_name}", "bordering places for {anchor_name}", "places that meet {anchor_name}", ], ), SQLTemplate( template_id="adj_02", family="adjacency", sql_difficulty="medium", anchor_source="divisions_area", num_anchors=1, target_subtype="region", sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, a" " WHERE b.id != '{anchor_id}'" " AND b.subtype = '{target_subtype}'" " AND ST_Touches(a.geometry, b.geometry)" ), question_hints=[ "which {target_subtype}s border {anchor_name}?", "what {target_subtype}s share a border with {anchor_name}", "{target_subtype}s that touch {anchor_name}", "neighbouring {target_subtype}s of {anchor_name}", "which {target_subtype}s are adjacent to {anchor_name}?", "{target_subtype}s along the {anchor_name} border", "find {target_subtype}s next to {anchor_name}", "show {target_subtype}s bordering {anchor_name}", "{target_subtype}s beside {anchor_name}", "all {target_subtype}s touching {anchor_name}", "{target_subtype}s meeting {anchor_name}", "which states border {anchor_name}?", "which provinces share a border with {anchor_name}?", "states adjacent to {anchor_name}", "neighbouring provinces of {anchor_name}", "which prefectures border {anchor_name}?", "oblasts bordering {anchor_name}", "departments that touch {anchor_name}", "cantons adjacent to {anchor_name}", ], ), SQLTemplate( template_id="adj_03", family="adjacency", sql_difficulty="medium", anchor_source="divisions_area", num_anchors=1, target_subtype="sea", sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")" " SELECT n.id, n.names.\"primary\" AS name, n.subtype," " ST_AsGeoJSON(n.geometry) AS geometry" " FROM read_parquet('natural_earth') AS n, a" " WHERE n.subtype IN ('ocean', 'sea')" " AND ST_Intersects(a.geometry, n.geometry)" ), question_hints=[ "which seas touch {anchor_name}?", "what seas border {anchor_name}?", "which bodies of water is {anchor_name} next to?", "what ocean or sea borders {anchor_name}", "which oceans touch {anchor_name}?", "what coastline does {anchor_name} have?", "which water bodies does {anchor_name} border?", "does {anchor_name} have sea access?", "what ocean is {anchor_name} on?", "is {anchor_name} on the coast?", "what sea is off the coast of {anchor_name}?", "which ocean lies off {anchor_name}?", "what water is {anchor_name} on the shore of?", "which sea or ocean is {anchor_name} along?", ], ), SQLTemplate( template_id="adj_04", family="adjacency", sql_difficulty="medium", anchor_source="divisions_area", num_anchors=1, target_subtype="county", sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, a" " WHERE b.id != '{anchor_id}'" " AND b.subtype = '{target_subtype}'" " AND ST_Touches(a.geometry, b.geometry)" ), question_hints=[ "neighbouring counties of {anchor_name}", "neighbouring districts of {anchor_name}", "which counties border {anchor_name}?", "which districts border {anchor_name}?", "counties adjacent to {anchor_name}", "districts next to {anchor_name}", "counties sharing a border with {anchor_name}", "what counties touch {anchor_name}?", "nearby counties of {anchor_name}", "counties along the {anchor_name} boundary", "municipalities bordering {anchor_name}", "which boroughs are adjacent to {anchor_name}?", "communes that touch {anchor_name}", "parishes next to {anchor_name}", "neighbouring municipalities of {anchor_name}", ], ), SQLTemplate( template_id="adj_05", family="adjacency", sql_difficulty="medium", anchor_source="divisions_area", num_anchors=1, target_subtype="country", sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, a" " WHERE b.id != '{anchor_id}'" " AND b.subtype = '{target_subtype}'" " AND ST_Touches(a.geometry, b.geometry)" ), question_hints=[ "which countries border {anchor_name}?", "what countries share a border with {anchor_name}?", "countries adjacent to {anchor_name}", "neighbouring countries of {anchor_name}", "which nations border {anchor_name}?", "what nations are next to {anchor_name}?", "nations sharing a border with {anchor_name}", "countries that touch {anchor_name}", "find countries bordering {anchor_name}", "show countries adjacent to {anchor_name}", "all countries touching {anchor_name}", "which nations are {anchor_name}'s neighbours?", ], ), SQLTemplate( template_id="adj_06", family="adjacency", sql_difficulty="medium", anchor_source="natural_earth", num_anchors=1, target_subtype="country", sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('natural_earth') WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, a" " WHERE b.subtype = '{target_subtype}'" " AND ST_Intersects(b.geometry, a.geometry)" ), question_hints=[ "which countries border the {anchor_name}?", "what countries are next to the {anchor_name}?", "countries along the {anchor_name}", "which nations touch the {anchor_name}?", "countries adjacent to the {anchor_name}", "which countries lie on the {anchor_name}?", ], ), SQLTemplate( template_id="adj_07", family="adjacency", sql_difficulty="medium", anchor_source="natural_earth", num_anchors=1, target_subtype="region", sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('natural_earth') WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, a" " WHERE b.subtype = '{target_subtype}'" " AND ST_Intersects(b.geometry, a.geometry)" ), question_hints=[ "which regions border the {anchor_name}?", "what regions are next to the {anchor_name}?", "regions along the {anchor_name}", "which states touch the {anchor_name}?", "provinces adjacent to the {anchor_name}", "which regions lie on the {anchor_name}?", ], ), SQLTemplate( template_id="adj_08", family="adjacency", sql_difficulty="medium", anchor_source="natural_earth", num_anchors=1, target_subtype="county", sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('natural_earth') WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, a" " WHERE b.subtype = '{target_subtype}'" " AND ST_Intersects(b.geometry, a.geometry)" ), question_hints=[ "which counties border the {anchor_name}?", "what districts are next to the {anchor_name}?", "counties along the {anchor_name}", "which districts touch the {anchor_name}?", "municipalities adjacent to the {anchor_name}", "which counties lie on the {anchor_name}?", ], ), # ── NATURAL EARTH ADJACENCY ───────────────────────────────────────────── # Division anchor, natural_earth targets. Handler formats anchor_id and # target_subtype but the SQL hardcodes NE subtypes (like adj_03). SQLTemplate( template_id="adj_09", family="adjacency", sql_difficulty="medium", anchor_source="divisions_area", num_anchors=1, target_subtype="river", sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")" " SELECT n.id, n.names.\"primary\" AS name, n.subtype," " ST_AsGeoJSON(n.geometry) AS geometry" " FROM read_parquet('natural_earth') AS n, a" " WHERE n.subtype IN ('river', 'lake', 'basin')" " AND ST_Intersects(a.geometry, n.geometry)" ), question_hints=[ "what rivers or lakes are in {anchor_name}?", "natural water features of {anchor_name}", "which rivers flow through {anchor_name}?", "lakes and rivers within {anchor_name}", "water features inside {anchor_name}", "what bodies of water cross {anchor_name}?", "rivers of {anchor_name}", "show me the lakes in {anchor_name}", "what rivers run through {anchor_name}?", "which lakes lie in {anchor_name}?", "what waterways are in {anchor_name}?", "which basins, lakes, or rivers are in {anchor_name}?", ], ), SQLTemplate( template_id="adj_10", family="adjacency", sql_difficulty="medium", anchor_source="divisions_area", num_anchors=1, target_subtype="range/mtn", sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")" " SELECT n.id, n.names.\"primary\" AS name, n.subtype," " ST_AsGeoJSON(n.geometry) AS geometry" " FROM read_parquet('natural_earth') AS n, a" " WHERE n.subtype = 'range/mtn'" " AND ST_Intersects(a.geometry, n.geometry)" ), question_hints=[ "what mountain ranges are in {anchor_name}?", "which mountain ranges cross {anchor_name}?", "mountains of {anchor_name}", "mountain regions in {anchor_name}", "what hills are in {anchor_name}?", "which hills cross {anchor_name}?", "ghats of {anchor_name}", "what ghats are in {anchor_name}?", "highlands in {anchor_name}", "mountain belts within {anchor_name}", ], ), SQLTemplate( template_id="adj_11", family="adjacency", sql_difficulty="medium", anchor_source="divisions_area", num_anchors=1, target_subtype="plateau", sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")" " SELECT n.id, n.names.\"primary\" AS name, n.subtype," " ST_AsGeoJSON(n.geometry) AS geometry" " FROM read_parquet('natural_earth') AS n, a" " WHERE n.subtype = 'plateau'" " AND ST_Intersects(a.geometry, n.geometry)" ), question_hints=[ "what plateaus are in {anchor_name}?", "which plateaus cross {anchor_name}?", "uplands in {anchor_name}", "what uplands are in {anchor_name}?", "tablelands of {anchor_name}", "plateau regions within {anchor_name}", "show plateaus in {anchor_name}", "find uplands of {anchor_name}", "give me plateau areas in {anchor_name}", "{anchor_name} plateaus and uplands", "what tablelands are there in {anchor_name}?", ], ), SQLTemplate( template_id="adj_12", family="adjacency", sql_difficulty="medium", anchor_source="divisions_area", num_anchors=1, target_subtype="landform", sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")" " SELECT n.id, n.names.\"primary\" AS name, n.subtype," " ST_AsGeoJSON(n.geometry) AS geometry" " FROM read_parquet('natural_earth') AS n, a" " WHERE n.subtype IN ('plain', 'lowland', 'basin', 'valley', 'depression', 'gorge')" " AND ST_Intersects(a.geometry, n.geometry)" ), question_hints=[ "what plains are in {anchor_name}?", "basins and valleys of {anchor_name}", "which basins are in {anchor_name}?", "what valleys cross {anchor_name}?", "lowlands in {anchor_name}", "major landforms in {anchor_name}", "plains, basins, and valleys within {anchor_name}", "show me the main landforms in {anchor_name}", "landforms of {anchor_name}", "find plains and basins in {anchor_name}", "{anchor_name} valleys and lowlands", ], ), # ── MULTI-ADJACENCY ────────────────────────────────────────────────────── SQLTemplate( template_id="multi_adj_01", family="multi_adjacency", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=2, target_subtype="region", sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id_1}'" ")," " b AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id_2}'" ")" " SELECT c.id, c.names.\"primary\" AS name, c.subtype, c.country," " ST_AsGeoJSON(c.geometry) AS geometry" " FROM read_parquet('divisions_area') AS c, a, b" " WHERE c.id NOT IN ('{anchor_id_1}', '{anchor_id_2}')" " AND c.subtype = '{target_subtype}'" " AND ST_Touches(c.geometry, a.geometry)" " AND ST_Touches(c.geometry, b.geometry)" ), question_hints=[ "which {target_subtype}s border both {anchor_1_name} and {anchor_2_name}?", "what {target_subtype}s touch both {anchor_1_name} and {anchor_2_name}?", "{target_subtype}s adjacent to both {anchor_1_name} and {anchor_2_name}", "common {target_subtype} neighbours of {anchor_1_name} and {anchor_2_name}", "show {target_subtype}s touching both {anchor_1_name} and {anchor_2_name}", "{target_subtype}s bordering both {anchor_1_name} and {anchor_2_name}", "shared {target_subtype} neighbours of {anchor_1_name} and {anchor_2_name}", "find {target_subtype}s adjacent to both {anchor_1_name} and {anchor_2_name}", "which states border both {anchor_1_name} and {anchor_2_name}?", "which provinces are adjacent to both {anchor_1_name} and {anchor_2_name}?", "states touching both {anchor_1_name} and {anchor_2_name}", ], ), SQLTemplate( template_id="multi_adj_02", family="multi_adjacency", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=2, target_subtype="country", sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id_1}'" ")," " b AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id_2}'" ")" " SELECT c.id, c.names.\"primary\" AS name, c.subtype, c.country," " ST_AsGeoJSON(c.geometry) AS geometry" " FROM read_parquet('divisions_area') AS c, a, b" " WHERE c.id NOT IN ('{anchor_id_1}', '{anchor_id_2}')" " AND c.subtype = '{target_subtype}'" " AND ST_Touches(c.geometry, a.geometry)" " AND ST_Touches(c.geometry, b.geometry)" ), question_hints=[ "which countries border both {anchor_1_name} and {anchor_2_name}?", "what nations share a border with both {anchor_1_name} and {anchor_2_name}?", "countries adjacent to both {anchor_1_name} and {anchor_2_name}", "which country touches both {anchor_1_name} and {anchor_2_name}?", "nations bordering both {anchor_1_name} and {anchor_2_name}", "show countries neighbouring both {anchor_1_name} and {anchor_2_name}", "find countries that border both {anchor_1_name} and {anchor_2_name}", "common country neighbours of {anchor_1_name} and {anchor_2_name}", "which nation lies between {anchor_1_name} and {anchor_2_name}?", "countries sharing a border with both {anchor_1_name} and {anchor_2_name}", ], ), SQLTemplate( template_id="multi_adj_03", family="multi_adjacency", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=2, target_subtype="county", sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id_1}'" ")," " b AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id_2}'" ")" " SELECT c.id, c.names.\"primary\" AS name, c.subtype, c.country," " ST_AsGeoJSON(c.geometry) AS geometry" " FROM read_parquet('divisions_area') AS c, a, b" " WHERE c.id NOT IN ('{anchor_id_1}', '{anchor_id_2}')" " AND c.subtype = '{target_subtype}'" " AND ST_Touches(c.geometry, a.geometry)" " AND ST_Touches(c.geometry, b.geometry)" ), question_hints=[ "which {target_subtype}s border both {anchor_1_name} and {anchor_2_name}?", "what {target_subtype}s touch both {anchor_1_name} and {anchor_2_name}?", "{target_subtype}s adjacent to both {anchor_1_name} and {anchor_2_name}", "common {target_subtype} neighbours of {anchor_1_name} and {anchor_2_name}", "show {target_subtype}s touching both {anchor_1_name} and {anchor_2_name}", "which districts border both {anchor_1_name} and {anchor_2_name}?", "districts lying between {anchor_1_name} and {anchor_2_name}", "find districts adjacent to both {anchor_1_name} and {anchor_2_name}", "municipalities bordering both {anchor_1_name} and {anchor_2_name}", "which boroughs touch both {anchor_1_name} and {anchor_2_name}?", ], ), # ── CONTAINMENT ────────────────────────────────────────────────────────── SQLTemplate( template_id="contain_01", family="containment", sql_difficulty="medium", anchor_source="divisions_area", num_anchors=1, target_subtype="county", sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, a" " WHERE b.id != '{anchor_id}'" " AND b.subtype = '{target_subtype}'" " AND ST_Within(b.geometry, a.geometry)" ), question_hints=[ "what {target_subtype}s are in {anchor_name}?", "which {target_subtype}s fall within {anchor_name}?", "list all {target_subtype}s inside {anchor_name}", "{target_subtype}s contained by {anchor_name}", "all {target_subtype}s within {anchor_name}", "{target_subtype}s of {anchor_name}", "show every {target_subtype} in {anchor_name}", "show {target_subtype}s inside {anchor_name}", "find {target_subtype}s in {anchor_name}", "give me the {target_subtype}s in {anchor_name}", "{anchor_name} {target_subtype}s", "which {target_subtype}s does {anchor_name} contain?", "what all {target_subtype}s are there in {anchor_name}?", "{target_subtype}s under {anchor_name}", "what districts are in {anchor_name}?", "which districts fall within {anchor_name}?", "districts of {anchor_name}", "show all districts inside {anchor_name}", "list municipalities in {anchor_name}", "boroughs of {anchor_name}", "communes inside {anchor_name}", "parishes in {anchor_name}", ], ), SQLTemplate( template_id="contain_02", family="containment", sql_difficulty="medium", anchor_source="divisions_area", num_anchors=1, target_subtype="country", sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, a" " WHERE b.id != '{anchor_id}'" " AND b.subtype = '{target_subtype}'" " AND ST_Contains(b.geometry, a.geometry)" ), question_hints=[ "what country contains {anchor_name}?", "which country is {anchor_name} in?", "what country does {anchor_name} belong to?", "which nation contains {anchor_name}?", "{anchor_name} is part of which country?", "where is {anchor_name}", "what country is {anchor_name} in", "{anchor_name} belongs to which country?", "show country for {anchor_name}", "find the country of {anchor_name}", "which country does {anchor_name} fall in?", "{anchor_name} under which country", "tell me the country for {anchor_name}", "which nation does {anchor_name} belong to?", "what nation is {anchor_name} in?", ], ), SQLTemplate( template_id="contain_03", family="containment", sql_difficulty="medium", anchor_source="divisions_area", num_anchors=1, target_subtype="region", sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, a" " WHERE b.subtype = '{target_subtype}'" " AND ST_Within(b.geometry, a.geometry)" ), question_hints=[ "what {target_subtype}s are in {anchor_name}?", "list all {target_subtype}s in {anchor_name}", "which {target_subtype}s does {anchor_name} have?", "show {target_subtype}s of {anchor_name}", "all {target_subtype}s within {anchor_name}", "{anchor_name} {target_subtype}s", "give me the {target_subtype}s of {anchor_name}", "what states are in {anchor_name}?", "list all states of {anchor_name}", "which provinces does {anchor_name} have?", "show the provinces of {anchor_name}", "all states within {anchor_name}", "provinces of {anchor_name}", "what oblasts are in {anchor_name}?", "departments of {anchor_name}", "prefectures in {anchor_name}", "cantons of {anchor_name}", ], ), SQLTemplate( template_id="contain_04", family="containment", sql_difficulty="medium", anchor_source="natural_earth", num_anchors=1, target_subtype="region", sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('natural_earth') WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, a" " WHERE b.subtype = '{target_subtype}'" " AND ST_Within(b.geometry, a.geometry)" ), question_hints=[ "which {target_subtype}s are in the {anchor_name}?", "what {target_subtype}s fall within the {anchor_name}?", "{target_subtype}s inside the {anchor_name}", "admin {target_subtype}s within the {anchor_name}", "all regions inside the {anchor_name}", "what {target_subtype}s does the {anchor_name} contain?", "{target_subtype}s covered by the {anchor_name}", "which regions are in the {anchor_name} basin?", "what admin regions lie within the {anchor_name}?", "which provinces are inside the {anchor_name}?", "show the regions in the {anchor_name}", "find provinces inside the {anchor_name}", "give me admin regions within the {anchor_name}", "regions belonging to the {anchor_name}", "areas contained in the {anchor_name}", "which states are in the {anchor_name}?", "states inside the {anchor_name}", "which prefectures lie within the {anchor_name}?", "oblasts in the {anchor_name}", "departments inside the {anchor_name}", ], ), # ── NATURAL EARTH CONTAINMENT ─────────────────────────────────────────── # contain_05: NE anchor (sea/gulf/bay), find countries that touch it. # Uses containment handler via containment_pairs. SQLTemplate( template_id="contain_05", family="containment", sql_difficulty="medium", anchor_source="natural_earth", num_anchors=1, target_subtype="country", sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('natural_earth') WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, a" " WHERE b.subtype = '{target_subtype}'" " AND ST_Intersects(b.geometry, a.geometry)" ), question_hints=[ "which countries border the {anchor_name}?", "what countries are along the {anchor_name}?", "countries surrounding the {anchor_name}", "nations on the {anchor_name}", "which countries touch the {anchor_name}?", "countries with coastline on the {anchor_name}", "what nations lie on the {anchor_name}?", "which countries are on the coast of the {anchor_name}?", "what countries lie around the {anchor_name}?", "which nations have shores on the {anchor_name}?", "what countries front the {anchor_name}?", ], ), # ── INTERSECTION ───────────────────────────────────────────────────────── SQLTemplate( template_id="intersect_01", family="intersection", sql_difficulty="medium-hard", anchor_source="divisions_area", num_anchors=1, target_subtype="region", sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, a" " WHERE b.id != '{anchor_id}'" " AND b.subtype = '{target_subtype}'" " AND ST_Intersects(b.geometry, a.geometry)" ), question_hints=[ "which {target_subtype}s intersect {anchor_name}?", "what {target_subtype}s overlap with {anchor_name}?", "{target_subtype}s that cross into {anchor_name}", "which {target_subtype}s overlap {anchor_name}?", "{target_subtype}s partially inside {anchor_name}", "what {target_subtype}s extend into {anchor_name}?", "show {target_subtype}s overlapping {anchor_name}", "find {target_subtype}s crossing {anchor_name}", "{target_subtype}s meeting {anchor_name}", "areas intersecting {anchor_name}", "{anchor_name} overlapping {target_subtype}s", "which {target_subtype}s are partly in {anchor_name}?", "which states intersect {anchor_name}?", "which provinces overlap {anchor_name}?", "states crossing into {anchor_name}", "provinces partly inside {anchor_name}", ], ), SQLTemplate( template_id="intersect_02", family="intersection", sql_difficulty="medium-hard", anchor_source="divisions_area", num_anchors=1, target_subtype="county", sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, a" " WHERE b.id != '{anchor_id}'" " AND b.subtype = '{target_subtype}'" " AND ST_Intersects(b.geometry, a.geometry)" ), question_hints=[ "which {target_subtype}s intersect {anchor_name}?", "what {target_subtype}s overlap with {anchor_name}?", "{target_subtype}s crossing into {anchor_name}", "which {target_subtype}s are partly in {anchor_name}?", "show {target_subtype}s overlapping {anchor_name}", "find {target_subtype}s crossing {anchor_name}", "{target_subtype}s that overlap {anchor_name}", "which districts intersect {anchor_name}?", "what districts overlap with {anchor_name}?", "districts crossing into {anchor_name}", "which municipalities are partly in {anchor_name}?", "boroughs overlapping {anchor_name}", "communes crossing {anchor_name}", ], ), SQLTemplate( template_id="intersect_03", family="intersection", sql_difficulty="medium-hard", anchor_source="natural_earth", num_anchors=1, target_subtype="country", sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('natural_earth') WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, a" " WHERE b.subtype = '{target_subtype}'" " AND ST_Intersects(b.geometry, a.geometry)" ), question_hints=[ "which countries does the {anchor_name} pass through?", "what countries does the {anchor_name} cross?", "countries that overlap the {anchor_name}", "which countries touch the {anchor_name}?", "nations intersected by the {anchor_name}", "which nations does the {anchor_name} cross?", "countries along the {anchor_name}", "what countries does the {anchor_name} cover?", "countries the {anchor_name} spans across", "what countries is the {anchor_name} in?", "which countries lie along the {anchor_name}?", "what countries does the {anchor_name} run through?", "which countries border the {anchor_name}?", ], ), # ── NATURAL EARTH INTERSECTION ────────────────────────────────────────── # intersect_04: NE anchor, finding overlapping regions (vs countries in # intersect_03). Uses cross_source_relations handler. # intersect_05: division anchor, finding NE features that overlap it. # Uses intersection_pairs handler (extra NE subtypes ignored in SQL). SQLTemplate( template_id="intersect_04", family="intersection", sql_difficulty="medium-hard", anchor_source="natural_earth", num_anchors=1, target_subtype="region", sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('natural_earth') WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, a" " WHERE b.subtype = '{target_subtype}'" " AND ST_Intersects(b.geometry, a.geometry)" ), question_hints=[ "which regions does the {anchor_name} pass through?", "what admin regions overlap with the {anchor_name}?", "regions that the {anchor_name} crosses", "admin areas intersected by the {anchor_name}", "what provinces does the {anchor_name} span?", "regions along the {anchor_name}", "which provinces overlap the {anchor_name}?", "which regions is the {anchor_name} in?", "what states does the {anchor_name} run through?", "which provinces lie along the {anchor_name}?", "show regions crossed by the {anchor_name}", "find administrative regions along the {anchor_name}", "give me the regions touched by the {anchor_name}", "regions of the {anchor_name}", "which states does the {anchor_name} pass through?", "what oblasts does the {anchor_name} cross?", "which departments does the {anchor_name} run through?", "prefectures along the {anchor_name}", ], ), SQLTemplate( template_id="intersect_05", family="intersection", sql_difficulty="medium-hard", anchor_source="divisions_area", num_anchors=1, target_subtype="region", sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")" " SELECT n.id, n.names.\"primary\" AS name, n.subtype," " ST_AsGeoJSON(n.geometry) AS geometry" " FROM read_parquet('natural_earth') AS n, a" " WHERE ST_Intersects(n.geometry, a.geometry)" ), question_hints=[ "what natural features intersect {anchor_name}?", "natural features that overlap {anchor_name}", "which geographic features cross {anchor_name}?", "everything natural that touches {anchor_name}", "what geographic features does {anchor_name} contain?", "natural features within or crossing {anchor_name}", "show natural features overlapping {anchor_name}", "find the natural features in or across {anchor_name}", "{anchor_name} intersecting natural features", "what physical features are associated with {anchor_name}?", ], ), SQLTemplate( template_id="intersect_06", family="intersection", sql_difficulty="medium-hard", anchor_source="natural_earth", num_anchors=1, target_subtype="county", sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('natural_earth') WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, a" " WHERE b.subtype = '{target_subtype}'" " AND ST_Intersects(b.geometry, a.geometry)" ), question_hints=[ "which districts does the {anchor_name} pass through?", "what districts does the {anchor_name} cross?", "districts intersected by the {anchor_name}", "which counties does the {anchor_name} flow through?", "what counties overlap the {anchor_name}?", "districts along the {anchor_name}", "which districts are crossed by the {anchor_name}?", "what districts is the {anchor_name} in?", "which counties lie along the {anchor_name}?", "what districts does the {anchor_name} run through?", "show districts crossed by the {anchor_name}", "find counties along the {anchor_name}", "give me the districts touched by the {anchor_name}", "districts of the {anchor_name}", "which municipalities does the {anchor_name} pass through?", "boroughs along the {anchor_name}", "communes crossed by the {anchor_name}", "parishes the {anchor_name} runs through", ], ), # ── BUFFER ─────────────────────────────────────────────────────────────── # CTE computes the buffered geometry (raw) for the spatial join. # Final SELECT wraps the result features with ST_AsGeoJSON. SQLTemplate( template_id="buffer_01", family="buffer", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=1, requires_buffer=True, sql_template=( "WITH a AS (" " SELECT ST_Buffer(geometry, {buffer_m} / 111320.0) AS geom" " FROM read_parquet('divisions_area')" " WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, a" " WHERE b.id != '{anchor_id}'" " AND ST_Intersects(b.geometry, a.geom)" ), question_hints=[ "what's within {buffer_label} of {anchor_name}?", "admin units within {buffer_label} of {anchor_name}", "features within a {buffer_label} radius of {anchor_name}", "places within {buffer_label} of {anchor_name}", "{buffer_label} buffer around {anchor_name}", "what falls within {buffer_label} of {anchor_name}?", "everything within {buffer_label} of {anchor_name}", ], ), SQLTemplate( template_id="buffer_02", family="buffer", sql_difficulty="hard", anchor_source="natural_earth", num_anchors=1, requires_buffer=True, sql_template=( "WITH a AS (" " SELECT ST_Buffer(geometry, {buffer_m} / 111320.0) AS geom" " FROM read_parquet('natural_earth')" " WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, a" " WHERE ST_Intersects(b.geometry, a.geom)" ), question_hints=[ "what admin units are within {buffer_label} of the {anchor_name}?", "countries within {buffer_label} of the {anchor_name}", "regions within {buffer_label} of the {anchor_name}", "what falls within {buffer_label} of the {anchor_name}?", "admin divisions within a {buffer_label} radius of the {anchor_name}", "places within {buffer_label} of the {anchor_name}", "what places are near the {anchor_name}?", "what admin areas are close to the {anchor_name}?", "which regions are around the {anchor_name}?", "what lies within {buffer_label} of the shoreline of the {anchor_name}?", "show places around the {anchor_name}", "find areas near the {anchor_name}", "admin units close to the {anchor_name}", "what is around the {anchor_name} within {buffer_label}", "give me nearby admin regions for the {anchor_name}", ], ), SQLTemplate( template_id="buffer_03", family="buffer", sql_difficulty="medium", anchor_source="divisions_area", num_anchors=1, requires_buffer=True, sql_template=( "SELECT ST_AsGeoJSON(ST_Buffer(geometry, {buffer_m} / 111320.0)) AS geometry" " FROM read_parquet('divisions_area')" " WHERE id = '{anchor_id}'" ), question_hints=[ "{buffer_label} buffer around {anchor_name}", "draw a {buffer_label} buffer around {anchor_name}", "show the {buffer_label} buffer around {anchor_name}", "create a {buffer_label} radius around {anchor_name}", "map a {buffer_label} zone around {anchor_name}", "outline the {buffer_label} buffer around {anchor_name}", "buffer {anchor_name} by {buffer_label}", "show radius {buffer_label} from {anchor_name}", "{anchor_name} with a {buffer_label} buffer", ], ), # Subtype-specific and multi-anchor buffer templates are generated by helper # functions to keep the catalog compact while preserving template IDs. *_make_buffer_templates(), # ── NATURAL EARTH BUFFER ──────────────────────────────────────────────── # buffer_11: NE anchor, find other NE features within a buffer distance. # Uses buffer handler for natural_earth. SQLTemplate( template_id="buffer_11", family="buffer", sql_difficulty="hard", anchor_source="natural_earth", num_anchors=1, requires_buffer=True, sql_template=( "WITH a AS (" " SELECT ST_Buffer(geometry, {buffer_m} / 111320.0) AS geom" " FROM read_parquet('natural_earth')" " WHERE id = '{anchor_id}'" ")" " SELECT n.id, n.names.\"primary\" AS name, n.subtype," " ST_AsGeoJSON(n.geometry) AS geometry" " FROM read_parquet('natural_earth') AS n, a" " WHERE ST_Intersects(n.geometry, a.geom)" ), question_hints=[ "natural features within {buffer_label} of the {anchor_name}", "what's within {buffer_label} of the {anchor_name}?", "geographic features near the {anchor_name} within {buffer_label}", "everything within {buffer_label} of the {anchor_name}", "what natural features are close to the {anchor_name}?", "{buffer_label} radius around the {anchor_name}", "what natural features are around the {anchor_name}?", "what lies near the {anchor_name}?", "which features are close to the {anchor_name}?", "what natural features are near the shoreline of the {anchor_name}?", "show nearby natural features for the {anchor_name}", "find features around the {anchor_name}", "give me natural features near the {anchor_name}", "features around the {anchor_name}", "what is close to the {anchor_name} within {buffer_label}", ], ), # ── CHAINED ────────────────────────────────────────────────────────────── # Containment + EXISTS/NOT EXISTS ocean/sea. # CTE holds raw geometry for ST_Within; final SELECT wraps with ST_AsGeoJSON. SQLTemplate( template_id="chained_01", family="chained", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=1, target_subtype="county", sql_template=( "WITH region AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, region" " WHERE b.subtype = '{target_subtype}'" " AND ST_Within(b.geometry, region.geometry)" " AND EXISTS (" " SELECT 1 FROM read_parquet('natural_earth') AS n" " WHERE n.subtype IN ('ocean', 'sea')" " AND ST_Intersects(b.geometry, n.geometry)" " )" ), question_hints=[ "coastal {target_subtype}s of {anchor_name}", "{target_subtype}s in {anchor_name} with sea access", "which {target_subtype}s in {anchor_name} are on the coast?", "seaside {target_subtype}s within {anchor_name}", "{target_subtype}s in {anchor_name} bordering the sea", "oceanfront {target_subtype}s in {anchor_name}", "which {target_subtype}s in {anchor_name} have a coastline?", "show coastal {target_subtype}s in {anchor_name}", "find {target_subtype}s of {anchor_name} on the shore", "{target_subtype}s of {anchor_name} by the sea", "which {target_subtype}s of {anchor_name} touch the ocean?", "coastline {target_subtype}s in {anchor_name}", "coastal districts of {anchor_name}", "which districts of {anchor_name} are on the coast?", "seaside municipalities of {anchor_name}", "coastal boroughs in {anchor_name}", ], ), SQLTemplate( template_id="chained_02", family="chained", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=1, target_subtype="county", sql_template=( "WITH region AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, region" " WHERE b.subtype = '{target_subtype}'" " AND ST_Intersects(b.geometry, region.geometry)" " AND NOT EXISTS (" " SELECT 1 FROM read_parquet('natural_earth') AS n" " WHERE n.subtype IN ('ocean', 'sea')" " AND ST_Intersects(b.geometry, n.geometry)" " )" ), question_hints=[ "landlocked {target_subtype}s in {anchor_name}", "which {target_subtype}s in {anchor_name} have no sea access?", "{target_subtype}s in {anchor_name} that are landlocked", "{target_subtype}s in {anchor_name} with no coastline", "which {target_subtype}s within {anchor_name} are landlocked?", "interior {target_subtype}s of {anchor_name} with no ocean border", "show inland {target_subtype}s in {anchor_name}", "find non-coastal {target_subtype}s of {anchor_name}", "{target_subtype}s of {anchor_name} away from the sea", "which {target_subtype}s in {anchor_name} are not coastal?", "inner {target_subtype}s of {anchor_name}", "inland districts of {anchor_name}", "which districts in {anchor_name} are not on the coast?", "non-coastal municipalities of {anchor_name}", "interior boroughs of {anchor_name}", ], ), SQLTemplate( template_id="chained_03", family="chained", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=1, target_subtype="region", sql_template=( "WITH country AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, country" " WHERE b.subtype = '{target_subtype}'" " AND ST_Within(b.geometry, country.geometry)" " AND EXISTS (" " SELECT 1 FROM read_parquet('natural_earth') AS n" " WHERE n.subtype IN ('range/mtn', 'island group', 'peninsula', 'depression')" " AND ST_Intersects(b.geometry, n.geometry)" " )" ), question_hints=[ "{target_subtype}s in {anchor_name} on a terrain feature or island", "{target_subtype}s of {anchor_name} on a peninsula or island group", "{target_subtype}s within {anchor_name} on notable landforms", "island and peninsula {target_subtype}s of {anchor_name}", "show {target_subtype}s in {anchor_name} on major landforms", "find {target_subtype}s of {anchor_name} on islands or peninsulas", "{target_subtype}s in {anchor_name} on terrain regions", "states of {anchor_name} on a peninsula or island", "island states of {anchor_name}", "provinces of {anchor_name} on major landforms", "which regions of {anchor_name} are on a peninsula?", ], ), # ── DIFFERENCE ─────────────────────────────────────────────────────────── # CTEs hold raw geometry; ST_Difference result wrapped with ST_AsGeoJSON. SQLTemplate( template_id="diff_01", family="difference", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=2, sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id_1}'" ")," " b AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id_2}'" ")" " SELECT ST_AsGeoJSON(ST_Difference(a.geometry, b.geometry)) AS geometry" " FROM a, b" " WHERE ST_Intersects(a.geometry, b.geometry)" ), question_hints=[ "{anchor_1_name} excluding {anchor_2_name}", "{anchor_1_name} minus {anchor_2_name}", "the part of {anchor_1_name} that is not in {anchor_2_name}", "{anchor_1_name} without the {anchor_2_name} area", "remove {anchor_2_name} from {anchor_1_name}", "{anchor_1_name} with {anchor_2_name} cut out", "subtract {anchor_2_name} from {anchor_1_name}", "what's left of {anchor_1_name} after removing {anchor_2_name}?", "difference between {anchor_1_name} and {anchor_2_name}", "keep only {anchor_1_name} outside {anchor_2_name}", "cut {anchor_2_name} out of {anchor_1_name}", "show {anchor_1_name} without {anchor_2_name}", ], ), SQLTemplate( template_id="diff_02", family="difference", sql_difficulty="hard", anchor_source="mixed", num_anchors=2, sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")," " b AS (" " SELECT geometry FROM read_parquet('natural_earth') WHERE id = '{clip_feature_id}'" ")" " SELECT ST_AsGeoJSON(ST_Difference(a.geometry, b.geometry)) AS geometry" " FROM a, b" " WHERE ST_Intersects(a.geometry, b.geometry)" ), question_hints=[ "the part of {anchor_name} outside the {clip_feature_name}", "{anchor_name} excluding the {clip_feature_name}", "{anchor_name} minus the {clip_feature_name}", "parts of {anchor_name} not covered by the {clip_feature_name}", "{anchor_name} with the {clip_feature_name} removed", "what's left of {anchor_name} after removing the {clip_feature_name}?", "show me {anchor_name} excluding the {clip_feature_name}", "keep only the part of {anchor_name} outside the {clip_feature_name}", "cut the {clip_feature_name} out of {anchor_name}", "difference of {anchor_name} and the {clip_feature_name}", "{anchor_name} after subtracting the {clip_feature_name}", ], ), # ── BORDER CORRIDOR ────────────────────────────────────────────────────── # Intermediate intersection kept raw; final buffer wrapped with ST_AsGeoJSON. SQLTemplate( template_id="corridor_01", family="border_corridor", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=2, requires_buffer=True, sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id_1}'" ")," " b AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id_2}'" ")," " border AS (" " SELECT ST_Intersection(a.geometry, b.geometry) AS line" " FROM a, b" " WHERE ST_Intersects(a.geometry, b.geometry)" ")" " SELECT ST_AsGeoJSON(ST_Buffer(border.line, {buffer_km} * 1000.0 / 111320.0)) AS geometry" " FROM border" " WHERE border.line IS NOT NULL" ), question_hints=[ "{buffer_km} km zone along the border between {anchor_1_name} and {anchor_2_name}", "the {buffer_km} km border corridor between {anchor_1_name} and {anchor_2_name}", "area within {buffer_km} km of the {anchor_1_name}-{anchor_2_name} border", "the region straddling the border of {anchor_1_name} and {anchor_2_name} within {buffer_km} km", "{buffer_km} km on either side of the {anchor_1_name} and {anchor_2_name} border", "buffer the {anchor_1_name}-{anchor_2_name} boundary by {buffer_km} km", "show the border zone between {anchor_1_name} and {anchor_2_name}", "map the corridor along the {anchor_1_name}-{anchor_2_name} border", "find the area near the border of {anchor_1_name} and {anchor_2_name}", "give me the border buffer for {anchor_1_name} and {anchor_2_name}", "border area of {anchor_1_name} and {anchor_2_name} within {buffer_km} km", ], ), # ── SET OPERATIONS ─────────────────────────────────────────────────────── # union_01 / union_02: 2-anchor and filtered-containment unions. # union_03: 3-anchor union — trains the model on IN-clause with 3 IDs. # contain_multi: subtype within multiple countries via country IN clause. SQLTemplate( template_id="union_01", family="set_operations", sql_difficulty="medium-hard", anchor_source="divisions_area", num_anchors=2, sql_template=( "SELECT ST_AsGeoJSON(ST_Union_Agg(geometry)) AS geometry," " array_agg(names.\"primary\") AS names" " FROM read_parquet('divisions_area')" " WHERE id IN ('{anchor_id_1}', '{anchor_id_2}')" ), question_hints=[ "the combined area of {anchor_1_name} and {anchor_2_name}", "union of {anchor_1_name} and {anchor_2_name}", "merge {anchor_1_name} and {anchor_2_name}", "{anchor_1_name} and {anchor_2_name} together", "combined geometry of {anchor_1_name} and {anchor_2_name}", ], ), SQLTemplate( template_id="union_02", family="set_operations", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=1, target_subtype="county", requires_aggregation=True, sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")" " SELECT ST_AsGeoJSON(ST_Union_Agg(b.geometry)) AS geometry" " FROM read_parquet('divisions_area') AS b, a" " WHERE b.subtype = '{target_subtype}'" " AND ST_Within(b.geometry, a.geometry)" ), question_hints=[ "merge all {target_subtype}s in {anchor_name} into one geometry", "combined geometry of all {target_subtype}s in {anchor_name}", "union of all {target_subtype}s within {anchor_name}", "all {target_subtype}s of {anchor_name} merged together", "the overall extent of {target_subtype}s in {anchor_name}", "show one merged shape for all {target_subtype}s in {anchor_name}", "dissolve all {target_subtype}s in {anchor_name}", "make one geometry from all {target_subtype}s in {anchor_name}", "single outline of all {target_subtype}s in {anchor_name}", "combine the {target_subtype}s of {anchor_name} into one area", "merge all districts in {anchor_name} into one shape", "dissolve all municipalities in {anchor_name}", "combine all boroughs of {anchor_name} into one geometry", ], ), SQLTemplate( template_id="union_03", family="set_operations", sql_difficulty="medium-hard", anchor_source="divisions_area", num_anchors=3, sql_template=( "SELECT ST_AsGeoJSON(ST_Union_Agg(geometry)) AS geometry," " array_agg(names.\"primary\") AS names" " FROM read_parquet('divisions_area')" " WHERE id IN ('{anchor_id_1}', '{anchor_id_2}', '{anchor_id_3}')" ), question_hints=[ "show me {anchor_1_name}, {anchor_2_name} and {anchor_3_name}", "the combined area of {anchor_1_name}, {anchor_2_name} and {anchor_3_name}", "union of {anchor_1_name}, {anchor_2_name} and {anchor_3_name}", "merge {anchor_1_name}, {anchor_2_name} and {anchor_3_name}", "{anchor_1_name}, {anchor_2_name} and {anchor_3_name} together", "display {anchor_1_name}, {anchor_2_name} and {anchor_3_name}", ], ), *_make_country_in_templates(), # ── PARTIAL SELECTION ──────────────────────────────────────────────────── # Bbox clip CTEs use raw geometry; ST_Intersection result wrapped. SQLTemplate( template_id="partial_01", family="partial_selection", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=1, sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")," " bbox AS (" " SELECT ST_XMin(geometry) AS xmin, ST_XMax(geometry) AS xmax," " ST_YMin(geometry) AS ymin, ST_YMax(geometry) AS ymax FROM a" ")," " clip AS (" " SELECT ST_MakeEnvelope(xmin, (ymin + ymax) / 2.0, xmax, ymax) AS half_geom FROM bbox" ")" " SELECT ST_AsGeoJSON(ST_Intersection(a.geometry, clip.half_geom)) AS geometry" " FROM a, clip" ), question_hints=[ "the northern half of {anchor_name}", "northern part of {anchor_name}", "the top half of {anchor_name}", "northern portion of {anchor_name}", "upper half of {anchor_name}", "top side of {anchor_name}", "show north half of {anchor_name}", "cut {anchor_name} to the north half", "only the northern side of {anchor_name}", "north part of {anchor_name}", ], ), SQLTemplate( template_id="partial_02", family="partial_selection", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=1, sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")," " bbox AS (" " SELECT ST_XMin(geometry) AS xmin, ST_XMax(geometry) AS xmax," " ST_YMin(geometry) AS ymin, ST_YMax(geometry) AS ymax FROM a" ")," " clip AS (" " SELECT ST_MakeEnvelope(xmin, ymin, xmax, (ymin + ymax) / 2.0) AS half_geom FROM bbox" ")" " SELECT ST_AsGeoJSON(ST_Intersection(a.geometry, clip.half_geom)) AS geometry" " FROM a, clip" ), question_hints=[ "the southern half of {anchor_name}", "southern part of {anchor_name}", "the bottom half of {anchor_name}", "southern portion of {anchor_name}", "lower half of {anchor_name}", "bottom side of {anchor_name}", "show south half of {anchor_name}", "cut {anchor_name} to the south half", "only the southern side of {anchor_name}", "south part of {anchor_name}", ], ), SQLTemplate( template_id="partial_03", family="partial_selection", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=1, sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")," " bbox AS (" " SELECT ST_XMin(geometry) AS xmin, ST_XMax(geometry) AS xmax," " ST_YMin(geometry) AS ymin, ST_YMax(geometry) AS ymax FROM a" ")," " clip AS (" " SELECT ST_MakeEnvelope((xmin + xmax) / 2.0, ymin, xmax, ymax) AS half_geom FROM bbox" ")" " SELECT ST_AsGeoJSON(ST_Intersection(a.geometry, clip.half_geom)) AS geometry" " FROM a, clip" ), question_hints=[ "the eastern half of {anchor_name}", "eastern part of {anchor_name}", "the right half of {anchor_name}", "eastern portion of {anchor_name}", "east side of {anchor_name}", "show east half of {anchor_name}", "cut {anchor_name} to the east half", "only the eastern side of {anchor_name}", "right side of {anchor_name}", ], ), SQLTemplate( template_id="partial_04", family="partial_selection", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=1, sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")," " bbox AS (" " SELECT ST_XMin(geometry) AS xmin, ST_XMax(geometry) AS xmax," " ST_YMin(geometry) AS ymin, ST_YMax(geometry) AS ymax FROM a" ")," " clip AS (" " SELECT ST_MakeEnvelope(xmin, ymin, (xmin + xmax) / 2.0, ymax) AS half_geom FROM bbox" ")" " SELECT ST_AsGeoJSON(ST_Intersection(a.geometry, clip.half_geom)) AS geometry" " FROM a, clip" ), question_hints=[ "the western half of {anchor_name}", "western part of {anchor_name}", "the left half of {anchor_name}", "western portion of {anchor_name}", "west side of {anchor_name}", "show west half of {anchor_name}", "cut {anchor_name} to the west half", "only the western side of {anchor_name}", "left side of {anchor_name}", ], ), SQLTemplate( template_id="partial_05", family="partial_selection", sql_difficulty="hard", anchor_source="mixed", num_anchors=2, sql_template=( "WITH a AS (" " SELECT geometry AS g1 FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")," " b AS (" " SELECT geometry AS g2 FROM read_parquet('natural_earth') WHERE id = '{clip_feature_id}'" ")" " SELECT ST_AsGeoJSON(ST_Intersection(a.g1, b.g2)) AS geometry" " FROM a, b" " WHERE ST_Intersects(a.g1, b.g2)" ), question_hints=[ "the part of {anchor_name} that overlaps the {clip_feature_name}", "{anchor_name} within the {clip_feature_name}", "the portion of {anchor_name} inside the {clip_feature_name}", "part of the {clip_feature_name} in {anchor_name}", "part of {anchor_name} in the {clip_feature_name}", "clip {anchor_name} to the {clip_feature_name}", "{anchor_name} clipped to the {clip_feature_name}", "{clip_feature_name} inside {anchor_name}", "parts of {anchor_name} covered by the {clip_feature_name}", "show me where {anchor_name} and the {clip_feature_name} overlap", "keep only the part of {anchor_name} in the {clip_feature_name}", "intersection of {anchor_name} and the {clip_feature_name}", "give me the overlap between {anchor_name} and the {clip_feature_name}", "only the shared area of {anchor_name} and the {clip_feature_name}", ], ), # ── AGGREGATION ────────────────────────────────────────────────────────── # ST_Area uses raw geometry in the ORDER BY; final SELECT wraps output. SQLTemplate( template_id="agg_01", family="aggregation", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=1, target_subtype=None, # filled at generation time: county or region requires_aggregation=True, sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name," " ST_AsGeoJSON(b.geometry) AS geometry," " ST_Area(b.geometry) AS area" " FROM read_parquet('divisions_area') AS b, a" " WHERE ST_Within(b.geometry, a.geometry)" " AND b.subtype = '{target_subtype}'" " ORDER BY area DESC" " LIMIT {top_n}" ), question_hints=[ "top {top_n} largest {target_subtype}s in {anchor_name}", "biggest {top_n} {target_subtype}s in {anchor_name}", "{top_n} largest {target_subtype}s inside {anchor_name}", "the {top_n} biggest {target_subtype}s within {anchor_name}", "largest {target_subtype} in {anchor_name}", "which {target_subtype} in {anchor_name} has the most area?", "show the biggest {target_subtype}s in {anchor_name}", "list the largest {target_subtype}s for {anchor_name}", "give me the biggest {target_subtype}s in {anchor_name}", "{anchor_name} largest {target_subtype}s", ], ), SQLTemplate( template_id="agg_02", family="aggregation", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=1, target_subtype=None, # filled at generation time: county or region requires_aggregation=True, sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name," " ST_AsGeoJSON(b.geometry) AS geometry," " ST_Area(b.geometry) AS area" " FROM read_parquet('divisions_area') AS b, a" " WHERE ST_Within(b.geometry, a.geometry)" " AND b.subtype = '{target_subtype}'" " ORDER BY area ASC" " LIMIT {top_n}" ), question_hints=[ "top {top_n} smallest {target_subtype}s in {anchor_name}", "smallest {top_n} {target_subtype}s in {anchor_name}", "{top_n} smallest {target_subtype}s inside {anchor_name}", "the {top_n} tiniest {target_subtype}s within {anchor_name}", "smallest {target_subtype} in {anchor_name}", "which {target_subtype} in {anchor_name} has the least area?", "show the smallest {target_subtype}s in {anchor_name}", "list the smallest {target_subtype}s for {anchor_name}", "give me the tiniest {target_subtype}s in {anchor_name}", "{anchor_name} smallest {target_subtype}s", ], ), SQLTemplate( template_id="agg_03", family="aggregation", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=1, target_subtype=None, # filled at generation time: county or region requires_aggregation=True, sql_template=( "SELECT id, names.\"primary\" AS name," " ST_AsGeoJSON(geometry) AS geometry," " ST_Area(geometry) AS area" " FROM read_parquet('divisions_area')" " WHERE country = '{country}'" " AND subtype = '{target_subtype}'" " ORDER BY area DESC" " LIMIT {top_n}" ), question_hints=[ "top {top_n} largest {target_subtype}s in {anchor_name}", "{top_n} biggest {target_subtype}s in {anchor_name}", "largest {top_n} {target_subtype}s in {anchor_name}", "the {top_n} largest {target_subtype}s in {anchor_name}", "biggest {target_subtype} in {anchor_name}", "which {target_subtype} in {anchor_name} is the largest?", "show the largest {target_subtype}s in {anchor_name}", "list the biggest {target_subtype}s in {anchor_name}", "give me the largest {target_subtype}s for {anchor_name}", "{anchor_name} biggest {target_subtype}s", ], ), SQLTemplate( template_id="agg_04", family="aggregation", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=1, target_subtype=None, # filled at generation time: county or region requires_aggregation=True, sql_template=( "SELECT id, names.\"primary\" AS name," " ST_AsGeoJSON(geometry) AS geometry," " ST_Area(geometry) AS area" " FROM read_parquet('divisions_area')" " WHERE country = '{country}'" " AND subtype = '{target_subtype}'" " ORDER BY area ASC" " LIMIT {top_n}" ), question_hints=[ "top {top_n} smallest {target_subtype}s in {anchor_name}", "{top_n} smallest {target_subtype}s in {anchor_name}", "smallest {top_n} {target_subtype}s in {anchor_name}", "the {top_n} smallest {target_subtype}s in {anchor_name}", "smallest {target_subtype} in {anchor_name}", "which {target_subtype} in {anchor_name} is the smallest?", "show the smallest {target_subtype}s in {anchor_name}", "list the smallest {target_subtype}s in {anchor_name}", "give me the smallest {target_subtype}s for {anchor_name}", "{anchor_name} tiniest {target_subtype}s", ], ), # ── WINDOW FUNCTION ────────────────────────────────────────────────────── # CTE keeps raw geometry for ST_Area; final SELECT wraps with ST_AsGeoJSON. SQLTemplate( template_id="window_01", family="window_function", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=1, target_subtype="county", requires_aggregation=True, sql_template=( "WITH ranked AS (" " SELECT id, names.\"primary\" AS name, subtype, country, region, geometry," " ST_Area(geometry) AS area," " ROW_NUMBER() OVER (PARTITION BY region ORDER BY ST_Area(geometry) DESC) AS rn" " FROM read_parquet('divisions_area')" " WHERE country = '{country}'" " AND subtype = '{target_subtype}'" ")" " SELECT id, name, subtype, country, region," " ST_AsGeoJSON(geometry) AS geometry, area" " FROM ranked" " WHERE rn = 1" ), question_hints=[ "the largest {target_subtype} in each region of {anchor_name}", "biggest {target_subtype} per region in {anchor_name}", "largest {target_subtype} for every region of {anchor_name}", "the biggest {target_subtype} in each province of {anchor_name}", "show the largest {target_subtype} in every region of {anchor_name}", "list biggest {target_subtype} by region in {anchor_name}", "for each region in {anchor_name}, give the largest {target_subtype}", "largest {target_subtype}s grouped by region in {anchor_name}", "one biggest {target_subtype} for each region of {anchor_name}", "the largest district in each state of {anchor_name}", "biggest district per province in {anchor_name}", "the largest municipality in each region of {anchor_name}", "biggest borough per state in {anchor_name}", ], ), SQLTemplate( template_id="window_02", family="window_function", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=1, target_subtype="county", requires_aggregation=True, sql_template=( "WITH ranked AS (" " SELECT id, names.\"primary\" AS name, subtype, country, region, geometry," " ST_Area(geometry) AS area," " ROW_NUMBER() OVER (PARTITION BY region ORDER BY ST_Area(geometry) ASC) AS rn" " FROM read_parquet('divisions_area')" " WHERE country = '{country}'" " AND subtype = '{target_subtype}'" ")" " SELECT id, name, subtype, country, region," " ST_AsGeoJSON(geometry) AS geometry, area" " FROM ranked" " WHERE rn = 1" ), question_hints=[ "the smallest {target_subtype} in each region of {anchor_name}", "smallest {target_subtype} per region in {anchor_name}", "tiniest {target_subtype} for every region of {anchor_name}", "the smallest {target_subtype} in each province of {anchor_name}", "show the smallest {target_subtype} in every region of {anchor_name}", "list smallest {target_subtype} by region in {anchor_name}", "for each region in {anchor_name}, give the smallest {target_subtype}", "smallest {target_subtype}s grouped by region in {anchor_name}", "one tiniest {target_subtype} for each region of {anchor_name}", "the smallest district in each state of {anchor_name}", "smallest district per province in {anchor_name}", "the smallest municipality in each region of {anchor_name}", "tiniest borough per state in {anchor_name}", ], ), SQLTemplate( template_id="window_03", family="window_function", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=1, target_subtype="region", requires_aggregation=True, sql_template=( "WITH ranked AS (" " SELECT id, names.\"primary\" AS name, subtype, country, geometry," " ST_Area(geometry) AS area," " ROW_NUMBER() OVER (PARTITION BY country ORDER BY ST_Area(geometry) DESC) AS rn" " FROM read_parquet('divisions_area')" " WHERE country = '{country}'" " AND subtype = '{target_subtype}'" ")" " SELECT id, name, subtype, country," " ST_AsGeoJSON(geometry) AS geometry, area" " FROM ranked" " WHERE rn = 1" ), question_hints=[ "the largest {target_subtype} in {anchor_name}", "which {target_subtype} of {anchor_name} has the most area?", "biggest {target_subtype} in {anchor_name}", "what is the largest {target_subtype} of {anchor_name}?", "show the biggest {target_subtype} of {anchor_name}", "the largest state in {anchor_name}", "which state of {anchor_name} is the biggest?", "the largest province in {anchor_name}", "biggest state of {anchor_name}", "what is the largest province of {anchor_name}?", "the largest oblast in {anchor_name}", "biggest department of {anchor_name}", "which prefecture of {anchor_name} has the most area?", ], ), SQLTemplate( template_id="window_04", family="window_function", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=1, target_subtype="region", requires_aggregation=True, sql_template=( "WITH ranked AS (" " SELECT id, names.\"primary\" AS name, subtype, country, geometry," " ST_Area(geometry) AS area," " ROW_NUMBER() OVER (PARTITION BY country ORDER BY ST_Area(geometry) ASC) AS rn" " FROM read_parquet('divisions_area')" " WHERE country = '{country}'" " AND subtype = '{target_subtype}'" ")" " SELECT id, name, subtype, country," " ST_AsGeoJSON(geometry) AS geometry, area" " FROM ranked" " WHERE rn = 1" ), question_hints=[ "the smallest {target_subtype} in {anchor_name}", "which {target_subtype} of {anchor_name} has the least area?", "tiniest {target_subtype} in {anchor_name}", "what is the smallest {target_subtype} of {anchor_name}?", "show the smallest {target_subtype} of {anchor_name}", "the smallest state in {anchor_name}", "which state of {anchor_name} is the smallest?", "the smallest province in {anchor_name}", "tiniest state of {anchor_name}", "what is the smallest province of {anchor_name}?", "the smallest oblast in {anchor_name}", "smallest department of {anchor_name}", "which prefecture of {anchor_name} has the least area?", ], ), # ── ATTRIBUTE FILTER ───────────────────────────────────────────────────── # No spatial op — pure WHERE on is_land / is_territorial / country. SQLTemplate( template_id="attr_01", family="attribute_filter", sql_difficulty="medium", anchor_source="divisions_area", num_anchors=1, target_subtype="county", sql_template=( "SELECT id, names.\"primary\" AS name, subtype, country," " ST_AsGeoJSON(geometry) AS geometry" " FROM read_parquet('divisions_area')" " WHERE country = '{country}'" " AND is_land = TRUE" " AND subtype = '{target_subtype}'" ), question_hints=[ "land {target_subtype}s of {anchor_name}", "which {target_subtype}s of {anchor_name} are on land?", "{anchor_name}'s land-based {target_subtype}s", "show the land {target_subtype}s of {anchor_name}", "find land {target_subtype}s of {anchor_name}", "give me land-based {target_subtype}s in {anchor_name}", "which {target_subtype}s of {anchor_name} are land-based?", "non-island {target_subtype}s of {anchor_name}", "land districts of {anchor_name}", "which districts of {anchor_name} are on land?", "land-based municipalities of {anchor_name}", "mainland boroughs of {anchor_name}", ], ), SQLTemplate( template_id="attr_02", family="attribute_filter", sql_difficulty="medium", anchor_source="divisions_area", num_anchors=1, target_subtype="region", sql_template=( "SELECT id, names.\"primary\" AS name, subtype, country," " ST_AsGeoJSON(geometry) AS geometry" " FROM read_parquet('divisions_area')" " WHERE country = '{country}'" " AND is_territorial = TRUE" " AND subtype = '{target_subtype}'" ), question_hints=[ "territorial {target_subtype}s of {anchor_name}", "official territorial divisions of {anchor_name}", "recognised territorial {target_subtype}s belonging to {anchor_name}", "which territorial regions does {anchor_name} have?", "show territorial {target_subtype}s of {anchor_name}", "find official territorial regions of {anchor_name}", "give me recognised territorial {target_subtype}s of {anchor_name}", "territorial regions under {anchor_name}", "{anchor_name} official territorial {target_subtype}s", "official states of {anchor_name}", "recognised provinces of {anchor_name}", "territorial provinces of {anchor_name}", "official oblasts of {anchor_name}", "recognised departments of {anchor_name}", ], ), SQLTemplate( template_id="attr_03", family="attribute_filter", sql_difficulty="medium", anchor_source="divisions_area", num_anchors=1, target_subtype="region", sql_template=( "SELECT id, names.\"primary\" AS name, subtype, country," " ST_AsGeoJSON(geometry) AS geometry" " FROM read_parquet('divisions_area')" " WHERE country = '{country}'" " AND subtype = '{target_subtype}'" " AND is_land = FALSE" ), question_hints=[ "offshore {target_subtype}s of {anchor_name}", "{target_subtype}s of {anchor_name} that are not on land", "water-associated {target_subtype}s of {anchor_name}", "marine or offshore {target_subtype}s of {anchor_name}", "show offshore {target_subtype}s of {anchor_name}", "find {target_subtype}s of {anchor_name} in water", "give me non-land {target_subtype}s of {anchor_name}", "water-side {target_subtype}s of {anchor_name}", "{anchor_name} {target_subtype}s not on land", "offshore states of {anchor_name}", "island provinces of {anchor_name}", "non-land regions of {anchor_name}", ], ), SQLTemplate( template_id="attr_04", family="attribute_filter", sql_difficulty="medium", anchor_source="divisions_area", num_anchors=1, target_subtype="country", sql_template=( "SELECT id, names.\"primary\" AS name, subtype, country," " ST_AsGeoJSON(geometry) AS geometry" " FROM read_parquet('divisions_area')" " WHERE country = '{country}'" " AND subtype = '{target_subtype}'" " AND is_territorial = TRUE" ), question_hints=[ "show the recognised boundary of {anchor_name}", "find the official territorial extent of {anchor_name}", "get the internationally recognised boundary of {anchor_name}", "map the sovereign boundary of {anchor_name}", "show the official country geometry for {anchor_name}", "display the territorial country entry for {anchor_name}", "give me the recognised country outline of {anchor_name}", "official boundary of {anchor_name}", ], ), # ── NATURAL EARTH CHAINED ─────────────────────────────────────────────── # chained_04: localities in a region that intersect a river or lake. # chained_05: localities in a region that lie on a mountain range. SQLTemplate( template_id="chained_04", family="chained", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=1, target_subtype="region", sql_template=( "WITH region AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, region" " WHERE b.subtype = '{target_subtype}'" " AND ST_Within(b.geometry, region.geometry)" " AND EXISTS (" " SELECT 1 FROM read_parquet('natural_earth') AS n" " WHERE n.subtype IN ('river', 'lake', 'basin')" " AND ST_Intersects(b.geometry, n.geometry)" " )" ), question_hints=[ "riverside {target_subtype}s in {anchor_name}", "{target_subtype}s in {anchor_name} near a river or lake", "which {target_subtype}s in {anchor_name} are on a waterway?", "lakeside or riverside {target_subtype}s within {anchor_name}", "{target_subtype}s in {anchor_name} that touch a river", "which {target_subtype}s in {anchor_name} are on a lake?", "waterfront {target_subtype}s of {anchor_name}", "show water-side {target_subtype}s in {anchor_name}", "find {target_subtype}s of {anchor_name} by rivers or lakes", "{target_subtype}s of {anchor_name} on the water", "riverside states of {anchor_name}", "states of {anchor_name} near a river or lake", "provinces of {anchor_name} along a waterway", "which regions of {anchor_name} have a major river?", ], ), SQLTemplate( template_id="chained_05", family="chained", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=1, target_subtype="region", sql_template=( "WITH region AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, region" " WHERE b.subtype = '{target_subtype}'" " AND ST_Within(b.geometry, region.geometry)" " AND EXISTS (" " SELECT 1 FROM read_parquet('natural_earth') AS n" " WHERE n.subtype IN ('range/mtn', 'depression')" " AND ST_Intersects(b.geometry, n.geometry)" " )" ), question_hints=[ "mountain {target_subtype}s in {anchor_name}", "{target_subtype}s in {anchor_name} on a mountain range", "which {target_subtype}s in {anchor_name} are in the mountains?", "highland {target_subtype}s within {anchor_name}", "{target_subtype}s of {anchor_name} in mountainous terrain", "{target_subtype}s in {anchor_name} near a mountain range", "show mountain {target_subtype}s in {anchor_name}", "find {target_subtype}s of {anchor_name} in hilly terrain", "{target_subtype}s of {anchor_name} by the mountains", "mountain states of {anchor_name}", "states of {anchor_name} in mountainous terrain", "highland provinces of {anchor_name}", "which regions of {anchor_name} have a mountain range?", ], ), # ── CHAINED (county-level) ────────────────────────────────────────────── # Same spatial patterns as chained_01..05 but targeting counties/districts # so the model learns "coastal districts of X", "riverside counties", etc. SQLTemplate( template_id="chained_06", family="chained", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=1, target_subtype="county", sql_template=( "WITH region AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, region" " WHERE b.subtype = '{target_subtype}'" " AND ST_Within(b.geometry, region.geometry)" " AND EXISTS (" " SELECT 1 FROM read_parquet('natural_earth') AS n" " WHERE n.subtype IN ('ocean', 'sea')" " AND ST_Intersects(b.geometry, n.geometry)" " )" ), question_hints=[ "coastal {target_subtype}s of {anchor_name}", "which districts of {anchor_name} are on the coast?", "{target_subtype}s in {anchor_name} that border the sea", "seaside {target_subtype}s within {anchor_name}", "{target_subtype}s of {anchor_name} with ocean access", "which {target_subtype}s in {anchor_name} touch the sea?", "maritime {target_subtype}s of {anchor_name}", "show coastal districts of {anchor_name}", "find {target_subtype}s of {anchor_name} by the sea", "give me shoreline {target_subtype}s in {anchor_name}", "{target_subtype}s of {anchor_name} on the coast", ], ), SQLTemplate( template_id="chained_07", family="chained", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=1, target_subtype="county", sql_template=( "WITH region AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, region" " WHERE b.subtype = '{target_subtype}'" " AND ST_Within(b.geometry, region.geometry)" " AND NOT EXISTS (" " SELECT 1 FROM read_parquet('natural_earth') AS n" " WHERE n.subtype IN ('ocean', 'sea')" " AND ST_Intersects(b.geometry, n.geometry)" " )" ), question_hints=[ "landlocked {target_subtype}s of {anchor_name}", "which districts of {anchor_name} have no coastline?", "interior {target_subtype}s within {anchor_name}", "{target_subtype}s in {anchor_name} with no sea access", "non-coastal {target_subtype}s of {anchor_name}", "inland {target_subtype}s of {anchor_name}", "show inland districts of {anchor_name}", "find non-coastal {target_subtype}s in {anchor_name}", "give me inner {target_subtype}s of {anchor_name}", "{target_subtype}s of {anchor_name} away from the coast", ], ), SQLTemplate( template_id="chained_08", family="chained", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=1, target_subtype="county", sql_template=( "WITH region AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, region" " WHERE b.subtype = '{target_subtype}'" " AND ST_Within(b.geometry, region.geometry)" " AND EXISTS (" " SELECT 1 FROM read_parquet('natural_earth') AS n" " WHERE n.subtype IN ('river', 'lake', 'basin')" " AND ST_Intersects(b.geometry, n.geometry)" " )" ), question_hints=[ "riverside {target_subtype}s of {anchor_name}", "which districts of {anchor_name} have a river or lake?", "{target_subtype}s in {anchor_name} on a waterway", "lakeside {target_subtype}s within {anchor_name}", "{target_subtype}s of {anchor_name} along a river", "which {target_subtype}s in {anchor_name} border a lake?", "show water-side {target_subtype}s of {anchor_name}", "find {target_subtype}s in {anchor_name} near rivers and lakes", "give me riverside districts of {anchor_name}", "{target_subtype}s of {anchor_name} by the water", ], ), SQLTemplate( template_id="chained_09", family="chained", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=1, target_subtype="county", sql_template=( "WITH region AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, region" " WHERE b.subtype = '{target_subtype}'" " AND ST_Within(b.geometry, region.geometry)" " AND EXISTS (" " SELECT 1 FROM read_parquet('natural_earth') AS n" " WHERE n.subtype IN ('range/mtn', 'depression')" " AND ST_Intersects(b.geometry, n.geometry)" " )" ), question_hints=[ "mountain {target_subtype}s of {anchor_name}", "which districts of {anchor_name} are in the mountains?", "{target_subtype}s in {anchor_name} on a mountain range", "highland {target_subtype}s within {anchor_name}", "{target_subtype}s of {anchor_name} in mountainous terrain", "which {target_subtype}s in {anchor_name} have mountain ranges?", "show mountain districts of {anchor_name}", "find {target_subtype}s of {anchor_name} in hilly areas", "give me highland {target_subtype}s in {anchor_name}", "{target_subtype}s of {anchor_name} by the mountains", ], ), # chained_10 / chained_11: coastal and inland REGIONS of a country. # Same pattern as chained_06/07 but with target_subtype='region' and # container forced to a country so phrasings like "coastal states of # India" / "inland provinces of Kenya" work correctly. SQLTemplate( template_id="chained_10", family="chained", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=1, target_subtype="region", sql_template=( "WITH country AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, country" " WHERE b.subtype = '{target_subtype}'" " AND ST_Within(b.geometry, country.geometry)" " AND EXISTS (" " SELECT 1 FROM read_parquet('natural_earth') AS n" " WHERE n.subtype IN ('ocean', 'sea')" " AND ST_Intersects(b.geometry, n.geometry)" " )" ), question_hints=[ "coastal states of {anchor_name}", "coastal regions of {anchor_name}", "coastal provinces of {anchor_name}", "which states of {anchor_name} are on the coast?", "regions of {anchor_name} with sea access", "states of {anchor_name} that border the ocean", "maritime states of {anchor_name}", "seaside regions of {anchor_name}", "which provinces of {anchor_name} touch the sea?", "states of {anchor_name} along the coast", "show coastal regions of {anchor_name}", "find seaside states of {anchor_name}", "give me provinces of {anchor_name} on the coast", "{anchor_name} regions by the sea", "which oblasts of {anchor_name} are on the coast?", "coastal departments of {anchor_name}", "coastal prefectures of {anchor_name}", "which cantons of {anchor_name} have sea access?", ], ), SQLTemplate( template_id="chained_11", family="chained", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=1, target_subtype="region", sql_template=( "WITH country AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, country" " WHERE b.subtype = '{target_subtype}'" " AND ST_Within(b.geometry, country.geometry)" " AND NOT EXISTS (" " SELECT 1 FROM read_parquet('natural_earth') AS n" " WHERE n.subtype IN ('ocean', 'sea')" " AND ST_Intersects(b.geometry, n.geometry)" " )" ), question_hints=[ "landlocked states of {anchor_name}", "inland regions of {anchor_name}", "non-coastal states of {anchor_name}", "which states of {anchor_name} have no coast?", "inland provinces of {anchor_name}", "regions of {anchor_name} without sea access", "interior states of {anchor_name}", "states of {anchor_name} that don't border the ocean", "show inland regions of {anchor_name}", "find non-coastal states of {anchor_name}", "give me inner provinces of {anchor_name}", "{anchor_name} regions away from the sea", "landlocked oblasts of {anchor_name}", "inland departments of {anchor_name}", "interior prefectures of {anchor_name}", "inland cantons of {anchor_name}", ], ), # chained_12 / chained_13: coastal and landlocked COUNTRIES adjacent to anchor. # Anchor is any country; finds its neighbours that are coastal / landlocked. SQLTemplate( template_id="chained_12", family="chained", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=1, target_subtype="country", sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, a" " WHERE b.subtype = 'country'" " AND b.id != '{anchor_id}'" " AND ST_Touches(a.geometry, b.geometry)" " AND EXISTS (" " SELECT 1 FROM read_parquet('natural_earth') AS n" " WHERE n.subtype IN ('ocean', 'sea')" " AND ST_Intersects(b.geometry, n.geometry)" " )" ), question_hints=[ "which countries bordering {anchor_name} are coastal?", "coastal countries adjacent to {anchor_name}", "which neighboring countries of {anchor_name} have sea access?", "countries that touch {anchor_name} and border the ocean", "which nations next to {anchor_name} are on the coast?", "find coastal nations bordering {anchor_name}", "countries neighboring {anchor_name} with a coastline", "which countries near {anchor_name} touch the sea?", ], ), SQLTemplate( template_id="chained_13", family="chained", sql_difficulty="hard", anchor_source="divisions_area", num_anchors=1, target_subtype="country", sql_template=( "WITH a AS (" " SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'" ")" " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country," " ST_AsGeoJSON(b.geometry) AS geometry" " FROM read_parquet('divisions_area') AS b, a" " WHERE b.subtype = 'country'" " AND b.id != '{anchor_id}'" " AND ST_Touches(a.geometry, b.geometry)" " AND NOT EXISTS (" " SELECT 1 FROM read_parquet('natural_earth') AS n" " WHERE n.subtype IN ('ocean', 'sea')" " AND ST_Intersects(b.geometry, n.geometry)" " )" ), question_hints=[ "which countries bordering {anchor_name} are landlocked?", "landlocked countries adjacent to {anchor_name}", "which neighboring countries of {anchor_name} have no sea access?", "countries that touch {anchor_name} and are landlocked", "which nations next to {anchor_name} have no coastline?", "find landlocked nations bordering {anchor_name}", "countries neighboring {anchor_name} with no ocean access", "which countries near {anchor_name} are inland?", ], ), ] # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def get_templates_by_family(family: str) -> List[SQLTemplate]: """Return all templates for a specific task family.""" return [t for t in TEMPLATES if t.family == family] def get_template_by_id(template_id: str) -> SQLTemplate: """Return a template by its ID, raising ValueError if not found.""" for t in TEMPLATES: if t.template_id == template_id: return t raise ValueError(f"Template '{template_id}' not found") if __name__ == "__main__": families: dict = {} for t in TEMPLATES: families[t.family] = families.get(t.family, 0) + 1 print("SQL Template Catalog") print("=" * 60) for family, count in sorted(families.items()): print(f"{family:20s}: {count:2d} templates") print(f"{'TOTAL':20s}: {len(TEMPLATES):2d} templates") # Verify every template's final SELECT wraps geometry with ST_AsGeoJSON print() print("Geometry output check (all should show ST_AsGeoJSON)") print("=" * 60) for t in TEMPLATES: has_geojson = "ST_AsGeoJSON" in t.sql_template status = "OK" if has_geojson else "MISSING" print(f" {t.template_id:20s}: {status}")