Spaces:

developmentseed
/

gazet

Running

App Files Files Community

srmsoumya committed 24 days ago

Commit

ca28f70

1 Parent(s): d237392

enh: Add templates to handle queries like subregion, region

Browse files

Files changed (4) hide show

dataset/config.yaml +3 -2
dataset/scripts/export_training_data.py +30 -13
dataset/scripts/generate_samples.py +153 -11
dataset/scripts/sql_templates.py +200 -0

dataset/config.yaml CHANGED Viewed

@@ -18,12 +18,13 @@ countries:
 # template_id gets enough coverage after uniform sampling + stratified split.
 sample_targets:
   direct_lookup:       500
-  adjacency:          1500   # 5 templates (adj_01..05)
   multi_adjacency:     300
   containment:        1200   # 4 templates (contain_01..04)
   intersection:       1200   # 4 templates (intersect_01..04)
   buffer:             1200   # 5 templates (buffer_01..05)
-  chained:            2700   # 9 templates (chained_01..09)
   difference:          900   # 2 templates, one is mixed (diff_02)
   border_corridor:     300
   set_operations:      900

 # template_id gets enough coverage after uniform sampling + stratified split.
 sample_targets:
   direct_lookup:       500
+  disambiguation:     1500   # 3 templates (disambiguate_01..03) - "Puri, Odisha" pattern
+  adjacency:          1800   # 6 templates (adj_01..06) - adj_06 is counties
   multi_adjacency:     300
   containment:        1200   # 4 templates (contain_01..04)
   intersection:       1200   # 4 templates (intersect_01..04)
   buffer:             1200   # 5 templates (buffer_01..05)
+  chained:            3300   # 11 templates (chained_01..11) - chained_10/11 are coastal/inland regions
   difference:          900   # 2 templates, one is mixed (diff_02)
   border_corridor:     300
   set_operations:      900

dataset/scripts/export_training_data.py CHANGED Viewed

@@ -206,17 +206,19 @@ def sample_to_sql_pair(sample: Dict[str, Any]) -> Optional[Dict]:
 # Derived from the same SQL samples: selected_candidates → PlacesResult JSON.
 # ---------------------------------------------------------------------------
-_PLACE_SYSTEM = """You are a geographic entity extractor. Extract every place name mentioned in the user query and return valid JSON only.
 OUTPUT FORMAT:
 {"places": [{"place": "<name>", "country": "<ISO-2>", "subtype": "<subtype>"}]}
 "country" and "subtype" are optional; omit if not applicable.
 RULES:
-- Extract ALL named places. If the query mentions two or more places (e.g. one admin area and one physical feature, or two countries), return every one of them in the order they appear.
-- Only extract places explicitly named. Do not infer or expand category nouns such as "regions", "districts", "counties", "rivers", "mountains" when they refer to a type rather than a specific place (e.g. "regions of India" -> extract "India" only).
 - No duplicate place names.
-- "country": ISO 3166-1 alpha-2. Include only if explicitly mentioned or unambiguous from the name.
 - "subtype": include only when the geographic level is clear from the query.
 SUBTYPES:
@@ -224,17 +226,23 @@ country, dependency, region, county, localadmin, locality, macrohood, neighborho
 - Default to locality for cities/towns; omit for physical features (oceans, seas, rivers, lakes, basins, mountains, ranges, peninsulas, islands, terrain areas).
 EXAMPLES:
-Query: "coastal districts of Brazil"
--> {"places": [{"place": "Brazil", "subtype": "country"}]}
-Query: "part of Ecuador in the Amazon basin"
--> {"places": [{"place": "Ecuador", "subtype": "country"}, {"place": "Amazon basin"}]}
-Query: "Amazon basin inside Ecuador"
--> {"places": [{"place": "Amazon basin"}, {"place": "Ecuador", "subtype": "country"}]}
-Query: "which regions border both France and Germany?"
--> {"places": [{"place": "France", "subtype": "country"}, {"place": "Germany", "subtype": "country"}]}
 Query: "northern half of India"
 -> {"places": [{"place": "India", "subtype": "country"}]}
@@ -245,6 +253,15 @@ Query: "what's within 50 km of Paris?"
 Query: "countries the Nile crosses"
 -> {"places": [{"place": "Nile"}]}
 Query: "merge Nairobi and Mombasa"
 -> {"places": [{"place": "Nairobi", "subtype": "locality"}, {"place": "Mombasa", "subtype": "locality"}]}"""
@@ -281,7 +298,7 @@ def sample_to_place_pair(sample: Dict[str, Any]) -> Optional[Dict]:
     Uses selected_candidates to determine the correct PlacesResult output.
     Skips samples where no valid places can be derived.
     """
-    selected_ids = set(sample.get("target", {}).get("selected_candidates", []))
     if not selected_ids:
         return None

 # Derived from the same SQL samples: selected_candidates → PlacesResult JSON.
 # ---------------------------------------------------------------------------
+_PLACE_SYSTEM = """You are a geographic entity extractor. Extract the place names the user is asking about and return valid JSON only.
 OUTPUT FORMAT:
 {"places": [{"place": "<name>", "country": "<ISO-2>", "subtype": "<subtype>"}]}
 "country" and "subtype" are optional; omit if not applicable.
 RULES:
+- Extract the place(s) that are the target of the query.
+- When a place is followed by its containing region, state, or country as disambiguation context ("Puri, Odisha", "Lisboa, Portugal", "Goa, India", "Manchester in US"), extract ONLY the specific place. Do not return the container as a separate place — record its info on the target using `country` (ISO-2) when unambiguous.
+- When a query names two or more distinct anchors joined by words like "and", "both", "between", "or" ("France and Germany", "between Nairobi and Mombasa"), or mixes an admin area with a physical feature as independent anchors ("part of Ecuador in the Amazon basin"), extract every anchor in the order they appear.
+- Do not infer or expand category nouns like "regions", "districts", "counties", "rivers", "mountains" when they refer to a type rather than a specific place ("regions of India" -> extract "India" only).
 - No duplicate place names.
+- "country": ISO 3166-1 alpha-2. Include only if explicitly mentioned or unambiguous.
 - "subtype": include only when the geographic level is clear from the query.
 SUBTYPES:
 - Default to locality for cities/towns; omit for physical features (oceans, seas, rivers, lakes, basins, mountains, ranges, peninsulas, islands, terrain areas).
 EXAMPLES:
+Query: "Puri, Odisha"
+-> {"places": [{"place": "Puri", "subtype": "locality", "country": "IN"}]}
+Query: "Lisboa, Portugal"
+-> {"places": [{"place": "Lisboa", "subtype": "locality", "country": "PT"}]}
+Query: "Goa, India"
+-> {"places": [{"place": "Goa", "subtype": "region", "country": "IN"}]}
+Query: "Manchester in US"
+-> {"places": [{"place": "Manchester", "subtype": "locality", "country": "US"}]}
+Query: "Springfield, Illinois"
+-> {"places": [{"place": "Springfield", "subtype": "locality", "country": "US"}]}
+Query: "coastal districts of Brazil"
+-> {"places": [{"place": "Brazil", "subtype": "country"}]}
 Query: "northern half of India"
 -> {"places": [{"place": "India", "subtype": "country"}]}
 Query: "countries the Nile crosses"
 -> {"places": [{"place": "Nile"}]}
+Query: "part of Ecuador in the Amazon basin"
+-> {"places": [{"place": "Ecuador", "subtype": "country"}, {"place": "Amazon basin"}]}
+Query: "Amazon basin inside Ecuador"
+-> {"places": [{"place": "Amazon basin"}, {"place": "Ecuador", "subtype": "country"}]}
+Query: "which regions border both France and Germany?"
+-> {"places": [{"place": "France", "subtype": "country"}, {"place": "Germany", "subtype": "country"}]}
 Query: "merge Nairobi and Mombasa"
 -> {"places": [{"place": "Nairobi", "subtype": "locality"}, {"place": "Mombasa", "subtype": "locality"}]}"""
     Uses selected_candidates to determine the correct PlacesResult output.
     Skips samples where no valid places can be derived.
     """
+    selected_ids = sample.get("target", {}).get("selected_candidates", [])
     if not selected_ids:
         return None

dataset/scripts/generate_samples.py CHANGED Viewed

@@ -95,12 +95,27 @@ def load_relation_tables(intermediate_dir: Path, quiet: bool = False) -> Dict[st
     return tables
-def sample_adjacency_anchor(adjacency_df: pd.DataFrame) -> Optional[Dict[str, Any]]:
-    """Sample a random adjacency pair."""
     if adjacency_df.empty:
         return None
-    row = adjacency_df.sample(n=1).iloc[0]
     return {
         'anchor_id': row['anchor_id'],
         'anchor_name': row['anchor_name'],
@@ -140,6 +155,37 @@ def sample_containment_anchor(containment_df: pd.DataFrame) -> Optional[Dict[str
     }
 def sample_cross_source_anchor(cross_source_df: pd.DataFrame) -> Optional[Dict[str, Any]]:
     """Sample a random cross-source relation."""
     if cross_source_df.empty:
@@ -517,12 +563,89 @@ def generate_template_based_sample(
         # Question
         question = random.choice(template.question_hints).format(anchor_name=anchor['name'])
     elif template.family == "adjacency":
-        anchor = sample_adjacency_anchor(tables['adjacency_pairs'])
         if not anchor:
             return None
         sql = template.sql_template.format(
             anchor_id=anchor['anchor_id'],
             target_subtype=anchor['target_subtype']
@@ -918,11 +1041,30 @@ def generate_template_based_sample(
             table_key = 'landlocked_containment_pairs'
         else:
             table_key = 'containment_pairs'
-        anchor = sample_containment_anchor(tables.get(table_key, tables['containment_pairs']))
         if not anchor:
             return None
-        target_subtype = anchor.get('contained_subtype', 'locality')
         sql = template.sql_template.format(
             anchor_id=anchor['container_id'],
@@ -1182,14 +1324,14 @@ def generate_template_based_sample(
             or anchor.get('id')
         )
         selected_candidate_ids = [c.candidate_id for c in candidates if c.id == anchor_id_to_find]
     return TrainingSample(
         id=sample_id,
         question=question,
         candidates=candidates,
         target={
             "selected_candidates": selected_candidate_ids,
-            "sql": sql
         },
         metadata={
             "task_family": template.family,

     return tables
+def sample_adjacency_anchor(
+    adjacency_df: pd.DataFrame,
+    target_subtype: Optional[str] = None,
+) -> Optional[Dict[str, Any]]:
+    """Sample a random adjacency pair, optionally filtered by target_subtype.
+    When ``target_subtype`` is provided, only rows whose neighbouring feature
+    matches that subtype are considered. This lets subtype-specific templates
+    (e.g. "neighbouring counties of X") guarantee coverage instead of relying
+    on whatever subtype the random draw happens to produce.
+    """
     if adjacency_df.empty:
         return None
+    df = adjacency_df
+    if target_subtype is not None:
+        df = df[df['target_subtype'] == target_subtype]
+        if df.empty:
+            return None
+    row = df.sample(n=1).iloc[0]
     return {
         'anchor_id': row['anchor_id'],
         'anchor_name': row['anchor_name'],
     }
+def sample_disambiguation_anchor(
+    containment_df: pd.DataFrame,
+    contained_subtypes: List[str],
+    container_subtypes: List[str],
+) -> Optional[Dict[str, Any]]:
+    """Sample a (contained, container) pair from containment_pairs.
+    Used by disambiguation templates like "Puri, Odisha" where the contained
+    entity is the target and the container provides disambiguation context.
+    Args:
+        containment_df: Containment table; read via its 'contained_*' and
+            'container_*' id / name / subtype columns.
+        contained_subtypes: Allowed values of the 'contained_subtype' column.
+        container_subtypes: Allowed values of the 'container_subtype' column.
+    Returns:
+        A dict with the keys 'contained_id', 'contained_name',
+        'contained_subtype', 'container_id', 'container_name' and
+        'container_subtype', copied from one sampled row, or None when the
+        table is empty or no row matches both subtype lists.
+    """
+    if containment_df.empty:
+        return None
+    df = containment_df[  # keep rows whose subtypes match BOTH allow-lists
+        containment_df['contained_subtype'].isin(contained_subtypes)
+        & containment_df['container_subtype'].isin(container_subtypes)
+    ]
+    if df.empty:
+        return None
+    row = df.sample(n=1).iloc[0]  # one random row among the matches
+    return {
+        'contained_id': row['contained_id'],
+        'contained_name': row['contained_name'],
+        'contained_subtype': row['contained_subtype'],
+        'container_id': row['container_id'],
+        'container_name': row['container_name'],
+        'container_subtype': row['container_subtype'],
+    }
 def sample_cross_source_anchor(cross_source_df: pd.DataFrame) -> Optional[Dict[str, Any]]:
     """Sample a random cross-source relation."""
     if cross_source_df.empty:
         # Question
         question = random.choice(template.question_hints).format(anchor_name=anchor['name'])
+    elif template.family == "disambiguation":
+        # "Puri, Odisha" style: pick a (contained, container) pair whose
+        # subtypes match the template, build candidates that include the
+        # container + same-name distractors so the model must read the CSV
+        # to pick the right entry.
+        _disambig_subtypes = {
+            "disambiguate_01": (["locality"], ["region", "county", "localadmin"]),
+            "disambiguate_02": (["locality"], ["country"]),
+            "disambiguate_03": (["region", "dependency"], ["country"]),
+        }
+        contained_sts, container_sts = _disambig_subtypes.get(
+            template.template_id, (["locality"], ["country"])
+        )
+        pair = sample_disambiguation_anchor(
+            tables["containment_pairs"], contained_sts, container_sts
+        )
+        if not pair:
+            return None
+        candidates = build_candidate_list(
+            con, pair["contained_id"], pair["contained_name"], "divisions_area",
+            num_candidates=10, difficulty="hard"
+        )
+        # Ensure the container is among the candidates so the model can
+        # ground the disambiguation context (e.g. "Odisha").
+        if not any(c.id == pair["container_id"] for c in candidates):
+            container_rows = con.execute(
+                'SELECT id, names."primary" AS name, subtype, country, region, admin_level '
+                'FROM read_parquet(?) WHERE id = ? LIMIT 1',
+                [DIVISIONS_AREA_PATH, pair["container_id"]]
+            ).fetchdf()
+            if container_rows.empty:
+                return None
+            crow = container_rows.iloc[0]
+            def _nn(v):
+                return None if pd.isna(v) else v
+            container_cand = Candidate(
+                candidate_id="temp",
+                source="divisions_area",
+                id=pair["container_id"],
+                name=_nn(crow["name"]),
+                subtype=_nn(crow["subtype"]),
+                country=_nn(crow["country"]),
+                region=_nn(crow["region"]),
+                admin_level=_nn(crow["admin_level"]),
+                similarity=0.95,
+            )
+            # Insert the container right after the first candidate (assumed to
+            # be the true target) and drop the last distractor so the list
+            # length is unchanged (10 when the full quota was returned).
+            candidates = [candidates[0], container_cand] + candidates[1:-1]
+            for i, c in enumerate(candidates, 1):
+                c.candidate_id = f"c{i}"
+        sql = template.sql_template.format(anchor_id=pair["contained_id"])
+        question = random.choice(template.question_hints).format(
+            anchor_name=pair["contained_name"],
+            container_name=pair["container_name"],
+        )
+        # Only the contained entity is the query target — the container is
+        # disambiguation context and stays in candidates but NOT in
+        # selected_candidates. The model learns to use the container row of
+        # the CSV (via country/region columns) to pick the right same-name
+        # locality.
+        anchor = {"id": pair["contained_id"], "name": pair["contained_name"]}
     elif template.family == "adjacency":
+        # If the template pins a target_subtype (e.g. adj_02='region',
+        # adj_06='county'), honour it so the sampled pair is guaranteed to
+        # match the question phrasing ("neighbouring counties of X").
+        anchor = sample_adjacency_anchor(
+            tables['adjacency_pairs'],
+            target_subtype=template.target_subtype,
+        )
         if not anchor:
             return None
         sql = template.sql_template.format(
             anchor_id=anchor['anchor_id'],
             target_subtype=anchor['target_subtype']
             table_key = 'landlocked_containment_pairs'
         else:
             table_key = 'containment_pairs'
+        # chained_10/11 need a country-level anchor ("coastal states of
+        # India") and region-level targets, so filter the containment pairs
+        # to (container=country, contained=region) before sampling.
+        _chained_subtype_filter = {
+            "chained_10": ("country", "region"),
+            "chained_11": ("country", "region"),
+        }
+        df = tables.get(table_key, tables['containment_pairs'])
+        filt = _chained_subtype_filter.get(template.template_id)
+        if filt:
+            df = df[
+                (df['container_subtype'] == filt[0])
+                & (df['contained_subtype'] == filt[1])
+            ]
+        anchor = sample_containment_anchor(df)
         if not anchor:
             return None
+        # Prefer the template-pinned target_subtype when set (e.g. chained_10
+        # always wants 'region') so the SQL filter and question phrasing stay
+        # in sync regardless of what the sampled pair happens to contain.
+        target_subtype = template.target_subtype or anchor.get('contained_subtype', 'locality')
         sql = template.sql_template.format(
             anchor_id=anchor['container_id'],
             or anchor.get('id')
         )
         selected_candidate_ids = [c.candidate_id for c in candidates if c.id == anchor_id_to_find]
     return TrainingSample(
         id=sample_id,
         question=question,
         candidates=candidates,
         target={
             "selected_candidates": selected_candidate_ids,
+            "sql": sql,
         },
         metadata={
             "task_family": template.family,

dataset/scripts/sql_templates.py CHANGED Viewed

@@ -25,6 +25,9 @@ correct parquet path from the candidates table.
 Template families
 -----------------
 direct_lookup      Simple single-feature fetch by ID.
 adjacency          ST_Touches — features sharing a border.
 multi_adjacency    Features that simultaneously touch TWO anchors.
 containment        ST_Within / ST_Contains — hierarchical nesting.
@@ -125,6 +128,96 @@ TEMPLATES = [
         ],
     ),
     # ── ADJACENCY ────────────────────────────────────────────────────────────
     SQLTemplate(
@@ -215,6 +308,38 @@ TEMPLATES = [
         ],
     ),
     # ── MULTI-ADJACENCY ──────────────────────────────────────────────────────
     SQLTemplate(
@@ -1555,6 +1680,81 @@ TEMPLATES = [
         ],
     ),
     # ── NATURAL EARTH CONTAINMENT ───────────────────────────────────────────
     # contain_04: NE anchor (sea/gulf/bay), find countries that touch it.
     # Uses containment handler via containment_pairs.

 Template families
 -----------------
 direct_lookup      Simple single-feature fetch by ID.
+disambiguation     "Place, Container" queries like "Puri, Odisha" — lookup by
+                   ID after resolving an ambiguous name via the containing
+                   region or country mentioned in the query.
 adjacency          ST_Touches — features sharing a border.
 multi_adjacency    Features that simultaneously touch TWO anchors.
 containment        ST_Within / ST_Contains — hierarchical nesting.
         ],
     ),
+    # ── DISAMBIGUATION ──────────────────────────────────────────────────────
+    # "Puri, Odisha", "Lisbon, Portugal", "Goa, India" — a common real-world
+    # query pattern where users give a place plus its containing region or
+    # country to disambiguate same-name localities.
+    # SQL is a plain lookup by id (disambiguation happens at candidate-pick
+    # time). Candidates include same-name localities in other regions plus
+    # the container, so the model must read the CSV to choose correctly.
+    #
+    # disambiguate_01: locality scoped by its region / county
+    # disambiguate_02: locality scoped by its country
+    # disambiguate_03: region / dependency scoped by its country
+    SQLTemplate(
+        template_id="disambiguate_01",
+        family="disambiguation",
+        sql_difficulty="easy",
+        anchor_source="divisions_area",
+        num_anchors=1,
+        sql_template=(
+            "SELECT ST_AsGeoJSON(geometry) AS geometry,"
+            " names.\"primary\" AS name, id, subtype, country, region"
+            " FROM read_parquet('divisions_area')"
+            " WHERE id = '{anchor_id}'"
+        ),
+        question_hints=[
+            "{anchor_name}, {container_name}",
+            "{anchor_name} in {container_name}",
+            "the {anchor_name} that's in {container_name}",
+            "show me {anchor_name}, {container_name}",
+            "where is {anchor_name}, {container_name}?",
+            "map of {anchor_name} ({container_name})",
+            "{anchor_name} ({container_name})",
+            "{anchor_name} {container_name}",
+            "pull up {anchor_name} in {container_name}",
+            "find {anchor_name} in {container_name}",
+        ],
+    ),
+    SQLTemplate(
+        template_id="disambiguate_02",
+        family="disambiguation",
+        sql_difficulty="easy",
+        anchor_source="divisions_area",
+        num_anchors=1,
+        sql_template=(
+            "SELECT ST_AsGeoJSON(geometry) AS geometry,"
+            " names.\"primary\" AS name, id, subtype, country"
+            " FROM read_parquet('divisions_area')"
+            " WHERE id = '{anchor_id}'"
+        ),
+        question_hints=[
+            "{anchor_name}, {container_name}",
+            "{anchor_name} in {container_name}",
+            "{anchor_name}, {container_name}.",
+            "show me {anchor_name}, {container_name}",
+            "where is {anchor_name} in {container_name}?",
+            "the {anchor_name} that's in {container_name}",
+            "map of {anchor_name}, {container_name}",
+            "pull up {anchor_name} ({container_name})",
+            "find {anchor_name} in {container_name}",
+            "{anchor_name} {container_name}",
+        ],
+    ),
+    SQLTemplate(
+        template_id="disambiguate_03",
+        family="disambiguation",
+        sql_difficulty="easy",
+        anchor_source="divisions_area",
+        num_anchors=1,
+        sql_template=(
+            "SELECT ST_AsGeoJSON(geometry) AS geometry,"
+            " names.\"primary\" AS name, id, subtype, country"
+            " FROM read_parquet('divisions_area')"
+            " WHERE id = '{anchor_id}'"
+        ),
+        question_hints=[
+            "{anchor_name}, {container_name}",
+            "{anchor_name} state of {container_name}",
+            "the {anchor_name} region in {container_name}",
+            "show me {anchor_name}, {container_name}",
+            "where is {anchor_name} in {container_name}?",
+            "map of {anchor_name}, {container_name}",
+            "{anchor_name} ({container_name})",
+            "{anchor_name} province of {container_name}",
+            "pull up {anchor_name} in {container_name}",
+            "find {anchor_name} {container_name}",
+        ],
+    ),
     # ── ADJACENCY ────────────────────────────────────────────────────────────
     SQLTemplate(
         ],
     ),
+    SQLTemplate(
+        template_id="adj_06",
+        family="adjacency",
+        sql_difficulty="medium",
+        anchor_source="divisions_area",
+        num_anchors=1,
+        target_subtype="county",
+        sql_template=(
+            "WITH a AS ("
+            "  SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'"
+            ")"
+            " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country,"
+            "        ST_AsGeoJSON(b.geometry) AS geometry"
+            " FROM read_parquet('divisions_area') AS b, a"
+            " WHERE b.id != '{anchor_id}'"
+            "   AND b.subtype = '{target_subtype}'"
+            "   AND ST_Touches(a.geometry, b.geometry)"
+        ),
+        question_hints=[
+            "neighbouring counties of {anchor_name}",
+            "neighbouring districts of {anchor_name}",
+            "which counties border {anchor_name}?",
+            "which districts border {anchor_name}?",
+            "counties adjacent to {anchor_name}",
+            "districts next to {anchor_name}",
+            "counties sharing a border with {anchor_name}",
+            "what counties touch {anchor_name}?",
+            "nearby counties of {anchor_name}",
+            "counties along the {anchor_name} boundary",
+        ],
+    ),
     # ── MULTI-ADJACENCY ──────────────────────────────────────────────────────
     SQLTemplate(
         ],
     ),
+    # chained_10 / chained_11: coastal and inland REGIONS of a country.
+    # Same pattern as chained_06/07 but with target_subtype='region' and
+    # container forced to a country so phrasings like "coastal states of
+    # India" / "inland provinces of Kenya" work correctly.
+    SQLTemplate(
+        template_id="chained_10",
+        family="chained",
+        sql_difficulty="hard",
+        anchor_source="divisions_area",
+        num_anchors=1,
+        target_subtype="region",
+        sql_template=(
+            "WITH country AS ("
+            "  SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'"
+            ")"
+            " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country,"
+            "        ST_AsGeoJSON(b.geometry) AS geometry"
+            " FROM read_parquet('divisions_area') AS b, country"
+            " WHERE b.subtype = '{target_subtype}'"
+            "   AND ST_Within(b.geometry, country.geometry)"
+            "   AND EXISTS ("
+            "     SELECT 1 FROM read_parquet('natural_earth') AS n"
+            "     WHERE n.subtype IN ('ocean', 'sea')"
+            "       AND ST_Intersects(b.geometry, n.geometry)"
+            "   )"
+        ),
+        question_hints=[
+            "coastal states of {anchor_name}",
+            "coastal regions of {anchor_name}",
+            "coastal provinces of {anchor_name}",
+            "which states of {anchor_name} are on the coast?",
+            "regions of {anchor_name} with sea access",
+            "states of {anchor_name} that border the ocean",
+            "maritime states of {anchor_name}",
+            "seaside regions of {anchor_name}",
+            "which provinces of {anchor_name} touch the sea?",
+            "states of {anchor_name} along the coast",
+        ],
+    ),
+    SQLTemplate(
+        template_id="chained_11",
+        family="chained",
+        sql_difficulty="hard",
+        anchor_source="divisions_area",
+        num_anchors=1,
+        target_subtype="region",
+        sql_template=(
+            "WITH country AS ("
+            "  SELECT geometry FROM read_parquet('divisions_area') WHERE id = '{anchor_id}'"
+            ")"
+            " SELECT b.id, b.names.\"primary\" AS name, b.subtype, b.country,"
+            "        ST_AsGeoJSON(b.geometry) AS geometry"
+            " FROM read_parquet('divisions_area') AS b, country"
+            " WHERE b.subtype = '{target_subtype}'"
+            "   AND ST_Within(b.geometry, country.geometry)"
+            "   AND NOT EXISTS ("
+            "     SELECT 1 FROM read_parquet('natural_earth') AS n"
+            "     WHERE n.subtype IN ('ocean', 'sea')"
+            "       AND ST_Intersects(b.geometry, n.geometry)"
+            "   )"
+        ),
+        question_hints=[
+            "landlocked states of {anchor_name}",
+            "inland regions of {anchor_name}",
+            "non-coastal states of {anchor_name}",
+            "which states of {anchor_name} have no coast?",
+            "inland provinces of {anchor_name}",
+            "regions of {anchor_name} without sea access",
+            "interior states of {anchor_name}",
+            "states of {anchor_name} that don't border the ocean",
+        ],
+    ),
     # ── NATURAL EARTH CONTAINMENT ───────────────────────────────────────────
     # contain_04: NE anchor (sea/gulf/bay), find countries that touch it.
     # Uses containment handler via containment_pairs.