Spaces:

cjc0013
/

dronesightings

Running

App Files Files Community

cjc0013 committed 20 days ago

Commit

c518d3f

verified ·

1 Parent(s): 47078c4

Simplify drone Space for public readers

Browse files

Research-steered plain-language redesign: guided storylines first, simple map and report filters, readable source cards, and technical details moved to data notes.

Files changed (3) hide show

README.md +4 -4
public_space_app.py +421 -312
space_manifest.json +6 -6

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 title: Drone Sightings Map
-emoji: 🛸
 colorFrom: red
 colorTo: blue
 sdk: gradio
@@ -9,8 +9,8 @@ app_file: app.py
 python_version: 3.11
 ---
-# Mystery Drone Reports Around Sensitive Sites
-Map-first review surface for public-source reports about mystery, unidentified, suspicious, or unauthorized drone activity around sensitive sites.
-The default map groups repeated reports by coordinate/site so dense clusters are readable. Marker size is case count, color is strongest evidence tier, and symbol is coordinate quality. Selecting a marker opens the source-linked cases behind it.

 ---
 title: Drone Sightings Map
+emoji: "🛸"
 colorFrom: red
 colorTo: blue
 sdk: gradio
 python_version: 3.11
 ---
+# Mystery Drone Reports Near Sensitive Places
+Plain-language Space for exploring public-source reports about mystery, unidentified, suspicious, or unauthorized drone activity near sensitive places.
+Start with the guided storylines, then use the map and report list for source links, cautions, and technical details.

public_space_app.py CHANGED Viewed

@@ -8,40 +8,62 @@ import pandas as pd
 import plotly.express as px
-GROUP_COLUMNS = [
-    "case_count",
-    "strongest_evidence_tier",
-    "plot_label",
-    "coordinate_quality",
-    "country",
-    "date_span",
-    "probable_cluster_count",
-    "evidence_mix",
-    "top_source_domains",
 ]
-CASE_COLUMNS = [
     "case_rank",
     "evidence_tier",
-    "report_date",
-    "country",
-    "site_name",
-    "site_type",
     "coordinate_quality",
-    "headline",
-    "source_domain",
-    "followup_status",
 ]
-TIER_RANK = {
-    "resolved_sensitive_site_report": 0,
-    "named_sensitive_site_report": 1,
-    "source_discovered_report": 2,
-}
-TIER_LABEL = {
-    "resolved_sensitive_site_report": "resolved site report",
-    "named_sensitive_site_report": "named-site report",
-    "source_discovered_report": "source-discovered report",
-}
-COARSE_COORDINATE_QUALITIES = {"region_centroid", "country_centroid", "city_area_centroid"}
 def _load_data(data_dir: Path) -> tuple[pd.DataFrame, dict, dict]:
@@ -51,14 +73,21 @@ def _load_data(data_dir: Path) -> tuple[pd.DataFrame, dict, dict]:
     cases["case_rank"] = pd.to_numeric(cases["case_rank"], errors="coerce").fillna(999999).astype(int)
     cases["plot_lat"] = pd.to_numeric(cases["plot_lat"], errors="coerce")
     cases["plot_lon"] = pd.to_numeric(cases["plot_lon"], errors="coerce")
-    cases["report_year"] = cases["report_date"].astype(str).str.slice(0, 4).replace("", "unknown")
     cases["map_group_id"] = cases.apply(
         lambda row: "|".join(
             [
                 f"{float(row['plot_lat']):.4f}" if pd.notna(row["plot_lat"]) else "",
                 f"{float(row['plot_lon']):.4f}" if pd.notna(row["plot_lon"]) else "",
                 str(row.get("plot_label", "")),
-                str(row.get("coordinate_quality", "")),
                 str(row.get("country", "")),
             ]
         ),
@@ -67,342 +96,422 @@ def _load_data(data_dir: Path) -> tuple[pd.DataFrame, dict, dict]:
     return cases, manifest, quality
-def _markdown_header(manifest: dict, quality: dict) -> str:
-    tiers = manifest.get("counts_by_evidence_tier", {})
-    return f"""# Mystery Drone Reports Around Sensitive Sites
-Map-first review surface for public-source reports around military, airport, maritime, emergency-service, and critical-infrastructure contexts.
-**{manifest.get("case_count", 0)} cases** | **{manifest.get("probable_cluster_count", 0)} probable clusters** | **{quality.get("mappable_case_count", 0)} mapped rows** | **release gate: {"pass" if quality.get("release_grade") else "review"}**
-Default view groups repeated reports at the same public coordinate so dense clusters, like New Jersey, read as one place-marker with a case list instead of a pile of overlapping dots.
-Evidence tiers: resolved site `{tiers.get("resolved_sensitive_site_report", 0)}`, named site `{tiers.get("named_sensitive_site_report", 0)}`, source-discovered `{tiers.get("source_discovered_report", 0)}`.
-Map points are source-indexed report locations, not verified findings of threat, attribution, anomalous origin, or hostile intent. `coordinate_quality` tells you whether a marker is a site, city, region, or country centroid.
-"""
-def _options(values: pd.Series) -> list[str]:
-    return sorted(str(value) for value in values.dropna().astype(str).unique() if str(value))
-def _strongest_tier(values: pd.Series) -> str:
-    tiers = [str(value) for value in values if str(value)]
-    if not tiers:
-        return "source_discovered_report"
-    return sorted(tiers, key=lambda value: TIER_RANK.get(value, 99))[0]
-def _count_text(values: pd.Series, *, label_map: dict[str, str] | None = None, limit: int = 4) -> str:
-    counts = values.astype(str).replace("", "unknown").value_counts()
-    parts = []
-    for key, value in counts.head(limit).items():
-        label = label_map.get(key, key) if label_map else key
-        parts.append(f"{label}: {int(value)}")
-    return "; ".join(parts)
-def _date_span(values: pd.Series) -> str:
-    dates = sorted(str(value) for value in values if str(value))
-    if not dates:
-        return "undated"
-    if dates[0] == dates[-1]:
-        return dates[0]
-    return f"{dates[0]} to {dates[-1]}"
-def _filter_cases(
-    cases: pd.DataFrame,
-    evidence_tiers: list[str] | None,
-    coordinate_qualities: list[str] | None,
-    countries: list[str] | None,
-    site_types: list[str] | None,
-    source_domains: list[str] | None,
-    years: list[str] | None,
-    query: str,
-) -> pd.DataFrame:
-    filtered = cases.copy()
-    if evidence_tiers:
-        filtered = filtered[filtered["evidence_tier"].isin(evidence_tiers)]
-    if coordinate_qualities:
-        filtered = filtered[filtered["coordinate_quality"].isin(coordinate_qualities)]
-    if countries:
-        filtered = filtered[filtered["country"].isin(countries)]
-    if site_types:
-        filtered = filtered[filtered["site_type"].isin(site_types)]
-    if source_domains:
-        filtered = filtered[filtered["source_domain"].isin(source_domains)]
-    if years:
-        filtered = filtered[filtered["report_year"].isin(years)]
-    query = str(query or "").strip().lower()
-    if query:
         haystack = (
-            filtered["headline"].astype(str)
             + " "
-            + filtered["site_name"].astype(str)
             + " "
-            + filtered["plot_label"].astype(str)
             + " "
-            + filtered["country"].astype(str)
             + " "
-            + filtered["source_domain"].astype(str)
         ).str.lower()
-        filtered = filtered[haystack.str.contains(query, regex=False)]
-    return filtered.sort_values(["case_rank"]).reset_index(drop=True)
-def _group_cases(filtered: pd.DataFrame) -> pd.DataFrame:
-    rows: list[dict] = []
-    if filtered.empty:
-        return pd.DataFrame(columns=GROUP_COLUMNS + ["map_group_id", "plot_lat", "plot_lon"])
-    for group_id, group in filtered.groupby("map_group_id", sort=False):
-        strongest = _strongest_tier(group["evidence_tier"])
-        rows.append(
             {
                 "map_group_id": group_id,
-                "case_count": int(len(group)),
-                "strongest_evidence_tier": strongest,
                 "plot_lat": float(group["plot_lat"].iloc[0]),
                 "plot_lon": float(group["plot_lon"].iloc[0]),
-                "plot_label": str(group["plot_label"].iloc[0]),
-                "coordinate_quality": str(group["coordinate_quality"].iloc[0]),
-                "country": str(group["country"].iloc[0]),
-                "date_span": _date_span(group["report_date"]),
-                "probable_cluster_count": int(group["probable_cluster_id"].nunique()),
-                "evidence_mix": _count_text(group["evidence_tier"], label_map=TIER_LABEL),
-                "top_source_domains": _count_text(group["source_domain"], limit=3),
-                "site_types": _count_text(group["site_type"], limit=3),
             }
         )
-    grouped = pd.DataFrame(rows)
-    grouped = grouped.sort_values(
-        ["case_count", "strongest_evidence_tier", "plot_label"],
-        ascending=[False, True, True],
-    ).reset_index(drop=True)
-    return grouped
-def _marker_rows(filtered: pd.DataFrame, mode: str, repeated_only: bool) -> pd.DataFrame:
-    working = filtered.copy()
-    if mode == "Coarse-location review":
-        working = working[working["coordinate_quality"].isin(COARSE_COORDINATE_QUALITIES)]
-    if mode == "Individual cases":
-        group_sizes = working["map_group_id"].value_counts().to_dict()
-        if repeated_only:
-            working = working[working["map_group_id"].map(group_sizes).fillna(0) > 1]
-        markers = working.copy()
-        markers["case_count"] = 1
-        markers["strongest_evidence_tier"] = markers["evidence_tier"]
-        markers["date_span"] = markers["report_date"]
-        markers["probable_cluster_count"] = 1
-        markers["evidence_mix"] = markers["evidence_tier"].map(lambda value: TIER_LABEL.get(str(value), str(value)))
-        markers["top_source_domains"] = markers["source_domain"]
-        return markers.sort_values(["case_rank"]).reset_index(drop=True)
-    grouped = _group_cases(working)
-    if repeated_only and not grouped.empty:
-        grouped = grouped[grouped["case_count"] > 1].reset_index(drop=True)
-    return grouped
-def _summary_text(filtered: pd.DataFrame, markers: pd.DataFrame, mode: str) -> str:
-    if filtered.empty:
-        return "No rows match the current filters."
-    precise_count = int((filtered["coordinate_quality"] == "site_centroid").sum())
-    grouped_count = int(len(markers))
-    largest_stack = int(markers["case_count"].max()) if "case_count" in markers and not markers.empty else 0
-    return (
-        f"Showing {len(filtered)} cases as {grouped_count} map markers in `{mode}` mode. "
-        f"{precise_count} cases use site centroids; the largest visible marker groups {largest_stack} cases. "
-        "Marker size is case count; color is strongest evidence tier; symbol is coordinate quality."
-    )
-def _map(markers: pd.DataFrame, mode: str):
-    if markers.empty:
-        fig = px.scatter_geo(pd.DataFrame({"plot_lat": [], "plot_lon": []}), lat="plot_lat", lon="plot_lon", height=690)
-        fig.update_layout(margin={"l": 0, "r": 0, "t": 20, "b": 0})
         return fig
     fig = px.scatter_geo(
-        markers,
         lat="plot_lat",
         lon="plot_lon",
-        color="strongest_evidence_tier",
-        symbol="coordinate_quality",
-        size="case_count",
-        size_max=36 if mode != "Individual cases" else 14,
-        hover_name="plot_label",
         hover_data={
-            "case_count": True,
-            "probable_cluster_count": True,
-            "country": True,
-            "date_span": True,
-            "evidence_mix": True,
-            "top_source_domains": True,
-            "coordinate_quality": True,
             "plot_lat": False,
             "plot_lon": False,
         },
         projection="natural earth",
-        height=690,
         color_discrete_map={
-            "resolved_sensitive_site_report": "#b42318",
-            "named_sensitive_site_report": "#b76e00",
-            "source_discovered_report": "#2b6f9e",
         },
     )
-    fig.update_traces(marker={"opacity": 0.78, "line": {"width": 0.6, "color": "white"}})
     fig.update_geos(showland=True, landcolor="#eef2f5", showocean=True, oceancolor="#dfeaf2", showcountries=True)
-    fig.update_layout(
-        margin={"l": 0, "r": 0, "t": 24, "b": 0},
-        legend_orientation="h",
-        legend_title_text="Evidence tier / coordinate quality",
-    )
     return fig
-def _cases_for_marker(marker: dict, filtered_rows: list[dict], mode: str) -> list[dict]:
-    if mode == "Individual cases":
-        case_id = marker.get("case_id")
-        return [row for row in filtered_rows if row.get("case_id") == case_id]
-    group_id = marker.get("map_group_id")
-    return [row for row in filtered_rows if row.get("map_group_id") == group_id]
-def _detail(markers: list[dict], filtered_rows: list[dict], index: int | None, mode: str) -> str:
-    if not markers:
-        return "No map marker selected."
-    try:
-        marker = markers[int(index or 0)]
-    except (IndexError, TypeError, ValueError):
-        marker = markers[0]
-    marker_cases = _cases_for_marker(marker, filtered_rows, mode)
-    marker_cases = sorted(
-        marker_cases,
-        key=lambda row: (
-            TIER_RANK.get(str(row.get("evidence_tier")), 99),
-            str(row.get("report_date", "")),
-            int(row.get("case_rank") or 999999),
-        ),
     )
-    quality = marker.get("coordinate_quality", "")
-    warning = ""
-    if quality in COARSE_COORDINATE_QUALITIES:
-        warning = "\n\n**Coordinate note:** this marker is a coarse centroid. Use it as a review location, not a precise sighting coordinate."
-    lines = [
-        f"### {marker.get('plot_label', '')}",
-        "",
-        f"- Map mode: `{mode}`",
-        f"- Cases at marker: `{len(marker_cases)}`",
-        f"- Probable clusters: `{marker.get('probable_cluster_count', '')}`",
-        f"- Evidence mix: {marker.get('evidence_mix', '')}",
-        f"- Date span: `{marker.get('date_span', '')}`",
-        f"- Coordinate quality: `{quality}`",
-        f"- Top source domains: {marker.get('top_source_domains', '')}",
-        warning,
-        "",
-        "#### Cases behind this marker",
-    ]
-    for row in marker_cases[:18]:
         lines.extend(
             [
                 "",
-                f"**#{row.get('case_rank')} - {row.get('headline', '')}**",
-                f"- `{row.get('evidence_tier', '')}` | `{row.get('report_date', '')}` | `{row.get('site_name', '')}`",
-                f"- Source: [{row.get('publisher', '') or row.get('source_domain', '')}]({row.get('source_url', '')})",
-                f"- Boundary: {row.get('claim_boundary', '')}",
             ]
         )
-    if len(marker_cases) > 18:
-        lines.append(f"\n...and {len(marker_cases) - 18} more rows in the marker table/filter result.")
-    return "\n".join(line for line in lines if line is not None)
-def _render(
-    cases: pd.DataFrame,
-    evidence_tiers,
-    coordinate_qualities,
-    countries,
-    site_types,
-    source_domains,
-    years,
-    mode,
-    repeated_only,
-    query,
-):
-    filtered = _filter_cases(cases, evidence_tiers, coordinate_qualities, countries, site_types, source_domains, years, query)
-    markers = _marker_rows(filtered, mode or "Grouped sites", bool(repeated_only))
-    filtered_rows = filtered.to_dict("records")
-    marker_records = markers.to_dict("records")
-    marker_table_columns = GROUP_COLUMNS if mode != "Individual cases" else CASE_COLUMNS
-    marker_table = markers[[column for column in marker_table_columns if column in markers.columns]].copy()
-    return (
-        _summary_text(filtered, markers, mode or "Grouped sites"),
-        _map(markers, mode or "Grouped sites"),
-        marker_table,
-        marker_records,
-        filtered_rows,
-        _detail(marker_records, filtered_rows, 0, mode or "Grouped sites"),
     )
 def build_app(data_dir: str | Path):
     data_dir = Path(data_dir)
     cases, manifest, quality = _load_data(data_dir)
-    evidence_choices = _options(cases["evidence_tier"])
-    coordinate_choices = _options(cases["coordinate_quality"])
-    with gr.Blocks(title="Mystery Drone Reports Around Sensitive Sites") as app:
-        gr.Markdown(_markdown_header(manifest, quality))
-        with gr.Row():
-            mode = gr.Radio(
-                choices=["Grouped sites", "Individual cases", "Coarse-location review"],
-                value="Grouped sites",
-                label="Map mode",
             )
-            repeated_only = gr.Checkbox(value=False, label="Only repeated markers")
-            query = gr.Textbox(label="Search", placeholder="Try New Jersey, Langley, Copenhagen, airport, military base")
-        with gr.Row():
-            evidence_filter = gr.CheckboxGroup(choices=evidence_choices, value=evidence_choices, label="Evidence tier")
-            coordinate_filter = gr.CheckboxGroup(choices=coordinate_choices, value=coordinate_choices, label="Coordinate quality")
-        with gr.Row():
-            country_filter = gr.Dropdown(choices=_options(cases["country"]), value=[], multiselect=True, label="Country")
-            site_filter = gr.Dropdown(choices=_options(cases["site_type"]), value=[], multiselect=True, label="Site type")
-            source_filter = gr.Dropdown(choices=_options(cases["source_domain"]), value=[], multiselect=True, label="Source domain")
-            year_filter = gr.Dropdown(choices=_options(cases["report_year"]), value=[], multiselect=True, label="Report year")
-        summary = gr.Markdown()
-        with gr.Row():
-            with gr.Column(scale=3):
-                map_plot = gr.Plot(label="Grouped case map")
-            with gr.Column(scale=2):
-                detail = gr.Markdown()
-        marker_table = gr.Dataframe(label="Visible map markers", interactive=False)
-        marker_rows_state = gr.State([])
-        filtered_rows_state = gr.State([])
-        def render(evidence_tiers, coordinate_qualities, countries, site_types, source_domains, years, map_mode, repeats, search_query):
-            return _render(cases, evidence_tiers, coordinate_qualities, countries, site_types, source_domains, years, map_mode, repeats, search_query)
-        inputs = [
-            evidence_filter,
-            coordinate_filter,
-            country_filter,
-            site_filter,
-            source_filter,
-            year_filter,
-            mode,
-            repeated_only,
-            query,
-        ]
-        outputs = [summary, map_plot, marker_table, marker_rows_state, filtered_rows_state, detail]
-        for control in inputs:
-            control.change(render, inputs=inputs, outputs=outputs)
-        def select_marker(markers, filtered_rows, map_mode, evt: gr.SelectData):
-            if not evt or evt.index is None:
-                return _detail(markers, filtered_rows, 0, map_mode)
-            row_index = evt.index[0] if isinstance(evt.index, (list, tuple)) else evt.index
-            return _detail(markers, filtered_rows, row_index, map_mode)
-        marker_table.select(select_marker, inputs=[marker_rows_state, filtered_rows_state, mode], outputs=detail)
-        app.load(render, inputs=inputs, outputs=outputs)
     return app

 import plotly.express as px
# Countries that _region_label reports as "Europe".
EUROPE_COUNTRIES = {
    "Belgium",
    "Denmark",
    "Germany",
    "Ireland",
    "Italy",
    "Netherlands",
    "Spain",
    "Sweden",
    "United Kingdom",
}

# evidence_tier code -> plain-language label shown to readers.
CLARITY_LABELS = {
    "resolved_sensitive_site_report": "Specific site matched",
    "named_sensitive_site_report": "Specific site named",
    "source_discovered_report": "News lead to review",
}

# coordinate_quality code -> plain-language description of marker precision.
LOCATION_LABELS = {
    "site_centroid": "Specific site location",
    "city_area_centroid": "City-area location",
    "region_centroid": "General regional location",
    "country_centroid": "Country-level location",
}

# Storyline selector options. The first entry is the landing view; the
# remaining entries are the storylines listed by _story_card_markdown.
STORY_CHOICES = [
    "Start here: main storylines",
    "New Jersey coastal/security reports",
    "European airport disruptions",
    "Military base reports",
    "All reports by place",
]

# Column order of the reader-facing report table built by _public_table.
REPORT_COLUMNS = [
    "Headline",
    "Date",
    "Place",
    "Place type",
    "Country",
    "Source",
    "Why included",
    "Caution",
]

# Columns of the grouped-by-place table that _render_story returns.
PLACE_COLUMNS = [
    "Place",
    "Reports",
    "Place type",
    "Region",
    "Location note",
    "Date span",
    "Why look here",
]

# Raw identifier/provenance columns.
# NOTE(review): not referenced in the visible part of the file; presumably
# used by a data-notes view further down - confirm.
TECH_COLUMNS = [
    "case_id",
    "case_rank",
    "evidence_tier",
    "coordinate_quality",
    "probable_cluster_id",
    "public_row_sha256",
]
 def _load_data(data_dir: Path) -> tuple[pd.DataFrame, dict, dict]:
     cases["case_rank"] = pd.to_numeric(cases["case_rank"], errors="coerce").fillna(999999).astype(int)
     cases["plot_lat"] = pd.to_numeric(cases["plot_lat"], errors="coerce")
     cases["plot_lon"] = pd.to_numeric(cases["plot_lon"], errors="coerce")
+    cases["report_year"] = cases["report_date"].astype(str).str.slice(0, 4).replace("", "Older / unknown")
+    cases["reader_clarity"] = cases["evidence_tier"].map(CLARITY_LABELS).fillna("News lead to review")
+    cases["location_note"] = cases["coordinate_quality"].map(LOCATION_LABELS).fillna("General location")
+    cases["place_type_reader"] = cases.apply(_place_type_label, axis=1)
+    cases["region_reader"] = cases["country"].map(_region_label)
+    cases["story_group"] = cases.apply(_story_group, axis=1)
+    cases["reader_caution"] = cases.apply(_reader_caution, axis=1)
+    cases["why_included"] = cases.apply(_why_included, axis=1)
     cases["map_group_id"] = cases.apply(
         lambda row: "|".join(
             [
                 f"{float(row['plot_lat']):.4f}" if pd.notna(row["plot_lat"]) else "",
                 f"{float(row['plot_lon']):.4f}" if pd.notna(row["plot_lon"]) else "",
                 str(row.get("plot_label", "")),
+                str(row.get("place_type_reader", "")),
                 str(row.get("country", "")),
             ]
         ),
     return cases, manifest, quality
def _place_type_label(row: pd.Series) -> str:
    """Classify a report row into a reader-facing place type.

    The site type, site name, plot label and headline are joined into one
    lower-cased string and scanned for keywords. Rules are tried in order and
    the first rule with any matching keyword wins, so a row mentioning both an
    airport and a military base is labelled "Airport".

    Args:
        row: Mapping-like row; missing fields are treated as empty text.

    Returns:
        One of "Airport", "Coastal/security", "Military site",
        "Critical infrastructure" or "Other / unclear".
    """
    fields = ("site_type", "site_name", "plot_label", "headline")
    text = " ".join(str(row.get(field, "")) for field in fields).lower()

    # Order matters: earlier rules take precedence over later ones.
    rules = (
        ("Airport", ("airport", "runway")),
        ("Coastal/security", ("coast guard", "coastal", "maritime", "new jersey")),
        # "raf " keeps its trailing space so it only matches the word itself.
        ("Military site", ("military", "air force", "air base", "arsenal", "raf ", "joint base")),
        ("Critical infrastructure", ("critical", "infrastructure", "nuclear", "power")),
    )
    for label, keywords in rules:
        if any(keyword in text for keyword in keywords):
            return label
    return "Other / unclear"
def _region_label(country: str) -> str:
    """Map a country name onto the coarse region used by the region filter.

    Args:
        country: Country name as stored on the case row.

    Returns:
        "United States", "Europe" (for members of EUROPE_COUNTRIES) or
        "Other / unclear" for anything else.
    """
    if country != "United States":
        return "Europe" if country in EUROPE_COUNTRIES else "Other / unclear"
    return "United States"
def _story_group(row: pd.Series) -> str:
    """Assign a report row to one of the guided storylines.

    Checks run in priority order: New Jersey / Coast Guard mentions first,
    then European airport reports, then military sites. Rows matching none of
    them fall into the catch-all "All reports by place".

    Args:
        row: Mapping-like row that already carries "region_reader" and
            "place_type_reader" alongside the raw text fields.

    Returns:
        The storyline name for the row.
    """
    text_fields = ("headline", "site_name", "plot_label", "country")
    text = " ".join(str(row.get(field, "")) for field in text_fields).lower()
    region = row.get("region_reader")
    place_type = row.get("place_type_reader")

    if any(marker in text for marker in ("new jersey", "coast guard")):
        return "New Jersey coastal/security reports"

    mentions_airport = "airport" in text or place_type == "Airport"
    if region == "Europe" and mentions_airport:
        return "European airport disruptions"

    if place_type == "Military site":
        return "Military base reports"

    return "All reports by place"
def _reader_caution(row: pd.Series) -> str:
    """Pick the one-line caution shown next to a report.

    An unconfirmed news lead takes priority over an approximate location;
    rows with neither concern get a generic reminder to read the source.

    Args:
        row: Mapping-like row carrying "reader_clarity" and "location_note".

    Returns:
        The caution sentence for the row.
    """
    if row.get("reader_clarity", "") == "News lead to review":
        return "Treat as a source lead, not a confirmed event."
    if row.get("location_note", "") == "Specific site location":
        return "Check the linked source before drawing conclusions."
    return "Location is approximate."
def _why_included(row: pd.Series) -> str:
    """Explain in one sentence why a report is part of the index.

    The wording depends on how clearly the source identifies a sensitive
    site. The indefinite article is chosen to match the place type, so the
    text reads "an airport" rather than "a airport".

    Args:
        row: Mapping-like row carrying "reader_clarity" and
            "place_type_reader".

    Returns:
        The reader-facing explanation sentence.
    """
    clarity = row.get("reader_clarity", "")
    # Fall back to the catch-all label so the sentence never has a blank slot.
    place_type = str(row.get("place_type_reader", "") or "").strip().lower() or "other / unclear"
    article = "an" if place_type.startswith(("a", "e", "i", "o", "u")) else "a"
    place = f"{article} {place_type}"

    if clarity == "Specific site matched":
        return f"Matched to {place} report location."
    if clarity == "Specific site named":
        return f"The source names {place} or sensitive place."
    return f"The source language points to a drone report near {place} context."
def _date_span(values: pd.Series) -> str:
    """Summarise a collection of report dates as a single span.

    Missing entries (None, NaN/NA, empty or whitespace-only text) are ignored
    so they cannot show up as "nan" or "None" in the result. The remaining
    values are compared as text, which orders ISO "YYYY-MM-DD" dates
    chronologically.

    Args:
        values: Iterable of date values, typically a column of date strings.

    Returns:
        "Date unclear" when no usable date exists, the single date when the
        earliest and latest are equal, otherwise "<earliest> to <latest>".
    """
    dates: list[str] = []
    for value in values:
        # Strings are never "missing" for pd.isna; checking the type first
        # keeps the call restricted to scalar non-string values.
        if value is None or (not isinstance(value, str) and pd.isna(value)):
            continue
        text = str(value).strip()
        if text:
            dates.append(text)

    if not dates:
        return "Date unclear"
    earliest, latest = min(dates), max(dates)
    if earliest == latest:
        return earliest
    return f"{earliest} to {latest}"
def _count_text(values: pd.Series, limit: int = 4) -> str:
    """Render the most frequent values of a column as "value: count" pairs.

    Values are compared as text and empty strings are counted under
    "unknown".

    Args:
        values: Column to tally.
        limit: Maximum number of distinct values to include.

    Returns:
        Comma-separated "value: count" pairs, most frequent first; an empty
        string when there is nothing to count.
    """
    labelled = values.astype(str).replace("", "unknown")
    most_common = labelled.value_counts().head(limit)
    parts = []
    for label, count in most_common.items():
        parts.append(f"{label}: {int(count)}")
    return ", ".join(parts)
def _header(manifest: dict) -> str:
    """Build the page title and headline counts as Markdown.

    Per-tier counts are read from the top-level "<tier>_count" keys. When a
    key is absent or null, the nested "counts_by_evidence_tier" mapping is
    used instead, so either manifest layout gives real numbers rather than a
    silent zero or a crash on ``int(None)``.

    Args:
        manifest: Release manifest for the data set.

    Returns:
        Markdown with the heading, a one-paragraph scope note and a summary
        line of counts, each separated by a blank line so they render as
        separate blocks.
    """
    tier_counts = manifest.get("counts_by_evidence_tier") or {}

    def tier_count(tier: str) -> int:
        value = manifest.get(f"{tier}_count")
        if value is None:
            value = tier_counts.get(tier)
        return int(value or 0)

    named_or_matched = tier_count("resolved_sensitive_site_report") + tier_count("named_sensitive_site_report")
    leads = tier_count("source_discovered_report")

    blocks = [
        "# Mystery Drone Reports Near Sensitive Places",
        "This is a public-source index of news reports near airports, military sites, "
        "coastal/security areas, and other sensitive places. It is not proof of threat, "
        "intent, or unusual origin.",
        f"**{manifest.get('case_count', 0)} public-source reports** | "
        f"**{named_or_matched} name or match a specific sensitive site** | "
        f"**{leads} broader leads for follow-up**",
    ]
    return "\n\n".join(blocks) + "\n"
def _story_intro(story: str, rows: pd.DataFrame) -> str:
    """Build the Markdown introduction shown above a storyline's map.

    The heading, lead sentence, fact list and the two closing notes are
    separated by blank lines. Without them Markdown treats the bold notes as
    a continuation of the last bullet instead of as paragraphs of their own.

    Args:
        story: Selected storyline name; unknown names get a generic lead.
        rows: Reports belonging to the storyline.

    Returns:
        Markdown text, or a short "no reports" message when ``rows`` is empty.
    """
    if rows.empty:
        return "No reports match this storyline."

    # storyline -> (lead sentence, what the view does not prove)
    notes = {
        "New Jersey coastal/security reports": (
            "This group collects public reports connected to the New Jersey drone wave and nearby coastal/security locations.",
            "Many rows are broad reporting leads, so treat this as a reporting trail rather than a confirmed incident list.",
        ),
        "European airport disruptions": (
            "This group follows reports around European airport disruptions and related drone activity.",
            "Airport closures and disruption reports can involve repeated follow-up stories, so use the source links to separate event reports from later context.",
        ),
        "Military base reports": (
            "This group focuses on reports that name or point toward military bases and military-site areas.",
            "A report near a base does not prove origin, intent, or threat.",
        ),
        "All reports by place": (
            "This view groups the full report set by place so repeated locations are easier to scan.",
            "Marker size means number of source reports, not number of confirmed objects.",
        ),
    }
    fallback = (
        "Pick a storyline below to explore the main reporting trails.",
        "Start with the story summaries, then use the map and sources for details.",
    )
    lead, caution = notes.get(story, fallback)
    location_note = "Some markers are approximate because public reports often describe areas rather than exact coordinates."

    facts = "\n".join(
        [
            f"- Reports in view: **{len(rows)}**",
            f"- Date range: **{_date_span(rows['report_date'])}**",
            f"- Common places: {_count_text(rows['plot_label'], limit=5)}",
            f"- Common sources: {_count_text(rows['source_domain'], limit=5)}",
        ]
    )
    sections = [
        f"## {story}",
        lead,
        facts,
        f"**What this does not prove:** {caution}",
        f"**Location note:** {location_note}",
    ]
    return "\n\n".join(sections) + "\n"
def _story_rows(cases: pd.DataFrame, story: str) -> pd.DataFrame:
    """Return a copy of the reports that belong to a storyline.

    The landing view and "All reports by place" both cover the full data
    set; every other storyline is matched against the "story_group" column.

    Args:
        cases: All prepared report rows.
        story: Storyline name chosen by the reader.

    Returns:
        A new DataFrame, so callers may modify it without touching ``cases``.
    """
    whole_set_views = ("Start here: main storylines", "All reports by place")
    if story in whole_set_views:
        return cases.copy()
    belongs = cases["story_group"] == story
    return cases[belongs].copy()
def _filter_rows(cases: pd.DataFrame, search: str, region: str, place_type: str, clarity: str, year: str) -> pd.DataFrame:
    """Apply the reader-facing filters and return the matches by case rank.

    An empty value or "All" disables a filter. The year option
    "Older / unknown" keeps every row whose report year is not 2024, 2025 or
    2026. The search text is matched case-insensitively as a literal
    substring of the headline, site name, plot label, country and source
    domain.

    Args:
        cases: All prepared report rows.
        search: Free-text search; blank means no text filter.
        region: Value of "region_reader" to keep.
        place_type: Value of "place_type_reader" to keep.
        clarity: Value of "reader_clarity" to keep.
        year: Value of "report_year" to keep, or "Older / unknown".

    Returns:
        Matching rows sorted by "case_rank" with a fresh 0..n-1 index.
    """
    rows = cases.copy()

    exact_filters = (
        ("region_reader", region),
        ("place_type_reader", place_type),
        ("reader_clarity", clarity),
    )
    for column, wanted in exact_filters:
        if wanted and wanted != "All":
            rows = rows[rows[column] == wanted]

    if year and year != "All":
        if year == "Older / unknown":
            is_recent = rows["report_year"].isin(["2024", "2025", "2026"])
            rows = rows[~is_recent]
        else:
            rows = rows[rows["report_year"] == year]

    needle = str(search or "").strip().lower()
    if needle:
        searched = ("headline", "site_name", "plot_label", "country", "source_domain")
        haystack = rows[searched[0]].astype(str)
        for column in searched[1:]:
            haystack = haystack + " " + rows[column].astype(str)
        haystack = haystack.str.lower()
        rows = rows[haystack.str.contains(needle, regex=False)]

    return rows.sort_values(["case_rank"]).reset_index(drop=True)
def _group_rows(rows: pd.DataFrame) -> pd.DataFrame:
    """Collapse reports that share a map marker into one row per place.

    Rows are grouped on "map_group_id". Place label, place type, region,
    location note and coordinates come from the first report in each group;
    the date span, clarity mix and source summary are computed over the whole
    group. The empty result has exactly the same columns as a populated one
    (including "source_summary"), so callers can rely on a single schema.

    Args:
        rows: Filtered report rows.

    Returns:
        One row per place, largest groups first, ties ordered by place name.
    """
    columns = [
        "map_group_id",
        "Place",
        "Reports",
        "Place type",
        "Region",
        "Location note",
        "Date span",
        "Why look here",
        "plot_lat",
        "plot_lon",
        "source_summary",
    ]
    if rows.empty:
        return pd.DataFrame(columns=columns)

    # sort=False keeps groups in first-appearance order ahead of the sort below.
    out: list[dict] = []
    for group_id, group in rows.groupby("map_group_id", sort=False):
        first = group.iloc[0]
        out.append(
            {
                "map_group_id": group_id,
                "Place": str(first["plot_label"]),
                "Reports": int(len(group)),
                "Place type": str(first["place_type_reader"]),
                "Region": str(first["region_reader"]),
                "Location note": str(first["location_note"]),
                "Date span": _date_span(group["report_date"]),
                "Why look here": _count_text(group["reader_clarity"], limit=3),
                "plot_lat": float(first["plot_lat"]),
                "plot_lon": float(first["plot_lon"]),
                "source_summary": _count_text(group["source_domain"], limit=3),
            }
        )

    grouped = pd.DataFrame(out, columns=columns)
    return grouped.sort_values(["Reports", "Place"], ascending=[False, True]).reset_index(drop=True)
def _map(groups: pd.DataFrame):
    """Draw the world map with one marker per grouped place.

    Marker size follows the "Reports" count and marker colour follows
    "Place type". An empty input produces a blank map of the same height so
    the layout does not jump.

    Args:
        groups: Grouped place rows with "plot_lat"/"plot_lon" coordinates.

    Returns:
        The Plotly figure.
    """
    if groups.empty:
        no_points = pd.DataFrame({"plot_lat": [], "plot_lon": []})
        fig = px.scatter_geo(no_points, lat="plot_lat", lon="plot_lon", height=560)
        fig.update_layout(margin={"l": 0, "r": 0, "t": 12, "b": 0})
        return fig

    # True shows a field in the hover box; the raw coordinates are hidden.
    hover_fields = {
        "Reports": True,
        "Region": True,
        "Location note": True,
        "Date span": True,
        "Why look here": True,
        "source_summary": True,
        "plot_lat": False,
        "plot_lon": False,
    }
    place_type_colors = {
        "Airport": "#1f77b4",
        "Military site": "#b42318",
        "Coastal/security": "#2e7d62",
        "Critical infrastructure": "#8e5ea2",
        "Other / unclear": "#6b7280",
    }
    fig = px.scatter_geo(
        groups,
        lat="plot_lat",
        lon="plot_lon",
        color="Place type",
        size="Reports",
        size_max=38,
        hover_name="Place",
        hover_data=hover_fields,
        projection="natural earth",
        height=560,
        color_discrete_map=place_type_colors,
    )
    marker_style = {"opacity": 0.8, "line": {"width": 0.6, "color": "white"}}
    fig.update_traces(marker=marker_style)
    fig.update_geos(showland=True, landcolor="#eef2f5", showocean=True, oceancolor="#dfeaf2", showcountries=True)
    fig.update_layout(margin={"l": 0, "r": 0, "t": 18, "b": 0}, legend_title_text="Place type")
    return fig
def _public_table(rows: pd.DataFrame) -> pd.DataFrame:
    """Project report rows onto the reader-facing table columns.

    Empty dates are shown as "Date unclear" and empty countries as "unknown".

    Args:
        rows: Filtered report rows.

    Returns:
        A table with the REPORT_COLUMNS headings; an empty table with those
        headings when there are no rows.
    """
    if rows.empty:
        return pd.DataFrame(columns=REPORT_COLUMNS)

    # (table heading, source column) in display order.
    layout = (
        ("Headline", "headline"),
        ("Date", "report_date"),
        ("Place", "plot_label"),
        ("Place type", "place_type_reader"),
        ("Country", "country"),
        ("Source", "source_domain"),
        ("Why included", "why_included"),
        ("Caution", "reader_caution"),
    )
    # Text shown in place of an empty string, per heading.
    placeholders = {"Date": "Date unclear", "Country": "unknown"}

    table = {}
    for heading, source in layout:
        column = rows[source]
        if heading in placeholders:
            column = column.replace("", placeholders[heading])
        table[heading] = column
    return pd.DataFrame(table)
def _card_text(value, fallback: str = "") -> str:
    """Return *value* as single-line text, or *fallback* when it is missing."""
    if value is None or pd.isna(value):
        return fallback
    # Collapse newlines/tabs so one field cannot break out of its Markdown line.
    text = " ".join(str(value).split())
    return text or fallback


def _card_source_link(label: str, url: str) -> str:
    """Return a Markdown link for a source, or plain text when no URL exists."""
    if not url:
        return label or "Not available"
    safe_label = (label or "Source").replace("[", "\\[").replace("]", "\\]")
    # Percent-encode the characters that would end a Markdown link destination.
    safe_url = url.replace(" ", "%20").replace("(", "%28").replace(")", "%29")
    return f"[{safe_label}]({safe_url})"


def _source_cards(rows: pd.DataFrame, limit: int = 10) -> str:
    """Render the first *limit* reports as Markdown cards with source links.

    Args:
        rows: Report rows with ``headline``, ``report_date``, ``plot_label``,
            ``location_note``, ``why_included``, ``reader_caution``,
            ``publisher``, ``source_domain`` and ``source_url`` columns.
        limit: Maximum number of cards to render.

    Returns:
        Markdown text. When ``rows`` is empty a short "no match" message is
        returned instead. When more than *limit* rows exist, a trailing line
        states how many were left out.
    """
    if rows.empty:
        return "No reports match this view."
    lines = ["## Source links to inspect", ""]
    for record in rows.head(limit).to_dict("records"):
        place = _card_text(record["plot_label"])
        note = _card_text(record["location_note"])
        label = _card_text(record["publisher"]) or _card_text(record["source_domain"])
        lines.extend(
            [
                f"### {_card_text(record['headline'])}",
                f"- Date: {_card_text(record['report_date'], 'Date unclear')}",
                # Skip the parenthetical rather than printing an empty "()".
                f"- Place: {place} ({note})" if note else f"- Place: {place}",
                f"- Why included: {_card_text(record['why_included'])}",
                f"- Caution: {_card_text(record['reader_caution'])}",
                f"- Source: {_card_source_link(label, _card_text(record['source_url']))}",
                "",
            ]
        )
    if len(rows) > limit:
        lines.append(f"...and {len(rows) - limit} more reports in the list.")
    return "\n".join(lines)
def _story_card_markdown(cases: pd.DataFrame) -> str:
    """Return the Markdown overview listing every storyline after the first.

    Each storyline in ``STORY_CHOICES[1:]`` becomes one paragraph with its
    name, the number of rows ``_story_rows`` yields for it, and a one-line
    subtitle.
    """
    subtitles = {
        "All reports by place": "Scan every mapped report grouped by place.",
        "New Jersey coastal/security reports": "The largest reporting trail in this release.",
        "European airport disruptions": "Airport closures and disruption reports across Europe.",
    }
    # Any storyline without an explicit subtitle falls back to the military one.
    fallback_subtitle = "Reports around bases and military-site areas."
    cards = [
        f"**{name}** - {len(_story_rows(cases, name))} reports. "
        f"{subtitles.get(name, fallback_subtitle)}"
        for name in STORY_CHOICES[1:]
    ]
    return "## Pick a storyline to explore\n\n" + "\n\n".join(cards)
def _render_story(cases: pd.DataFrame, story: str):
    """Produce every output of the "Start here" tab for one storyline.

    Returns a 5-tuple: intro Markdown, map figure, place table, report table
    and source-card Markdown, in that order.
    """
    story_rows = _story_rows(cases, story)
    place_groups = _group_rows(story_rows)
    if story == "Start here: main storylines":
        # The landing choice shows the overall header plus the storyline cards.
        intro = _header_from_rows(cases) + "\n\n" + _story_card_markdown(cases)
    else:
        intro = _story_intro(story, story_rows)
    return (
        intro,
        _map(place_groups),
        place_groups[PLACE_COLUMNS],
        _public_table(story_rows),
        _source_cards(story_rows),
    )
def _header_from_rows(cases: pd.DataFrame) -> str:
    """Return the Markdown page header with headline counts for *cases*.

    Args:
        cases: Report rows with a ``reader_clarity`` column.

    Returns:
        Markdown with the title, a short disclaimer paragraph, and a stats
        line giving the total row count, the number of rows whose clarity is
        "Specific site matched" or "Specific site named", and the number of
        rows marked "News lead to review". The three parts are separated by
        blank lines so the stats render as their own paragraph instead of
        being folded into the disclaimer.
    """
    clarity = cases["reader_clarity"]
    specific = int(clarity.isin(["Specific site matched", "Specific site named"]).sum())
    leads = int((clarity == "News lead to review").sum())
    return f"""# Mystery Drone Reports Near Sensitive Places

This is a public-source index of news reports near airports, military sites, coastal/security areas, and other sensitive places.
It is not proof of threat, intent, or unusual origin.

**{len(cases)} public-source reports** | **{specific} name or match a specific sensitive site** | **{leads} broader leads for follow-up**
"""
def _render_map(cases: pd.DataFrame, search: str, region: str, place_type: str, clarity: str, year: str):
    """Produce every output of the "Map" tab for the current filter values.

    Returns a 5-tuple: summary text, map figure, place table, report table
    and source-card Markdown, in that order.
    """
    matching = _filter_rows(cases, search, region, place_type, clarity, year)
    places = _group_rows(matching)
    report_count = len(matching)
    place_count = len(places)
    summary = f"Showing {report_count} reports at {place_count} places. "
    summary += "Bigger markers mean more reports at that place. Colors show the kind of place."
    return (
        summary,
        _map(places),
        places[PLACE_COLUMNS],
        _public_table(matching),
        _source_cards(matching),
    )
def _render_reports(cases: pd.DataFrame, search: str, region: str, place_type: str, clarity: str, year: str):
    """Produce every output of the "Reports" tab for the current filter values.

    Returns a 4-tuple: summary text, readable report table, source-card
    Markdown and the technical-field table, in that order.
    """
    matching = _filter_rows(cases, search, region, place_type, clarity, year)
    report_count = len(matching)
    summary = f"Showing {report_count} reports. Select a row by using the source links in the detail panel below."
    readable = _public_table(matching)
    cards = _source_cards(matching)
    technical = _technical_table(matching)
    return summary, readable, cards, technical
def _technical_table(rows: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *rows* limited to the ``TECH_COLUMNS`` columns.

    An empty input yields an empty frame that still has those columns.
    """
    columns = TECH_COLUMNS
    if not rows.empty:
        return rows[columns].copy()
    return pd.DataFrame(columns=columns)
def _data_notes(manifest: dict, quality: dict) -> str:
    """Return the Markdown body of the "Data notes" tab.

    Args:
        manifest: Mapping read for ``release_version`` and ``case_count``.
        quality: Mapping read for ``release_grade``,
            ``duplicate_source_url_count``, ``missing_source_url_count`` and
            ``mappable_case_count``.

    Returns:
        Markdown with a release summary list followed by a glossary list.
        Keys that are absent render as ``None`` because ``dict.get`` is used
        without a default. Blank lines separate the heading, the intro, each
        list and the glossary label; without them Markdown folds the
        "Plain-language translations:" line into the last summary bullet and
        merges the two lists.
    """
    return f"""# Data notes

This Space keeps the technical classifications available, but keeps them out of the first screen.

- Release version: {manifest.get('release_version')}
- Public rows: {manifest.get('case_count')}
- Quality gate passed: {quality.get('release_grade')}
- Duplicate source URLs: {quality.get('duplicate_source_url_count')}
- Missing source URLs: {quality.get('missing_source_url_count')}
- Mappable rows: {quality.get('mappable_case_count')}

Plain-language translations:

- Specific site matched = stricter source/site matching found a sensitive-site report.
- Specific site named = the source names a sensitive site, but it still needs review.
- News lead to review = public source language suggests a relevant report, but this is a lead, not a confirmed event.
- Specific site location = marker uses a known site point.
- General regional location or country-level location = marker is approximate.
"""
def build_app(data_dir: str | Path):
    """Assemble the Gradio Blocks UI and return it without launching it.

    Args:
        data_dir: Directory handed to ``_load_data`` (converted to ``Path``
            first), which supplies the case rows, the manifest and the
            quality report.

    Returns:
        The ``gr.Blocks`` app with four tabs: "Start here", "Map", "Reports"
        and "Data notes".
    """
    data_dir = Path(data_dir)
    cases, manifest, quality = _load_data(data_dir)
    # Fixed filter vocabularies shared by the Map and Reports tabs.
    region_choices = ["All", "United States", "Europe", "Other / unclear"]
    place_choices = ["All", "Airport", "Military site", "Coastal/security", "Critical infrastructure", "Other / unclear"]
    clarity_choices = ["All", "Specific site matched", "Specific site named", "News lead to review"]
    year_choices = ["All", "2026", "2025", "2024", "Older / unknown"]
    with gr.Blocks(title="Mystery Drone Reports Near Sensitive Places") as app:
        # --- Tab 1: guided storylines -------------------------------------
        with gr.Tab("Start here"):
            story = gr.Radio(choices=STORY_CHOICES, value=STORY_CHOICES[0], label="Pick a storyline")
            story_intro = gr.Markdown()
            with gr.Row():
                story_map = gr.Plot(label="Story map")
                story_sources = gr.Markdown()
            story_places = gr.Dataframe(label="Places in this story", interactive=False)
            story_reports = gr.Dataframe(label="Reports in this story", interactive=False)
            # Output order must match the tuple returned by _render_story.
            story.change(
                lambda selected: _render_story(cases, selected),
                inputs=story,
                outputs=[story_intro, story_map, story_places, story_reports, story_sources],
            )
            # Fill the tab on page load using the first storyline.
            app.load(
                lambda: _render_story(cases, STORY_CHOICES[0]),
                outputs=[story_intro, story_map, story_places, story_reports, story_sources],
            )
        # --- Tab 2: filterable map ----------------------------------------
        with gr.Tab("Map"):
            gr.Markdown("## Map\n\nBigger markers mean more public-source reports at that place. Colors show the kind of place.")
            with gr.Row():
                map_search = gr.Textbox(label="Search", placeholder="Search a place, country, source, or headline")
                map_region = gr.Dropdown(choices=region_choices, value="All", label="Region")
                map_place = gr.Dropdown(choices=place_choices, value="All", label="Place type")
                map_clarity = gr.Dropdown(choices=clarity_choices, value="All", label="Report clarity")
                map_year = gr.Dropdown(choices=year_choices, value="All", label="Time")
            map_summary = gr.Markdown()
            map_plot = gr.Plot(label="Report map")
            map_places = gr.Dataframe(label="Places shown on the map", interactive=False)
            map_reports = gr.Dataframe(label="Reports shown by current filters", interactive=False)
            map_sources = gr.Markdown()
            map_inputs = [map_search, map_region, map_place, map_clarity, map_year]
            # Changing any one filter re-renders using the values of all five.
            for control in map_inputs:
                control.change(
                    lambda search, region, place, clarity, year: _render_map(cases, search, region, place, clarity, year),
                    inputs=map_inputs,
                    outputs=[map_summary, map_plot, map_places, map_reports, map_sources],
                )
            # Initial render with every filter at its "All"/empty default.
            app.load(
                lambda: _render_map(cases, "", "All", "All", "All", "All"),
                outputs=[map_summary, map_plot, map_places, map_reports, map_sources],
            )
        # --- Tab 3: filterable report list --------------------------------
        with gr.Tab("Reports"):
            gr.Markdown("## All reports\n\nUse this when you want source links and row-level cautions.")
            with gr.Row():
                report_search = gr.Textbox(label="Search", placeholder="Search a place, country, source, or headline")
                report_region = gr.Dropdown(choices=region_choices, value="All", label="Region")
                report_place = gr.Dropdown(choices=place_choices, value="All", label="Place type")
                report_clarity = gr.Dropdown(choices=clarity_choices, value="All", label="Report clarity")
                report_year = gr.Dropdown(choices=year_choices, value="All", label="Time")
            report_summary = gr.Markdown()
            report_table = gr.Dataframe(label="Readable report list", interactive=False)
            report_sources = gr.Markdown()
            # Technical columns stay collapsed until the reader asks for them.
            with gr.Accordion("Show technical fields", open=False):
                technical_table = gr.Dataframe(label="Technical row fields", interactive=False)
            report_inputs = [report_search, report_region, report_place, report_clarity, report_year]
            # Same wiring as the Map tab: any filter change re-renders everything.
            for control in report_inputs:
                control.change(
                    lambda search, region, place, clarity, year: _render_reports(cases, search, region, place, clarity, year),
                    inputs=report_inputs,
                    outputs=[report_summary, report_table, report_sources, technical_table],
                )
            # Initial render with every filter at its "All"/empty default.
            app.load(
                lambda: _render_reports(cases, "", "All", "All", "All", "All"),
                outputs=[report_summary, report_table, report_sources, technical_table],
            )
        # --- Tab 4: static data notes plus raw manifest/quality JSON ------
        with gr.Tab("Data notes"):
            gr.Markdown(_data_notes(manifest, quality))
            with gr.Accordion("Technical manifest", open=False):
                gr.JSON(manifest)
            with gr.Accordion("Quality report", open=False):
                gr.JSON(quality)
    return app

space_manifest.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "space_bundle_version": "mystery_drone_sensitive_site_space_v2",
   "source_release_version": "mystery-drone-sensitive-site-cases-2026-05-v1",
   "case_count": 149,
   "release_grade": true,
@@ -13,14 +13,14 @@
     {
       "artifact_role": "space_public_app",
       "artifact_path": "public_space_app.py",
-      "content_sha256": "05ecfaa2d35d3cb16dcfba7a9bc94ea9a19d21a49fe875acc704b50f27c419f1",
-      "byte_count": 17863
     },
     {
       "artifact_role": "readme",
       "artifact_path": "README.md",
-      "content_sha256": "050e3c4fcac1cc1dd91c026f350d2e458d4641c0da2d8ec18c06009a8d7be990",
-      "byte_count": 605
     },
     {
       "artifact_role": "requirements",
@@ -47,5 +47,5 @@
       "byte_count": 1008
     }
   ],
-  "bundle_hash": "1efe312db89231fc27a11b2c2e540727fd9196ed7bec35115afee24d474cda6e"
 }

 {
+  "space_bundle_version": "mystery_drone_sensitive_site_space_v3_plain_language",
   "source_release_version": "mystery-drone-sensitive-site-cases-2026-05-v1",
   "case_count": 149,
   "release_grade": true,
     {
       "artifact_role": "space_public_app",
       "artifact_path": "public_space_app.py",
+      "content_sha256": "e1daff7c9f9772f8e87295eba2ac5bc346e06d2c2fde78ce8c0e01d33a359ad1",
+      "byte_count": 23312
     },
     {
       "artifact_role": "readme",
       "artifact_path": "README.md",
+      "content_sha256": "aa6754e5f1eb78132ca380f7b9c65a41f3db9b9fdde872b468381351bc16c56a",
+      "byte_count": 483
     },
     {
       "artifact_role": "requirements",
       "byte_count": 1008
     }
   ],
+  "bundle_hash": "aa231b606f39e4723a46c37e6c24a5a5c8711dd8f484921176c4839f200c536e"
 }