cjc0013 committed on
Commit
c518d3f
·
verified ·
1 Parent(s): 47078c4

Simplify drone Space for public readers

Browse files

Research-steered plain-language redesign: guided storylines first, simple map and report filters, readable source cards, and technical details moved to data notes.

Files changed (3) hide show
  1. README.md +4 -4
  2. public_space_app.py +421 -312
  3. space_manifest.json +6 -6
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  title: Drone Sightings Map
3
- emoji: 🛸
4
  colorFrom: red
5
  colorTo: blue
6
  sdk: gradio
@@ -9,8 +9,8 @@ app_file: app.py
9
  python_version: 3.11
10
  ---
11
 
12
- # Mystery Drone Reports Around Sensitive Sites
13
 
14
- Map-first review surface for public-source reports about mystery, unidentified, suspicious, or unauthorized drone activity around sensitive sites.
15
 
16
- The default map groups repeated reports by coordinate/site so dense clusters are readable. Marker size is case count, color is strongest evidence tier, and symbol is coordinate quality. Selecting a marker opens the source-linked cases behind it.
 
1
  ---
2
  title: Drone Sightings Map
3
+ emoji: "🛸"
4
  colorFrom: red
5
  colorTo: blue
6
  sdk: gradio
 
9
  python_version: 3.11
10
  ---
11
 
12
+ # Mystery Drone Reports Near Sensitive Places
13
 
14
+ Plain-language Space for exploring public-source reports about mystery, unidentified, suspicious, or unauthorized drone activity near sensitive places.
15
 
16
+ Start with the guided storylines, then use the map and report list for source links, cautions, and technical details.
public_space_app.py CHANGED
@@ -8,40 +8,62 @@ import pandas as pd
8
  import plotly.express as px
9
 
10
 
11
- GROUP_COLUMNS = [
12
- "case_count",
13
- "strongest_evidence_tier",
14
- "plot_label",
15
- "coordinate_quality",
16
- "country",
17
- "date_span",
18
- "probable_cluster_count",
19
- "evidence_mix",
20
- "top_source_domains",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  ]
22
- CASE_COLUMNS = [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  "case_rank",
24
  "evidence_tier",
25
- "report_date",
26
- "country",
27
- "site_name",
28
- "site_type",
29
  "coordinate_quality",
30
- "headline",
31
- "source_domain",
32
- "followup_status",
33
  ]
34
- TIER_RANK = {
35
- "resolved_sensitive_site_report": 0,
36
- "named_sensitive_site_report": 1,
37
- "source_discovered_report": 2,
38
- }
39
- TIER_LABEL = {
40
- "resolved_sensitive_site_report": "resolved site report",
41
- "named_sensitive_site_report": "named-site report",
42
- "source_discovered_report": "source-discovered report",
43
- }
44
- COARSE_COORDINATE_QUALITIES = {"region_centroid", "country_centroid", "city_area_centroid"}
45
 
46
 
47
  def _load_data(data_dir: Path) -> tuple[pd.DataFrame, dict, dict]:
@@ -51,14 +73,21 @@ def _load_data(data_dir: Path) -> tuple[pd.DataFrame, dict, dict]:
51
  cases["case_rank"] = pd.to_numeric(cases["case_rank"], errors="coerce").fillna(999999).astype(int)
52
  cases["plot_lat"] = pd.to_numeric(cases["plot_lat"], errors="coerce")
53
  cases["plot_lon"] = pd.to_numeric(cases["plot_lon"], errors="coerce")
54
- cases["report_year"] = cases["report_date"].astype(str).str.slice(0, 4).replace("", "unknown")
 
 
 
 
 
 
 
55
  cases["map_group_id"] = cases.apply(
56
  lambda row: "|".join(
57
  [
58
  f"{float(row['plot_lat']):.4f}" if pd.notna(row["plot_lat"]) else "",
59
  f"{float(row['plot_lon']):.4f}" if pd.notna(row["plot_lon"]) else "",
60
  str(row.get("plot_label", "")),
61
- str(row.get("coordinate_quality", "")),
62
  str(row.get("country", "")),
63
  ]
64
  ),
@@ -67,342 +96,422 @@ def _load_data(data_dir: Path) -> tuple[pd.DataFrame, dict, dict]:
67
  return cases, manifest, quality
68
 
69
 
70
- def _markdown_header(manifest: dict, quality: dict) -> str:
71
- tiers = manifest.get("counts_by_evidence_tier", {})
72
- return f"""# Mystery Drone Reports Around Sensitive Sites
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
- Map-first review surface for public-source reports around military, airport, maritime, emergency-service, and critical-infrastructure contexts.
75
 
76
- **{manifest.get("case_count", 0)} cases** | **{manifest.get("probable_cluster_count", 0)} probable clusters** | **{quality.get("mappable_case_count", 0)} mapped rows** | **release gate: {"pass" if quality.get("release_grade") else "review"}**
77
-
78
- Default view groups repeated reports at the same public coordinate so dense clusters, like New Jersey, read as one place-marker with a case list instead of a pile of overlapping dots.
79
-
80
- Evidence tiers: resolved site `{tiers.get("resolved_sensitive_site_report", 0)}`, named site `{tiers.get("named_sensitive_site_report", 0)}`, source-discovered `{tiers.get("source_discovered_report", 0)}`.
81
-
82
- Map points are source-indexed report locations, not verified findings of threat, attribution, anomalous origin, or hostile intent. `coordinate_quality` tells you whether a marker is a site, city, region, or country centroid.
83
- """
84
 
85
 
86
- def _options(values: pd.Series) -> list[str]:
87
- return sorted(str(value) for value in values.dropna().astype(str).unique() if str(value))
 
88
 
89
 
90
- def _strongest_tier(values: pd.Series) -> str:
91
- tiers = [str(value) for value in values if str(value)]
92
- if not tiers:
93
- return "source_discovered_report"
94
- return sorted(tiers, key=lambda value: TIER_RANK.get(value, 99))[0]
 
95
 
 
96
 
97
- def _count_text(values: pd.Series, *, label_map: dict[str, str] | None = None, limit: int = 4) -> str:
98
- counts = values.astype(str).replace("", "unknown").value_counts()
99
- parts = []
100
- for key, value in counts.head(limit).items():
101
- label = label_map.get(key, key) if label_map else key
102
- parts.append(f"{label}: {int(value)}")
103
- return "; ".join(parts)
104
 
105
 
106
- def _date_span(values: pd.Series) -> str:
107
- dates = sorted(str(value) for value in values if str(value))
108
- if not dates:
109
- return "undated"
110
- if dates[0] == dates[-1]:
111
- return dates[0]
112
- return f"{dates[0]} to {dates[-1]}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
 
115
- def _filter_cases(
116
- cases: pd.DataFrame,
117
- evidence_tiers: list[str] | None,
118
- coordinate_qualities: list[str] | None,
119
- countries: list[str] | None,
120
- site_types: list[str] | None,
121
- source_domains: list[str] | None,
122
- years: list[str] | None,
123
- query: str,
124
- ) -> pd.DataFrame:
125
- filtered = cases.copy()
126
- if evidence_tiers:
127
- filtered = filtered[filtered["evidence_tier"].isin(evidence_tiers)]
128
- if coordinate_qualities:
129
- filtered = filtered[filtered["coordinate_quality"].isin(coordinate_qualities)]
130
- if countries:
131
- filtered = filtered[filtered["country"].isin(countries)]
132
- if site_types:
133
- filtered = filtered[filtered["site_type"].isin(site_types)]
134
- if source_domains:
135
- filtered = filtered[filtered["source_domain"].isin(source_domains)]
136
- if years:
137
- filtered = filtered[filtered["report_year"].isin(years)]
138
- query = str(query or "").strip().lower()
139
- if query:
140
  haystack = (
141
- filtered["headline"].astype(str)
142
  + " "
143
- + filtered["site_name"].astype(str)
144
  + " "
145
- + filtered["plot_label"].astype(str)
146
  + " "
147
- + filtered["country"].astype(str)
148
  + " "
149
- + filtered["source_domain"].astype(str)
150
  ).str.lower()
151
- filtered = filtered[haystack.str.contains(query, regex=False)]
152
- return filtered.sort_values(["case_rank"]).reset_index(drop=True)
153
 
154
 
155
- def _group_cases(filtered: pd.DataFrame) -> pd.DataFrame:
156
- rows: list[dict] = []
157
- if filtered.empty:
158
- return pd.DataFrame(columns=GROUP_COLUMNS + ["map_group_id", "plot_lat", "plot_lon"])
159
- for group_id, group in filtered.groupby("map_group_id", sort=False):
160
- strongest = _strongest_tier(group["evidence_tier"])
161
- rows.append(
162
  {
163
  "map_group_id": group_id,
164
- "case_count": int(len(group)),
165
- "strongest_evidence_tier": strongest,
 
 
 
 
 
166
  "plot_lat": float(group["plot_lat"].iloc[0]),
167
  "plot_lon": float(group["plot_lon"].iloc[0]),
168
- "plot_label": str(group["plot_label"].iloc[0]),
169
- "coordinate_quality": str(group["coordinate_quality"].iloc[0]),
170
- "country": str(group["country"].iloc[0]),
171
- "date_span": _date_span(group["report_date"]),
172
- "probable_cluster_count": int(group["probable_cluster_id"].nunique()),
173
- "evidence_mix": _count_text(group["evidence_tier"], label_map=TIER_LABEL),
174
- "top_source_domains": _count_text(group["source_domain"], limit=3),
175
- "site_types": _count_text(group["site_type"], limit=3),
176
  }
177
  )
178
- grouped = pd.DataFrame(rows)
179
- grouped = grouped.sort_values(
180
- ["case_count", "strongest_evidence_tier", "plot_label"],
181
- ascending=[False, True, True],
182
- ).reset_index(drop=True)
183
- return grouped
184
-
185
-
186
- def _marker_rows(filtered: pd.DataFrame, mode: str, repeated_only: bool) -> pd.DataFrame:
187
- working = filtered.copy()
188
- if mode == "Coarse-location review":
189
- working = working[working["coordinate_quality"].isin(COARSE_COORDINATE_QUALITIES)]
190
- if mode == "Individual cases":
191
- group_sizes = working["map_group_id"].value_counts().to_dict()
192
- if repeated_only:
193
- working = working[working["map_group_id"].map(group_sizes).fillna(0) > 1]
194
- markers = working.copy()
195
- markers["case_count"] = 1
196
- markers["strongest_evidence_tier"] = markers["evidence_tier"]
197
- markers["date_span"] = markers["report_date"]
198
- markers["probable_cluster_count"] = 1
199
- markers["evidence_mix"] = markers["evidence_tier"].map(lambda value: TIER_LABEL.get(str(value), str(value)))
200
- markers["top_source_domains"] = markers["source_domain"]
201
- return markers.sort_values(["case_rank"]).reset_index(drop=True)
202
- grouped = _group_cases(working)
203
- if repeated_only and not grouped.empty:
204
- grouped = grouped[grouped["case_count"] > 1].reset_index(drop=True)
205
- return grouped
206
-
207
-
208
- def _summary_text(filtered: pd.DataFrame, markers: pd.DataFrame, mode: str) -> str:
209
- if filtered.empty:
210
- return "No rows match the current filters."
211
- precise_count = int((filtered["coordinate_quality"] == "site_centroid").sum())
212
- grouped_count = int(len(markers))
213
- largest_stack = int(markers["case_count"].max()) if "case_count" in markers and not markers.empty else 0
214
- return (
215
- f"Showing {len(filtered)} cases as {grouped_count} map markers in `{mode}` mode. "
216
- f"{precise_count} cases use site centroids; the largest visible marker groups {largest_stack} cases. "
217
- "Marker size is case count; color is strongest evidence tier; symbol is coordinate quality."
218
- )
219
 
220
 
221
- def _map(markers: pd.DataFrame, mode: str):
222
- if markers.empty:
223
- fig = px.scatter_geo(pd.DataFrame({"plot_lat": [], "plot_lon": []}), lat="plot_lat", lon="plot_lon", height=690)
224
- fig.update_layout(margin={"l": 0, "r": 0, "t": 20, "b": 0})
225
  return fig
226
  fig = px.scatter_geo(
227
- markers,
228
  lat="plot_lat",
229
  lon="plot_lon",
230
- color="strongest_evidence_tier",
231
- symbol="coordinate_quality",
232
- size="case_count",
233
- size_max=36 if mode != "Individual cases" else 14,
234
- hover_name="plot_label",
235
  hover_data={
236
- "case_count": True,
237
- "probable_cluster_count": True,
238
- "country": True,
239
- "date_span": True,
240
- "evidence_mix": True,
241
- "top_source_domains": True,
242
- "coordinate_quality": True,
243
  "plot_lat": False,
244
  "plot_lon": False,
245
  },
246
  projection="natural earth",
247
- height=690,
248
  color_discrete_map={
249
- "resolved_sensitive_site_report": "#b42318",
250
- "named_sensitive_site_report": "#b76e00",
251
- "source_discovered_report": "#2b6f9e",
 
 
252
  },
253
  )
254
- fig.update_traces(marker={"opacity": 0.78, "line": {"width": 0.6, "color": "white"}})
255
  fig.update_geos(showland=True, landcolor="#eef2f5", showocean=True, oceancolor="#dfeaf2", showcountries=True)
256
- fig.update_layout(
257
- margin={"l": 0, "r": 0, "t": 24, "b": 0},
258
- legend_orientation="h",
259
- legend_title_text="Evidence tier / coordinate quality",
260
- )
261
  return fig
262
 
263
 
264
- def _cases_for_marker(marker: dict, filtered_rows: list[dict], mode: str) -> list[dict]:
265
- if mode == "Individual cases":
266
- case_id = marker.get("case_id")
267
- return [row for row in filtered_rows if row.get("case_id") == case_id]
268
- group_id = marker.get("map_group_id")
269
- return [row for row in filtered_rows if row.get("map_group_id") == group_id]
270
-
271
-
272
- def _detail(markers: list[dict], filtered_rows: list[dict], index: int | None, mode: str) -> str:
273
- if not markers:
274
- return "No map marker selected."
275
- try:
276
- marker = markers[int(index or 0)]
277
- except (IndexError, TypeError, ValueError):
278
- marker = markers[0]
279
- marker_cases = _cases_for_marker(marker, filtered_rows, mode)
280
- marker_cases = sorted(
281
- marker_cases,
282
- key=lambda row: (
283
- TIER_RANK.get(str(row.get("evidence_tier")), 99),
284
- str(row.get("report_date", "")),
285
- int(row.get("case_rank") or 999999),
286
- ),
287
  )
288
- quality = marker.get("coordinate_quality", "")
289
- warning = ""
290
- if quality in COARSE_COORDINATE_QUALITIES:
291
- warning = "\n\n**Coordinate note:** this marker is a coarse centroid. Use it as a review location, not a precise sighting coordinate."
292
- lines = [
293
- f"### {marker.get('plot_label', '')}",
294
- "",
295
- f"- Map mode: `{mode}`",
296
- f"- Cases at marker: `{len(marker_cases)}`",
297
- f"- Probable clusters: `{marker.get('probable_cluster_count', '')}`",
298
- f"- Evidence mix: {marker.get('evidence_mix', '')}",
299
- f"- Date span: `{marker.get('date_span', '')}`",
300
- f"- Coordinate quality: `{quality}`",
301
- f"- Top source domains: {marker.get('top_source_domains', '')}",
302
- warning,
303
- "",
304
- "#### Cases behind this marker",
305
- ]
306
- for row in marker_cases[:18]:
307
  lines.extend(
308
  [
 
 
 
 
 
 
309
  "",
310
- f"**#{row.get('case_rank')} - {row.get('headline', '')}**",
311
- f"- `{row.get('evidence_tier', '')}` | `{row.get('report_date', '')}` | `{row.get('site_name', '')}`",
312
- f"- Source: [{row.get('publisher', '') or row.get('source_domain', '')}]({row.get('source_url', '')})",
313
- f"- Boundary: {row.get('claim_boundary', '')}",
314
  ]
315
  )
316
- if len(marker_cases) > 18:
317
- lines.append(f"\n...and {len(marker_cases) - 18} more rows in the marker table/filter result.")
318
- return "\n".join(line for line in lines if line is not None)
319
-
320
-
321
- def _render(
322
- cases: pd.DataFrame,
323
- evidence_tiers,
324
- coordinate_qualities,
325
- countries,
326
- site_types,
327
- source_domains,
328
- years,
329
- mode,
330
- repeated_only,
331
- query,
332
- ):
333
- filtered = _filter_cases(cases, evidence_tiers, coordinate_qualities, countries, site_types, source_domains, years, query)
334
- markers = _marker_rows(filtered, mode or "Grouped sites", bool(repeated_only))
335
- filtered_rows = filtered.to_dict("records")
336
- marker_records = markers.to_dict("records")
337
- marker_table_columns = GROUP_COLUMNS if mode != "Individual cases" else CASE_COLUMNS
338
- marker_table = markers[[column for column in marker_table_columns if column in markers.columns]].copy()
339
- return (
340
- _summary_text(filtered, markers, mode or "Grouped sites"),
341
- _map(markers, mode or "Grouped sites"),
342
- marker_table,
343
- marker_records,
344
- filtered_rows,
345
- _detail(marker_records, filtered_rows, 0, mode or "Grouped sites"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
347
 
348
 
349
  def build_app(data_dir: str | Path):
350
  data_dir = Path(data_dir)
351
  cases, manifest, quality = _load_data(data_dir)
352
- evidence_choices = _options(cases["evidence_tier"])
353
- coordinate_choices = _options(cases["coordinate_quality"])
354
- with gr.Blocks(title="Mystery Drone Reports Around Sensitive Sites") as app:
355
- gr.Markdown(_markdown_header(manifest, quality))
356
- with gr.Row():
357
- mode = gr.Radio(
358
- choices=["Grouped sites", "Individual cases", "Coarse-location review"],
359
- value="Grouped sites",
360
- label="Map mode",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
  )
362
- repeated_only = gr.Checkbox(value=False, label="Only repeated markers")
363
- query = gr.Textbox(label="Search", placeholder="Try New Jersey, Langley, Copenhagen, airport, military base")
364
- with gr.Row():
365
- evidence_filter = gr.CheckboxGroup(choices=evidence_choices, value=evidence_choices, label="Evidence tier")
366
- coordinate_filter = gr.CheckboxGroup(choices=coordinate_choices, value=coordinate_choices, label="Coordinate quality")
367
- with gr.Row():
368
- country_filter = gr.Dropdown(choices=_options(cases["country"]), value=[], multiselect=True, label="Country")
369
- site_filter = gr.Dropdown(choices=_options(cases["site_type"]), value=[], multiselect=True, label="Site type")
370
- source_filter = gr.Dropdown(choices=_options(cases["source_domain"]), value=[], multiselect=True, label="Source domain")
371
- year_filter = gr.Dropdown(choices=_options(cases["report_year"]), value=[], multiselect=True, label="Report year")
372
- summary = gr.Markdown()
373
- with gr.Row():
374
- with gr.Column(scale=3):
375
- map_plot = gr.Plot(label="Grouped case map")
376
- with gr.Column(scale=2):
377
- detail = gr.Markdown()
378
- marker_table = gr.Dataframe(label="Visible map markers", interactive=False)
379
- marker_rows_state = gr.State([])
380
- filtered_rows_state = gr.State([])
381
-
382
- def render(evidence_tiers, coordinate_qualities, countries, site_types, source_domains, years, map_mode, repeats, search_query):
383
- return _render(cases, evidence_tiers, coordinate_qualities, countries, site_types, source_domains, years, map_mode, repeats, search_query)
384
-
385
- inputs = [
386
- evidence_filter,
387
- coordinate_filter,
388
- country_filter,
389
- site_filter,
390
- source_filter,
391
- year_filter,
392
- mode,
393
- repeated_only,
394
- query,
395
- ]
396
- outputs = [summary, map_plot, marker_table, marker_rows_state, filtered_rows_state, detail]
397
- for control in inputs:
398
- control.change(render, inputs=inputs, outputs=outputs)
399
-
400
- def select_marker(markers, filtered_rows, map_mode, evt: gr.SelectData):
401
- if not evt or evt.index is None:
402
- return _detail(markers, filtered_rows, 0, map_mode)
403
- row_index = evt.index[0] if isinstance(evt.index, (list, tuple)) else evt.index
404
- return _detail(markers, filtered_rows, row_index, map_mode)
405
-
406
- marker_table.select(select_marker, inputs=[marker_rows_state, filtered_rows_state, mode], outputs=detail)
407
- app.load(render, inputs=inputs, outputs=outputs)
408
  return app
 
8
  import plotly.express as px
9
 
10
 
11
+ EUROPE_COUNTRIES = {
12
+ "Belgium",
13
+ "Denmark",
14
+ "Germany",
15
+ "Ireland",
16
+ "Italy",
17
+ "Netherlands",
18
+ "Spain",
19
+ "Sweden",
20
+ "United Kingdom",
21
+ }
22
+ CLARITY_LABELS = {
23
+ "resolved_sensitive_site_report": "Specific site matched",
24
+ "named_sensitive_site_report": "Specific site named",
25
+ "source_discovered_report": "News lead to review",
26
+ }
27
+ LOCATION_LABELS = {
28
+ "site_centroid": "Specific site location",
29
+ "city_area_centroid": "City-area location",
30
+ "region_centroid": "General regional location",
31
+ "country_centroid": "Country-level location",
32
+ }
33
+ STORY_CHOICES = [
34
+ "Start here: main storylines",
35
+ "New Jersey coastal/security reports",
36
+ "European airport disruptions",
37
+ "Military base reports",
38
+ "All reports by place",
39
  ]
40
+ REPORT_COLUMNS = [
41
+ "Headline",
42
+ "Date",
43
+ "Place",
44
+ "Place type",
45
+ "Country",
46
+ "Source",
47
+ "Why included",
48
+ "Caution",
49
+ ]
50
+ PLACE_COLUMNS = [
51
+ "Place",
52
+ "Reports",
53
+ "Place type",
54
+ "Region",
55
+ "Location note",
56
+ "Date span",
57
+ "Why look here",
58
+ ]
59
+ TECH_COLUMNS = [
60
+ "case_id",
61
  "case_rank",
62
  "evidence_tier",
 
 
 
 
63
  "coordinate_quality",
64
+ "probable_cluster_id",
65
+ "public_row_sha256",
 
66
  ]
 
 
 
 
 
 
 
 
 
 
 
67
 
68
 
69
  def _load_data(data_dir: Path) -> tuple[pd.DataFrame, dict, dict]:
 
73
  cases["case_rank"] = pd.to_numeric(cases["case_rank"], errors="coerce").fillna(999999).astype(int)
74
  cases["plot_lat"] = pd.to_numeric(cases["plot_lat"], errors="coerce")
75
  cases["plot_lon"] = pd.to_numeric(cases["plot_lon"], errors="coerce")
76
+ cases["report_year"] = cases["report_date"].astype(str).str.slice(0, 4).replace("", "Older / unknown")
77
+ cases["reader_clarity"] = cases["evidence_tier"].map(CLARITY_LABELS).fillna("News lead to review")
78
+ cases["location_note"] = cases["coordinate_quality"].map(LOCATION_LABELS).fillna("General location")
79
+ cases["place_type_reader"] = cases.apply(_place_type_label, axis=1)
80
+ cases["region_reader"] = cases["country"].map(_region_label)
81
+ cases["story_group"] = cases.apply(_story_group, axis=1)
82
+ cases["reader_caution"] = cases.apply(_reader_caution, axis=1)
83
+ cases["why_included"] = cases.apply(_why_included, axis=1)
84
  cases["map_group_id"] = cases.apply(
85
  lambda row: "|".join(
86
  [
87
  f"{float(row['plot_lat']):.4f}" if pd.notna(row["plot_lat"]) else "",
88
  f"{float(row['plot_lon']):.4f}" if pd.notna(row["plot_lon"]) else "",
89
  str(row.get("plot_label", "")),
90
+ str(row.get("place_type_reader", "")),
91
  str(row.get("country", "")),
92
  ]
93
  ),
 
96
  return cases, manifest, quality
97
 
98
 
99
+ def _place_type_label(row: pd.Series) -> str:
100
+ text = f"{row.get('site_type', '')} {row.get('site_name', '')} {row.get('plot_label', '')} {row.get('headline', '')}".lower()
101
+ if "airport" in text or "runway" in text:
102
+ return "Airport"
103
+ if "coast guard" in text or "coastal" in text or "maritime" in text or "new jersey" in text:
104
+ return "Coastal/security"
105
+ if "military" in text or "air force" in text or "air base" in text or "arsenal" in text or "raf " in text or "joint base" in text:
106
+ return "Military site"
107
+ if "critical" in text or "infrastructure" in text or "nuclear" in text or "power" in text:
108
+ return "Critical infrastructure"
109
+ return "Other / unclear"
110
+
111
+
112
+ def _region_label(country: str) -> str:
113
+ if country == "United States":
114
+ return "United States"
115
+ if country in EUROPE_COUNTRIES:
116
+ return "Europe"
117
+ return "Other / unclear"
118
+
119
+
120
+ def _story_group(row: pd.Series) -> str:
121
+ text = f"{row.get('headline', '')} {row.get('site_name', '')} {row.get('plot_label', '')} {row.get('country', '')}".lower()
122
+ if "new jersey" in text or "coast guard" in text:
123
+ return "New Jersey coastal/security reports"
124
+ if row.get("region_reader") == "Europe" and ("airport" in text or row.get("place_type_reader") == "Airport"):
125
+ return "European airport disruptions"
126
+ if row.get("place_type_reader") == "Military site":
127
+ return "Military base reports"
128
+ return "All reports by place"
129
+
130
+
131
+ def _reader_caution(row: pd.Series) -> str:
132
+ clarity = row.get("reader_clarity", "")
133
+ location = row.get("location_note", "")
134
+ if clarity == "News lead to review":
135
+ return "Treat as a source lead, not a confirmed event."
136
+ if location != "Specific site location":
137
+ return "Location is approximate."
138
+ return "Check the linked source before drawing conclusions."
139
+
140
+
141
+ def _why_included(row: pd.Series) -> str:
142
+ clarity = row.get("reader_clarity", "")
143
+ place_type = row.get("place_type_reader", "")
144
+ if clarity == "Specific site matched":
145
+ return f"Matched to a {place_type.lower()} report location."
146
+ if clarity == "Specific site named":
147
+ return f"The source names a {place_type.lower()} or sensitive place."
148
+ return f"The source language points to a drone report near a {place_type.lower()} context."
149
 
 
150
 
151
+ def _date_span(values: pd.Series) -> str:
152
+ dates = sorted(str(value) for value in values if str(value))
153
+ if not dates:
154
+ return "Date unclear"
155
+ if dates[0] == dates[-1]:
156
+ return dates[0]
157
+ return f"{dates[0]} to {dates[-1]}"
 
158
 
159
 
160
+ def _count_text(values: pd.Series, limit: int = 4) -> str:
161
+ counts = values.astype(str).replace("", "unknown").value_counts()
162
+ return ", ".join(f"{key}: {int(value)}" for key, value in counts.head(limit).items())
163
 
164
 
165
+ def _header(manifest: dict) -> str:
166
+ named_or_matched = int(manifest.get("resolved_sensitive_site_report_count", 0)) + int(
167
+ manifest.get("named_sensitive_site_report_count", 0)
168
+ )
169
+ leads = int(manifest.get("source_discovered_report_count", 0))
170
+ return f"""# Mystery Drone Reports Near Sensitive Places
171
 
172
+ This is a public-source index of news reports near airports, military sites, coastal/security areas, and other sensitive places. It is not proof of threat, intent, or unusual origin.
173
 
174
+ **{manifest.get("case_count", 0)} public-source reports** | **{named_or_matched} name or match a specific sensitive site** | **{leads} broader leads for follow-up**
175
+ """
 
 
 
 
 
176
 
177
 
178
+ def _story_intro(story: str, rows: pd.DataFrame) -> str:
179
+ if rows.empty:
180
+ return "No reports match this storyline."
181
+ places = _count_text(rows["plot_label"], limit=5)
182
+ sources = _count_text(rows["source_domain"], limit=5)
183
+ dates = _date_span(rows["report_date"])
184
+ location_note = "Some markers are approximate because public reports often describe areas rather than exact coordinates."
185
+ if story == "New Jersey coastal/security reports":
186
+ lead = "This group collects public reports connected to the New Jersey drone wave and nearby coastal/security locations."
187
+ caution = "Many rows are broad reporting leads, so treat this as a reporting trail rather than a confirmed incident list."
188
+ elif story == "European airport disruptions":
189
+ lead = "This group follows reports around European airport disruptions and related drone activity."
190
+ caution = "Airport closures and disruption reports can involve repeated follow-up stories, so use the source links to separate event reports from later context."
191
+ elif story == "Military base reports":
192
+ lead = "This group focuses on reports that name or point toward military bases and military-site areas."
193
+ caution = "A report near a base does not prove origin, intent, or threat."
194
+ elif story == "All reports by place":
195
+ lead = "This view groups the full report set by place so repeated locations are easier to scan."
196
+ caution = "Marker size means number of source reports, not number of confirmed objects."
197
+ else:
198
+ lead = "Pick a storyline below to explore the main reporting trails."
199
+ caution = "Start with the story summaries, then use the map and sources for details."
200
+ return f"""## {story}
201
+
202
+ {lead}
203
+
204
+ - Reports in view: **{len(rows)}**
205
+ - Date range: **{dates}**
206
+ - Common places: {places}
207
+ - Common sources: {sources}
208
+
209
+ **What this does not prove:** {caution}
210
+
211
+ **Location note:** {location_note}
212
+ """
213
 
214
 
215
+ def _story_rows(cases: pd.DataFrame, story: str) -> pd.DataFrame:
216
+ if story == "Start here: main storylines":
217
+ return cases.copy()
218
+ if story == "All reports by place":
219
+ return cases.copy()
220
+ return cases[cases["story_group"] == story].copy()
221
+
222
+
223
+ def _filter_rows(cases: pd.DataFrame, search: str, region: str, place_type: str, clarity: str, year: str) -> pd.DataFrame:
224
+ rows = cases.copy()
225
+ if region and region != "All":
226
+ rows = rows[rows["region_reader"] == region]
227
+ if place_type and place_type != "All":
228
+ rows = rows[rows["place_type_reader"] == place_type]
229
+ if clarity and clarity != "All":
230
+ rows = rows[rows["reader_clarity"] == clarity]
231
+ if year and year != "All":
232
+ if year == "Older / unknown":
233
+ rows = rows[~rows["report_year"].isin(["2024", "2025", "2026"])]
234
+ else:
235
+ rows = rows[rows["report_year"] == year]
236
+ search = str(search or "").strip().lower()
237
+ if search:
 
 
238
  haystack = (
239
+ rows["headline"].astype(str)
240
  + " "
241
+ + rows["site_name"].astype(str)
242
  + " "
243
+ + rows["plot_label"].astype(str)
244
  + " "
245
+ + rows["country"].astype(str)
246
  + " "
247
+ + rows["source_domain"].astype(str)
248
  ).str.lower()
249
+ rows = rows[haystack.str.contains(search, regex=False)]
250
+ return rows.sort_values(["case_rank"]).reset_index(drop=True)
251
 
252
 
253
+ def _group_rows(rows: pd.DataFrame) -> pd.DataFrame:
254
+ out: list[dict] = []
255
+ if rows.empty:
256
+ return pd.DataFrame(columns=["Place", "Reports", "Place type", "Region", "Location note", "Date span", "Why look here", "map_group_id", "plot_lat", "plot_lon"])
257
+ for group_id, group in rows.groupby("map_group_id", sort=False):
258
+ out.append(
 
259
  {
260
  "map_group_id": group_id,
261
+ "Place": str(group["plot_label"].iloc[0]),
262
+ "Reports": int(len(group)),
263
+ "Place type": str(group["place_type_reader"].iloc[0]),
264
+ "Region": str(group["region_reader"].iloc[0]),
265
+ "Location note": str(group["location_note"].iloc[0]),
266
+ "Date span": _date_span(group["report_date"]),
267
+ "Why look here": _count_text(group["reader_clarity"], limit=3),
268
  "plot_lat": float(group["plot_lat"].iloc[0]),
269
  "plot_lon": float(group["plot_lon"].iloc[0]),
270
+ "source_summary": _count_text(group["source_domain"], limit=3),
 
 
 
 
 
 
 
271
  }
272
  )
273
+ grouped = pd.DataFrame(out)
274
+ return grouped.sort_values(["Reports", "Place"], ascending=[False, True]).reset_index(drop=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
275
 
276
 
277
+ def _map(groups: pd.DataFrame):
278
+ if groups.empty:
279
+ fig = px.scatter_geo(pd.DataFrame({"plot_lat": [], "plot_lon": []}), lat="plot_lat", lon="plot_lon", height=560)
280
+ fig.update_layout(margin={"l": 0, "r": 0, "t": 12, "b": 0})
281
  return fig
282
  fig = px.scatter_geo(
283
+ groups,
284
  lat="plot_lat",
285
  lon="plot_lon",
286
+ color="Place type",
287
+ size="Reports",
288
+ size_max=38,
289
+ hover_name="Place",
 
290
  hover_data={
291
+ "Reports": True,
292
+ "Region": True,
293
+ "Location note": True,
294
+ "Date span": True,
295
+ "Why look here": True,
296
+ "source_summary": True,
 
297
  "plot_lat": False,
298
  "plot_lon": False,
299
  },
300
  projection="natural earth",
301
+ height=560,
302
  color_discrete_map={
303
+ "Airport": "#1f77b4",
304
+ "Military site": "#b42318",
305
+ "Coastal/security": "#2e7d62",
306
+ "Critical infrastructure": "#8e5ea2",
307
+ "Other / unclear": "#6b7280",
308
  },
309
  )
310
+ fig.update_traces(marker={"opacity": 0.8, "line": {"width": 0.6, "color": "white"}})
311
  fig.update_geos(showland=True, landcolor="#eef2f5", showocean=True, oceancolor="#dfeaf2", showcountries=True)
312
+ fig.update_layout(margin={"l": 0, "r": 0, "t": 18, "b": 0}, legend_title_text="Place type")
 
 
 
 
313
  return fig
314
 
315
 
316
+ def _public_table(rows: pd.DataFrame) -> pd.DataFrame:
317
+ if rows.empty:
318
+ return pd.DataFrame(columns=REPORT_COLUMNS)
319
+ return pd.DataFrame(
320
+ {
321
+ "Headline": rows["headline"],
322
+ "Date": rows["report_date"].replace("", "Date unclear"),
323
+ "Place": rows["plot_label"],
324
+ "Place type": rows["place_type_reader"],
325
+ "Country": rows["country"].replace("", "unknown"),
326
+ "Source": rows["source_domain"],
327
+ "Why included": rows["why_included"],
328
+ "Caution": rows["reader_caution"],
329
+ }
 
 
 
 
 
 
 
 
 
330
  )
331
+
332
+
333
+ def _source_cards(rows: pd.DataFrame, limit: int = 10) -> str:
334
+ if rows.empty:
335
+ return "No reports match this view."
336
+ lines = ["## Source links to inspect", ""]
337
+ for _, row in rows.head(limit).iterrows():
 
 
 
 
 
 
 
 
 
 
 
 
338
  lines.extend(
339
  [
340
+ f"### {row['headline']}",
341
+ f"- Date: {row['report_date'] or 'Date unclear'}",
342
+ f"- Place: {row['plot_label']} ({row['location_note']})",
343
+ f"- Why included: {row['why_included']}",
344
+ f"- Caution: {row['reader_caution']}",
345
+ f"- Source: [{row['publisher'] or row['source_domain']}]({row['source_url']})",
346
  "",
 
 
 
 
347
  ]
348
  )
349
+ if len(rows) > limit:
350
+ lines.append(f"...and {len(rows) - limit} more reports in the list.")
351
+ return "\n".join(lines)
352
+
353
+
354
def _story_card_markdown(cases: pd.DataFrame) -> str:
    """Build the Markdown list of storyline cards for the start screen.

    One card is produced for every entry of ``STORY_CHOICES`` after the first,
    showing the story name, the number of rows ``_story_rows`` returns for it,
    and a one-line subtitle.
    """
    subtitles = {
        "All reports by place": "Scan every mapped report grouped by place.",
        "New Jersey coastal/security reports": "The largest reporting trail in this release.",
        "European airport disruptions": "Airport closures and disruption reports across Europe.",
    }
    # Any story without its own entry falls back to the military-site subtitle.
    fallback = "Reports around bases and military-site areas."
    cards = [
        f"**{story}** - {len(_story_rows(cases, story))} reports. {subtitles.get(story, fallback)}"
        for story in STORY_CHOICES[1:]
    ]
    return "## Pick a storyline to explore\n\n" + "\n\n".join(cards)
368
+
369
+
370
def _render_story(cases: pd.DataFrame, story: str):
    """Produce every output of the "Start here" tab for one storyline.

    Returns:
        A 5-tuple: intro Markdown, map figure, places table restricted to
        ``PLACE_COLUMNS``, reader-facing report table, and source cards.
    """
    story_rows = _story_rows(cases, story)
    places = _group_rows(story_rows)
    if story == "Start here: main storylines":
        # The landing choice shows the overall header plus the storyline menu.
        intro = _header_from_rows(cases) + "\n\n" + _story_card_markdown(cases)
    else:
        intro = _story_intro(story, story_rows)
    return (
        intro,
        _map(places),
        places[PLACE_COLUMNS],
        _public_table(story_rows),
        _source_cards(story_rows),
    )
375
+
376
+
377
+ def _header_from_rows(cases: pd.DataFrame) -> str:
378
+ specific = int((cases["reader_clarity"].isin(["Specific site matched", "Specific site named"])).sum())
379
+ leads = int((cases["reader_clarity"] == "News lead to review").sum())
380
+ return f"""# Mystery Drone Reports Near Sensitive Places
381
+
382
+ This is a public-source index of news reports near airports, military sites, coastal/security areas, and other sensitive places.
383
+
384
+ It is not proof of threat, intent, or unusual origin.
385
+
386
+ **{len(cases)} public-source reports** | **{specific} name or match a specific sensitive site** | **{leads} broader leads for follow-up**
387
+ """
388
+
389
+
390
def _render_map(cases: pd.DataFrame, search: str, region: str, place_type: str, clarity: str, year: str):
    """Produce every output of the "Map" tab for the current filter values.

    Returns:
        A 5-tuple: summary sentence, map figure, places table restricted to
        ``PLACE_COLUMNS``, reader-facing report table, and source cards.
    """
    matched = _filter_rows(cases, search, region, place_type, clarity, year)
    places = _group_rows(matched)
    counts = f"Showing {len(matched)} reports at {len(places)} places. "
    legend_hint = "Bigger markers mean more reports at that place. Colors show the kind of place."
    return (
        counts + legend_hint,
        _map(places),
        places[PLACE_COLUMNS],
        _public_table(matched),
        _source_cards(matched),
    )
398
+
399
+
400
def _render_reports(cases: pd.DataFrame, search: str, region: str, place_type: str, clarity: str, year: str):
    """Produce every output of the "Reports" tab for the current filter values.

    Returns:
        A 4-tuple: summary sentence, reader-facing report table, source cards,
        and the technical-field table.
    """
    matched = _filter_rows(cases, search, region, place_type, clarity, year)
    count_line = f"Showing {len(matched)} reports. "
    usage_hint = "Select a row by using the source links in the detail panel below."
    return (
        count_line + usage_hint,
        _public_table(matched),
        _source_cards(matched),
        _technical_table(matched),
    )
404
+
405
+
406
def _technical_table(rows: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``rows`` limited to the ``TECH_COLUMNS`` fields.

    An empty input yields an empty frame that still carries the
    ``TECH_COLUMNS`` headers.
    """
    if not rows.empty:
        return rows.loc[:, TECH_COLUMNS].copy()
    return pd.DataFrame(columns=TECH_COLUMNS)
410
+
411
+
412
+ def _data_notes(manifest: dict, quality: dict) -> str:
413
+ return f"""# Data notes
414
+
415
+ This Space keeps the technical classifications available, but keeps them out of the first screen.
416
+
417
+ - Release version: {manifest.get('release_version')}
418
+ - Public rows: {manifest.get('case_count')}
419
+ - Quality gate passed: {quality.get('release_grade')}
420
+ - Duplicate source URLs: {quality.get('duplicate_source_url_count')}
421
+ - Missing source URLs: {quality.get('missing_source_url_count')}
422
+ - Mappable rows: {quality.get('mappable_case_count')}
423
+
424
+ Plain-language translations:
425
+
426
+ - Specific site matched = stricter source/site matching found a sensitive-site report.
427
+ - Specific site named = the source names a sensitive site, but it still needs review.
428
+ - News lead to review = public source language suggests a relevant report, but this is a lead, not a confirmed event.
429
+ - Specific site location = marker uses a known site point.
430
+ - General regional location or country-level location = marker is approximate.
431
+ """
432
 
433
 
434
def build_app(data_dir: str | Path):
    """Assemble the Gradio Blocks app with its four tabs.

    Data is read once via ``_load_data(data_dir)``; every callback closes over
    the resulting ``cases`` frame rather than reloading it.

    Args:
        data_dir: Directory handed to ``_load_data`` (converted to ``Path``).

    Returns:
        The ``gr.Blocks`` instance, not yet launched.
    """
    # NOTE(review): the nesting under each `with gr.Row():` below was
    # reconstructed from a whitespace-collapsed view of this file — confirm
    # which components belong inside each row against the repository copy.
    data_dir = Path(data_dir)
    cases, manifest, quality = _load_data(data_dir)
    # Filter options are fixed lists; "All" is the default for every dropdown.
    region_choices = ["All", "United States", "Europe", "Other / unclear"]
    place_choices = ["All", "Airport", "Military site", "Coastal/security", "Critical infrastructure", "Other / unclear"]
    clarity_choices = ["All", "Specific site matched", "Specific site named", "News lead to review"]
    year_choices = ["All", "2026", "2025", "2024", "Older / unknown"]

    with gr.Blocks(title="Mystery Drone Reports Near Sensitive Places") as app:
        # Tab 1: guided storylines.
        with gr.Tab("Start here"):
            story = gr.Radio(choices=STORY_CHOICES, value=STORY_CHOICES[0], label="Pick a storyline")
            story_intro = gr.Markdown()
            with gr.Row():
                story_map = gr.Plot(label="Story map")
                story_sources = gr.Markdown()
            story_places = gr.Dataframe(label="Places in this story", interactive=False)
            story_reports = gr.Dataframe(label="Reports in this story", interactive=False)
            # Output order matches the tuple returned by _render_story.
            story.change(
                lambda selected: _render_story(cases, selected),
                inputs=story,
                outputs=[story_intro, story_map, story_places, story_reports, story_sources],
            )
            # Render the default storyline when the app loads.
            app.load(
                lambda: _render_story(cases, STORY_CHOICES[0]),
                outputs=[story_intro, story_map, story_places, story_reports, story_sources],
            )

        # Tab 2: filterable map.
        with gr.Tab("Map"):
            gr.Markdown("## Map\n\nBigger markers mean more public-source reports at that place. Colors show the kind of place.")
            with gr.Row():
                map_search = gr.Textbox(label="Search", placeholder="Search a place, country, source, or headline")
                map_region = gr.Dropdown(choices=region_choices, value="All", label="Region")
                map_place = gr.Dropdown(choices=place_choices, value="All", label="Place type")
                map_clarity = gr.Dropdown(choices=clarity_choices, value="All", label="Report clarity")
                map_year = gr.Dropdown(choices=year_choices, value="All", label="Time")
            map_summary = gr.Markdown()
            map_plot = gr.Plot(label="Report map")
            map_places = gr.Dataframe(label="Places shown on the map", interactive=False)
            map_reports = gr.Dataframe(label="Reports shown by current filters", interactive=False)
            map_sources = gr.Markdown()
            map_inputs = [map_search, map_region, map_place, map_clarity, map_year]
            # Every filter triggers the same callback with the full input list;
            # the lambda does not reference the loop variable.
            for control in map_inputs:
                control.change(
                    lambda search, region, place, clarity, year: _render_map(cases, search, region, place, clarity, year),
                    inputs=map_inputs,
                    outputs=[map_summary, map_plot, map_places, map_reports, map_sources],
                )
            # Initial render with no filters applied.
            app.load(
                lambda: _render_map(cases, "", "All", "All", "All", "All"),
                outputs=[map_summary, map_plot, map_places, map_reports, map_sources],
            )

        # Tab 3: row-level report list with the same filter set as the map.
        with gr.Tab("Reports"):
            gr.Markdown("## All reports\n\nUse this when you want source links and row-level cautions.")
            with gr.Row():
                report_search = gr.Textbox(label="Search", placeholder="Search a place, country, source, or headline")
                report_region = gr.Dropdown(choices=region_choices, value="All", label="Region")
                report_place = gr.Dropdown(choices=place_choices, value="All", label="Place type")
                report_clarity = gr.Dropdown(choices=clarity_choices, value="All", label="Report clarity")
                report_year = gr.Dropdown(choices=year_choices, value="All", label="Time")
            report_summary = gr.Markdown()
            report_table = gr.Dataframe(label="Readable report list", interactive=False)
            report_sources = gr.Markdown()
            # Technical fields stay collapsed until the reader asks for them.
            with gr.Accordion("Show technical fields", open=False):
                technical_table = gr.Dataframe(label="Technical row fields", interactive=False)
            report_inputs = [report_search, report_region, report_place, report_clarity, report_year]
            # Same wiring pattern as the Map tab: one shared callback per filter.
            for control in report_inputs:
                control.change(
                    lambda search, region, place, clarity, year: _render_reports(cases, search, region, place, clarity, year),
                    inputs=report_inputs,
                    outputs=[report_summary, report_table, report_sources, technical_table],
                )
            # Initial render with no filters applied.
            app.load(
                lambda: _render_reports(cases, "", "All", "All", "All", "All"),
                outputs=[report_summary, report_table, report_sources, technical_table],
            )

        # Tab 4: static notes plus the raw manifest and quality report.
        with gr.Tab("Data notes"):
            gr.Markdown(_data_notes(manifest, quality))
            with gr.Accordion("Technical manifest", open=False):
                gr.JSON(manifest)
            with gr.Accordion("Quality report", open=False):
                gr.JSON(quality)
    return app
space_manifest.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "space_bundle_version": "mystery_drone_sensitive_site_space_v2",
3
  "source_release_version": "mystery-drone-sensitive-site-cases-2026-05-v1",
4
  "case_count": 149,
5
  "release_grade": true,
@@ -13,14 +13,14 @@
13
  {
14
  "artifact_role": "space_public_app",
15
  "artifact_path": "public_space_app.py",
16
- "content_sha256": "05ecfaa2d35d3cb16dcfba7a9bc94ea9a19d21a49fe875acc704b50f27c419f1",
17
- "byte_count": 17863
18
  },
19
  {
20
  "artifact_role": "readme",
21
  "artifact_path": "README.md",
22
- "content_sha256": "050e3c4fcac1cc1dd91c026f350d2e458d4641c0da2d8ec18c06009a8d7be990",
23
- "byte_count": 605
24
  },
25
  {
26
  "artifact_role": "requirements",
@@ -47,5 +47,5 @@
47
  "byte_count": 1008
48
  }
49
  ],
50
- "bundle_hash": "1efe312db89231fc27a11b2c2e540727fd9196ed7bec35115afee24d474cda6e"
51
  }
 
1
  {
2
+ "space_bundle_version": "mystery_drone_sensitive_site_space_v3_plain_language",
3
  "source_release_version": "mystery-drone-sensitive-site-cases-2026-05-v1",
4
  "case_count": 149,
5
  "release_grade": true,
 
13
  {
14
  "artifact_role": "space_public_app",
15
  "artifact_path": "public_space_app.py",
16
+ "content_sha256": "e1daff7c9f9772f8e87295eba2ac5bc346e06d2c2fde78ce8c0e01d33a359ad1",
17
+ "byte_count": 23312
18
  },
19
  {
20
  "artifact_role": "readme",
21
  "artifact_path": "README.md",
22
+ "content_sha256": "aa6754e5f1eb78132ca380f7b9c65a41f3db9b9fdde872b468381351bc16c56a",
23
+ "byte_count": 483
24
  },
25
  {
26
  "artifact_role": "requirements",
 
47
  "byte_count": 1008
48
  }
49
  ],
50
+ "bundle_hash": "aa231b606f39e4723a46c37e6c24a5a5c8711dd8f484921176c4839f200c536e"
51
  }