drdeveloper88 committed on
Commit
9cde61a
·
1 Parent(s): 51f7c12

Fix: restore worlddisasterlm/data package + fix README badges and clone URL

Browse files
scripts/sync_to_hf.py CHANGED
@@ -285,14 +285,19 @@ ROOT_DIRS = [
285
  ]
286
 
287
  EXCLUDE_DIRS = {".venv", "__pycache__", ".pytest_cache", "node_modules",
288
- ".git", "data", "outputs", "checkpoints", "artifacts"}
289
 
 
 
290
 
291
- def copy_tree(src: Path, dst: Path):
 
292
  dst.mkdir(parents=True, exist_ok=True)
293
  for item in src.iterdir():
294
  if item.name in EXCLUDE_DIRS:
295
  continue
 
 
296
  if item.is_dir():
297
  copy_tree(item, dst / item.name)
298
  else:
@@ -346,7 +351,7 @@ def main():
346
  dst = REPO_DIR / dname
347
  if dst.exists():
348
  shutil.rmtree(dst)
349
- copy_tree(src, dst)
350
  print(f" {dname}/")
351
 
352
  # Git add + commit + push
 
285
  ]
286
 
287
  EXCLUDE_DIRS = {".venv", "__pycache__", ".pytest_cache", "node_modules",
288
+ ".git", "outputs", "checkpoints", "artifacts"}
289
 
290
+ # Top-level only exclusions (don't apply recursively)
291
+ EXCLUDE_TOP_LEVEL_DIRS = {"data", "frontend"}
292
 
293
+
294
+ def copy_tree(src: Path, dst: Path, top_level: bool = False):
295
  dst.mkdir(parents=True, exist_ok=True)
296
  for item in src.iterdir():
297
  if item.name in EXCLUDE_DIRS:
298
  continue
299
+ if top_level and item.name in EXCLUDE_TOP_LEVEL_DIRS:
300
+ continue
301
  if item.is_dir():
302
  copy_tree(item, dst / item.name)
303
  else:
 
351
  dst = REPO_DIR / dname
352
  if dst.exists():
353
  shutil.rmtree(dst)
354
+ copy_tree(src, dst, top_level=True)
355
  print(f" {dname}/")
356
 
357
  # Git add + commit + push
worlddisasterlm/data/__init__.py ADDED
File without changes
worlddisasterlm/data/collectors/__init__.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from worlddisasterlm.data.collectors.reliefweb import collect_reliefweb
2
+ from worlddisasterlm.data.collectors.usgs import collect_usgs
3
+ from worlddisasterlm.data.collectors.gdacs import collect_gdacs
4
+ from worlddisasterlm.data.collectors.noaa import collect_noaa
5
+ from worlddisasterlm.data.collectors.openfema import collect_openfema
6
+ from worlddisasterlm.data.collectors.who_rss import collect_who
7
+
8
+ __all__ = [
9
+ "collect_reliefweb",
10
+ "collect_usgs",
11
+ "collect_gdacs",
12
+ "collect_noaa",
13
+ "collect_openfema",
14
+ "collect_who",
15
+ ]
worlddisasterlm/data/collectors/gdacs.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ GDACS (Global Disaster Alert and Coordination System) collector.
3
+
4
+ Free GeoRSS feed – no authentication required.
5
+ Feed: https://www.gdacs.org/xml/rss.xml
6
+ """
7
+ import logging
8
+ import re
9
+ from datetime import datetime
10
+
11
+ import feedparser
12
+
13
+ from worlddisasterlm.data.schemas import DisasterRecord
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+ GDACS_RSS_URL = "https://www.gdacs.org/xml/rss.xml"
18
+ GDACS_SCORE_RSS_URL = "https://www.gdacs.org/xml/rss_score.xml"
19
+
20
+
21
def _alert_to_severity(alert_level: str) -> str:
    """Translate a GDACS alert colour into a severity label.

    "red" becomes "critical" and "orange" becomes "high"; any other value
    (including "green" and unrecognised text) becomes "moderate". Matching
    ignores surrounding whitespace and letter case.
    """
    colour_to_severity = {"red": "critical", "orange": "high"}
    return colour_to_severity.get(alert_level.strip().lower(), "moderate")
28
+
29
+
30
def _clean_html(text: str) -> str:
    """Remove markup tags from *text* and return at most 600 characters.

    Each ``<...>`` tag is replaced by a space, runs of whitespace are
    collapsed to single spaces, and a falsy input yields an empty string.
    """
    without_tags = re.sub(r"<[^>]+>", " ", text if text else "")
    collapsed = " ".join(without_tags.split())
    return collapsed[:600]
33
+
34
+
35
def collect_gdacs(max_records: int = 2000) -> list[DisasterRecord]:
    """Collect disaster events from GDACS GeoRSS (free, no auth).

    Parses ``GDACS_RSS_URL`` and then ``GDACS_SCORE_RSS_URL`` and turns each
    feed entry into a ``DisasterRecord`` with ``source="GDACS"``.

    Args:
        max_records: Upper bound on the number of records returned. Once it
            is reached no further feed is downloaded.

    Returns:
        Up to ``max_records`` records. Entries with neither a summary nor a
        title are skipped. A feed that fails to parse is logged and skipped.
    """
    records: list[DisasterRecord] = []

    for url in (GDACS_RSS_URL, GDACS_SCORE_RSS_URL):
        # Do not download another feed when the cap has already been reached.
        if len(records) >= max_records:
            break

        logger.info("Parsing GDACS feed: %s", url)
        try:
            feed = feedparser.parse(url)
        except Exception as exc:
            # Best effort: one broken feed must not abort the whole run.
            logger.warning("Failed to parse GDACS feed %s: %s", url, exc)
            continue

        if feed.bozo and feed.bozo_exception:
            logger.warning("GDACS feed parse warning: %s", feed.bozo_exception)

        for entry in feed.entries:
            if len(records) >= max_records:
                break

            title = entry.get("title", "")
            summary_raw = entry.get("summary", entry.get("description", ""))
            summary = _clean_html(summary_raw) or title
            if not summary:
                continue

            # GDACS uses gdacs: namespace tags
            # NOTE(review): the alertscore fallback looks numeric, which
            # _alert_to_severity would map to "moderate" - confirm intent.
            alert_level = (
                entry.get("gdacs_alertlevel")
                or entry.get("gdacs_alertscore", "")
                or "green"
            )
            event_type = (
                entry.get("gdacs_eventtype")
                or entry.get("gdacs_eventname", "disaster")
            )

            # Try to extract country/region
            country = entry.get("gdacs_country") or entry.get("gdacs_iso3", "global")

            records.append(
                DisasterRecord(
                    source="GDACS",
                    event_type=str(event_type).lower().strip(),
                    region=str(country).strip(),
                    summary=summary,
                    severity=_alert_to_severity(str(alert_level)),
                )
            )

    logger.info("GDACS collection complete: %d records", len(records))
    return records[:max_records]
worlddisasterlm/data/collectors/noaa.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ NOAA National Weather Service alerts collector.
3
+
4
+ Free API – no authentication required.
5
+ Docs: https://www.weather.gov/documentation/services-web-api
6
+ """
7
+ import logging
8
+ import time
9
+
10
+ import httpx
11
+
12
+ from worlddisasterlm.data.schemas import DisasterRecord
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ NOAA_ALERTS_API = "https://api.weather.gov/alerts"
17
+ HEADERS = {"User-Agent": "WorldDisasterLM/0.1.0 (worlddisasterlm@example.com)"}
18
+
19
+ SEVERITY_MAP = {
20
+ "extreme": "critical",
21
+ "severe": "high",
22
+ "moderate": "moderate",
23
+ "minor": "low",
24
+ "unknown": "low",
25
+ }
26
+
27
+ EVENT_NORMALIZE = {
28
+ "tornado warning": "tornado",
29
+ "flash flood warning": "flood",
30
+ "hurricane warning": "hurricane",
31
+ "blizzard warning": "blizzard",
32
+ "winter storm warning": "winter_storm",
33
+ "tsunami warning": "tsunami",
34
+ "earthquake warning": "earthquake",
35
+ "excessive heat warning": "heatwave",
36
+ "fire weather watch": "wildfire",
37
+ "red flag warning": "wildfire",
38
+ }
39
+
40
+
41
def collect_noaa(max_records: int = 5000) -> list[DisasterRecord]:
    """Collect active and recent weather alerts from NOAA (free, no auth).

    Requests ``NOAA_ALERTS_API`` 500 alerts at a time and follows the
    ``pagination.next`` cursor until ``max_records`` records are gathered,
    a page comes back empty, no usable cursor is offered, or a request fails.

    Args:
        max_records: Upper bound on the number of records returned.

    Returns:
        ``DisasterRecord`` objects with ``source="NOAA"``. Alerts that have
        neither a headline nor a description are skipped. A failed request
        ends collection early and returns what was gathered so far.
    """
    # Function-scope import, done once; the original imported inside the loop.
    from urllib.parse import parse_qs, urlparse

    records: list[DisasterRecord] = []
    cursor: str | None = None
    page_count = 0

    logger.info("Collecting NOAA weather alerts (max=%d)", max_records)

    while len(records) < max_records:
        params: dict[str, str | int] = {"limit": 500}
        if cursor:
            params["cursor"] = cursor

        try:
            response = httpx.get(NOAA_ALERTS_API, headers=HEADERS, params=params, timeout=30)
            response.raise_for_status()
            data = response.json()
        except Exception as exc:
            # Best effort: keep whatever was collected before the failure.
            logger.warning("NOAA request failed (page %d): %s", page_count, exc)
            break

        features = data.get("features", [])
        if not features:
            break

        for feature in features:
            if len(records) >= max_records:
                break

            # The `or` fallbacks also cover keys that are present but null.
            props = feature.get("properties") or {}
            event = str(props.get("event") or "weather event").lower()
            area = str(props.get("areaDesc") or "United States")
            headline = props.get("headline") or ""
            description = props.get("description") or ""
            severity_raw = str(props.get("severity", "unknown")).lower()

            summary = headline or description
            if not summary:
                continue
            summary = " ".join(summary.split())[:600]

            # First substring match in EVENT_NORMALIZE wins; otherwise strip
            # the " warning" / " watch" suffix from the raw event name.
            normalized_event = next(
                (v for k, v in EVENT_NORMALIZE.items() if k in event),
                event.replace(" warning", "").replace(" watch", "").strip(),
            )

            records.append(
                DisasterRecord(
                    source="NOAA",
                    event_type=normalized_event or "weather_event",
                    region=area[:100],
                    summary=summary,
                    severity=SEVERITY_MAP.get(severity_raw, "moderate"),
                )
            )

        # Pagination: the cursor is carried as a query parameter of `next`.
        next_url = (data.get("pagination") or {}).get("next")
        if not next_url:
            break

        cursor = parse_qs(urlparse(next_url).query).get("cursor", [None])[0]
        if not cursor:
            # Without a cursor the next request would fetch page one again
            # and fill the result with duplicates.
            logger.warning("NOAA pagination link has no cursor; stopping: %s", next_url)
            break

        page_count += 1
        time.sleep(0.5)

    logger.info("NOAA collection complete: %d records", len(records))
    return records
worlddisasterlm/data/collectors/openfema.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ OpenFEMA disaster declarations collector.
3
+
4
+ Free API – no authentication required.
5
+ Docs: https://www.fema.gov/about/openfema/api
6
+ """
7
+ import logging
8
+ import time
9
+
10
+ import httpx
11
+
12
+ from worlddisasterlm.data.schemas import DisasterRecord
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ OPENFEMA_API = "https://www.fema.gov/api/open/v2/DisasterDeclarationsSummaries"
17
+
18
# Substring -> normalised event type. Insertion order matters: the first
# key found inside the raw incident text decides the result.
INCIDENT_TYPE_MAP = {
    "hurricane": "hurricane",
    "flood": "flood",
    "tornado": "tornado",
    "earthquake": "earthquake",
    "fire": "wildfire",
    "winter storm": "winter_storm",
    "drought": "drought",
    "mudslide": "landslide",
    "typhoon": "typhoon",
    "tsunami": "tsunami",
    "volcano": "volcano",
    "chemical": "chemical",
    "biological": "epidemic",
}


def _map_incident(raw: str) -> str:
    """Normalise a raw incident type using ``INCIDENT_TYPE_MAP``.

    The input is lower-cased and checked against each key as a substring;
    the value of the first matching key is returned. When nothing matches,
    the lower-cased input itself is returned.
    """
    lowered = raw.lower()
    return next(
        (label for needle, label in INCIDENT_TYPE_MAP.items() if needle in lowered),
        lowered,
    )
41
+
42
+
43
def collect_openfema(max_records: int = 20000) -> list[DisasterRecord]:
    """Collect disaster declarations from OpenFEMA (free, no auth).

    Pages through ``OPENFEMA_API`` with ``$skip``/``$top`` (1000 rows per
    page, newest declaration first) and builds one record per declaration.

    Args:
        max_records: Upper bound on the number of records returned.

    Returns:
        ``DisasterRecord`` objects with ``source="OpenFEMA"`` and a fixed
        severity of "high". A failed request ends collection early and
        returns what was gathered so far.
    """
    records: list[DisasterRecord] = []
    skip = 0
    page_size = 1000

    logger.info("Collecting OpenFEMA disaster declarations (max=%d)", max_records)

    while len(records) < max_records:
        params = {
            "$format": "json",
            "$top": min(page_size, max_records - len(records)),
            "$skip": skip,
            "$orderby": "declarationDate desc",
            "$select": (
                "disasterNumber,declarationTitle,incidentType,"
                "declarationDate,state,incidentBeginDate,incidentEndDate,"
                "closeoutDate,declarationType"
            ),
        }

        try:
            response = httpx.get(OPENFEMA_API, params=params, timeout=60)
            response.raise_for_status()
            data = response.json()
        except Exception as exc:
            # Best effort: keep whatever was collected before the failure.
            logger.warning("OpenFEMA request failed at skip %d: %s", skip, exc)
            break

        declarations = data.get("DisasterDeclarationsSummaries", [])
        if not declarations:
            break

        for dec in declarations:
            # `or` fallbacks: a key that is present with a JSON null would
            # otherwise break the slicing / .lower() calls below.
            title = dec.get("declarationTitle") or ""
            incident_raw = dec.get("incidentType") or "disaster"
            state = dec.get("state") or "US"
            declaration_date = (dec.get("declarationDate") or "")[:10]

            incident_end = dec.get("incidentEndDate") or ""
            status = f"closed {incident_end[:10]}" if incident_end else "ongoing"

            summary = (
                f"Federal disaster declaration: {title}. "
                f"Incident type: {incident_raw}. "
                f"State/region: {state}. "
                f"Declared: {declaration_date}. Status: {status}."
            )

            records.append(
                DisasterRecord(
                    source="OpenFEMA",
                    event_type=_map_incident(incident_raw),
                    region=state,
                    summary=summary[:600],
                    severity="high",
                )
            )

        skip += len(declarations)
        logger.info("OpenFEMA: %d declarations collected", len(records))

        # A short page means the server has no more rows to offer.
        if len(declarations) < page_size:
            break

        time.sleep(0.3)

    logger.info("OpenFEMA collection complete: %d records", len(records))
    return records
worlddisasterlm/data/collectors/reliefweb.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ReliefWeb data collector.
3
+
4
+ Free API – no authentication required.
5
+ Docs: https://apidoc.rwlabs.org/
6
+ """
7
+ import logging
8
+ import time
9
+ from typing import Any
10
+
11
+ import httpx
12
+
13
+ from worlddisasterlm.data.schemas import DisasterRecord
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+ RELIEFWEB_API = "https://api.reliefweb.int/v1/reports"
18
+ PAGE_SIZE = 100
19
+ REQUEST_DELAY = 0.5 # seconds between requests
20
+
21
+
22
def _severity_from_fields(fields: dict[str, Any]) -> str:
    """Derive a severity label from a report's ``disaster_type`` entries.

    Each entry's ``name`` is lower-cased and searched for hazard keywords.
    Keywords are matched as substrings so that multi-word type names such
    as "Tropical Cyclone" or "Flash Flood" are recognised.

    Args:
        fields: The ``fields`` mapping of one report; ``disaster_type`` is
            expected to be a list of ``{"name": ...}`` mappings and may be
            missing or null.

    Returns:
        "critical" if any type mentions earthquake, tsunami, nuclear,
        hurricane or cyclone; otherwise "high" if any mentions flood,
        wildfire ("wild fire"), epidemic or drought; otherwise "moderate".
    """
    type_names = [
        str(entry.get("name") or "").lower()
        for entry in (fields.get("disaster_type") or [])
        if isinstance(entry, dict)
    ]

    def mentions(keywords: tuple[str, ...]) -> bool:
        return any(word in name for name in type_names for word in keywords)

    if mentions(("earthquake", "tsunami", "nuclear", "hurricane", "cyclone")):
        return "critical"
    if mentions(("flood", "wildfire", "wild fire", "epidemic", "drought")):
        return "high"
    return "moderate"
29
+
30
+
31
def collect_reliefweb(max_records: int = 5000) -> list[DisasterRecord]:
    """Collect disaster reports from ReliefWeb (free, no auth).

    Posts paged queries (``PAGE_SIZE`` reports per request, newest first,
    status "published") to ``RELIEFWEB_API`` and converts each report into
    a ``DisasterRecord`` with ``source="ReliefWeb"``.

    Args:
        max_records: Upper bound on the number of records returned.

    Returns:
        The collected records. Reports with neither body text nor a title
        are skipped. A failed request ends collection early and returns
        what was gathered so far.
    """
    # Function-scope import, done once; the original re-imported per item.
    import re

    tag_pattern = re.compile(r"<[^>]+>")
    records: list[DisasterRecord] = []
    offset = 0

    logger.info("Collecting ReliefWeb reports (max=%d)", max_records)

    while len(records) < max_records:
        batch_size = min(PAGE_SIZE, max_records - len(records))
        payload = {
            "limit": batch_size,
            "offset": offset,
            "fields": {
                "include": [
                    "title",
                    "body-html",
                    "primary_country.name",
                    "disaster_type.name",
                    "date.created",
                    "status",
                ]
            },
            "filter": {
                "operator": "AND",
                "conditions": [{"field": "status", "value": "published"}],
            },
            "sort": ["date.created:desc"],
        }

        try:
            # NOTE(review): appname is sent in the query string, which is
            # where the ReliefWeb API docs place it even for POST requests -
            # confirm against the current API documentation.
            response = httpx.post(
                RELIEFWEB_API,
                params={"appname": "worlddisasterlm"},
                json=payload,
                timeout=30,
            )
            response.raise_for_status()
            data = response.json()
        except Exception as exc:
            # Best effort: keep whatever was collected before the failure.
            logger.warning("ReliefWeb request failed at offset %d: %s", offset, exc)
            break

        items = data.get("data", [])
        if not items:
            break

        for item in items:
            fields = item.get("fields") or {}
            title = fields.get("title") or ""
            # Strip HTML tags simply; `or ""` guards against a null body.
            body_text = tag_pattern.sub(" ", fields.get("body-html") or "")
            body_text = " ".join(body_text.split())[:600]

            summary = body_text or title
            if not summary:
                continue

            country = fields.get("primary_country", {})
            region = country.get("name", "global") if isinstance(country, dict) else "global"

            dtype_list = fields.get("disaster_type") or []
            # Only the first listed disaster type names the event.
            event_type = (
                str(dtype_list[0].get("name") or "disaster").lower()
                if dtype_list
                else "disaster"
            )

            records.append(
                DisasterRecord(
                    source="ReliefWeb",
                    event_type=event_type,
                    region=region,
                    summary=summary,
                    severity=_severity_from_fields(fields),
                )
            )

        offset += len(items)
        logger.info("ReliefWeb: collected %d / %d", len(records), max_records)

        if len(items) < batch_size:
            break  # last page

        time.sleep(REQUEST_DELAY)

    logger.info("ReliefWeb collection complete: %d records", len(records))
    return records
worlddisasterlm/data/collectors/usgs.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ USGS Earthquake data collector.
3
+
4
+ Free API – no authentication required.
5
+ Docs: https://earthquake.usgs.gov/fdsnws/event/1/
6
+ """
7
+ import logging
8
+ import time
9
+ from datetime import datetime, timedelta
10
+
11
+ import httpx
12
+
13
+ from worlddisasterlm.data.schemas import DisasterRecord
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+ USGS_API = "https://earthquake.usgs.gov/fdsnws/event/1/query"
18
+
19
+
20
def _severity(magnitude: float) -> str:
    """Map an earthquake magnitude onto a severity label.

    Thresholds are inclusive: 7.5 and above is "critical", 6.0 and above
    is "high", 4.5 and above is "moderate", anything lower is "low".
    """
    thresholds = ((7.5, "critical"), (6.0, "high"), (4.5, "moderate"))
    for floor, label in thresholds:
        if magnitude >= floor:
            return label
    return "low"
28
+
29
+
30
def collect_usgs(
    years_back: int = 10,
    min_magnitude: float = 4.0,
    max_records: int = 20000,
) -> list[DisasterRecord]:
    """Collect earthquake data from USGS FDSN (free, no auth).

    Issues one query per 365-day window, starting at the current UTC time
    and walking backwards, until ``years_back`` windows have been requested
    or ``max_records`` records are gathered.

    Args:
        years_back: Number of 365-day windows to request.
        min_magnitude: Value sent as the ``minmagnitude`` query parameter.
        max_records: Upper bound on the number of records returned.

    Returns:
        ``DisasterRecord`` objects with ``source="USGS"`` and
        ``event_type="earthquake"``. A window whose request fails is logged
        and skipped; the remaining windows are still attempted.
    """
    # Function-scope import: the module header only imports datetime/timedelta.
    from datetime import timezone

    records: list[DisasterRecord] = []
    # USGS API max 20 000 per call; chunk by year to avoid that limit
    # datetime.utcnow() is deprecated; the aware equivalent formats identically.
    end_time = datetime.now(timezone.utc)

    for year_offset in range(years_back):
        if len(records) >= max_records:
            break

        year_end = end_time - timedelta(days=365 * year_offset)
        year_start = end_time - timedelta(days=365 * (year_offset + 1))

        params = {
            "format": "geojson",
            "starttime": year_start.strftime("%Y-%m-%dT%H:%M:%S"),
            "endtime": year_end.strftime("%Y-%m-%dT%H:%M:%S"),
            "minmagnitude": min_magnitude,
            "orderby": "time",
            "limit": min(20000, max_records - len(records)),
        }

        try:
            response = httpx.get(USGS_API, params=params, timeout=60)
            response.raise_for_status()
            data = response.json()
        except Exception as exc:
            # Best effort: one failed window must not abort the others.
            logger.warning("USGS request failed for year offset %d: %s", year_offset, exc)
            continue

        features = data.get("features", [])
        logger.info(
            "USGS year -%d: %d earthquakes fetched", year_offset + 1, len(features)
        )

        for feature in features:
            # `or` fallbacks also cover keys that are present but null.
            props = feature.get("properties") or {}
            mag = props.get("mag") or 0.0
            place = props.get("place") or "Unknown location"
            title = props.get("title") or f"M{mag} earthquake"
            # A null alert would otherwise be rendered as the text "None".
            alert = props.get("alert") or "none"

            summary = (
                f"Magnitude {mag:.1f} earthquake reported near {place}. "
                f"Alert level: {alert}. "
                f"Tsunami risk: {'yes' if props.get('tsunami') else 'no'}. "
                f"{title}"
            )

            records.append(
                DisasterRecord(
                    source="USGS",
                    event_type="earthquake",
                    region=place,
                    summary=summary[:600],
                    severity=_severity(mag),
                )
            )

        time.sleep(0.3)

    logger.info("USGS collection complete: %d records", len(records))
    return records[:max_records]
worlddisasterlm/data/collectors/who_rss.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ WHO Disease Outbreak News (DON) RSS collector.
3
+
4
+ Free feed – no authentication required.
5
+ Feed: https://www.who.int/feeds/entity/csr/don/en/rss.xml
6
+ """
7
+ import logging
8
+ import re
9
+
10
+ import feedparser
11
+
12
+ from worlddisasterlm.data.schemas import DisasterRecord
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ WHO_DON_RSS = "https://www.who.int/feeds/entity/csr/don/en/rss.xml"
17
+
18
+
19
# Keyword -> event type. Insertion order matters: the first keyword found
# in the text decides the classification.
DISEASE_KEYWORDS = {
    "ebola": "epidemic",
    "cholera": "epidemic",
    "dengue": "epidemic",
    "malaria": "epidemic",
    "covid": "pandemic",
    "influenza": "epidemic",
    "mpox": "epidemic",
    "monkeypox": "epidemic",
    "plague": "epidemic",
    "meningitis": "epidemic",
    "lassa": "epidemic",
    "marburg": "epidemic",
    "rift valley": "epidemic",
    "yellow fever": "epidemic",
    "polio": "epidemic",
    "measles": "epidemic",
    "typhoid": "epidemic",
    "hepatitis": "epidemic",
}


def _classify_event(text: str) -> str:
    """Classify free text by the first ``DISEASE_KEYWORDS`` key it contains.

    Matching is a case-insensitive substring search. Text that contains no
    known keyword is classified as "public_health".
    """
    haystack = text.lower()
    return next(
        (label for needle, label in DISEASE_KEYWORDS.items() if needle in haystack),
        "public_health",
    )
47
+
48
+
49
def _clean(text: str) -> str:
    """Remove markup tags from *text* and return at most 600 characters.

    Each ``<...>`` tag is replaced by a space, runs of whitespace are
    collapsed to single spaces, and a falsy input yields an empty string.
    """
    stripped = re.sub(r"<[^>]+>", " ", text if text else "")
    words = stripped.split()
    return " ".join(words)[:600]
52
+
53
+
54
def collect_who(max_records: int = 1000) -> list[DisasterRecord]:
    """Collect WHO disease outbreak news (free, no auth).

    Parses the ``WHO_DON_RSS`` feed and converts each entry into a
    ``DisasterRecord`` with ``source="WHO"`` and a fixed severity of "high".

    Args:
        max_records: Upper bound on the number of records returned.

    Returns:
        The collected records; an empty list when the feed cannot be
        parsed. Entries with neither a summary nor a title are skipped.
    """
    records: list[DisasterRecord] = []

    logger.info("Parsing WHO Disease Outbreak News RSS feed")

    try:
        feed = feedparser.parse(WHO_DON_RSS)
    except Exception as exc:
        logger.warning("Failed to parse WHO RSS: %s", exc)
        return records

    if feed.bozo and feed.bozo_exception:
        logger.warning("WHO RSS parse warning: %s", feed.bozo_exception)

    for entry in feed.entries:
        if len(records) >= max_records:
            break

        title = entry.get("title", "")
        summary_raw = entry.get("summary", entry.get("description", ""))
        summary = _clean(summary_raw) or title

        if not summary:
            continue

        combined = f"{title} {summary}"
        event_type = _classify_event(combined)

        # Try to extract country from title (e.g. "Ebola virus disease – Democratic Republic of the Congo")
        region = "global"
        if "–" in title:
            region = title.rsplit("–", 1)[-1].strip() or "global"
        elif " - " in title:
            # Only a space-delimited hyphen is a separator; a bare "-" also
            # appears inside names such as "COVID-19" and must not be split.
            region = title.rsplit(" - ", 1)[-1].strip() or "global"

        records.append(
            DisasterRecord(
                source="WHO",
                event_type=event_type,
                region=region[:100],
                summary=summary,
                severity="high",
            )
        )

    logger.info("WHO collection complete: %d records", len(records))
    return records
worlddisasterlm/data/etl.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections.abc import Iterable
2
+
3
+ from worlddisasterlm.data.schemas import DisasterRecord
4
+
5
+
6
class DisasterETL:
    """Extract/transform helpers operating on ``DisasterRecord`` objects."""

    def collect_records(self) -> list[DisasterRecord]:
        """Return three hard-coded sample records.

        Placeholder: replace with API clients and ingestion jobs for
        production-scale collection.
        """
        # (source, event_type, region, summary, severity)
        sample_rows = (
            (
                "ReliefWeb",
                "flood",
                "South Asia",
                "Severe flooding displaced 12000 people and disrupted road access.",
                "high",
            ),
            (
                "WHO",
                "epidemic",
                "East Africa",
                "Localized cholera outbreak with urgent water sanitation requirements.",
                "high",
            ),
            (
                "USGS",
                "earthquake",
                "Pacific Rim",
                "Magnitude 6.8 earthquake with aftershock risk and infrastructure damage.",
                "critical",
            ),
        )
        return [
            DisasterRecord(
                source=source,
                event_type=event_type,
                region=region,
                summary=summary,
                severity=severity,
            )
            for source, event_type, region, summary, severity in sample_rows
        ]

    def deduplicate(self, records: Iterable[DisasterRecord]) -> list[DisasterRecord]:
        """Drop repeated records, keeping the first occurrence of each.

        Two records are duplicates when their source, event type, region
        and summary are all equal; severity is not part of the key.
        """
        first_seen: dict[tuple[str, str, str, str], DisasterRecord] = {}
        for rec in records:
            identity = (rec.source, rec.event_type, rec.region, rec.summary)
            # setdefault keeps the earliest record; dicts preserve insertion order.
            first_seen.setdefault(identity, rec)
        return list(first_seen.values())

    def normalize(self, records: Iterable[DisasterRecord]) -> list[DisasterRecord]:
        """Return cleaned copies of *records*.

        Source and region are stripped, event type and severity are stripped
        and lower-cased, and whitespace runs in the summary are collapsed.
        """
        return [
            DisasterRecord(
                source=rec.source.strip(),
                event_type=rec.event_type.strip().lower(),
                region=rec.region.strip(),
                summary=" ".join(rec.summary.split()),
                severity=rec.severity.strip().lower(),
            )
            for rec in records
        ]
worlddisasterlm/data/processors.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from pathlib import Path
3
+
4
+ from worlddisasterlm.data.schemas import DisasterRecord, InstructionSample
5
+ from worlddisasterlm.utils.io import ensure_dir
6
+
7
+
8
def build_instruction_dataset(records: list[DisasterRecord]) -> list[InstructionSample]:
    """Build one instruction sample per record.

    Every sample shares the same instruction and the same four-step output;
    only the input varies, describing the record's region, event type,
    severity and summary.
    """
    prompt = "Assess the incident and provide emergency response steps."
    answer = (
        "1) Verify official alerts and incident perimeter. "
        "2) Prioritize life-saving response and medical triage. "
        "3) Coordinate shelter, water, food, and transport logistics. "
        "4) Share multilingual updates every 30 minutes."
    )

    def describe(rec: DisasterRecord) -> str:
        return (
            f"Region: {rec.region}\nEvent: {rec.event_type}\nSeverity: {rec.severity}\n"
            f"Situation: {rec.summary}"
        )

    return [
        InstructionSample(instruction=prompt, input=describe(rec), output=answer)
        for rec in records
    ]
24
+
25
+
26
def save_instruction_dataset(samples: list[InstructionSample], output_path: str) -> Path:
    """Write *samples* to *output_path* as JSON Lines and return the path.

    The parent directory is passed to ``ensure_dir`` first. Each sample's
    ``__dict__`` is serialised on its own line as UTF-8 without ASCII
    escaping; an existing file is overwritten.
    """
    destination = Path(output_path)
    ensure_dir(destination.parent)
    with destination.open("w", encoding="utf-8") as sink:
        sink.writelines(
            json.dumps(entry.__dict__, ensure_ascii=False) + "\n" for entry in samples
        )
    return destination
worlddisasterlm/data/qa_generator.py ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ QA pair generator.
3
+
4
+ Converts raw DisasterRecord objects into diverse instruction-following samples.
5
+ Each record generates 8–10 QA variants to amplify the training corpus.
6
+ """
7
+ import random
8
+
9
+ from worlddisasterlm.data.schemas import DisasterRecord, InstructionSample
10
+
11
+ RESPONSE_STEPS = {
12
+ "earthquake": [
13
+ "Drop, Cover, and Hold On immediately.",
14
+ "Move away from windows, glass, and heavy furniture.",
15
+ "If outdoors, move to open space away from buildings.",
16
+ "After shaking stops, check for injuries and hazards.",
17
+ "Be prepared for aftershocks.",
18
+ "Do not use elevators; evacuate via stairways if building is unsafe.",
19
+ "Listen to emergency broadcasts for official guidance.",
20
+ ],
21
+ "flood": [
22
+ "Move immediately to higher ground; do not wait for official evacuation order.",
23
+ "Avoid walking or driving through flood waters – 6 inches can knock you down.",
24
+ "Disconnect electrical appliances; do not touch electrical equipment if wet.",
25
+ "Follow evacuation routes designated by local emergency management.",
26
+ "Store emergency supplies including water, food, and medication for 72 hours.",
27
+ "Monitor official weather and emergency alerts for updates.",
28
+ ],
29
+ "wildfire": [
30
+ "Evacuate immediately when ordered – do not wait.",
31
+ "Close all windows and doors to reduce smoke penetration.",
32
+ "Remove flammable items from around your home before leaving.",
33
+ "Wear N95 masks or wet cloth to protect against smoke inhalation.",
34
+ "Stay tuned to emergency broadcasts for evacuation route updates.",
35
+ ],
36
+ "hurricane": [
37
+ "Secure or bring indoors all outdoor furniture and objects.",
38
+ "Board up windows and reinforce garage doors.",
39
+ "Prepare emergency kit: water, food, medications, documents.",
40
+ "Know your evacuation zone and route.",
41
+ "Stay indoors during the storm; the eye of the hurricane is not the all-clear.",
42
+ "After the storm, watch for downed power lines and contaminated water.",
43
+ ],
44
+ "epidemic": [
45
+ "Report cases to local health authorities immediately.",
46
+ "Implement infection control measures: PPE, isolation protocols.",
47
+ "Coordinate with WHO, CDC, and national health agencies.",
48
+ "Establish clear case definition and surveillance system.",
49
+ "Activate contact tracing and quarantine procedures.",
50
+ "Communicate clearly with the public to prevent panic.",
51
+ ],
52
+ "pandemic": [
53
+ "Follow national health authority guidelines.",
54
+ "Implement non-pharmaceutical interventions: masking, distancing.",
55
+ "Prioritize healthcare system capacity management.",
56
+ "Accelerate vaccine development and equitable distribution.",
57
+ "Coordinate international response through WHO frameworks.",
58
+ ],
59
+ "default": [
60
+ "Activate emergency response plan immediately.",
61
+ "Prioritize life safety: triage injuries, evacuate if necessary.",
62
+ "Contact emergency services (fire, police, medical) as appropriate.",
63
+ "Coordinate with local emergency management authority.",
64
+ "Set up incident command structure.",
65
+ "Pre-position supplies: water, food, medical equipment, shelter.",
66
+ "Communicate regularly with affected population in plain language.",
67
+ "Document all actions for accountability and after-action review.",
68
+ ],
69
+ }
70
+
71
+ RESOURCE_GUIDANCE = {
72
+ "earthquake": "Search and rescue teams, medical triage units, heavy machinery for debris removal, temporary shelter, water purification, emergency food supplies, structural engineers.",
73
+ "flood": "Boats and water rescue teams, pumping equipment, water purification, temporary shelters on elevated ground, food and medical supplies, sanitation units.",
74
+ "wildfire": "Aerial firefighting assets, ground crews, evacuation transport, respiratory medical support, temporary shelters, animal rescue resources.",
75
+ "hurricane": "Pre-positioned food, water and fuel, emergency shelters, power restoration crews, debris removal, search and rescue teams, mental health support.",
76
+ "epidemic": "Medical personnel with PPE, testing kits, contact tracing capacity, isolation facilities, treatment medicines, communication system.",
77
+ "default": "Emergency medical teams, shelter supplies, clean water and food, communication equipment, transport resources, coordination staff.",
78
+ }
79
+
80
+
81
def _get_response_steps(event_type: str) -> list[str]:
    """Return the response steps for the first ``RESPONSE_STEPS`` key found in *event_type*.

    Keys are tried in dictionary order as case-insensitive substrings of
    the event type; the "default" entry is used when none matches.
    """
    lowered = event_type.lower()
    matched = next((name for name in RESPONSE_STEPS if name in lowered), "default")
    return RESPONSE_STEPS[matched]
86
+
87
+
88
def _get_resources(event_type: str) -> str:
    """Return the resource guidance for the first ``RESOURCE_GUIDANCE`` key found in *event_type*.

    Keys are tried in dictionary order as case-insensitive substrings of
    the event type; the "default" entry is used when none matches.
    """
    lowered = event_type.lower()
    matched = next((name for name in RESOURCE_GUIDANCE if name in lowered), "default")
    return RESOURCE_GUIDANCE[matched]
93
+
94
+
95
def generate_qa_pairs(records: list[DisasterRecord]) -> list[InstructionSample]:
    """Expand every disaster record into eight templated instruction samples.

    For each record the same context block (region, hazard type, severity,
    situation summary) is used as the sample ``input``.  Eight fixed
    instructions are paired with outputs rendered from the record's fields:
    immediate response, incident summary, resource planning, risk assessment,
    humanitarian aid, preparedness, public communication and recovery
    planning, in that order.

    Args:
        records: Disaster records to expand.

    Returns:
        A flat list holding eight ``InstructionSample`` objects per record,
        in record order.
    """
    samples: list[InstructionSample] = []

    for record in records:
        # Hazard-specific building blocks shared by several templates below.
        numbered_steps = "\n".join(
            f"{position}. {step}"
            for position, step in enumerate(
                _get_response_steps(record.event_type), start=1
            )
        )
        resources = _get_resources(record.event_type)

        context = "\n".join(
            [
                f"Region: {record.region}",
                f"Hazard type: {record.event_type}",
                f"Severity: {record.severity}",
                f"Situation: {record.summary}",
            ]
        )

        # (instruction, output) pairs, kept in the order they are emitted.
        qa_table = [
            # QA 1 – immediate response
            (
                "What are the immediate emergency response steps for this disaster situation?",
                f"Immediate response priorities for a {record.severity}-severity "
                f"{record.event_type} event:\n\n{numbered_steps}",
            ),
            # QA 2 – incident summary
            (
                "Summarize this disaster event for an emergency operations center briefing.",
                f"SITUATION REPORT — {record.event_type.upper()} | {record.region}\n\n"
                f"Severity: {record.severity.capitalize()}\n"
                f"Source: {record.source}\n\n"
                f"Summary: {record.summary}\n\n"
                "Status: Active monitoring required. Coordinate with local authorities and relevant UN agencies.",
            ),
            # QA 3 – resource planning
            (
                "What resources and logistics are needed to respond to this disaster?",
                f"Resource requirements for a {record.event_type} event in {record.region} "
                f"(severity: {record.severity}):\n\n"
                f"{resources}\n\n"
                "Logistics priorities: establish forward operating base near the affected area, "
                "pre-position supplies along access routes, coordinate air and ground transport, "
                "and establish communication links with local partners.",
            ),
            # QA 4 – risk assessment
            (
                "Perform a risk assessment for this disaster scenario.",
                f"Risk Assessment: {record.event_type.capitalize()} in {record.region}\n\n"
                f"Risk Level: {record.severity.capitalize()}\n"
                f"Primary hazard: {record.event_type}\n"
                "Secondary hazards: displacement, water contamination, disease outbreak, infrastructure collapse\n"
                "Vulnerable populations: elderly, children, persons with disabilities, low-income households\n\n"
                "Recommended risk reduction actions:\n"
                "1. Activate early warning dissemination for at-risk zones.\n"
                "2. Pre-position emergency supplies and first responder teams.\n"
                "3. Establish coordination hub with government and NGO partners.\n"
                "4. Issue public guidance in multiple local languages.",
            ),
            # QA 5 – humanitarian aid
            (
                "What humanitarian aid priorities should be activated for this disaster?",
                f"Humanitarian Aid Priorities for {record.event_type} in {record.region}:\n\n"
                "1. Life-saving: Search and rescue, emergency medical care, trauma treatment.\n"
                "2. Basic needs: Emergency shelter, safe water, food assistance, sanitation.\n"
                "3. Protection: Safety monitoring for displaced persons, child protection, GBV prevention.\n"
                "4. Livelihoods: Cash transfers and livelihood support for affected households.\n"
                "5. Recovery: Debris clearance, shelter repair, economic recovery planning.\n\n"
                "Key coordination partners: OCHA, UNHCR, WFP, UNICEF, WHO, local disaster management authority.",
            ),
            # QA 6 – preparedness
            (
                "How can communities in this region prepare for this type of disaster?",
                f"Community Preparedness for {record.event_type} in {record.region}:\n\n"
                "1. Develop and rehearse household emergency plans.\n"
                "2. Build 72-hour emergency supply kits (water, food, medication, documents).\n"
                "3. Know evacuation routes and local shelter locations.\n"
                "4. Participate in community early warning systems.\n"
                "5. Strengthen local infrastructure and building codes.\n"
                "6. Conduct regular drills with schools, workplaces, and community organizations.\n"
                "7. Ensure vulnerable populations have specific support plans.",
            ),
            # QA 7 – public communication
            (
                "Draft an emergency public communication message for this disaster.",
                f"EMERGENCY ALERT — {record.region.upper()}\n\n"
                f"A {record.severity}-severity {record.event_type} event has been reported.\n\n"
                "IMMEDIATE ACTIONS REQUIRED:\n"
                "• Follow official evacuation orders immediately.\n"
                "• Move to designated shelters or higher ground.\n"
                "• Call emergency services for life-threatening situations.\n"
                "• Do NOT spread unverified information.\n\n"
                "Stay tuned to official government and emergency management channels for updates.\n"
                "[This message should be verified and issued by the authorised emergency management authority.]",
            ),
            # QA 8 – recovery planning
            (
                "What are the key steps in disaster recovery planning after this event?",
                f"Recovery Planning Framework: {record.event_type} — {record.region}\n\n"
                "Phase 1 (0–72 hours): Life safety, damage assessment, displaced persons registration.\n"
                "Phase 2 (1–4 weeks): Temporary shelter provision, debris clearance, basic services restoration.\n"
                "Phase 3 (1–6 months): Infrastructure repair, economic recovery, psychosocial support.\n"
                "Phase 4 (6+ months): Long-term reconstruction, risk reduction investments, lesson-learned review.\n\n"
                "Key principles: Build Back Better, inclusion of marginalized groups, environmental sustainability, "
                "community ownership of the recovery process.",
            ),
        ]

        samples.extend(
            InstructionSample(instruction=question, input=context, output=answer)
            for question, answer in qa_table
        )

    return samples
worlddisasterlm/data/scenario_builder.py ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Synthetic emergency scenario builder.
3
+
4
+ Builds instruction samples for cross-hazard compound disaster scenarios and
5
+ multilingual safety guidance to enrich training diversity.
6
+ """
7
+ from worlddisasterlm.data.schemas import InstructionSample
8
+
9
# Hand-authored compound (multi-hazard) emergency scenarios.
# Every entry is a dict with the string keys "scenario", "region",
# "event_type", "severity" and "response".  build_compound_scenarios() renders
# the first four into the sample input and uses "response" as the output.
COMPOUND_SCENARIOS = [
    {
        "scenario": "An earthquake of magnitude 7.2 struck a coastal city causing building collapses, "
        "followed 45 minutes later by a tsunami warning.",
        "region": "Pacific coastal city",
        "event_type": "compound: earthquake + tsunami",
        "severity": "critical",
        "response": (
            "Compound Disaster Response — Earthquake + Tsunami:\n\n"
            "IMMEDIATE (0–15 min):\n"
            "1. Issue tsunami warning and mandatory coastal evacuation order immediately.\n"
            "2. Search and rescue operations for earthquake-trapped victims must start inland first.\n"
            "3. Hospitals on high ground remain operational; coastal facilities evacuate patients.\n\n"
            "SHORT-TERM (15 min – 6 hours):\n"
            "4. Establish triage stations above tsunami inundation zone.\n"
            "5. Account for all search and rescue teams before wave arrival.\n"
            "6. Deploy helicopters for coastal cliff rescues post-wave.\n\n"
            "CRITICAL NOTE: Aftershocks will compromise already-damaged structures. "
            "Do not re-enter buildings until structural engineers clear them."
        ),
    },
    {
        "scenario": "A category 4 hurricane is forecast to make landfall in 48 hours. "
        "The region already has 30,000 displaced persons from last week's flooding.",
        "region": "Caribbean island",
        "event_type": "compound: hurricane + displaced persons",
        "severity": "critical",
        "response": (
            "Compound Emergency — Hurricane Landfall with Pre-existing Displacement:\n\n"
            "PRE-LANDFALL (48–24 hours):\n"
            "1. Mandatory evacuation of all existing displacement camps in low-lying areas.\n"
            "2. Identify inland strong buildings for mass shelter; assess capacity.\n"
            "3. Pre-position 72-hour food and water supplies at inland shelters.\n"
            "4. Coordinate with UNHCR and local government for camp-to-shelter transfers.\n\n"
            "PRE-LANDFALL (24–0 hours):\n"
            "5. All personnel secured; close airport; ban sea transport.\n"
            "6. Emergency services on stand-by in hardened facilities.\n\n"
            "POST-LANDFALL:\n"
            "7. Damage assessment sweep before re-opening any displaced camp.\n"
            "8. Restore access roads before relief convoy mobilization."
        ),
    },
    {
        "scenario": "During a wildfire evacuation, a fuel tanker overturned on the primary "
        "evacuation highway, blocking egress for 15,000 evacuees.",
        "region": "Southern California",
        "event_type": "compound: wildfire + infrastructure failure",
        "severity": "critical",
        "response": (
            "Multi-Hazard Evacuation Disruption — Wildfire + Road Blockage:\n\n"
            "1. Activate alternate evacuation routes immediately; publish via Emergency Alert System.\n"
            "2. Dispatch hazmat team to fuel tanker — assess fire ignition risk before moving vehicle.\n"
            "3. If tanker poses fire risk: keep 300m standoff; use bulldozer to open shoulder lane.\n"
            "4. Coordinate law enforcement for contraflow on alternate highways.\n"
            "5. Deploy CHP and National Guard for traffic management at choke points.\n"
            "6. Air resources pivot to aerial evacuation for mobility-impaired stranded persons.\n"
            "7. Communicate updated routes every 15 minutes via radio, app, and social media."
        ),
    },
]
69
+
70
# Hand-authored safety guidance in languages other than English.
# Every entry is a dict with the string keys "lang" (language name),
# "instruction" (the question, written in that language) and "output" (the
# answer, written in that language).  build_multilingual_samples() turns each
# entry into an InstructionSample with an empty input.
#
# NOTE: the text must stay valid UTF-8.  The first line of the Hindi answer
# previously contained U+FFFD replacement characters where the letter "क"
# belongs; it now reads "बाढ़ के दौरान:", matching the Hindi instruction.
MULTILINGUAL_SAMPLES = [
    {
        "lang": "Nepali",
        "instruction": "भूकम्पको बेला के गर्ने?",
        "output": (
            "भूकम्पको बेला:\n"
            "१. तुरुन्त झुक्नुहोस्, ढाक्नुहोस् र समात्नुहोस् (Drop, Cover, Hold On)।\n"
            "२. झ्याल, ऐना र भारी सामानबाट टाढा रहनुहोस्।\n"
            "३. बाहिर हुनुहुन्छ भने खुला ठाउँमा जानुहोस् — भवन, रुख र बिजुलीका तारबाट टाढा।\n"
            "४. हल्लाउन बन्द भएपछि चोटपटक जाँच गर्नुहोस् र सावधानीसाथ बाहिर निस्कनुहोस्।\n"
            "५. परकम्पको लागि तयार रहनुहोस्।\n"
            "गम्भीर घाइतेको लागि तुरुन्त आपतकालीन सेवा (१०१ वा स्थानीय नम्बर) मा फोन गर्नुहोस्।"
        ),
    },
    {
        "lang": "Nepali",
        "instruction": "बाढीको बेला के गर्ने?",
        "output": (
            "बाढीको बेला:\n"
            "१. तुरुन्त अग्लो ठाउँमा जानुहोस् — निकासी आदेशको प्रतीक्षा नगर्नुहोस्।\n"
            "२. बाढीको पानीमा नहिँड्नुहोस् वा गाडी नचलाउनुहोस् — ६ इन्च पानीले पनि गिराउन सक्छ।\n"
            "३. बिजुलीका उपकरणहरू बन्द गर्नुहोस्।\n"
            "४. स्थानीय अधिकारीको निकासी मार्ग पछ्याउनुहोस्।\n"
            "५. आधिकारिक अलर्टको लागि रेडियो सुन्नुहोस्।\n"
            "खतरामा परेमा तुरुन्त आपतकालीन सेवामा सम्पर्क गर्नुहोस्।"
        ),
    },
    {
        "lang": "Nepali",
        "instruction": "विपद् व्यवस्थापनको लागि आधारभूत तयारी के के गर्नुपर्छ?",
        "output": (
            "विपद् तयारीका आधारभूत कदमहरू:\n"
            "१. परिवारको आपतकालीन योजना बनाउनुहोस् र अभ्यास गर्नुहोस्।\n"
            "२. ७२ घण्टाको आपतकालीन किट तयार राख्नुहोस्: पानी, खाना, औषधि, कागजात।\n"
            "३. निकासी मार्ग र नजिकको आश्रयस्थलको जानकारी राख्नुहोस्।\n"
            "४. समुदायको पूर्व चेतावनी प्रणालीमा सहभागी हुनुहोस्।\n"
            "५. विद्यालय, कार्यस्थल र समुदायमा नियमित अभ्यास गर्नुहोस्।\n"
            "राष्ट्रिय विपद् जोखिम न्यूनीकरण तथा व्यवस्थापन प्राधिकरण (NDRRMA) को निर्देशिका पालना गर्नुहोस्।"
        ),
    },
    {
        "lang": "Spanish",
        "instruction": "¿Qué debo hacer durante un terremoto?",
        "output": (
            "Durante un terremoto:\n"
            "1. Agáchese, cúbrase y sujétese (Drop, Cover, Hold On).\n"
            "2. Aléjese de ventanas, espejos y objetos pesados.\n"
            "3. Si está en el exterior, diríjase a un espacio abierto.\n"
            "4. Después del sismo, verifique heridos y salga con cuidado.\n"
            "5. Esté preparado para réplicas.\n"
            "Llame al número de emergencias de su país para reportar heridos graves."
        ),
    },
    {
        "lang": "French",
        "instruction": "Quelles sont les étapes à suivre en cas d'inondation ?",
        "output": (
            "En cas d'inondation :\n"
            "1. Montez immédiatement en hauteur – ne restez pas au niveau du sol.\n"
            "2. N'essayez jamais de traverser des eaux en crue, même à pied.\n"
            "3. Coupez l'électricité et le gaz si c'est sans danger.\n"
            "4. Suivez les consignes d'évacuation des autorités locales.\n"
            "5. Appelez les secours si vous êtes en danger immédiat.\n"
            "Restez à l'écoute des alertes officielles."
        ),
    },
    {
        "lang": "Arabic",
        "instruction": "ما هي الخطوات الواجب اتخاذها أثناء الإعصار؟",
        "output": (
            "أثناء الإعصار:\n"
            "١. ابق في الداخل بعيداً عن النوافذ والأبواب الزجاجية.\n"
            "٢. اذهب إلى الطابق الأرضي أو الغرفة الداخلية دون نوافذ.\n"
            "٣. لا تخرج لأن عين الإعصار تبدو هادئة لكنها مؤقتة.\n"
            "٤. استمع إلى تعليمات الدفاع المدني عبر الراديو أو التليفزيون.\n"
            "٥. ابتعد عن مجاري المياه والمناطق المنخفضة بعد مرور الإعصار."
        ),
    },
    {
        "lang": "Hindi",
        "instruction": "बाढ़ के दौरान क्या करें?",
        "output": (
            "बाढ़ के दौरान:\n"
            "1. तुरंत ऊंची जगह पर जाएं।\n"
            "2. बाढ़ के पानी में न चलें—6 इंच पानी भी गिरा सकता है।\n"
            "3. बिजली के उपकरण बंद करें।\n"
            "4. आधिकारिक निकासी मार्ग का पालन करें।\n"
            "5. आपातकालीन अलर्ट के लिए रेडियो सुनें।\n"
            "खतरे में होने पर तुरंत आपातकालीन सेवा को कॉल करें।"
        ),
    },
]
162
+
163
+
164
def build_compound_scenarios() -> list[InstructionSample]:
    """Convert every ``COMPOUND_SCENARIOS`` entry into an instruction sample.

    All samples share one fixed instruction.  The input lists the scenario
    text, region, event type and severity on separate lines, and the output
    is the entry's pre-written response.
    """
    prompt = "Provide a comprehensive emergency response plan for this compound disaster scenario."
    return [
        InstructionSample(
            instruction=prompt,
            input="\n".join(
                (
                    f"Scenario: {entry['scenario']}",
                    f"Region: {entry['region']}",
                    f"Event type: {entry['event_type']}",
                    f"Severity: {entry['severity']}",
                )
            ),
            output=entry["response"],
        )
        for entry in COMPOUND_SCENARIOS
    ]
180
+
181
+
182
def build_multilingual_samples() -> list[InstructionSample]:
    """Convert every ``MULTILINGUAL_SAMPLES`` entry into an instruction sample.

    Each sample carries the entry's instruction and output verbatim, an empty
    input, and the entry's ``"lang"`` value as its language.
    """
    return [
        InstructionSample(
            instruction=entry["instruction"],
            input="",
            output=entry["output"],
            language=entry["lang"],
        )
        for entry in MULTILINGUAL_SAMPLES
    ]
194
+
195
+
196
def build_all_scenarios() -> list[InstructionSample]:
    """Return the compound-scenario samples followed by the multilingual samples."""
    compound = build_compound_scenarios()
    multilingual = build_multilingual_samples()
    return compound + multilingual
worlddisasterlm/data/schemas.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+
3
+
4
@dataclass
class DisasterRecord:
    """A single disaster event described by five free-text fields."""

    # Name of the origin the record came from (presumably a feed or database
    # such as ReliefWeb/USGS/GDACS — confirm against the collectors).
    source: str
    # Hazard label, e.g. an earthquake or flood description.
    event_type: str
    # Affected area.
    region: str
    # Short narrative of the situation.
    summary: str
    # Severity label (presumably values like "critical" — confirm).
    severity: str
+ severity: str
11
+
12
+
13
@dataclass
class InstructionSample:
    """One instruction / input / output training example with a language tag."""

    # The task or question posed.
    instruction: str
    # Supporting context for the instruction; may be an empty string.
    input: str
    # The expected answer text.
    output: str
    # Name of the language the sample is written in; defaults to "English".
    language: str = "English"
worlddisasterlm/data/sources.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Acronyms of international organisations used as a static reference list.
# NOTE(review): no consumer of this list is visible here — confirm usage.
INTERNATIONAL_ORGS = [
    "UN",
    "UNDRR",
    "WHO",
    "UNICEF",
    "WFP",
    "UNHCR",
]
9
+
10
# Names of disaster / hazard data providers kept as a static reference list.
# NOTE(review): no consumer of this list is visible here — confirm usage.
DISASTER_DATABASES = [
    "EM-DAT",
    "ReliefWeb",
    "GDACS",
    "NASA Earth Data",
    "NOAA",
    "USGS",
    "FEMA",
    "World Bank Open Data",
]
20
+
21
# Categories of research / reference material kept as a static list of labels.
# NOTE(review): no consumer of this list is visible here — confirm usage.
RESEARCH_SOURCES = [
    "Scientific papers",
    "Government reports",
    "Emergency response manuals",
    "Disaster preparedness guidelines",
    "Humanitarian response frameworks",
]
28
+
29
# Categories of real-time information feeds kept as a static list of labels.
# NOTE(review): no consumer of this list is visible here — confirm usage.
REAL_TIME_SOURCES = [
    "Weather feeds",
    "Satellite imagery metadata",
    "Emergency alerts",
    "Public safety bulletins",
]