Text Generation
Transformers
PEFT
llama
disaster-management
emergency-response
humanitarian-ai
multilingual
fine-tuned
qlora
lora
llama3
conversational
4-bit precision
bitsandbytes
Instructions to use drdeveloper88/WorldDisasterLM-8B with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use drdeveloper88/WorldDisasterLM-8B with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("text-generation", model="drdeveloper88/WorldDisasterLM-8B") messages = [ {"role": "user", "content": "Who are you?"}, ] pipe(messages)# Load model directly from transformers import AutoTokenizer, AutoModelForCausalLM tokenizer = AutoTokenizer.from_pretrained("drdeveloper88/WorldDisasterLM-8B") model = AutoModelForCausalLM.from_pretrained("drdeveloper88/WorldDisasterLM-8B") messages = [ {"role": "user", "content": "Who are you?"}, ] inputs = tokenizer.apply_chat_template( messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt", ).to(model.device) outputs = model.generate(**inputs, max_new_tokens=40) print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:])) - PEFT
How to use drdeveloper88/WorldDisasterLM-8B with PEFT:
Task type is invalid.
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use drdeveloper88/WorldDisasterLM-8B with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "drdeveloper88/WorldDisasterLM-8B"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{ "model": "drdeveloper88/WorldDisasterLM-8B", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }'
Use Docker
docker model run hf.co/drdeveloper88/WorldDisasterLM-8B
- SGLang
How to use drdeveloper88/WorldDisasterLM-8B with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "drdeveloper88/WorldDisasterLM-8B" \
  --host 0.0.0.0 \
  --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{ "model": "drdeveloper88/WorldDisasterLM-8B", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }'
Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
  --model-path "drdeveloper88/WorldDisasterLM-8B" \
  --host 0.0.0.0 \
  --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{ "model": "drdeveloper88/WorldDisasterLM-8B", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }'
- Docker Model Runner
How to use drdeveloper88/WorldDisasterLM-8B with Docker Model Runner:
docker model run hf.co/drdeveloper88/WorldDisasterLM-8B
Commit ·
9cde61a
1
Parent(s): 51f7c12
Fix: restore worlddisasterlm/data package + fix README badges and clone URL
Browse files
- scripts/sync_to_hf.py +8 -3
- worlddisasterlm/data/__init__.py +0 -0
- worlddisasterlm/data/collectors/__init__.py +15 -0
- worlddisasterlm/data/collectors/gdacs.py +86 -0
- worlddisasterlm/data/collectors/noaa.py +113 -0
- worlddisasterlm/data/collectors/openfema.py +111 -0
- worlddisasterlm/data/collectors/reliefweb.py +111 -0
- worlddisasterlm/data/collectors/usgs.py +95 -0
- worlddisasterlm/data/collectors/who_rss.py +101 -0
- worlddisasterlm/data/etl.py +55 -0
- worlddisasterlm/data/processors.py +32 -0
- worlddisasterlm/data/qa_generator.py +240 -0
- worlddisasterlm/data/scenario_builder.py +197 -0
- worlddisasterlm/data/schemas.py +18 -0
- worlddisasterlm/data/sources.py +34 -0
scripts/sync_to_hf.py
CHANGED
|
@@ -285,14 +285,19 @@ ROOT_DIRS = [
|
|
| 285 |
]
|
| 286 |
|
| 287 |
EXCLUDE_DIRS = {".venv", "__pycache__", ".pytest_cache", "node_modules",
|
| 288 |
-
".git", "
|
| 289 |
|
|
|
|
|
|
|
| 290 |
|
| 291 |
-
|
|
|
|
| 292 |
dst.mkdir(parents=True, exist_ok=True)
|
| 293 |
for item in src.iterdir():
|
| 294 |
if item.name in EXCLUDE_DIRS:
|
| 295 |
continue
|
|
|
|
|
|
|
| 296 |
if item.is_dir():
|
| 297 |
copy_tree(item, dst / item.name)
|
| 298 |
else:
|
|
@@ -346,7 +351,7 @@ def main():
|
|
| 346 |
dst = REPO_DIR / dname
|
| 347 |
if dst.exists():
|
| 348 |
shutil.rmtree(dst)
|
| 349 |
-
copy_tree(src, dst)
|
| 350 |
print(f" {dname}/")
|
| 351 |
|
| 352 |
# Git add + commit + push
|
|
|
|
| 285 |
]
|
| 286 |
|
| 287 |
EXCLUDE_DIRS = {".venv", "__pycache__", ".pytest_cache", "node_modules",
|
| 288 |
+
".git", "outputs", "checkpoints", "artifacts"}
|
| 289 |
|
| 290 |
+
# Top-level only exclusions (don't apply recursively)
|
| 291 |
+
EXCLUDE_TOP_LEVEL_DIRS = {"data", "frontend"}
|
| 292 |
|
| 293 |
+
|
| 294 |
+
def copy_tree(src: Path, dst: Path, top_level: bool = False):
|
| 295 |
dst.mkdir(parents=True, exist_ok=True)
|
| 296 |
for item in src.iterdir():
|
| 297 |
if item.name in EXCLUDE_DIRS:
|
| 298 |
continue
|
| 299 |
+
if top_level and item.name in EXCLUDE_TOP_LEVEL_DIRS:
|
| 300 |
+
continue
|
| 301 |
if item.is_dir():
|
| 302 |
copy_tree(item, dst / item.name)
|
| 303 |
else:
|
|
|
|
| 351 |
dst = REPO_DIR / dname
|
| 352 |
if dst.exists():
|
| 353 |
shutil.rmtree(dst)
|
| 354 |
+
copy_tree(src, dst, top_level=True)
|
| 355 |
print(f" {dname}/")
|
| 356 |
|
| 357 |
# Git add + commit + push
|
worlddisasterlm/data/__init__.py
ADDED
|
File without changes
|
worlddisasterlm/data/collectors/__init__.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from worlddisasterlm.data.collectors.reliefweb import collect_reliefweb
|
| 2 |
+
from worlddisasterlm.data.collectors.usgs import collect_usgs
|
| 3 |
+
from worlddisasterlm.data.collectors.gdacs import collect_gdacs
|
| 4 |
+
from worlddisasterlm.data.collectors.noaa import collect_noaa
|
| 5 |
+
from worlddisasterlm.data.collectors.openfema import collect_openfema
|
| 6 |
+
from worlddisasterlm.data.collectors.who_rss import collect_who
|
| 7 |
+
|
| 8 |
+
__all__ = [
|
| 9 |
+
"collect_reliefweb",
|
| 10 |
+
"collect_usgs",
|
| 11 |
+
"collect_gdacs",
|
| 12 |
+
"collect_noaa",
|
| 13 |
+
"collect_openfema",
|
| 14 |
+
"collect_who",
|
| 15 |
+
]
|
worlddisasterlm/data/collectors/gdacs.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
GDACS (Global Disaster Alert and Coordination System) collector.
|
| 3 |
+
|
| 4 |
+
Free GeoRSS feed – no authentication required.
|
| 5 |
+
Feed: https://www.gdacs.org/xml/rss.xml
|
| 6 |
+
"""
|
| 7 |
+
import logging
|
| 8 |
+
import re
|
| 9 |
+
from datetime import datetime
|
| 10 |
+
|
| 11 |
+
import feedparser
|
| 12 |
+
|
| 13 |
+
from worlddisasterlm.data.schemas import DisasterRecord
|
| 14 |
+
|
| 15 |
+
logger = logging.getLogger(__name__)
|
| 16 |
+
|
| 17 |
+
GDACS_RSS_URL = "https://www.gdacs.org/xml/rss.xml"
|
| 18 |
+
GDACS_SCORE_RSS_URL = "https://www.gdacs.org/xml/rss_score.xml"
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def _alert_to_severity(alert_level: str) -> str:
    """Translate a GDACS alert colour into a severity label.

    Args:
        alert_level: Alert value from the feed; surrounding whitespace and
            letter case are ignored.

    Returns:
        ``"critical"`` for red, ``"high"`` for orange, and ``"moderate"``
        for every other value.
    """
    colour_to_severity = {"red": "critical", "orange": "high"}
    normalized = alert_level.strip().lower()
    # Anything that is not red or orange falls back to "moderate".
    return colour_to_severity.get(normalized, "moderate")
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def _clean_html(text: str) -> str:
    """Strip markup tags from *text* and return at most 600 characters.

    Each ``<...>`` tag is replaced by a space, runs of whitespace are then
    collapsed to single spaces, and the result is truncated. A falsy
    *text* (``None`` or ``""``) yields an empty string.
    """
    without_tags = re.sub(r"<[^>]+>", " ", text or "")
    words = without_tags.split()
    collapsed = " ".join(words)
    return collapsed[:600]
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def collect_gdacs(max_records: int = 2000) -> list[DisasterRecord]:
    """Collect disaster events from GDACS GeoRSS (free, no auth).

    Parses ``GDACS_RSS_URL`` and then ``GDACS_SCORE_RSS_URL`` and turns each
    entry that has a summary (or, failing that, a title) into a record.

    Args:
        max_records: Upper bound on the number of records returned.

    Returns:
        At most ``max_records`` records with ``source="GDACS"``, in feed
        order. A feed whose parsing raises is logged and skipped.
    """
    records: list[DisasterRecord] = []

    for url in [GDACS_RSS_URL, GDACS_SCORE_RSS_URL]:
        # Do not fetch another feed once the quota is already filled.
        if len(records) >= max_records:
            break

        logger.info("Parsing GDACS feed: %s", url)
        try:
            feed = feedparser.parse(url)
        except Exception as exc:
            logger.warning("Failed to parse GDACS feed %s: %s", url, exc)
            continue

        if feed.bozo and feed.bozo_exception:
            logger.warning("GDACS feed parse warning: %s", feed.bozo_exception)

        for entry in feed.entries:
            if len(records) >= max_records:
                break

            title = entry.get("title", "")
            summary_raw = entry.get("summary", entry.get("description", ""))
            summary = _clean_html(summary_raw) or title
            if not summary:
                continue

            # GDACS uses gdacs: namespace tags.
            # Chain the fallbacks with `or` so that a tag which is present
            # but empty still falls through to the final default.
            alert_level = (
                entry.get("gdacs_alertlevel")
                or entry.get("gdacs_alertscore")
                or "green"
            )
            event_type = (
                entry.get("gdacs_eventtype")
                or entry.get("gdacs_eventname")
                or "disaster"
            )

            # Try to extract country/region
            country = (
                entry.get("gdacs_country")
                or entry.get("gdacs_iso3")
                or "global"
            )

            records.append(
                DisasterRecord(
                    source="GDACS",
                    event_type=str(event_type).lower().strip(),
                    region=str(country).strip(),
                    summary=summary,
                    severity=_alert_to_severity(str(alert_level)),
                )
            )

    logger.info("GDACS collection complete: %d records", len(records))
    return records[:max_records]
|
worlddisasterlm/data/collectors/noaa.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
NOAA National Weather Service alerts collector.
|
| 3 |
+
|
| 4 |
+
Free API – no authentication required.
|
| 5 |
+
Docs: https://www.weather.gov/documentation/services-web-api
|
| 6 |
+
"""
|
| 7 |
+
import logging
|
| 8 |
+
import time
|
| 9 |
+
|
| 10 |
+
import httpx
|
| 11 |
+
|
| 12 |
+
from worlddisasterlm.data.schemas import DisasterRecord
|
| 13 |
+
|
| 14 |
+
logger = logging.getLogger(__name__)
|
| 15 |
+
|
| 16 |
+
NOAA_ALERTS_API = "https://api.weather.gov/alerts"
|
| 17 |
+
HEADERS = {"User-Agent": "WorldDisasterLM/0.1.0 (worlddisasterlm@example.com)"}
|
| 18 |
+
|
| 19 |
+
SEVERITY_MAP = {
|
| 20 |
+
"extreme": "critical",
|
| 21 |
+
"severe": "high",
|
| 22 |
+
"moderate": "moderate",
|
| 23 |
+
"minor": "low",
|
| 24 |
+
"unknown": "low",
|
| 25 |
+
}
|
| 26 |
+
|
| 27 |
+
EVENT_NORMALIZE = {
|
| 28 |
+
"tornado warning": "tornado",
|
| 29 |
+
"flash flood warning": "flood",
|
| 30 |
+
"hurricane warning": "hurricane",
|
| 31 |
+
"blizzard warning": "blizzard",
|
| 32 |
+
"winter storm warning": "winter_storm",
|
| 33 |
+
"tsunami warning": "tsunami",
|
| 34 |
+
"earthquake warning": "earthquake",
|
| 35 |
+
"excessive heat warning": "heatwave",
|
| 36 |
+
"fire weather watch": "wildfire",
|
| 37 |
+
"red flag warning": "wildfire",
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def collect_noaa(max_records: int = 5000) -> list[DisasterRecord]:
    """Collect active and recent weather alerts from NOAA (free, no auth).

    Requests ``NOAA_ALERTS_API`` page by page (500 alerts per request),
    following the ``cursor`` query parameter of ``pagination.next`` until
    there is no further page, a request fails, or ``max_records`` is reached.

    Args:
        max_records: Maximum number of records to collect.

    Returns:
        Records with ``source="NOAA"``. Alerts with neither a headline nor a
        description are skipped. A failed request ends the collection and
        whatever was gathered so far is returned.
    """
    # Function-scope import, resolved once per call rather than once per page.
    from urllib.parse import parse_qs, urlparse

    records: list[DisasterRecord] = []
    cursor: str | None = None
    page_count = 0

    logger.info("Collecting NOAA weather alerts (max=%d)", max_records)

    while len(records) < max_records:
        params: dict[str, str | int] = {"limit": 500}
        if cursor:
            params["cursor"] = cursor

        try:
            response = httpx.get(NOAA_ALERTS_API, headers=HEADERS, params=params, timeout=30)
            response.raise_for_status()
            data = response.json()
        except Exception as exc:
            logger.warning("NOAA request failed (page %d): %s", page_count, exc)
            break

        features = data.get("features") or []
        if not features:
            break

        for feature in features:
            if len(records) >= max_records:
                break

            # `or` defaults treat a JSON null the same as a missing key, so a
            # null areaDesc no longer fails on the slice below.
            props = feature.get("properties") or {}
            event = str(props.get("event") or "weather event").lower()
            area = str(props.get("areaDesc") or "United States")
            headline = props.get("headline") or ""
            description = props.get("description") or ""
            severity_raw = str(props.get("severity") or "unknown").lower()

            summary = headline or description
            if not summary:
                continue
            summary = " ".join(str(summary).split())[:600]

            # First EVENT_NORMALIZE key contained in the event name wins;
            # otherwise fall back to the name minus " warning" / " watch".
            normalized_event = next(
                (v for k, v in EVENT_NORMALIZE.items() if k in event),
                event.replace(" warning", "").replace(" watch", "").strip(),
            )

            records.append(
                DisasterRecord(
                    source="NOAA",
                    event_type=normalized_event or "weather_event",
                    region=area[:100],
                    summary=summary,
                    severity=SEVERITY_MAP.get(severity_raw, "moderate"),
                )
            )

        # Pagination
        next_url = (data.get("pagination") or {}).get("next")
        if not next_url:
            break

        # Extract cursor from next URL query param
        next_cursor = parse_qs(urlparse(next_url).query).get("cursor", [None])[0]
        if not next_cursor or next_cursor == cursor:
            # Without a new cursor the next request would repeat a page
            # already seen and fill the result with duplicates.
            break
        cursor = next_cursor

        page_count += 1
        time.sleep(0.5)

    logger.info("NOAA collection complete: %d records", len(records))
    return records
|
worlddisasterlm/data/collectors/openfema.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
OpenFEMA disaster declarations collector.
|
| 3 |
+
|
| 4 |
+
Free API – no authentication required.
|
| 5 |
+
Docs: https://www.fema.gov/about/openfema/api
|
| 6 |
+
"""
|
| 7 |
+
import logging
|
| 8 |
+
import time
|
| 9 |
+
|
| 10 |
+
import httpx
|
| 11 |
+
|
| 12 |
+
from worlddisasterlm.data.schemas import DisasterRecord
|
| 13 |
+
|
| 14 |
+
logger = logging.getLogger(__name__)
|
| 15 |
+
|
| 16 |
+
OPENFEMA_API = "https://www.fema.gov/api/open/v2/DisasterDeclarationsSummaries"
|
| 17 |
+
|
| 18 |
+
INCIDENT_TYPE_MAP = {
|
| 19 |
+
"hurricane": "hurricane",
|
| 20 |
+
"flood": "flood",
|
| 21 |
+
"tornado": "tornado",
|
| 22 |
+
"earthquake": "earthquake",
|
| 23 |
+
"fire": "wildfire",
|
| 24 |
+
"winter storm": "winter_storm",
|
| 25 |
+
"drought": "drought",
|
| 26 |
+
"mudslide": "landslide",
|
| 27 |
+
"typhoon": "typhoon",
|
| 28 |
+
"tsunami": "tsunami",
|
| 29 |
+
"volcano": "volcano",
|
| 30 |
+
"chemical": "chemical",
|
| 31 |
+
"biological": "epidemic",
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def _map_incident(raw: str) -> str:
    """Map an OpenFEMA incident type onto this project's event-type label.

    Args:
        raw: Incident type as delivered by the API. ``None``, empty and
            whitespace-only values are accepted.

    Returns:
        The value of the first ``INCIDENT_TYPE_MAP`` key contained in the
        lower-cased input; the lower-cased, stripped input itself when no
        key matches; ``"disaster"`` when the input is missing or blank.
    """
    raw_lower = (raw or "").strip().lower()
    if not raw_lower:
        # A null or blank incident type would otherwise raise or produce
        # an empty event type.
        return "disaster"
    for key, val in INCIDENT_TYPE_MAP.items():
        if key in raw_lower:
            return val
    return raw_lower
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def collect_openfema(max_records: int = 20000) -> list[DisasterRecord]:
    """Collect disaster declarations from OpenFEMA (free, no auth).

    Pages through ``OPENFEMA_API`` with ``$top``/``$skip``, newest
    declarations first, and builds a one-sentence-per-field summary for each.

    Args:
        max_records: Maximum number of records to collect.

    Returns:
        Records with ``source="OpenFEMA"`` and ``severity="high"``. A failed
        request ends the collection and whatever was gathered so far is
        returned.
    """
    records: list[DisasterRecord] = []
    skip = 0
    page_size = 1000

    logger.info("Collecting OpenFEMA disaster declarations (max=%d)", max_records)

    while len(records) < max_records:
        params = {
            "$format": "json",
            "$top": min(page_size, max_records - len(records)),
            "$skip": skip,
            "$orderby": "declarationDate desc",
            "$select": (
                "disasterNumber,declarationTitle,incidentType,"
                "declarationDate,state,incidentBeginDate,incidentEndDate,"
                "closeoutDate,declarationType"
            ),
        }

        try:
            response = httpx.get(OPENFEMA_API, params=params, timeout=60)
            response.raise_for_status()
            data = response.json()
        except Exception as exc:
            logger.warning("OpenFEMA request failed at skip %d: %s", skip, exc)
            break

        declarations = data.get("DisasterDeclarationsSummaries") or []
        if not declarations:
            break

        for dec in declarations:
            # `or` defaults treat a JSON null like a missing key, so the
            # slicing and string handling below never see None.
            title = dec.get("declarationTitle") or ""
            incident_raw = dec.get("incidentType") or "disaster"
            state = dec.get("state") or "US"
            declaration_date = (dec.get("declarationDate") or "")[:10]

            # No end date is reported as an ongoing incident.
            incident_end = dec.get("incidentEndDate") or ""
            open_ended = f"closed {incident_end[:10]}" if incident_end else "ongoing"

            summary = (
                f"Federal disaster declaration: {title}. "
                f"Incident type: {incident_raw}. "
                f"State/region: {state}. "
                f"Declared: {declaration_date}. Status: {open_ended}."
            )

            records.append(
                DisasterRecord(
                    source="OpenFEMA",
                    event_type=_map_incident(incident_raw),
                    region=state,
                    summary=summary[:600],
                    severity="high",
                )
            )

        skip += len(declarations)
        logger.info("OpenFEMA: %d declarations collected", len(records))

        # A short page means the server has nothing further to return.
        if len(declarations) < page_size:
            break

        time.sleep(0.3)

    logger.info("OpenFEMA collection complete: %d records", len(records))
    return records
|
worlddisasterlm/data/collectors/reliefweb.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ReliefWeb data collector.
|
| 3 |
+
|
| 4 |
+
Free API – no authentication required.
|
| 5 |
+
Docs: https://apidoc.rwlabs.org/
|
| 6 |
+
"""
|
| 7 |
+
import logging
|
| 8 |
+
import time
|
| 9 |
+
from typing import Any
|
| 10 |
+
|
| 11 |
+
import httpx
|
| 12 |
+
|
| 13 |
+
from worlddisasterlm.data.schemas import DisasterRecord
|
| 14 |
+
|
| 15 |
+
logger = logging.getLogger(__name__)
|
| 16 |
+
|
| 17 |
+
RELIEFWEB_API = "https://api.reliefweb.int/v1/reports"
|
| 18 |
+
PAGE_SIZE = 100
|
| 19 |
+
REQUEST_DELAY = 0.5 # seconds between requests
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def _severity_from_fields(fields: dict[str, Any]) -> str:
    """Derive a severity label from a report's ``disaster_type`` entries.

    Matching is by substring on the lower-cased type name, so multi-word
    names such as "Tropical Cyclone", "Flash Flood" or "Wild Fire" are
    recognised as well as the bare keywords.

    Args:
        fields: The ``fields`` mapping of one report. ``disaster_type`` is
            read as a list of mappings with a ``name`` key; a missing or
            null list, or a null name, is tolerated.

    Returns:
        ``"critical"`` if any type name mentions earthquake, tsunami,
        nuclear, hurricane or cyclone; otherwise ``"high"`` for flood,
        wildfire / wild fire, epidemic or drought; otherwise ``"moderate"``.
    """
    type_names = [
        (entry.get("name") or "").lower()
        for entry in fields.get("disaster_type") or []
    ]
    critical_terms = ("earthquake", "tsunami", "nuclear", "hurricane", "cyclone")
    high_terms = ("flood", "wildfire", "wild fire", "epidemic", "drought")

    def mentions(terms: tuple[str, ...]) -> bool:
        return any(term in name for name in type_names for term in terms)

    # Critical is checked first so it wins when both tiers are present.
    if mentions(critical_terms):
        return "critical"
    if mentions(high_terms):
        return "high"
    return "moderate"
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def collect_reliefweb(max_records: int = 5000) -> list[DisasterRecord]:
    """Collect disaster reports from ReliefWeb (free, no auth).

    Posts paged queries to ``RELIEFWEB_API`` for published reports, newest
    first, and uses the tag-stripped report body (or the title when the
    body is empty) as the summary.

    Args:
        max_records: Maximum number of records to collect.

    Returns:
        Records with ``source="ReliefWeb"``. Reports with neither body nor
        title are skipped. A failed request ends the collection and
        whatever was gathered so far is returned.
    """
    # Function-scope import and a pattern compiled once per call, instead
    # of an import statement executed for every report.
    import re

    tag_pattern = re.compile(r"<[^>]+>")

    records: list[DisasterRecord] = []
    offset = 0

    logger.info("Collecting ReliefWeb reports (max=%d)", max_records)

    while len(records) < max_records:
        batch_size = min(PAGE_SIZE, max_records - len(records))
        payload = {
            "appname": "worlddisasterlm",
            "limit": batch_size,
            "offset": offset,
            "fields": {
                "include": [
                    "title",
                    "body-html",
                    "primary_country.name",
                    "disaster_type.name",
                    "date.created",
                    "status",
                ]
            },
            "filter": {
                "operator": "AND",
                "conditions": [{"field": "status", "value": "published"}],
            },
            "sort": ["date.created:desc"],
        }

        try:
            response = httpx.post(RELIEFWEB_API, json=payload, timeout=30)
            response.raise_for_status()
            data = response.json()
        except Exception as exc:
            logger.warning("ReliefWeb request failed at offset %d: %s", offset, exc)
            break

        items = data.get("data") or []
        if not items:
            break

        for item in items:
            # `or` defaults treat a JSON null like a missing key.
            fields = item.get("fields") or {}
            title = fields.get("title") or ""
            body_html = fields.get("body-html") or ""
            # Strip HTML tags simply
            body_text = tag_pattern.sub(" ", body_html)
            body_text = " ".join(body_text.split())[:600]

            country = fields.get("primary_country") or {}
            region = (country.get("name") or "global") if isinstance(country, dict) else "global"

            # Only the first listed disaster type names the event.
            dtype_list = fields.get("disaster_type") or []
            event_type = (dtype_list[0].get("name") or "disaster").lower() if dtype_list else "disaster"

            summary = body_text or title
            if not summary:
                continue

            records.append(
                DisasterRecord(
                    source="ReliefWeb",
                    event_type=event_type,
                    region=region,
                    summary=summary,
                    severity=_severity_from_fields(fields),
                )
            )

        offset += len(items)
        logger.info("ReliefWeb: collected %d / %d", len(records), max_records)

        if len(items) < batch_size:
            break  # last page

        time.sleep(REQUEST_DELAY)

    logger.info("ReliefWeb collection complete: %d records", len(records))
    return records
|
worlddisasterlm/data/collectors/usgs.py
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
USGS Earthquake data collector.
|
| 3 |
+
|
| 4 |
+
Free API – no authentication required.
|
| 5 |
+
Docs: https://earthquake.usgs.gov/fdsnws/event/1/
|
| 6 |
+
"""
|
| 7 |
+
import logging
|
| 8 |
+
import time
|
| 9 |
+
from datetime import datetime, timedelta
|
| 10 |
+
|
| 11 |
+
import httpx
|
| 12 |
+
|
| 13 |
+
from worlddisasterlm.data.schemas import DisasterRecord
|
| 14 |
+
|
| 15 |
+
logger = logging.getLogger(__name__)
|
| 16 |
+
|
| 17 |
+
USGS_API = "https://earthquake.usgs.gov/fdsnws/event/1/query"
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def _severity(magnitude: float) -> str:
    """Classify an earthquake magnitude into a severity label.

    Returns ``"critical"`` from 7.5 upward, ``"high"`` from 6.0,
    ``"moderate"`` from 4.5, and ``"low"`` below that.
    """
    # Ordered from the highest floor down; the first floor reached wins.
    thresholds = ((7.5, "critical"), (6.0, "high"), (4.5, "moderate"))
    for floor, label in thresholds:
        if magnitude >= floor:
            return label
    return "low"
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def collect_usgs(
    years_back: int = 10,
    min_magnitude: float = 4.0,
    max_records: int = 20000,
) -> list[DisasterRecord]:
    """Collect earthquake data from USGS FDSN (free, no auth).

    Queries ``USGS_API`` once per 365-day window, starting with the most
    recent window and walking backwards.

    Args:
        years_back: Number of 365-day windows to query.
        min_magnitude: Value sent as the ``minmagnitude`` query parameter.
        max_records: Maximum number of records returned.

    Returns:
        At most ``max_records`` records with ``source="USGS"`` and
        ``event_type="earthquake"``. A window whose request fails is logged
        and skipped.
    """
    # Function-scope import: only needed to build a timezone-aware "now".
    from datetime import timezone

    records: list[DisasterRecord] = []
    # USGS API max 20 000 per call; chunk by year to avoid that limit
    # Timezone-aware replacement for the deprecated datetime.utcnow(); the
    # strftime calls below render the same UTC wall-clock string.
    end_time = datetime.now(timezone.utc)

    for year_offset in range(years_back):
        if len(records) >= max_records:
            break

        year_end = end_time - timedelta(days=365 * year_offset)
        year_start = end_time - timedelta(days=365 * (year_offset + 1))

        params = {
            "format": "geojson",
            "starttime": year_start.strftime("%Y-%m-%dT%H:%M:%S"),
            "endtime": year_end.strftime("%Y-%m-%dT%H:%M:%S"),
            "minmagnitude": min_magnitude,
            "orderby": "time",
            "limit": min(20000, max_records - len(records)),
        }

        try:
            response = httpx.get(USGS_API, params=params, timeout=60)
            response.raise_for_status()
            data = response.json()
        except Exception as exc:
            logger.warning("USGS request failed for year offset %d: %s", year_offset, exc)
            continue

        features = data.get("features") or []
        logger.info(
            "USGS year -%d: %d earthquakes fetched", year_offset + 1, len(features)
        )

        for feature in features:
            props = feature.get("properties") or {}
            mag = props.get("mag") or 0.0
            place = props.get("place") or "Unknown location"
            title = props.get("title") or f"M{mag} earthquake"
            # `or` rather than a .get default: a key that is present with a
            # null value must also read "none", not "None".
            alert = props.get("alert") or "none"

            summary = (
                f"Magnitude {mag:.1f} earthquake reported near {place}. "
                f"Alert level: {alert}. "
                f"Tsunami risk: {'yes' if props.get('tsunami') else 'no'}. "
                f"{title}"
            )

            records.append(
                DisasterRecord(
                    source="USGS",
                    event_type="earthquake",
                    region=place,
                    summary=summary[:600],
                    severity=_severity(mag),
                )
            )

        time.sleep(0.3)

    logger.info("USGS collection complete: %d records", len(records))
    return records[:max_records]
|
worlddisasterlm/data/collectors/who_rss.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
WHO Disease Outbreak News (DON) RSS collector.
|
| 3 |
+
|
| 4 |
+
Free feed – no authentication required.
|
| 5 |
+
Feed: https://www.who.int/feeds/entity/csr/don/en/rss.xml
|
| 6 |
+
"""
|
| 7 |
+
import logging
|
| 8 |
+
import re
|
| 9 |
+
|
| 10 |
+
import feedparser
|
| 11 |
+
|
| 12 |
+
from worlddisasterlm.data.schemas import DisasterRecord
|
| 13 |
+
|
| 14 |
+
logger = logging.getLogger(__name__)
|
| 15 |
+
|
| 16 |
+
WHO_DON_RSS = "https://www.who.int/feeds/entity/csr/don/en/rss.xml"
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
DISEASE_KEYWORDS = {
|
| 20 |
+
"ebola": "epidemic",
|
| 21 |
+
"cholera": "epidemic",
|
| 22 |
+
"dengue": "epidemic",
|
| 23 |
+
"malaria": "epidemic",
|
| 24 |
+
"covid": "pandemic",
|
| 25 |
+
"influenza": "epidemic",
|
| 26 |
+
"mpox": "epidemic",
|
| 27 |
+
"monkeypox": "epidemic",
|
| 28 |
+
"plague": "epidemic",
|
| 29 |
+
"meningitis": "epidemic",
|
| 30 |
+
"lassa": "epidemic",
|
| 31 |
+
"marburg": "epidemic",
|
| 32 |
+
"rift valley": "epidemic",
|
| 33 |
+
"yellow fever": "epidemic",
|
| 34 |
+
"polio": "epidemic",
|
| 35 |
+
"measles": "epidemic",
|
| 36 |
+
"typhoid": "epidemic",
|
| 37 |
+
"hepatitis": "epidemic",
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def _classify_event(text: str) -> str:
    """Return the event type of the first disease keyword found in *text*.

    Keywords are tried in the iteration order of ``DISEASE_KEYWORDS`` against
    the lower-cased text; ``"public_health"`` is returned when none occurs.
    """
    haystack = text.lower()
    matches = (
        label
        for needle, label in DISEASE_KEYWORDS.items()
        if needle in haystack
    )
    return next(matches, "public_health")
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def _clean(text: str) -> str:
    """Return *text* without markup tags, whitespace-normalised, max 600 chars.

    A falsy *text* is treated as the empty string. Every ``<...>`` tag
    becomes a space before whitespace runs are collapsed.
    """
    source = text or ""
    tagless = re.sub(r"<[^>]+>", " ", source)
    normalised = " ".join(tagless.split())
    return normalised[:600]
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def collect_who(max_records: int = 1000) -> list[DisasterRecord]:
    """Collect WHO disease outbreak news (free, no auth).

    Parses the feed at ``WHO_DON_RSS`` and builds one record per entry that
    has a non-empty summary or title.

    Args:
        max_records: Maximum number of records to return.

    Returns:
        Records with ``source="WHO"`` and ``severity="high"``; an empty list
        if parsing the feed raises.
    """
    records: list[DisasterRecord] = []

    logger.info("Parsing WHO Disease Outbreak News RSS feed")

    try:
        feed = feedparser.parse(WHO_DON_RSS)
    except Exception as exc:
        logger.warning("Failed to parse WHO RSS: %s", exc)
        return records

    if feed.bozo and feed.bozo_exception:
        logger.warning("WHO RSS parse warning: %s", feed.bozo_exception)

    for entry in feed.entries:
        if len(records) >= max_records:
            break

        title = entry.get("title", "")
        summary_raw = entry.get("summary", entry.get("description", ""))
        summary = _clean(summary_raw) or title

        if not summary:
            continue

        combined = f"{title} {summary}"
        event_type = _classify_event(combined)

        # Try to extract country from title (e.g. "Ebola virus disease – Democratic Republic of the Congo")
        # Only an en dash or a space-delimited hyphen counts as a separator:
        # a bare "-" also occurs inside words such as "COVID-19" and would
        # yield a bogus region like "19 update".
        region = "global"
        for separator in ("–", " - "):
            if separator in title:
                # An empty trailing segment keeps the "global" default.
                region = title.rsplit(separator, 1)[-1].strip() or "global"
                break

        records.append(
            DisasterRecord(
                source="WHO",
                event_type=event_type,
                region=region[:100],
                summary=summary,
                severity="high",
            )
        )

    logger.info("WHO collection complete: %d records", len(records))
    return records
|
worlddisasterlm/data/etl.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from collections.abc import Iterable
|
| 2 |
+
|
| 3 |
+
from worlddisasterlm.data.schemas import DisasterRecord
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class DisasterETL:
    """Minimal collect / deduplicate / normalize steps over ``DisasterRecord`` objects."""

    def collect_records(self) -> list[DisasterRecord]:
        """Return three hard-coded sample records (ReliefWeb, WHO, USGS)."""
        # Replace with API clients and ingestion jobs for production-scale collection.
        seed_rows = (
            {
                "source": "ReliefWeb",
                "event_type": "flood",
                "region": "South Asia",
                "summary": "Severe flooding displaced 12000 people and disrupted road access.",
                "severity": "high",
            },
            {
                "source": "WHO",
                "event_type": "epidemic",
                "region": "East Africa",
                "summary": "Localized cholera outbreak with urgent water sanitation requirements.",
                "severity": "high",
            },
            {
                "source": "USGS",
                "event_type": "earthquake",
                "region": "Pacific Rim",
                "summary": "Magnitude 6.8 earthquake with aftershock risk and infrastructure damage.",
                "severity": "critical",
            },
        )
        return [DisasterRecord(**row) for row in seed_rows]

    def deduplicate(self, records: Iterable[DisasterRecord]) -> list[DisasterRecord]:
        """Drop repeated records, keeping the first occurrence in input order.

        Two records count as the same when source, event type, region and
        summary all match exactly; severity is not part of the identity.
        """
        first_seen: dict[tuple[str, str, str, str], DisasterRecord] = {}
        for item in records:
            identity = (item.source, item.event_type, item.region, item.summary)
            # setdefault leaves an existing entry untouched, so the earliest
            # record wins, and dict order preserves the input order.
            first_seen.setdefault(identity, item)
        return list(first_seen.values())

    def normalize(self, records: Iterable[DisasterRecord]) -> list[DisasterRecord]:
        """Return cleaned copies of the records.

        Source and region are stripped, event type and severity are stripped
        and lower-cased, and whitespace runs in the summary collapse to
        single spaces. The input records are not modified.
        """
        return [
            DisasterRecord(
                source=item.source.strip(),
                event_type=item.event_type.strip().lower(),
                region=item.region.strip(),
                summary=" ".join(item.summary.split()),
                severity=item.severity.strip().lower(),
            )
            for item in records
        ]
|
worlddisasterlm/data/processors.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
|
| 4 |
+
from worlddisasterlm.data.schemas import DisasterRecord, InstructionSample
|
| 5 |
+
from worlddisasterlm.utils.io import ensure_dir
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def build_instruction_dataset(records: list[DisasterRecord]) -> list[InstructionSample]:
    """Build one instruction sample per record.

    Every sample shares the same instruction and the same four-step output;
    only the input differs, and it lists the record's region, event type,
    severity and summary.
    """
    prompt = "Assess the incident and provide emergency response steps."
    playbook = (
        "1) Verify official alerts and incident perimeter. "
        "2) Prioritize life-saving response and medical triage. "
        "3) Coordinate shelter, water, food, and transport logistics. "
        "4) Share multilingual updates every 30 minutes."
    )
    return [
        InstructionSample(
            instruction=prompt,
            input="\n".join(
                (
                    f"Region: {item.region}",
                    f"Event: {item.event_type}",
                    f"Severity: {item.severity}",
                    f"Situation: {item.summary}",
                )
            ),
            output=playbook,
        )
        for item in records
    ]
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def save_instruction_dataset(samples: list[InstructionSample], output_path: str) -> Path:
    """Write the samples as JSON Lines (one JSON object per line, UTF-8).

    The parent directory is prepared via ``ensure_dir`` before writing, and
    non-ASCII text is written as-is rather than escaped.

    Returns:
        The path that was written.
    """
    destination = Path(output_path)
    ensure_dir(destination.parent)
    with destination.open("w", encoding="utf-8") as sink:
        sink.writelines(
            json.dumps(vars(entry), ensure_ascii=False) + "\n" for entry in samples
        )
    return destination
|
worlddisasterlm/data/qa_generator.py
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
QA pair generator.
|
| 3 |
+
|
| 4 |
+
Converts raw DisasterRecord objects into diverse instruction-following samples.
|
| 5 |
+
Each record generates 8 QA variants to amplify the training corpus.
|
| 6 |
+
"""
|
| 7 |
+
import random
|
| 8 |
+
|
| 9 |
+
from worlddisasterlm.data.schemas import DisasterRecord, InstructionSample
|
| 10 |
+
|
| 11 |
+
# Ordered response checklists keyed by a lowercase hazard keyword.
# _get_response_steps() returns the list of the first key that occurs as a
# substring of the event type, and falls back to "default" otherwise.
RESPONSE_STEPS = {
    "earthquake": [
        "Drop, Cover, and Hold On immediately.",
        "Move away from windows, glass, and heavy furniture.",
        "If outdoors, move to open space away from buildings.",
        "After shaking stops, check for injuries and hazards.",
        "Be prepared for aftershocks.",
        "Do not use elevators; evacuate via stairways if building is unsafe.",
        "Listen to emergency broadcasts for official guidance.",
    ],
    "flood": [
        "Move immediately to higher ground; do not wait for official evacuation order.",
        "Avoid walking or driving through flood waters – 6 inches can knock you down.",
        "Disconnect electrical appliances; do not touch electrical equipment if wet.",
        "Follow evacuation routes designated by local emergency management.",
        "Store emergency supplies including water, food, and medication for 72 hours.",
        "Monitor official weather and emergency alerts for updates.",
    ],
    "wildfire": [
        "Evacuate immediately when ordered – do not wait.",
        "Close all windows and doors to reduce smoke penetration.",
        "Remove flammable items from around your home before leaving.",
        "Wear N95 masks or wet cloth to protect against smoke inhalation.",
        "Stay tuned to emergency broadcasts for evacuation route updates.",
    ],
    "hurricane": [
        "Secure or bring indoors all outdoor furniture and objects.",
        "Board up windows and reinforce garage doors.",
        "Prepare emergency kit: water, food, medications, documents.",
        "Know your evacuation zone and route.",
        "Stay indoors during the storm; the eye of the hurricane is not the all-clear.",
        "After the storm, watch for downed power lines and contaminated water.",
    ],
    "epidemic": [
        "Report cases to local health authorities immediately.",
        "Implement infection control measures: PPE, isolation protocols.",
        "Coordinate with WHO, CDC, and national health agencies.",
        "Establish clear case definition and surveillance system.",
        "Activate contact tracing and quarantine procedures.",
        "Communicate clearly with the public to prevent panic.",
    ],
    "pandemic": [
        "Follow national health authority guidelines.",
        "Implement non-pharmaceutical interventions: masking, distancing.",
        "Prioritize healthcare system capacity management.",
        "Accelerate vaccine development and equitable distribution.",
        "Coordinate international response through WHO frameworks.",
    ],
    # Generic checklist used when no hazard keyword matches.
    "default": [
        "Activate emergency response plan immediately.",
        "Prioritize life safety: triage injuries, evacuate if necessary.",
        "Contact emergency services (fire, police, medical) as appropriate.",
        "Coordinate with local emergency management authority.",
        "Set up incident command structure.",
        "Pre-position supplies: water, food, medical equipment, shelter.",
        "Communicate regularly with affected population in plain language.",
        "Document all actions for accountability and after-action review.",
    ],
}
|
| 70 |
+
|
| 71 |
+
# One-sentence resource lists keyed by the same lowercase hazard keywords as
# RESPONSE_STEPS. _get_resources() returns the text of the first key that
# occurs as a substring of the event type, and falls back to "default".
RESOURCE_GUIDANCE = {
    "earthquake": "Search and rescue teams, medical triage units, heavy machinery for debris removal, temporary shelter, water purification, emergency food supplies, structural engineers.",
    "flood": "Boats and water rescue teams, pumping equipment, water purification, temporary shelters on elevated ground, food and medical supplies, sanitation units.",
    "wildfire": "Aerial firefighting assets, ground crews, evacuation transport, respiratory medical support, temporary shelters, animal rescue resources.",
    "hurricane": "Pre-positioned food, water and fuel, emergency shelters, power restoration crews, debris removal, search and rescue teams, mental health support.",
    "epidemic": "Medical personnel with PPE, testing kits, contact tracing capacity, isolation facilities, treatment medicines, communication system.",
    # Added so pandemic events, which have their own response steps, get
    # health-specific resources instead of the generic default list.
    "pandemic": "Surge medical personnel with PPE, testing kits, vaccine and cold-chain logistics, intensive care capacity, isolation facilities, treatment medicines, public risk communication system.",
    "default": "Emergency medical teams, shelter supplies, clean water and food, communication equipment, transport resources, coordination staff.",
}
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def _get_response_steps(event_type: str) -> list[str]:
    """Return the response checklist for ``event_type``.

    The first ``RESPONSE_STEPS`` key, in definition order, that occurs as a
    substring of the lower-cased event type wins. With no match the
    ``"default"`` checklist is returned.
    """
    lowered = event_type.lower()
    matched = next((name for name in RESPONSE_STEPS if name in lowered), "default")
    return RESPONSE_STEPS[matched]
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def _get_resources(event_type: str) -> str:
    """Return the resource guidance sentence for ``event_type``.

    The first ``RESOURCE_GUIDANCE`` key, in definition order, that occurs as
    a substring of the lower-cased event type wins. With no match the
    ``"default"`` guidance is returned.
    """
    lowered = event_type.lower()
    matched = next((name for name in RESOURCE_GUIDANCE if name in lowered), "default")
    return RESOURCE_GUIDANCE[matched]
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def generate_qa_pairs(records: list[DisasterRecord]) -> list[InstructionSample]:
    """Expand every record into eight templated instruction samples.

    For each record the samples are emitted in a fixed order: immediate
    response, incident summary, resource planning, risk assessment,
    humanitarian aid, preparedness, public communication and recovery
    planning. All eight share the same ``input`` context block built from
    the record's region, event type, severity and summary.
    """
    samples: list[InstructionSample] = []

    for record in records:
        hazard = record.event_type
        region = record.region
        severity = record.severity

        numbered_steps = "\n".join(
            f"{number}. {step}"
            for number, step in enumerate(_get_response_steps(hazard), start=1)
        )
        resources = _get_resources(hazard)

        context = "\n".join(
            (
                f"Region: {region}",
                f"Hazard type: {hazard}",
                f"Severity: {severity}",
                f"Situation: {record.summary}",
            )
        )

        # (instruction, output) pairs; order defines the order of the samples.
        qa_pairs = [
            # QA 1 – immediate response
            (
                "What are the immediate emergency response steps for this disaster situation?",
                f"Immediate response priorities for a {severity}-severity {hazard} event:\n\n{numbered_steps}",
            ),
            # QA 2 – incident summary
            (
                "Summarize this disaster event for an emergency operations center briefing.",
                f"SITUATION REPORT — {hazard.upper()} | {region}\n\n"
                f"Severity: {severity.capitalize()}\n"
                f"Source: {record.source}\n\n"
                f"Summary: {record.summary}\n\n"
                "Status: Active monitoring required. Coordinate with local authorities and relevant UN agencies.",
            ),
            # QA 3 – resource planning
            (
                "What resources and logistics are needed to respond to this disaster?",
                f"Resource requirements for a {hazard} event in {region} (severity: {severity}):\n\n"
                f"{resources}\n\n"
                "Logistics priorities: establish forward operating base near the affected area, "
                "pre-position supplies along access routes, coordinate air and ground transport, "
                "and establish communication links with local partners.",
            ),
            # QA 4 – risk assessment
            (
                "Perform a risk assessment for this disaster scenario.",
                f"Risk Assessment: {hazard.capitalize()} in {region}\n\n"
                f"Risk Level: {severity.capitalize()}\n"
                f"Primary hazard: {hazard}\n"
                "Secondary hazards: displacement, water contamination, disease outbreak, infrastructure collapse\n"
                "Vulnerable populations: elderly, children, persons with disabilities, low-income households\n\n"
                "Recommended risk reduction actions:\n"
                "1. Activate early warning dissemination for at-risk zones.\n"
                "2. Pre-position emergency supplies and first responder teams.\n"
                "3. Establish coordination hub with government and NGO partners.\n"
                "4. Issue public guidance in multiple local languages.",
            ),
            # QA 5 – humanitarian aid
            (
                "What humanitarian aid priorities should be activated for this disaster?",
                f"Humanitarian Aid Priorities for {hazard} in {region}:\n\n"
                "1. Life-saving: Search and rescue, emergency medical care, trauma treatment.\n"
                "2. Basic needs: Emergency shelter, safe water, food assistance, sanitation.\n"
                "3. Protection: Safety monitoring for displaced persons, child protection, GBV prevention.\n"
                "4. Livelihoods: Cash transfers and livelihood support for affected households.\n"
                "5. Recovery: Debris clearance, shelter repair, economic recovery planning.\n\n"
                "Key coordination partners: OCHA, UNHCR, WFP, UNICEF, WHO, local disaster management authority.",
            ),
            # QA 6 – preparedness
            (
                "How can communities in this region prepare for this type of disaster?",
                f"Community Preparedness for {hazard} in {region}:\n\n"
                "1. Develop and rehearse household emergency plans.\n"
                "2. Build 72-hour emergency supply kits (water, food, medication, documents).\n"
                "3. Know evacuation routes and local shelter locations.\n"
                "4. Participate in community early warning systems.\n"
                "5. Strengthen local infrastructure and building codes.\n"
                "6. Conduct regular drills with schools, workplaces, and community organizations.\n"
                "7. Ensure vulnerable populations have specific support plans.",
            ),
            # QA 7 – public communication
            (
                "Draft an emergency public communication message for this disaster.",
                f"EMERGENCY ALERT — {region.upper()}\n\n"
                f"A {severity}-severity {hazard} event has been reported.\n\n"
                "IMMEDIATE ACTIONS REQUIRED:\n"
                "• Follow official evacuation orders immediately.\n"
                "• Move to designated shelters or higher ground.\n"
                "• Call emergency services for life-threatening situations.\n"
                "• Do NOT spread unverified information.\n\n"
                "Stay tuned to official government and emergency management channels for updates.\n"
                "[This message should be verified and issued by the authorised emergency management authority.]",
            ),
            # QA 8 – recovery planning
            (
                "What are the key steps in disaster recovery planning after this event?",
                f"Recovery Planning Framework: {hazard} — {region}\n\n"
                "Phase 1 (0–72 hours): Life safety, damage assessment, displaced persons registration.\n"
                "Phase 2 (1–4 weeks): Temporary shelter provision, debris clearance, basic services restoration.\n"
                "Phase 3 (1–6 months): Infrastructure repair, economic recovery, psychosocial support.\n"
                "Phase 4 (6+ months): Long-term reconstruction, risk reduction investments, lesson-learned review.\n\n"
                "Key principles: Build Back Better, inclusion of marginalized groups, environmental sustainability, "
                "community ownership of the recovery process.",
            ),
        ]

        samples.extend(
            InstructionSample(instruction=question, input=context, output=answer)
            for question, answer in qa_pairs
        )

    return samples
|
worlddisasterlm/data/scenario_builder.py
ADDED
|
@@ -0,0 +1,197 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Synthetic emergency scenario builder.
|
| 3 |
+
|
| 4 |
+
Generates multi-turn conversations, role-play scenarios, and cross-hazard
|
| 5 |
+
compound disaster samples to enrich training diversity.
|
| 6 |
+
"""
|
| 7 |
+
from worlddisasterlm.data.schemas import InstructionSample
|
| 8 |
+
|
| 9 |
+
# Hand-written compound (multi-hazard) scenarios. Each entry supplies the
# "scenario", "region", "event_type" and "severity" fields that
# build_compound_scenarios() formats into the sample input, plus the
# "response" text used verbatim as the sample output.
COMPOUND_SCENARIOS = [
    {
        "scenario": "An earthquake of magnitude 7.2 struck a coastal city causing building collapses, "
        "followed 45 minutes later by a tsunami warning.",
        "region": "Pacific coastal city",
        "event_type": "compound: earthquake + tsunami",
        "severity": "critical",
        "response": (
            "Compound Disaster Response — Earthquake + Tsunami:\n\n"
            "IMMEDIATE (0–15 min):\n"
            "1. Issue tsunami warning and mandatory coastal evacuation order immediately.\n"
            "2. Search and rescue operations for earthquake-trapped victims must start inland first.\n"
            "3. Hospitals on high ground remain operational; coastal facilities evacuate patients.\n\n"
            "SHORT-TERM (15 min – 6 hours):\n"
            "4. Establish triage stations above tsunami inundation zone.\n"
            "5. Account for all search and rescue teams before wave arrival.\n"
            "6. Deploy helicopters for coastal cliff rescues post-wave.\n\n"
            "CRITICAL NOTE: Aftershocks will compromise already-damaged structures. "
            "Do not re-enter buildings until structural engineers clear them."
        ),
    },
    {
        "scenario": "A category 4 hurricane is forecast to make landfall in 48 hours. "
        "The region already has 30,000 displaced persons from last week's flooding.",
        "region": "Caribbean island",
        "event_type": "compound: hurricane + displaced persons",
        "severity": "critical",
        "response": (
            "Compound Emergency — Hurricane Landfall with Pre-existing Displacement:\n\n"
            "PRE-LANDFALL (48–24 hours):\n"
            "1. Mandatory evacuation of all existing displacement camps in low-lying areas.\n"
            "2. Identify inland strong buildings for mass shelter; assess capacity.\n"
            "3. Pre-position 72-hour food and water supplies at inland shelters.\n"
            "4. Coordinate with UNHCR and local government for camp-to-shelter transfers.\n\n"
            "PRE-LANDFALL (24–0 hours):\n"
            "5. All personnel secured; close airport; ban sea transport.\n"
            "6. Emergency services on stand-by in hardened facilities.\n\n"
            "POST-LANDFALL:\n"
            "7. Damage assessment sweep before re-opening any displaced camp.\n"
            "8. Restore access roads before relief convoy mobilization."
        ),
    },
    {
        "scenario": "During a wildfire evacuation, a fuel tanker overturned on the primary "
        "evacuation highway, blocking egress for 15,000 evacuees.",
        "region": "Southern California",
        "event_type": "compound: wildfire + infrastructure failure",
        "severity": "critical",
        "response": (
            "Multi-Hazard Evacuation Disruption — Wildfire + Road Blockage:\n\n"
            "1. Activate alternate evacuation routes immediately; publish via Emergency Alert System.\n"
            "2. Dispatch hazmat team to fuel tanker — assess fire ignition risk before moving vehicle.\n"
            "3. If tanker poses fire risk: keep 300m standoff; use bulldozer to open shoulder lane.\n"
            "4. Coordinate law enforcement for contraflow on alternate highways.\n"
            "5. Deploy CHP and National Guard for traffic management at choke points.\n"
            "6. Air resources pivot to aerial evacuation for mobility-impaired stranded persons.\n"
            "7. Communicate updated routes every 15 minutes via radio, app, and social media."
        ),
    },
]
|
| 69 |
+
|
| 70 |
+
# Hand-written non-English question/answer pairs. Each entry carries the
# language name ("lang"), the question ("instruction") and the answer
# ("output"); build_multilingual_samples() copies them into samples with an
# empty input.
MULTILINGUAL_SAMPLES = [
    {
        "lang": "Nepali",
        "instruction": "भूकम्पको बेला के गर्ने?",
        "output": (
            "भूकम्पको बेला:\n"
            "१. तुरुन्त झुक्नुहोस्, ढाक्नुहोस् र समात्नुहोस् (Drop, Cover, Hold On)।\n"
            "२. झ्याल, ऐना र भारी सामानबाट टाढा रहनुहोस्।\n"
            "३. बाहिर हुनुहुन्छ भने खुला ठाउँमा जानुहोस् — भवन, रुख र बिजुलीका तारबाट टाढा।\n"
            "४. हल्लाउन बन्द भएपछि चोटपटक जाँच गर्नुहोस् र सावधानीसाथ बाहिर निस्कनुहोस्।\n"
            "५. परकम्पको लागि तयार रहनुहोस्।\n"
            "गम्भीर घाइतेको लागि तुरुन्त आपतकालीन सेवा (१०१ वा स्थानीय नम्बर) मा फोन गर्नुहोस्।"
        ),
    },
    {
        "lang": "Nepali",
        "instruction": "बाढीको बेला के गर्ने?",
        "output": (
            "बाढीको बेला:\n"
            "१. तुरुन्त अग्लो ठाउँमा जानुहोस् — निकासी आदेशको प्रतीक्षा नगर्नुहोस्।\n"
            "२. बाढीको पानीमा नहिँड्नुहोस् वा गाडी नचलाउनुहोस् — ६ इन्च पानीले पनि गिराउन सक्छ।\n"
            "३. बिजुलीका उपकरणहरू बन्द गर्नुहोस्।\n"
            "४. स्थानीय अधिकारीको निकासी मार्ग पछ्याउनुहोस्।\n"
            "५. आधिकारिक अलर्टको लागि रेडियो सुन्नुहोस्।\n"
            "खतरामा परेमा तुरुन्त आपतकालीन सेवामा सम्पर्क गर्नुहोस्।"
        ),
    },
    {
        "lang": "Nepali",
        "instruction": "विपद् व्यवस्थापनको लागि आधारभूत तयारी के के गर्नुपर्छ?",
        "output": (
            "विपद् तयारीका आधारभूत कदमहरू:\n"
            "१. परिवारको आपतकालीन योजना बनाउनुहोस् र अभ्यास गर्नुहोस्।\n"
            "२. ७२ घण्टाको आपतकालीन किट तयार राख्नुहोस्: पानी, खाना, औषधि, कागजात।\n"
            "३. निकासी मार्ग र नजिकको आश्रयस्थलको जानकारी राख्नुहोस्।\n"
            "४. समुदायको पूर्व चेतावनी प्रणालीमा सहभागी हुनुहोस्।\n"
            "५. विद्यालय, कार्यस्थल र समुदायमा नियमित अभ्यास गर्नुहोस्।\n"
            "राष्ट्रिय विपद् जोखिम न्यूनीकरण तथा व्यवस्थापन प्राधिकरण (NDRRMA) को निर्देशिका पालना गर्नुहोस्।"
        ),
    },
    {
        "lang": "Spanish",
        "instruction": "¿Qué debo hacer durante un terremoto?",
        "output": (
            "Durante un terremoto:\n"
            "1. Agáchese, cúbrase y sujétese (Drop, Cover, Hold On).\n"
            "2. Aléjese de ventanas, espejos y objetos pesados.\n"
            "3. Si está en el exterior, diríjase a un espacio abierto.\n"
            "4. Después del sismo, verifique heridos y salga con cuidado.\n"
            "5. Esté preparado para réplicas.\n"
            "Llame al número de emergencias de su país para reportar heridos graves."
        ),
    },
    {
        "lang": "French",
        "instruction": "Quelles sont les étapes à suivre en cas d'inondation ?",
        "output": (
            "En cas d'inondation :\n"
            "1. Montez immédiatement en hauteur – ne restez pas au niveau du sol.\n"
            "2. N'essayez jamais de traverser des eaux en crue, même à pied.\n"
            "3. Coupez l'électricité et le gaz si c'est sans danger.\n"
            "4. Suivez les consignes d'évacuation des autorités locales.\n"
            "5. Appelez les secours si vous êtes en danger immédiat.\n"
            "Restez à l'écoute des alertes officielles."
        ),
    },
    {
        "lang": "Arabic",
        "instruction": "ما هي الخطوات الواجب اتخاذها أثناء الإعصار؟",
        "output": (
            "أثناء الإعصار:\n"
            "١. ابق في الداخل بعيداً عن النوافذ والأبواب الزجاجية.\n"
            "٢. اذهب إلى الطابق الأرضي أو الغرفة الداخلية دون نوافذ.\n"
            "٣. لا تخرج لأن عين الإعصار تبدو هادئة لكنها مؤقتة.\n"
            "٤. استمع إلى تعليمات الدفاع المدني عبر الراديو أو التليفزيون.\n"
            "٥. ابتعد عن مجاري المياه والمناطق المنخفضة بعد مرور الإعصار."
        ),
    },
    {
        "lang": "Hindi",
        "instruction": "बाढ़ के दौरान क्या करें?",
        "output": (
            # The heading previously contained mis-encoded replacement
            # characters; restored to match the wording of the instruction.
            "बाढ़ के दौरान:\n"
            "1. तुरंत ऊंची जगह पर जाएं।\n"
            "2. बाढ़ के पानी में न चलें—6 इंच पानी भी गिरा सकता है।\n"
            "3. बिजली के उपकरण बंद करें।\n"
            "4. आधिकारिक निकासी मार्ग का पालन करें।\n"
            "5. आपातकालीन अलर्ट के लिए रेडियो सुनें।\n"
            "खतरे में होने पर तुरंत आपातकालीन सेवा को कॉल करें।"
        ),
    },
]
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
def build_compound_scenarios() -> list[InstructionSample]:
    """Turn every ``COMPOUND_SCENARIOS`` entry into an instruction sample.

    All samples share one fixed instruction. The input lists the scenario
    text, region, event type and severity on separate lines, and the output
    is the entry's ``"response"`` text unchanged.
    """
    task = "Provide a comprehensive emergency response plan for this compound disaster scenario."
    return [
        InstructionSample(
            instruction=task,
            input="\n".join(
                (
                    f"Scenario: {entry['scenario']}",
                    f"Region: {entry['region']}",
                    f"Event type: {entry['event_type']}",
                    f"Severity: {entry['severity']}",
                )
            ),
            output=entry["response"],
        )
        for entry in COMPOUND_SCENARIOS
    ]
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
def build_multilingual_samples() -> list[InstructionSample]:
    """Turn every ``MULTILINGUAL_SAMPLES`` entry into an instruction sample.

    The input is always the empty string, and the entry's ``"lang"`` value
    becomes the sample's ``language``.
    """
    return [
        InstructionSample(
            instruction=entry["instruction"],
            input="",
            output=entry["output"],
            language=entry["lang"],
        )
        for entry in MULTILINGUAL_SAMPLES
    ]
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
def build_all_scenarios() -> list[InstructionSample]:
    """Return the compound-scenario samples followed by the multilingual samples."""
    return [*build_compound_scenarios(), *build_multilingual_samples()]
|
worlddisasterlm/data/schemas.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from dataclasses import dataclass
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
@dataclass
class DisasterRecord:
    """One disaster event as gathered from a data source; all fields are plain strings."""

    source: str  # label of the originating provider, e.g. "WHO", "ReliefWeb", "USGS"
    event_type: str  # hazard category, e.g. "flood", "epidemic", "earthquake"
    region: str  # free-text location, e.g. "South Asia"
    summary: str  # free-text description of the situation
    severity: str  # severity label, e.g. "high" or "critical"
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
@dataclass
class InstructionSample:
    """One instruction-tuning example: an instruction, its input context and the target output."""

    instruction: str  # the task or question posed
    input: str  # context accompanying the instruction; "" when there is none
    output: str  # the target response text
    language: str = "English"  # language name of the sample, e.g. "Nepali"
|
worlddisasterlm/data/sources.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reference lists of data-source names, grouped by category.
# NOTE(review): no consumer of these constants is visible in this part of the
# file; presumably they serve as documentation/configuration. Confirm.

# Acronyms of international organisations.
INTERNATIONAL_ORGS = [
    "UN",
    "UNDRR",
    "WHO",
    "UNICEF",
    "WFP",
    "UNHCR",
]

# Names of disaster-related databases and data providers.
DISASTER_DATABASES = [
    "EM-DAT",
    "ReliefWeb",
    "GDACS",
    "NASA Earth Data",
    "NOAA",
    "USGS",
    "FEMA",
    "World Bank Open Data",
]

# Categories of research and guidance literature.
RESEARCH_SOURCES = [
    "Scientific papers",
    "Government reports",
    "Emergency response manuals",
    "Disaster preparedness guidelines",
    "Humanitarian response frameworks",
]

# Categories of real-time information feeds.
REAL_TIME_SOURCES = [
    "Weather feeds",
    "Satellite imagery metadata",
    "Emergency alerts",
    "Public safety bulletins",
]
|