File size: 1,340 Bytes
3b9fcc7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import json
from pathlib import Path

from worlddisasterlm.data.schemas import DisasterRecord, InstructionSample
from worlddisasterlm.utils.io import ensure_dir


def build_instruction_dataset(records: list[DisasterRecord]) -> list[InstructionSample]:
    """Convert disaster records into instruction-tuning samples.

    Every record produces exactly one sample. The instruction and the output
    are the same fixed strings for all samples; only the input differs, and
    it is rendered from the record's ``region``, ``event_type``, ``severity``
    and ``summary`` attributes.

    Args:
        records: Disaster records to convert.

    Returns:
        One ``InstructionSample`` per record, in the order given.
    """
    prompt = "Assess the incident and provide emergency response steps."
    # The response checklist is identical for every sample, so build it once.
    response_steps = " ".join(
        (
            "1) Verify official alerts and incident perimeter.",
            "2) Prioritize life-saving response and medical triage.",
            "3) Coordinate shelter, water, food, and transport logistics.",
            "4) Share multilingual updates every 30 minutes.",
        )
    )
    return [
        InstructionSample(
            instruction=prompt,
            input="\n".join(
                (
                    f"Region: {item.region}",
                    f"Event: {item.event_type}",
                    f"Severity: {item.severity}",
                    f"Situation: {item.summary}",
                )
            ),
            output=response_steps,
        )
        for item in records
    ]


def save_instruction_dataset(samples: list[InstructionSample], output_path: str) -> Path:
    """Write samples to ``output_path`` as JSON Lines and return the path.

    ``ensure_dir`` is called on the destination's parent directory before the
    file is opened. The file is opened in write mode with UTF-8 encoding, and
    each sample's ``__dict__`` is serialized as one JSON object per line with
    non-ASCII characters left unescaped.

    Args:
        samples: Samples to serialize, one per output line.
        output_path: Destination file path.

    Returns:
        The destination as a ``Path``.
    """
    destination = Path(output_path)
    ensure_dir(destination.parent)
    with destination.open("w", encoding="utf-8") as out_file:
        out_file.writelines(
            json.dumps(entry.__dict__, ensure_ascii=False) + "\n" for entry in samples
        )
    return destination