"""Build and persist instruction-tuning samples derived from disaster records."""

import json
from pathlib import Path

from worlddisasterlm.data.schemas import DisasterRecord, InstructionSample
from worlddisasterlm.utils.io import ensure_dir


def build_instruction_dataset(records: list[DisasterRecord]) -> list[InstructionSample]:
    """Create one instruction sample per disaster record.

    Every sample shares the same instruction and the same four-step response
    text; only the ``input`` field varies, and it is formatted from the
    record's ``region``, ``event_type``, ``severity`` and ``summary``.

    Args:
        records: Records to convert. An empty list yields an empty list.

    Returns:
        The samples, in the same order as ``records``.
    """
    # Both strings are identical for every record, so build them once
    # instead of once per loop iteration.
    instruction = "Assess the incident and provide emergency response steps."
    output = (
        "1) Verify official alerts and incident perimeter. "
        "2) Prioritize life-saving response and medical triage. "
        "3) Coordinate shelter, water, food, and transport logistics. "
        "4) Share multilingual updates every 30 minutes."
    )

    return [
        InstructionSample(
            instruction=instruction,
            input=(
                f"Region: {record.region}\nEvent: {record.event_type}\nSeverity: {record.severity}\n"
                f"Situation: {record.summary}"
            ),
            output=output,
        )
        for record in records
    ]


def save_instruction_dataset(samples: list[InstructionSample], output_path: str) -> Path:
    """Write samples to ``output_path`` as UTF-8 JSON Lines.

    Each sample's instance attributes are serialized as one JSON object per
    line. Non-ASCII characters are written as-is rather than ``\\uXXXX``
    escaped. An existing file at ``output_path`` is overwritten.

    Args:
        samples: Samples to serialize; each must expose its fields through
            ``__dict__`` with JSON-serializable values.
        output_path: Destination file path.

    Returns:
        ``output_path`` as a ``Path``.
    """
    target = Path(output_path)
    # NOTE(review): ensure_dir is defined elsewhere; presumably it creates the
    # parent directory when missing -- confirm against worlddisasterlm.utils.io.
    ensure_dir(target.parent)
    with target.open("w", encoding="utf-8") as handle:
        handle.writelines(
            json.dumps(vars(sample), ensure_ascii=False) + "\n" for sample in samples
        )
    return target