drdeveloper88's picture
Add usage code example (from_pretrained + BitsAndBytes + multilingual) to README
3b9fcc7
Raw
History Blame Contribute Delete
1.34 kB
import json
from pathlib import Path
from worlddisasterlm.data.schemas import DisasterRecord, InstructionSample
from worlddisasterlm.utils.io import ensure_dir
def build_instruction_dataset(records: list[DisasterRecord]) -> list[InstructionSample]:
    """Turn disaster records into instruction-tuning samples.

    Every sample carries the same instruction and the same four-step
    response text; only the ``input`` field varies, rendered from the
    record's ``region``, ``event_type``, ``severity`` and ``summary``.

    Args:
        records: Disaster records to convert.

    Returns:
        One ``InstructionSample`` per record, in the order given.
    """
    # Neither the prompt nor the reference answer depends on the record,
    # so both are built once up front.
    prompt = "Assess the incident and provide emergency response steps."
    response = (
        "1) Verify official alerts and incident perimeter. "
        "2) Prioritize life-saving response and medical triage. "
        "3) Coordinate shelter, water, food, and transport logistics. "
        "4) Share multilingual updates every 30 minutes."
    )

    def render_input(record):
        # Four-line, human-readable description of a single record.
        return (
            f"Region: {record.region}\nEvent: {record.event_type}\nSeverity: {record.severity}\n"
            f"Situation: {record.summary}"
        )

    return [
        InstructionSample(instruction=prompt, input=render_input(record), output=response)
        for record in records
    ]
def save_instruction_dataset(samples: list[InstructionSample], output_path: str) -> Path:
    """Write instruction samples to a JSON Lines file.

    Each sample's ``__dict__`` is serialized as one JSON object per line,
    UTF-8 encoded with non-ASCII characters written verbatim. The parent
    directory is passed to ``ensure_dir`` before the file is opened.

    Args:
        samples: Samples to serialize, written in iteration order.
        output_path: Destination file path; an existing file is overwritten.

    Returns:
        The destination as a ``Path``.
    """
    destination = Path(output_path)
    ensure_dir(destination.parent)
    with destination.open("w", encoding="utf-8") as stream:
        # One JSON document per line, produced lazily as the file is written.
        stream.writelines(
            f"{json.dumps(sample.__dict__, ensure_ascii=False)}\n" for sample in samples
        )
    return destination