cascade_risk / scripts /01_parse_emdat.py
Lucasoppem's picture
Sync from GitHub main (part 2)
36f9d47 verified
Raw
History Blame Contribute Delete
967 Bytes
"""Step 1: Parse EMDAT data and create event catalog with train/test splits."""
from src.data.emdat_parser import parse_emdat, save_events, split_events
from src.llm.client import load_config
def main():
    """Build the event catalog and its train/test subsets, then save all three.

    The configuration returned by ``load_config()`` is passed to both
    ``parse_emdat`` and ``split_events``; output locations are read from
    its ``"paths"`` entry under the keys ``"events_catalog"``,
    ``"train_events"`` and ``"test_events"``. Progress is reported on
    stdout.
    """
    cfg = load_config()
    out_paths = cfg["paths"]

    print("Parsing EMDAT data...")
    all_events = parse_emdat(cfg)
    print(f" Found {len(all_events)} European flood events")

    print("Splitting into train/test sets...")
    train_set, test_set = split_events(all_events, cfg)
    # NOTE(review): the year ranges below are hard-coded labels; the real
    # split is whatever split_events() derives from the config -- confirm
    # the two stay in agreement.
    print(f" Train (2023-2024): {len(train_set)} events")
    print(f" Test (2025): {len(test_set)} events")

    print("Saving...")
    # Write every file first (catalog, train, test -- in that order) and
    # only then report the destinations.
    for subset, path_key in (
        (all_events, "events_catalog"),
        (train_set, "train_events"),
        (test_set, "test_events"),
    ):
        save_events(subset, out_paths[path_key])

    print(f" Catalog: {out_paths['events_catalog']}")
    print(f" Train: {out_paths['train_events']}")
    print(f" Test: {out_paths['test_events']}")
    print("Done.")
# Run the step only when this file is executed as a script; importing the
# module must not trigger parsing or any file writes.
if __name__ == "__main__":
    main()