| """Dataset preparation entrypoint.""" | |
| from __future__ import annotations | |
| import argparse | |
| from pathlib import Path | |
| import pandas as pd | |
| from dipauglib.utils.dataset import SplitConfig, build_split_manifest, save_manifest | |
def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options for manifest preparation.

    Args:
        argv: Argument strings to parse. When ``None`` (the default),
            argparse reads ``sys.argv[1:]``, which is what the script
            entry point relies on.

    Returns:
        A namespace with ``input_csv`` (a ``Path``, or ``None`` when the
        option is omitted) and ``output_csv`` (a ``Path``).
    """
    parser = argparse.ArgumentParser(
        description="Prepare wheat disease dataset manifest.",
    )
    # --input-csv is optional so the script can be run bare; the caller
    # decides what to do when it is missing.
    parser.add_argument(
        "--input-csv",
        type=Path,
        required=False,
        help="Input CSV with columns path,label",
    )
    parser.add_argument(
        "--output-csv",
        type=Path,
        default=Path("results/manifests/split_manifest.csv"),
    )
    return parser.parse_args(argv)
def main() -> None:
    """Read the input CSV, build a split manifest, and save it.

    When ``--input-csv`` is not given, a hint is printed and the function
    returns without reading or writing anything. Otherwise the CSV is
    loaded with pandas, passed to ``build_split_manifest`` with
    ``label_column="label"`` and ``SplitConfig(seed=42)``, and the result
    is handed to ``save_manifest`` together with the output path.
    """
    args = parse_args()
    if args.input_csv is None:
        print("Scaffold ready. Provide --input-csv to build a split manifest.")
        return

    records = pd.read_csv(args.input_csv)
    # NOTE(review): the seed is pinned, presumably for reproducible splits;
    # confirm against SplitConfig.
    manifest = build_split_manifest(
        records,
        label_column="label",
        config=SplitConfig(seed=42),
    )

    # The default output path points into results/manifests/, which may not
    # exist yet. Creating it here is harmless if save_manifest already does
    # so (not visible from this file).
    args.output_csv.parent.mkdir(parents=True, exist_ok=True)
    save_manifest(manifest, args.output_csv)
    print(f"Saved manifest to {args.output_csv}")
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()