File size: 1,046 Bytes
"""Dataset preparation entrypoint."""
from __future__ import annotations
import argparse
from pathlib import Path
import pandas as pd
from dipauglib.utils.dataset import SplitConfig, build_split_manifest, save_manifest
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse command-line options for the dataset preparation script.

    Args:
        argv: Argument strings to parse. When ``None`` (the default),
            argparse falls back to ``sys.argv[1:]``, so existing
            no-argument callers behave exactly as before. Passing an
            explicit list lets the parser be driven programmatically.

    Returns:
        Namespace with ``input_csv`` (a ``Path``, or ``None`` when the
        option is omitted) and ``output_csv`` (a ``Path`` that defaults to
        ``results/manifests/split_manifest.csv``).
    """
    parser = argparse.ArgumentParser(description="Prepare wheat disease dataset manifest.")
    # --input-csv stays optional: callers treat a missing value as
    # "nothing to do" rather than as a usage error.
    parser.add_argument(
        "--input-csv",
        type=Path,
        required=False,
        help="Input CSV with columns path,label",
    )
    parser.add_argument(
        "--output-csv",
        type=Path,
        default=Path("results/manifests/split_manifest.csv"),
        help="Output path for the split manifest CSV (default: %(default)s)",
    )
    return parser.parse_args(argv)
def main() -> None:
    """Run the dataset preparation command-line workflow.

    Parses the command-line options via ``parse_args``. If no input CSV was
    given, prints a hint and returns without doing anything else. Otherwise
    the CSV is loaded with pandas, handed to ``build_split_manifest`` with
    ``label_column="label"`` and ``SplitConfig(seed=42)``, and the result is
    passed to ``save_manifest`` together with the requested output path.
    """
    cli = parse_args()
    source_csv = cli.input_csv
    destination = cli.output_csv

    if source_csv is not None:
        frame = pd.read_csv(source_csv)
        # Fixed seed is hard-coded here; presumably it keeps the split
        # reproducible between runs (confirm against SplitConfig).
        split_config = SplitConfig(seed=42)
        split_manifest = build_split_manifest(
            frame,
            label_column="label",
            config=split_config,
        )
        save_manifest(split_manifest, destination)
        print(f"Saved manifest to {destination}")
        return

    # No input supplied: report that the scaffold is in place and stop.
    print("Scaffold ready. Provide --input-csv to build a split manifest.")
# Run the CLI only when this file is executed as a script; importing the
# module must not trigger argument parsing or any file I/O.
if __name__ == "__main__":
    main()
|