dipaug-project-hub / scripts /prepare_dataset.py
abersbail's picture
Deploy fixed DIPAug project hub
b5c1055 verified
"""Dataset preparation entrypoint."""
from __future__ import annotations
import argparse
from pathlib import Path
import pandas as pd
from dipauglib.utils.dataset import SplitConfig, build_split_manifest, save_manifest
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(description="Prepare wheat disease dataset manifest.")
parser.add_argument("--input-csv", type=Path, required=False, help="Input CSV with columns path,label")
parser.add_argument("--output-csv", type=Path, default=Path("results/manifests/split_manifest.csv"))
return parser.parse_args()
def main() -> None:
    """Build a split manifest from the input CSV and save it.

    When ``--input-csv`` is not supplied the script only prints a hint and
    returns. Otherwise the CSV is loaded with pandas, passed to
    ``build_split_manifest`` using the ``label`` column and a fixed seed of
    42, and the result is handed to ``save_manifest`` together with the
    ``--output-csv`` path.
    """
    args = parse_args()
    if args.input_csv is None:
        print("Scaffold ready. Provide --input-csv to build a split manifest.")
        return

    records = pd.read_csv(args.input_csv)
    manifest = build_split_manifest(records, label_column="label", config=SplitConfig(seed=42))

    # The default output lives in a nested directory that may not exist on a
    # fresh checkout. save_manifest is not visible from this file, so create
    # the parent here; this is a no-op if the directory is already present.
    args.output_csv.parent.mkdir(parents=True, exist_ok=True)
    save_manifest(manifest, args.output_csv)
    print(f"Saved manifest to {args.output_csv}")
# Run the entrypoint only when this file is executed as a script, so that
# importing the module does not trigger argument parsing or any file I/O.
if __name__ == "__main__":
    main()