Spaces:
Sleeping
Sleeping
| """Prepare leakage-safe feature metadata for the synthetic v0 modeling run.""" | |
| from __future__ import annotations | |
| import argparse | |
| from pathlib import Path | |
| try: | |
| from .common import DEFAULT_SEED, determine_feature_columns, ensure_dir, load_feature_table, make_episode_split, write_json | |
| except ImportError: # pragma: no cover - direct script execution | |
| from common import DEFAULT_SEED, determine_feature_columns, ensure_dir, load_feature_table, make_episode_split, write_json | |
def prepare_features(features_path: Path, output_dir: Path, seed: int = DEFAULT_SEED) -> dict[str, object]:
    """Write split and feature-column metadata for a feature table.

    The table at ``features_path`` is read with ``load_feature_table`` and
    passed to ``make_episode_split`` together with ``seed``; the resulting
    frame is then handed to ``determine_feature_columns``. Three JSON files
    are written into ``output_dir`` (created via ``ensure_dir``):
    ``split_manifest.json``, ``feature_columns.json`` and
    ``excluded_columns.json``.

    Returns:
        A summary dict with the keys ``rows``, ``episodes``,
        ``feature_count`` and ``split_summary`` (the ``"summary"`` entry of
        the split manifest).
    """
    table = load_feature_table(features_path)
    split_df, split_manifest = make_episode_split(table, seed=seed)
    feature_columns, excluded_columns = determine_feature_columns(split_df)

    ensure_dir(output_dir)
    # Filename -> payload, written in this fixed order.
    artifacts = (
        ("split_manifest.json", split_manifest),
        ("feature_columns.json", feature_columns),
        ("excluded_columns.json", excluded_columns),
    )
    for filename, payload in artifacts:
        write_json(output_dir / filename, payload)

    row_count = len(split_df)
    episode_count = int(split_df["episode_id"].nunique())
    summary: dict[str, object] = {
        "rows": row_count,
        "episodes": episode_count,
        "feature_count": len(feature_columns),
        "split_summary": split_manifest["summary"],
    }
    return summary
def main(argv: list[str] | None = None) -> int:
    """Parse command-line options, run ``prepare_features`` and print a summary.

    Args:
        argv: Argument list handed to ``ArgumentParser.parse_args``; ``None``
            lets argparse fall back to ``sys.argv``.

    Returns:
        Always ``0``.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    # Both path options are mandatory and share the same configuration.
    for flag in ("--features", "--output"):
        cli.add_argument(flag, type=Path, required=True)
    cli.add_argument("--seed", type=int, default=DEFAULT_SEED)
    options = cli.parse_args(argv)

    summary = prepare_features(options.features, options.output, seed=options.seed)

    # Printed label -> key in the summary dict.
    headline = (
        ("rows", "rows"),
        ("episodes", "episodes"),
        ("model_features", "feature_count"),
    )
    for label, key in headline:
        print(f"{label}: {summary[key]}")

    for split_name, stats in summary["split_summary"].items():
        print(f"{split_name}: rows={stats['rows']} episodes={stats['episodes']}")
    return 0
# Script entry point: run the CLI and use its return value as the exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)