idacy's picture
Upload live inference API deployment files
e4b1ed6 verified
Raw
History Blame Contribute Delete
1.99 kB
"""Prepare leakage-safe feature metadata for the synthetic v0 modeling run."""
from __future__ import annotations
import argparse
from pathlib import Path
try:
from .common import DEFAULT_SEED, determine_feature_columns, ensure_dir, load_feature_table, make_episode_split, write_json
except ImportError: # pragma: no cover - direct script execution
from common import DEFAULT_SEED, determine_feature_columns, ensure_dir, load_feature_table, make_episode_split, write_json
def prepare_features(features_path: Path, output_dir: Path, seed: int = DEFAULT_SEED) -> dict[str, object]:
    """Write split and feature-column metadata for a feature table.

    The table at ``features_path`` is loaded with ``load_feature_table`` and
    handed, together with ``seed``, to ``make_episode_split``. The resulting
    split table is passed to ``determine_feature_columns`` to obtain the model
    feature columns and the excluded columns. After ``ensure_dir`` is called
    on ``output_dir``, three JSON files are written there:
    ``split_manifest.json``, ``feature_columns.json`` and
    ``excluded_columns.json``.

    Args:
        features_path: Location of the feature table to load.
        output_dir: Directory that receives the JSON artifacts.
        seed: Seed forwarded to ``make_episode_split``.

    Returns:
        A summary dict with the keys ``rows``, ``episodes``,
        ``feature_count`` and ``split_summary``.
    """
    table = load_feature_table(features_path)
    split_table, manifest = make_episode_split(table, seed=seed)
    model_columns, dropped_columns = determine_feature_columns(split_table)

    ensure_dir(output_dir)
    # Artifacts are written in a fixed order: manifest, kept columns, dropped columns.
    artifacts = (
        ("split_manifest.json", manifest),
        ("feature_columns.json", model_columns),
        ("excluded_columns.json", dropped_columns),
    )
    for filename, payload in artifacts:
        write_json(output_dir / filename, payload)

    # The summary is assembled only after every artifact has been written.
    summary: dict[str, object] = {}
    summary["rows"] = int(len(split_table))
    summary["episodes"] = int(split_table["episode_id"].nunique())
    summary["feature_count"] = int(len(model_columns))
    summary["split_summary"] = manifest["summary"]
    return summary
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: prepare feature metadata and print a summary.

    Args:
        argv: Argument list to parse. ``None`` is passed through to
            ``argparse``, which then reads the process arguments.

    Returns:
        ``0`` once the summary has been printed.
    """
    cli_parser = argparse.ArgumentParser(description=__doc__)
    cli_parser.add_argument("--features", type=Path, required=True)
    cli_parser.add_argument("--output", type=Path, required=True)
    cli_parser.add_argument("--seed", type=int, default=DEFAULT_SEED)
    parsed = cli_parser.parse_args(argv)

    summary = prepare_features(
        features_path=parsed.features,
        output_dir=parsed.output,
        seed=parsed.seed,
    )

    # Headline counts first, then one line per entry of the split summary.
    print(f"rows: {summary['rows']}")
    print(f"episodes: {summary['episodes']}")
    print(f"model_features: {summary['feature_count']}")
    per_split = summary["split_summary"]
    for split, values in per_split.items():
        print(f"{split}: rows={values['rows']} episodes={values['episodes']}")

    return 0
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)