NLP_Project / scripts /run_cancer_risk_batch.py
hchevva's picture
Upload 43 files
630d650 verified
#!/usr/bin/env python3
import argparse
import json
import sys
from pathlib import Path
import pandas as pd
# Project root is two levels above this file (the parent of the directory
# holding this script). It is put at the front of sys.path, once, so the
# `toxra_core` imports below resolve when the script is run directly.
ROOT = Path(__file__).resolve().parents[1]
if sys.path.count(str(ROOT)) == 0:
    sys.path.insert(0, str(ROOT))
from toxra_core.artifacts import make_run_dir, write_dataframe_csv, write_json
from toxra_core.calculation_client import run_batch_cancer_risk
from toxra_core.contracts import CANCER_RISK_TEMPLATE_COLUMNS
def main() -> None:
    """Run a cancer risk batch from an input CSV and persist the results.

    Command-line options:
        --input-csv  Path to the input CSV (required).
        --run-id     Optional run ID; an empty value is passed on as None.
        --runs-dir   Base directory for run output (default "runs").

    The CSV is loaded with pandas, checked for every column named in
    CANCER_RISK_TEMPLATE_COLUMNS, and handed row by row (as dicts, with
    missing values replaced by empty strings) to run_batch_cancer_risk.
    The returned rows are written to ``cancer_risk_results.csv`` and the
    full result to ``cancer_risk_results.json`` inside the run directory.
    A JSON summary of the produced paths is printed to stdout.

    Raises:
        FileNotFoundError: if the input CSV path does not exist.
        ValueError: if any required template column is absent.
    """
    cli = argparse.ArgumentParser(
        description="Run deterministic cancer risk batch using local MCP server",
    )
    cli.add_argument(
        "--input-csv",
        required=True,
        help="Path to cancer risk input CSV",
    )
    cli.add_argument("--run-id", default="", help="Optional run ID")
    cli.add_argument("--runs-dir", default="runs", help="Runs base directory")
    opts = cli.parse_args()

    # Fail early with a clear message before pandas touches the path.
    csv_path = Path(opts.input_csv)
    if not csv_path.exists():
        raise FileNotFoundError(f"Input CSV not found: {csv_path}")

    frame = pd.read_csv(csv_path)
    absent = [
        column
        for column in CANCER_RISK_TEMPLATE_COLUMNS
        if column not in frame.columns
    ]
    if absent:
        raise ValueError(f"Missing required columns: {absent}")

    # An empty --run-id is forwarded as None rather than "".
    run_dir = make_run_dir(run_id=opts.run_id or None, base_dir=opts.runs_dir)

    # Blank out NaN cells, then convert to one dict per CSV row.
    filled = frame.fillna("")
    records = filled.to_dict("records")
    result = run_batch_cancer_risk(records, run_dir=str(run_dir))

    # Only a real list of rows is tabulated; anything else yields an empty frame.
    returned_rows = result.get("rows", [])
    if not isinstance(returned_rows, list):
        returned_rows = []
    results_frame = pd.DataFrame(returned_rows)

    csv_target = run_dir / "cancer_risk_results.csv"
    json_target = run_dir / "cancer_risk_results.json"
    write_dataframe_csv(csv_target, results_frame)
    write_json(json_target, result)

    report = {
        "run_dir": str(run_dir),
        "results_csv": str(csv_target),
        "results_json": str(json_target),
        "log_jsonl": result.get("artifacts", {}).get("log_jsonl", ""),
        "report_md": result.get("artifacts", {}).get("report_md", ""),
        "summary": result.get("summary", {}),
    }
    print(json.dumps(report, indent=2))
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()