"""Write a small JSONL sample-input file for the inference code.

Up to five records are taken from ``SOURCE_FILE`` (a JSON array of
prediction items) when that file exists; otherwise the built-in
``FALLBACK_SAMPLES`` are used.  Each record is serialised as one JSON
object per line into ``OUTPUT_FILE``.
"""
from __future__ import annotations

import json
from pathlib import Path

CURRENT_FILE = Path(__file__).resolve()

# Maximum number of items taken from the head of SOURCE_FILE.
_SAMPLE_LIMIT = 5


def _find_inference_root() -> Path:
    """Return the directory treated as the inference root.

    Walks upwards from this file and returns the first ancestor that
    contains ``model``, ``code`` and ``data`` entries.  When no ancestor
    matches, falls back to the third ancestor, clamped to the shallowest
    one available so that a script placed near the filesystem root does
    not fail with ``IndexError`` at import time.
    """
    markers = ("model", "code", "data")
    ancestors = CURRENT_FILE.parents
    for candidate in ancestors:
        if all((candidate / marker).exists() for marker in markers):
            return candidate
    return ancestors[min(2, len(ancestors) - 1)]


INFERENCE_ROOT = _find_inference_root()
PROJECT_ROOT = INFERENCE_ROOT.parent.parent
SOURCE_FILE = PROJECT_ROOT / "LightXML_B" / "infer_outcome" / "full_v1_epoch9_test50_top10.json"
OUTPUT_FILE = INFERENCE_ROOT / "data" / "input" / "sample_raw_input_5.jsonl"

# Keys of an item's "input" mapping, in the order their texts are joined.
# NOTE(review): there is no "z4_*" key in this list - confirm that is intentional.
ZONE_KEYS = [
    "z1_header",
    "z2_facts",
    "z3_issues",
    "z5_result",
    "z6_dissent",
    "z7_other",
]

# Records written when SOURCE_FILE does not exist.
FALLBACK_SAMPLES = [
    {"id": "sample_1", "text": "原告主张被告未按约还款,请求判令偿还借款本息及相关费用。", "labels": []},
    {"id": "sample_2", "text": "双方就民间借贷关系发生争议,法院审查借条、转账记录后作出判决。", "labels": []},
    {"id": "sample_3", "text": "被告抗辩借款已部分清偿,原告提交证据不足以完全支持其主张。", "labels": []},
    {"id": "sample_4", "text": "本院认为借贷关系成立,逾期利息应按合同约定及法律规定计算。", "labels": []},
    {"id": "sample_5", "text": "判决被告于限定期限内支付本金、利息及案件受理费。", "labels": []},
]


def _zone_text(zones: object) -> str:
    """Join the non-blank zone texts of *zones* in ``ZONE_KEYS`` order.

    Zones that are missing, ``None``, not a string, or whitespace-only are
    skipped.  Kept texts are joined with newlines exactly as stored (they
    are not stripped).  A *zones* value that is not a mapping yields ``""``.
    """
    if not isinstance(zones, dict):
        return ""
    parts = []
    for key in ZONE_KEYS:
        value = zones.get(key)
        # A present-but-null zone must be skipped rather than crash on .strip().
        if isinstance(value, str) and value.strip():
            parts.append(value)
    return "\n".join(parts)


def _load_source_records(path: Path) -> list[dict]:
    """Build sample records from the first ``_SAMPLE_LIMIT`` items of *path*.

    Raises:
        ValueError: if the file's top-level JSON value is not an array, or
            one of the selected items is not a JSON object.
    """
    payload = json.loads(path.read_text(encoding="utf-8"))
    if not isinstance(payload, list):
        raise ValueError(
            f"expected a JSON array in {path}, got {type(payload).__name__}"
        )

    records = []
    for index, item in enumerate(payload[:_SAMPLE_LIMIT], start=1):
        if not isinstance(item, dict):
            raise ValueError(
                f"item {index} in {path} is not a JSON object: {type(item).__name__}"
            )
        records.append(
            {
                "id": f"sample_{index}",
                "text": _zone_text(item.get("input")),
                "labels": item.get("labels", []),
            }
        )
    return records


def main() -> None:
    """Generate ``OUTPUT_FILE`` from ``SOURCE_FILE`` or the fallback samples.

    Creates the output directory if needed and prints which source was
    used and where the result was written.
    """
    records: list[dict]
    if SOURCE_FILE.exists():
        records = _load_source_records(SOURCE_FILE)
        print(f"[source] {SOURCE_FILE}")
    else:
        records = FALLBACK_SAMPLES
        print("[source] external infer_outcome not found, use built-in fallback samples")

    OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True)
    # One JSON object per line; no trailing newline, matching the existing output format.
    OUTPUT_FILE.write_text(
        "\n".join(json.dumps(record, ensure_ascii=False) for record in records),
        encoding="utf-8",
    )
    print(f"[out] {OUTPUT_FILE}")


if __name__ == "__main__":
    main()