| from __future__ import annotations |
|
|
| import json |
| from pathlib import Path |
|
|
|
|
# Absolute path of this script with symlinks resolved. ``strict=False`` is the
# default and is spelled out only to make the non-raising behaviour explicit.
CURRENT_FILE: Path = Path(__file__).resolve(strict=False)
|
|
|
|
def _find_inference_root() -> Path:
    """Locate the inference root by walking up from ``CURRENT_FILE``.

    Returns:
        The nearest ancestor of ``CURRENT_FILE`` that contains entries named
        ``model``, ``code`` and ``data``. If no ancestor qualifies, the
        third ancestor (``CURRENT_FILE.parents[2]``) is returned instead.
    """
    required_entries = ("model", "code", "data")
    candidates = (
        ancestor
        for ancestor in CURRENT_FILE.parents
        if all((ancestor / entry).exists() for entry in required_entries)
    )
    found = next(candidates, None)
    if found is not None:
        return found
    # Fallback is computed lazily so it is only touched when nothing matched.
    return CURRENT_FILE.parents[2]
|
|
|
|
# Directory layout anchors: the inference root is discovered at import time,
# and the project root is taken to be two levels above it.
INFERENCE_ROOT = _find_inference_root()
PROJECT_ROOT = INFERENCE_ROOT.parent.parent

# Input JSON that main() reads when it exists, and the JSONL file it writes.
SOURCE_FILE = PROJECT_ROOT.joinpath(
    "LightXML_B",
    "infer_outcome",
    "full_v1_epoch9_test50_top10.json",
)
OUTPUT_FILE = INFERENCE_ROOT.joinpath("data", "input", "sample_raw_input_5.jsonl")

# Keys looked up in each item's "input" mapping, in concatenation order.
# NOTE(review): there is no "z4_*" key in this list; confirm that is intentional.
ZONE_KEYS = [
    "z1_header",
    "z2_facts",
    "z3_issues",
    "z5_result",
    "z6_dissent",
    "z7_other",
]

# Texts of the built-in records used when SOURCE_FILE is not present.
_FALLBACK_TEXTS = (
    "原告主张被告未按约还款,请求判令偿还借款本息及相关费用。",
    "双方就民间借贷关系发生争议,法院审查借条、转账记录后作出判决。",
    "被告抗辩借款已部分清偿,原告提交证据不足以完全支持其主张。",
    "本院认为借贷关系成立,逾期利息应按合同约定及法律规定计算。",
    "判决被告于限定期限内支付本金、利息及案件受理费。",
)

# Same record shape as the converted source items: id / text / labels, with
# ids numbered from 1 and an independent empty label list per record.
FALLBACK_SAMPLES = [
    {"id": f"sample_{number}", "text": body, "labels": []}
    for number, body in enumerate(_FALLBACK_TEXTS, start=1)
]
|
|
|
|
def _zones_to_text(zones: dict) -> str:
    """Join the non-blank ``ZONE_KEYS`` values of *zones* with newlines.

    A zone that is missing, ``None`` (JSON ``null``) or whitespace-only is
    skipped. Values are emitted unmodified, in ``ZONE_KEYS`` order.
    """
    parts = []
    for key in ZONE_KEYS:
        # ``or ""`` covers an explicit null, which ``.get(key, "")`` would
        # hand back as None and then fail on ``.strip()``.
        value = zones.get(key) or ""
        if value.strip():
            parts.append(value)
    return "\n".join(parts)


def main() -> None:
    """Write sample records to ``OUTPUT_FILE`` as JSON Lines.

    If ``SOURCE_FILE`` exists it is parsed as JSON and its first five items
    are converted to ``{"id", "text", "labels"}`` records, where ``text`` is
    built from the item's ``"input"`` zones. Otherwise ``FALLBACK_SAMPLES``
    is written as-is. The output directory is created when missing, and the
    chosen source and the output path are printed to stdout.
    """
    records: list[dict]

    if SOURCE_FILE.exists():
        items = json.loads(SOURCE_FILE.read_text(encoding="utf-8"))[:5]
        records = [
            {
                "id": f"sample_{i}",
                # A null "input" is treated like a missing one.
                "text": _zones_to_text(item.get("input") or {}),
                "labels": item.get("labels", []),
            }
            for i, item in enumerate(items, start=1)
        ]
        print(f"[source] {SOURCE_FILE}")
    else:
        records = FALLBACK_SAMPLES
        print("[source] external infer_outcome not found, use built-in fallback samples")

    OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True)
    # One JSON document per line; ensure_ascii=False keeps non-ASCII text readable.
    OUTPUT_FILE.write_text(
        "\n".join(json.dumps(record, ensure_ascii=False) for record in records),
        encoding="utf-8",
    )
    print(f"[out] {OUTPUT_FILE}")
|
|
|
|
# Run the export only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|