WilliamCHN's picture
Upload standalone Inference of LightXML package
f0bda3b verified
from __future__ import annotations
import json
from pathlib import Path
# Absolute, symlink-resolved path of this script; its ancestor directories are
# searched to locate the inference root.
CURRENT_FILE = Path(__file__).resolve()
def _find_inference_root() -> Path:
    """Return the nearest ancestor of this script that looks like the inference root.

    An ancestor qualifies when it contains entries named ``model``, ``code``
    and ``data``. Ancestors are examined from the closest outwards and the
    first match wins.

    Returns:
        The matching ancestor directory, or ``CURRENT_FILE.parents[2]`` when
        no ancestor contains all three entries.
    """
    required_entries = ("model", "code", "data")
    ancestors = CURRENT_FILE.parents
    first_match = next(
        (
            candidate
            for candidate in ancestors
            if all((candidate / entry).exists() for entry in required_entries)
        ),
        None,
    )
    if first_match is None:
        # Nothing matched the expected layout: fall back to a fixed depth.
        return ancestors[2]
    return first_match
# Directory returned by _find_inference_root(); computed once at import time.
INFERENCE_ROOT = _find_inference_root()
# Two directory levels above the inference root.
PROJECT_ROOT = INFERENCE_ROOT.parent.parent
# External JSON file that main() samples records from when it exists.
SOURCE_FILE = PROJECT_ROOT / "LightXML_B" / "infer_outcome" / "full_v1_epoch9_test50_top10.json"
# JSON Lines file that main() writes the sample records to.
OUTPUT_FILE = INFERENCE_ROOT / "data" / "input" / "sample_raw_input_5.jsonl"
# Keys that main() reads from each item's "input" mapping; the non-blank values
# are joined with newlines in this order to form the record text.
# NOTE(review): there is no "z4_*" key in this list -- confirm the omission is
# intentional.
ZONE_KEYS = [
"z1_header",
"z2_facts",
"z3_issues",
"z5_result",
"z6_dissent",
"z7_other",
]
# Built-in records that main() writes when SOURCE_FILE does not exist. Each one
# has the same "id" / "text" / "labels" keys as the records built from the
# source file, with an empty label list.
FALLBACK_SAMPLES = [
{"id": "sample_1", "text": "原告主张被告未按约还款,请求判令偿还借款本息及相关费用。", "labels": []},
{"id": "sample_2", "text": "双方就民间借贷关系发生争议,法院审查借条、转账记录后作出判决。", "labels": []},
{"id": "sample_3", "text": "被告抗辩借款已部分清偿,原告提交证据不足以完全支持其主张。", "labels": []},
{"id": "sample_4", "text": "本院认为借贷关系成立,逾期利息应按合同约定及法律规定计算。", "labels": []},
{"id": "sample_5", "text": "判决被告于限定期限内支付本金、利息及案件受理费。", "labels": []},
]
def main() -> None:
    """Write a small sample input file to ``OUTPUT_FILE`` in JSON Lines form.

    When ``SOURCE_FILE`` exists, its first five items are converted into
    records: the non-blank zone texts found under each item's ``"input"``
    mapping are joined with newlines in ``ZONE_KEYS`` order, and the item's
    ``"labels"`` value is copied over (an empty list when absent). When the
    source file does not exist, ``FALLBACK_SAMPLES`` is written instead.

    Records are serialized one per line, separated by newlines with no
    trailing newline, using UTF-8 and unescaped non-ASCII characters. The
    output directory is created if needed, and the chosen source and output
    paths are printed.
    """
    records: list[dict]
    if SOURCE_FILE.exists():
        items = json.loads(SOURCE_FILE.read_text(encoding="utf-8"))[:5]
        records = []
        for i, item in enumerate(items, start=1):
            zones = item.get("input")
            if not isinstance(zones, dict):
                # "input" is missing, null or not a mapping: no zones to read.
                zones = {}
            # Look each zone up once and keep only non-blank strings, so a
            # null or non-text zone value is skipped instead of crashing on
            # .strip().
            zone_texts = [
                value
                for value in (zones.get(key) for key in ZONE_KEYS)
                if isinstance(value, str) and value.strip()
            ]
            records.append(
                {
                    "id": f"sample_{i}",
                    "text": "\n".join(zone_texts),
                    "labels": item.get("labels", []),
                }
            )
        print(f"[source] {SOURCE_FILE}")
    else:
        records = FALLBACK_SAMPLES
        print("[source] external infer_outcome not found, use built-in fallback samples")

    OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True)
    OUTPUT_FILE.write_text(
        "\n".join(json.dumps(record, ensure_ascii=False) for record in records),
        encoding="utf-8",
    )
    print(f"[out] {OUTPUT_FILE}")


if __name__ == "__main__":
    main()