# pdf-trainer-api/backend/scripts/generate_template_schema_skeletons.py
# Avinash
# integrate real backend api
# 4a5269c
from __future__ import annotations
import json
from pathlib import Path
from typing import Any, Dict, List
# The "templates" directory is a sibling of the directory that contains this
# script (i.e. it lives in the script directory's parent).
TEMPLATES_DIR = Path(__file__).resolve().parents[1].joinpath("templates")
# Catalogue of every field a template schema carries, in output order.
# Entries whose "type" is "table" are expanded with table geometry; all other
# entries are treated as scalar fields.
FIELDS: List[Dict[str, Any]] = [
    {"field_id": "facility_organization", "label": "Facility / Organization", "type": "entity"},
    {"field_id": "case_location", "label": "Case Location / Address", "type": "text"},
    {"field_id": "vendor", "label": "Vendor", "type": "entity"},
    {"field_id": "physician_name", "label": "Physician Name", "type": "person"},
    {"field_id": "date_of_surgery", "label": "Date of Surgery", "type": "date"},
    {"field_id": "items", "label": "Items / Line Items", "type": "table"},
]

# Header texts recorded as anchors for the table field.
TABLE_ANCHORS: List[Dict[str, str]] = [
    {"key": "item_number", "expected_text": "Item Number"},
    {"key": "description", "expected_text": "Description"},
    {"key": "qty", "expected_text": "Qty"},
]

# Columns of the table field, in output order.
TABLE_COLUMNS: List[Dict[str, str]] = [
    {"key": "item_number", "label": "Item Number"},
    {"key": "lot_number", "label": "Lot Number"},
    {"key": "description", "label": "Description"},
    {"key": "qty", "label": "Qty"},
    {"key": "price", "label": "Price"},
]


def schema_skeleton(form_id: str) -> Dict[str, Any]:
    """Return an empty (all geometry ``None``) schema for one template.

    The skeleton is derived from the module-level ``FIELDS``,
    ``TABLE_COLUMNS`` and ``TABLE_ANCHORS`` catalogues so the field list is
    declared in exactly one place.

    Args:
        form_id: Identifier stored under the ``"form_id"`` key of the result.

    Returns:
        A new dict on every call. Nested dicts and lists are freshly built,
        so mutating the result never alters the module-level catalogues.
    """
    fields: List[Dict[str, Any]] = []
    for spec in FIELDS:
        if spec["type"] == "table":
            # Table field: geometry placeholders plus per-column and
            # per-anchor entries copied from the catalogues.
            fields.append(
                {
                    **spec,
                    "table_bbox_norm": None,
                    "header_bbox_norm": None,
                    "row_height_hint_norm": None,
                    "columns": [
                        {**column, "bbox_rel_norm": None} for column in TABLE_COLUMNS
                    ],
                    "table_anchors": [
                        {**anchor, "bbox_norm": None} for anchor in TABLE_ANCHORS
                    ],
                    "notes": "Anchors are used at runtime to localize table/header/columns under drift.",
                }
            )
        else:
            # Scalar field: anchor box, value box and anchor-to-value offset.
            fields.append(
                {
                    **spec,
                    "anchor_bbox_norm": None,
                    "value_bbox_norm": None,
                    "value_offset_norm": None,
                }
            )

    return {
        "form_id": form_id,
        "version": 3,
        "page": 1,
        "scalar_value_region_mode": "offset_from_anchor_v1",
        "fields": fields,
        "notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items.",
    }
def main() -> None:
    """Write a fresh schema skeleton into every template JSON file.

    Each ``*.json`` file directly inside ``TEMPLATES_DIR`` is loaded, its
    ``"schema"`` key is created or overwritten with
    ``schema_skeleton(form_id="template_<id>")``, and the file is written
    back as UTF-8 with 2-space indentation and a trailing newline.

    ``<id>`` is the file's ``"template_id"`` value, or the file stem when
    that value is missing, empty or only whitespace.

    Raises:
        SystemExit: If ``TEMPLATES_DIR`` does not exist or holds no
            ``*.json`` files.
    """
    if not TEMPLATES_DIR.exists():
        raise SystemExit(f"templates dir not found: {TEMPLATES_DIR}")

    files = sorted(TEMPLATES_DIR.glob("*.json"))
    if not files:
        raise SystemExit(f"No template json files found in: {TEMPLATES_DIR}")

    updated = 0
    for fp in files:
        data = json.loads(fp.read_text(encoding="utf-8"))
        if not isinstance(data, dict):
            # A non-object root has nowhere to hold a "schema" key; report it
            # and leave the file untouched instead of failing on ``.get``.
            print(f"skipped (top-level JSON is not an object): {fp}")
            continue

        # Strip before falling back so a whitespace-only id does not produce
        # an empty form id, and coerce to str so numeric ids do not break
        # ``.strip()``.
        template_id = str(data.get("template_id") or "").strip() or fp.stem.strip()

        # To limit the rewrite to known template IDs, filter on template_id
        # here (for example, skip IDs that do not start with "T").

        # Overwrite or create the schema skeleton.
        data["schema"] = schema_skeleton(form_id=f"template_{template_id}")
        fp.write_text(json.dumps(data, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
        print(f"updated schema skeleton: {fp}")
        updated += 1

    print(f"done. updated {updated} template files.")


if __name__ == "__main__":
    main()