import json import pathlib class PromptMsgResultLogKey: custom_id = "custom_id" id = "id" result_type = "result_type" error = "error" response = "response" usage_input_tokens = "usage_input_tokens" usage_output_tokens = "usage_output_tokens" class MessageProcessingLogKey: custom_id = "custom_id" result_type = "result_type" num_documents_expected = "num_documents_expected" num_documents_found = "num_documents_found" document_ids = "document_ids" class DocLogKey: document_id = "document_id" html_len = "html_len" raw_json_gt_found = "raw_json_gt_found" raw_json_gt_valid_json = "raw_json_gt_valid_json" raw_annotation_gt_found = "raw_annotation_gt_found" raw_annotation_gt_extraction_errors = "raw_annotation_gt_extraction_errors" raw_gt_or_annotation_annotations_count = "raw_gt_or_annotation_annotations_count" render_html_width = "render_html_width" render_html_height = "render_html_height" pdf_num_pages = "pdf_num_pages" pdf_render_error = "pdf_render_error" num_geometries_extracted = "num_geometries_extracted" num_word_bboxes = "num_word_bboxes" num_char_bboxes = "num_char_bboxes" can_map_chars_to_words = "can_map_chars_to_words" handwriting_num_elements = "handwriting_num_elements" handwriting_element_extraction_errors = "handwriting_element_extraction_errors" handwriting_generation_authorid_to_writerstyle = ( "handwriting_generation_authorid_to_writerstyle" ) handwriting_insertion_success = "handwriting_insertion_success" handwriting_images_were_generated = "handwriting_images_were_generated" handwriting_missing_images = "handwriting_missing_images" visual_elements_insertion_success = "visual_elements_insertion_success" visual_elements_were_generated = "visual_elements_were_generated" visual_elements_missing_images = "visual_elements_missing_images" visual_elements_num_elements = "visual_elements_num_elements" visual_elements_extraction_errors = "visual_elements_extraction_errors" visual_elements_generation_logs = "visual_elements_generation_logs" visual_elements_generation_errors = "visual_elements_generation_errors" layout_elements_num_elements = "layout_elements_num_elements" layout_elements_extraction_errors = "layout_elements_extraction_errors" layout_elements_generation_logs = "layout_elements_generation_logs" layout_elements_generation_errors = "layout_elements_generation_errors" ocr_required = "ocr_required" ocr_found = "ocr_found" ocr_num_bboxes_words = "ocr_num_bboxes_words" ocr_num_bboxes_lines = "ocr_num_bboxes_lines" ocr_error = "ocr_error" gt_verification_confirmed_keys = "gt_verification_confirmed_keys" gt_verification_similarities = "gt_verification_similarities" gt_verification_passed = "gt_verification_passed" gt_verification_skipped = "gt_verification_skipped" class SynDocumentLog: def __init__(self, document_id: str, logdir: pathlib.Path): self.document_id = document_id logpath = logdir / f"{document_id}.json" self.log = json.loads(logpath.read_text(encoding="utf-8")) @property def raw_json_gt_found(self): return self.log.get(DocLogKey.raw_json_gt_found, False) @property def raw_json_gt_valid_json(self): return self.log.get(DocLogKey.raw_json_gt_valid_json, False) @property def raw_annotation_gt_found(self): return self.log.get(DocLogKey.raw_annotation_gt_found, False) @property def raw_annotation_gt_extraction_errors(self): return self.log.get(DocLogKey.raw_annotation_gt_extraction_errors, [-1]) @property def gt_verification_passed(self): return self.log.get(DocLogKey.gt_verification_passed, False) @property def gt_verification_skipped(self): return self.log.get(DocLogKey.gt_verification_skipped, False) @property def pdf_num_pages(self): return self.log.get(DocLogKey.pdf_num_pages, -1) @property def num_word_bboxes(self): return self.log.get(DocLogKey.num_word_bboxes, -1) @property def num_char_bboxes(self): return self.log.get(DocLogKey.num_char_bboxes, -1) @property def can_map_chars_to_words(self): return self.log.get(DocLogKey.can_map_chars_to_words, False) @property def handwriting_num_elements(self): return self.log.get(DocLogKey.handwriting_num_elements, -1) @property def handwriting_element_extraction_errors(self): return self.log.get(DocLogKey.handwriting_element_extraction_errors, [-1]) @property def handwriting_missing_images(self): return self.log.get(DocLogKey.handwriting_missing_images, [-1]) @property def visual_elements_num_elements(self): return self.log.get(DocLogKey.visual_elements_num_elements, -1) @property def visual_elements_extraction_errors(self): return self.log.get(DocLogKey.visual_elements_extraction_errors, [-1]) @property def layout_elements_num_elements(self): return self.log.get(DocLogKey.layout_elements_num_elements, -1) @property def layout_elements_extraction_errors(self): return self.log.get(DocLogKey.layout_elements_extraction_errors, [-1]) @property def ocr_required(self): return self.log.get(DocLogKey.ocr_required, False) @property def ocr_found(self): return self.log.get(DocLogKey.ocr_found, False) @property def render_html_width(self) -> int | None: return self.log.get(DocLogKey.render_html_width, None) @property def render_html_height(self) -> int | None: return self.log.get(DocLogKey.render_html_height, None) @property def annotations_count(self) -> int: return self.log.get(DocLogKey.raw_gt_or_annotation_annotations_count, 0)