# -*- coding: utf-8 -*-
from io import BytesIO
from unittest import TestCase
import binascii
import codecs
import json
import sys

from hwp5.binmodel import BinData
from hwp5.binmodel import BorderFill
from hwp5.binmodel import Control
from hwp5.binmodel import ControlChar
from hwp5.binmodel import ControlData
from hwp5.binmodel import FaceName
from hwp5.binmodel import GShapeObjectControl
from hwp5.binmodel import HeaderParagraphList
from hwp5.binmodel import Hwp5File
from hwp5.binmodel import LanguageStruct
from hwp5.binmodel import ListHeader
from hwp5.binmodel import ModelStream
from hwp5.binmodel import ParaLineSegList
from hwp5.binmodel import ParaText
from hwp5.binmodel import Paragraph
from hwp5.binmodel import RecordModel
from hwp5.binmodel import ShapeComponent
from hwp5.binmodel import Style
from hwp5.binmodel import TableBody
from hwp5.binmodel import TableCaption
from hwp5.binmodel import TableCell
from hwp5.binmodel import TableControl
from hwp5.binmodel import TextboxParagraphList
from hwp5.binmodel import init_record_parsing_context
from hwp5.binmodel import model_to_json
from hwp5.binmodel import parse_model
from hwp5.binmodel import parse_models
from hwp5.binmodel import parse_models_intern
from hwp5.dataio import Enum
from hwp5.dataio import Flags
from hwp5.dataio import UINT32
from hwp5.dataio import WORD
from hwp5.recordstream import Record
from hwp5.recordstream import read_records
from hwp5.tagids import HWPTAG_BEGIN
from hwp5.treeop import STARTEVENT, ENDEVENT
from hwp5.treeop import prefix_event
from hwp5.utils import cached_property

from . import test_recordstream
from .fixtures import get_fixture_path


# Byte-string and text-string types of the running interpreter:
# (str, unicode) on Python 2, (bytes, str) on Python 3.  Used instead of the
# Python 2-only ``basestring`` builtin.
_STRING_TYPES = (type(b''), type(u''))


def TestContext(**ctx):
    ''' Return ctx as a dict, with 'version' defaulting to (5, 0, 0, 0). '''
    if 'version' not in ctx:
        ctx['version'] = (5, 0, 0, 0)
    return ctx


testcontext = TestContext()


def _models_of_type(models, model_type):
    ''' Return a list of the models whose 'type' entry is model_type. '''
    return [model for model in models if model['type'] is model_type]


class TestRecordParsing(TestCase):
    ''' Tests for init_record_parsing_context(). '''

    def test_init_record_parsing_context(self):
        record = dict(tagid=HWPTAG_BEGIN, payload=b'abcd')
        context = init_record_parsing_context(testcontext, record)

        self.assertEqual(record, context['record'])
        self.assertEqual(b'abcd', context['stream'].read())


class BinEmbeddedTest(TestCase):
    ''' Parse a single BinData record read from an in-memory stream. '''

    ctx = TestContext()
    stream = BytesIO(b'\x12\x04\xc0\x00\x01\x00\x02\x00\x03\x00'
                     b'\x6a\x00\x70\x00\x67\x00')

    def testParse(self):
        record = next(read_records(self.stream))
        context = init_record_parsing_context(testcontext, record)
        model = record
        parse_model(context, model)

        # assertTrue(a, b) treats b as the failure message and never compares
        # the two values; assertEqual performs the intended check.
        self.assertEqual(BinData, model['type'])
        self.assertEqual(BinData.StorageType.EMBEDDING,
                         BinData.Flags(model['content']['flags']).storage)
        self.assertEqual(2, model['content']['bindata']['storage_id'])
        self.assertEqual('jpg', model['content']['bindata']['ext'])


class LanguageStructTest(TestCase):
    ''' Tests for classes created by LanguageStruct(). '''

    def test_cls_dict_has_attributes(self):
        FontFace = LanguageStruct(b'FontFace', WORD)
        self.assertIn('attributes', FontFace.__dict__)


class TestBase(test_recordstream.TestBase):
    ''' Base test case exposing the fixture as a binmodel Hwp5File. '''

    @cached_property
    def hwp5file_bin(self):
        return Hwp5File(self.olestg)

    hwp5file = hwp5file_bin


class FaceNameTest(TestBase):
    ''' Tests for FaceName models of 'facename.hwp'. '''

    hwp5file_name = 'facename.hwp'

    def test_font_file_type(self):
        docinfo = self.hwp5file.docinfo
        facenames = _models_of_type(docinfo.models(), FaceName)

        facename = facenames[0]['content']
        self.assertEqual(u'굴림', facename['name'])
        self.assertEqual(FaceName.FontFileType.TTF,
                         facename['flags'].font_file_type)

        facename = facenames[3]['content']
        self.assertEqual(u'휴먼명조', facename['name'])
        self.assertEqual(FaceName.FontFileType.HFT,
                         facename['flags'].font_file_type)

        facename = facenames[4]['content']
        self.assertEqual(u'한양신명조', facename['name'])
        self.assertEqual(FaceName.FontFileType.HFT,
                         facename['flags'].font_file_type)


class DocInfoTest(TestBase):
    ''' Tests for docinfo.charshape_lang_facename() on 'facename2.hwp'. '''

    hwp5file_name = 'facename2.hwp'

    def test_charshape_lang_facename(self):
        docinfo = self.hwp5file.docinfo
        styles = _models_of_type(docinfo.models(), Style)

        def style_lang_facename(style, lang):
            charshape_id = style['content']['charshape_id']
            return docinfo.charshape_lang_facename(charshape_id, lang)

        def style_lang_facename_name(style, lang):
            facename = style_lang_facename(style, lang)
            return facename['content']['name']

        self.assertEqual(u'바탕', style_lang_facename_name(styles[0], 'ko'))
        self.assertEqual(u'한컴돋움', style_lang_facename_name(styles[1], 'ko'))
        self.assertEqual(u'Times New Roman',
                         style_lang_facename_name(styles[2], 'en'))
        self.assertEqual(u'Arial', style_lang_facename_name(styles[3], 'en'))
        self.assertEqual(u'해서 약자', style_lang_facename_name(styles[4], 'cn'))
        self.assertEqual(u'해서 간자', style_lang_facename_name(styles[5], 'cn'))
        self.assertEqual(u'명조', style_lang_facename_name(styles[6], 'jp'))
        self.assertEqual(u'고딕', style_lang_facename_name(styles[7], 'jp'))


class BorderFillTest(TestBase):
    ''' Tests for BorderFill models referenced by the table cells of
    'borderfill.hwp'. '''

    hwp5file_name = 'borderfill.hwp'

    def test_parse_borderfill(self):
        docinfo = self.hwp5file.docinfo
        borderfills = _models_of_type(docinfo.models(), BorderFill)

        section = self.hwp5file.bodytext.section(0)
        tablecells = _models_of_type(section.models(), TableCell)
        for tablecell in tablecells:
            # The lookup below treats borderfill_id as a 1-based index into
            # the BorderFill models, in document order.
            borderfill_id = tablecell['content']['borderfill_id']
            borderfill = borderfills[borderfill_id - 1]['content']
            tablecell['borderfill'] = borderfill

        # Expected values shared by several cells below.
        colorpattern = dict(background_color=0xff7f3f,
                            pattern_color=0,
                            pattern_type_flags=0xffffffff)
        gradation = dict(blur=40, center=dict(x=0, y=0),
                         colors=[0xff7f3f, 0], shear=90, type=1)
        image = dict(flags=5, bindata_id=1, effect=0, brightness=0,
                     contrast=0)

        borderfill = tablecells[0]['borderfill']
        self.assertEqual(0, borderfill['fillflags'])
        self.assertEqual(None, borderfill.get('fill_colorpattern'))
        self.assertEqual(None, borderfill.get('fill_gradation'))
        self.assertEqual(None, borderfill.get('fill_image'))

        borderfill = tablecells[1]['borderfill']
        self.assertEqual(1, borderfill['fillflags'])
        self.assertEqual(colorpattern, borderfill['fill_colorpattern'])
        self.assertEqual(None, borderfill.get('fill_gradation'))
        self.assertEqual(None, borderfill.get('fill_image'))

        borderfill = tablecells[2]['borderfill']
        self.assertEqual(4, borderfill['fillflags'])
        self.assertEqual(None, borderfill.get('fill_colorpattern'))
        self.assertEqual(gradation, borderfill['fill_gradation'])
        self.assertEqual(None, borderfill.get('fill_image'))

        borderfill = tablecells[3]['borderfill']
        self.assertEqual(2, borderfill['fillflags'])
        self.assertEqual(None, borderfill.get('fill_colorpattern'))
        self.assertEqual(None, borderfill.get('fill_gradation'))
        self.assertEqual(image, borderfill.get('fill_image'))

        borderfill = tablecells[4]['borderfill']
        self.assertEqual(3, borderfill['fillflags'])
        self.assertEqual(colorpattern, borderfill['fill_colorpattern'])
        self.assertEqual(None, borderfill.get('fill_gradation'))
        self.assertEqual(image, borderfill.get('fill_image'))

        borderfill = tablecells[5]['borderfill']
        self.assertEqual(6, borderfill['fillflags'])
        self.assertEqual(None, borderfill.get('fill_colorpattern'))
        self.assertEqual(gradation, borderfill['fill_gradation'])
        self.assertEqual(image, borderfill.get('fill_image'))


class StyleTest(TestBase):
    ''' Tests for Style models of 'charstyle.hwp'. '''

    hwp5file_name = 'charstyle.hwp'

    def test_charstyle(self):
        docinfo = self.hwp5file.docinfo
        styles = _models_of_type(docinfo.models(), Style)

        style = styles[0]['content']
        self.assertEqual(dict(name='Normal',
                              unknown=0,
                              parashape_id=0,
                              charshape_id=1,
                              next_style_id=0,
                              lang_id=1042,
                              flags=0,
                              local_name=u'바탕글'),
                         style)

        charstyle = styles[13]['content']
        self.assertEqual(dict(name='',
                              unknown=0,
                              parashape_id=0,
                              charshape_id=1,
                              next_style_id=0,
                              lang_id=1042,
                              flags=1,
                              local_name=u'글자스타일'),
                         charstyle)


class ParaCharShapeTest(TestBase):
    ''' Tests for parsing the record at index 2 of the first section. '''

    @property
    def paracharshape_record(self):
        return self.bodytext.section(0).record(2)

    def test_read_paracharshape(self):
        parent_context = dict()
        parent_model = dict(content=dict(charshapes=5))

        record = self.paracharshape_record
        context = init_record_parsing_context(dict(), record)
        context['parent'] = parent_context, parent_model
        model = record
        parse_model(context, model)

        self.assertEqual(dict(charshapes=[(0, 7), (19, 8), (23, 7), (24, 9),
                                          (26, 7)]),
                         model['content'])


class TableTest(TestBase):
    ''' Tests for parsing a table control and its child records. '''

    @property
    def stream(self):
        return BytesIO(b'G\x04\xc0\x02 lbt\x11#*\x08\x00\x00\x00\x00\x00\x00'
                       b'\x00\x00\x06\x9e\x00\x00D\x10\x00\x00\x00\x00\x00\x00'
                       b'\x1b\x01\x1b\x01\x1b\x01\x1b\x01\xed\xad\xa2V\x00\x00'
                       b'\x00\x00')

    @cached_property
    def tablecontrol_record(self):
        return self.bodytext.section(0).record(30)

    @cached_property
    def tablecaption_record(self):
        return self.bodytext.section(0).record(68)

    @cached_property
    def tablebody_record(self):
        return self.bodytext.section(0).record(31)

    @cached_property
    def tablecell_record(self):
        return self.bodytext.section(0).record(32)

    def testParsePass1(self):
        record = next(read_records(self.stream))
        context = init_record_parsing_context(testcontext, record)
        model = record
        parse_model(context, model)

        # assertTrue(a, b) would only check that ``a`` is truthy.
        self.assertEqual(TableControl, model['type'])
        self.assertEqual(1453501933, model['content']['instance_id'])
        self.assertEqual(0x0, model['content']['x'])
        self.assertEqual(0x0, model['content']['y'])
        self.assertEqual(0x1044, model['content']['height'])
        self.assertEqual(0x9e06, model['content']['width'])
        self.assertEqual(0, model['content']['unknown1'])
        self.assertEqual(0x82a2311, model['content']['flags'])
        self.assertEqual(0, model['content']['z_order'])
        self.assertEqual(dict(left=283, right=283, top=283, bottom=283),
                         model['content']['margin'])
        self.assertEqual('tbl ', model['content']['chid'])

    def test_parse_child_table_body(self):
        record = self.tablecontrol_record
        context = init_record_parsing_context(testcontext, record)

        tablebody_record = self.tablebody_record
        child_context = init_record_parsing_context(testcontext,
                                                    tablebody_record)
        child_model = dict(type=TableBody, content=dict())
        child = (child_context, child_model)

        self.assertFalse(context.get('seen_table_body'))
        TableControl.on_child(dict(), context, child)
        # 'seen_table_body' in table record context should have been changed
        # to True
        self.assertTrue(context['seen_table_body'])
        # model and attributes should not have been changed
        self.assertEqual(dict(), child_model['content'])

    def test_parse_child_table_cell(self):
        record = self.tablecontrol_record
        context = init_record_parsing_context(testcontext, record)
        model = record
        parse_model(context, model)

        context['seen_table_body'] = True

        child_record = self.tablecell_record
        child_context = init_record_parsing_context(testcontext, child_record)
        child_model = child_record
        child_context['parent'] = context, model
        parse_model(child_context, child_model)

        self.assertEqual(TableCell, child_model['type'])
        self.assertEqual(dict(padding=dict(top=141, right=141, bottom=141,
                                           left=141),
                              rowspan=1,
                              colspan=1,
                              borderfill_id=1,
                              height=282,
                              listflags=32,
                              width=20227,
                              unknown1=0,
                              unknown_width=20227,
                              paragraphs=1,
                              col=0,
                              row=0),
                         child_model['content'])
        self.assertEqual(b'', child_context['stream'].read())

    def test_parse_child_table_caption(self):
        record = self.tablecontrol_record
        context = init_record_parsing_context(testcontext, record)
        model = record
        parse_model(context, model)

        context['seen_table_body'] = False

        child_record = self.tablecaption_record
        child_context = init_record_parsing_context(testcontext, child_record)
        child_context['parent'] = context, model
        child_model = child_record
        parse_model(child_context, child_model)

        self.assertEqual(TableCaption, child_model['type'])
        self.assertEqual(dict(listflags=0,
                              width=8504,
                              max_width=40454,
                              unknown1=0,
                              flags=3,
                              separation=850,
                              paragraphs=2),
                         child_model['content'])
        self.assertEqual(b'', child_context['stream'].read())


class ShapeComponentTest(TestBase):
    ''' Tests for ShapeComponent models and their fill attributes. '''

    hwp5file_name = 'textbox.hwp'

    @cached_property
    def control_gso_record(self):
        return self.bodytext.section(0).record(12)

    @cached_property
    def shapecomponent_record(self):
        return self.bodytext.section(0).record(19)

    @cached_property
    def textbox_paragraph_list_record(self):
        return self.bodytext.section(0).record(20)

    def test_parse_shapecomponent_textbox_paragraph_list(self):
        record = self.shapecomponent_record
        context = init_record_parsing_context(testcontext, record)
        model = record
        model['type'] = ShapeComponent

        child_record = self.textbox_paragraph_list_record
        child_context = init_record_parsing_context(testcontext, child_record)
        child_context['parent'] = context, model
        child_model = child_record
        parse_model(child_context, child_model)

        self.assertEqual(TextboxParagraphList, child_model['type'])
        self.assertEqual(dict(listflags=32,
                              padding=dict(top=283, right=283, bottom=283,
                                           left=283),
                              unknown1=0,
                              maxwidth=11763,
                              paragraphs=1),
                         child_model['content'])
        self.assertEqual(b'', child_context['stream'].read())

    def test_parse(self):
        # parent_record = self.control_gso_record

        # if parent model is GShapeObjectControl
        parent_model = dict(type=GShapeObjectControl)

        record = self.shapecomponent_record
        context = init_record_parsing_context(testcontext, record)
        context['parent'] = dict(), parent_model
        model = record
        parse_model(context, model)

        self.assertEqual(model['type'], ShapeComponent)
        self.assertIn('chid0', model['content'])

        # if parent model is not GShapeObjectControl
        # TODO

    def test_rect_fill(self):
        self.hwp5file_name = 'shapecomponent-rect-fill.hwp'

        section = self.hwp5file_bin.bodytext.section(0)
        shapecomps = _models_of_type(section.models(), ShapeComponent)

        # Expected values shared by several shape components below.
        colorpattern = dict(background_color=0xff7f3f,
                            pattern_color=0,
                            pattern_type_flags=0xffffffff)
        gradation = dict(type=1, shear=90, center=dict(x=0, y=0),
                         colors=[0xff7f3f, 0], blur=50)
        image = dict(flags=5, bindata_id=1, effect=0, brightness=0,
                     contrast=0)

        shapecomp = shapecomps.pop(0)['content']
        self.assertFalse(shapecomp['fill_flags'].fill_colorpattern)
        self.assertFalse(shapecomp['fill_flags'].fill_gradation)
        self.assertFalse(shapecomp['fill_flags'].fill_image)

        shapecomp = shapecomps.pop(0)['content']
        self.assertTrue(shapecomp['fill_flags'].fill_colorpattern)
        self.assertFalse(shapecomp['fill_flags'].fill_gradation)
        self.assertFalse(shapecomp['fill_flags'].fill_image)
        self.assertEqual(colorpattern, shapecomp['fill_colorpattern'])
        self.assertEqual(None, shapecomp.get('fill_gradation'))
        self.assertEqual(None, shapecomp.get('fill_image'))

        shapecomp = shapecomps.pop(0)['content']
        self.assertFalse(shapecomp['fill_flags'].fill_colorpattern)
        self.assertTrue(shapecomp['fill_flags'].fill_gradation)
        self.assertFalse(shapecomp['fill_flags'].fill_image)
        self.assertEqual(None, shapecomp.get('fill_colorpattern'))
        self.assertEqual(gradation, shapecomp['fill_gradation'])
        self.assertEqual(None, shapecomp.get('fill_image'))

        shapecomp = shapecomps.pop(0)['content']
        self.assertFalse(shapecomp['fill_flags'].fill_colorpattern)
        self.assertFalse(shapecomp['fill_flags'].fill_gradation)
        self.assertTrue(shapecomp['fill_flags'].fill_image)
        self.assertEqual(None, shapecomp.get('fill_colorpattern'))
        self.assertEqual(None, shapecomp.get('fill_gradation'))
        self.assertEqual(image, shapecomp['fill_image'])

        shapecomp = shapecomps.pop(0)['content']
        self.assertTrue(shapecomp['fill_flags'].fill_colorpattern)
        self.assertFalse(shapecomp['fill_flags'].fill_gradation)
        self.assertTrue(shapecomp['fill_flags'].fill_image)
        self.assertEqual(colorpattern, shapecomp['fill_colorpattern'])
        self.assertEqual(None, shapecomp.get('fill_gradation'))
        self.assertEqual(image, shapecomp['fill_image'])

        shapecomp = shapecomps.pop(0)['content']
        self.assertFalse(shapecomp['fill_flags'].fill_colorpattern)
        self.assertTrue(shapecomp['fill_flags'].fill_gradation)
        self.assertTrue(shapecomp['fill_flags'].fill_image)
        self.assertEqual(None, shapecomp.get('fill_colorpattern'))
        self.assertEqual(gradation, shapecomp['fill_gradation'])
        self.assertEqual(image, shapecomp['fill_image'])

    def _assert_colorpattern_gradation(self, models):
        ''' Check the fill attributes of the last model in models.

        Shared by the two colorpattern/gradation tests, which expect the
        same parsed values from their respective record sets.
        '''
        content = models[-1]['content']
        self.assertEqual(1280, content['fill_flags'])

        colorpattern = content['fill_colorpattern']
        gradation = content['fill_gradation']
        self.assertEqual(32768, colorpattern['background_color'])
        self.assertEqual(0, colorpattern['pattern_color'])
        self.assertEqual(0xffffffff, colorpattern['pattern_type_flags'])

        self.assertEqual(50, gradation['blur'])
        self.assertEqual(dict(x=0, y=100), gradation['center'])
        self.assertEqual([64512, 13171936], gradation['colors'])
        self.assertEqual(180, gradation['shear'])
        self.assertEqual(1, gradation['type'])

        self.assertEqual(1, content['fill_shape'])
        self.assertEqual(50, content['fill_blur_center'])

    def test_colorpattern_gradation(self):
        # 5005-shapecomponent-with-colorpattern-and-gradation.dat
        records = \
            [{'level': 1,
              'payload': b' osg\x10bj\x04\x00\x00\x00\x00\x0c\x00\x00\x004\xbc\x00\x00l\x06\x01\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00G\xechf\x00\x00\x00\x00',  # noqa
              'seqno': 12,
              'size': 44,
              'tagid': 71,
              'tagname': 'HWPTAG_CTRL_HEADER'},
             {'level': 2,
              'payload': b'noc$noc$\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x004\xbc\x00\x00l\x06\x01\x004\xbc\x00\x00l\x06\x01\x00\x00\x00\x03\x00\x00\x00w\x00WPS \xb9\xae\x01\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00lop$noc$',  # noqa
              'seqno': 13,
              'size': 206,
              'tagid': 76,
              'tagname': 'HWPTAG_SHAPE_COMPONENT'},
             {'level': 3,
              'payload': b'lop$\x00\x00\x00\x00\xcc\x1b\x00\x00\x01\x00\x01\x004\xbc\x00\x00\xa0\xea\x00\x004\xbc\x00\x00\xa0\xea\x00\x00\x00\x00\x00\x00\x10\x00\x1a^\x00\x00Pu\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\xcc\xbb@\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe4\x00\x00\xc8\x00\x00\x00\x01\x00\x00\xd1\x00\x00\x00\x00\x00\x00\x00\x00\x00',  # noqa
              'seqno': 14,
              'size': 309,
              'tagid': 76,
              'tagname': 'HWPTAG_SHAPE_COMPONENT'},
             {'level': 4,
              'payload': b'\x06\x00\x00\x00`,\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x9c\xea\x00\x000\xbc\x00\x00\x9c\xea\x00\x000\xbc\x00\x00\x00\x00\x00\x00\xac\x8a\x00\x00\x00\x00\x00\x00',  # noqa
              'seqno': 15,
              'size': 52,
              'tagid': 82,
              'tagname': 'HWPTAG_SHAPE_COMPONENT_POLYGON'},
             {'level': 3,
              'payload': b'noc$\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x01\x00tb\x00\x00\xf4-\x00\x00tb\x00\x00\xf4-\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\r\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00cer$cer$cer$cer$cer$',  # noqa
              'seqno': 16,
              'size': 310,
              'tagid': 76,
              'tagname': 'HWPTAG_SHAPE_COMPONENT'},
             {'level': 4,
              'payload': b'cer$\xd8,\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x04\x0f\x00\x00\xf4-\x00\x00\x04\x0f\x00\x00\xf4-\x00\x00\x00\x00\x00\x01\x00\x00\x82\x07\x00\x00\xfa\x16\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00l\xc6@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\xc8\x00\x00\x00\x00\x00\x00\xd1\x00\x05\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff\x01\xb4\x00\x00\x00\x00\x00\x00\x00d\x00\x00\x002\x00\x00\x00\x02\x00\x00\x00\x00\xfc\x00\x00\xe0\xfc\xc8\x00\x01\x00\x00\x002',  # noqa
              'seqno': 17,
              'size': 447,
              'tagid': 76,
              'tagname': 'HWPTAG_SHAPE_COMPONENT'}]

        context = dict(version=(5, 0, 0, 5))
        models = list(parse_models(context, records))

        self._assert_colorpattern_gradation(models)

    def test_colorpattern_gradation_5017(self):
        fixturename = '5017-shapecomponent-with-colorpattern-and-gradation.bin'
        f = self.open_fixture(fixturename, 'rb')
        try:
            records = list(read_records(f))
        finally:
            f.close()

        context = dict(version=(5, 0, 1, 7))
        models = list(parse_models(context, records))

        self._assert_colorpattern_gradation(models)


class HeaderFooterTest(TestBase):
    ''' Tests for parsing a header control and its paragraph list. '''

    hwp5file_name = 'headerfooter.hwp'

    @cached_property
    def header_record(self):
        return self.bodytext.section(0).record(16)

    @cached_property
    def header_paragraph_list_record(self):
        return self.bodytext.section(0).record(17)

    def test_parse_child(self):
        record = self.header_record
        context = init_record_parsing_context(testcontext, record)
        model = record
        parse_model(context, model)

        child_record = self.header_paragraph_list_record
        child_context = init_record_parsing_context(testcontext, child_record)
        child_context['parent'] = context, model
        child_model = child_record
        parse_model(child_context, child_model)

        self.assertEqual(HeaderParagraphList, child_model['type'])
        self.assertEqual(dict(textrefsbitmap=0,
                              numberrefsbitmap=0,
                              height=4252,
                              listflags=0,
                              width=42520,
                              unknown1=0,
                              paragraphs=1),
                         child_model['content'])
        # TODO
        # self.assertEqual('', child_context['stream'].read())


class ListHeaderTest(TestCase):
    ''' Parse a single ListHeader record read from an in-memory stream. '''

    ctx = TestContext()
    record_bytes = (b'H\x08`\x02\x01\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00'
                    b'\x01\x00\x01\x00\x03O\x00\x00\x1a\x01\x00\x00\x8d\x00'
                    b'\x8d\x00\x8d\x00\x8d\x00\x01\x00\x03O\x00\x00')
    stream = BytesIO(record_bytes)

    def testParse(self):
        record = next(read_records(self.stream))
        context = init_record_parsing_context(testcontext, record)
        model = record
        parse_model(context, model)

        self.assertEqual(ListHeader, model['type'])
        self.assertEqual(1, model['content']['paragraphs'])
        self.assertEqual(0x20, model['content']['listflags'])
        self.assertEqual(0, model['content']['unknown1'])
        self.assertEqual(8, context['stream'].tell())


class TableBodyTest(TestCase):
    ''' Parse a single TableBody record under version (5, 0, 1, 7). '''

    ctx = TestContext(version=(5, 0, 1, 7))
    stream = BytesIO(b'M\x08\xa0\x01\x06\x00\x00\x04\x02\x00\x02\x00\x00\x00'
                     b'\x8d\x00\x8d\x00\x8d\x00\x8d\x00\x02\x00\x02\x00\x01'
                     b'\x00\x00\x00')

    def test_parse_model(self):
        record = next(read_records(self.stream))
        context = init_record_parsing_context(self.ctx, record)
        model = record

        parse_model(context, model)
        model_type = model['type']
        model_content = model['content']

        self.assertEqual(TableBody, model_type)
        self.assertEqual(dict(left=141, right=141, top=141, bottom=141),
                         model_content['padding'])
        self.assertEqual(0x4000006, model_content['flags'])
        self.assertEqual(2, model_content['cols'])
        self.assertEqual(2, model_content['rows'])
        self.assertEqual(1, model_content['borderfill_id'])
        self.assertEqual([2, 2], model_content['rowcols'])
        self.assertEqual(0, model_content['cellspacing'])
        self.assertEqual([], model_content['validZones'])


class Pass2Test(TestCase):
    ''' Tests for prefix_event() over (level, item) pairs. '''

    ctx = TestContext()

    def test_pass2_events(self):
        # NOTE(review): each item is a 1-tuple wrapping a Record.  The
        # original generator used ``yield Record(...),`` (trailing comma),
        # which yields 1-tuples; kept as is because the test only compares
        # items by equality.  Confirm whether bare Records were intended.
        items = [
            (Record(HWPTAG_BEGIN + 4, 0, ''),),
            (Record(HWPTAG_BEGIN + 3, 1, ''),),
            (Record(HWPTAG_BEGIN + 2, 0, ''),),
            (Record(HWPTAG_BEGIN + 1, 0, ''),),
        ]
        leveld_items = zip([0, 1, 0, 0], items)

        events = list(prefix_event(leveld_items))

        expected = [
            (STARTEVENT, items[0]),
            (STARTEVENT, items[1]),
            (ENDEVENT, items[1]),
            (ENDEVENT, items[0]),
            (STARTEVENT, items[2]),
            (ENDEVENT, items[2]),
            (STARTEVENT, items[3]),
            (ENDEVENT, items[3]),
        ]
        self.assertEqual(expected, events)


class LineSegTest(TestCase):
    ''' Tests for ParaLineSegList.decode(). '''

    def testDecode(self):
        data = ('00000000481e0000e8030000e80300005203000058020000dc0500003ca00'
                '000000006003300000088240000e8030000e80300005203000058020000dc'
                '0500003ca000000000060067000000c82a0000e8030000e80300005203000'
                '058020000dc0500003ca0000000000600')
        data = binascii.a2b_hex(data)
        lines = list(ParaLineSegList.decode(dict(), data))
        self.assertEqual(0, lines[0]['chpos'])
        self.assertEqual(51, lines[1]['chpos'])
        self.assertEqual(103, lines[2]['chpos'])


class TableCaptionCellTest(TestCase):
    ''' Parse a record sequence containing a table caption and a cell. '''

    ctx = TestContext(version=(5, 0, 1, 7))
    records_bytes = (b'G\x04\xc0\x02 lbt\x10#*(\x00\x00\x00\x00\x00\x00\x00\x00'  # noqa
                     b'\x06\x9e\x00\x00\x04\n\x00\x00\x03\x00\x00\x00\x1b\x01R'  # noqa
                     b'\x037\x02n\x04\n^\xc0V\x00\x00\x00\x00H\x08`\x01\x02\x00'  # noqa
                     b'\x00\x00\x00\x00\x00\x00\x03\x00\x00\x008!\x00\x00R\x03'  # noqa
                     b'\x06\x9e\x00\x00M\x08\xa0\x01\x06\x00\x00\x04\x02\x00'  # noqa
                     b'\x02\x00\x00\x00\x8d\x00\x8d\x00\x8d\x00\x8d\x00\x02\x00'  # noqa
                     b'\x02\x00\x01\x00\x00\x00H\x08`\x02\x01\x00\x00\x00 \x00'  # noqa
                     b'\x00\x00\x00\x00\x00\x00\x01\x00\x01\x00\x03O\x00\x00'  # noqa
                     b'\x1a\x01\x00\x00\x8d\x00\x8d\x00\x8d\x00\x8d\x00\x01\x00'  # noqa
                     b'\x03O\x00\x00')

    def testParsePass1(self):
        stream = BytesIO(self.records_bytes)
        records = list(read_records(stream))
        result = list(parse_models_intern(self.ctx, records))

        tablecaption = result[1]
        context, model = tablecaption
        model_type = model['type']
        model_content = model['content']
        stream = context['stream']

        self.assertEqual(TableCaption, model_type)
        self.assertEqual(22, stream.tell())
        # ListHeader attributes
        self.assertEqual(2, model_content['paragraphs'])
        self.assertEqual(0x0, model_content['listflags'])
        self.assertEqual(0, model_content['unknown1'])
        # TableCaption model_content
        self.assertEqual(3, model_content['flags'])
        self.assertEqual(8504, model_content['width'])
        self.assertEqual(850, model_content['separation'])
        self.assertEqual(40454, model_content['max_width'])

        tablecell = result[3]
        context, model = tablecell
        model_type = model['type']
        model_content = model['content']
        stream = context['stream']
        self.assertEqual(TableCell, model_type)
        self.assertEqual(38, stream.tell())
        # ListHeader model_content
        self.assertEqual(1, model_content['paragraphs'])
        self.assertEqual(0x20, model_content['listflags'])
        self.assertEqual(0, model_content['unknown1'])
        # TableCell model_content
        self.assertEqual(0, model_content['col'])
        self.assertEqual(0, model_content['row'])
        self.assertEqual(1, model_content['colspan'])
        self.assertEqual(1, model_content['rowspan'])
        self.assertEqual(0x4f03, model_content['width'])
        self.assertEqual(0x11a, model_content['height'])
        self.assertEqual(dict(left=141, right=141, top=141, bottom=141),
                         model_content['padding'])
        self.assertEqual(1, model_content['borderfill_id'])
        self.assertEqual(0x4f03, model_content['unknown_width'])


class TestRecordModel(TestCase):
    ''' Tests for Flags/Enum attributes declared on a RecordModel. '''

    def test_assign_enum_flags_name(self):
        class FooRecord(RecordModel):
            Bar = Flags(UINT32)
            Baz = Enum()
        self.assertEqual('Bar', FooRecord.Bar.__name__)
        self.assertEqual('Baz', FooRecord.Baz.__name__)


class TestControlType(TestCase):
    ''' Tests for declaring Control subclasses. '''

    def test_ControlType(self):

        class FooControl(Control):
            chid = 'foo!'

        # Declaring a second Control subclass with the same chid is expected
        # to raise.  assertRaises is used rather than a bare ``assert`` so
        # the check is not stripped when Python runs with -O.
        with self.assertRaises(Exception):
            class Foo2Control(Control):
                chid = 'foo!'


class TestControlChar(TestBase):
    ''' Tests for ControlChar decoding and for tab control characters. '''

    def test_decode(self):
        paratext_record = self.hwp5file.bodytext.section(0).record(1)
        payload = paratext_record['payload']
        controlchar = ControlChar.decode(payload[0:16])
        self.assertEqual(dict(code=ord(ControlChar.SECTION_COLUMN_DEF),
                              chid='secd',
                              param=b'\x00' * 8), controlchar)

    def test_find(self):
        data = b'\x41\x00'
        self.assertEqual((2, 2), ControlChar.find(data, 0))

    def test_tab(self):
        self.hwp5file_name = 'tabdef.hwp'
        models = self.hwp5file.bodytext.section(0).models()
        paratexts = _models_of_type(models, ParaText)

        # Compare code points rather than characters so the test does not
        # depend on the Python 2-only ``unichr`` builtin.
        tab_code = ord(ControlChar.TAB)

        def paratext_tabs(paratext):
            ''' Yield the dict chunks of paratext whose code is TAB. '''
            for _chunk_range, chunk in paratext['content']['chunks']:
                if isinstance(chunk, dict) and chunk['code'] == tab_code:
                    yield chunk

        self.assertEqual(set(['code', 'param']),
                         set(next(paratext_tabs(paratexts[0])).keys()))

        def paratext_tab_params(paratext):
            ''' Yield the 'param' entry of every tab chunk of paratext. '''
            for tab in paratext_tabs(paratext):
                yield tab['param']

        # Expected (width, unknown1) pairs of the tabs in each of the first
        # eight ParaText models, in document order.
        expected_tabs = [
            [(4000, 1)] * 3,
            [(2000, 1), (1360, 1), (1360, 1)],
            [(2328, 2)] * 3,
            [(2646, 3), (2292, 3), (2292, 3)],
            [(2104, 4)] * 3,
            [(4000, 1), (3360, 1), (3360, 1)],
            [(4000, 1), (3328, 1)],
            [(4000, 1), (3672, 1), (33864, 2)],
        ]
        for index, expected in enumerate(expected_tabs):
            # Indexing (not zip) so a missing ParaText raises IndexError
            # instead of being silently skipped.
            tabs = list(paratext_tab_params(paratexts[index]))
            self.assertEqual(expected,
                             [(tab['width'], tab['unknown1'])
                              for tab in tabs])


class TestFootnoteShape(TestBase):
    ''' Tests for the footnote shape model of 'footnote-endnote.hwp'. '''

    def test_footnote_shape(self):
        path = get_fixture_path('footnote-endnote.hwp')
        hwp5file = Hwp5File(path)
        models = list(hwp5file.bodytext.section(0).models())

        fnshape = models[6]
        self.assertEqual(850, fnshape['content']['splitter_margin_top'])
        self.assertEqual(567, fnshape['content']['splitter_margin_bottom'])


class TestControlData(TestBase):
    ''' Tests for parsing an HWPTAG_CTRL_DATA record. '''

    def test_parse(self):
        # 5006-controldata.record
        record = \
            {'level': 2,
             'payload': b'\x1b\x02\x01\x00\x00\x00\x00@\x01\x00\x03\x00X\xc7H\xc5\x85\xba',  # noqa
             'seqno': 27,
             'size': 18,
             'tagid': 87,
             'tagname': 'HWPTAG_CTRL_DATA'}
        context = init_record_parsing_context(dict(), record)
        model = record
        parse_model(context, model)
        self.assertEqual(ControlData, model['type'])
        self.assertEqual(dict(), model['content'])


class TestModelJson(TestBase):
    ''' Tests for model_to_json() and models_json(). '''

    def test_model_to_json(self):
        model = self.hwp5file.docinfo.model(0)
        json_string = model_to_json(model)

        jsonobject = json.loads(json_string)
        self.assertEqual('DocumentProperties', jsonobject['type'])

    def test_model_to_json_should_not_modify_input(self):
        model = self.hwp5file.docinfo.model(0)
        model_to_json(model, indent=2, sort_keys=True)
        # The 'type' entry of the input must not have been replaced by a
        # string of any kind.
        self.assertNotIsInstance(model['type'], _STRING_TYPES)

    def test_model_to_json_with_controlchar(self):
        model = self.hwp5file.bodytext.section(0).model(1)
        json_string = model_to_json(model)

        jsonobject = json.loads(json_string)
        self.assertEqual('ParaText', jsonobject['type'])
        self.assertEqual([[0, 8],
                          dict(code=2, param='\x00' * 8, chid='secd')],
                         jsonobject['content']['chunks'][0])

    def test_model_to_json_with_unparsed(self):
        model = dict(type=RecordModel, content=[],
                     payload=b'\x00\x01\x02\x03',
                     unparsed=b'\xff\xfe\xfd\xfc')
        json_string = model_to_json(model)

        jsonobject = json.loads(json_string)
        self.assertEqual(['ff fe fd fc'], jsonobject['unparsed'])

    def test_generate_models_json_array(self):
        models_json = self.hwp5file.bodytext.section(0).models_json()
        gen = models_json.generate()

        json_array = json.loads(''.join(gen))
        self.assertEqual(128, len(json_array))


class TestModelStream(TestBase):
    ''' Tests for ModelStream built from the DocInfo record stream. '''

    @cached_property
    def docinfo(self):
        return ModelStream(self.hwp5file_rec['DocInfo'],
                           self.hwp5file_rec.header.version)

    def test_models(self):
        self.assertEqual(67, len(list(self.docinfo.models())))

    def test_models_treegrouped(self):
        section = self.bodytext.section(0)
        for paragraph_models in section.models_treegrouped():
            paragraph_models = list(paragraph_models)
            leader = paragraph_models[0]
            # leader should be a Paragraph
            self.assertEqual(Paragraph, leader['type'])
            # leader should be at top-level
            self.assertEqual(0, leader['level'])

    def test_model(self):
        model = self.docinfo.model(0)
        self.assertEqual(0, model['seqno'])

        model = self.docinfo.model(10)
        self.assertEqual(10, model['seqno'])

    def test_models_json_open(self):
        f = self.docinfo.models_json().open()
        f = codecs.getreader('utf-8')(f)
        try:
            self.assertEqual(67, len(json.load(f)))
        finally:
            f.close()