| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| from __future__ import absolute_import |
| from __future__ import print_function |
| from __future__ import unicode_literals |
|
|
| from .binmodel import ControlChar |
| from .binmodel import ParaTextChunks |
| from .binmodel import Text |
| from .bintype import resolve_type_events |
| from .bintype import resolve_values_from_stream |
| from .dataio import ArrayType |
| from .dataio import EnumType |
| from .dataio import FlagsType |
| from .dataio import SelectiveType |
| from .dataio import StructType |
| from .dataio import X_ARRAY |
| from .filestructure import FileHeader |
| from .treeop import ENDEVENT |
| from .treeop import STARTEVENT |
| from .xmlformat import xmlevents_to_bytechunks |
|
|
|
|
| def xmldump_flat(hwp5file, output, xml_declaration=True): |
| xmlevents = xmlevents_from_hwp5file(hwp5file) |
| bytechunks = xmlevents_to_bytechunks(xmlevents) |
| if xml_declaration: |
| output.write('<?xml version="1.0" encoding="utf-8"?>') |
| for x in bytechunks: |
| output.write(x) |
|
|
|
|
| def xmlevents_from_hwp5file(hwp5file): |
| version = '%d.%d.%d.%d' % hwp5file.fileheader.version |
| hexversion = '%02x%02x%02x%02x' % hwp5file.fileheader.version |
| yield STARTEVENT, ('Hwp5Doc', { |
| 'version': version, |
| 'hexversion': hexversion |
| }) |
|
|
| with hwp5file.fileheader.open() as f: |
| yield STARTEVENT, ('FileHeader', { |
| }) |
| resolve_values = resolve_values_from_stream(f) |
| model_events = resolve_type_events(FileHeader, {}, resolve_values) |
| for x in xmlevents_from_modelevents(model_events): |
| yield x |
| yield ENDEVENT, 'FileHeader' |
|
|
| yield STARTEVENT, ('DocInfo', { |
| }) |
| model_events = hwp5file.docinfo.parse_model_events() |
| for x in xmlevents_from_modelevents(model_events): |
| yield x |
| yield ENDEVENT, 'DocInfo' |
|
|
| for section_name in hwp5file.bodytext: |
| section = hwp5file.bodytext[section_name] |
| yield STARTEVENT, ('Section', { |
| 'name': section_name |
| }) |
| model_events = section.parse_model_events() |
| for x in xmlevents_from_modelevents(model_events): |
| yield x |
| yield ENDEVENT, 'Section' |
|
|
| yield ENDEVENT, 'Hwp5Doc' |
|
|
|
|
| def expand_item_value(ev, data): |
| if ev is None and data['type'] is ParaTextChunks: |
| yield STARTEVENT, data |
| for (start, end), item in data['value']: |
| if isinstance(item, unicode): |
| yield None, { |
| 'bin_offset': data['bin_offset'] + start * 2, |
| 'type': Text, |
| 'value': item, |
| } |
| else: |
| x = { |
| 'bin_offset': data['bin_offset'] + start * 2, |
| 'type': ControlChar, |
| 'value': item, |
| } |
| yield None, x |
| yield ENDEVENT, data |
| else: |
| yield ev, data |
|
|
|
|
| def expand_item_values(model_events): |
| for ev, item in model_events: |
| for x in expand_item_value(ev, item): |
| yield x |
|
|
|
|
| def xmlevents_from_modelevents(model_events): |
| expanded_events = expand_item_values(model_events) |
| for ev, data in expanded_events: |
| record = data.get('record') |
| if record: |
| if ev is STARTEVENT: |
| yield ev, ('Record', { |
| 'tagname': record['tagname'], |
| 'tagid': unicode(record['tagid']), |
| 'seqno': unicode(record['seqno']), |
| 'level': unicode(record['level']), |
| 'size': unicode(record['size']), |
| }) |
| elif ev is ENDEVENT: |
| yield ev, 'Record' |
| else: |
| assert False |
| else: |
| datatype = data['type'] |
| typename = datatype.__name__ |
|
|
| if ev in (STARTEVENT, ENDEVENT): |
| if isinstance(datatype, (ArrayType, X_ARRAY)): |
| elem = 'array' |
| atrs = { |
| } |
| elif isinstance(datatype, (StructType, SelectiveType)): |
| elem = 'struct' |
| atrs = { |
| 'type': typename |
| } |
| else: |
| raise Exception(datatype.__name__) |
|
|
| if 'name' in data: |
| atrs['name'] = data['name'] |
| if ev is STARTEVENT: |
| yield ev, (elem, atrs) |
| else: |
| yield ev, elem |
| elif ev is None: |
| atrs = { |
| 'type': typename, |
| 'value': unicode(data['value']) |
| } |
| if 'name' in data: |
| atrs['name'] = data['name'] |
| if 'bin_offset' in data: |
| atrs['offset'] = unicode(data['bin_offset']) |
| if 'bin_value' in data: |
| atrs['bin_value'] = unicode(data['bin_value']) |
| if 'bin_type' in data: |
| atrs['type'] = data['bin_type'].__name__ |
| yield STARTEVENT, ('item', atrs) |
|
|
| if isinstance(datatype, FlagsType): |
| fixed_size = datatype.basetype.fixed_size |
| b = bin(data['value'])[2:] |
| if len(b) < fixed_size * 8: |
| b = '0' * (fixed_size * 8 - len(b)) + b |
| h = hex(data['value'])[2:] |
| if len(h) < fixed_size * 2: |
| h = '0' * (fixed_size * 2 - len(h)) + h |
| if h.endswith('L'): |
| h = h[:-1] |
| atrs = { |
| 'hex': h, |
| 'bin': b |
| } |
| yield STARTEVENT, ('bitflags', atrs) |
|
|
| for bitfield_name in datatype.bitfields: |
| desc = datatype.bitfields[bitfield_name] |
| bitfield_type = desc.valuetype |
| value = desc.__get__(data['value'], None) |
|
|
| atrs = { |
| 'type': datatype.basetype.__name__, |
| 'name': bitfield_name, |
| 'msb': unicode(desc.msb), |
| 'lsb': unicode(desc.lsb), |
| 'value': unicode(int(value)) |
| } |
| yield STARTEVENT, ('bits', atrs) |
|
|
| if isinstance(bitfield_type, EnumType): |
| atrs = { |
| 'type': bitfield_type.__name__, |
| 'value': value.name |
| } |
| yield STARTEVENT, ('enum', atrs) |
| yield ENDEVENT, 'enum' |
|
|
| yield ENDEVENT, 'bits' |
|
|
| yield ENDEVENT, 'bitflags' |
|
|
| yield ENDEVENT, 'item' |
|
|