# Repository path: cobiz/src/hwp5/binmodel/controlchar.py
# Last commit: d94b56e "Add src" (verified); avatar caption: seawolf2357
# -*- coding: utf-8 -*-
#
# pyhwp : hwp file format parser in python
# Copyright (C) 2010-2023 mete0r <https://github.com/mete0r>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals
import re
import sys
from hwp5.dataio import PrimitiveType
from hwp5.dataio import UINT32
from hwp5.dataio import UINT16
from hwp5.dataio import UINT8
# Interpreter flag: True when the major version of the running Python is 3.
PY3 = sys.version_info[0] == 3

if PY3:
    # Python 3 has no ``unichr`` builtin; alias it to ``chr`` so the rest of
    # the module can keep calling ``unichr``.
    unichr = chr
class CHID(str, metaclass=PrimitiveType):
    """Four-character control identifier.

    The class attributes below are the known identifier strings.  Every one
    of them is exactly ``fixed_size`` (4) characters long, which is also the
    length of the string produced by :meth:`decode`; shorter identifiers are
    padded on the right with spaces.
    """

    # Number of bytes consumed by one identifier.
    fixed_size = 4

    # Common controls
    GSO = 'gso '
    TBL = 'tbl '
    LINE = '$lin'
    RECT = '$rec'
    ELLI = '$ell'
    ARC = '$arc'
    POLY = '$pol'
    CURV = '$cur'
    EQED = 'eqed'
    PICT = '$pic'
    OLE = '$ole'
    CONTAINER = '$con'

    # Controls
    SECD = 'secd'
    COLD = 'cold'
    HEADER = 'head'
    FOOTER = 'foot'
    # NOTE: these two must be space-padded to four characters; a
    # three-character literal can never compare equal to a decoded id.
    FN = 'fn  '
    EN = 'en  '
    ATNO = 'atno'
    NWNO = 'nwno'
    PGHD = 'pghd'
    PGCT = 'pgct'
    PGNP = 'pgnp'
    IDXM = 'idxm'
    BOKM = 'bokm'
    TCPS = 'tcps'
    TDUT = 'tdut'
    TCMT = 'tcmt'

    # Field starts
    FIELD_UNK = '%unk'
    FIELD_DTE = '%dte'
    FIELD_DDT = '%ddt'
    FIELD_PAT = '%pat'
    FIELD_BMK = '%bmk'
    FIELD_MMG = '%mmg'
    FIELD_XRF = '%xrf'
    FIELD_FMU = '%fmu'
    FIELD_CLK = '%clk'
    FIELD_SMR = '%smr'
    FIELD_USR = '%usr'
    FIELD_HLK = '%hlk'
    FIELD_REVISION_SIGN = '%sig'
    FIELD_REVISION_DELETE = '%%*d'
    FIELD_REVISION_ATTACH = '%%*a'
    FIELD_REVISION_CLIPPING = '%%*C'
    FIELD_REVISION_SAWTOOTH = '%%*S'
    FIELD_REVISION_THINKING = '%%*T'
    FIELD_REVISION_PRAISE = '%%*P'
    FIELD_REVISION_LINE = '%%*L'
    FIELD_REVISION_SIMPLECHANGE = '%%*c'
    FIELD_REVISION_HYPERLINK = '%%*h'
    FIELD_REVISION_LINEATTACH = '%%*A'
    FIELD_REVISION_LINELINK = '%%*i'
    FIELD_REVISION_LINETRANSFER = '%%*t'
    FIELD_REVISION_RIGHTMOVE = '%%*r'
    FIELD_REVISION_LEFTMOVE = '%%*l'
    FIELD_REVISION_TRANSFER = '%%*n'
    FIELD_REVISION_SIMPLEINSERT = '%%*e'
    FIELD_REVISION_SPLIT = '%spl'
    FIELD_REVISION_CHANGE = '%%mr'
    FIELD_MEMO = '%%me'
    FIELD_PRIVATE_INFO_SECURITY = '%cpr'

    @staticmethod
    def decode(bytes, context=None):
        """Build the identifier string from four stored bytes.

        The bytes are read in reverse order (index 3 first, index 0 last)
        and each byte value is mapped to the character with that code point.

        :param bytes: indexable sequence of at least four byte values
            (e.g. a ``bytes`` object).
        :param context: accepted for interface compatibility; not used here.
        :returns: a four-character ``str``.
        :raises IndexError: if fewer than four items are supplied.
        """
        # The ``metaclass=`` keyword in the class header only parses on
        # Python 3, so indexing always yields integers here.
        return ''.join(chr(bytes[pos]) for pos in (3, 2, 1, 0))
class ControlChar(object):
    """Registry of control characters (codes 0x00-0x1f) plus helpers to
    locate and decode them inside raw paragraph bytes.

    The byte data is scanned as two-byte units whose first byte carries the
    code (the scanner matches ``<code> 0x00`` at even offsets).  After
    ``ControlChar._populate()`` has run, every name in ``names`` is also
    available as a class attribute holding the one-character string, e.g.
    ``ControlChar.TAB == '\\t'``.
    """

    # Kinds of control characters; ``size`` is the number of two-byte units
    # the control occupies in the stream (see ``find``).
    class CHAR(object):
        size = 1

    class INLINE(object):
        size = 8

    class EXTENDED(object):
        size = 8

    # code -> (attribute name, kind)
    chars = {0x00: ('NULL', CHAR),
             0x01: ('CTLCHR01', EXTENDED),
             0x02: ('SECTION_COLUMN_DEF', EXTENDED),
             0x03: ('FIELD_START', EXTENDED),
             0x04: ('FIELD_END', INLINE),
             0x05: ('CTLCHR05', INLINE),
             0x06: ('CTLCHR06', INLINE),
             0x07: ('CTLCHR07', INLINE),
             0x08: ('TITLE_MARK', INLINE),
             0x09: ('TAB', INLINE),
             0x0a: ('LINE_BREAK', CHAR),
             0x0b: ('DRAWING_TABLE_OBJECT', EXTENDED),
             0x0c: ('CTLCHR0C', EXTENDED),
             0x0d: ('PARAGRAPH_BREAK', CHAR),
             0x0e: ('CTLCHR0E', EXTENDED),
             0x0f: ('HIDDEN_EXPLANATION', EXTENDED),
             0x10: ('HEADER_FOOTER', EXTENDED),
             0x11: ('FOOT_END_NOTE', EXTENDED),
             0x12: ('AUTO_NUMBER', EXTENDED),
             0x13: ('CTLCHR13', INLINE),
             0x14: ('CTLCHR14', INLINE),
             0x15: ('PAGE_CTLCHR', EXTENDED),
             0x16: ('BOOKMARK', EXTENDED),
             0x17: ('CTLCHR17', EXTENDED),
             0x18: ('HYPHEN', CHAR),
             0x1e: ('NONBREAK_SPACE', CHAR),
             0x1f: ('FIXWIDTH_SPACE', CHAR)}

    # character -> attribute name / character -> kind
    names = {unichr(code): name for code, (name, kind) in chars.items()}
    kinds = {unichr(code): kind for code, (name, kind) in chars.items()}

    # REGEX_CONTROL_CHAR below matches every code in 0x00-0x1f, but
    # 0x19-0x1d have no entry in ``chars``; without a kind, find(), decode()
    # and get_kind_by_code() raise KeyError on them.  Register them as
    # single-unit characters (NOTE(review): believed to match the HWP 5.0
    # spec, which lists 25-29 as reserved "char" controls - confirm).
    # ``names`` is deliberately left alone so that no new attributes are
    # populated and get_name_by_code() keeps its 'CTLCHR%02x' fallback.
    kinds.update(dict.fromkeys(map(unichr, range(0x19, 0x1e)), CHAR))

    @classmethod
    def _populate(cls):
        """Expose every entry of ``names`` as a class attribute whose value
        is the corresponding one-character string."""
        for ch, name in cls.names.items():
            setattr(cls, name, ch)

    # A byte in 0x00-0x1f immediately followed by a zero byte.
    REGEX_CONTROL_CHAR = re.compile(b'[\x00-\x1f]\x00')

    @classmethod
    def find(cls, data, start_idx):
        """Locate the next control character in ``data``.

        :param data: bytes-like buffer to scan.
        :param start_idx: byte offset at which the search starts.
        :returns: ``(begin, end)`` byte offsets of the control character,
            where ``end - begin`` is twice the ``size`` of its kind; when no
            control character is found, ``(len(data), len(data))``.
        """
        while True:
            m = cls.REGEX_CONTROL_CHAR.search(data, start_idx)
            if m is None:
                data_len = len(data)
                return data_len, data_len

            i = m.start()
            if i & 1:
                # Match straddles two units (odd offset): not a real control
                # character, resume at the next even offset.
                start_idx = i + 1
                continue

            unit = data[i]
            if not isinstance(unit, int):
                # Indexing yielded a one-byte string rather than an integer.
                unit = ord(unit)
            size = cls.kinds[unichr(unit)].size
            return i, i + (size * 2)

    @classmethod
    def decode(cls, bytes):
        """Decode one control character from its raw bytes.

        :param bytes: the bytes of a single control character, starting with
            its two-byte code (typically ``data[begin:end]`` from ``find``).
        :returns: a dict that always has ``code``.  For kinds of size 8 the
            twelve bytes after the code are decoded as well: for TAB into
            ``param`` (a dict with ``width``, ``unknown0``, ``unknown1`` and
            ``unknown2``), otherwise into ``chid`` (first four bytes) and
            ``param`` (the remaining eight bytes, raw).
        :raises KeyError: if the code has no entry in ``kinds``.
        """
        code = UINT16.decode(bytes[0:2])
        ch = unichr(code)
        if cls.kinds[ch].size != 8:
            return dict(code=code)

        payload = bytes[2:2 + 12]
        if ch == ControlChar.TAB:
            param = dict(width=UINT32.decode(payload[0:4]),
                         unknown0=UINT8.decode(payload[4:5]),
                         unknown1=UINT8.decode(payload[5:6]),
                         unknown2=payload[6:])
            return dict(code=code, param=param)

        chid = CHID.decode(payload[0:4])
        param = payload[4:12]
        return dict(code=code, chid=chid, param=param)

    @classmethod
    def get_kind_by_code(cls, code):
        """Return the kind class (CHAR, INLINE or EXTENDED) for an integer
        code; raises KeyError if the code has no entry in ``kinds``."""
        return cls.kinds[unichr(code)]

    @classmethod
    def get_name_by_code(cls, code):
        """Return the registered name for an integer code, or
        ``'CTLCHR%02x' % code`` when the code has no entry in ``names``."""
        return cls.names.get(unichr(code), 'CTLCHR%02x' % code)


# Install NULL, TAB, LINE_BREAK, ... as attributes of ControlChar.
ControlChar._populate()