|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from __future__ import absolute_import |
|
|
from __future__ import print_function |
|
|
from __future__ import unicode_literals |
|
|
import re |
|
|
import sys |
|
|
|
|
|
|
|
|
|
|
|
from hwp5.dataio import PrimitiveType |
|
|
from hwp5.dataio import UINT32 |
|
|
from hwp5.dataio import UINT16 |
|
|
from hwp5.dataio import UINT8 |
|
|
|
|
|
|
|
|
# True when the running interpreter is Python 3.
PY3 = sys.version_info[0] == 3

# Python 3 has no ``unichr`` builtin; ``chr`` plays that role there, so alias
# it to let the rest of the module call ``unichr`` on either major version.
if PY3:
    unichr = chr
|
|
|
|
|
|
|
|
class CHID(str, metaclass=PrimitiveType):
    """Four-character control identifier.

    The class attributes below are the known identifier values.  Each one is
    exactly four characters long, which is the length of the string that
    :meth:`decode` produces from its four input bytes.
    """

    # Every identifier is four characters wide.
    # NOTE(review): presumably consumed by PrimitiveType as the encoded size
    # in bytes -- confirm against hwp5.dataio.
    fixed_size = 4

    # Object identifiers.
    GSO = 'gso '
    TBL = 'tbl '
    LINE = '$lin'
    RECT = '$rec'
    ELLI = '$ell'
    ARC = '$arc'
    POLY = '$pol'
    CURV = '$cur'
    EQED = 'eqed'
    PICT = '$pic'
    OLE = '$ole'
    CONTAINER = '$con'

    # Other control identifiers.
    SECD = 'secd'
    COLD = 'cold'
    HEADER = 'head'
    FOOTER = 'foot'
    # FN / EN are padded with two spaces: a three-character value could never
    # compare equal to the four-character string returned by decode().
    FN = 'fn  '
    EN = 'en  '
    ATNO = 'atno'
    NWNO = 'nwno'
    PGHD = 'pghd'
    PGCT = 'pgct'
    PGNP = 'pgnp'
    IDXM = 'idxm'
    BOKM = 'bokm'
    TCPS = 'tcps'
    TDUT = 'tdut'
    TCMT = 'tcmt'

    # Field identifiers (all start with '%').
    FIELD_UNK = '%unk'
    FIELD_DTE = '%dte'
    FIELD_DDT = '%ddt'
    FIELD_PAT = '%pat'
    FIELD_BMK = '%bmk'
    FIELD_MMG = '%mmg'
    FIELD_XRF = '%xrf'
    FIELD_FMU = '%fmu'
    FIELD_CLK = '%clk'
    FIELD_SMR = '%smr'
    FIELD_USR = '%usr'
    FIELD_HLK = '%hlk'
    FIELD_REVISION_SIGN = '%sig'
    FIELD_REVISION_DELETE = '%%*d'
    FIELD_REVISION_ATTACH = '%%*a'
    FIELD_REVISION_CLIPPING = '%%*C'
    FIELD_REVISION_SAWTOOTH = '%%*S'
    FIELD_REVISION_THINKING = '%%*T'
    FIELD_REVISION_PRAISE = '%%*P'
    FIELD_REVISION_LINE = '%%*L'
    FIELD_REVISION_SIMPLECHANGE = '%%*c'
    FIELD_REVISION_HYPERLINK = '%%*h'
    FIELD_REVISION_LINEATTACH = '%%*A'
    FIELD_REVISION_LINELINK = '%%*i'
    FIELD_REVISION_LINETRANSFER = '%%*t'
    FIELD_REVISION_RIGHTMOVE = '%%*r'
    FIELD_REVISION_LEFTMOVE = '%%*l'
    FIELD_REVISION_TRANSFER = '%%*n'
    FIELD_REVISION_SIMPLEINSERT = '%%*e'
    FIELD_REVISION_SPLIT = '%spl'
    FIELD_REVISION_CHANGE = '%%mr'
    FIELD_MEMO = '%%me'
    FIELD_PRIVATE_INFO_SECURITY = '%cpr'

    @staticmethod
    def decode(bytes, context=None):
        """Build the four-character identifier from four raw bytes.

        The bytes are read back to front: ``bytes[3]`` supplies the first
        character and ``bytes[0]`` the last.

        :param bytes: indexable byte sequence holding at least four bytes
        :param context: accepted for interface compatibility; not used here
        :returns: the identifier as a four-character string
        :raises IndexError: if fewer than four bytes are supplied
        """
        if PY3:
            # Indexing a bytes object yields ints on Python 3, so each byte
            # has to be turned into a character explicitly.
            return ''.join(chr(bytes[index]) for index in (3, 2, 1, 0))
        # On Python 2 indexing already yields one-character strings.
        return bytes[3] + bytes[2] + bytes[1] + bytes[0]
|
|
|
|
|
|
|
|
class ControlChar(object):
    """Registry of control characters (code units 0x00-0x1f) plus helpers
    to locate and decode them inside a byte buffer of 16-bit little-endian
    code units.

    ``names`` maps each named control character to its name and ``kinds``
    maps each control character to one of the kind classes ``CHAR``,
    ``INLINE`` or ``EXTENDED``.  After :meth:`_populate` has run, every name
    is also available as a class attribute holding the character itself
    (e.g. ``ControlChar.TAB``).
    """

    class CHAR(object):
        """Control that occupies a single code unit."""
        size = 1

    class INLINE(object):
        """Control that occupies eight code units (16 bytes)."""
        size = 8

    class EXTENDED(object):
        """Control that occupies eight code units (16 bytes)."""
        size = 8

    # code -> (attribute name, kind)
    chars = {0x00: ('NULL', CHAR),
             0x01: ('CTLCHR01', EXTENDED),
             0x02: ('SECTION_COLUMN_DEF', EXTENDED),
             0x03: ('FIELD_START', EXTENDED),
             0x04: ('FIELD_END', INLINE),
             0x05: ('CTLCHR05', INLINE),
             0x06: ('CTLCHR06', INLINE),
             0x07: ('CTLCHR07', INLINE),
             0x08: ('TITLE_MARK', INLINE),
             0x09: ('TAB', INLINE),
             0x0a: ('LINE_BREAK', CHAR),
             0x0b: ('DRAWING_TABLE_OBJECT', EXTENDED),
             0x0c: ('CTLCHR0C', EXTENDED),
             0x0d: ('PARAGRAPH_BREAK', CHAR),
             0x0e: ('CTLCHR0E', EXTENDED),
             0x0f: ('HIDDEN_EXPLANATION', EXTENDED),
             0x10: ('HEADER_FOOTER', EXTENDED),
             0x11: ('FOOT_END_NOTE', EXTENDED),
             0x12: ('AUTO_NUMBER', EXTENDED),
             0x13: ('CTLCHR13', INLINE),
             0x14: ('CTLCHR14', INLINE),
             0x15: ('PAGE_CTLCHR', EXTENDED),
             0x16: ('BOOKMARK', EXTENDED),
             0x17: ('CTLCHR17', EXTENDED),
             0x18: ('HYPHEN', CHAR),
             0x1e: ('NONBREAK_SPACE', CHAR),
             0x1f: ('FIXWIDTH_SPACE', CHAR)}

    # character -> name, and character -> kind, derived from ``chars``.
    names = {unichr(code): name for code, (name, _kind) in chars.items()}
    kinds = {unichr(code): kind for code, (_name, kind) in chars.items()}

    # REGEX_CONTROL_CHAR below matches every code unit in 0x00-0x1f, but
    # ``chars`` has no entry for 0x19-0x1d.  Register those reserved codes as
    # single-unit CHAR controls so that find()/decode()/get_kind_by_code() do
    # not raise KeyError on them.  They are deliberately left out of
    # ``names``: get_name_by_code() keeps returning its 'CTLCHRxx' fallback
    # and _populate() creates no extra attributes.
    kinds.update(dict.fromkeys(map(unichr, range(0x19, 0x1e)), CHAR))

    @classmethod
    def _populate(cls):
        """Expose every entry of ``names`` as a class attribute whose value
        is the control character itself."""
        for ch, name in cls.names.items():
            setattr(cls, name, ch)

    # One control code unit: a low byte in 0x00-0x1f followed by a zero
    # high byte.
    REGEX_CONTROL_CHAR = re.compile(b'[\x00-\x1f]\x00')

    @classmethod
    def find(cls, data, start_idx):
        """Locate the first control character at or after ``start_idx``.

        :param data: byte buffer of 16-bit little-endian code units
        :param start_idx: byte offset at which the search starts
        :returns: ``(start, end)`` byte offsets of the control character,
            where ``end - start`` is twice the ``size`` of its kind; when
            no control character is found both values equal ``len(data)``
        """
        search = cls.REGEX_CONTROL_CHAR.search
        while True:
            m = search(data, start_idx)
            if m is None:
                data_len = len(data)
                return data_len, data_len

            i = m.start()
            if i & 1:
                # A hit at an odd offset straddles two code units (high byte
                # of one, low byte of the next); it is not a control
                # character, so resume the search one byte later.
                start_idx = i + 1
                continue

            code = data[i]
            if not isinstance(code, int):
                # Python 2 byte strings index to one-character strings.
                code = ord(code)
            size = cls.kinds[unichr(code)].size
            # ``size`` counts 2-byte code units.
            return i, i + (size * 2)

    @classmethod
    def decode(cls, bytes):
        """Decode the bytes of one control character into a dict.

        :param bytes: raw bytes of the control character, beginning with its
            2-byte code (typically ``data[start:end]`` as given by
            :meth:`find`)
        :returns: ``dict(code=...)`` for single-unit controls;
            ``dict(code=..., param={width, unknown0, unknown1, unknown2})``
            for TAB; ``dict(code=..., chid=..., param=...)`` for every other
            eight-unit control
        :raises KeyError: if the code is not a control character known to
            ``kinds``

        ``ControlChar.TAB`` only exists once :meth:`_populate` has run.
        """
        code = UINT16.decode(bytes[0:2])
        ch = unichr(code)
        if cls.kinds[ch].size != 8:
            return dict(code=code)

        # The 12 bytes that follow the 2-byte code carry the parameters.
        payload = bytes[2:2 + 12]
        if ch == ControlChar.TAB:
            param = dict(width=UINT32.decode(payload[0:4]),
                         unknown0=UINT8.decode(payload[4:5]),
                         unknown1=UINT8.decode(payload[5:6]),
                         unknown2=payload[6:])
            return dict(code=code, param=param)

        chid = CHID.decode(payload[0:4])
        param = payload[4:12]
        return dict(code=code, chid=chid, param=param)

    @classmethod
    def get_kind_by_code(cls, code):
        """Return the kind class (CHAR, INLINE or EXTENDED) for ``code``.

        :raises KeyError: if ``code`` is outside the control range 0x00-0x1f
        """
        ch = unichr(code)
        return cls.kinds[ch]

    @classmethod
    def get_name_by_code(cls, code):
        """Return the name registered for ``code``, or ``'CTLCHR%02x' % code``
        when the code has no entry in ``names``."""
        ch = unichr(code)
        return cls.names.get(ch, 'CTLCHR%02x' % code)
|
|
|
|
|
|
|
|
# Publish every named control character as a ControlChar class attribute
# (ControlChar.decode relies on ControlChar.TAB being present).
ControlChar._populate()
|
|
|