|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from __future__ import absolute_import |
|
|
from __future__ import print_function |
|
|
from __future__ import unicode_literals |
|
|
|
|
|
from collections import namedtuple |
|
|
from datetime import datetime |
|
|
from datetime import timedelta |
|
|
from uuid import UUID |
|
|
import logging |
|
|
import struct |
|
|
|
|
|
from hwp5.dataio import Struct |
|
|
from hwp5.dataio import Flags |
|
|
from hwp5.dataio import N_ARRAY |
|
|
from hwp5.dataio import ARRAY |
|
|
from hwp5.dataio import BYTE |
|
|
from hwp5.dataio import UINT16 |
|
|
from hwp5.dataio import UINT32 |
|
|
from hwp5.dataio import INT32 |
|
|
from hwp5.bintype import read_type |
|
|
|
|
|
|
|
|
# Logger named after this module.
logger = logging.getLogger(__name__)


# Registry filled by the PropertyType decorator: type code -> registered class.
vt_types = {}


def PropertyType(code):
    '''
    Return a class decorator that registers a class under ``code``.

    The decorated class is stored in ``vt_types[code]`` (replacing any class
    previously stored under the same code), receives a ``code`` class
    attribute, and is returned as the decoration result.
    '''

    def register(klass):
        vt_types[code] = klass
        klass.code = code
        return klass

    return register
|
|
|
|
|
|
|
|
@PropertyType(code=0x0003)
class VT_I4(object):
    '''
    Value reader registered for property type code 0x0003.
    '''

    @classmethod
    def read_value(cls, context, f):
        '''
        Read one INT32 from ``f`` through ``read_type`` and return it.
        '''
        value = read_type(INT32, context, f)
        return value
|
|
|
|
|
|
|
|
@PropertyType(code=0x001F)
class VT_LPWSTR(object):
    '''
    Value reader registered for property type code 0x001F.
    '''

    @classmethod
    def read_value(cls, context, f):
        '''
        Read a length-prefixed UTF-16LE string from ``f``.

        A UINT32 length is read first, then twice that many bytes, which are
        decoded as UTF-16LE. The last decoded character is dropped
        (presumably the NUL terminator counted in the length -- TODO confirm
        against [MS-OLEPS]).
        '''
        n_chars = read_type(UINT32, context, f)
        raw = f.read(2 * n_chars)
        text = raw.decode('utf-16le')
        return text[:-1]
|
|
|
|
|
|
|
|
@PropertyType(code=0x0040)
class VT_FILETIME(object):
    '''
    Value reader registered for property type code 0x0040.
    '''

    @classmethod
    def read_value(cls, context, f):
        '''
        Read two UINT32 words from ``f`` and return them as a FILETIME.

        The first word read becomes the low 32 bits and the second word the
        high 32 bits of the combined integer.
        '''
        low = read_type(UINT32, context, f)
        high = read_type(UINT32, context, f)
        return FILETIME((high << 32) | low)
|
|
|
|
|
|
|
|
class FILETIME(object):
    '''
    Thin wrapper around a raw integer timestamp.

    ``value`` is interpreted by the ``datetime`` property as a count of
    100-nanosecond ticks elapsed since 1601-01-01 00:00:00.
    '''
    __slots__ = ('value', )

    def __init__(self, value):
        '''
        :param value: raw tick count (an integer).
        '''
        self.value = value

    def __str__(self):
        '''
        Return the string form of the converted ``datetime``.
        '''
        return str(self.datetime)

    @property
    def datetime(self):
        '''
        The timestamp as a naive ``datetime.datetime``.

        Ticks below one microsecond are truncated, because ``timedelta``
        has microsecond resolution.
        '''
        # Floor division keeps the tick -> microsecond conversion in exact
        # integer arithmetic. True division would go through a float, which
        # cannot represent present-day values (> 2**53 microseconds) exactly
        # and would shift the result by up to a microsecond.
        return (
            datetime(1601, 1, 1, 0, 0, 0) +
            timedelta(microseconds=self.value // 10)
        )
|
|
|
|
|
|
|
|
# A property identifier: its numeric id paired with a symbolic label.
PropertyIdentifier = namedtuple('PropertyIdentifier', ('id', 'label'))


# Identifiers collected in RESERVED_PROPERTIES.
PID_DICTIONARY = PropertyIdentifier(0x00000000, 'PID_DICTIONARY')
PID_CODEPAGE = PropertyIdentifier(0x00000001, 'PID_CODEPAGE')
PID_LOCALE = PropertyIdentifier(0x80000000, 'PID_LOCALE')
PID_BEHAVIOR = PropertyIdentifier(0x80000003, 'PID_BEHAVIOR')

# Identifiers collected in SUMMARY_INFORMATION_PROPERTIES.
PIDSI_TITLE = PropertyIdentifier(0x02, 'PIDSI_TITLE')
PIDSI_SUBJECT = PropertyIdentifier(0x03, 'PIDSI_SUBJECT')
PIDSI_AUTHOR = PropertyIdentifier(0x04, 'PIDSI_AUTHOR')
PIDSI_KEYWORDS = PropertyIdentifier(0x05, 'PIDSI_KEYWORDS')
PIDSI_COMMENTS = PropertyIdentifier(0x06, 'PIDSI_COMMENTS')
PIDSI_TEMPLATE = PropertyIdentifier(0x07, 'PIDSI_TEMPLATE')
PIDSI_LASTAUTHOR = PropertyIdentifier(0x08, 'PIDSI_LASTAUTHOR')
PIDSI_REVNUMBER = PropertyIdentifier(0x09, 'PIDSI_REVNUMBER')
PIDSI_EDITTIME = PropertyIdentifier(0x0a, 'PIDSI_EDITTIME')
PIDSI_LASTPRINTED = PropertyIdentifier(0x0b, 'PIDSI_LASTPRINTED')
PIDSI_CREATE_DTM = PropertyIdentifier(0x0c, 'PIDSI_CREATE_DTM')
PIDSI_LASTSAVE_DTM = PropertyIdentifier(0x0d, 'PIDSI_LASTSAVE_DTM')
PIDSI_PAGECOUNT = PropertyIdentifier(0x0e, 'PIDSI_PAGECOUNT')
PIDSI_WORDCOUNT = PropertyIdentifier(0x0f, 'PIDSI_WORDCOUNT')
PIDSI_CHARCOUNT = PropertyIdentifier(0x10, 'PIDSI_CHARCOUNT')
PIDSI_THUMBNAIL = PropertyIdentifier(0x11, 'PIDSI_THUMBNAIL')
PIDSI_APPNAME = PropertyIdentifier(0x12, 'PIDSI_APPNAME')
PIDSI_SECURITY = PropertyIdentifier(0x13, 'PIDSI_SECURITY')
|
|
|
|
|
|
|
|
# The four PID_* identifiers, in ascending id order.
RESERVED_PROPERTIES = (
    PID_DICTIONARY, PID_CODEPAGE, PID_LOCALE, PID_BEHAVIOR,
)


# The PIDSI_* identifiers, in ascending id order (0x02 .. 0x13).
SUMMARY_INFORMATION_PROPERTIES = (
    PIDSI_TITLE, PIDSI_SUBJECT, PIDSI_AUTHOR,
    PIDSI_KEYWORDS, PIDSI_COMMENTS, PIDSI_TEMPLATE,
    PIDSI_LASTAUTHOR, PIDSI_REVNUMBER, PIDSI_EDITTIME,
    PIDSI_LASTPRINTED, PIDSI_CREATE_DTM, PIDSI_LASTSAVE_DTM,
    PIDSI_PAGECOUNT, PIDSI_WORDCOUNT, PIDSI_CHARCOUNT,
    PIDSI_THUMBNAIL, PIDSI_APPNAME, PIDSI_SECURITY,
)
|
|
|
|
|
|
|
|
class Property(object):
    '''
    One property read from a property set.

    Holds the descriptor it was located with (``desc``), an optional
    symbolic label (``idLabel``), the type information (``type``) and the
    decoded ``value``.
    '''

    def __init__(self, desc, idLabel, type, value):
        self.value = value
        self.type = type
        self.idLabel = idLabel
        self.desc = desc

    @property
    def id(self):
        '''
        The property id, taken from the descriptor.
        '''
        descriptor = self.desc
        return descriptor.id
|
|
|
|
|
|
|
|
class PropertyDesc(Struct):
    '''
    Descriptor of one property: its ``id`` and its ``offset``.

    Readers in this module add ``offset`` to the owning property set's
    offset before seeking.
    '''

    def __init__(self, id, offset):
        self.offset = offset
        self.id = id

    @classmethod
    def fromDict(cls, d):
        '''
        Build an instance from a mapping with ``'id'`` and ``'offset'`` keys.
        '''
        return cls(d['id'], d['offset'])

    @staticmethod
    def attributes():
        '''
        Yield the ``(type, name)`` pairs describing the binary layout.
        '''
        yield UINT32, 'id'
        yield UINT32, 'offset'
|
|
|
|
|
|
|
|
class PropertyReader(object):
    '''
    Reads one typed property value out of a property set stream.
    '''

    def __init__(self, propsetDesc, propDesc, idLabel, codepage,
                 displayName=None):
        self.displayName = displayName
        self.codepage = codepage
        self.idLabel = idLabel
        self.propDesc = propDesc
        self.propsetDesc = propsetDesc

    def read(self, f):
        '''
        Seek to the property inside ``f``, decode it and return a Property.

        The position is the property set offset plus the property offset.
        The type header is read first; the class registered in ``vt_types``
        for its code then reads the value.

        :raises KeyError: when no class is registered for the type code.
        '''
        position = self.propsetDesc.offset + self.propDesc.offset
        f.seek(position)

        context = {}
        typed = TypedPropertyValue.fromDict(
            read_type(TypedPropertyValue, context, f)
        )
        # Plain indexing: an unregistered type code surfaces as KeyError.
        value_reader = vt_types[typed.code]
        decoded = value_reader.read_value(context, f)

        return Property(
            desc=self.propDesc,
            idLabel=self.idLabel,
            type=typed,
            value=decoded,
        )
|
|
|
|
|
|
|
|
class TypedPropertyValue(Struct):
    '''
    [MS-OLEPS] 2.15 TypedPropertyValue

    Only the type header is modelled here; ``code`` is the type code
    extracted from it.
    '''

    def __init__(self, code):
        self.code = code

    @classmethod
    def fromDict(cls, d):
        '''
        Build an instance from a mapping whose ``'type'`` entry exposes
        a ``code`` attribute.
        '''
        type_flags = d['type']
        return cls(code=type_flags.code)

    # UINT32 flags field exposing a ``code`` sub-field (arguments 0, 16).
    TypeFlags = Flags(UINT32, 0, 16, 'code')

    @classmethod
    def attributes(cls):
        '''
        Yield the ``(type, name)`` pairs describing the binary layout.
        '''
        yield cls.TypeFlags, 'type'

    @property
    def vt_type(self):
        '''
        The class registered in ``vt_types`` for this code, or None.
        '''
        return vt_types.get(self.code)
|
|
|
|
|
|
|
|
class DictionaryEntry(Struct):
    '''
    [MS-OLEPS] 2.16 DictionaryEntry

    Pairs a property ``id`` with its ``name``.
    '''

    def __init__(self, id, name):
        self.name = name
        self.id = id

    @classmethod
    def fromDict(cls, d):
        '''
        Build an instance from a mapping with ``'id'`` and ``'name'`` keys.

        ``d['name']`` is converted with ``nullterminated_string``.
        '''
        name = nullterminated_string(d['name'])
        return cls(id=d['id'], name=name)

    @staticmethod
    def attributes():
        '''
        Yield the ``(type, name)`` pairs describing the binary layout.
        '''
        yield UINT32, 'id'
        yield N_ARRAY(UINT32, BYTE), 'name'
|
|
|
|
|
|
|
|
class Dictionary(Struct):
    '''
    [MS-OLEPS] 2.17 Dictionary

    An ordered collection of DictionaryEntry objects.
    '''

    def __init__(self, entries):
        self.entries = entries

    @classmethod
    def fromDict(cls, d):
        '''
        Build an instance from a mapping whose ``'entries'`` value is an
        iterable of DictionaryEntry mappings.
        '''
        converted = tuple(map(DictionaryEntry.fromDict, d['entries']))
        return cls(entries=converted)

    @staticmethod
    def attributes():
        '''
        Yield the ``(type, name)`` pairs describing the binary layout.
        '''
        yield N_ARRAY(UINT32, DictionaryEntry), 'entries'

    def get(self, id, defvalue=None):
        '''
        Return the name of the first entry whose id equals ``id``.

        ``defvalue`` is returned when no entry matches.
        '''
        matching_names = (
            entry.name for entry in self.entries if id == entry.id
        )
        return next(matching_names, defvalue)
|
|
|
|
|
|
|
|
class DictionaryReader(object):
    '''
    Reads the dictionary property of a property set.
    '''

    def __init__(self, propsetDesc, propDesc, idLabel, codepage):
        self.codepage = codepage
        self.idLabel = idLabel
        self.propDesc = propDesc
        self.propsetDesc = propsetDesc

    def read(self, f):
        '''
        Seek to the dictionary inside ``f`` and return it as a Property.

        The position is the property set offset plus the property offset.
        The returned Property has ``type`` None and a Dictionary as value.
        '''
        f.seek(self.propsetDesc.offset + self.propDesc.offset)

        raw = read_type(Dictionary, {}, f)
        return Property(
            desc=self.propDesc,
            idLabel=self.idLabel,
            type=None,
            value=Dictionary.fromDict(raw),
        )
|
|
|
|
|
|
|
|
class PropertySet(object):
    '''
    [MS-OLEPS] 2.20 PropertySet

    Bundles a property set descriptor, its header and the properties read
    from it. Indexing with a property identifier returns that property's
    value.
    '''

    def __init__(self, desc, header, properties):
        self.properties = properties
        self.header = header
        self.desc = desc

    @property
    def fmtid(self):
        '''
        The format id, taken from the descriptor.
        '''
        descriptor = self.desc
        return descriptor.fmtid

    def __getitem__(self, propertyIdentifier):
        '''
        Return the value of the first property whose id matches.

        :param propertyIdentifier: object with an ``id`` attribute.
        :raises KeyError: when no property has that id.
        '''
        not_found = object()
        value = next(
            (
                candidate.value
                for candidate in self.properties
                if candidate.id == propertyIdentifier.id
            ),
            not_found,
        )
        if value is not_found:
            raise KeyError(propertyIdentifier)
        return value
|
|
|
|
|
|
|
|
class PropertySetHeader(Struct):
    '''
    Header of one property set.

    ``bytesize`` is the UINT32 size field and ``propDescList`` the sequence
    of PropertyDesc entries that follow it.
    '''

    def __init__(self, bytesize, propDescList):
        '''
        :param bytesize: value of the size field.
        :param propDescList: sequence of PropertyDesc objects.
        '''
        # Stored as the plain value; a trailing comma here would wrap it
        # in a 1-tuple.
        self.bytesize = bytesize
        self.propDescList = propDescList

    @classmethod
    def fromDict(cls, d):
        '''
        Build an instance from a mapping with ``'bytesize'`` and
        ``'propDescList'`` keys; each descriptor mapping is converted with
        ``PropertyDesc.fromDict``.
        '''
        return cls(
            bytesize=d['bytesize'],
            propDescList=tuple(
                PropertyDesc.fromDict(propDesc)
                for propDesc in d['propDescList']
            ),
        )

    def attributes():
        '''
        Yield the ``(type, name)`` pairs describing the binary layout.
        '''
        yield UINT32, 'bytesize'
        # presumably a UINT32 count followed by that many PropertyDesc
        # records -- confirm against hwp5.dataio.N_ARRAY
        yield N_ARRAY(UINT32, PropertyDesc), 'propDescList'
    attributes = staticmethod(attributes)
|
|
|
|
|
|
|
|
class PropertySetDesc(Struct):
    '''
    Descriptor of one property set: its ``fmtid`` and its ``offset``.

    Readers in this module seek to ``offset`` to reach the property set.
    '''

    def __init__(self, fmtid, offset):
        self.offset = offset
        self.fmtid = fmtid

    @staticmethod
    def attributes():
        '''
        Yield the ``(type, name)`` pairs describing the binary layout.
        '''
        yield ARRAY(BYTE, 16), 'fmtid'
        yield UINT32, 'offset'

    @classmethod
    def fromDict(cls, d):
        '''
        Build an instance from a mapping with ``'fmtid'`` and ``'offset'``
        keys; the 16 fmtid byte values are converted to a UUID.
        '''
        fmtid = uuid_from_bytes_tuple(d['fmtid'])
        return cls(fmtid=fmtid, offset=d['offset'])
|
|
|
|
|
|
|
|
class PropertySetStreamHeader(Struct):
    '''
    Header at the start of a property set stream.

    Carries the byte order mark, version, system identifier, CLSID and the
    descriptors of the property sets contained in the stream.
    '''

    def __init__(self, byteOrder, version, systemIdentifier, clsid,
                 propsetDescList):
        self.propsetDescList = propsetDescList
        self.clsid = clsid
        self.systemIdentifier = systemIdentifier
        self.version = version
        self.byteOrder = byteOrder

    @classmethod
    def fromDict(cls, d):
        '''
        Build an instance from the mapping produced for this struct.

        The CLSID byte values are converted to a UUID and each property set
        descriptor mapping is converted with ``PropertySetDesc.fromDict``.
        '''
        descriptors = tuple(
            PropertySetDesc.fromDict(item) for item in d['propsetDescList']
        )
        return cls(
            byteOrder=d['byteOrder'],
            version=d['version'],
            systemIdentifier=d['systemIdentifier'],
            clsid=uuid_from_bytes_tuple(d['clsid']),
            propsetDescList=descriptors,
        )

    @staticmethod
    def attributes():
        '''
        Yield the ``(type, name)`` pairs describing the binary layout.
        '''
        yield UINT16, 'byteOrder'
        yield UINT16, 'version'
        yield UINT32, 'systemIdentifier'
        yield ARRAY(BYTE, 16), 'clsid'
        yield N_ARRAY(UINT32, PropertySetDesc), 'propsetDescList'
|
|
|
|
|
|
|
|
class PropertySetStream(object):
    '''
    [MS-OLEPS] 2.21 PropertySetStream

    Pairs the stream header with the property sets read from the stream.
    The header fields are re-exposed as read-only properties.
    '''

    def __init__(self, header, propertysets):
        self.propertysets = propertysets
        self.header = header

    @property
    def byteOrder(self):
        ''' ``byteOrder`` of the stream header. '''
        header = self.header
        return header.byteOrder

    @property
    def version(self):
        ''' ``version`` of the stream header. '''
        header = self.header
        return header.version

    @property
    def systemIdentifier(self):
        ''' ``systemIdentifier`` of the stream header. '''
        header = self.header
        return header.systemIdentifier

    @property
    def clsid(self):
        ''' ``clsid`` of the stream header. '''
        header = self.header
        return header.clsid
|
|
|
|
|
|
|
|
class PropertySetFormat(object):
    '''
    Associates a format id with the property identifiers known for it.
    '''

    def __init__(self, fmtid, propertyIdentifiers):
        self.propertyIdentifiers = propertyIdentifiers
        self.fmtid = fmtid

    @property
    def idLabels(self):
        '''
        A new dict mapping each identifier's ``id`` to its ``label``.
        '''
        pairs = (
            (identifier.id, identifier.label)
            for identifier in self.propertyIdentifiers
        )
        return dict(pairs)
|
|
|
|
|
|
|
|
class PropertySetStreamReader(object):
    '''
    Reads a complete property set stream into a PropertySetStream.
    '''

    def __init__(self, propertySetFormats):
        '''
        :param propertySetFormats: iterable of objects with ``fmtid`` and
            ``idLabels`` attributes (e.g. PropertySetFormat); they are
            indexed by ``fmtid`` to label the properties of matching sets.
        '''
        self.propertySetFormats = {
            propsetFormat.fmtid: propsetFormat
            for propsetFormat in propertySetFormats
        }

    def read(self, f):
        '''
        Read the stream header and every property set it describes.

        :param f: seekable binary file object positioned at the start of
            the stream header.
        :returns: a PropertySetStream.
        '''
        context = {}
        streamHeader = read_type(PropertySetStreamHeader, context, f)
        streamHeader = PropertySetStreamHeader.fromDict(streamHeader)

        propertysetList = [
            self._readPropertySet(f, context, propsetDesc)
            for propsetDesc in streamHeader.propsetDescList
        ]

        return PropertySetStream(
            header=streamHeader,
            propertysets=propertysetList,
        )

    def _readPropertySet(self, f, context, propsetDesc):
        '''
        Read the single property set located by ``propsetDesc``.

        The code page property is read first and the dictionary second, so
        their values are available while the remaining properties are read.
        '''
        f.seek(propsetDesc.offset)
        propsetHeader = read_type(PropertySetHeader, context, f)
        propsetHeader = PropertySetHeader.fromDict(propsetHeader)

        # Property sets with an unknown format id are read without labels.
        try:
            propsetFormat = self.propertySetFormats[propsetDesc.fmtid]
        except KeyError:
            idLabels = {}
        else:
            idLabels = propsetFormat.idLabels

        properties = []
        propDescMap = {
            propDesc.id: propDesc
            for propDesc in propsetHeader.propDescList
        }

        codepage = None
        propDesc = propDescMap.pop(PID_CODEPAGE.id, None)
        if propDesc is not None:
            propReader = PropertyReader(
                propsetDesc=propsetDesc,
                propDesc=propDesc,
                idLabel=idLabels.get(propDesc.id),
                codepage=None,
                displayName=None,
            )
            prop = propReader.read(f)
            properties.append(prop)
            codepage = prop.value

        dictionary = None
        propDesc = propDescMap.pop(PID_DICTIONARY.id, None)
        if propDesc is not None:
            propReader = DictionaryReader(
                propsetDesc,
                propDesc,
                idLabels.get(propDesc.id),
                codepage,
            )
            prop = propReader.read(f)
            properties.append(prop)
            dictionary = prop.value

        for propDesc in propDescMap.values():
            # ``dictionary`` stays None when the set has no PID_DICTIONARY
            # property; such properties simply get no display name.
            if dictionary is not None:
                displayName = dictionary.get(propDesc.id, None)
            else:
                displayName = None
            propReader = PropertyReader(
                propsetDesc=propsetDesc,
                propDesc=propDesc,
                idLabel=idLabels.get(propDesc.id),
                codepage=codepage,
                displayName=displayName,
            )
            properties.append(propReader.read(f))

        return PropertySet(
            desc=propsetDesc,
            header=propsetHeader,
            properties=properties,
        )
|
|
|
|
|
|
|
|
class PropertySetStreamTextFormatter(object):
    '''
    Renders a PropertySetStream as human-readable text lines.
    '''

    def formatTextLines(self, stream):
        '''
        Generate the text lines describing ``stream``.

        The stream header fields come first, then a blank line, then for
        each property set a heading, an underline and its properties in
        ascending offset order. A dictionary property is followed by one
        line per dictionary entry.
        '''
        yield '- ByteOrder: 0x%x' % stream.byteOrder
        yield '- Version: %d' % stream.version
        yield '- SystemIdentifier: 0x%08x' % stream.systemIdentifier
        yield '- CLSID: %s' % stream.clsid
        yield ''

        for propertyset in stream.propertysets:
            heading = 'Property Set {}'.format(propertyset.fmtid)
            yield '- {:08x}: {}'.format(propertyset.desc.offset, heading)
            yield ' {}'.format('-' * len(heading))

            def by_offset(item):
                return item.desc.offset

            for item in sorted(propertyset.properties, key=by_offset):
                if item.id != PID_DICTIONARY.id:
                    yield '- {:08x}: {}(=0x{:08x}): {}'.format(
                        propertyset.desc.offset + item.desc.offset,
                        '' if item.idLabel is None else item.idLabel,
                        item.id,
                        item.value
                    )
                    continue

                # Dictionary property: a header line, then its entries.
                yield '- {:08x}: {}(=0x{:08x}):'.format(
                    propertyset.desc.offset + item.desc.offset,
                    '' if item.idLabel is None else item.idLabel,
                    item.id,
                )
                for entry in item.value.entries:
                    yield ' - {}: {}'.format(entry.id, entry.name)
|
|
|
|
|
|
|
|
def uuid_from_bytes_tuple(t):
    '''
    Convert a sequence of 16 byte values into a ``uuid.UUID``.

    The values are packed as unsigned bytes in the given order and passed
    to ``UUID`` as its ``bytes_le`` argument.
    '''
    layout = '<{}'.format('B' * len(t))
    packed = struct.pack(layout, *t)
    return UUID(bytes_le=packed)
|
|
|
|
|
|
|
|
def nullterminated_string(bs):
    '''
    Turn an iterable of character codes into a string, dropping the last
    character (presumably the NUL terminator -- the last character is
    removed whatever its value).
    '''
    text = ''.join(map(chr, bs))
    return text[:-1]
|
|
|