| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | from __future__ import absolute_import |
| | from __future__ import print_function |
| | from __future__ import unicode_literals |
| |
|
| | from collections import namedtuple |
| | from datetime import datetime |
| | from datetime import timedelta |
| | from uuid import UUID |
| | import logging |
| | import struct |
| |
|
| | from hwp5.dataio import Struct |
| | from hwp5.dataio import Flags |
| | from hwp5.dataio import N_ARRAY |
| | from hwp5.dataio import ARRAY |
| | from hwp5.dataio import BYTE |
| | from hwp5.dataio import UINT16 |
| | from hwp5.dataio import UINT32 |
| | from hwp5.dataio import INT32 |
| | from hwp5.bintype import read_type |
| |
|
| |
|
# Module-level logger named after this module.
logger = logging.getLogger(__name__)


# Registry of variant type classes, keyed by the code they were
# registered under through PropertyType().
vt_types = {}


def PropertyType(code):
    '''
    Build a class decorator that registers the class under ``code``.

    The decorated class receives a ``code`` attribute, is stored in
    ``vt_types[code]`` and is returned unchanged otherwise.
    '''

    def register(klass):
        klass.code = code
        vt_types[code] = klass
        return klass

    return register
| |
|
| |
|
@PropertyType(code=0x0003)
class VT_I4(object):
    '''
    Variant type registered under code 0x0003; the value is an INT32.
    '''

    @classmethod
    def read_value(cls, context, f):
        '''
        Read a single INT32 from ``f`` through read_type and return it.
        '''
        value = read_type(INT32, context, f)
        return value
| |
|
| |
|
@PropertyType(code=0x001F)
class VT_LPWSTR(object):
    '''
    Variant type registered under code 0x001F; the value is a UTF-16LE
    string prefixed by a UINT32 count of 2-byte units.
    '''

    @classmethod
    def read_value(cls, context, f):
        '''
        Read the length prefix, then the string payload, from ``f``.

        Returns the decoded text without its final character.
        '''
        unit_count = read_type(UINT32, context, f)
        raw = f.read(2 * unit_count)
        text = raw.decode('utf-16le')
        # The last character is dropped unconditionally; it is presumably
        # the terminating NUL, but that is not verified here.
        return text[:-1]
| |
|
| |
|
@PropertyType(code=0x0040)
class VT_FILETIME(object):
    '''
    Variant type registered under code 0x0040; the value is a FILETIME.
    '''

    @classmethod
    def read_value(cls, context, f):
        '''
        Read two UINT32 words (low word first) and return them combined
        into one 64-bit number wrapped in a FILETIME.
        '''
        low = read_type(UINT32, context, f)
        high = read_type(UINT32, context, f)
        return FILETIME((high << 32) | low)
| |
|
| |
|
class FILETIME(object):
    '''
    Timestamp stored as an integer count of 100-nanosecond ticks since
    1601-01-01 00:00:00.

    ``value`` holds the raw tick count; ``datetime`` converts it to a
    naive ``datetime.datetime`` and ``str()`` renders that datetime.
    '''
    __slots__ = ('value', )

    def __init__(self, value):
        self.value = value

    def __str__(self):
        return str(self.datetime)

    @property
    def datetime(self):
        '''
        The tick count as a naive datetime, truncated to microseconds.
        '''
        # Floor division keeps the arithmetic in integers. With true
        # division the quotient becomes a float, and realistic tick counts
        # exceed 2**53 microseconds, so the result would be off by a
        # microsecond or more.
        return (
            datetime(1601, 1, 1, 0, 0, 0) +
            timedelta(microseconds=self.value // 10)
        )
| |
|
| |
|
# A property identifier: the numeric id paired with its symbolic label.
PropertyIdentifier = namedtuple('PropertyIdentifier', ['id', 'label'])


# Identifiers collected in RESERVED_PROPERTIES below.
PID_DICTIONARY = PropertyIdentifier(0x00000000, 'PID_DICTIONARY')
PID_CODEPAGE = PropertyIdentifier(0x00000001, 'PID_CODEPAGE')
PID_LOCALE = PropertyIdentifier(0x80000000, 'PID_LOCALE')
PID_BEHAVIOR = PropertyIdentifier(0x80000003, 'PID_BEHAVIOR')

# Identifiers collected in SUMMARY_INFORMATION_PROPERTIES below.
PIDSI_TITLE = PropertyIdentifier(0x02, 'PIDSI_TITLE')
PIDSI_SUBJECT = PropertyIdentifier(0x03, 'PIDSI_SUBJECT')
PIDSI_AUTHOR = PropertyIdentifier(0x04, 'PIDSI_AUTHOR')
PIDSI_KEYWORDS = PropertyIdentifier(0x05, 'PIDSI_KEYWORDS')
PIDSI_COMMENTS = PropertyIdentifier(0x06, 'PIDSI_COMMENTS')
PIDSI_TEMPLATE = PropertyIdentifier(0x07, 'PIDSI_TEMPLATE')
PIDSI_LASTAUTHOR = PropertyIdentifier(0x08, 'PIDSI_LASTAUTHOR')
PIDSI_REVNUMBER = PropertyIdentifier(0x09, 'PIDSI_REVNUMBER')
PIDSI_EDITTIME = PropertyIdentifier(0x0a, 'PIDSI_EDITTIME')
PIDSI_LASTPRINTED = PropertyIdentifier(0x0b, 'PIDSI_LASTPRINTED')
PIDSI_CREATE_DTM = PropertyIdentifier(0x0c, 'PIDSI_CREATE_DTM')
PIDSI_LASTSAVE_DTM = PropertyIdentifier(0x0d, 'PIDSI_LASTSAVE_DTM')
PIDSI_PAGECOUNT = PropertyIdentifier(0x0e, 'PIDSI_PAGECOUNT')
PIDSI_WORDCOUNT = PropertyIdentifier(0x0f, 'PIDSI_WORDCOUNT')
PIDSI_CHARCOUNT = PropertyIdentifier(0x10, 'PIDSI_CHARCOUNT')
PIDSI_THUMBNAIL = PropertyIdentifier(0x11, 'PIDSI_THUMBNAIL')
PIDSI_APPNAME = PropertyIdentifier(0x12, 'PIDSI_APPNAME')
PIDSI_SECURITY = PropertyIdentifier(0x13, 'PIDSI_SECURITY')


RESERVED_PROPERTIES = (
    PID_DICTIONARY, PID_CODEPAGE, PID_LOCALE, PID_BEHAVIOR,
)


SUMMARY_INFORMATION_PROPERTIES = (
    PIDSI_TITLE, PIDSI_SUBJECT, PIDSI_AUTHOR, PIDSI_KEYWORDS,
    PIDSI_COMMENTS, PIDSI_TEMPLATE, PIDSI_LASTAUTHOR, PIDSI_REVNUMBER,
    PIDSI_EDITTIME, PIDSI_LASTPRINTED, PIDSI_CREATE_DTM,
    PIDSI_LASTSAVE_DTM, PIDSI_PAGECOUNT, PIDSI_WORDCOUNT,
    PIDSI_CHARCOUNT, PIDSI_THUMBNAIL, PIDSI_APPNAME, PIDSI_SECURITY,
)
| |
|
| |
|
class Property(object):
    '''
    One property that has been read: its descriptor, its label (may be
    None), its type and its value.
    '''

    def __init__(self, desc, idLabel, type, value):
        self.desc, self.idLabel = desc, idLabel
        self.type, self.value = type, value

    # The property id is taken from the descriptor.
    id = property(lambda self: self.desc.id)
| |
|
| |
|
class PropertyDesc(Struct):
    '''
    Property id paired with an offset; both are UINT32 fields.

    PropertyReader and DictionaryReader add this offset to the offset of
    the enclosing property set before seeking.
    '''

    def __init__(self, id, offset):
        self.id, self.offset = id, offset

    @classmethod
    def fromDict(cls, d):
        ''' Build an instance from a mapping with 'id' and 'offset'. '''
        prop_id = d['id']
        prop_offset = d['offset']
        return cls(id=prop_id, offset=prop_offset)

    @staticmethod
    def attributes():
        yield UINT32, 'id'
        yield UINT32, 'offset'
| |
|
| |
|
class PropertyReader(object):
    '''
    Reads a single typed property value from a property set stream.

    :param propsetDesc: descriptor of the enclosing property set; its
        ``offset`` is the base for the seek position
    :param propDesc: descriptor of the property; its ``offset`` is added
        to the property set offset
    :param idLabel: symbolic label for the property id, or None
    :param codepage: stored on the reader; not used by read()
    :param displayName: stored on the reader; not used by read()
    '''

    def __init__(self, propsetDesc, propDesc, idLabel, codepage,
                 displayName=None):
        self.propsetDesc = propsetDesc
        self.propDesc = propDesc
        self.idLabel = idLabel
        self.codepage = codepage
        self.displayName = displayName

    def read(self, f):
        '''
        Seek ``f`` to the property and read its type header and value.

        :param f: seekable binary file-like object
        :returns: a Property; its ``value`` is None when the variant type
            code has no reader registered in ``vt_types``
        '''
        f.seek(self.propsetDesc.offset + self.propDesc.offset)

        context = {}
        propType = read_type(TypedPropertyValue, context, f)
        propType = TypedPropertyValue.fromDict(propType)

        # TypedPropertyValue.vt_type returns None for unregistered codes.
        # Indexing vt_types directly would raise KeyError and abort the
        # parsing of the whole stream because of one unsupported property.
        vt_type = propType.vt_type
        if vt_type is None:
            logger.warning(
                'unsupported property type 0x%04x (property id 0x%08x)',
                propType.code, self.propDesc.id,
            )
            propValue = None
        else:
            propValue = vt_type.read_value(context, f)

        return Property(
            desc=self.propDesc,
            idLabel=self.idLabel,
            type=propType,
            value=propValue,
        )
| |
|
| |
|
class TypedPropertyValue(Struct):
    '''
    [MS-OLEPS] 2.15 TypedPropertyValue

    Only the type header is modelled here: a UINT32 whose bit field
    declared as (0, 16) is exposed as ``code``.
    '''

    TypeFlags = Flags(UINT32, 0, 16, 'code')

    def __init__(self, code):
        self.code = code

    @classmethod
    def fromDict(cls, d):
        ''' Build an instance from the ``code`` of the parsed 'type'. '''
        type_flags = d['type']
        return cls(code=type_flags.code)

    @classmethod
    def attributes(cls):
        yield cls.TypeFlags, 'type'

    @property
    def vt_type(self):
        ''' The class registered for ``code`` in vt_types, else None. '''
        return vt_types.get(self.code)
| |
|
| |
|
class DictionaryEntry(Struct):
    '''
    [MS-OLEPS] 2.16 DictionaryEntry

    A UINT32 id followed by a name stored as a UINT32-counted byte array.
    '''

    def __init__(self, id, name):
        self.id, self.name = id, name

    @classmethod
    def fromDict(cls, d):
        '''
        Build an entry from a mapping with 'id' and 'name', converting
        the name bytes with nullterminated_string().
        '''
        entry_id = d['id']
        entry_name = nullterminated_string(d['name'])
        return cls(id=entry_id, name=entry_name)

    @staticmethod
    def attributes():
        # N_ARRAY and BYTE are the module-level imports.
        yield UINT32, 'id'
        yield N_ARRAY(UINT32, BYTE), 'name'
| |
|
| |
|
class Dictionary(Struct):
    '''
    [MS-OLEPS] 2.17 Dictionary

    A UINT32-counted sequence of DictionaryEntry items.
    '''

    def __init__(self, entries):
        self.entries = entries

    @classmethod
    def fromDict(cls, d):
        ''' Build a Dictionary from a mapping holding raw 'entries'. '''
        converted = []
        for raw_entry in d['entries']:
            converted.append(DictionaryEntry.fromDict(raw_entry))
        return cls(entries=tuple(converted))

    @staticmethod
    def attributes():
        # N_ARRAY is the module-level import.
        yield N_ARRAY(UINT32, DictionaryEntry), 'entries'

    def get(self, id, defvalue=None):
        '''
        Return the name of the first entry whose id equals ``id``, or
        ``defvalue`` when no entry matches.
        '''
        matching_names = (
            entry.name for entry in self.entries if id == entry.id
        )
        return next(matching_names, defvalue)
| |
|
| |
|
class DictionaryReader(object):
    '''
    Reads the Dictionary property of a property set.

    ``codepage`` is stored on the reader but not used by read().
    '''

    def __init__(self, propsetDesc, propDesc, idLabel, codepage):
        self.propsetDesc, self.propDesc = propsetDesc, propDesc
        self.idLabel, self.codepage = idLabel, codepage

    def read(self, f):
        '''
        Seek ``f`` to the dictionary and parse it.

        Returns a Property whose ``type`` is None and whose ``value`` is
        the parsed Dictionary.
        '''
        descriptor = self.propDesc
        label = self.idLabel
        f.seek(self.propsetDesc.offset + descriptor.offset)

        parsed = read_type(Dictionary, {}, f)
        return Property(
            desc=descriptor,
            idLabel=label,
            type=None,
            value=Dictionary.fromDict(parsed),
        )
| |
|
| |
|
class PropertySet(object):
    '''
    [MS-OLEPS] 2.20 PropertySet

    Holds the descriptor, the header and the properties that were read.
    Indexing with a property identifier returns the matching value.
    '''

    def __init__(self, desc, header, properties):
        self.desc, self.header = desc, header
        self.properties = properties

    # The format id is taken from the descriptor.
    fmtid = property(lambda self: self.desc.fmtid)

    def __getitem__(self, propertyIdentifier):
        '''
        Return the value of the first property whose id equals
        ``propertyIdentifier.id``; raise KeyError when there is none.
        '''
        not_found = object()
        value = next(
            (
                candidate.value
                for candidate in self.properties
                if candidate.id == propertyIdentifier.id
            ),
            not_found,
        )
        if value is not_found:
            raise KeyError(propertyIdentifier)
        return value
| |
|
| |
|
class PropertySetHeader(Struct):
    '''
    Header of a property set: a UINT32 ``bytesize`` followed by a
    UINT32-counted list of PropertyDesc items.
    '''

    def __init__(self, bytesize, propDescList):
        # No trailing comma here: ``bytesize,`` would store a 1-tuple
        # instead of the integer that was read.
        self.bytesize = bytesize
        self.propDescList = propDescList

    @classmethod
    def fromDict(cls, d):
        '''
        Build a header from a mapping with 'bytesize' and 'propDescList',
        converting each raw descriptor with PropertyDesc.fromDict().
        '''
        return cls(
            bytesize=d['bytesize'],
            propDescList=tuple(
                PropertyDesc.fromDict(propDesc)
                for propDesc in d['propDescList']
            ),
        )

    @staticmethod
    def attributes():
        yield UINT32, 'bytesize'
        yield N_ARRAY(UINT32, PropertyDesc), 'propDescList'
| |
|
| |
|
class PropertySetDesc(Struct):
    '''
    Locates one property set: a 16-byte format id and a UINT32 offset.

    PropertySetStreamReader seeks to ``offset`` to read the property set.
    '''

    def __init__(self, fmtid, offset):
        self.fmtid, self.offset = fmtid, offset

    @staticmethod
    def attributes():
        yield ARRAY(BYTE, 16), 'fmtid'
        yield UINT32, 'offset'

    @classmethod
    def fromDict(cls, d):
        '''
        Build a descriptor from a mapping with 'fmtid' and 'offset'; the
        format id bytes are converted with uuid_from_bytes_tuple().
        '''
        format_id = uuid_from_bytes_tuple(d['fmtid'])
        start = d['offset']
        return cls(fmtid=format_id, offset=start)
| |
|
| |
|
class PropertySetStreamHeader(Struct):
    '''
    Header of a property set stream: byte order, version, system
    identifier, a 16-byte class id and a UINT32-counted list of
    PropertySetDesc items.
    '''

    def __init__(self, byteOrder, version, systemIdentifier, clsid,
                 propsetDescList):
        self.byteOrder, self.version = byteOrder, version
        self.systemIdentifier, self.clsid = systemIdentifier, clsid
        self.propsetDescList = propsetDescList

    @classmethod
    def fromDict(cls, d):
        '''
        Build a header from the parsed mapping, converting 'clsid' with
        uuid_from_bytes_tuple() and every raw descriptor with
        PropertySetDesc.fromDict().
        '''
        fields = dict(
            byteOrder=d['byteOrder'],
            version=d['version'],
            systemIdentifier=d['systemIdentifier'],
            clsid=uuid_from_bytes_tuple(d['clsid']),
        )
        fields['propsetDescList'] = tuple(
            PropertySetDesc.fromDict(raw_desc)
            for raw_desc in d['propsetDescList']
        )
        return cls(**fields)

    @staticmethod
    def attributes():
        yield UINT16, 'byteOrder'
        yield UINT16, 'version'
        yield UINT32, 'systemIdentifier'
        yield ARRAY(BYTE, 16), 'clsid'
        yield N_ARRAY(UINT32, PropertySetDesc), 'propsetDescList'
| |
|
| |
|
class PropertySetStream(object):
    '''
    [MS-OLEPS] 2.21 PropertySetStream

    Pairs the stream header with the property sets that were read. The
    header fields are exposed as read-only properties.
    '''

    def __init__(self, header, propertysets):
        self.header, self.propertysets = header, propertysets

    # Read-only views onto the corresponding header attributes.
    byteOrder = property(lambda self: self.header.byteOrder)
    version = property(lambda self: self.header.version)
    systemIdentifier = property(lambda self: self.header.systemIdentifier)
    clsid = property(lambda self: self.header.clsid)
| |
|
| |
|
class PropertySetFormat(object):
    '''
    Associates a format id with the property identifiers it defines.
    '''

    def __init__(self, fmtid, propertyIdentifiers):
        self.fmtid, self.propertyIdentifiers = fmtid, propertyIdentifiers

    @property
    def idLabels(self):
        ''' A new dict mapping each identifier's id to its label. '''
        labels = {}
        for identifier in self.propertyIdentifiers:
            labels[identifier.id] = identifier.label
        return labels
| |
|
| |
|
class PropertySetStreamReader(object):
    '''
    Reads a whole property set stream into a PropertySetStream.

    :param propertySetFormats: iterable of objects with ``fmtid`` and
        ``idLabels``; they supply the labels for known property ids
    '''

    def __init__(self, propertySetFormats):
        self.propertySetFormats = {
            propsetFormat.fmtid: propsetFormat
            for propsetFormat in propertySetFormats
        }

    def read(self, f):
        '''
        Parse the stream header and every property set it lists.

        Within a property set the code page property is read first, then
        the dictionary, then the remaining properties. Both special
        properties are optional.

        :param f: seekable binary file-like object
        :returns: a PropertySetStream
        '''
        context = {}
        streamHeader = read_type(PropertySetStreamHeader, context, f)
        streamHeader = PropertySetStreamHeader.fromDict(streamHeader)
        propertysetList = list()
        for propsetDesc in streamHeader.propsetDescList:
            f.seek(propsetDesc.offset)
            propsetHeader = read_type(PropertySetHeader, context, f)
            propsetHeader = PropertySetHeader.fromDict(
                propsetHeader,
            )
            # Property sets with an unregistered format id get no labels.
            try:
                propsetFormat = self.propertySetFormats[propsetDesc.fmtid]
            except KeyError:
                idLabels = {}
            else:
                idLabels = propsetFormat.idLabels

            properties = []
            propDescMap = {
                propDesc.id: propDesc
                for propDesc in propsetHeader.propDescList
            }

            propDesc = propDescMap.pop(PID_CODEPAGE.id, None)
            if propDesc is not None:
                idLabel = idLabels.get(propDesc.id)
                propReader = PropertyReader(
                    propsetDesc=propsetDesc,
                    propDesc=propDesc,
                    idLabel=idLabel,
                    codepage=None,
                    displayName=None,
                )
                prop = propReader.read(f)
                properties.append(prop)

                codepage = prop.value
            else:
                codepage = None

            propDesc = propDescMap.pop(PID_DICTIONARY.id, None)
            if propDesc is not None:
                idLabel = idLabels.get(propDesc.id)
                propReader = DictionaryReader(
                    propsetDesc,
                    propDesc,
                    idLabel,
                    codepage,
                )
                prop = propReader.read(f)
                properties.append(prop)

                dictionary = prop.value
            else:
                dictionary = None

            for propDesc in propDescMap.values():
                idLabel = idLabels.get(propDesc.id)
                # A property set without PID_DICTIONARY leaves
                # ``dictionary`` as None, so there is no display name to
                # look up.
                if dictionary is not None:
                    displayName = dictionary.get(propDesc.id, None)
                else:
                    displayName = None
                propReader = PropertyReader(
                    propsetDesc=propsetDesc,
                    propDesc=propDesc,
                    idLabel=idLabel,
                    codepage=codepage,
                    displayName=displayName,
                )
                prop = propReader.read(f)
                properties.append(prop)

            propertyset = PropertySet(
                desc=propsetDesc,
                header=propsetHeader,
                properties=properties,
            )
            propertysetList.append(propertyset)

        return PropertySetStream(
            header=streamHeader,
            propertysets=propertysetList,
        )
| |
|
| |
|
class PropertySetStreamTextFormatter(object):
    '''
    Renders a property set stream as lines of text.
    '''

    def formatTextLines(self, stream):
        '''
        Yield the text lines for ``stream``: the header fields, an empty
        line, then each property set with its properties ordered by
        their offset. A dictionary property is expanded into one line per
        entry.
        '''
        yield '- ByteOrder: 0x%x' % stream.byteOrder
        yield '- Version: %d' % stream.version
        yield '- SystemIdentifier: 0x%08x' % stream.systemIdentifier
        yield '- CLSID: %s' % stream.clsid
        yield ''

        for propertyset in stream.propertysets:
            title = 'Property Set {}'.format(propertyset.fmtid)
            base = propertyset.desc.offset
            yield '- {:08x}: {}'.format(base, title)
            yield ' {}'.format('-' * len(title))

            ordered = sorted(
                propertyset.properties,
                key=lambda item: item.desc.offset,
            )
            for prop in ordered:
                is_dictionary = prop.id == PID_DICTIONARY.id
                # Position of the property within the stream.
                position = base + prop.desc.offset
                label = '' if prop.idLabel is None else prop.idLabel

                if not is_dictionary:
                    yield '- {:08x}: {}(=0x{:08x}): {}'.format(
                        position, label, prop.id, prop.value
                    )
                    continue

                yield '- {:08x}: {}(=0x{:08x}):'.format(
                    position, label, prop.id,
                )
                for entry in prop.value.entries:
                    yield ' - {}: {}'.format(entry.id, entry.name)
| |
|
| |
|
def uuid_from_bytes_tuple(t):
    '''
    Convert a sequence of byte values into a UUID.

    The values are packed as unsigned bytes and passed to UUID as its
    ``bytes_le`` argument.
    '''
    packed = struct.pack('<{}B'.format(len(t)), *t)
    return UUID(bytes_le=packed)
| |
|
| |
|
def nullterminated_string(bs):
    '''
    Convert an iterable of character codes to text, without the last one.

    Each code is mapped through chr(). The final character is dropped
    unconditionally; it is presumably the NUL terminator, but that is not
    checked.
    '''
    characters = [chr(code) for code in bs]
    return ''.join(characters[:-1])
| |
|