|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from __future__ import absolute_import |
|
|
from __future__ import print_function |
|
|
from __future__ import unicode_literals |
|
|
from itertools import islice |
|
|
import sys |
|
|
|
|
|
from ..binmodel import Hwp5File |
|
|
from ..binmodel import ModelStream |
|
|
from ..binmodel import RecordModel |
|
|
from ..binmodel import model_to_json |
|
|
from ..cli import parse_recordstream_name |
|
|
from ..dataio import hexdump |
|
|
from ..storage import Open2Stream |
|
|
from ..treeop import ENDEVENT |
|
|
from ..utils import generate_json_array |
|
|
from ..utils import unicode_unescape |
|
|
|
|
|
|
|
|
# True when the running interpreter is Python 2; used below to choose how
# the binary standard input stream is obtained.
PY2 = sys.version_info[0] == 2
|
|
|
|
|
|
|
|
def main(args):
    """Entry point of the ``models`` subcommand.

    The stream to inspect is obtained from ``stream_from_args(args)``.

    * When ``args.events`` is set, every ``(event, item)`` pair yielded by
      ``stream.parse_model_events()`` is printed and the function returns.
    * Otherwise the models selected by ``models_from_args(args)`` are handed
      to the printer selected by ``print_models_from_args(args)``.

    :param args: parsed command-line arguments (see ``models_argparser``)
    """
    stream = stream_from_args(args)

    if args.events:
        for event, item in stream.parse_model_events():
            _print_model_event(event, item)
        return

    models_from_stream = models_from_args(args)
    models = models_from_stream(stream)

    print_models = print_models_from_args(args)
    print_models(models)


def _print_model_event(event, item):
    """Print a single ``(event, item)`` pair of a model event stream.

    :param event: ``None`` or an object with a ``__name__`` (e.g. ENDEVENT)
    :param item: dict with at least a ``'type'`` entry; the other keys read
        here depend on the branch taken below
    """
    # Named ``type_name`` so that the builtin ``type`` is not shadowed.
    type_name = item['type'].__name__

    if event is None:
        # Event-less items are printed as: offset, type, name, value.
        offset = item['bin_offset']
        name = item.get('name', '-')
        value = item.get('value', '-')
        print('%04x' % offset, type_name, name, repr(value))
        return

    if item['type'] is not RecordModel:
        print(' ', event.__name__, type_name, item.get('name', ''))
        return

    record = item['record']
    fmt = ' %s Record %s level=%s %s'
    print(fmt % (event.__name__,
                 record['seqno'],
                 record['level'],
                 record['tagname']))

    if event is ENDEVENT:
        # At the end of a record, report the final offset and any bytes
        # that were left over, followed by a separator line.
        leftover = item['leftover']
        print('%04x' % leftover['offset'])
        if len(leftover['bytes']):
            print('')
            print('leftover:')
            print(hexdump(leftover['bytes']))
        print('-' * 20)
|
|
|
|
|
|
|
|
def models_argparser(subparsers, _):
    """Register the ``models`` subcommand and return its parser.

    :param subparsers: object providing ``add_parser()`` (as returned by
        ``ArgumentParser.add_subparsers()``)
    :param _: callable applied to every help/description string
        (presumably a gettext-style translation function)
    :returns: the newly created sub-parser, with ``func`` defaulting to
        ``main``
    """
    parser = subparsers.add_parser(
        'models',
        help=_(
            'Print parsed binary models of .hwp file record streams.'
        ),
        description=_(
            'Print parsed binary models in the specified <record-stream>.'
        ),
    )

    # Positional arguments; both are optional.
    parser.add_argument(
        'hwp5file',
        nargs='?',
        metavar='<hwp5file>',
        help=_('.hwp file to analyze'),
    )
    parser.add_argument(
        'record_stream',
        nargs='?',
        metavar='<record-stream>',
        help=_(
            'Record-structured internal streams.\n'
            '(e.g. DocInfo, BodyText/*)\n'
        ),
    )
    parser.add_argument(
        '--file-format-version',
        '-V',
        metavar='<version>',
        help=_(
            'Specifies HWPv5 file format version of the standard input stream'
        ),
    )

    # Output format: at most one of these may be given.
    output_formats = parser.add_mutually_exclusive_group()
    output_formats.add_argument(
        '--simple',
        action='store_true',
        help=_(
            'Print records as simple tree'
        )
    )
    output_formats.add_argument(
        '--json',
        action='store_true',
        help=_(
            'Print records as json'
        )
    )
    output_formats.add_argument(
        '--format',
        metavar='<format>',
        help=_(
            'Print records formatted'
        )
    )
    output_formats.add_argument(
        '--events',
        action='store_true',
        help=_(
            'Print records as events'
        )
    )

    # Subset selection: at most one of these may be given.
    subset = parser.add_mutually_exclusive_group()
    subset.add_argument(
        '--treegroup',
        metavar='<treegroup>',
        help=_(
            'Specifies the N-th subtree of the record structure.'
        )
    )
    subset.add_argument(
        '--seqno',
        # Was '<treegroup>' (copy-paste slip); the help text below refers
        # to the value as <seqno>, so the usage line must do the same.
        metavar='<seqno>',
        help=_(
            'Print a model of <seqno>-th record'
        )
    )

    parser.set_defaults(func=main)
    return parser
|
|
|
|
|
|
|
|
def stream_from_args(args):
    """Return the stream that the command-line ``args`` refer to.

    If ``args.hwp5file`` is given, the file is opened as an ``Hwp5File`` and
    ``args.record_stream`` is resolved through ``parse_recordstream_name``.

    Otherwise a ``ModelStream`` over the binary standard input is returned,
    using ``args.file_format_version`` (``'5.0.0.0'`` when not given) as a
    tuple of integers.
    """
    hwp5_path = args.hwp5file

    if not hwp5_path:
        version_text = args.file_format_version or '5.0.0.0'
        version = tuple(map(int, version_text.split('.')))

        # Python 3 exposes the raw byte stream as ``sys.stdin.buffer``;
        # under Python 2 ``sys.stdin`` itself is used.
        stdin_binary = sys.stdin if PY2 else sys.stdin.buffer

        return ModelStream(Open2Stream(lambda: stdin_binary), version)

    streamname = args.record_stream
    return parse_recordstream_name(Hwp5File(hwp5_path), streamname)
|
|
|
|
|
|
|
|
def models_from_args(args):
    """Return a callable that extracts the requested models from a stream.

    The returned callable takes a stream and returns an iterable of models:

    * ``args.treegroup`` set: ``stream.models(treegroup=int(...))``
    * ``args.seqno`` set: only the model at that position of
      ``stream.models()``
    * otherwise: ``stream.models()``
    """
    if args.treegroup:
        group_index = int(args.treegroup)

        def select_treegroup(stream):
            return stream.models(treegroup=group_index)

        return select_treegroup

    if args.seqno:
        first = int(args.seqno)
        stop = first + 1

        def select_single(stream):
            # Slice of width one: yields at most the ``first``-th model.
            return islice(stream.models(), first, stop)

        return select_single

    def select_all(stream):
        return stream.models()

    return select_all
|
|
|
|
|
|
|
|
def print_models_from_args(args):
    """Return the models printer selected by the output-format options.

    ``args.simple`` selects the simple tree printer and ``args.format`` a
    printer driven by the (unescaped) format string; without either, the
    JSON printer is returned.
    """
    if args.simple:
        return print_models_with_print_model(print_model_simple)

    if not args.format:
        # Neither --simple nor --format: JSON output.
        return print_models_json

    unescaped_format = unicode_unescape(args.format)
    return print_models_with_print_model(
        print_model_with_format(unescaped_format)
    )
|
|
|
|
|
|
|
|
def print_models_json(models):
    """Write ``models`` to standard output as a JSON array.

    Each model is serialized with ``model_to_json`` (sorted keys, indent 2)
    and the pieces produced by ``generate_json_array`` are written in order.
    """
    def serialized():
        for each in models:
            yield model_to_json(each, sort_keys=True, indent=2)

    for chunk in generate_json_array(serialized()):
        sys.stdout.write(chunk)
|
|
|
|
|
|
|
|
def print_models_with_print_model(print_model):
    """Lift a single-model printer into a printer for an iterable of models.

    :param print_model: callable invoked once per model
    :returns: a function that calls ``print_model`` on every model, in order
    """
    def models_printer(models):
        """Apply ``print_model`` to each item of ``models``."""
        for each in models:
            print_model(each)

    return models_printer
|
|
|
|
|
|
|
|
def print_model_simple(model):
    """Write one line for ``model``: its sequence number and type name.

    The line consists of ``model['seqno']`` zero-padded to four digits, one
    space per ``model['level']`` as indentation, and the name of
    ``model['type']``.
    """
    out = sys.stdout
    out.write('%04d ' % model['seqno'])
    indent = ' ' * model['level']
    out.write(indent + model['type'].__name__)
    out.write('\n')
|
|
|
|
|
|
|
|
def print_model_with_format(fmt):
    """Return a printer that writes each model through the ``%`` format
    string ``fmt``.

    :param fmt: format string applied with ``fmt % mapping``
    :returns: a function taking one model and writing the formatted text to
        standard output
    """
    def print_model(model):
        # The model is first made formattable (its type becomes a name).
        formattable = transform_model_formattable(model)
        sys.stdout.write(fmt % formattable)

    return print_model
|
|
|
|
|
|
|
|
def transform_model_formattable(model):
    """Return a copy of ``model`` whose ``'type'`` entry is the type's name.

    The input mapping is left untouched.
    """
    formattable = dict(model)
    formattable['type'] = model['type'].__name__
    return formattable
|
|
|