| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| from __future__ import absolute_import |
| from __future__ import print_function |
| from __future__ import unicode_literals |
| import sys |
|
|
| from ..cli import parse_recordstream_name |
| from ..recordstream import Hwp5File |
| from ..recordstream import RecordStream |
| from ..recordstream import encode_record_header |
| from ..recordstream import dump_record |
| from ..storage import Open2Stream |
|
|
|
|
# True when running under a Python 2 interpreter; used to decide whether
# sys.stdout can take raw bytes directly or needs its ``.buffer`` layer.
PY2 = sys.version_info[0] == 2
|
|
|
|
def main(args):
    """Print the records of a record stream in the requested format.

    The stream is taken from ``args.hwp5file`` / ``args.record_stream`` when
    a file name is given; otherwise records are read from standard input.

    Parameters read from *args* (an ``argparse`` namespace):

    - ``hwp5file``: path of the .hwp file, or a falsy value to read stdin.
    - ``record_stream``: name of the stream inside the file; passed as-is
      to ``parse_recordstream_name``.
    - ``range``: ``'N-M'`` (records N up to, but excluding, M) or ``'N'``
      (only record N).  Non-integer parts raise ``ValueError``.
    - ``treegroup``: index of the subtree to print, or ``None``.
    - ``simple`` / ``raw`` / ``raw_header`` / ``raw_payload``: output format
      switches, checked in that order; when none is set the records are
      dumped as JSON.
    """
    stdout_text = sys.stdout
    # Record bytes must bypass the text layer on Python 3; on Python 2
    # sys.stdout already accepts byte strings.
    stdout_binary = sys.stdout if PY2 else sys.stdout.buffer

    def open_stdin():
        # The raw output modes below treat records as bytes, so the input
        # side has to be a byte stream too.  On Python 3 sys.stdin is a text
        # stream and the bytes live in ``.buffer``; on Python 2, or when
        # sys.stdin has been replaced by a plain byte stream, there is no
        # ``.buffer`` and the object itself is used.
        # NOTE(review): assumes RecordStream reads bytes -- confirm.
        return getattr(sys.stdin, 'buffer', sys.stdin)

    filename = args.hwp5file
    if filename:
        hwpfile = Hwp5File(filename)
        stream = parse_recordstream_name(hwpfile, args.record_stream)
    else:
        stream = RecordStream(Open2Stream(open_stdin), None)

    # Keyword arguments that restrict which records are produced.
    opts = {}
    if args.range:
        bounds = tuple(int(part) for part in args.range.split('-', 1))
        if len(bounds) == 1:
            # A single number N selects exactly record N: [N, N + 1).
            bounds = (bounds[0], bounds[0] + 1)
        opts['range'] = bounds
    if args.treegroup is not None:
        opts['treegroup'] = int(args.treegroup)

    if args.simple:
        # One line per record: sequence number, indent by level, tag name.
        for record in stream.records(**opts):
            stdout_text.write('{:04d} {} {}\n'.format(
                record['seqno'],
                ' ' * record['level'],
                record['tagname'],
            ))
    elif args.raw:
        for record in stream.records(**opts):
            dump_record(stdout_binary, record)
    elif args.raw_header:
        for record in stream.records(**opts):
            stdout_binary.write(encode_record_header(record))
    elif args.raw_payload:
        for record in stream.records(**opts):
            stdout_binary.write(record['payload'])
    else:
        # Default (also what --json selects): JSON dump.
        stream.records_json(**opts).dump(stdout_text)
|
|
|
|
def records_argparser(subparsers, _):
    """Register the ``records`` sub-command on *subparsers*.

    *subparsers* is the object returned by
    ``ArgumentParser.add_subparsers()``; *_* is the translation function
    applied to every help text.  The created sub-parser has ``main`` set as
    its ``func`` default and is returned.
    """
    parser = subparsers.add_parser(
        'records',
        help=_('Print the record structure of .hwp file record streams.'),
        description=_('Print the record structure of the specified stream.'),
    )

    # Both positionals are optional (nargs='?').
    parser.add_argument(
        'hwp5file',
        nargs='?',
        metavar='<hwp5file>',
        help=_('.hwp file to analyze'),
    )
    parser.add_argument(
        'record_stream',
        nargs='?',
        metavar='<record-stream>',
        help=_(
            'Record-structured internal streams.\n'
            '(e.g. DocInfo, BodyText/*)\n'
        ),
    )

    # Output format switches: plain boolean flags, at most one at a time.
    # The help literals stay inside _() calls so they remain extractable.
    format_flags = (
        ('--simple', _('Print records as simple tree')),
        ('--json', _('Print records as json')),
        ('--raw', _('Print records as is')),
        ('--raw-header', _('Print record headers as is')),
        ('--raw-payload', _('Print record payloads as is')),
    )
    output_formats = parser.add_mutually_exclusive_group()
    for flag, description in format_flags:
        output_formats.add_argument(
            flag,
            action='store_true',
            help=description,
        )

    # Record subset selection: either a range or a subtree, never both.
    subset = parser.add_mutually_exclusive_group()
    subset.add_argument(
        '--range',
        metavar='<range>',
        help=_(
            'Specifies the range of the records.\n'
            'N-M means "from the record N to M-1 (excluding M)"\n'
            'N means just the record N\n'
        ),
    )
    subset.add_argument(
        '--treegroup',
        metavar='<treegroup>',
        help=_('Specifies the N-th subtree of the record structure.'),
    )

    parser.set_defaults(func=main)
    return parser
|
|