# -*- coding: utf-8 -*- # # pyhwp : hwp file format parser in python # Copyright (C) 2010-2023 mete0r # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . # from __future__ import absolute_import from __future__ import print_function from __future__ import unicode_literals import sys from ..cli import parse_recordstream_name from ..recordstream import Hwp5File from ..recordstream import RecordStream from ..recordstream import encode_record_header from ..recordstream import dump_record from ..storage import Open2Stream PY2 = sys.version_info.major == 2 def main(args): if PY2: stdout_text = sys.stdout stdout_binary = sys.stdout else: stdout_text = sys.stdout stdout_binary = sys.stdout.buffer filename = args.hwp5file if filename: hwpfile = Hwp5File(filename) # TODO: args.record_stream is None streamname = args.record_stream stream = parse_recordstream_name(hwpfile, streamname) else: stream = RecordStream(Open2Stream(lambda: sys.stdin), None) opts = dict() rng = args.range if rng: rng = rng.split('-', 1) rng = tuple(int(x) for x in rng) if len(rng) == 1: rng = (rng[0], rng[0] + 1) opts['range'] = rng treegroup = args.treegroup if treegroup is not None: opts['treegroup'] = int(treegroup) if args.simple: for record in stream.records(**opts): stdout_text.write('{:04d} {} {}\n'.format( record['seqno'], ' ' * record['level'], record['tagname'], )) elif args.raw: for record in stream.records(**opts): dump_record(stdout_binary, record) elif args.raw_header: for record in stream.records(**opts): hdr = encode_record_header(record) stdout_binary.write(hdr) elif args.raw_payload: for record in stream.records(**opts): stdout_binary.write(record['payload']) else: stream.records_json(**opts).dump(stdout_text) def records_argparser(subparsers, _): parser = subparsers.add_parser( 'records', help=_( 'Print the record structure of .hwp file record streams.' ), description=_( 'Print the record structure of the specified stream.' ), ) parser.add_argument( 'hwp5file', nargs='?', metavar='', help=_('.hwp file to analyze'), ) parser.add_argument( 'record_stream', nargs='?', metavar='', help=_( 'Record-structured internal streams.\n' '(e.g. DocInfo, BodyText/*)\n' ), ) output_formats = parser.add_mutually_exclusive_group() output_formats.add_argument( '--simple', action='store_true', help=_( 'Print records as simple tree' ) ) output_formats.add_argument( '--json', action='store_true', help=_( 'Print records as json' ) ) output_formats.add_argument( '--raw', action='store_true', help=_( 'Print records as is' ) ) output_formats.add_argument( '--raw-header', action='store_true', help=_( 'Print record headers as is' ) ) output_formats.add_argument( '--raw-payload', action='store_true', help=_( 'Print record payloads as is' ) ) subset = parser.add_mutually_exclusive_group() subset.add_argument( '--range', metavar='', help=_( 'Specifies the range of the records.\n' 'N-M means "from the record N to M-1 (excluding M)"\n' 'N means just the record N\n' ) ) subset.add_argument( '--treegroup', metavar='', help=_( 'Specifies the N-th subtree of the record structure.' ) ) parser.set_defaults(func=main) return parser