"""The executable of pdfxmeta"""

import getopt
import sys
import pdfxmeta
import io

from getopt import GetoptError
from typing import Optional, TextIO
from fitzutils import open_pdf
from textwrap import indent
from pdfxmeta import dump_meta, dump_toml, extract_meta


# One-line synopsis. main() prints it to stderr when the command line cannot
# be parsed or when no input pdf is given.
usage_s = """
usage: pdfxmeta [options] doc.pdf [pattern]
""".strip()

# Full help text: synopsis, a short tutorial, and the list of arguments and
# options. main() prints it to stderr for -h/--help.
help_s = """
usage: pdfxmeta [options] doc.pdf [pattern]

Extract the metadata for pattern in doc.pdf.

To use this command, first open up the pdf file with your
favorite pdf reader and find the text you want to search
for. Then use

    $ pdfxmeta -p 1 in.pdf "Subsection One"

to find the metadata, mainly the font attributes and
bounding box, of lines containing the pattern "Subsection
One" on page 1. Specifying a page number is optional but
highly recommended, since it greatly reduces the ambiguity
of matches and execution time.

The output of this command can be directly copy-pasted to
build a recipe file for pdftocgen. Alternatively, you could
also use the --auto or -a flag to output a valid heading
filter directly

    $ pdfxmeta -p 1 -a 2 in.pdf "Subsection One" >> recipe.toml

where the argument of -a is the level of the heading filter,
which in this case is 2.

arguments
  doc.pdf            path to the input PDF document
  [pattern]          the pattern to search for (python regex). if not
                     given, dump the entire document

options
  -h, --help         show help
  -p, --page=PAGE    specify the page to search for (1-based index)
  -i, --ignore-case  when flag is set, search will be case-insensitive
  -a, --auto=LEVEL   when flag is set, the output would be a valid
                     heading filter of the specified heading level in
                     default settings. it is directly usable by
                     pdftocgen.
  -o, --out=FILE     path to the output file. if this flag is not
                     specified, the default is stdout
  -V, --version      show version number
""".strip()


def print_result(meta: dict) -> str:
    """Format one metadata entry as a heading line plus an indented dump.

    The heading is the entry's 'text' value (empty when the key is missing)
    followed by a colon. Below it comes the output of dump_meta(meta),
    indented by four spaces. Despite the name, nothing is printed here: the
    formatted string is returned to the caller.
    """
    heading = meta.get('text', '')
    details = indent(dump_meta(meta), '    ')
    return f"{heading}:\n{details}"


def main():
    """Run the pdfxmeta command line interface.

    Options and positional arguments are read from sys.argv[1:] with
    getopt.gnu_getopt. Metadata is extracted from the given pdf with
    extract_meta and written to stdout, or to the file named by -o/--out,
    either as structured text (print_result) or, with -a/--auto, as heading
    filters produced by dump_toml.

    Exit status (raised as SystemExit through sys.exit):
      2  the command line could not be parsed
      1  invalid page number or level, the output file cannot be opened,
         no input pdf was given, or nothing matched
      0  after printing the help text or the version

    A file opened for -o/--out is always closed before this function returns
    or exits. The utf-8 wrapper placed around sys.stdout.buffer is detached
    instead of being left to finalization, so stdout itself stays open for
    the caller.
    """
    # parse arguments
    try:
        opts, args = getopt.gnu_getopt(
            sys.argv[1:],
            "hiVp:a:o:",
            ["help", "ignore-case", "version", "page=", "auto=", "out="]
        )
    except GetoptError as e:
        print(e, file=sys.stderr)
        print(usage_s, file=sys.stderr)
        sys.exit(2)

    ignore_case: bool = False
    page: Optional[int] = None
    auto_level: Optional[int] = None

    # utf-8 text view of stdout; used unless -o/--out names a file
    stdout_text = io.TextIOWrapper(
        sys.stdout.buffer, encoding='utf-8', errors='ignore'
    )
    out: TextIO = stdout_text
    # file opened for -o/--out; owned by this function and closed below
    out_file: Optional[TextIO] = None

    try:
        for o, a in opts:
            if o in ("-i", "--ignore-case"):
                ignore_case = True
            elif o in ("-p", "--page"):
                try:
                    page = int(a)
                except ValueError:
                    print("error: invalid page number", file=sys.stderr)
                    sys.exit(1)
            elif o in ("-a", "--auto"):
                try:
                    auto_level = int(a)
                except ValueError:
                    print("error: invalid level", file=sys.stderr)
                    sys.exit(1)
            elif o in ("-o", "--out"):
                try:
                    opened = open(a, "w", encoding='utf-8', errors='ignore')
                except OSError as e:
                    print("error: can't open file for writing",
                          file=sys.stderr)
                    print(e, file=sys.stderr)
                    sys.exit(1)
                # a repeated -o replaces the earlier target; close the
                # earlier file instead of leaking it
                if out_file is not None:
                    out_file.close()
                out_file = opened
                out = opened
            elif o in ("-V", "--version"):
                print("pdfxmeta", pdfxmeta.__version__, file=sys.stderr)
                sys.exit()
            elif o in ("-h", "--help"):
                print(help_s, file=sys.stderr)
                sys.exit()

        argc = len(args)

        if argc < 1:
            print("error: no input pdf is given", file=sys.stderr)
            print(usage_s, file=sys.stderr)
            sys.exit(1)

        path_in: str = args[0]
        pattern: str = ""

        if argc >= 2:
            pattern = args[1]

        # done parsing arguments

        with open_pdf(path_in) as doc:
            meta = extract_meta(doc, pattern, page, ignore_case)

            # nothing found
            if len(meta) == 0:
                sys.exit(1)

            # should we add \n between each output?
            addnl = not out.isatty()

            # note: a level of 0 is falsy, so "-a 0" falls through to the
            # plain structured output just like no -a at all
            if auto_level:
                print('\n'.join(
                    [dump_toml(m, auto_level, addnl) for m in meta]
                ), file=out)
            else:
                print('\n'.join(map(print_result, meta)), file=out)
    finally:
        # runs on normal return and on every sys.exit() above
        try:
            if out_file is not None:
                out_file.close()
        finally:
            # detach() flushes pending text and releases sys.stdout.buffer
            # without closing it; letting the wrapper be finalized instead
            # would close the process's stdout
            stdout_text.detach()