# -*- coding: utf-8 -*- """ HWP to HTML Converter module. """ import os from hwp5.hwp5html import HTMLTransform from hwp5.xmlmodel import Hwp5File from contextlib import closing class HwpToHtmlConverter: def __init__(self, hwp_file): self.hwp_file = hwp_file def convert(self, output_path): """ Convert the HWP file to HTML. :param output_path: Path to save the generated HTML file. """ if not os.path.exists(self.hwp_file): raise FileNotFoundError(f"HWP file not found: {self.hwp_file}") # Ensure output directory exists output_dir = os.path.dirname(output_path) if output_dir and not os.path.exists(output_dir): os.makedirs(output_dir) # Use HTMLTransform from hwp5 package # The existing transform_hwp5_to_xhtml takes a file path or file object # but transform_hwp5_to_xhtml in HTMLTransform returns a transform function # We need to instantiate HTMLTransform and use its methods correctly. # Looking at hwp5html.py: # transform = html_transform.transform_hwp5_to_xhtml # transform(hwp5file, dest) transformer = HTMLTransform() output_dir = os.path.dirname(os.path.abspath(output_path)) with closing(Hwp5File(self.hwp_file)) as hwp5file: with transformer.transformed_xhwp5_at_temp(hwp5file) as xhwp5path: # 1. Generage HTML with open(output_path, 'wb') as f: transformer.transform_xhwp5_to_xhtml(xhwp5path, f) # 2. Generate CSS # The XSLT usually expects styles.css css_path = os.path.join(output_dir, 'styles.css') with open(css_path, 'wb') as f: transformer.transform_xhwp5_to_css(xhwp5path, f) # 3. Extract BinData bindata_dir = os.path.join(output_dir, 'bindata') transformer.extract_bindata_dir(hwp5file, bindata_dir) def main(): import argparse import sys parser = argparse.ArgumentParser(description='Convert HWP file to HTML with CSS and images.') parser.add_argument('input', help='Input HWP file') parser.add_argument('--output', '-o', help='Output HTML file path (default: input_filename.html)') args = parser.parse_args() input_file = args.input if args.output: output_file = args.output else: base_name = os.path.splitext(input_file)[0] output_file = base_name + '.html' try: converter = HwpToHtmlConverter(input_file) print(f"Converting '{input_file}' to '{output_file}'...") converter.convert(output_file) print("Conversion successful!") print(f"Generated files:") print(f" - HTML: {output_file}") print(f" - CSS: {os.path.join(os.path.dirname(os.path.abspath(output_file)), 'styles.css')}") print(f" - Data: {os.path.join(os.path.dirname(os.path.abspath(output_file)), 'bindata')}") except Exception as e: print(f"Error: {e}", file=sys.stderr) sys.exit(1) if __name__ == '__main__': main()