#!/usr/bin/env python3
"""PDF to Word (DOCX) converter.

Text is extracted from the PDF with PyPDF2 and written to a DOCX file with
python-docx. When python-docx is not installed, the extracted text is saved
as a plain ``.txt`` file instead.
"""
import argparse
import json
import os
import re
import sys
from pathlib import Path

# Characters that are not allowed in XML 1.0 text: C0 control characters other
# than tab/newline/carriage return, lone surrogates, and U+FFFE/U+FFFF.
# python-docx builds its document with lxml, which raises ValueError on them.
_XML_INVALID_CHARS = re.compile(
    r"[\x00-\x08\x0b\x0c\x0e-\x1f\ud800-\udfff\ufffe\uffff]"
)


def _strip_xml_invalid_chars(text):
    """Return *text* with characters that cannot be stored in XML removed."""
    return _XML_INVALID_CHARS.sub("", text)


def _write_docx(document_factory, page_texts, output_path):
    """Write one paragraph per page to a DOCX file and return its path.

    ``document_factory`` is python-docx's ``Document`` callable; it is passed
    in so the caller controls what happens when the package is missing.
    """
    doc = document_factory()
    doc.add_heading('Converted Document', 0)
    for index, page_text in enumerate(page_texts):
        # Break *between* pages only; a break after the last page would
        # leave a blank page at the end of the document.
        if index:
            doc.add_page_break()
        doc.add_paragraph(_strip_xml_invalid_chars(page_text))
    doc.save(output_path)
    return output_path


def _write_text_fallback(page_texts, output_path):
    """Write the page texts to a ``.txt`` file and return the path used."""
    text_path = str(Path(output_path).with_suffix('.txt'))
    with open(text_path, 'w', encoding='utf-8') as f:
        f.write('\n\n--- Page Break ---\n\n'.join(page_texts))
    return text_path


def convert(input_path, output_path):
    """Convert the PDF at *input_path* to a DOCX (or text) file.

    Args:
        input_path: Path of the PDF file to read.
        output_path: Destination path. When falsy, the input path with its
            suffix replaced by ``.docx`` is used.

    Returns:
        The path of the file that was actually written. If python-docx is not
        installed, this is *output_path* with its suffix replaced by ``.txt``.

    Raises:
        ImportError: If PyPDF2 is not installed.
        Exception: Whatever PyPDF2, python-docx or the file system raise while
            reading or writing; nothing is swallowed here.
    """
    # Imported lazily so that a missing dependency is raised from convert()
    # (and reported by main() as JSON) rather than at module import time.
    from PyPDF2 import PdfReader

    reader = PdfReader(input_path)
    # Pages for which extract_text() returns nothing are skipped.
    page_texts = [
        text
        for text in (page.extract_text() for page in reader.pages)
        if text
    ]

    if not output_path:
        output_path = str(Path(input_path).with_suffix('.docx'))

    # Only the import is guarded: an ImportError raised while writing the
    # document must not be mistaken for "python-docx is not installed".
    try:
        from docx import Document
    except ImportError:
        return _write_text_fallback(page_texts, output_path)
    return _write_docx(Document, page_texts, output_path)


def main(argv=None):
    """Command-line entry point.

    Parses ``--input`` / ``--output``, runs :func:`convert` and prints a JSON
    object with ``success``, ``output`` and ``message`` keys to stdout. On
    failure the process exits with status 1.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description='Convert PDF to Word')
    parser.add_argument('--input', required=True, help='Input PDF path')
    parser.add_argument(
        '--output',
        default=None,
        help='Output file path (default: input path with a .docx suffix)',
    )
    args = parser.parse_args(argv)
    try:
        result_path = convert(args.input, args.output)
        print(json.dumps({"success": True, "output": result_path, "message": "PDF converted to Word successfully"}))
    except Exception as e:
        # Top-level boundary: report every failure as JSON for the caller.
        print(json.dumps({"success": False, "output": "", "message": str(e)}))
        sys.exit(1)


if __name__ == '__main__':
    main()