#!/usr/bin/env python3
"""PDF to Text extractor using PyPDF2."""

import argparse
import json
import sys
from pathlib import Path
from typing import Optional


def convert(input_path: str, output_path: Optional[str]) -> str:
    """Extract the text of every page of a PDF and write it to a text file.

    Each page that yields text is emitted under a ``--- Page N ---`` header
    (N is the 1-based page number); pages are separated by a blank line.
    Pages for which ``extract_text()`` returns nothing are skipped.

    Args:
        input_path: Path of the PDF to read.
        output_path: Path of the text file to write. When falsy, the input
            path with its suffix replaced by ``.txt`` is used instead.

    Returns:
        The path the text was written to.

    Exceptions raised while importing PyPDF2, reading the PDF, or writing
    the output file are not caught here and propagate to the caller.
    """
    # Imported lazily so that a missing PyPDF2 surfaces as an exception from
    # convert(), which main() turns into a JSON failure message.
    from PyPDF2 import PdfReader

    reader = PdfReader(input_path)

    text_parts = []
    for page_number, page in enumerate(reader.pages, start=1):
        text = page.extract_text()
        if text:
            text_parts.append(f"--- Page {page_number} ---\n{text}")
    full_text = '\n\n'.join(text_parts)

    if not output_path:
        output_path = str(Path(input_path).with_suffix('.txt'))

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(full_text)
    return output_path


def main() -> None:
    """Parse command-line arguments, run the conversion, and report as JSON.

    Prints a single JSON object with ``success``, ``output`` and ``message``
    keys to stdout. On any exception from ``convert`` the object carries
    ``success: false`` and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(description='Extract text from PDF')
    parser.add_argument('--input', required=True)
    # Optional: convert() falls back to the input path with a .txt suffix.
    parser.add_argument('--output', default=None)
    args = parser.parse_args()

    # Top-level boundary: every failure is reported as JSON, not a traceback.
    try:
        result = convert(args.input, args.output)
        print(json.dumps({"success": True, "output": result, "message": "Text extracted successfully"}))
    except Exception as e:
        print(json.dumps({"success": False, "output": "", "message": str(e)}))
        sys.exit(1)


if __name__ == '__main__':
    main()