#!/usr/bin/env python3
"""PDF to Excel converter using pdfplumber for table extraction."""

import argparse
import csv
import json
import sys
from pathlib import Path
from typing import List, Optional


def _extract_rows(input_path: str) -> List[List[str]]:
    """Collect table rows (or, failing that, text lines) from every page."""
    # Imported lazily so the module can be imported (e.g. for ``--help``)
    # even when pdfplumber is not installed.
    import pdfplumber

    rows: List[List[str]] = []
    with pdfplumber.open(input_path) as pdf:
        for page in pdf.pages:
            tables = page.extract_tables()
            if tables:
                for table in tables:
                    # Falsy cells (None / empty string) become ''.
                    rows.extend([cell or '' for cell in row] for row in table)
                continue

            # No tables on this page: fall back to one single-cell row per
            # line of extracted text.
            text = page.extract_text()
            if text:
                rows.extend([line.strip()] for line in text.split('\n'))
    return rows


def _write_rows(rows: List[List[str]], output_path: str) -> str:
    """Write rows to .xlsx, or to .csv when openpyxl is missing; return the path."""
    target = Path(output_path)

    # Only the import is guarded: an ImportError raised later (while building
    # or saving the workbook) must not silently trigger the CSV fallback.
    try:
        import openpyxl
    except ImportError:
        csv_path = str(target.with_suffix('.csv'))
        with open(csv_path, 'w', newline='', encoding='utf-8') as f:
            csv.writer(f).writerows(rows)
        return csv_path

    wb = openpyxl.Workbook()
    ws = wb.active
    ws.title = "Extracted Data"
    for row in rows:
        ws.append(row)
    xlsx_path = str(target.with_suffix('.xlsx'))
    wb.save(xlsx_path)
    return xlsx_path


def convert(input_path: str, output_path: Optional[str] = None) -> str:
    """Extract tabular data from a PDF and write it to a spreadsheet file.

    For each page, every row of every table found by
    ``page.extract_tables()`` is appended to the output. Pages on which no
    table is found contribute one single-cell row per line of extracted text.

    Args:
        input_path: Path of the PDF to read.
        output_path: Desired output path. Its suffix is always replaced:
            ``.xlsx`` when openpyxl can be imported, ``.csv`` otherwise.
            When empty or ``None``, the output is placed next to the input
            file, using the input file's name.

    Returns:
        The path of the file that was actually written.

    Raises:
        ImportError: If pdfplumber is not installed.
        Exception: Errors from opening/parsing the PDF or writing the output
            are propagated unchanged.
    """
    all_rows = _extract_rows(input_path)
    if not output_path:
        # The writer replaces the suffix anyway, so the input path itself is
        # a sufficient base for the default output location.
        output_path = str(Path(input_path).with_suffix('.csv'))
    return _write_rows(all_rows, output_path)


def main(argv: Optional[List[str]] = None) -> None:
    """Command-line entry point.

    Prints a single JSON object to stdout with the keys ``success``,
    ``output`` and ``message``. Exits with status 1 when the conversion
    fails.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``; ``None``
            (the default) reads the real command line.
    """
    parser = argparse.ArgumentParser(description='Convert PDF to Excel')
    parser.add_argument('--input', required=True, help='path of the PDF to convert')
    parser.add_argument(
        '--output',
        default=None,
        help='output path (suffix is replaced by .xlsx or .csv); '
             'defaults to the input path',
    )
    args = parser.parse_args(argv)

    # Top-level boundary: any failure is reported as JSON rather than as a
    # traceback, so a calling process can always parse stdout.
    try:
        result = convert(args.input, args.output)
    except Exception as e:
        print(json.dumps({"success": False, "output": "", "message": str(e)}))
        sys.exit(1)

    print(json.dumps({"success": True, "output": result, "message": "PDF converted to Excel successfully"}))


if __name__ == '__main__':
    main()