Spaces:
Build error
Build error
| # File: remittance_pdf_processing_cli.py | |
| import argparse | |
| from remittance_pdf_processor import process_pdf, extract_text_from_pdf | |
| from ccmt_verifier_aus import verify_invoice_numbers | |
| from remittance_pdf_processing_types import Candidate, ProcessedPDFResult | |
def _settings_lines(multi_hop, verify):
    """Return the report heading plus the multi-hop / verification status lines.

    Args:
        multi_hop: Truthy when multi-hop processing was forced on the command line.
        verify: Truthy when invoice number verification was enabled.

    Returns:
        A list of three strings without trailing newlines.
    """
    return [
        "Extracted invoice numbers and payment amounts:",
        f"Multi-hop processing: {'Enabled' if multi_hop else 'Disabled'}",
        f"Verification: {'Enabled' if verify else 'Disabled'}",
    ]


def _result_lines(verified_candidate, unverified_candidate):
    """Build the verified / unverified section of the report, one string per line.

    Both candidates are indexed the same way: element 0 is a sequence of
    invoice-number lists (each joined with ", "), element 1 is a sequence of
    payment amounts.

    Args:
        verified_candidate: Verified invoice-number lists and amounts.
        unverified_candidate: Unverified invoice-number lists and amounts.

    Returns:
        A list of strings without trailing newlines. Section headings that are
        preceded by a blank line carry a leading "\\n", so the same list can be
        passed to ``print`` or written to a file with "\\n" appended.
    """
    verified_invoices = verified_candidate[0]
    verified_amounts = verified_candidate[1]

    lines = ["Verified Results:"]
    if len(verified_invoices) == 1 and len(verified_amounts) == 1:
        lines.append(f" Verified Invoice Numbers: {', '.join(verified_invoices[0])}")
        lines.append(f" Verified Amount: {verified_amounts[0]}")
    elif len(verified_invoices) == 1 and len(verified_amounts) == 0:
        lines.append(f" Verified Invoice Numbers: {', '.join(verified_invoices[0])}")
        lines.append(" Amount doesn't match. This might be a partial payment.")
    elif len(verified_invoices) >= 2:
        lines.append(" Verified Invoice Numbers:")
        lines.extend(
            f" List {i}: {', '.join(invoice_numbers)}"
            for i, invoice_numbers in enumerate(verified_invoices, 1)
        )
    else:
        # NOTE(review): one verified invoice list combined with two or more
        # verified amounts also lands here -- confirm that is intended.
        lines.append(" No verified results.")

    lines.append("\nUnverified Invoice Numbers:")
    lines.extend(
        f" Candidate {i}: {', '.join(invoice_numbers)}"
        for i, invoice_numbers in enumerate(unverified_candidate[0], 1)
    )
    lines.append("\nUnverified Payment Amounts:")
    lines.extend(
        f" Candidate {i}: {amount}"
        for i, amount in enumerate(unverified_candidate[1], 1)
    )
    return lines


def main():
    """Command-line entry point: process a PDF and report invoice numbers and amounts.

    Parses the command line, runs ``process_pdf`` on the given file (optionally
    with ``verify_invoice_numbers`` as the verifier), prints a summary of the
    verified and unverified candidates to stdout and, when ``--output`` is
    given, writes the extracted text plus the same summary to that file.
    """
    parser = argparse.ArgumentParser(description="Extract text and invoice numbers from a PDF file.")
    parser.add_argument("pdf_file", help="Path to the PDF file")
    parser.add_argument("-o", "--output", help="Output file path for extracted text (optional)")
    parser.add_argument("-v", "--verbose", action="store_true", help="Print extracted text from PDF")
    parser.add_argument("-f", "--force-image", action="store_true", help="Force processing PDF as image")
    parser.add_argument("--verify", action="store_true", help="Enable invoice number verification")
    parser.add_argument("--multi-hop", action="store_true", help="Force multi-hop processing")
    args = parser.parse_args()

    # The raw text is only shown with --verbose or saved with --output, so the
    # extraction is skipped when neither option is in use.
    extracted_text = None
    if args.verbose or args.output:
        extracted_text = extract_text_from_pdf(args.pdf_file, wrap_pages=True)

    # Determine the invoice verifier function
    invoice_verifier = verify_invoice_numbers if args.verify else None
    extracted_result: ProcessedPDFResult = process_pdf(
        args.pdf_file,
        force_image_processing=args.force_image,
        force_multi_hop=args.multi_hop,
        invoice_verifier=invoice_verifier,
    )
    verified_candidate, unverified_candidate = extracted_result

    # Build the report once; stdout and the output file share these lines.
    settings = _settings_lines(args.multi_hop, args.verify)
    results = _result_lines(verified_candidate, unverified_candidate)
    separator = "-" * 40

    if args.verbose:
        print("Extracted text from PDF:")
        print(separator)
        print(extracted_text)
        print(separator)
        print()

    for line in settings:
        print(line)
    print(separator)
    for line in results:
        print(line)
    print(separator)

    if args.output:
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write("Extracted text:\n")
            f.write(extracted_text)
            f.write("\n\n")
            # The file version has no separator rules between the sections.
            f.writelines(f"{line}\n" for line in settings + results)
        print(f"Extracted text and invoice numbers written to {args.output}")
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()