| |
|
| | """
|
| | Quick test script for Google Document AI markdown table output.
|
| | """
|
| |
|
| | import os
|
| | import sys
|
| | from pathlib import Path
|
| |
|
| |
|
| | sys.path.append(str(Path(__file__).parent / "src"))
|
| |
|
| | from extract_text.google_document_api import GoogleDocumentAPI
|
| |
|
def quick_test(
    credentials_path: str = "src/extract_text/photon-services-f0d3ec1417d0.json",
    test_pdf_path: str = "requirements_library/client-requirements/Kir-Kat/kitkat-f1.pdf",
    output_path: str = "quick_test_results.md",
) -> None:
    """Run a quick smoke test of the markdown table generation.

    Processes one PDF through ``GoogleDocumentAPI``, prints how many text
    blocks were extracted, previews the first five of them, prints the
    generated markdown table and writes that table to ``output_path``.

    All messages are printed to stdout. A missing input file is reported and
    the function returns early; any ``Exception`` raised while processing is
    caught and printed rather than propagated.

    Args:
        credentials_path: Credentials file passed to ``GoogleDocumentAPI``.
        test_pdf_path: PDF document passed to ``process_document``.
        output_path: File that receives the markdown table; it is opened in
            ``"w"`` mode, so existing content is replaced.
    """
    # Check both inputs up front so a wrong path yields a readable message
    # instead of whatever error the API wrapper would raise later.
    if not os.path.exists(credentials_path):
        print(f"❌ Credentials file not found: {credentials_path}")
        return

    if not os.path.exists(test_pdf_path):
        print(f"❌ Test PDF file not found: {test_pdf_path}")
        return

    try:
        print("Quick test of Google Document AI...")

        doc_api = GoogleDocumentAPI(credentials_path)
        document = doc_api.process_document(test_pdf_path)

        text_blocks = doc_api.extract_text_with_bounding_boxes(document)
        print(f"Found {len(text_blocks)} text blocks")

        print("\nFirst 5 text blocks with height:")
        print("-" * 60)
        for number, block in enumerate(text_blocks[:5], start=1):
            # Each block is indexed by its 'height' and 'text' keys; the text
            # preview is cut to 50 characters to keep one block per line.
            print(f"Block {number}: Height={block['height']:.2f}mm | Text: {block['text'][:50]}...")

        print("\nMarkdown Table Output:")
        print("=" * 80)
        markdown_table = doc_api.extract_text_with_markdown_table(document)
        print(markdown_table)

        # Explicit UTF-8 so the result file does not depend on the platform's
        # default encoding.
        with open(output_path, "w", encoding="utf-8") as results_file:
            results_file.write("# Quick Test Results\n\n")
            results_file.write(markdown_table)

        print(f"\n✅ Results saved to: {output_path}")

    except Exception as e:
        # Top-level boundary of a manual test script: report the failure and
        # return instead of surfacing a traceback.
        print(f"❌ Error: {e}")
|
| |
|
# Run the smoke test only when this file is executed directly, so importing
# the module does not trigger a Document AI request.
if __name__ == "__main__":
    quick_test()