#!/usr/bin/env python3
"""
Quick test script for Google Document AI markdown table output.
"""
import os
import sys
from pathlib import Path
# Add the src directory to the path
sys.path.append(str(Path(__file__).parent / "src"))
from extract_text.google_document_api import GoogleDocumentAPI
def quick_test(
    credentials_path="src/extract_text/photon-services-f0d3ec1417d0.json",
    test_pdf_path="requirements_library/client-requirements/Kir-Kat/kitkat-f1.pdf",
    output_path="quick_test_results.md",
):
    """Run a quick smoke test of the markdown table generation.

    Verifies that both input files exist, processes the PDF through
    ``GoogleDocumentAPI``, prints the number of extracted text blocks plus the
    height and first 50 characters of up to five of them, prints the generated
    markdown table, and writes that table to ``output_path``.

    Args:
        credentials_path: Path handed to the ``GoogleDocumentAPI`` constructor.
            Must exist on disk or the test stops early.
        test_pdf_path: Path of the PDF handed to ``process_document``. Must
            exist on disk or the test stops early.
        output_path: File the markdown table is written to (UTF-8,
            overwritten if present).

    Returns:
        None. Progress and failures are reported on stdout; any exception
        raised while processing is caught and printed, not propagated.
    """
    # NOTE(review): the leading "β" / "π" glyphs in the messages below look
    # like mis-decoded emoji; they are kept as found. Confirm against the
    # original file before changing them.
    if not os.path.exists(credentials_path):
        print(f"β Credentials file not found: {credentials_path}")
        return

    if not os.path.exists(test_pdf_path):
        print(f"β Test PDF file not found: {test_pdf_path}")
        return

    try:
        print("π Quick test of Google Document AI...")

        # Initialize and process
        doc_api = GoogleDocumentAPI(credentials_path)
        document = doc_api.process_document(test_pdf_path)

        # Get text blocks with height
        text_blocks = doc_api.extract_text_with_bounding_boxes(document)
        print(f"π Found {len(text_blocks)} text blocks")

        # Show first few blocks with height
        print("\nπ First 5 text blocks with height:")
        print("-" * 60)
        for number, block in enumerate(text_blocks[:5], start=1):
            print(
                f"Block {number}: Height={block['height']:.2f}mm | "
                f"Text: {block['text'][:50]}..."
            )

        # Generate and display markdown table
        print("\nπ Markdown Table Output:")
        print("=" * 80)
        markdown_table = doc_api.extract_text_with_markdown_table(document)
        print(markdown_table)

        # Save to file
        with open(output_path, "w", encoding="utf-8") as f:
            f.write("# Quick Test Results\n\n")
            f.write(markdown_table)

        # The message is a single-line literal; the source had it split
        # across two physical lines, which is an unterminated string.
        print(f"\nβ Results saved to: {output_path}")
    except Exception as e:
        # Top-level boundary of a throwaway script: report and stop.
        print(f"β Error: {str(e)}")
# Run the quick test only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    quick_test()