QC_Rules / test_pdf_requirements.py
Jakecole1's picture
Upload 11 files
6c16992 verified
#!/usr/bin/env python3
"""
Test script for PDF requirements functionality
"""
import os
import tempfile
from src.extract_text.ingest import RequirementsIngest
def _find_first_pdf(search_root):
    """Return the path of the first PDF found under *search_root*, or None.

    The tree is traversed with ``os.walk`` and the ``.pdf`` extension match
    is case-insensitive. ``None`` is returned when nothing matches.
    """
    for root, _dirs, files in os.walk(search_root):
        for name in files:
            if name.lower().endswith('.pdf'):
                return os.path.join(root, name)
    return None


def test_pdf_requirements():
    """Smoke-test requirements ingestion against a PDF (or a text fallback).

    Uses the first PDF found under ``requirements_library``. When none is
    found, a temporary ``.txt`` file containing a single requirement is
    created instead and removed afterwards. The chosen file is opened in
    binary mode and passed to
    ``RequirementsIngest.ingest_requirements_document``; a short summary of
    the result is printed.

    Raises:
        Exception: any error raised during ingestion or reporting is
            re-raised after its traceback has been printed, so the failure
            is visible to a test runner and to the script's exit status.
    """
    print("Testing PDF requirements functionality...")

    test_pdf_path = _find_first_pdf("requirements_library")
    # Remember whether the input is ours to delete; an existing library PDF
    # must never be removed by the cleanup step.
    created_temp_file = test_pdf_path is None

    if created_temp_file:
        print("No PDF files found for testing. Creating a simple test...")
        # delete=False: the file has to outlive this ``with`` so it can be
        # reopened in binary mode below; it is unlinked in ``finally``.
        with tempfile.NamedTemporaryFile(
            mode='w', suffix='.txt', delete=False, encoding='utf-8'
        ) as f:
            f.write("Test requirement: All products must have allergen information.")
            test_file_path = f.name
        print(f"Created test text file: {test_file_path}")
    else:
        print(f"Using existing PDF for testing: {test_pdf_path}")
        test_file_path = test_pdf_path

    try:
        ingest = RequirementsIngest()
        with open(test_file_path, 'rb') as f:
            result = ingest.ingest_requirements_document(f)

        print("✅ Ingestion successful!")
        print(f"Result type: {type(result)}")
        if isinstance(result, dict):
            print(f"File type: {result.get('type', 'unknown')}")
            print(f"Filename: {result.get('filename', 'unknown')}")
            print(f"File size: {result.get('file_size', 0)} bytes")
            # ``or ''`` keeps the preview working if the key is present
            # but holds None.
            preview = (result.get('text_content') or '')[:200]
            print(f"Text content preview: {preview}...")
        else:
            print(f"Text content: {result[:200]}...")
        print("\n✅ PDF requirements functionality is working!")
    except Exception as e:
        print(f"❌ Error during testing: {e}")
        import traceback
        traceback.print_exc()
        # Re-raise so the failure is not reported as a passing test.
        raise
    finally:
        if created_temp_file:
            try:
                os.unlink(test_file_path)
                print(f"Cleaned up test file: {test_file_path}")
            except OSError as cleanup_error:
                # Best-effort cleanup: report the problem, but do not mask
                # the outcome of the test itself.
                print(f"Could not remove test file {test_file_path}: {cleanup_error}")
# Script entry point: run the check only when this file is executed
# directly, not when it is imported (e.g. by a test runner).
if __name__ == "__main__":
    test_pdf_requirements()