|
|
| """
|
| Test script for PDF requirements functionality
|
| """
|
|
|
| import os
|
| import tempfile
|
| from src.extract_text.ingest import RequirementsIngest
|
|
|
def test_pdf_requirements():
    """Exercise ``RequirementsIngest.ingest_requirements_document`` on a sample file.

    The first ``.pdf`` found under ``requirements_library`` (searched relative
    to the current working directory) is used as input.  When none exists, a
    small temporary ``.txt`` file is written and ingested instead, then removed
    once the check finishes.

    Progress and a short summary of the result are printed to stdout.

    Raises:
        Exception: Whatever the ingestion step raises is re-raised after the
            error line is printed, so a failure is visible to a test runner
            and in the script's exit status instead of being reported as a
            success.
    """
    print("Testing PDF requirements functionality...")

    test_pdf_path = None
    for root, dirs, files in os.walk("requirements_library"):
        # Sorting ``dirs`` in place fixes the order os.walk descends in, and
        # sorting the candidates fixes which file wins, so the same PDF is
        # picked on every run regardless of filesystem listing order.
        dirs.sort()
        pdf_names = sorted(
            name for name in files if name.lower().endswith(".pdf")
        )
        if pdf_names:
            test_pdf_path = os.path.join(root, pdf_names[0])
            break

    # Remember whether we own the input file; only then is it ours to delete.
    created_temp_file = test_pdf_path is None

    if created_temp_file:
        print("No PDF files found for testing. Creating a simple test...")

        # delete=False: the file must outlive this ``with`` so it can be
        # reopened in binary mode below; it is removed in the ``finally``.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".txt", delete=False, encoding="utf-8"
        ) as f:
            f.write("Test requirement: All products must have allergen information.")
            test_file_path = f.name

        print(f"Created test text file: {test_file_path}")
    else:
        print(f"Using existing PDF for testing: {test_pdf_path}")
        test_file_path = test_pdf_path

    try:
        ingest = RequirementsIngest()

        with open(test_file_path, "rb") as f:
            result = ingest.ingest_requirements_document(f)

        print("✅ Ingestion successful!")
        print(f"Result type: {type(result)}")

        if isinstance(result, dict):
            print(f"File type: {result.get('type', 'unknown')}")
            print(f"Filename: {result.get('filename', 'unknown')}")
            print(f"File size: {result.get('file_size', 0)} bytes")
            # ``or ''`` also covers a key that is present but set to None.
            preview = (result.get("text_content") or "")[:200]
            print(f"Text content preview: {preview}...")
        else:
            print(f"Text content: {result[:200]}...")

        print("\n✅ PDF requirements functionality is working!")

    except Exception as e:
        print(f"❌ Error during testing: {e}")
        # Re-raise so the failure is not mistaken for a pass; the propagated
        # exception carries the full traceback.
        raise

    finally:
        if created_temp_file:
            try:
                os.unlink(test_file_path)
            except OSError as cleanup_error:
                # Best-effort cleanup: report the leftover file, but do not
                # mask the outcome of the check itself.
                print(
                    f"Could not remove test file {test_file_path}: {cleanup_error}"
                )
            else:
                print(f"Cleaned up test file: {test_file_path}")
|
|
|
# Script entry point: run the check only when this file is executed directly,
# not when it is imported.
if __name__ == "__main__":
    test_pdf_requirements()