import os
import logging

from rag_engine import process_file

# Configure logger to see the internal output of your chunkers
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("IntegrationTest")


def create_dummy_files():
    """Creates temporary test files for validation.

    Writes two fixtures into the current working directory:
    - test_doc.md: markdown with two H2 sections under one H1, used to
      verify header-aware splitting and header metadata propagation.
    - test_doc.txt: plain text with three paragraphs separated by blank
      lines, used to verify the paragraph chunking strategy.
    """
    # 1. Create a dummy Markdown file
    md_content = """# Navy RAG Test

## Section 1: Introduction
This is a test of the markdown splitting capability. It should respect headers.

## Section 2: Technical Specs
The system must handle:
* Paragraphs
* Headers
* Metadata
"""
    with open("test_doc.md", "w", encoding="utf-8") as f:
        f.write(md_content)

    # 2. Create a dummy Text file
    txt_content = """This is a standard text file. It uses double line breaks to indicate paragraphs.

The custom ParagraphChunker should detect this separation.

This is the third paragraph. It should be treated as a distinct chunk."""
    with open("test_doc.txt", "w", encoding="utf-8") as f:
        f.write(txt_content)


def cleanup_dummy_files():
    """Removes temporary files created by create_dummy_files().

    Safe to call even if the files were never written (existence-checked).
    """
    if os.path.exists("test_doc.md"):
        os.remove("test_doc.md")
    if os.path.exists("test_doc.txt"):
        os.remove("test_doc.txt")


def run_tests():
    """Run the three integration checks against process_file().

    1. Markdown handler: expects >=1 chunk with 'Header 1' metadata.
    2. Text handler, paragraph strategy: expects exactly 3 chunks
       (one per blank-line-separated paragraph in the fixture).
    3. Text handler, token strategy: expects >=1 chunk at chunk_size=50.

    Fixture files are always removed in the `finally` block, even when a
    test raises, so reruns start from a clean directory.
    """
    print("\n--- STARTING INTEGRATION TEST ---\n")
    create_dummy_files()

    try:
        # TEST 1: Markdown Processing
        print(">> Testing Markdown Handler...")
        md_docs = process_file("test_doc.md")

        if len(md_docs) > 0 and 'Header 1' in md_docs[0].metadata:
            print(f"✅ PASS: Markdown processed {len(md_docs)} chunks with header metadata.")
        else:
            print(f"❌ FAIL: Markdown processing failed or missing metadata. Docs found: {len(md_docs)}")

        # TEST 2: Text Processing (Paragraph Strategy)
        print("\n>> Testing Text Handler (Paragraph Strategy)...")
        # We expect 3 paragraphs based on the input above
        p_docs = process_file("test_doc.txt", chunking_strategy="paragraph")

        if len(p_docs) == 3:
            print(f"✅ PASS: Paragraph strategy identified {len(p_docs)} distinct paragraphs.")
        else:
            # NOTE: the original source had a literal newline inside this
            # f-string (a SyntaxError); it is encoded as \n here to keep
            # the intended two-line warning message.
            print(f"⚠️ WARNING: Paragraph strategy found {len(p_docs)} chunks (Expected 3). \n"
                  "Check min-length threshold settings in ParagraphChunker.")

        # TEST 3: Text Processing (Token Strategy)
        print("\n>> Testing Text Handler (Token Strategy)...")
        t_docs = process_file("test_doc.txt", chunking_strategy="token", chunk_size=50)

        if len(t_docs) > 0:
            print(f"✅ PASS: Token strategy successfully chunked text into {len(t_docs)} segments.")
        else:
            print("❌ FAIL: Token strategy returned 0 chunks.")

    except ImportError as e:
        print(f"\n❌ CRITICAL ERROR: Import failed. Check folder structure.\nDetails: {e}")
    except Exception as e:
        # Broad catch is deliberate here: this is a top-level test harness
        # boundary, and cleanup must still run via `finally`.
        print(f"\n❌ CRITICAL ERROR: {e}")
    finally:
        cleanup_dummy_files()
        print("\n--- TEST COMPLETE ---")


if __name__ == "__main__":
    run_tests()