Spaces:

NavyDevilDoc
/

AI_Toolkit

Sleeping

App Files Files Community

NavyDevilDoc committed on Dec 20, 2025

Commit

b868763

verified ·

1 Parent(s): 6695d4a

Create test_integration.py

Browse files

Files changed (1) hide show

src/test_integration.py +84 -0

src/test_integration.py ADDED Viewed

	@@ -0,0 +1,84 @@

+import os
+import logging
+from rag_engine import process_file
+# Configure root logging at INFO level so log records emitted while the tests run (e.g. by the chunkers) are shown.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger("IntegrationTest")  # named logger for this script; not referenced by the code visible in this file
def create_dummy_files():
    """Create the temporary Markdown and text fixtures used by the tests.

    Writes ``test_doc.md`` and ``test_doc.txt`` (UTF-8) into the current
    working directory, overwriting any existing files with those names.
    """
    # 1. Markdown fixture: one top-level header followed by two second-level
    #    sections, the second of which contains a bullet list.
    md_content = """# Navy RAG Test
## Section 1: Introduction
This is a test of the markdown splitting capability.
It should respect headers.
## Section 2: Technical Specs
The system must handle:
* Paragraphs
* Headers
* Metadata
"""
    with open("test_doc.md", "w", encoding="utf-8") as f:
        f.write(md_content)

    # 2. Text fixture: exactly three paragraphs separated by blank lines.
    #    The separators are spelled out as explicit "\n\n" so they cannot be
    #    lost to whitespace stripping; the fixture text itself promises double
    #    line breaks and run_tests() expects three paragraph chunks.
    paragraphs = (
        "This is a standard text file.\n"
        "It uses double line breaks to indicate paragraphs.",
        "The custom ParagraphChunker should detect this separation.",
        "This is the third paragraph. It should be treated as a distinct chunk.",
    )
    txt_content = "\n\n".join(paragraphs)
    with open("test_doc.txt", "w", encoding="utf-8") as f:
        f.write(txt_content)
def cleanup_dummy_files():
    """Delete the fixture files from the current working directory.

    Each of ``test_doc.md`` and ``test_doc.txt`` is removed only if it
    currently exists, so calling this when the files are absent is a no-op.
    """
    # Same order as the original: Markdown fixture first, then the text one.
    for fixture_name in ("test_doc.md", "test_doc.txt"):
        if os.path.exists(fixture_name):
            os.remove(fixture_name)
def run_tests():
    """Run the three file-processing checks and print a result for each.

    Creates the fixture files, passes them to ``process_file`` once per
    scenario (Markdown, paragraph strategy, token strategy), prints a
    PASS/FAIL/WARNING line per scenario, and always removes the fixtures
    afterwards. Exceptions raised during the checks are reported on stdout
    rather than propagated.
    """
    print("\n--- STARTING INTEGRATION TEST ---\n")
    create_dummy_files()
    try:
        # Check 1: Markdown. Passes when at least one chunk comes back and
        # the first chunk's metadata carries a 'Header 1' entry.
        # (presumably process_file returns a list of document objects with
        # a .metadata mapping; confirm against rag_engine)
        print(">> Testing Markdown Handler...")
        markdown_chunks = process_file("test_doc.md")
        has_header_metadata = (
            len(markdown_chunks) > 0
            and 'Header 1' in markdown_chunks[0].metadata
        )
        if not has_header_metadata:
            print(f"❌ FAIL: Markdown processing failed or missing metadata. Docs found: {len(markdown_chunks)}")
        else:
            print(f"✅ PASS: Markdown processed {len(markdown_chunks)} chunks with header metadata.")

        # Check 2: plain text with the paragraph strategy. Exactly three
        # chunks are expected; any other count is only a warning.
        print("\n>> Testing Text Handler (Paragraph Strategy)...")
        paragraph_chunks = process_file("test_doc.txt", chunking_strategy="paragraph")
        if len(paragraph_chunks) != 3:
            print(f"⚠️ WARNING: Paragraph strategy found {len(paragraph_chunks)} chunks (Expected 3). Check min-length threshold settings in ParagraphChunker.")
        else:
            print(f"✅ PASS: Paragraph strategy identified {len(paragraph_chunks)} distinct paragraphs.")

        # Check 3: plain text with the token strategy. Any non-empty result
        # counts as a pass.
        print("\n>> Testing Text Handler (Token Strategy)...")
        token_chunks = process_file("test_doc.txt", chunking_strategy="token", chunk_size=50)
        token_report = (
            f"✅ PASS: Token strategy successfully chunked text into {len(token_chunks)} segments."
            if len(token_chunks) > 0
            else "❌ FAIL: Token strategy returned 0 chunks."
        )
        print(token_report)
    except ImportError as import_err:
        print(f"\n❌ CRITICAL ERROR: Import failed. Check folder structure.\nDetails: {import_err}")
    except Exception as err:
        print(f"\n❌ CRITICAL ERROR: {err}")
    finally:
        # Fixtures are removed whether the checks passed, failed, or raised.
        cleanup_dummy_files()
        print("\n--- TEST COMPLETE ---")
+if __name__ == "__main__":  # run the checks only when executed as a script, not when imported
+    run_tests()