#!/usr/bin/env python3
"""
Test script to verify all dependencies and PDF handling capabilities.
"""
import os
import sys
print("=== CogniChat Dependencies & PDF Handling Test ===")

# Test imports
# The names imported here (Document, BM25Retriever) are used further down in
# this script, so a failed import is fatal and ends the run with exit code 1.
print("\nTesting core imports...")
try:
    from langchain_community.embeddings import HuggingFaceEmbeddings
    from langchain_community.retrievers import BM25Retriever
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain_core.documents import Document
except ImportError as e:
    print(f"✗ Import error: {e}")
    # Give an install hint when the error message names the rank_bm25 package.
    if "rank_bm25" in str(e):
        print(" → Missing dependency: pip install rank-bm25==0.2.2")
    sys.exit(1)
else:
    print("✓ Core LangChain imports successful!")
# Test PDF loading capabilities
# Each backend is probed independently; a missing backend is only reported,
# it does not stop the script.
try:
    print("\nTesting PDF loading capabilities...")

    # Test PyPDF availability
    try:
        from langchain_community.document_loaders import PyPDFLoader
        print("✓ PyPDFLoader available")
    except ImportError:
        print("✗ PyPDFLoader not available")

    # Test PyMuPDF availability
    try:
        import fitz
        print("✓ PyMuPDF (fitz) available - can handle corrupted PDFs")
    except ImportError:
        print("✗ PyMuPDF (fitz) not available")

    # Test pdfplumber availability
    try:
        import pdfplumber
        print("✓ pdfplumber available - additional PDF parsing method")
    except ImportError:
        print("✗ pdfplumber not available")
except Exception as e:
    # Anything other than ImportError raised while probing lands here and is
    # reported without aborting the remaining checks.
    print(f"✗ Error testing PDF capabilities: {e}")
# Test BM25 Retriever
# Builds a retriever over three in-memory documents and runs one query.
# Any exception is reported with a traceback and ends the run with exit code 1.
try:
    print("\nTesting BM25 Retriever...")

    # Create some test documents
    test_docs = [
        Document(page_content="This is the first test document about machine learning."),
        Document(page_content="This is the second document discussing natural language processing."),
        Document(page_content="The third document covers artificial intelligence topics."),
    ]

    # Create BM25 retriever
    bm25_retriever = BM25Retriever.from_documents(test_docs)
    bm25_retriever.k = 2

    # Test retrieval
    # `invoke` is the supported retriever entry point; the older
    # `get_relevant_documents` is deprecated in langchain-core.
    query = "machine learning"
    results = bm25_retriever.invoke(query)

    print("✓ BM25 retriever created and tested successfully!")
    print(f"✓ Retrieved {len(results)} documents for query: '{query}'")
except Exception as e:
    print(f"✗ Error testing BM25 retriever: {e}")
    import traceback
    traceback.print_exc()
    sys.exit(1)
# Final report: reached only when none of the fatal checks above exited.
print("\n=== All tests completed successfully! ===")
print("\nThe application should now handle:")
print(" • Regular file uploads and processing")
print(" • Corrupted PDF files with multiple fallback methods")
print(" • BM25 and FAISS hybrid retrieval")
print(" • Proper error messages for failed file processing")
print("\nMake sure to install all dependencies with:")
print(" pip install -r requirements.txt")

# Display dependency summary
print("\n=== Key Dependencies Added/Updated ===")
print(" • rank-bm25==0.2.2 (for BM25 retrieval)")
print(" • pymupdf==1.23.26 (PDF fallback method)")
print(" • pdfplumber==0.10.3 (additional PDF parsing)")