File size: 2,716 Bytes
496c567
 
 
ba63231
496c567
 
 
 
 
 
 
 
ba63231
496c567
 
ba63231
496c567
ba63231
496c567
 
 
 
 
ba63231
496c567
ba63231
 
496c567
 
ba63231
496c567
ba63231
496c567
 
 
ba63231
496c567
ba63231
496c567
 
ba63231
496c567
 
 
 
 
 
 
 
 
 
 
 
 
ba63231
 
496c567
 
 
 
 
 
 
ba63231
496c567
 
 
 
 
 
 
 
ba63231
496c567
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import os
import sys

print("CogniChat Dependencies & PDF Handling Test")

# Core LangChain imports are mandatory: if any of them fails the script
# reports the error and exits with status 1. The imported names stay bound
# at module level because the BM25 check further down uses them.
try:
    print("\nTesting core imports...")
    from langchain_community.embeddings import HuggingFaceEmbeddings
    from langchain_community.retrievers import BM25Retriever
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain_core.documents import Document
except ImportError as import_failure:
    failure_text = str(import_failure)
    print(f"Import error: {failure_text}")
    # Give a concrete install command when the message names rank_bm25.
    if "rank_bm25" in failure_text:
        print("Missing dependency: pip install rank-bm25==0.2.2")
    sys.exit(1)
else:
    print("Core LangChain imports successful!")
# Probe the optional PDF back-ends one at a time. These checks are
# informational only: a missing or broken package is reported and the script
# keeps going.
#
# Each probe handles its own failures. Previously a single outer
# ``except Exception`` wrapped all three probes, so a non-ImportError raised
# by one import (for example a broken or wrongly named package failing during
# its own initialisation) skipped every probe after it.
print("\nTesting PDF loading capabilities...")

try:
    from langchain_community.document_loaders import PyPDFLoader  # noqa: F401
except ImportError:
    print("PyPDFLoader not available")
except Exception as e:
    print(f"Error testing PDF capabilities: {e}")
else:
    print("PyPDFLoader available")

try:
    import fitz  # noqa: F401
except ImportError:
    print("PyMuPDF (fitz) not available")
except Exception as e:
    # NOTE(review): an unrelated PyPI package also named "fitz" is known to
    # fail on import with a non-ImportError - confirm that pymupdf is the
    # package actually installed if this branch is hit.
    print(f"Error testing PDF capabilities: {e}")
else:
    print("PyMuPDF (fitz) available - can handle corrupted PDFs")

try:
    import pdfplumber  # noqa: F401
except ImportError:
    print("pdfplumber not available")
except Exception as e:
    print(f"Error testing PDF capabilities: {e}")
else:
    print("pdfplumber available - additional PDF parsing method")
# Build a BM25 retriever over three tiny in-memory documents and run one
# query through it. Any failure here is fatal: the error and traceback are
# printed and the script exits with status 1.
try:
    print("\nTesting BM25 Retriever...")

    test_docs = [
        Document(page_content="This is the first test document about machine learning."),
        Document(page_content="This is the second document discussing natural language processing."),
        Document(page_content="The third document covers artificial intelligence topics."),
    ]

    bm25_retriever = BM25Retriever.from_documents(test_docs)
    bm25_retriever.k = 2  # cap the number of documents returned per query
    query = "machine learning"
    # ``invoke`` is the supported retriever entry point;
    # ``get_relevant_documents`` is deprecated in langchain-core.
    results = bm25_retriever.invoke(query)
    print("BM25 retriever created and tested successfully!")
    print(f"Retrieved {len(results)} documents for query: '{query}'")

except Exception as e:
    print(f"✗ Error testing BM25 retriever: {e}")
    # NOTE(review): the BM25Retriever implementation appears to import
    # rank_bm25 only when the index is built, so a missing package shows up
    # here rather than in the core import check. Repeat the install hint so
    # the user still sees it.
    if isinstance(e, ImportError) and "rank_bm25" in str(e):
        print("Missing dependency: pip install rank-bm25==0.2.2")
    import traceback
    traceback.print_exc()
    sys.exit(1)

# Closing summary: success banner, the capabilities the application is
# expected to have, the install command, and the pinned dependencies.
# A single print call with a newline separator emits exactly the same text
# as one print call per line.
print(
    "\nAll tests completed successfully!",
    "\nThe application should now handle:",
    "  • Regular file uploads and processing",
    "  • Corrupted PDF files with multiple fallback methods",
    "  • BM25 and FAISS hybrid retrieval",
    "  • Proper error messages for failed file processing",
    "\nMake sure to install all dependencies with:",
    "  pip install -r requirements.txt",
    "\nKey Dependencies Added/Updated",
    "  • rank-bm25==0.2.2 (for BM25 retrieval)",
    "  • pymupdf==1.23.26 (PDF fallback method)",
    "  • pdfplumber==0.10.3 (additional PDF parsing)",
    sep="\n",
)