Spaces:
Sleeping
Sleeping
| import os | |
| import sys | |
print("CogniChat Dependencies & PDF Handling Test")

# Core import probe. The BM25 check later in this script uses Document and
# BM25Retriever, so a failed import here ends the run with exit status 1.
# NOTE(review): indentation was lost in this copy of the file; the exit is
# assumed to apply to every ImportError, not only the rank_bm25 case.
try:
    print("\nTesting core imports...")
    from langchain_community.embeddings import HuggingFaceEmbeddings
    from langchain_community.retrievers import BM25Retriever
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain_core.documents import Document
except ImportError as import_failure:
    failure_text = str(import_failure)
    print(f"Import error: {import_failure}")
    # Give a concrete install hint when the message names rank_bm25.
    if "rank_bm25" in failure_text:
        print("Missing dependency: pip install rank-bm25==0.2.2")
    sys.exit(1)
else:
    print("Core LangChain imports successful!")
# Optional PDF backends. Each one is probed on its own; a missing package is
# reported but never aborts the script. The outer handler catches anything
# other than ImportError raised while probing and reports it as well.
try:
    print("\nTesting PDF loading capabilities...")

    try:
        from langchain_community.document_loaders import PyPDFLoader
    except ImportError:
        print("PyPDFLoader not available")
    else:
        print("PyPDFLoader available")

    try:
        import fitz
    except ImportError:
        print("PyMuPDF (fitz) not available")
    else:
        print("PyMuPDF (fitz) available - can handle corrupted PDFs")

    try:
        import pdfplumber
    except ImportError:
        print("pdfplumber not available")
    else:
        print("pdfplumber available - additional PDF parsing method")
except Exception as pdf_probe_error:
    print(f"Error testing PDF capabilities: {pdf_probe_error}")
# BM25 smoke test: build a retriever over three tiny in-memory documents and
# run one query through it. Any failure prints the error plus a traceback and
# ends the script with exit status 1.
try:
    print("\nTesting BM25 Retriever...")
    test_docs = [
        Document(page_content="This is the first test document about machine learning."),
        Document(page_content="This is the second document discussing natural language processing."),
        Document(page_content="The third document covers artificial intelligence topics."),
    ]
    bm25_retriever = BM25Retriever.from_documents(test_docs)
    # k is the retriever's result-count setting (see the BM25Retriever docs).
    bm25_retriever.k = 2
    query = "machine learning"
    # Retrievers are Runnables: invoke() is the supported entry point.
    # get_relevant_documents() has been deprecated since langchain-core 0.1.46.
    results = bm25_retriever.invoke(query)
    print("BM25 retriever created and tested successfully!")
    print(f"Retrieved {len(results)} documents for query: '{query}'")
except Exception as e:
    print(f"✗ Error testing BM25 retriever: {e}")
    # Imported here because it is only needed on the failure path.
    import traceback

    traceback.print_exc()
    sys.exit(1)
# Closing summary: success banner, the scenarios the application is expected
# to handle, the install command, and the pinned dependency versions.
# Entries that start with "\n" open a new section with a blank line.
for summary_line in (
    "\nAll tests completed successfully!",
    "\nThe application should now handle:",
    " • Regular file uploads and processing",
    " • Corrupted PDF files with multiple fallback methods",
    " • BM25 and FAISS hybrid retrieval",
    " • Proper error messages for failed file processing",
    "\nMake sure to install all dependencies with:",
    " pip install -r requirements.txt",
    "\nKey Dependencies Added/Updated",
    " • rank-bm25==0.2.2 (for BM25 retrieval)",
    " • pymupdf==1.23.26 (PDF fallback method)",
    " • pdfplumber==0.10.3 (additional PDF parsing)",
):
    print(summary_line)