"""Diagnostic script for a persisted Chroma embeddings store.

Running the module performs two independent, best-effort checks against
``<cwd>/data/processed/embeddings/chroma`` and prints the results:

1. Opens ``chroma.sqlite3`` directly with ``sqlite3`` and reports every
   table together with its row count.
2. Opens the same directory through the LangChain ``Chroma`` wrapper and
   reports the collection list, the item count and a peek of the
   ``langchain`` collection.
"""
import os
import sqlite3
from contextlib import closing
from pathlib import Path

from dotenv import load_dotenv
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

# Populate the process environment before OPENAI_API_KEY is read below.
load_dotenv()


def _quote_identifier(name):
    """Return *name* as a double-quoted SQLite identifier.

    Embedded double quotes are doubled, so a table name containing spaces,
    keywords or quotes cannot break (or alter) the generated statement.
    """
    return '"' + name.replace('"', '""') + '"'


def _report_sqlite_tables(db_path):
    """Print every table in the SQLite file at *db_path* with its row count.

    Args:
        db_path: Absolute ``Path`` of the SQLite database file.

    Any failure is reported on stdout instead of being raised, so the
    caller can carry on with its remaining checks.
    """
    print("\nTesting SQLite database:")
    try:
        # Open read-only via a URI: a plain ``sqlite3.connect(path)`` would
        # silently create an empty database when the file is missing, which
        # both hides the real problem and leaves a stray file behind.
        read_only_uri = f"{db_path.as_uri()}?mode=ro"
        # ``closing`` guarantees the connection is released even when a
        # query fails part-way through.
        with closing(sqlite3.connect(read_only_uri, uri=True)) as conn:
            cursor = conn.cursor()

            # Check tables
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
            tables = cursor.fetchall()
            print(f"Found tables: {tables}")

            # Try to count records
            for (table_name,) in tables:
                # Identifiers cannot be bound as SQL parameters, so the
                # name is quoted explicitly before interpolation.
                cursor.execute(
                    f"SELECT COUNT(*) FROM {_quote_identifier(table_name)};"
                )
                count = cursor.fetchone()[0]
                print(f"Table {table_name}: {count} records")
    except Exception as e:  # diagnostic boundary: report, never abort
        print(f"SQLite Error: {e}")


def _report_chroma_collection(chroma_dir, collection_name):
    """Open *chroma_dir* through LangChain's Chroma wrapper and print stats.

    Args:
        chroma_dir: Directory holding the persisted Chroma data.
        collection_name: Name of the collection to open and inspect.

    Any failure is reported on stdout instead of being raised.
    """
    print("\nTesting ChromaDB load:")
    try:
        embeddings = OpenAIEmbeddings(
            openai_api_key=os.getenv("OPENAI_API_KEY")
        )

        db = Chroma(
            persist_directory=str(chroma_dir),
            embedding_function=embeddings,
            collection_name=collection_name,
        )

        print("\nChroma instance created")
        # NOTE(review): ``_client`` is a private attribute of the wrapper;
        # it is used here only for inspection and may change between
        # langchain_chroma releases - confirm when upgrading.
        client = db._client
        print(f"Collection names: {client.list_collections()}")

        # Try to get collection details
        collection = client.get_collection(collection_name)
        print(f"\nCollection count: {collection.count()}")
        print(f"Collection peek: {collection.peek()}")
    except Exception as e:  # diagnostic boundary: report, never abort
        print(f"\nChroma Error: {e}")


def test_load_embeddings():
    """Run both diagnostics against the store under the working directory.

    The store location is resolved relative to ``Path.cwd()``, so the
    script must be launched from the project root. Nothing is returned and
    nothing is raised for a failing check: each check prints its own
    outcome.
    """
    print("=== Testing Embeddings Load ===")

    base_dir = Path.cwd()
    chroma_dir = base_dir / "data" / "processed" / "embeddings" / "chroma"
    collection_name = "langchain"

    # Test SQLite connection directly
    _report_sqlite_tables(chroma_dir / "chroma.sqlite3")

    # Now try ChromaDB with langchain collection
    _report_chroma_collection(chroma_dir, collection_name)


if __name__ == "__main__":
    test_load_embeddings()