"""Query a persisted Chroma vector store with a simple RAG chain.

Usage:
    python script.py "your question here"

Reads OPENAI_API_KEY from the environment (a .env file is honored via
python-dotenv), answers the question using documents retrieved from the
local ./chroma_db2 store, and prints the retrieved source documents.
"""
import sys
import os

from dotenv import load_dotenv
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate

load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")


def _format_docs(docs):
    """Join retrieved documents' text into one context string for the prompt.

    Without this, the raw list[Document] repr (metadata and all) would be
    interpolated into the prompt, wasting tokens and confusing the model.
    """
    return "\n\n".join(doc.page_content for doc in docs)


def main():
    """Run one RAG query from argv[1] and print the answer plus its sources."""
    if len(sys.argv) < 2:
        print("Please provide a query as a command-line argument.")
        sys.exit(1)
    if not openai_api_key:
        # Fail fast with a clear message instead of an opaque auth error
        # from the OpenAI client later on.
        print("OPENAI_API_KEY is not set. Add it to your environment or .env file.")
        sys.exit(1)
    query = sys.argv[1]

    embedding_function = OpenAIEmbeddings(openai_api_key=openai_api_key)

    print("Loading Chroma database...")
    vectorstore = Chroma(
        persist_directory="./chroma_db2",
        embedding_function=embedding_function,
    )
    # NOTE(review): _collection is a private attribute of the Chroma wrapper,
    # used here for diagnostics only; it may break on a langchain upgrade.
    print(f"Chroma collection name: {vectorstore._collection.name}")
    print(f"Number of documents in Chroma: {vectorstore._collection.count()}")

    retriever = vectorstore.as_retriever()
    model = ChatOpenAI(openai_api_key=openai_api_key)

    template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
    prompt = ChatPromptTemplate.from_template(template)

    # Retrieve ONCE and reuse the documents both to build the answer and to
    # display the sources. The previous version ran retrieval twice (inside
    # the chain and again afterwards), doubling embedding API calls and
    # allowing the printed sources to diverge from the context actually used.
    print("\nRetrieving relevant documents...")
    docs = retriever.invoke(query)
    print(f"Number of retrieved documents: {len(docs)}")

    chain = prompt | model | StrOutputParser()

    print("Invoking the chain...")
    response = chain.invoke({"context": _format_docs(docs), "question": query})
    print("Response:", response)

    print("\nSources:")
    for i, doc in enumerate(docs, 1):
        print(f"Document {i}:")
        print(f"  Metadata: {doc.metadata}")
        print(f"  Content (first 100 chars): {doc.page_content[:100]}...")
        print()

    if not docs:
        print("No documents were retrieved. This might indicate an issue with the document storage or retrieval process.")


if __name__ == "__main__":
    main()