# rag/query_data.py
# Source: uploaded to Hugging Face via huggingface_hub by poemsforaphrodite
# (commit 8e0205b, verified)
import sys
from dotenv import load_dotenv
import os
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
# Load variables from a local .env file into the process environment
# (side effect at import time), then read the OpenAI key for use below.
load_dotenv()
# NOTE(review): may be None if OPENAI_API_KEY is unset — the OpenAI clients
# in main() will fail at call time in that case; confirm the .env is present.
openai_api_key = os.getenv("OPENAI_API_KEY")
def main():
    """Run a one-shot RAG query against a persisted Chroma vector store.

    Reads the query from the command line, retrieves relevant documents
    from ``./chroma_db2``, answers via an OpenAI chat model constrained to
    the retrieved context, then prints the answer and the source documents.

    Exits with status 1 if no query argument is given.
    """
    if len(sys.argv) < 2:
        print("Please provide a query as a command-line argument.")
        sys.exit(1)
    # Join all arguments so multi-word queries work without shell quoting;
    # a single quoted argument behaves exactly as before.
    query = " ".join(sys.argv[1:])

    embedding_function = OpenAIEmbeddings(openai_api_key=openai_api_key)
    print("Loading Chroma database...")
    # Must use the same embedding function the store was built with.
    vectorstore = Chroma(persist_directory="./chroma_db2", embedding_function=embedding_function)
    # _collection is a private attribute — used here only for diagnostics.
    print(f"Chroma collection name: {vectorstore._collection.name}")
    print(f"Number of documents in Chroma: {vectorstore._collection.count()}")

    retriever = vectorstore.as_retriever()
    model = ChatOpenAI(openai_api_key=openai_api_key)
    # Restrict the model to the retrieved context to reduce hallucination.
    template = """Answer the question based only on the following context:
{context}
Question: {question}
"""
    prompt = ChatPromptTemplate.from_template(template)
    # LCEL pipeline: retrieve context + pass the raw question, fill the
    # prompt, call the model, and parse the reply down to a plain string.
    chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | prompt
        | model
        | StrOutputParser()
    )

    print("Invoking the chain...")
    response = chain.invoke(query)
    print("Response:", response)

    # Retrieve again independently so we can show the user the sources.
    # NOTE(review): this issues a second retrieval call; results should
    # match what the chain used, but that is not guaranteed by the API.
    print("\nRetrieving relevant documents...")
    docs = retriever.invoke(query)
    print(f"Number of retrieved documents: {len(docs)}")
    print("\nSources:")
    for i, doc in enumerate(docs, 1):
        print(f"Document {i}:")
        print(f"  Metadata: {doc.metadata}")
        print(f"  Content (first 100 chars): {doc.page_content[:100]}...")
        print()

    if not docs:
        print("No documents were retrieved. This might indicate an issue with the document storage or retrieval process.")
# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()