Spaces:
Sleeping
Sleeping
| from pymongo import MongoClient | |
| # error since Jan 2024, from langchain.embeddings.openai import OpenAIEmbeddings | |
| from langchain_openai import OpenAIEmbeddings | |
| # error since Jan 2024, from langchain.vectorstores import MongoDBAtlasVectorSearch | |
| from langchain_community.vectorstores import MongoDBAtlasVectorSearch | |
| # error since Jan 2024, from langchain.document_loaders import DirectoryLoader | |
| from langchain_community.document_loaders import DirectoryLoader | |
| # error since Jan 2024, from langchain.llms import OpenAI | |
| from langchain_community.llms import OpenAI | |
| from langchain.chains import RetrievalQA | |
| import gradio as gr | |
| from gradio.themes.base import Base | |
| #import key_param | |
| import os | |
def query_data(query: str, openai_api_key: str, mongo_uri: str):
    """Answer ``query`` with Atlas Vector Search alone and with a RetrievalQA chain.

    Args:
        query: The user's question.
        openai_api_key: OpenAI API key, passed to both the embedding model
            and the LLM.
        mongo_uri: MongoDB connection string handed to ``MongoClient``.

    Returns:
        A ``(as_output, retriever_output)`` tuple: the ``page_content`` of the
        single most similar document (or a short notice when the search
        returns nothing), and the answer produced by the RetrievalQA chain.
    """
    # NOTE(review): these writes put per-request credentials into the process
    # environment, which is shared by every user of the app. Both OpenAI
    # clients below receive the key explicitly, so this may be unnecessary --
    # confirm nothing else reads these variables before removing.
    os.environ["OPENAI_API_KEY"] = openai_api_key
    os.environ["MONGO_URI"] = mongo_uri

    db_name = "langchain_demo"
    collection_name = "collection_of_text_blobs"

    # The context manager closes the client (and its connections) when the
    # request finishes; previously a new client was leaked on every click.
    with MongoClient(mongo_uri) as client:
        collection = client[db_name][collection_name]

        # Text embedding model used to vectorize the question.
        embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

        # Vector store backed by the Atlas collection and its "default" index.
        vector_store = MongoDBAtlasVectorSearch(
            collection, embeddings, index_name="default"
        )

        # Plain vector search: fetch the single most similar document.
        # The keyword is lower-case ``k``; the previous ``K=1`` was not the
        # documented parameter, so the top-1 limit was not applied.
        docs = vector_store.similarity_search(query, k=1)
        # Guard against an empty result instead of raising IndexError.
        as_output = docs[0].page_content if docs else "No matching document found."

        # Language generation model (not an embedding model). No model name is
        # passed, so LangChain's default for the OpenAI class is used.
        llm = OpenAI(openai_api_key=openai_api_key, temperature=0)

        # Retriever view over the same vector store.
        retriever = vector_store.as_retriever()

        # "stuff" chain: inserts all retrieved documents into one prompt and
        # passes that prompt to the LLM.
        qa = RetrievalQA.from_chain_type(llm, chain_type="stuff", retriever=retriever)

        # Run the chain while the Mongo client is still open.
        retriever_output = qa.run(query)

    # Raw vector-search hit, then the RAG-generated answer.
    return as_output, retriever_output
# Create a web interface for the app, using Gradio.
with gr.Blocks(theme=Base(), title="MongoDB Atlas Vector Search + RAG Architecture") as demo:
    gr.Markdown("\n# MongoDB Atlas Vector Search + RAG Architecture\n")

    # Credentials are masked while typing. The former prefilled values are
    # now placeholders, so a hidden prefix cannot be accidentally duplicated
    # when a user pastes a complete key or URI.
    openai_api_key = gr.Textbox(
        label="OpenAI API Key",
        placeholder="sk-",
        type="password",
        lines=1,
    )
    mongo_uri = gr.Textbox(
        label="Mongo Atlas URI",
        placeholder="mongodb+srv://",
        type="password",
        lines=1,
    )
    textbox = gr.Textbox(label="Enter your Question:")

    with gr.Row():
        button = gr.Button("Submit", variant="primary")

    with gr.Column():
        output1 = gr.Textbox(
            lines=1,
            max_lines=10,
            label="Atlas Vector Search output (document field as is):",
        )
        output2 = gr.Textbox(
            lines=1,
            max_lines=10,
            label="Atlas Vector Search output + Langchain's RetrieverQA + OpenAI LLM:",
        )

    # Call query_data when the Submit button is clicked; its two return values
    # fill the two output boxes in order.
    button.click(
        query_data,
        inputs=[textbox, openai_api_key, mongo_uri],
        outputs=[output1, output2],
    )

demo.launch()