import getpass

from dotenv import dotenv_values, load_dotenv

# Load configuration from the local .env file twice, on purpose:
#  - dotenv_values() gives us the key/value pairs as a plain dict, and
#  - load_dotenv() exports the same pairs into os.environ so that
#    libraries which read the environment directly (e.g. the OpenAI
#    client looking for OPENAI_API_KEY) can find them.
config = dict(dotenv_values(".env"))
load_dotenv(".env")
|
|
|
|
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Download the blog post and parse only the parts of the page we care
# about — the post title, header, and body — so navigation chrome and
# footers never reach the index.
post_only = bs4.SoupStrainer(
    class_=("post-content", "post-title", "post-header")
)
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": post_only},
)
docs = loader.load()
|
|
# Chunk the document into ~1000-character pieces with a 200-character
# overlap (so sentences spanning a boundary survive in one chunk), then
# embed each chunk and index it in an in-memory Chroma vector store.
embeddings = OpenAIEmbeddings()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
|
|
| |
# Assemble the three RAG components: a deterministic chat model
# (temperature=0), the community "rlm/rag-prompt" template from the
# LangChain hub, and a retriever over the freshly built vector index.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
prompt = hub.pull("rlm/rag-prompt")
retriever = vectorstore.as_retriever()
|
|
|
|
def format_docs(docs):
    """Join the page contents of *docs* into one string, blank-line separated.

    The result is the "context" text stuffed into the RAG prompt.
    """
    sections = [doc.page_content for doc in docs]
    return "\n\n".join(sections)
|
|
|
|
# Compose the RAG pipeline with LCEL: retrieve and format the context,
# pass the user's question straight through, fill the prompt, call the
# model, and parse the reply down to a plain string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

question = "What is Task Decomposition?"
print(rag_chain.invoke(question))
|
|
|
|
|
|