import os
import chainlit as cl
from dotenv import load_dotenv

# LangChain imports for retrieval and generation
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI

# Load environment variables (e.g., OPENAI_API_KEY)
load_dotenv()

# Global variable to store our QA chain.
qa_chain = None


@cl.on_chat_start
async def start_chat():
    """
    When the chat starts, load the document using WebBaseLoader, split it
    into chunks, create embeddings, build a vector store, and finally
    initialize a RetrievalQA chain. This chain will serve as the backend
    for our RAG system.
    """
    global qa_chain

    # URL to crawl (German Wikipedia page on Künstliche Intelligenz)
    url = "https://de.wikipedia.org/wiki/K%C3%BCnstliche_Intelligenz"

    # Retrieve the document from the webpage
    loader = WebBaseLoader(url)
    documents = loader.load()  # returns a list of Document objects

    # Split the document into manageable chunks for better retrieval
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    docs = text_splitter.split_documents(documents)

    # Create embeddings (make sure your OPENAI_API_KEY is set in your environment)
    embeddings = OpenAIEmbeddings()

    # Build a vector store from the documents using FAISS
    vectorstore = FAISS.from_documents(docs, embeddings)

    # Configure the retriever: retrieve the top 3 most relevant chunks
    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

    # Set up the language model (using the OpenAI LLM here) with desired parameters
    llm = OpenAI(temperature=0)

    # Create a RetrievalQA chain that first retrieves relevant context
    # and then generates an answer.
    qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

    await cl.Message(
        content="✅ Document loaded and processed successfully! "
        "You can now ask me questions about 'Künstliche Intelligenz'."
    ).send()


@cl.on_message
async def process_question(message: cl.Message):
    """
    When a message is received, use the QA chain to process the query.
    The chain:
      1. Retrieves relevant document chunks.
      2. Augments your query with the retrieved context.
      3. Generates an answer via the language model.
    """
    global qa_chain

    if qa_chain is None:
        await cl.Message(content="❌ The document has not been loaded yet.").send()
        return

    # Get the user's query
    query = message.content.strip()

    # Process the query using the RetrievalQA chain
    result = qa_chain.run(query)

    # Send the answer back to the user
    await cl.Message(content=result).send()
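

# ---------------------------------------------------------------------------
# Usage notes (a sketch; the filename "app.py" below is an assumption, not
# taken from the original script):
#
# The imports above follow the older `langchain.*` module layout; newer
# LangChain releases move these classes to `langchain_community` and
# `langchain_openai`, so pin a version that still exposes these paths.
#
# Dependencies for that older layout (WebBaseLoader additionally needs
# beautifulsoup4 for HTML parsing, and FAISS.from_documents needs faiss-cpu):
#   pip install chainlit langchain openai faiss-cpu python-dotenv beautifulsoup4
#
# Provide your API key in a .env file next to the script so load_dotenv()
# can pick it up:
#   OPENAI_API_KEY=your-key-here
#
# Then launch the app with the Chainlit CLI:
#   chainlit run app.py -w
# The -w flag enables auto-reload while you edit the file; open the local
# URL that Chainlit prints to start asking questions.
# ---------------------------------------------------------------------------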