import gradio as gr from langchain.document_loaders import PyPDFLoader from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain.vectorstores import FAISS from langchain.embeddings import HuggingFaceEmbeddings from langchain.chains import ConversationalRetrievalChain from langchain.memory import ConversationBufferMemory # Step 1: Load and Split Documents def load_documents(pdf_files): loaders = [PyPDFLoader(file.name) for file in pdf_files] docs = [] for loader in loaders: docs.extend(loader.load()) # Split documents into smaller chunks text_splitter = RecursiveCharacterTextSplitter( chunk_size=500, chunk_overlap=50 ) return text_splitter.split_documents(docs) # Step 2: Create Vector Database def create_vector_db(splits): embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2") vector_db = FAISS.from_documents(splits, embeddings) return vector_db # Step 3: Initialize Conversational Retrieval Chain def initialize_qa_chain(vector_db): memory = ConversationBufferMemory( memory_key="chat_history", return_messages=True ) qa_chain = ConversationalRetrievalChain.from_chain_type( retriever=vector_db.as_retriever(), chain_type="stuff", memory=memory ) return qa_chain # Step 4: Handle Conversation def handle_conversation(qa_chain, query, history): result = qa_chain({"question": query, "chat_history": history}) response = result["answer"] history.append((query, response)) return history, history # Gradio UI def demo(): vector_db = gr.State() qa_chain = gr.State() with gr.Blocks() as interface: gr.Markdown("