"""Streamlit front end for a small PDF question-answering chatbot.

The script indexes ``ai_buddy.pdf`` into a persistent Chroma vector store,
retrieves the chunks most relevant to the user's question, and runs an
extractive question-answering model over the combined chunk text.
"""

import logging
import os

import streamlit as st
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from transformers import pipeline

logger = logging.getLogger(__name__)

# Locations and model identifiers used by the app.
PDF_PATH = "ai_buddy.pdf"
PERSIST_DIRECTORY = "chroma_db"
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
QA_MODEL_NAME = "deepset/roberta-base-squad2"

# Text-splitting parameters (measured by the splitter's length function).
CHUNK_SIZE = 750
CHUNK_OVERLAP = 150

NO_INFO_MESSAGE = "Sorry, I couldn't find any relevant information."

# Page setup
st.title("Ai-Buddy Chatbot")


@st.cache_resource
def initialize_system():
    """Build or load the vector store and create the QA pipeline.

    If the persist directory does not yet hold any files, the PDF is loaded,
    split into overlapping chunks, embedded, and written to a new Chroma
    store. Otherwise the existing store is opened.

    Returns:
        A ``(vector_db, qa_model)`` tuple: the Chroma vector store and the
        ``transformers`` question-answering pipeline.
    """
    persist_directory = PERSIST_DIRECTORY

    embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

    # A directory that exists but is empty (e.g. created by hand or left
    # behind by an aborted run) holds no index, so it must not be treated as
    # an existing database; otherwise every query would come back empty.
    db_exists = os.path.isdir(persist_directory) and bool(
        os.listdir(persist_directory)
    )

    if not db_exists:
        # Load the PDF and split it into overlapping chunks.
        data = PyPDFLoader(PDF_PATH).load()
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=CHUNK_SIZE,
            chunk_overlap=CHUNK_OVERLAP,
        )
        splits = splitter.split_documents(data)

        # Create and persist the vector store.
        vector_db = Chroma.from_documents(
            documents=splits,
            embedding=embeddings,
            persist_directory=persist_directory,
        )
        vector_db.persist()
    else:
        # Open the existing database.
        vector_db = Chroma(
            persist_directory=persist_directory,
            embedding_function=embeddings,
        )

    # Set up the extractive QA pipeline.
    qa_model = pipeline("question-answering", model=QA_MODEL_NAME)

    return vector_db, qa_model


# Initialize the system once per session.
if 'vector_db' not in st.session_state:
    st.session_state.vector_db, st.session_state.qa_model = initialize_system()


def get_answer(question: str) -> str:
    """Answer ``question`` from the indexed document.

    Args:
        question: The user's question.

    Returns:
        The answer text extracted by the QA model, a fixed apology message
        when nothing relevant is retrieved, or an ``"An error occurred: ..."``
        message if retrieval or inference raises.
    """
    try:
        # Get relevant documents.
        retriever = st.session_state.vector_db.as_retriever()
        docs = retriever.get_relevant_documents(question)

        if not docs:
            return NO_INFO_MESSAGE

        # Combine document contents into a single context string.
        context = " ".join(doc.page_content for doc in docs)

        # Retrieved chunks with no text give the QA model nothing to read.
        if not context.strip():
            return NO_INFO_MESSAGE

        response = st.session_state.qa_model(
            question=question,
            context=context,
        )
        return response['answer']

    except Exception as e:
        # UI boundary: keep the app alive and show the message to the user,
        # but record the full traceback for debugging.
        logger.exception("Failed to answer question")
        return f"An error occurred: {str(e)}"


# Simple input/output interface
question = st.text_input("Ask your question:").strip()

# Whitespace-only input is treated the same as no input.
if question:
    with st.spinner("Finding answer..."):
        answer = get_answer(question)
        st.write("Answer:", answer)