Hugging Face Spaces status: Sleeping
| import streamlit as st | |
| from langchain.document_loaders import PyPDFLoader | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.vectorstores import Chroma | |
| from langchain.embeddings import HuggingFaceEmbeddings | |
| from transformers import pipeline | |
| import os | |
# --- Page setup: title shown at the top of the Streamlit app ---
st.title("Ai-Buddy Chatbot")
| # Load and process PDF | |
| def initialize_system(): | |
| # Set up persistent directory for Chroma | |
| persist_directory = "chroma_db" | |
| # Create embeddings | |
| embeddings = HuggingFaceEmbeddings( | |
| model_name="sentence-transformers/all-MiniLM-L6-v2" | |
| ) | |
| # Check if database already exists | |
| if not os.path.exists(persist_directory): | |
| # Load PDF | |
| data = PyPDFLoader("ai_buddy.pdf").load() | |
| # Split into chunks | |
| splitter = RecursiveCharacterTextSplitter( | |
| chunk_size=750, | |
| chunk_overlap=150 | |
| ) | |
| splits = splitter.split_documents(data) | |
| # Create and persist vector store | |
| vector_db = Chroma.from_documents( | |
| documents=splits, | |
| embedding=embeddings, | |
| persist_directory=persist_directory | |
| ) | |
| vector_db.persist() | |
| else: | |
| # Load existing database | |
| vector_db = Chroma( | |
| persist_directory=persist_directory, | |
| embedding_function=embeddings | |
| ) | |
| # Setup QA pipeline | |
| qa_model = pipeline("question-answering", model="deepset/roberta-base-squad2") | |
| return vector_db, qa_model | |
| # Initialize the system | |
| if 'vector_db' not in st.session_state: | |
| st.session_state.vector_db, st.session_state.qa_model = initialize_system() | |
| # Function to answer questions | |
| def get_answer(question): | |
| try: | |
| # Get relevant documents | |
| docs = st.session_state.vector_db.as_retriever().get_relevant_documents(question) | |
| if not docs: | |
| return "Sorry, I couldn't find any relevant information." | |
| # Combine document contents | |
| context = " ".join([doc.page_content for doc in docs]) | |
| # Get answer | |
| response = st.session_state.qa_model( | |
| question=question, | |
| context=context | |
| ) | |
| return response['answer'] | |
| except Exception as e: | |
| return f"An error occurred: {str(e)}" | |
| # Simple input/output interface | |
| question = st.text_input("Ask your question:") | |
| if question: | |
| with st.spinner("Finding answer..."): | |
| answer = get_answer(question) | |
| st.write("Answer:", answer) |