# chatbot_ai-buddy / app2.py — Streamlit QA chatbot over a PDF
# (renamed from app.py; non-code repository-page residue converted to this comment)
import streamlit as st
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from transformers import pipeline
import os
# ---- Page setup ----
# Render the application title at the top of the Streamlit page.
st.title("Ai-Buddy Chatbot")
# Load and process the PDF into a persistent Chroma vector store (cached).
@st.cache_resource
def initialize_system(pdf_path="ai_buddy.pdf", persist_directory="chroma_db"):
    """Build (or reopen) the vector store and the QA pipeline.

    Cached with ``@st.cache_resource`` so the heavy indexing/model loading
    runs at most once per server process for a given set of arguments.

    Parameters
    ----------
    pdf_path : str
        Path of the PDF to index when the store does not exist yet
        (defaults to the original hard-coded "ai_buddy.pdf").
    persist_directory : str
        On-disk directory where the Chroma database is persisted
        (defaults to the original hard-coded "chroma_db").

    Returns
    -------
    tuple
        ``(vector_db, qa_model)`` — the Chroma vector store and the
        HuggingFace extractive question-answering pipeline.
    """
    # Embedding model used both for indexing and for later retrieval queries.
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    if os.path.exists(persist_directory):
        # Reopen the previously persisted database instead of re-indexing.
        vector_db = Chroma(
            persist_directory=persist_directory,
            embedding_function=embeddings,
        )
    else:
        # First run: load the PDF, chunk it, embed it, and persist the index.
        data = PyPDFLoader(pdf_path).load()
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=750,     # characters per chunk
            chunk_overlap=150,  # overlap preserves context across chunk edges
        )
        splits = splitter.split_documents(data)
        vector_db = Chroma.from_documents(
            documents=splits,
            embedding=embeddings,
            persist_directory=persist_directory,
        )
        vector_db.persist()
    # Extractive QA model applied to the retrieved context at query time.
    qa_model = pipeline("question-answering", model="deepset/roberta-base-squad2")
    return vector_db, qa_model
# Initialize the system once per browser session. initialize_system() is
# itself cached with @st.cache_resource, so even across sessions the heavy
# indexing/model loading runs at most once per server process.
if 'vector_db' not in st.session_state:
    st.session_state.vector_db, st.session_state.qa_model = initialize_system()
# Function to answer questions
def get_answer(question):
    """Retrieve context relevant to *question* and extract an answer.

    Returns the QA model's answer string, a fallback message when no
    documents are retrieved, or an error description if anything fails.
    """
    try:
        # Pull the most relevant chunks from the vector store.
        retriever = st.session_state.vector_db.as_retriever()
        documents = retriever.get_relevant_documents(question)
        if not documents:
            return "Sorry, I couldn't find any relevant information."
        # Concatenate the retrieved chunks into a single context string.
        combined_context = " ".join(doc.page_content for doc in documents)
        # Run extractive QA over the combined context.
        result = st.session_state.qa_model(
            question=question,
            context=combined_context,
        )
        return result['answer']
    except Exception as e:
        # Surface the failure to the user instead of crashing the app.
        return f"An error occurred: {str(e)}"
# Simple input/output interface: ask a question, display the extracted answer.
question = st.text_input("Ask your question:")
if question:
    # Spinner covers retrieval + QA inference, which can take a few seconds.
    with st.spinner("Finding answer..."):
        answer = get_answer(question)
        st.write("Answer:", answer)