| from langchain_community.vectorstores import Chroma |
| from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings |
| from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader |
| from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter |
| from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings |
|
|
| import streamlit as st, os |
| from utils import st_def, utilities |
# Draw the shared sidebar via the project helper and keep whatever it returns.
# NOTE(review): openai_api_key is not read anywhere in the visible script —
# confirm whether it is still needed.
openai_api_key = st_def.st_sidebar()
|
|
def load_docs(directory):
    """Load the documents under *directory* with ``DirectoryLoader``.

    Args:
        directory: Path handed unchanged to ``DirectoryLoader``.

    Returns:
        Whatever ``DirectoryLoader(directory).load()`` returns (the loaded
        documents).
    """
    return DirectoryLoader(directory).load()
|
|
|
|
# Load the corpus and list the loaded file names on the page.
# NOTE(review): the original indentation was lost; all three statements are
# assumed to run inside the spinner context.
with st.spinner('Loading files...'):
    documents = load_docs('data/pets_txt/')
    file_names = []
    for doc in documents:
        # Show only the base name of each document's source path.
        file_names.append(os.path.basename(doc.metadata['source']))
    st.write('\n\n'.join(file_names))
|
|
def split_docs(documents, chunk_size=1000, chunk_overlap=20):
    """Split *documents* into chunks with ``RecursiveCharacterTextSplitter``.

    Args:
        documents: Loaded documents to split.
        chunk_size: Passed through to the splitter as ``chunk_size``.
        chunk_overlap: Passed through to the splitter as ``chunk_overlap``.

    Returns:
        The result of ``split_documents(documents)`` on the configured
        splitter.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(documents)
|
|
@st.cache_resource(show_spinner=False)
def _build_index(fingerprint, _documents):
    """Split, embed and index the documents once per distinct fingerprint.

    Streamlit re-executes this script on every user interaction, so without
    caching each chat message would re-split the documents, re-create the
    embedding model and rebuild the Chroma store from scratch.

    Args:
        fingerprint: Hashable summary of the loaded documents. It is the
            only cache key, so a changed corpus triggers a rebuild.
        _documents: The loaded documents. The leading underscore tells
            Streamlit not to hash this argument.

    Returns:
        A ``(chunks, embedding_function, store)`` tuple.
    """
    chunks = split_docs(_documents)
    embedder = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    store = Chroma.from_documents(documents=chunks, embedding=embedder)
    return chunks, embedder, store


# Cheap cache key: (source path, text length) for every loaded document.
_corpus_fingerprint = tuple(
    (doc.metadata.get("source"), len(doc.page_content)) for doc in documents
)

# Keep the original module-level names so the rest of the script is unchanged.
docs, embedding_function, db = _build_index(_corpus_fingerprint, documents)
|
|
# Seed the conversation the first time this session runs the script.
if "messages2" not in st.session_state:
    st.session_state.messages2 = [
        {"role": "system", "content": "hi"},
        {"role": "assistant", "content": "How May I Help You Today💬?"},
    ]

# Replay the stored conversation, skipping the first (system) entry.
history = st.session_state.messages2
for past in history[1:]:
    with st.chat_message(past["role"]):
        st.markdown(past["content"])
|
|
# Handle a newly submitted question: echo it, search the index, show the reply.
if prompt := st.chat_input("💬Ask me anything about the documents above!🍦"):
    with st.chat_message("user"):
        st.markdown(prompt)
    st.session_state.messages2.append({"role": "user", "content": prompt})

    matching_docs = db.similarity_search(prompt)
    # The reply is the text of the first search result. Guard against an
    # empty result list, which would otherwise raise IndexError.
    if matching_docs:
        answer = matching_docs[0].page_content
    else:
        answer = "Sorry, I could not find anything relevant in the documents."

    with st.chat_message("assistant"):
        st.markdown(answer)
    st.session_state.messages2.append({"role": "assistant", "content": answer})
|
|
| |
|
|