|
|
import streamlit as st |
|
|
|
|
|
from langchain.storage import LocalFileStore |
|
|
from langchain.embeddings import CacheBackedEmbeddings |
|
|
from langchain.vectorstores import FAISS |
|
|
|
|
|
from PIL import Image |
|
|
|
|
|
from langchain.document_loaders import WebBaseLoader |
|
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
|
|
|
|
|
|
|
# Show the logo at the top of the sidebar.
st.sidebar.image(Image.open("./test-logo.png"), use_column_width=True)

print("Loading Index Page!!")

# The vector store is read from the session state rather than built here
# (presumably it is created by another page of the app -- verify against
# the entry point). Without it nothing can be indexed, so show a readable
# message and halt the script instead of failing with a bare KeyError.
if 'vectorstore' not in st.session_state:
    st.error("Vector store has not been initialised for this session yet.")
    st.stop()

vectorstore = st.session_state['vectorstore']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _text_splitter(doc, chunk_size=600, chunk_overlap=50):
    """Split documents into overlapping, character-based chunks.

    Args:
        doc: Documents to split, e.g. the list returned by
            ``WebBaseLoader(...).load()``.
        chunk_size: Maximum chunk length, measured with ``len``.
        chunk_overlap: Number of characters neighbouring chunks may share.

    Returns:
        The chunked documents produced by
        ``RecursiveCharacterTextSplitter.transform_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    return splitter.transform_documents(doc)
|
|
|
|
|
def _load_docs(path: str):
    """Load the web page at *path* and return it as split chunks.

    The page is fetched with ``WebBaseLoader`` and the loaded documents are
    passed through ``_text_splitter`` before being returned.
    """
    return _text_splitter(WebBaseLoader(path).load())
|
|
|
|
|
|
|
|
# Form that takes a web link, loads and chunks the page, and adds the chunks
# to the vector store held in the session state.
with st.form("Index documents to Vector Store"):
    file_path = st.text_input(
        label="Enter the web link",
        value="",
        placeholder="",
        label_visibility="visible",
        disabled=False,
    )
    print("file_path ", file_path)

    submitted = st.form_submit_button("Submit")

    if submitted:
        web_link = file_path.strip()

        if not web_link:
            # Nothing to fetch; avoid handing an empty URL to the loader.
            st.warning("Please enter a web link before submitting.")
        else:
            st.write("Submitted web link: " + web_link)

            # _load_docs already returns split chunks, so they are used
            # directly instead of being run through the splitter again.
            webpage_chunks = _load_docs(web_link)

            if not webpage_chunks:
                # Do not call add_documents with an empty batch.
                st.warning("No text could be extracted from the submitted web link.")
            else:
                # Debug output only: serialising the whole store just to
                # report its size is expensive for large indexes.
                print("vectorstore length before addition, ", len(vectorstore.serialize_to_bytes()))
                vectorstore.add_documents(webpage_chunks)
                print("vectorstore length after addition, ", len(vectorstore.serialize_to_bytes()))

                # Publish the updated store, a retriever built from it, and
                # the 'docadd' flag through the session state.
                st.session_state['vectorstore'] = vectorstore
                st.session_state['retriever'] = vectorstore.as_retriever()
                st.session_state['docadd'] = 1

                st.markdown('<h2 style="color:#100170;font-size:24px;">Document loaded to vector store successfully!!</h2>', unsafe_allow_html=True)
|
|
|