|
|
import streamlit as st |
|
|
from langchain.document_loaders import TextLoader |
|
|
from langchain.document_loaders import UnstructuredFileLoader |
|
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
|
from langchain.embeddings import HuggingFaceEmbeddings |
|
|
from langchain.vectorstores import FAISS |
|
|
from langchain.chains import RetrievalQA |
|
|
from langchain.llms import HuggingFaceHub |
|
|
import tempfile |
|
|
import os |
|
|
|
|
|
@st.cache_resource
def _build_vector_store(content_digest, _file_path):
    """Build the FAISS index for the text file at ``_file_path``.

    ``content_digest`` is the only argument Streamlit hashes, so the resource
    cache is keyed by document content. ``_file_path`` starts with an
    underscore so that Streamlit leaves it out of the cache key: the caller
    may pass a different temporary path for the same document on every rerun.

    Raises:
        ValueError: if the document yields no text chunks to index.
    """
    # Be explicit about the encoding instead of relying on the platform
    # default, which is not UTF-8 everywhere.
    loader = TextLoader(_file_path, encoding="utf-8")
    documents = loader.load()

    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(documents)
    if not chunks:
        # Fail with a readable message rather than letting the index
        # construction fail on an empty embedding list.
        raise ValueError(f"No text content to index in {_file_path!r}")

    embedding_model = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.from_documents(chunks, embedding_model)


def load_vector_store(file_path: str) -> FAISS:
    """Return a FAISS vector store for the text file at ``file_path``.

    The file is split into 500-character chunks (50 overlap), embedded with
    ``sentence-transformers/all-MiniLM-L6-v2`` and indexed with FAISS. The
    result is cached per distinct file *content*, so calling this again with
    the same document stored under a new path reuses the existing index.

    Args:
        file_path: Path to a UTF-8 encoded text file.

    Returns:
        The FAISS vector store built from the file's chunks.

    Raises:
        ValueError: if the file contains no text to index.
    """
    import hashlib  # local import: only needed to derive the cache key

    with open(file_path, "rb") as source:
        content_digest = hashlib.sha256(source.read()).hexdigest()
    return _build_vector_store(content_digest, file_path)
|
|
|
|
|
def main():
    """Render the Streamlit page: upload a ``.txt`` file and ask questions about it.

    The upload is written to a temporary file, indexed via
    ``load_vector_store``, and each question is answered by a ``RetrievalQA``
    chain that uses the ``google/flan-t5-base`` model through
    ``HuggingFaceHub``. The answer and the retrieved source chunks are shown
    on the page.
    """
    st.title("π Ask Questions About Your Document")
    st.write("Upload a `.txt` file and ask anything!")

    uploaded_file = st.file_uploader("Upload a text file", type=["txt"])
    if not uploaded_file:
        return

    # getvalue() returns the full buffer regardless of the current read
    # position; read() yields b"" once the buffer has been consumed.
    file_bytes = uploaded_file.getvalue()
    if not file_bytes.strip():
        st.warning("The uploaded file is empty.")
        return

    # The loader works on a path, so persist the upload to a temporary file.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as tmp_file:
        tmp_file.write(file_bytes)
        tmp_path = tmp_file.name

    try:
        db = load_vector_store(tmp_path)
    finally:
        # The index lives in memory once built; always remove the temporary
        # file, even when loading or embedding fails.
        os.remove(tmp_path)

    query = st.text_input("Enter your question:")
    if not query:
        return

    qa_chain = RetrievalQA.from_chain_type(
        llm=HuggingFaceHub(
            repo_id="google/flan-t5-base",
            model_kwargs={"temperature": 0.5, "max_length": 256},
        ),
        retriever=db.as_retriever(),
        return_source_documents=True,
    )

    # With return_source_documents=True the chain has two output keys
    # ("result" and "source_documents"); run() only supports a single output
    # key, so the chain is called directly and the dict is unpacked.
    response = qa_chain({"query": query})

    st.write("### π Answer")
    st.write(response["result"])

    with st.expander("Sources"):
        for source_doc in response["source_documents"]:
            st.text(source_doc.page_content)
|
|
|
|
|
# Script entry point: render the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|
|