import os
import tempfile

import streamlit as st
from langchain.document_loaders import TextLoader
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub


def _build_vector_store(file_path):
    """Load the text file at *file_path*, chunk it, embed it and index it in FAISS."""
    loader = TextLoader(file_path)
    documents = loader.load()

    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(documents)

    embedding_model = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.from_documents(chunks, embedding_model)


@st.cache_resource
def load_vector_store(file_path):
    """Return a FAISS vector store built from the text file at *file_path*.

    The result is cached by Streamlit, keyed on ``file_path``.

    Args:
        file_path: Path to a plain-text file readable by ``TextLoader``.

    Returns:
        The FAISS index built from the file's 500-character chunks
        (50-character overlap).
    """
    return _build_vector_store(file_path)


@st.cache_resource
def _load_vector_store_from_bytes(content):
    """Return a FAISS vector store for the raw bytes of an uploaded text file.

    Caching on the file *content* (rather than on a temp-file path, which is
    different on every Streamlit rerun) lets reruns triggered by typing a
    question reuse the already-built index instead of re-embedding the
    document each time.

    Args:
        content: The uploaded file's bytes.

    Returns:
        The FAISS index built from the content.
    """
    # TextLoader needs a path, so spill the bytes to a temp file first.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as tmp_file:
        tmp_file.write(content)
        tmp_path = tmp_file.name
    try:
        return _build_vector_store(tmp_path)
    finally:
        # Always remove the temp file, even if loading or embedding fails.
        os.remove(tmp_path)


def main():
    """Render the Streamlit page: upload a ``.txt`` file and ask questions about it."""
    st.title("📄 Ask Questions About Your Document")
    st.write("Upload a `.txt` file and ask anything!")

    uploaded_file = st.file_uploader("Upload a text file", type=["txt"])
    if not uploaded_file:
        return

    # getvalue() returns the full buffer regardless of the current read
    # position, unlike read(), which yields b"" once the stream is consumed.
    content = uploaded_file.getvalue()
    if not content.strip():
        # An empty document produces no chunks, and an index cannot be built
        # from zero chunks.
        st.warning("The uploaded file is empty.")
        return

    db = _load_vector_store_from_bytes(content)

    query = st.text_input("Enter your question:")
    if not query:
        return

    qa_chain = RetrievalQA.from_chain_type(
        llm=HuggingFaceHub(
            repo_id="google/flan-t5-base",
            model_kwargs={"temperature": 0.5, "max_length": 256},
        ),
        retriever=db.as_retriever(),
        return_source_documents=True,
    )

    # With return_source_documents=True the chain has two output keys
    # ("result" and "source_documents"); Chain.run() only supports chains
    # with exactly one output key and raises ValueError here, so call the
    # chain directly and pick the answer out of the returned dict.
    response = qa_chain({"query": query})

    st.write("### 📌 Answer")
    st.write(response["result"])

    sources = response.get("source_documents") or []
    if sources:
        with st.expander("Source passages"):
            for doc in sources:
                st.write(doc.page_content)


if __name__ == "__main__":
    main()