|
|
|
|
|
import streamlit as st |
|
|
from langchain.document_loaders import TextLoader |
|
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
|
from langchain.embeddings import HuggingFaceEmbeddings |
|
|
from langchain.vectorstores import FAISS |
|
|
from langchain.chains import RetrievalQA |
|
|
from langchain.llms import HuggingFaceHub |
|
|
|
|
|
@st.cache_resource
def load_vector_store():
    """Build the FAISS vector store for ``data/sample.txt``.

    The text file is loaded, split with a ``RecursiveCharacterTextSplitter``
    (``chunk_size=500``, ``chunk_overlap=50``), embedded with the
    ``sentence-transformers/all-MiniLM-L6-v2`` Hugging Face model and
    indexed in FAISS. The function is wrapped in ``st.cache_resource``.

    Returns:
        The FAISS store created from the document chunks.
    """
    raw_docs = TextLoader("data/sample.txt").load()

    pieces = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
    ).split_documents(raw_docs)

    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.from_documents(pieces, embedder)
|
|
|
|
|
def main():
    """Render the Streamlit page and answer a question about the document.

    Shows the title and intro text, reads a question from a text input and,
    when one is entered, builds a ``RetrievalQA`` chain on top of the vector
    store returned by ``load_vector_store()`` and writes the generated
    answer to the page. Nothing is queried while the input is empty.
    """
    st.title("π Ask Your Document (RAG with LangChain + Hugging Face)")
    st.write("Upload a document, ask questions, and get answers powered by open-source LLMs!")

    query = st.text_input("Enter your question:")
    if not query:
        # No question yet: leave only the header and input box on the page.
        return

    db = load_vector_store()
    qa_chain = RetrievalQA.from_chain_type(
        llm=HuggingFaceHub(
            repo_id="google/flan-t5-base",
            model_kwargs={"temperature": 0.5, "max_length": 256},
        ),
        retriever=db.as_retriever(),
        return_source_documents=True,
    )

    # With return_source_documents=True the chain produces two output keys
    # ("result" and "source_documents"). Chain.run() only supports chains
    # with exactly one output key and raises ValueError here, so the chain
    # is called with an input dict and the answer is read from "result".
    response = qa_chain({"query": query})

    st.write("### π Answer")
    st.write(response["result"])
|
|
|
|
|
# Script entry point: build the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|
|