Spaces:
Sleeping
Sleeping
| import streamlit as st | |
# Imports follow the split LangChain package layout (langchain_community, langchain_text_splitters)
| from langchain_community.document_loaders import PyPDFLoader, TextLoader | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_community.embeddings import HuggingFaceEmbeddings | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_community.llms import HuggingFacePipeline | |
| from langchain.chains import RetrievalQA | |
| from transformers import pipeline | |
| # ------------------------------- | |
| # Load Documents | |
| # ------------------------------- | |
def load_documents(uploaded_files):
    """Load every uploaded file into a flat list of LangChain documents.

    Each upload is spooled to a private temporary file (the loaders used here
    take a filesystem path), parsed, and the temporary file is removed again.
    Files whose extension is ``.pdf`` (case-insensitive) go through
    ``PyPDFLoader``; everything else is read with ``TextLoader``.

    Args:
        uploaded_files: Iterable of upload objects exposing ``.name`` and
            ``.getbuffer()`` (as returned by ``st.file_uploader``).

    Returns:
        A list with the documents of all uploads, in upload order. Empty when
        ``uploaded_files`` is empty.
    """
    # Function-scope imports keep this block self-contained.
    import os
    import tempfile

    documents = []
    for file in uploaded_files:
        # Only the extension of the client-supplied name is used on disk, so a
        # crafted name such as "../../app.py" cannot overwrite anything.
        suffix = os.path.splitext(file.name)[1].lower()

        # delete=False + explicit close: the loader reopens the path itself,
        # which fails on some platforms while the handle is still open.
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
            tmp.write(file.getbuffer())
            tmp_path = tmp.name

        try:
            if suffix == ".pdf":
                loader = PyPDFLoader(tmp_path)
            else:
                loader = TextLoader(tmp_path)
            loaded = loader.load()
        finally:
            os.remove(tmp_path)

        # Report the original upload name as the source instead of the
        # throw-away temp path.
        for doc in loaded:
            doc.metadata["source"] = file.name
        documents.extend(loaded)
    return documents
| # ------------------------------- | |
| # Split Documents | |
| # ------------------------------- | |
def split_documents(documents, chunk_size=500, chunk_overlap=50):
    """Split documents into overlapping chunks for embedding.

    Args:
        documents: Documents to split (e.g. the output of ``load_documents``).
        chunk_size: Maximum chunk length passed to the splitter. Defaults to
            500, the value previously hard-coded here.
        chunk_overlap: Overlap between consecutive chunks passed to the
            splitter. Defaults to 50, the value previously hard-coded here.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_documents`` returns
        for ``documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(documents)
| # ------------------------------- | |
| # Create Vector Store | |
| # ------------------------------- | |
def create_vectorstore(
    chunks,
    model_name="sentence-transformers/all-MiniLM-L6-v2",
):
    """Embed the chunks and index them in an in-memory FAISS store.

    Args:
        chunks: Non-empty sequence of document chunks to index.
        model_name: Name of the sentence-embedding model handed to
            ``HuggingFaceEmbeddings``. Defaults to the model this app has
            always used.

    Returns:
        The store produced by ``FAISS.from_documents``.

    Raises:
        ValueError: If ``chunks`` is empty, e.g. because the uploaded files
            contained no extractable text.
    """
    # Fail early with a clear message, before the embedding model is loaded;
    # an empty list otherwise surfaces as an opaque error inside the FAISS
    # construction.
    if not chunks:
        raise ValueError(
            "Cannot build a vector store: no document chunks were provided."
        )

    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    return FAISS.from_documents(chunks, embeddings)
| # ------------------------------- | |
| # Load Local LLM (FREE) | |
| # ------------------------------- | |
# Streamlit re-executes the script on every interaction; caching the resource
# keeps one loaded model per server process instead of rebuilding it each time.
@st.cache_resource
def load_llm():
    """Build the local text-generation model wrapped for LangChain.

    Returns:
        A ``HuggingFacePipeline`` around a ``google/flan-t5-base``
        transformers pipeline with ``max_length=512``.
    """
    pipe = pipeline(
        # FLAN-T5 is an encoder-decoder model, so it is driven through the
        # text2text-generation task.
        "text2text-generation",
        model="google/flan-t5-base",
        max_length=512,
    )
    return HuggingFacePipeline(pipeline=pipe)
| # ------------------------------- | |
# Build QA Chain (uses the chain's default prompt; no custom strict prompt is configured)
| # ------------------------------- | |
def build_qa(vectorstore):
    """Assemble a retrieval question-answering chain over ``vectorstore``.

    The language model comes from ``load_llm()`` and the retriever from
    ``vectorstore.as_retriever()``; both are passed to
    ``RetrievalQA.from_chain_type`` with no further options.

    Args:
        vectorstore: Store exposing ``as_retriever()``.

    Returns:
        The chain built by ``RetrievalQA.from_chain_type``.
    """
    # The model is created first, then the retriever, as before.
    chain_parts = {
        "llm": load_llm(),
        "retriever": vectorstore.as_retriever(),
    }
    return RetrievalQA.from_chain_type(**chain_parts)
| # ------------------------------- | |
| # Streamlit UI | |
| # ------------------------------- | |
# Page flow: upload files -> build (or reuse) the QA chain -> answer questions.
st.set_page_config(page_title="RAG Chatbot", layout="wide")
# NOTE(review): the leading "π" looks like a mis-encoded emoji; restore the
# intended character once it is known.
st.title("π Chat with Your Documents (RAG)")

uploaded_files = st.file_uploader(
    "Upload PDF or TXT files",
    accept_multiple_files=True,
)

if uploaded_files:
    # Streamlit re-executes this script on every interaction, including each
    # submitted question. The chain is kept in session state and rebuilt only
    # when the set of uploads changes.
    # NOTE(review): uploads are identified by (name, size); two different files
    # sharing both would be treated as unchanged.
    upload_key = tuple((f.name, f.size) for f in uploaded_files)

    if st.session_state.get("qa_upload_key") != upload_key:
        with st.spinner("Processing documents..."):
            docs = load_documents(uploaded_files)
            chunks = split_documents(docs)
            # No chunks means nothing could be extracted (e.g. an image-only
            # PDF); skip building the index in that case.
            new_chain = build_qa(create_vectorstore(chunks)) if chunks else None

        if new_chain is None:
            st.error("No readable text was found in the uploaded files.")
            st.stop()

        st.session_state["qa_chain"] = new_chain
        st.session_state["qa_upload_key"] = upload_key

    qa_chain = st.session_state["qa_chain"]
    st.success("Documents ready!")

    query = st.text_input("Ask a question from your documents")
    if query:
        with st.spinner("Generating answer..."):
            # invoke() replaces the deprecated Chain.run(); RetrievalQA takes
            # the question under "query" and returns the answer under "result".
            result = qa_chain.invoke({"query": query})["result"]
        st.write("### Answer:")
        st.write(result)