"""Streamlit app for asking questions about uploaded PDF / text documents.

Uploaded files are loaded into LangChain documents, split into chunks,
embedded into a FAISS index and queried through a ``RetrievalQA`` chain that
is backed by a local ``google/flan-t5-base`` pipeline.
"""

import hashlib
import tempfile
from pathlib import Path

import streamlit as st
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFacePipeline
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from transformers import pipeline


# -------------------------------
# Load Documents
# -------------------------------
def load_documents(uploaded_files):
    """Load every uploaded file into a flat list of LangChain documents.

    Files whose name ends with ``.pdf`` (case-insensitive) are read with
    ``PyPDFLoader``; everything else is read with ``TextLoader``.

    Args:
        uploaded_files: Iterable of uploaded-file objects exposing ``name``
            and ``getbuffer()`` (as returned by ``st.file_uploader``).

    Returns:
        A list with the documents produced by all loaders, in upload order.
    """
    documents = []
    # The loaders need a path on disk. Write into a throwaway directory under
    # a generated name so the client-supplied file name never becomes part of
    # a filesystem path and nothing is left behind in the working directory.
    with tempfile.TemporaryDirectory() as tmp_dir:
        for index, file in enumerate(uploaded_files):
            suffix = Path(file.name).suffix.lower()
            tmp_path = Path(tmp_dir) / f"upload_{index}{suffix}"
            tmp_path.write_bytes(file.getbuffer())

            if file.name.lower().endswith(".pdf"):
                loader = PyPDFLoader(str(tmp_path))
            else:
                loader = TextLoader(str(tmp_path))

            loaded = loader.load()
            for doc in loaded:
                # Report the original upload name rather than the temp path.
                doc.metadata["source"] = file.name
            documents.extend(loaded)
    return documents


# -------------------------------
# Split Documents
# -------------------------------
def split_documents(documents):
    """Split documents into overlapping chunks for embedding.

    Uses ``RecursiveCharacterTextSplitter`` with ``chunk_size=500`` and
    ``chunk_overlap=50``.

    Args:
        documents: List of LangChain documents.

    Returns:
        The list of chunk documents produced by the splitter.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
    )
    return splitter.split_documents(documents)


# -------------------------------
# Create Vector Store
# -------------------------------
@st.cache_resource(show_spinner=False)
def _load_embeddings():
    """Return the sentence-transformers embedding model, loaded once per process."""
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )


def create_vectorstore(chunks):
    """Build a FAISS index over ``chunks``.

    Args:
        chunks: Non-empty list of chunk documents.

    Returns:
        The FAISS vector store created from the chunks.
    """
    return FAISS.from_documents(chunks, _load_embeddings())


# -------------------------------
# Load Local LLM (FREE)
# -------------------------------
@st.cache_resource(show_spinner=False)
def load_llm():
    """Return the local FLAN-T5 pipeline wrapped for LangChain.

    Cached with ``st.cache_resource`` so the model is not reloaded on every
    Streamlit rerun.
    """
    pipe = pipeline(
        "text2text-generation",
        model="google/flan-t5-base",
        max_length=512,
    )
    return HuggingFacePipeline(pipeline=pipe)


# -------------------------------
# Build QA Chain
# -------------------------------
def build_qa(vectorstore):
    """Create a ``RetrievalQA`` chain over ``vectorstore``.

    The chain uses the local LLM from ``load_llm`` and the vector store's
    default retriever. No custom prompt is configured, so the chain's
    default prompt is used.

    Args:
        vectorstore: Vector store exposing ``as_retriever()``.

    Returns:
        The configured ``RetrievalQA`` chain.
    """
    llm = load_llm()
    retriever = vectorstore.as_retriever()
    return RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
    )


def _upload_signature(uploaded_files):
    """Return a hashable fingerprint (name + content hash) of the uploads."""
    return tuple(
        (file.name, hashlib.sha256(file.getvalue()).hexdigest())
        for file in uploaded_files
    )


# -------------------------------
# Streamlit UI
# -------------------------------
st.set_page_config(page_title="RAG Chatbot", layout="wide")
st.title("📄 Chat with Your Documents (RAG)")

uploaded_files = st.file_uploader(
    "Upload PDF or TXT files",
    accept_multiple_files=True,
)

if uploaded_files:
    # Streamlit reruns this script on every interaction; only rebuild the
    # index and chain when the set of uploaded files actually changed.
    signature = _upload_signature(uploaded_files)
    if st.session_state.get("docs_signature") != signature:
        with st.spinner("Processing documents..."):
            docs = load_documents(uploaded_files)
            chunks = split_documents(docs)
            # FAISS cannot build an index from zero chunks.
            chain = build_qa(create_vectorstore(chunks)) if chunks else None
        st.session_state["qa_chain"] = chain
        st.session_state["docs_signature"] = signature

    qa_chain = st.session_state["qa_chain"]
    if qa_chain is None:
        st.error("No text could be extracted from the uploaded files.")
        st.stop()

    st.success("Documents ready!")

    query = st.text_input("Ask a question from your documents")

    if query:
        with st.spinner("Generating answer..."):
            # RetrievalQA takes its question under "query" and returns the
            # answer under "result".
            result = qa_chain.invoke({"query": query})["result"]

        st.write("### Answer:")
        st.write(result)