"""Streamlit RAG chatbot over uploaded PDFs, with a direct-Groq fallback.

Pipeline: load PDFs -> chunk (CharacterTextSplitter) -> embed (MiniLM)
-> FAISS retrieval -> RetrievalQA answered by a Groq LLM. If retrieval
finds no relevant chunks, the question is sent straight to Groq.
"""

import os

import pdfplumber
import streamlit as st
import transformers
from langchain.chains import LLMChain, RetrievalQA
from langchain.chains.combine_documents.base import BaseCombineDocumentsChain
from langchain.docstore.document import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
# Use Groq LLM from langchain-community
from langchain_community.llms import Groq

# Name of the entry in .streamlit/secrets.toml holding the Groq API key.
# NOTE(security): never hard-code the key itself in source control.
GROQ_SECRET_NAME = "GROQ_API_KEY"
GROQ_MODEL = "mixtral-8x7b-32768"


def _make_groq_llm():
    """Build a Groq LLM client from the API key stored in Streamlit secrets."""
    return Groq(api_key=st.secrets[GROQ_SECRET_NAME], model=GROQ_MODEL)


def load_pdfs_from_folder(folder_path):
    """Read every ``*.pdf`` in *folder_path* into a langchain ``Document``.

    Args:
        folder_path: Directory to scan (non-recursive).

    Returns:
        list[Document]: one Document per PDF; ``metadata["source"]`` is the
        file name.
    """
    docs = []
    for fname in os.listdir(folder_path):
        if not fname.endswith(".pdf"):
            continue
        full_path = os.path.join(folder_path, fname)
        text = ""
        with pdfplumber.open(full_path) as pdf:
            for page in pdf.pages:
                # extract_text() returns None for image-only pages
                text += page.extract_text() or ""
        docs.append(Document(page_content=text, metadata={"source": fname}))
    return docs


def load_uploaded_pdfs(uploaded_files):
    """Read Streamlit ``UploadedFile`` objects into langchain ``Document``s.

    pdfplumber accepts file-like objects, so the uploads are parsed in
    memory without writing them to disk.

    Args:
        uploaded_files: iterable of Streamlit UploadedFile objects.

    Returns:
        list[Document]: one Document per upload; ``metadata["source"]`` is
        the uploaded file's name.
    """
    docs = []
    for uploaded in uploaded_files:
        text = ""
        with pdfplumber.open(uploaded) as pdf:
            for page in pdf.pages:
                text += page.extract_text() or ""
        docs.append(Document(page_content=text, metadata={"source": uploaded.name}))
    return docs


def ask_groq(question):
    """Answer *question* directly via the Groq LLM (retrieval fallback)."""
    return _make_groq_llm().predict(question)


def main():
    """Streamlit entry point: upload PDFs, ask a question, answer via RAG."""
    st.set_page_config(page_title="RAG Chatbot with Groq", layout="centered")
    st.title("📚 RAG Chatbot with Groq Fallback")

    uploaded_files = st.file_uploader(
        "Upload one or more PDF files", type=["pdf"], accept_multiple_files=True
    )
    question = st.text_input("Ask a question:")
    process_btn = st.button("Process and Answer")

    if process_btn and uploaded_files and question:
        with st.spinner("📂 Loading documents..."):
            docs = load_uploaded_pdfs(uploaded_files)

        # Chunking
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        chunks = text_splitter.split_documents(docs)

        # Embedding
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        db = FAISS.from_documents(chunks, embeddings)
        retriever = db.as_retriever(search_kwargs={"k": 3})

        # Check if relevant documents exist; fall back to plain Groq if not
        relevant_docs = retriever.get_relevant_documents(question)
        if not relevant_docs:
            st.warning("❓ No relevant documents found. Asking Groq LLM...")
            answer = ask_groq(question)
        else:
            llm = _make_groq_llm()
            qa = RetrievalQA.from_chain_type(
                llm=llm, retriever=retriever, return_source_documents=True
            )
            result = qa({"query": question})
            answer = result['result']

        st.success("💬 Answer:")
        st.write(answer)


if __name__ == "__main__":
    main()