"""PDF question-answering chatbot: Gradio UI + FAISS retrieval + Groq LLM.

Uploaded PDFs are split into chunks, embedded, and added to a single
in-memory FAISS index. Questions are answered by a LangChain pipeline that
retrieves the TOP_K most similar chunks and passes them to the Groq model.
"""

import os

import gradio as gr
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_groq import ChatGroq
from langchain_text_splitters import RecursiveCharacterTextSplitter

# ───────────────────────── CONFIG ─────────────────────────

EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
GROQ_MODEL = "llama-3.1-8b-instant"
TOP_K = 3

# ───────────────────────── INIT MODELS ─────────────────────────

embeddings = HuggingFaceEmbeddings(
    model_name=EMBED_MODEL,
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": True},
)


def create_llm():
    """Return a ChatGroq client configured from the GROQ_API_KEY env var.

    Raises:
        RuntimeError: if GROQ_API_KEY is unset or empty.
    """
    # Read the key lazily: assigning os.getenv(...) back into os.environ
    # raises an opaque TypeError when the variable is missing.
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        raise RuntimeError(
            "GROQ_API_KEY environment variable is not set; "
            "set it before processing a PDF."
        )
    return ChatGroq(
        model=GROQ_MODEL,
        temperature=0.2,
        max_tokens=1024,
        groq_api_key=api_key,
    )


RAG_PROMPT = ChatPromptTemplate.from_template("""
You are a helpful assistant.
Answer ONLY using the context below.
If not found, say you don't have enough information.

Context:
{context}

Question:
{question}

Answer:
""")


def format_docs(docs):
    """Join the page contents of *docs* into one blank-line-separated string."""
    return "\n\n".join(d.page_content for d in docs)


# ───────────────────────── GLOBAL STATE ─────────────────────────

# Shared by process_pdf() (writer) and chat() (reader); both stay None until
# the first PDF has been processed successfully.
vectorstore = None
rag_chain = None

# ───────────────────────── PROCESS PDF ─────────────────────────


def process_pdf(file):
    """Index an uploaded PDF and (re)build the global RAG chain.

    Args:
        file: the value delivered by the ``gr.File`` input - either an object
            exposing the path as ``.name`` or a plain path string; ``None``
            when nothing was uploaded.

    Returns:
        A status message for the UI.
    """
    global vectorstore, rag_chain

    if file is None:
        return "Upload a PDF first."

    # Build the LLM before touching the index so a missing API key does not
    # leave the global state half-updated.
    try:
        llm = create_llm()
    except RuntimeError as exc:
        return f"❌ {exc}"

    # Accept both an object with a .name attribute and a bare path string.
    path = getattr(file, "name", file)

    # Load
    docs = PyPDFLoader(path).load()

    # Split
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(docs)
    if not chunks:
        # Building a FAISS index from zero documents fails, so report the
        # problem instead of crashing.
        return "⚠️ No extractable text found in this PDF."

    # Vector store: the first upload creates the index, later ones extend it.
    if vectorstore is None:
        vectorstore = FAISS.from_documents(chunks, embeddings)
    else:
        vectorstore.add_documents(chunks)

    retriever = vectorstore.as_retriever(search_kwargs={"k": TOP_K})

    rag_chain = (
        {
            "context": retriever | format_docs,
            "question": RunnablePassthrough(),
        }
        | RAG_PROMPT
        | llm
        | StrOutputParser()
    )

    return f"✅ PDF processed successfully!\nChunks: {len(chunks)}"


# ───────────────────────── CHAT FUNCTION ─────────────────────────


def chat(message, history):
    """Answer *message* through the RAG chain and append the turn to *history*.

    Args:
        message: the user's question.
        history: list of ``{"role": ..., "content": ...}`` message dicts, or
            ``None`` when the conversation is empty.

    Returns:
        A ``("", history)`` tuple: the empty string clears the input textbox,
        and ``history`` carries the new user and assistant messages.
    """
    history = history or []

    # Ignore blank submissions rather than spending an LLM call on them.
    if not message or not message.strip():
        return "", history

    if rag_chain is None:
        reply = "Please upload a PDF first."
    else:
        reply = rag_chain.invoke(message)

    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": reply})
    return "", history


# ───────────────────────── UI ─────────────────────────

with gr.Blocks(title="RAG Chatbot") as demo:
    gr.Markdown("## 📄 PDF RAG Chatbot (Groq + FAISS + LangChain)")

    with gr.Row():
        file = gr.File(label="Upload PDF")
        upload_btn = gr.Button("Process PDF")

    status = gr.Textbox(label="Status")

    # NOTE(review): chat() emits role/content dict messages; on Gradio
    # versions where Chatbot defaults to the tuple format this presumably
    # needs type="messages" - confirm against the installed version.
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Ask a question")

    upload_btn.click(process_pdf, inputs=file, outputs=status)
    msg.submit(chat, inputs=[msg, chatbot], outputs=[msg, chatbot])

# Launch only when run as a script so importing this module has no side
# effect beyond building the UI and loading the embedding model.
if __name__ == "__main__":
    demo.launch()