import os
import shutil
import tempfile

import gradio as gr
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq

# Groq model used to answer questions about the uploaded PDF.
MODEL_NAME = "llama-3.3-70b-versatile"
# Optional key from the environment; when set, the UI hides the key field.
DEFAULT_API_KEY = os.getenv("GROQ_API_KEY", "")

# Global state shared across Gradio callbacks (single-user app; not
# safe for concurrent multi-user sessions — NOTE(review): confirm the
# deployment is single-user, otherwise move this into gr.State).
vectorstore = None
conversation_chain = None
chat_history = []


def process_pdf(pdf_file, api_key):
    """Load a PDF, index it in FAISS, and build the RAG conversation chain.

    Args:
        pdf_file: Value from ``gr.File``. With ``type="filepath"`` (as this
            app configures it) this is a ``str`` path; older Gradio versions
            pass a tempfile-like object with a ``.name`` attribute — both
            are handled here.
        api_key: Groq API key used to construct the ChatGroq LLM.

    Returns:
        ``(status_message, chatbot_state)`` — ``chatbot_state`` is ``[]`` on
        success (clears the chat UI) and ``None`` on failure.
    """
    global vectorstore, conversation_chain, chat_history

    if not api_key:
        return "Please provide a Groq API key first.", None
    if pdf_file is None:
        return "Please upload a PDF file.", None

    temp_dir = tempfile.mkdtemp()
    try:
        # Bug fix: gr.File(type="filepath") yields a plain string, which has
        # no .name attribute; the original unconditionally used pdf_file.name
        # and crashed. Support both the string and file-like forms.
        src_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
        temp_pdf_path = os.path.join(temp_dir, "uploaded.pdf")
        shutil.copy(src_path, temp_pdf_path)

        # Load the PDF and split it into overlapping chunks for retrieval.
        documents = PyPDFLoader(temp_pdf_path).load()
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
        )
        chunks = text_splitter.split_documents(documents)

        # Embed the chunks and build the FAISS vector store.
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        vectorstore = FAISS.from_documents(chunks, embeddings)

        # LLM + conversational memory + retrieval chain.
        llm = ChatGroq(
            groq_api_key=api_key,
            model_name=MODEL_NAME,
            temperature=0.7,
            max_tokens=1024,
        )
        memory = ConversationBufferMemory(
            memory_key="chat_history",
            return_messages=True,
            output_key="answer",
        )
        conversation_chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
            memory=memory,
            return_source_documents=True,
        )

        # Start every newly processed document with a clean history.
        chat_history = []

        return (
            f"✅ PDF processed successfully! Found {len(chunks)} text chunks. "
            "You can now ask questions about the document.",
            [],
        )
    except Exception as e:
        return f"Error processing PDF: {str(e)}", None
    finally:
        # Bug fix: the original only removed temp_dir on the success path,
        # leaking a temp directory on every failure. Always clean up.
        shutil.rmtree(temp_dir, ignore_errors=True)


def chat_with_pdf(message, chat_history_ui, api_key):
    """Answer a user question against the indexed PDF via the RAG chain.

    Args:
        message: The user's question (whitespace-only input is ignored).
        chat_history_ui: Chatbot state — a list of ``{"role", "content"}``
            dicts, mutated in place and returned.
        api_key: Accepted for Gradio wiring compatibility; the key was
            already baked into the chain at process time, so it is unused
            here.

    Returns:
        ``(updated_chat_history, "")`` — the empty string clears the
        message textbox.
    """
    global conversation_chain

    if not message.strip():
        return chat_history_ui, ""

    if conversation_chain is None:
        chat_history_ui.append({"role": "user", "content": message})
        chat_history_ui.append({
            "role": "assistant",
            "content": "Please upload a PDF file first before asking questions.",
        })
        return chat_history_ui, ""

    chat_history_ui.append({"role": "user", "content": message})
    try:
        # .invoke() is the supported Runnable API; calling the chain object
        # directly (chain({...})) is deprecated in modern LangChain.
        response = conversation_chain.invoke({"question": message})
        chat_history_ui.append({"role": "assistant", "content": response["answer"]})
    except Exception as e:
        chat_history_ui.append({"role": "assistant", "content": f"Error: {str(e)}"})
    return chat_history_ui, ""


def reset_chat():
    """Drop the chain, vector store, and history so a new PDF can be loaded.

    Returns:
        ``([], status_message)`` — clears the Chatbot and updates the
        status textbox.
    """
    global conversation_chain, vectorstore, chat_history
    conversation_chain = None
    vectorstore = None
    chat_history = []
    return [], "Ready to upload a new PDF."
# ---------------------------------------------------------------------------
# Gradio interface: left column = setup (API key, upload, status/reset),
# right column = chat. Handlers are wired at the bottom of the Blocks scope.
# ---------------------------------------------------------------------------
with gr.Blocks(title="PDF RAG Chatbot") as demo:
    gr.Markdown("# 📄 PDF RAG Chatbot")
    gr.Markdown("Upload a PDF and chat with its content using AI")
    gr.Markdown(f"**Model:** `{MODEL_NAME}`")

    with gr.Row():
        with gr.Column(scale=1):
            if not DEFAULT_API_KEY:
                # No key in the environment: ask the user for one.
                api_key_input = gr.Textbox(
                    label="Groq API Key",
                    placeholder="Enter your Groq API key here...",
                    type="password"
                )
            else:
                # Key provided via GROQ_API_KEY: keep the component (the
                # event handlers read it) but hide it from the UI.
                api_key_input = gr.Textbox(
                    type="password",
                    value=DEFAULT_API_KEY,
                    visible=False
                )

            # type="filepath" delivers a plain string path to process_pdf.
            pdf_upload = gr.File(
                label="Upload PDF",
                file_types=[".pdf"],
                type="filepath"
            )
            process_btn = gr.Button("Process PDF", variant="primary")
            status_text = gr.Textbox(
                label="Status",
                value="Upload a PDF to get started.",
                interactive=False,
                lines=3,
                max_lines=5
            )
            clear_btn = gr.Button("Reset Chat", variant="stop")

        with gr.Column(scale=2):
            # Bug fix: chat_with_pdf appends {"role", "content"} dicts, which
            # requires the openai-style "messages" format — the Gradio 4.x
            # default ("tuples") cannot render them.
            chatbot = gr.Chatbot(type="messages", height=500)
            with gr.Row():
                msg = gr.Textbox(
                    label="Message",
                    placeholder="Ask a question about the PDF...",
                    scale=4
                )
                submit_btn = gr.Button("Send", scale=1)

    if not DEFAULT_API_KEY:
        gr.Markdown("### Instructions:")
        gr.Markdown("1. Get a free API key from [Groq Console](https://console.groq.com)")
        gr.Markdown("2. Enter your API key above")
        gr.Markdown("3. Upload a PDF file")
        gr.Markdown("4. Ask questions about the content!")

    # Event handlers
    process_btn.click(
        process_pdf,
        inputs=[pdf_upload, api_key_input],
        outputs=[status_text, chatbot]
    )
    # Enter key and Send button share the same handler and wiring.
    msg.submit(
        chat_with_pdf,
        inputs=[msg, chatbot, api_key_input],
        outputs=[chatbot, msg]
    )
    submit_btn.click(
        chat_with_pdf,
        inputs=[msg, chatbot, api_key_input],
        outputs=[chatbot, msg]
    )
    clear_btn.click(
        reset_chat,
        outputs=[chatbot, status_text]
    )

if __name__ == "__main__":
    demo.launch()