# Source: Hugging Face Space "Bijay13/Pdf-RAG-Chatbot" (status: Sleeping; file size: 6,723 bytes)

import os
import gradio as gr
from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_community.document_loaders import PyPDFLoader
import tempfile
import shutil

# Groq-hosted chat model used for answering questions.
MODEL_NAME = "llama-3.3-70b-versatile"
# Key read from the environment; when empty, the UI asks the user for one.
DEFAULT_API_KEY = os.environ.get("GROQ_API_KEY", "")

# Module-level application state, replaced by process_pdf() and cleared by
# reset_chat().
vectorstore = conversation_chain = None
chat_history = []

def process_pdf(pdf_file, api_key):
    """Index an uploaded PDF and build the retrieval chat chain for it.

    The PDF is copied into a private temporary directory, loaded with
    ``PyPDFLoader``, split into overlapping chunks, embedded into a FAISS
    index, and connected to a Groq-hosted LLM through a
    ``ConversationalRetrievalChain`` with buffer memory.

    The module-level ``vectorstore``, ``conversation_chain`` and
    ``chat_history`` are replaced only after every step has succeeded, so a
    failed upload never leaves them half-updated.

    Args:
        pdf_file: The uploaded file: a filesystem path, or an object whose
            ``.name`` attribute holds the path. ``None`` means no upload.
        api_key: Groq API key. Surrounding whitespace is ignored.

    Returns:
        A ``(status_message, chatbot_value)`` tuple. ``chatbot_value`` is
        ``[]`` on success and ``None`` when validation or processing fails.
    """
    global vectorstore, conversation_chain, chat_history

    # Pasted keys often carry stray whitespace; a blank key is a missing key.
    api_key = (api_key or "").strip()
    if not api_key:
        return "Please provide a Groq API key first.", None

    if pdf_file is None:
        return "Please upload a PDF file.", None

    # Accept both plain paths and file-like wrappers that expose ``.name``.
    if isinstance(pdf_file, (str, os.PathLike)):
        source_path = pdf_file
    else:
        source_path = pdf_file.name

    try:
        # The context manager removes the temporary copy even when loading
        # fails, so errors no longer leak directories.
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_pdf_path = os.path.join(temp_dir, "uploaded.pdf")
            shutil.copy(source_path, temp_pdf_path)
            documents = PyPDFLoader(temp_pdf_path).load()

        # Split documents into chunks
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len
        )
        chunks = text_splitter.split_documents(documents)
        if not chunks:
            # An empty chunk list cannot be indexed; report it clearly
            # instead of surfacing an obscure vector-store error.
            return (
                "Error processing PDF: no extractable text was found "
                "(the file may be empty or contain only scanned images).",
                None,
            )

        # Create embeddings and vector store
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        new_vectorstore = FAISS.from_documents(chunks, embeddings)

        # Initialize LLM
        llm = ChatGroq(
            groq_api_key=api_key,
            model_name=MODEL_NAME,
            temperature=0.7,
            max_tokens=1024
        )

        # Create conversation chain
        memory = ConversationBufferMemory(
            memory_key="chat_history",
            return_messages=True,
            output_key="answer"
        )
        new_chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            retriever=new_vectorstore.as_retriever(search_kwargs={"k": 3}),
            memory=memory,
            return_source_documents=True
        )
    except Exception as e:
        return f"Error processing PDF: {str(e)}", None

    # Publish the new state only now that everything was built successfully.
    vectorstore = new_vectorstore
    conversation_chain = new_chain
    chat_history = []

    return f"✅ PDF processed successfully! Found {len(chunks)} text chunks. You can now ask questions about the document.", []

def chat_with_pdf(message, chat_history_ui, api_key):
    """Answer a question about the processed PDF and update the chat log.

    Args:
        message: Text typed by the user. ``None``, empty, or whitespace-only
            input is ignored.
        chat_history_ui: Current chatbot value, a list of
            ``{"role": ..., "content": ...}`` dicts. It is extended in
            place; ``None`` is treated as an empty history.
        api_key: Unused here; kept because the UI event handlers pass it.

    Returns:
        ``(chat_history_ui, "")``: the updated history plus an empty string
        for the message box.
    """
    if chat_history_ui is None:
        chat_history_ui = []

    # Nothing to send: leave the history untouched.
    if not message or not message.strip():
        return chat_history_ui, ""

    chat_history_ui.append({
        "role": "user",
        "content": message
    })

    if conversation_chain is None:
        chat_history_ui.append({
            "role": "assistant",
            "content": "Please upload a PDF file first before asking questions."
        })
        return chat_history_ui, ""

    try:
        # ``invoke`` is the supported entry point; calling the chain object
        # directly is deprecated in LangChain.
        response = conversation_chain.invoke({"question": message})
        reply = response["answer"]
    except Exception as e:
        # Show the failure in the chat instead of crashing the UI handler.
        reply = f"Error: {str(e)}"

    chat_history_ui.append({
        "role": "assistant",
        "content": reply
    })
    return chat_history_ui, ""

def reset_chat():
    """Forget the loaded document and start a fresh session.

    Drops the module-level retrieval chain and vector store, empties the
    stored history, and returns the new chatbot value and status message.
    """
    global conversation_chain, vectorstore, chat_history
    conversation_chain = vectorstore = None
    chat_history = []

    cleared_chat = []
    status = "Ready to upload a new PDF."
    return cleared_chat, status

# Assemble the Gradio UI: controls on the left, conversation on the right.
with gr.Blocks(title="PDF RAG Chatbot") as demo:
    # Page header.
    for _heading in (
        "# 📄 PDF RAG Chatbot",
        "Upload a PDF and chat with its content using AI",
        f"**Model:** `{MODEL_NAME}`",
    ):
        gr.Markdown(_heading)

    with gr.Row():
        with gr.Column(scale=1):
            # With a key from the environment the box is hidden and
            # pre-filled; otherwise the user has to type one in.
            if DEFAULT_API_KEY:
                _key_box_options = {
                    "type": "password",
                    "value": DEFAULT_API_KEY,
                    "visible": False,
                }
            else:
                _key_box_options = {
                    "label": "Groq API Key",
                    "placeholder": "Enter your Groq API key here...",
                    "type": "password",
                }
            api_key_input = gr.Textbox(**_key_box_options)

            pdf_upload = gr.File(
                label="Upload PDF", file_types=[".pdf"], type="filepath"
            )

            process_btn = gr.Button("Process PDF", variant="primary")
            status_text = gr.Textbox(
                label="Status",
                value="Upload a PDF to get started.",
                interactive=False,
                lines=3,
                max_lines=5,
            )

            clear_btn = gr.Button("Reset Chat", variant="stop")

        with gr.Column(scale=2):
            chatbot = gr.Chatbot(height=500)

            with gr.Row():
                msg = gr.Textbox(
                    label="Message",
                    placeholder="Ask a question about the PDF...",
                    scale=4,
                )
                submit_btn = gr.Button("Send", scale=1)

    # Setup instructions are only shown when the user must supply a key.
    if not DEFAULT_API_KEY:
        for _instruction in (
            "### Instructions:",
            "1. Get a free API key from [Groq Console](https://console.groq.com)",
            "2. Enter your API key above",
            "3. Upload a PDF file",
            "4. Ask questions about the content!",
        ):
            gr.Markdown(_instruction)

    # Event wiring.
    process_btn.click(
        process_pdf,
        inputs=[pdf_upload, api_key_input],
        outputs=[status_text, chatbot],
    )

    # Pressing Enter in the message box and clicking "Send" do the same thing.
    for _trigger in (msg.submit, submit_btn.click):
        _trigger(
            chat_with_pdf,
            inputs=[msg, chatbot, api_key_input],
            outputs=[chatbot, msg],
        )

    clear_btn.click(reset_chat, outputs=[chatbot, status_text])

# Start the app only when run as a script.
if __name__ == "__main__":
    demo.launch()