| import os |
| import gradio as gr |
| from langchain_groq import ChatGroq |
| from langchain_community.document_loaders import PyPDFLoader |
| from langchain_text_splitters import RecursiveCharacterTextSplitter |
| from langchain_huggingface import HuggingFaceEmbeddings |
| from langchain_community.vectorstores import FAISS |
| from langchain.chains import ConversationalRetrievalChain |
| from langchain.memory import ConversationBufferMemory |
|
|
| |
| |
| |
|
|
def initialize_llm():
    """Construct the Groq-hosted Llama3 chat model.

    Temperature 0 keeps answers deterministic for a given retrieval context.
    """
    llm = ChatGroq(temperature=0, model_name="llama3-8b-8192")
    return llm
|
|
def process_pdfs(files):
    """Load uploaded PDFs and build a FAISS vector index over their text.

    Parameters
    ----------
    files : list | None
        Gradio file objects; each exposes a ``.name`` filesystem path.

    Returns
    -------
    tuple
        ``(vectorstore, status_message)`` — the FAISS store on success,
        or ``None`` plus an explanatory message on failure.
    """
    if not files:
        return None, "No files uploaded."

    # 1. Extract text from every uploaded PDF (PyPDFLoader yields one
    #    Document per page).
    documents = []
    for file in files:
        loader = PyPDFLoader(file.name)
        documents.extend(loader.load())

    # 2. Split into overlapping chunks so retrieval returns focused passages.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200
    )
    splits = text_splitter.split_documents(documents)

    # Guard: scanned/image-only PDFs can produce zero text chunks, and
    # FAISS.from_documents raises on an empty document list. Honor the
    # (None, message) failure contract instead of crashing the callback.
    if not splits:
        return None, "No extractable text found in the uploaded PDF(s)."

    # 3. Embed the chunks and index them for similarity search.
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

    vectorstore = FAISS.from_documents(splits, embeddings)

    return vectorstore, f"Processed {len(files)} PDF(s) successfully!"
|
|
def create_conversation_chain(vectorstore):
    """Assemble the conversational RAG chain: LLM + retriever + chat memory."""
    chat_model = initialize_llm()

    # Buffer memory stores the full dialogue under the key the chain expects.
    chat_memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
    )

    return ConversationalRetrievalChain.from_llm(
        llm=chat_model,
        retriever=vectorstore.as_retriever(),
        memory=chat_memory,
    )
|
|
| |
# Module-level handle to the active RAG chain. Set by handle_file_upload once
# documents are processed; read by handle_user_query on every chat turn.
conversation = None
|
|
def handle_file_upload(files):
    """Gradio callback: index the uploaded PDFs and (re)build the chat chain.

    On success the module-level ``conversation`` chain is replaced; on
    failure it is left untouched. Returns the status string shown in the UI.
    """
    global conversation
    vectorstore, message = process_pdfs(files)
    # Explicit None check: process_pdfs returns None on failure. Relying on
    # truthiness of a third-party vector-store object is fragile (an empty
    # but valid store could be falsy).
    if vectorstore is not None:
        conversation = create_conversation_chain(vectorstore)
    return message
|
|
def handle_user_query(message, history):
    """Gradio ChatInterface callback: answer ``message`` with the RAG chain.

    ``history`` is supplied by Gradio but unused — the chain maintains its
    own ConversationBufferMemory. Returns the answer string for display.
    """
    global conversation
    if conversation is None:
        return "Please upload and process PDF documents first."

    # .invoke() is the supported entry point; calling the chain directly
    # (Chain.__call__) is deprecated in modern LangChain.
    response = conversation.invoke({"question": message})
    return response["answer"]
|
|
| |
# --- Gradio UI ---------------------------------------------------------------
# Layout: upload row (file picker + process button), a status line, and a
# chat panel, all inside one Blocks context.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📄 RAG Chatbot with Groq & Llama3")
    gr.Markdown("Upload PDFs, process them, and then ask questions based on their content.")

    with gr.Row():
        # Restrict the picker to PDFs; multiple files allowed per upload.
        file_input = gr.File(label="Upload PDFs", file_count="multiple", file_types=[".pdf"])
        upload_btn = gr.Button("Process Documents")

    # Read-only status line written by handle_file_upload.
    status_text = gr.Textbox(label="Status", interactive=False)

    # Chat panel; Gradio passes (message, history) to handle_user_query.
    chatbot = gr.ChatInterface(
        fn=handle_user_query,
        title="Chat with your PDFs",
        description="Ask questions about the uploaded documents."
    )

    # Wire the button: process the selected files, show the result message.
    upload_btn.click(
        fn=handle_file_upload,
        inputs=[file_input],
        outputs=[status_text]
    )
|
|
# Start the Gradio server only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()