import gradio as gr
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.embeddings import HuggingFaceEmbeddings
from groq import Groq  # Using Groq's direct client instead of LangChain wrapper
from dotenv import load_dotenv

# Load variables from a .env file (if present) so the API key never has to
# be hard-coded in source control. The original committed a live key here,
# which is a security leak -- that key should be revoked.
load_dotenv()

GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Add it to your environment or a .env file."
    )

groq_client = Groq(api_key=GROQ_API_KEY)

# Single source of truth for the embedding model and index location: the
# same model MUST be used for indexing and querying or retrieval breaks.
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
INDEX_DIR = "faiss_index"


def _make_embeddings():
    """Return the HuggingFace embedding model used for both indexing and retrieval."""
    return HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)


def process_pdf(file):
    """Load an uploaded PDF, split it into chunks, and persist a FAISS index.

    Args:
        file: Gradio file object; ``file.name`` is the temp-file path on disk.

    Returns:
        (preview, info): text of the first chunk and a summary string, or
        (error message, "") if anything fails.
    """
    try:
        filepath = file.name  # Gradio hands us a temporary file path

        loader = PyPDFLoader(filepath)
        documents = loader.load()

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            separators=["\n\n", "\n", " ", ""],
        )
        docs = text_splitter.split_documents(documents)

        # Guard: a scanned or empty PDF produces no chunks; the original
        # crashed with IndexError on docs[0] in that case.
        if not docs:
            return "No extractable text found in this PDF.", ""

        vectorstore = FAISS.from_documents(docs, _make_embeddings())
        # Save vectorstore so answer_question() can reload it later.
        vectorstore.save_local(INDEX_DIR)

        doc_info = f"Processed {len(docs)} chunks from {len(documents)} pages."
        return docs[0].page_content, doc_info
    except Exception as e:
        # Surface the failure in the UI rather than crashing the app.
        return f"Error: {str(e)}", ""


def answer_question(question):
    """Answer a question against the saved FAISS index using Groq's LLaMA 3.

    Args:
        question: natural-language question about the processed PDF.

    Returns:
        The model's answer string, or an error message on failure.
    """
    try:
        # Reload the index built by process_pdf(). allow_dangerous_deserialization
        # is required by FAISS.load_local for pickled metadata; the index is
        # locally produced, not untrusted input.
        vectorstore = FAISS.load_local(
            INDEX_DIR, _make_embeddings(), allow_dangerous_deserialization=True
        )
        retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

        docs = retriever.invoke(question)
        context = "\n\n".join(doc.page_content for doc in docs)

        # Use proper chat-completion roles. The Groq API applies the model's
        # chat template server-side, so the raw <|begin_of_text|>/<|eot_id|>
        # template tokens the original embedded in a single user message were
        # redundant at best and could confuse the model.
        response = groq_client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a helpful AI assistant that answers questions "
                        "based on the provided context. Use only the information "
                        "from the context to answer the question. If you don't "
                        "know the answer, say you don't know. Be concise and "
                        "accurate in your responses."
                    ),
                },
                {
                    "role": "user",
                    "content": f"Context:\n{context}\n\nQuestion: {question}",
                },
            ],
            model="llama3-70b-8192",
            temperature=0,
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error: {str(e)}"


# --------------------------- Gradio UI ---------------------------
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 📄 PDF RAG System with Groq & LLaMA 3", elem_id="title")
    gr.Markdown("Upload a PDF file, process it, and ask questions using LLaMA 3. ✨")

    with gr.Tab("📂 Upload & Process"):
        with gr.Row():
            with gr.Column():
                pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
                process_button = gr.Button("🚀 Process PDF", variant="primary")
        doc_info = gr.Textbox(label="Document Info")
        output_text = gr.Textbox(
            label="First Chunk Preview",
            lines=12,
            placeholder="Processed PDF content will appear here...",
            show_copy_button=True,
        )
        process_button.click(
            fn=process_pdf,
            inputs=pdf_input,
            outputs=[output_text, doc_info],
        )

    with gr.Tab("❓ Ask Questions"):
        with gr.Row():
            with gr.Column():
                question_input = gr.Textbox(
                    label="Your Question",
                    placeholder="Ask something about the PDF content...",
                )
                ask_button = gr.Button("🔍 Get Answer", variant="primary")
        answer_output = gr.Textbox(label="Answer", lines=8, interactive=False)
        ask_button.click(
            fn=answer_question,
            inputs=question_input,
            outputs=answer_output,
        )

    gr.Markdown("Made with ❤ using Gradio + Groq + LLaMA 3")


if __name__ == "__main__":
    demo.launch()