"""RAG PDF question-answering app: upload a PDF, index it with FAISS, answer
questions via a Groq-hosted LLM grounded ONLY in the retrieved context."""

import os
import tempfile

import gradio as gr
from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from groq import Groq

# ================== LOAD ENV ==================
load_dotenv()

# BUG FIX: the original passed the literal API key as the *name* of the env var
# (os.getenv("gsk_...")), which returns None and makes startup always fail —
# and it leaked a secret in source. Read the conventional variable name instead.
# SECURITY NOTE(review): the key that was hardcoded here should be rotated.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise ValueError("❌ GROQ_API_KEY not found. Please set it in Hugging Face Secrets.")

client = Groq(api_key=GROQ_API_KEY)

# ================== GLOBAL VECTOR DB ==================
# Populated by process_pdf(); read by ask_question(). Single-user demo state.
vector_db = None

# Cache the sentence-transformer so repeated uploads don't reload the model.
_embeddings = None


def _get_embeddings():
    """Return the (lazily created, cached) HuggingFace embedding model."""
    global _embeddings
    if _embeddings is None:
        _embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
    return _embeddings


# ================== LLM FUNCTION ==================
def groq_llm(prompt):
    """Send *prompt* to the Groq chat-completions API and return the text reply."""
    response = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content


def _resolve_upload_path(file):
    """Return a filesystem path for a Gradio upload.

    gr.File may deliver a plain filepath string, an object with a ``.name``
    path attribute (tempfile wrapper), or a readable stream depending on the
    Gradio version. The original code assumed ``file.read()``, which raises
    AttributeError on modern Gradio — handle all three shapes.
    """
    if isinstance(file, str):
        return file
    name = getattr(file, "name", None)
    if name and os.path.exists(name):
        return name
    # Fallback: a genuine stream — copy its bytes to a temp file.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(file.read())
        return tmp.name


# ================== PDF PROCESSING ==================
def process_pdf(file):
    """Load, chunk, embed, and index the uploaded PDF; returns a status string."""
    global vector_db

    if file is None:
        return "❌ Please upload a PDF file."

    pdf_path = _resolve_upload_path(file)

    # Load PDF pages as documents.
    loader = PyPDFLoader(pdf_path)
    documents = loader.load()

    # Split text into overlapping chunks so retrieval keeps local context.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100,
    )
    docs = splitter.split_documents(documents)

    # Build the vector database (replaces any previously indexed document).
    vector_db = FAISS.from_documents(docs, _get_embeddings())

    return f"✅ Document processed successfully! {len(docs)} chunks created."


# ================== QUESTION ANSWERING ==================
def ask_question(question):
    """Answer *question* using the top-3 retrieved chunks as the only context."""
    global vector_db

    if vector_db is None:
        return "❌ Please upload and process a document first."

    retriever = vector_db.as_retriever(search_kwargs={"k": 3})
    docs = retriever.invoke(question)

    context = "\n\n".join([doc.page_content for doc in docs])

    prompt = f""" You are an intelligent assistant. Use ONLY the context below to answer the question. Context: {context} Question: {question} Answer: """

    return groq_llm(prompt)


# ================== GRADIO UI ==================
with gr.Blocks(title="📄 RAG PDF Question Answering App") as demo:
    gr.Markdown("# 📄 RAG PDF Question Answering App")
    gr.Markdown("Upload a PDF and ask questions about it.")

    with gr.Row():
        pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
        process_btn = gr.Button("📥 Process Document")

    status = gr.Textbox(label="Status", interactive=False)

    with gr.Row():
        question = gr.Textbox(label="Ask a Question")
        answer = gr.Textbox(label="Answer", interactive=False)

    process_btn.click(process_pdf, inputs=pdf_upload, outputs=status)
    question.submit(ask_question, inputs=question, outputs=answer)

# Entry-point guard so importing this module doesn't start the server.
if __name__ == "__main__":
    demo.launch()