"""RAG PDF question-answering app: upload a PDF, index it with FAISS, and
answer questions via the Groq chat API, served through a Gradio UI."""

import os
import tempfile

import gradio as gr
from dotenv import load_dotenv
from groq import Groq
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# ================= ENVIRONMENT =================
load_dotenv()

# BUG FIX: the original code passed the literal API key as the *name* of the
# environment variable, so os.getenv() always returned None and the client was
# never created.  (That key was also committed in source -- it should be
# revoked and rotated.)  Read the variable by its conventional name instead.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None

# In-memory FAISS index built from the most recently processed PDF.
vector_db = None


# ================= LLM FUNCTION =================
def groq_llm(prompt):
    """Send *prompt* to the Groq chat API and return the model's reply text.

    Returns an error string (rather than raising) when no API key is
    configured, so the Gradio UI can surface it directly.
    """
    if client is None:
        return "❌ GROQ API key not set. Set it in Hugging Face Secrets."
    response = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content


# ================= PROCESS PDF =================
def process_pdf(file):
    """Load an uploaded PDF, split it into chunks, and build the FAISS index.

    ``file`` may be raw bytes (gr.File type="binary") or a path string
    (gr.File default type="filepath"); both are handled.  The original code
    assumed bytes and crashed with a TypeError on the default filepath mode.
    Returns a human-readable status message for the UI.
    """
    global vector_db
    if file is None:
        return "❌ Please upload a PDF."

    tmp_created = False
    if isinstance(file, (bytes, bytearray)):
        # Raw bytes: spill to a temp file so PyPDFLoader can read a path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            tmp.write(file)
            pdf_path = tmp.name
        tmp_created = True
    else:
        # gr.File(type="filepath") hands us a path-like object/string already.
        pdf_path = str(file)

    try:
        documents = PyPDFLoader(pdf_path).load()
    finally:
        if tmp_created:
            os.unlink(pdf_path)  # don't leak a temp file per upload

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=500, chunk_overlap=100
    )
    docs = splitter.split_documents(documents)

    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    vector_db = FAISS.from_documents(docs, embeddings)

    return f"✅ PDF processed successfully! {len(docs)} chunks created."


# ================= ASK QUESTION =================
def ask_question(question):
    """Retrieve the top-3 chunks relevant to *question* and answer from them.

    Requires :func:`process_pdf` to have been run first; otherwise returns a
    prompt-the-user error string.
    """
    global vector_db
    if vector_db is None:
        return "❌ Please upload and process a document first."

    retriever = vector_db.as_retriever(search_kwargs={"k": 3})
    # .invoke() replaces the deprecated .get_relevant_documents() in
    # modern LangChain (the Runnable interface).
    docs = retriever.invoke(question)
    context = "\n\n".join(doc.page_content for doc in docs)

    prompt = f"""
You are an intelligent assistant. Answer ONLY using the provided context. 
Context: {context} Question: {question} Answer: """
    return groq_llm(prompt)


# ================= GRADIO UI =================
with gr.Blocks(title="📄 RAG PDF QA App") as demo:
    gr.Markdown("# 📄 RAG (Retrieval-Augmented Generation) PDF QA")
    gr.Markdown("Upload a PDF and ask questions about its content.")
    if client is None:
        gr.Markdown("⚠️ GROQ_API_KEY not set. Set it in Hugging Face Secrets to enable answering.")

    with gr.Row():
        pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
        process_btn = gr.Button("Process PDF")

    status = gr.Textbox(label="Status", interactive=False)
    question = gr.Textbox(label="Ask Question")
    answer = gr.Textbox(label="Answer", lines=10)

    process_btn.click(process_pdf, inputs=pdf_upload, outputs=status)
    question.submit(ask_question, inputs=question, outputs=answer)

# Guarded so importing this module (e.g. in tests) doesn't start the server.
if __name__ == "__main__":
    demo.launch()