"""Gradio RAG app: upload a PDF, index it with FAISS, and chat with it via Groq."""

import os

import gradio as gr
from groq import Groq
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# ================= ENVIRONMENT =================
GROQ_API_KEY = os.getenv("GROQ_API_KEY")  # Set in Hugging Face Secrets
client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None

vector_db = None    # FAISS index of the currently loaded PDF (None until processed)
_embeddings = None  # lazily created embedding model, reused across uploads


def _get_embeddings():
    """Create the sentence-transformer embedding model once and reuse it.

    Re-instantiating HuggingFaceEmbeddings on every upload reloads the model
    weights from disk; caching it makes repeat PDF processing much faster.
    """
    global _embeddings
    if _embeddings is None:
        _embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
    return _embeddings


# ================= LLM FUNCTION =================
def groq_llm(prompt):
    """Send *prompt* to Groq's chat API and return the model's text reply.

    Returns an error string (rather than raising) when the API key is unset,
    so the message surfaces directly in the chat UI.
    """
    if client is None:
        return "❌ GROQ API key not set. Set it in Hugging Face Secrets."
    response = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content


# ================= PROCESS PDF =================
def process_pdf(file):
    """Load an uploaded PDF, split it into chunks, and build the FAISS index.

    Parameters
    ----------
    file : the value from the ``gr.File`` component; either a tempfile-like
        object exposing ``.name`` or a plain filepath string, depending on
        the Gradio version.

    Returns a human-readable status string for the UI.
    """
    global vector_db
    if file is None:
        return "❌ Please upload a PDF."

    # Gradio may hand us a tempfile-like object (``.name``) or a plain path.
    pdf_path = getattr(file, "name", file)

    documents = PyPDFLoader(pdf_path).load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    docs = splitter.split_documents(documents)

    vector_db = FAISS.from_documents(docs, _get_embeddings())
    return f"✅ PDF processed successfully! {len(docs)} chunks created."


# ================= ASK QUESTION =================
def ask_question(question, chat_history):
    """Answer *question* from the indexed PDF and append the turn to history.

    Returns the updated chat history (a list of ``[user, assistant]`` pairs)
    and an empty string so the question textbox is cleared after submit.
    """
    chat_history = chat_history or []
    question = (question or "").strip()
    if not question:
        # Nothing to answer — avoid a pointless retrieval/LLM round-trip.
        return chat_history, ""

    if vector_db is None:
        # One [user, assistant] pair per turn. The original appended
        # ["User", ...] and ["Assistant", ...] rows, which makes Gradio
        # render the literal words "User"/"Assistant" as chat messages.
        chat_history.append([question, "❌ Please upload and process a PDF first."])
        return chat_history, ""

    retriever = vector_db.as_retriever(search_kwargs={"k": 3})
    docs = retriever.invoke(question)  # get_relevant_documents() is deprecated
    context = "\n\n".join(doc.page_content for doc in docs)

    prompt = f"""You are an intelligent assistant. Answer ONLY using the provided context.
Context: {context}
Question: {question}
Answer:"""
    answer = groq_llm(prompt)

    chat_history.append([question, answer])
    return chat_history, ""


# ================= GRADIO UI =================
with gr.Blocks(css="""
body {background-color: #f5f5f5;}
.gradio-container {max-width: 900px; margin:auto; padding:20px;
                   border-radius:12px; box-shadow:0 4px 15px rgba(0,0,0,0.1);}
""") as demo:
    gr.Markdown("# 📄 RAG PDF QA", elem_id="title")
    gr.Markdown("Upload a PDF and chat with it!")
    if client is None:
        gr.Markdown(
            "⚠️ GROQ_API_KEY not set. Set it in Hugging Face Secrets to enable answering."
        )

    with gr.Row():
        with gr.Column(scale=1):
            pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
            process_btn = gr.Button("Process PDF")
            status = gr.Textbox(label="Status", interactive=False)
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Chat with PDF")
            question = gr.Textbox(
                placeholder="Type your question here and press Enter"
            )

    process_btn.click(fn=process_pdf, inputs=pdf_upload, outputs=status)
    # Second output clears the textbox after each question. The original wired
    # outputs=[chatbot, chatbot] — the same component twice — which Gradio
    # rejects as duplicate outputs.
    question.submit(
        fn=ask_question, inputs=[question, chatbot], outputs=[chatbot, question]
    )

demo.launch()