# RAG-APP / app.py
# Gradio RAG demo: upload a PDF, index it with FAISS + MiniLM embeddings,
# and answer questions about it via the Groq LLM API.
import os
import gradio as gr
import tempfile
from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from groq import Groq
# ================== LOAD ENV ==================
load_dotenv()

# Read the secret by its NAME from the environment / Hugging Face Secrets.
# BUG FIX: the original passed the literal API key string as the variable
# name, which both leaked the secret in source control and made os.getenv()
# always return None (crashing at startup). Never hard-code the key here.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise ValueError("❌ GROQ_API_KEY not found. Please set it in Hugging Face Secrets.")

# One shared Groq client, reused for every request.
client = Groq(api_key=GROQ_API_KEY)

# ================== GLOBAL VECTOR DB ==================
# FAISS index over the uploaded document; None until process_pdf() runs.
vector_db = None
# ================== LLM FUNCTION ==================
def groq_llm(prompt):
    """Send a single-turn user prompt to the Groq chat API and return the reply text.

    Uses the module-level `client` and the llama-3.3-70b-versatile model.
    """
    chat = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama-3.3-70b-versatile",
    )
    # The API returns a list of candidate completions; take the first.
    return chat.choices[0].message.content
# ================== PDF PROCESSING ==================
def process_pdf(file):
    """Index an uploaded PDF into the global FAISS vector store.

    Args:
        file: the value delivered by Gradio's File component — a filepath
            string (the modern default), or a file-like object exposing
            .name / .read() (older Gradio versions). May be None.

    Returns:
        A human-readable status string for the UI.
    """
    global vector_db
    if file is None:
        return "❌ Please upload a PDF file."

    # Resolve a concrete on-disk path regardless of how Gradio hands the
    # file over. The original unconditionally called file.read(), which
    # crashes when `file` is a plain filepath string.
    cleanup_path = None
    if isinstance(file, str):
        pdf_path = file
    elif hasattr(file, "name") and os.path.exists(getattr(file, "name", "")):
        # Gradio's temp-file wrapper exposes the real path via .name.
        pdf_path = file.name
    else:
        # True in-memory file-like object: spill it to a temp file for the loader.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            tmp.write(file.read())
            cleanup_path = pdf_path = tmp.name

    try:
        # Load pages, then split into overlapping chunks for retrieval.
        documents = PyPDFLoader(pdf_path).load()
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=100,
        )
        docs = splitter.split_documents(documents)

        # Embed chunks and (re)build the global FAISS index.
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        vector_db = FAISS.from_documents(docs, embeddings)
        return f"✅ Document processed successfully! {len(docs)} chunks created."
    finally:
        # The original leaked the delete=False temp file; remove our copy.
        if cleanup_path:
            os.unlink(cleanup_path)
# ================== QUESTION ANSWERING ==================
def ask_question(question):
    """Answer `question` from the indexed document, or prompt the user to upload one.

    Retrieves the top-3 most similar chunks from the global FAISS store and
    asks the Groq LLM to answer using only that context.
    """
    global vector_db
    if vector_db is None:
        return "❌ Please upload and process a document first."

    # Fetch the three chunks most relevant to the question.
    top_chunks = vector_db.as_retriever(search_kwargs={"k": 3}).invoke(question)
    context = "\n\n".join(chunk.page_content for chunk in top_chunks)

    prompt = f"""
You are an intelligent assistant.
Use ONLY the context below to answer the question.
Context:
{context}
Question:
{question}
Answer:
"""
    return groq_llm(prompt)
# ================== GRADIO UI ==================
# NOTE: user-facing emoji labels were mojibake (UTF-8 decoded through the
# wrong codec, e.g. "πŸ“„"); restored to the intended characters.
with gr.Blocks(title="📄 RAG PDF Question Answering App") as demo:
    gr.Markdown("# 📄 RAG PDF Question Answering App")
    gr.Markdown("Upload a PDF and ask questions about it.")

    with gr.Row():
        pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
        process_btn = gr.Button("📥 Process Document")

    status = gr.Textbox(label="Status", interactive=False)

    with gr.Row():
        question = gr.Textbox(label="Ask a Question")
        answer = gr.Textbox(label="Answer", interactive=False)

    # Wiring: the button indexes the PDF; pressing Enter in the question
    # box runs retrieval + generation and fills the answer box.
    process_btn.click(process_pdf, inputs=pdf_upload, outputs=status)
    question.submit(ask_question, inputs=question, outputs=answer)

demo.launch()