# ayushrai10's picture
# Uploaded app.py
# a6f236d verified
# =========================
# RAG PDF Chatbot - app.py
# =========================
import gradio as gr
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain_community.vectorstores import FAISS
from transformers import pipeline
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
# --------- Load PDF ---------
# The single PDF this chatbot answers questions about.
PDF_PATH = "ml_notes.pdf"
loader = PyPDFLoader(PDF_PATH)
documents = loader.load()

# --------- Split Text ---------
# Cut the loaded documents into small, slightly overlapping chunks; these
# chunks are what gets embedded and retrieved later on.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=30)
docs = text_splitter.split_documents(documents)
# --------- Embeddings ---------
# Sentence-transformers model used to embed the chunks for the vector store.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# --------- Vector Store ---------
# Build a FAISS index over every chunk, then expose it as a retriever
# configured with k=2.
vectorstore = FAISS.from_documents(docs, embedding_model)
retriever = vectorstore.as_retriever(
    search_kwargs={"k": 2},
)
# --------- LLM (FLAN-T5) ---------
# Hugging Face text2text-generation pipeline for google/flan-t5-base, wrapped
# in HuggingFacePipeline so it can be used as a step in the LangChain chain.
pipe = pipeline(
    task="text2text-generation",
    model="google/flan-t5-base",
    max_new_tokens=200,
)
llm = HuggingFacePipeline(pipeline=pipe)
# --------- Prompt ---------
# Template with two slots, {context} and {question}. It is assembled from
# adjacent string literals, one per line of the rendered prompt; the leading
# "\n" reproduces the blank first line of the template.
prompt = ChatPromptTemplate.from_template(
    "\n"
    "Answer the question using ONLY the context below.\n"
    'If the answer is not in the context, say "I don\'t know".\n'
    "Context:\n"
    "{context}\n"
    "Question:\n"
    "{question}\n"
)
# --------- RAG Chain ---------
def _format_docs(retrieved_docs):
    """Join the text of the retrieved chunks into one plain-text string.

    Without this step the prompt's ``{context}`` slot is filled with the
    string form of a list of ``Document`` objects (metadata included)
    instead of the chunk text alone.

    Args:
        retrieved_docs: Iterable of objects exposing ``page_content``.

    Returns:
        The chunks' ``page_content`` values separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in retrieved_docs)


# The incoming question feeds both keys of the mapping: it goes to the
# retriever (whose results are flattened to plain text) and is passed through
# unchanged as "question". The filled prompt is then sent to the LLM and the
# output is parsed into a plain string.
rag_chain = (
    {
        "context": retriever | _format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)
# --------- Gradio UI ---------
def chat(question: str) -> str:
    """Answer a user question by running it through the RAG chain.

    Args:
        question: Text typed into the Gradio textbox.

    Returns:
        The chain's answer, or a short hint when the question is empty,
        whitespace-only, or missing.
    """
    # Blank input would otherwise trigger retrieval and generation on
    # nothing, so answer immediately instead.
    if not question or not question.strip():
        return "Please enter a question."
    return rag_chain.invoke(question.strip())
# Single-function Gradio interface: a two-line textbox as input, plain text
# as output, with `chat` as the handler.
demo = gr.Interface(
    title="📚 RAG PDF Chatbot",
    description="Ask questions grounded in your PDF using RAG",
    fn=chat,
    inputs=gr.Textbox(placeholder="Ask from the PDF...", lines=2),
    outputs="text",
)

# Start the web app.
demo.launch()