# Hugging Face Space by prithvi1029 — app.py ("Update app.py", commit 43657dc, 1.83 kB)
import gradio as gr

# FIX: RecursiveCharacterTextSplitter is not in langchain_community.text_splitter
# (that module does not exist, so the app crashed with ImportError on startup);
# it is exposed from langchain.text_splitter.
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI
def run_qa(pdf_path, question):
    """Answer a question about an uploaded PDF via a simple RAG pipeline.

    Loads the PDF, chunks it, embeds the chunks into an in-memory FAISS
    index, and runs a RetrievalQA chain over it with an OpenAI chat model.
    NOTE(review): the index is rebuilt on every call — fine for a demo,
    wasteful if the same PDF is queried repeatedly.

    Args:
        pdf_path: Filesystem path to the uploaded PDF (``None`` if no upload).
        question: The user's question (may be ``None`` or empty from Gradio).

    Returns:
        A Markdown string with the answer and up to two source excerpts,
        or a prompt message when input is missing.
    """
    # Guard: Gradio can hand us None for a cleared textbox, which would
    # previously crash on question.strip() with AttributeError.
    if pdf_path is None or not question or question.strip() == "":
        return "Please upload a PDF and enter a question."

    # Load the PDF into one Document per page.
    loader = PyPDFLoader(pdf_path)
    docs = loader.load()

    # Split into overlapping chunks so retrieval granularity stays useful.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
    chunks = splitter.split_documents(docs)

    # Embed chunks and build an in-memory FAISS vector store.
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    vectordb = FAISS.from_documents(chunks, embeddings)

    # Deterministic LLM (temperature=0) — requires OPENAI_API_KEY in the env.
    llm = ChatOpenAI(temperature=0)

    # Retrieval QA chain; return_source_documents lets us show provenance.
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=vectordb.as_retriever(),
        return_source_documents=True,
    )

    # .invoke is the newer-LangChain-safe call (replaces __call__/run).
    result = qa.invoke({"query": question})
    answer_text = result.get("result", "")
    source_docs = result.get("source_documents", [])

    # Show at most two source excerpts, truncated to 500 chars each.
    sources = "\n\n".join(d.page_content[:500] for d in source_docs[:2])
    return f"### Answer\n{answer_text}\n\n---\n### Sources\n{sources}"
# --- Gradio UI: upload a PDF, ask a question, render the RAG answer ---
with gr.Blocks(title="Agentic Document Intelligence") as demo:
    gr.Markdown(
        "# ๐Ÿ“„ Agentic Document Intelligence\nUpload a PDF and ask questions using RAG."
    )

    # Inputs: the PDF (as a filesystem path) and the free-text question.
    pdf = gr.File(label="Upload PDF", type="filepath")
    question = gr.Textbox(label="Ask a question")

    # Output area renders the Markdown returned by run_qa.
    output = gr.Markdown()

    # Wire the button to the QA pipeline.
    btn = gr.Button("Run")
    btn.click(run_qa, inputs=[pdf, question], outputs=output)

demo.launch()