# PDF-QA / app.py
# AymenELKani's picture
# Upload 2 files
# ef248ab verified
import gradio as gr
from langchain import HuggingFacePipeline
from transformers import pipeline
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
# --- Load free Hugging Face LLM ---
# You can switch to another free model if you like
# Build the local text-generation pipeline that backs the QA chain.
# NOTE: `temperature` only influences generation when sampling is enabled.
# Without `do_sample=True` decoding is greedy and transformers ignores (and
# warns about) the temperature value, so sampling is switched on explicitly.
hf_pipeline = pipeline(
    "text-generation",
    model="tiiuae/falcon-7b-instruct",
    max_new_tokens=256,
    do_sample=True,
    temperature=0.5,
)
# Wrap the transformers pipeline so LangChain chains can use it as an LLM.
llm = HuggingFacePipeline(pipeline=hf_pipeline)
# --- PDF Loader ---
def document_loader(file):
    """Load an uploaded PDF into a list of LangChain documents.

    Args:
        file: Either a filesystem path (``str`` / ``os.PathLike``) or an
            upload object that exposes its path as ``.name``. Which of the
            two Gradio passes depends on its version and the ``gr.File``
            ``type`` setting, so both are accepted.

    Returns:
        The result of ``PyPDFLoader(path).load()`` for the given file.

    Raises:
        ValueError: If ``file`` is ``None`` (nothing was uploaded).
    """
    import os

    if file is None:
        raise ValueError("No PDF file was provided.")
    # A plain path is used as-is; a temp-file style wrapper keeps its path
    # in `.name`.
    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
    else:
        path = file.name
    loader = PyPDFLoader(path)
    return loader.load()
# --- Text Splitter ---
def text_splitter(documents, *, chunk_size=1000, chunk_overlap=20):
    """Split documents into overlapping chunks suitable for embedding.

    Args:
        documents: Documents to split (as produced by ``document_loader``).
        chunk_size: Maximum size of each chunk, passed straight to
            ``RecursiveCharacterTextSplitter``. Defaults to 1000.
        chunk_overlap: Overlap between consecutive chunks, passed straight
            to ``RecursiveCharacterTextSplitter``. Defaults to 20.

    Returns:
        The chunked documents returned by ``split_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(documents)
# --- Vector Database using embeddings ---
# Embedding models are expensive to load; keep one instance per model name
# so repeated questions do not reload the weights.
_EMBEDDINGS_CACHE = {}


def _get_embeddings(model_name):
    """Return a cached ``HuggingFaceEmbeddings`` instance for *model_name*."""
    if model_name not in _EMBEDDINGS_CACHE:
        _EMBEDDINGS_CACHE[model_name] = HuggingFaceEmbeddings(model_name=model_name)
    return _EMBEDDINGS_CACHE[model_name]


def vector_database(chunks):
    """Embed document chunks and index them in a fresh Chroma collection.

    Args:
        chunks: Non-empty list of chunked documents to index.

    Returns:
        The ``Chroma`` vector store built from ``chunks``.

    Raises:
        ValueError: If ``chunks`` is empty, e.g. because no text could be
            extracted from the PDF.
    """
    import uuid

    if not chunks:
        raise ValueError("No text chunks to index; the PDF may contain no extractable text.")
    embeddings = _get_embeddings("sentence-transformers/all-MiniLM-L6-v2")
    # `from_documents` falls back to one fixed default collection name. With
    # chromadb versions whose in-memory client is shared across the process,
    # that would make every upload land in the same collection and let
    # earlier PDFs leak into later answers, so each index gets a unique name.
    return Chroma.from_documents(
        chunks,
        embeddings,
        collection_name=f"pdf-{uuid.uuid4().hex}",
    )
# --- Build retriever ---
def retriever(file):
    """Build a retriever over the contents of an uploaded PDF.

    Runs the indexing pipeline end to end: load the PDF, split it into
    chunks, index the chunks in a vector store, and expose that store
    through its default retriever.
    """
    documents = document_loader(file)
    store = vector_database(text_splitter(documents))
    return store.as_retriever()
# --- RetrievalQA ---
def retriever_qa(file, query):
    """Answer a question about an uploaded PDF.

    Builds a retriever for the file, wires it into a "stuff" RetrievalQA
    chain backed by the module-level ``llm``, and runs the query through it.

    Args:
        file: The uploaded PDF as passed in by the Gradio file input, or
            ``None`` when nothing was uploaded.
        query: The user's question.

    Returns:
        The chain's answer as a string, or a short hint asking for the
        missing input when no file was uploaded or the question is blank.
    """
    # The handler can be invoked before both inputs are filled in; answer
    # with a readable hint instead of failing somewhere inside the pipeline.
    if file is None:
        return "Please upload a PDF file first."
    if not query or not query.strip():
        return "Please enter a question about the PDF."

    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever(file),
        return_source_documents=False,
    )
    return qa_chain.run(query)
# --- Gradio Interface ---
def _build_interface():
    """Assemble the Gradio UI: a PDF upload and a question box in, an answer box out."""
    pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
    question_box = gr.Textbox(
        label="Ask a question about the PDF",
        lines=2,
        placeholder="Type your question here...",
    )
    answer_box = gr.Textbox(label="Answer")
    return gr.Interface(
        fn=retriever_qa,
        inputs=[pdf_upload, question_box],
        outputs=answer_box,
        title="Free LLM PDF Q&A Bot",
        description="Upload a PDF document and ask any question. This bot uses a free open-source LLM and vector search to answer your questions.",
    )


app = _build_interface()

# Only start the web server when executed as a script, not on import.
if __name__ == "__main__":
    app.launch()