# Source: SRI2005 — app.py (commit 507a375, verified)
import gradio as gr
from PyPDF2 import PdfReader
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
# Load the IBM Granite 3.3 2B instruct model once at import time so the
# Gradio handlers below can reuse it.
# NOTE(review): loading at import blocks app startup until the weights are
# downloaded/loaded — acceptable for a Space, but worth confirming.
model_id = "ibm-granite/granite-3.3-2b-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype="auto",  # use the checkpoint's native dtype
    device_map="auto"    # place weights on GPU/CPU automatically
)
# 2. Extract text from PDF
def extract_text(pdf_file):
    """Extract all text from a PDF.

    Args:
        pdf_file: Path to a PDF file (or any input ``PdfReader`` accepts).

    Returns:
        str: The text of every page that yields any, each page terminated
        by a newline. Pages whose extraction returns ``None``/"" are
        skipped. Empty string if no page has extractable text.
    """
    reader = PdfReader(pdf_file)
    pages = []
    for page in reader.pages:
        # Call extract_text() once per page — the original called it twice
        # (once in the condition, once in the append), doubling the work.
        page_text = page.extract_text()
        if page_text:
            pages.append(page_text)
    # join is linear; repeated `text +=` is potentially quadratic.
    return "".join(f"{p}\n" for p in pages)
def build_qa_chain(pdf_text):
    """Build a RetrievalQA chain over *pdf_text*.

    Args:
        pdf_text: Full text of the uploaded PDF.

    Returns:
        A LangChain ``RetrievalQA`` chain backed by an in-memory FAISS
        index of the text chunks and the module-level Granite model.
    """
    # Overlapping chunks so answers spanning a chunk boundary survive.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_text(pdf_text)

    # Embed the chunks and index them for similarity retrieval.
    embeddings = HuggingFaceEmbeddings()
    vectorstore = FAISS.from_texts(chunks, embeddings)
    retriever = vectorstore.as_retriever()

    llm_pipeline = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        # max_length=512 capped prompt+completion; RetrievalQA stuffs the
        # retrieved 1000-char chunks into the prompt, which alone can exceed
        # 512 tokens and truncate or error out. Cap only the generated part.
        max_new_tokens=512,
        # temperature=0 is invalid for sampling; greedy decoding is the
        # correct way to request deterministic output.
        do_sample=False,
        # Return only the completion — otherwise the chain's "answer" is
        # prefixed with the entire stuffed prompt.
        return_full_text=False,
    )
    llm = HuggingFacePipeline(pipeline=llm_pipeline)
    return RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
# Store QA chain globally: shared between the upload handler (which builds
# it) and the ask handler (which queries it). None until a PDF is processed.
qa_chain = None
def process_pdf(file):
    """Handle a PDF upload: extract its text and (re)build the QA chain.

    Args:
        file: Filepath of the uploaded PDF (Gradio ``type="filepath"``).

    Returns:
        str: Status message for the Status textbox. The original returned
        None, so the wired ``status_output`` box stayed blank and gave the
        user no feedback.
    """
    global qa_chain
    text = extract_text(file)
    if not text.strip():
        # Scanned/image-only PDFs yield no text; don't build an empty index.
        qa_chain = None
        return "⚠️ No extractable text found in this PDF."
    qa_chain = build_qa_chain(text)
    return "✅ PDF processed — you can now ask questions."
def answer_question(question):
    """Answer *question* against the currently loaded PDF.

    Returns the chain's answer, or an error string if no PDF has been
    processed yet.
    """
    chain = qa_chain
    if chain is None:
        return "❌ Please upload a PDF first."
    return chain.run(question)
# UI with Gradio: upload row (PDF + status) on top, Q&A row below.
with gr.Blocks() as demo:
    # Fix mis-encoded header emoji: the original contained the mojibake
    # "πŸ“„" (UTF-8 bytes of 📄 decoded as cp1252).
    gr.Markdown("## 📄 Granite 2B — PDF Q&A")
    with gr.Row():
        # type="filepath" hands process_pdf a path string, which PdfReader accepts.
        pdf_input = gr.File(label="Upload PDF", type="filepath")
        status_output = gr.Textbox(label="Status")
    # Rebuild the QA chain on every upload; the return value fills Status.
    pdf_input.upload(process_pdf, pdf_input, status_output)
    with gr.Row():
        question_input = gr.Textbox(label="Ask a question")
        answer_output = gr.Textbox(label="Answer")
    ask_btn = gr.Button("Ask")
    ask_btn.click(answer_question, question_input, answer_output)

demo.launch()