Spaces:
Build error
Build error
| import gradio as gr | |
| from PyPDF2 import PdfReader | |
| from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.vectorstores import FAISS | |
| from langchain.embeddings import HuggingFaceEmbeddings | |
| from langchain.chains import RetrievalQA | |
| from langchain.llms import HuggingFacePipeline | |
# 1. Load the language model and its tokenizer
# Both objects are created once at import time and reused by every QA chain
# built later in this file. Dtype and device placement are left to the
# library's "auto" setting.
model_id = "ibm-granite/granite-3.3-2b-instruct"
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id)
| # 2. Extract text from PDF | |
def extract_text(pdf_file):
    """Return the concatenated text of every page of a PDF.

    Args:
        pdf_file: Whatever ``PdfReader`` accepts as its source (the upload
            handler in this file passes a file path).

    Returns:
        The text of each page that yielded any text, each followed by a
        newline. Pages that yield nothing are skipped, so the result is an
        empty string when no page contains extractable text.
    """
    reader = PdfReader(pdf_file)
    # Run the extraction exactly once per page; the result is reused for both
    # the emptiness check and the output instead of extracting twice.
    page_texts = (page.extract_text() for page in reader.pages)
    return "".join(f"{page_text}\n" for page_text in page_texts if page_text)
def build_qa_chain(pdf_text):
    """Build a retrieval-augmented QA chain over the text of one document.

    The text is split into overlapping chunks, embedded into a FAISS vector
    store, and combined with a text-generation pipeline around the
    module-level ``model`` and ``tokenizer``.

    Args:
        pdf_text: Full text of the document to index.

    Returns:
        A ``RetrievalQA`` chain that answers questions from the indexed text.

    Raises:
        ValueError: If ``pdf_text`` is empty or yields no chunks to index.
    """
    # Fail early with a clear message: an empty chunk list would otherwise
    # surface as an opaque error while building the vector store.
    if not pdf_text or not pdf_text.strip():
        raise ValueError("Cannot build a QA chain from empty text.")

    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_text(pdf_text)
    if not chunks:
        raise ValueError("Cannot build a QA chain: the text produced no chunks.")

    embeddings = HuggingFaceEmbeddings()
    vectorstore = FAISS.from_texts(chunks, embeddings)
    retriever = vectorstore.as_retriever()

    llm_pipeline = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        # Limit only the generated answer. ``max_length`` also counts the
        # prompt, which here contains the retrieved chunks and can use up
        # the whole budget before any answer is produced.
        max_new_tokens=512,
        # Deterministic (greedy) decoding. A temperature of 0 is not a valid
        # sampling temperature, so state the intent explicitly instead.
        do_sample=False,
    )
    llm = HuggingFacePipeline(pipeline=llm_pipeline)
    return RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
# QA chain for the most recently processed PDF, shared between the upload
# handler and the question handler. None until a PDF has been indexed.
qa_chain = None


def process_pdf(file):
    """Index an uploaded PDF and return a message for the status box.

    Args:
        file: Path of the uploaded PDF, or None when no file is present.

    Returns:
        A human-readable status string describing the outcome.
    """
    global qa_chain
    if file is None:
        return "⚠️ Please upload a PDF first."

    # Drop the previous chain before doing any work, so a failed or empty
    # upload cannot leave the app answering from an older document.
    qa_chain = None

    text = extract_text(file)
    if not text.strip():
        return "⚠️ No extractable text was found in this PDF."

    qa_chain = build_qa_chain(text)
    return "✅ PDF processed. You can now ask questions."
def answer_question(question):
    """Answer a question using the chain built from the uploaded PDF.

    Args:
        question: The user's question text; may be empty or None.

    Returns:
        The chain's answer, or a short instruction when the question is
        blank or no PDF has been processed yet.
    """
    # Do not send an empty prompt through retrieval and generation.
    if not question or not question.strip():
        return "⚠️ Please enter a question."
    if qa_chain is None:
        return "⚠️ Please upload a PDF first."
    return qa_chain.run(question)
# UI with Gradio
# Layout: an upload row (file + status) and a question row (question + answer)
# followed by an "Ask" button. Event listeners are registered inside the
# Blocks context so they are attached to this app.
with gr.Blocks() as demo:
    gr.Markdown("## 📄 Granite 2B — PDF Q&A")

    with gr.Row():
        pdf_input = gr.File(label="Upload PDF", type="filepath")
        status_output = gr.Textbox(label="Status")
    # Index the document as soon as a file is uploaded; the handler's return
    # value is shown in the status box.
    pdf_input.upload(process_pdf, pdf_input, status_output)

    with gr.Row():
        question_input = gr.Textbox(label="Ask a question")
        answer_output = gr.Textbox(label="Answer")
    ask_btn = gr.Button("Ask")
    ask_btn.click(answer_question, question_input, answer_output)
    # Pressing Enter in the question box behaves like clicking "Ask".
    question_input.submit(answer_question, question_input, answer_output)

demo.launch()