"""Streamlit app: ask a question about one page of an uploaded PDF.

The selected page is rasterised with pdf2image and passed, together with the
user's question, to the Donut DocVQA checkpoint through a Hugging Face
``document-question-answering`` pipeline.
"""

import streamlit as st
from transformers import pipeline
import torch
from pdf2image import convert_from_bytes
from pdf2image.exceptions import (
    PDFInfoNotInstalledError,
    PDFPageCountError,
    PDFSyntaxError,
)
from PIL import Image

# Resolution (dots per inch) used when rasterising PDF pages.
PDF_RENDER_DPI = 200

st.set_page_config(page_title="Donut PDF QA", layout="centered")


@st.cache_resource
def load_model():
    """Build the Donut document-question-answering pipeline.

    Decorated with ``st.cache_resource`` so the pipeline is constructed once
    and reused across script reruns instead of being reloaded every time.

    Returns:
        The pipeline object returned by ``transformers.pipeline``; placed on
        GPU 0 with float16 weights when CUDA is available, otherwise on CPU
        with float32 weights.
    """
    use_gpu = torch.cuda.is_available()
    return pipeline(
        task="document-question-answering",
        model="naver-clova-ix/donut-base-finetuned-docvqa",
        device=0 if use_gpu else -1,
        torch_dtype=torch.float16 if use_gpu else torch.float32,
    )


@st.cache_data(show_spinner=False, max_entries=4)
def render_pdf_pages(pdf_bytes: bytes, dpi: int = PDF_RENDER_DPI) -> list:
    """Rasterise every page of a PDF into a list of images.

    Cached on the PDF bytes and DPI so that widget interactions (which rerun
    the whole script) do not re-render the same document. ``max_entries``
    bounds how many rendered documents are kept in the cache.

    Args:
        pdf_bytes: Raw content of the PDF file.
        dpi: Rendering resolution passed to ``convert_from_bytes``.

    Returns:
        The list of page images produced by ``convert_from_bytes``.
    """
    return convert_from_bytes(pdf_bytes, dpi=dpi)


qa_pipeline = load_model()

st.title("📄 Donut: PDF Question Answering")

uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
question = st.text_input("Ask a question about the document")

# Ignore whitespace-only questions so they do not trigger model inference.
if uploaded_file and question.strip():
    st.write("Reading and converting PDF...")
    try:
        # getvalue() returns the full buffer regardless of the stream
        # position; read() would yield empty bytes once the stream has
        # already been consumed by an earlier rerun.
        images = render_pdf_pages(uploaded_file.getvalue())
    except (PDFInfoNotInstalledError, PDFPageCountError, PDFSyntaxError) as exc:
        st.error(f"Could not read the PDF: {exc}")
        st.stop()

    if not images:
        # number_input would raise with max_value=0 < min_value=1.
        st.error("The uploaded PDF contains no pages.")
        st.stop()

    page_number = st.number_input(
        "Select page",
        min_value=1,
        max_value=len(images),
        value=1,
        step=1,
    )
    # The widget is 1-based for the user; the image list is 0-based.
    page_image = images[page_number - 1]
    st.image(page_image, caption=f"Page {page_number}")

    with st.spinner("Finding answer..."):
        result = qa_pipeline(image=page_image, question=question)

    # Normalise the output to a list of answer dicts so a bare dict, an
    # empty result, or a missing/None answer cannot crash the app.
    answers = [result] if isinstance(result, dict) else list(result or [])
    answer = answers[0].get("answer") if answers else None

    if answer:
        st.success("Answer:")
        st.write(answer)
    else:
        st.warning("The model could not find an answer on this page.")