# Final / app.py
# snewby3's picture
# Update app.py
# 3066742 verified
import gradio as gr
from transformers import pipeline
# Both models are created at module level, so they are built once when this
# module is loaded and then shared by every call to process().

# Step 1: OCR model (turns the uploaded image into text).
# NOTE(review): TrOCR checkpoints are described as single-text-line
# recognisers - confirm this is adequate for full document images.
ocr = pipeline(
    task="image-to-text",
    model="microsoft/trocr-base-printed",
)

# Step 2: QA model (answers a question using the OCR output as context).
qa = pipeline(
    task="question-answering",
    model="deepset/roberta-base-squad2",
)
def process(image, question):
    """Extract text from an image and answer a question about that text.

    Args:
        image: The uploaded image as delivered by the image input, or
            ``None`` when nothing has been uploaded.
        question: The question typed by the user; may be ``None``, empty
            or whitespace-only.

    Returns:
        An ``(extracted_text, answer)`` tuple of strings, one per output
        text box. When input is missing or processing fails, the second
        element carries a human-readable message instead of an answer.
    """
    # Nothing to OCR: say so directly instead of handing None to the OCR
    # pipeline and surfacing its internal error message to the user.
    if image is None:
        return "", "Please upload an image."

    # Treat a whitespace-only question the same as no question at all.
    question = (question or "").strip()

    try:
        # Extract text from the image
        extracted_text = ocr(image)[0]["generated_text"]

        # If no question is asked, just return the extracted text
        if not question:
            return extracted_text, "Please enter a question."

        # Without any text there is no context to search for an answer.
        if not extracted_text.strip():
            return extracted_text, "No text could be extracted from the image."

        # Run QA on the extracted text
        answer = qa(question=question, context=extracted_text)

        # `or` also covers an answer that is present but empty, which
        # `.get("answer", default)` would pass through unchanged as "".
        return extracted_text, answer.get("answer") or "No answer found."
    except Exception as e:
        # Deliberately broad: this is the UI boundary, so any model failure
        # is reported in the output boxes rather than crashing the request.
        return "Error during processing.", str(e)
# Gradio Interface: an image and a question go in, two text boxes come out.
# The components are created in the same order as they are wired below.
input_components = [
    gr.Image(type="pil", label="Upload an image"),
    gr.Textbox(label="Ask a question about the document"),
]
output_components = [
    gr.Textbox(label="Extracted Text"),
    gr.Textbox(label="Answer"),
]

demo = gr.Interface(
    fn=process,
    inputs=input_components,
    outputs=output_components,
    title="OCR + Question Answering",
    description="Upload a document image, extract text, and ask questions about it.",
)

# Launch the interface only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    demo.launch()