import gradio as gr
from transformers import pipeline

# Both pipelines are built once, at import time, and reused for every request.

# OCR stage: turns the uploaded image into text.
ocr = pipeline(task="image-to-text", model="microsoft/trocr-base-printed")

# QA stage: answers a question using the OCR output as its context.
qa = pipeline(task="question-answering", model="deepset/roberta-base-squad2")

def process(image, question):
    """Run OCR on an uploaded image and optionally answer a question about it.

    Args:
        image: The uploaded image, or ``None`` when nothing was uploaded.
        question: The user's question about the extracted text. May be
            ``None``, empty, or whitespace-only, all of which are treated
            as "no question asked".

    Returns:
        A ``(extracted_text, answer)`` pair of strings. When no answer can
        be produced, the second element carries a user-facing message
        instead. On an unexpected failure the pair is
        ``("Error during processing.", <exception message>)``.
    """
    # Nothing uploaded: prompt the user instead of letting the OCR pipeline
    # fail on None and surface a cryptic error message.
    if image is None:
        return "", "Please upload an image."

    try:
        # The OCR pipeline returns a list of dicts; the recognized text of
        # the first (only) result lives under "generated_text".
        extracted_text = ocr(image)[0]["generated_text"]

        # A question made only of whitespace carries no content either.
        cleaned_question = (question or "").strip()
        if not cleaned_question:
            return extracted_text, "Please enter a question."

        # The question-answering pipeline raises on an empty context, so
        # stop here with a clear message when OCR produced no usable text.
        if not extracted_text or not extracted_text.strip():
            return extracted_text, "No text could be extracted from the image."

        answer = qa(question=cleaned_question, context=extracted_text)

        # `or` instead of a .get() default: the default must also apply
        # when the key exists but the answer string is empty.
        return extracted_text, answer.get("answer") or "No answer found."

    except Exception as e:
        # Deliberately broad: this is the UI boundary, and any model or
        # decoding failure should be shown to the user rather than crash
        # the request handler.
        return "Error during processing.", str(e)

# Web UI: the two inputs are passed to process() in order, and its two
# return values fill the two output textboxes in order.
demo = gr.Interface(
    title="OCR + Question Answering",
    description="Upload a document image, extract text, and ask questions about it.",
    fn=process,
    inputs=[
        gr.Image(type="pil", label="Upload an image"),
        gr.Textbox(label="Ask a question about the document"),
    ],
    outputs=[
        gr.Textbox(label="Extracted Text"),
        gr.Textbox(label="Answer"),
    ],
)

# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()