Spaces:

snewby3
/

week8

Build error

App Files Files Community

snewby3 committed 26 days ago

Commit

6764cea

verified ·

1 Parent(s): 2647252

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -51

app.py CHANGED Viewed

@@ -1,76 +1,85 @@
 import gradio as gr
-import pytesseract
-from PIL import Image
 from transformers import pipeline
 # -----------------------------
-# OCR FUNCTION (TESSERACT)
 # -----------------------------
-def ocr_extract(image):
-    # Ensure image is a PIL Image
-    if not isinstance(image, Image.Image):
-        image = Image.fromarray(image)
-    # Run Tesseract OCR
-    text = pytesseract.image_to_string(image)
-    return text.strip()
 # -----------------------------
-# QUESTION-ANSWERING MODEL
-# (SUPPORTED IN CONTAINER RUNTIME)
 # -----------------------------
-qa = pipeline(
-    "document-question-answering",
-    model="impira/layoutlm-document-qa"
 )
 # -----------------------------
-# MAIN PROCESS FUNCTION
 # -----------------------------
 def process(image, question):
-    try:
-        # Extract text using Tesseract
-        extracted_text = ocr_extract(image)
-        if not extracted_text:
-            return "No text could be extracted.", "No answer found."
-        if not question:
-            return extracted_text, "Please enter a question."
-        # Run QA on extracted text
-        answer = qa(image=None, question=question, context=extracted_text)
-        # Handle list or dict outputs
-        if isinstance(answer, list) and len(answer) > 0:
-            return extracted_text, answer[0].get("answer", "No answer found.")
-        if isinstance(answer, dict):
-            return extracted_text, answer.get("answer", "No answer found.")
-        return extracted_text, "No answer found."
-    except Exception as e:
-        return "Error during processing.", f"Error: {str(e)}"
-# -----------------------------
-# GRADIO INTERFACE
-# -----------------------------
-demo = gr.Interface(
-    fn=process,
-    inputs=[
-        gr.Image(type="pil", label="Upload a document image"),
-        gr.Textbox(label="Ask a question about the document")
-    ],
-    outputs=[
-        gr.Textbox(label="Extracted Text"),
-        gr.Textbox(label="Answer")
-    ],
-    title="OCR + Document QA (Tesseract Version)",
-    description="Upload a PNG/JPG image of a document. The system extracts text using Tesseract and answers questions about it."
-)
-if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
+import easyocr
 from transformers import pipeline
+from PIL import Image
 # -----------------------------
+# OCR SETUP (EasyOCR)
 # -----------------------------
+reader = easyocr.Reader(['en'], gpu=False)
+def extract_text(image):
+    """
+    Extract text from an uploaded image using EasyOCR.
+    """
+    results = reader.readtext(image)
+    text = " ".join([res[1] for res in results])
+    return text
 # -----------------------------
+# QUESTION ANSWERING SETUP
 # -----------------------------
+qa_pipeline = pipeline(
+    "question-answering",
+    model="deepset/roberta-base-squad2"
 )
+def answer_question(context, question):
+    """
+    Use a QA model to answer a question based on extracted text.
+    """
+    if not context.strip():
+        return "No text extracted from the image."
+    if not question.strip():
+        return "Please enter a question."
+    result = qa_pipeline({
+        "context": context,
+        "question": question
+    })
+    return result.get("answer", "No answer found.")
 # -----------------------------
+# GRADIO APP LOGIC
 # -----------------------------
 def process(image, question):
+    """
+    Full pipeline:
+    1. Extract text from image
+    2. Answer question based on extracted text
+    """
+    if image is None:
+        return "Please upload an image.", ""
+    extracted = extract_text(image)
+    answer = answer_question(extracted, question)
+    return extracted, answer
+# -----------------------------
+# GRADIO UI
+# -----------------------------
+# Build the UI with Blocks: a heading, a row holding the two inputs,
+# a trigger button, and a row holding the two result boxes.
+with gr.Blocks() as demo:
+    gr.Markdown("# 📘 Week 8 Multimodal OCR + QA System")
+    with gr.Row():
+        # type="numpy" asks Gradio to hand the upload to process() as a
+        # NumPy array.
+        image_input = gr.Image(type="numpy", label="Upload Document Image")
+        question_input = gr.Textbox(label="Enter your question")
+    run_button = gr.Button("Run OCR + QA")
+    with gr.Row():
+        extracted_output = gr.Textbox(label="Extracted Text")
+        answer_output = gr.Textbox(label="Answer")
+    # process() returns (extracted_text, answer); the two values are
+    # routed to the outputs in the order listed here.
+    run_button.click(
+        fn=process,
+        inputs=[image_input, question_input],
+        outputs=[extracted_output, answer_output]
+    )
+# Launched at module level (no __main__ guard), so this also runs
+# whenever the file is imported.
+demo.launch()