snewby3 committed on
Commit
6764cea
·
verified ·
1 Parent(s): 2647252

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -51
app.py CHANGED
@@ -1,76 +1,85 @@
1
  import gradio as gr
2
- import pytesseract
3
- from PIL import Image
4
  from transformers import pipeline
 
5
 
6
  # -----------------------------
7
- # OCR FUNCTION (TESSERACT)
8
  # -----------------------------
9
- def ocr_extract(image):
10
- # Ensure image is a PIL Image
11
- if not isinstance(image, Image.Image):
12
- image = Image.fromarray(image)
13
 
14
- # Run Tesseract OCR
15
- text = pytesseract.image_to_string(image)
16
- return text.strip()
 
 
 
 
17
 
18
 
19
  # -----------------------------
20
- # QUESTION-ANSWERING MODEL
21
- # (SUPPORTED IN CONTAINER RUNTIME)
22
  # -----------------------------
23
- qa = pipeline(
24
- "document-question-answering",
25
- model="impira/layoutlm-document-qa"
26
  )
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  # -----------------------------
30
- # MAIN PROCESS FUNCTION
31
  # -----------------------------
32
  def process(image, question):
33
- try:
34
- # Extract text using Tesseract
35
- extracted_text = ocr_extract(image)
 
 
 
 
36
 
37
- if not extracted_text:
38
- return "No text could be extracted.", "No answer found."
39
 
40
- if not question:
41
- return extracted_text, "Please enter a question."
42
 
43
- # Run QA on extracted text
44
- answer = qa(image=None, question=question, context=extracted_text)
45
 
46
- # Handle list or dict outputs
47
- if isinstance(answer, list) and len(answer) > 0:
48
- return extracted_text, answer[0].get("answer", "No answer found.")
49
- if isinstance(answer, dict):
50
- return extracted_text, answer.get("answer", "No answer found.")
51
 
52
- return extracted_text, "No answer found."
 
 
53
 
54
- except Exception as e:
55
- return "Error during processing.", f"Error: {str(e)}"
56
 
 
 
 
57
 
58
- # -----------------------------
59
- # GRADIO INTERFACE
60
- # -----------------------------
61
- demo = gr.Interface(
62
- fn=process,
63
- inputs=[
64
- gr.Image(type="pil", label="Upload a document image"),
65
- gr.Textbox(label="Ask a question about the document")
66
- ],
67
- outputs=[
68
- gr.Textbox(label="Extracted Text"),
69
- gr.Textbox(label="Answer")
70
- ],
71
- title="OCR + Document QA (Tesseract Version)",
72
- description="Upload a PNG/JPG image of a document. The system extracts text using Tesseract and answers questions about it."
73
- )
74
 
75
- if __name__ == "__main__":
76
- demo.launch()
 
1
  import gradio as gr
2
+ import easyocr
 
3
  from transformers import pipeline
4
+ from PIL import Image
5
 
6
  # -----------------------------
7
+ # OCR SETUP (EasyOCR)
8
  # -----------------------------
9
# Module-level EasyOCR reader shared by every call to the OCR helper:
# English language list, GPU explicitly disabled.
reader = easyocr.Reader(["en"], gpu=False)
 
 
 
10
 
11
def extract_text(image):
    """Run OCR on an image and return the recognised text as one string.

    Args:
        image: Image handed unchanged to ``reader.readtext``.

    Returns:
        The text of every OCR result, joined with single spaces. An empty
        string is returned when the reader yields no results.
    """
    detections = reader.readtext(image)
    # Element 1 of each result is used as its text (EasyOCR's default
    # result layout is presumably (bbox, text, confidence) -- see its docs).
    fragments = (entry[1] for entry in detections)
    return " ".join(fragments)
18
 
19
 
20
  # -----------------------------
21
+ # QUESTION ANSWERING SETUP
 
22
  # -----------------------------
23
# Question-answering pipeline built once at module level from the
# "deepset/roberta-base-squad2" checkpoint and reused for every question.
qa_pipeline = pipeline(
    task="question-answering",
    model="deepset/roberta-base-squad2",
)
27
 
28
def answer_question(context, question):
    """Answer a question from extracted text using the QA pipeline.

    Args:
        context: Text to search for the answer. ``None`` is treated the
            same as an empty string.
        question: The user's question. ``None`` is treated the same as an
            empty string.

    Returns:
        The ``"answer"`` value from the pipeline result, or a user-facing
        message when the context is blank, the question is blank, or the
        result carries no ``"answer"`` key.
    """
    # Coerce None to "" before stripping so a missing input produces the
    # friendly message below instead of an AttributeError.
    if not (context or "").strip():
        return "No text extracted from the image."
    if not (question or "").strip():
        return "Please enter a question."

    # The original, unstripped values are passed through unchanged.
    result = qa_pipeline(question=question, context=context)

    return result.get("answer", "No answer found.")
43
+
44
 
45
  # -----------------------------
46
+ # GRADIO APP LOGIC
47
  # -----------------------------
48
  def process(image, question):
49
+ """
50
+ Full pipeline:
51
+ 1. Extract text from image
52
+ 2. Answer question based on extracted text
53
+ """
54
+ if image is None:
55
+ return "Please upload an image.", ""
56
 
57
+ extracted = extract_text(image)
58
+ answer = answer_question(extracted, question)
59
 
60
+ return extracted, answer
 
61
 
 
 
62
 
63
+ # -----------------------------
64
+ # GRADIO UI
65
+ # -----------------------------
66
with gr.Blocks() as demo:
    gr.Markdown("# 📘 Week 8 Multimodal OCR + QA System")

    # Input row: the document image and the question about it.
    with gr.Row():
        image_input = gr.Image(
            type="numpy",
            label="Upload Document Image",
        )
        question_input = gr.Textbox(label="Enter your question")

    run_button = gr.Button("Run OCR + QA")

    # Output row: the OCR text and the answer.
    with gr.Row():
        extracted_output = gr.Textbox(label="Extracted Text")
        answer_output = gr.Textbox(label="Answer")

    # Clicking the button calls process(image, question) and routes its
    # two return values to the two output boxes, in order.
    run_button.click(
        fn=process,
        inputs=[image_input, question_input],
        outputs=[extracted_output, answer_output],
    )

demo.launch()