badru committed on
Commit
3e85cab
·
verified ·
1 Parent(s): a54dc59

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -12
app.py CHANGED
@@ -1,10 +1,12 @@
1
  import streamlit as st
2
  from transformers import TrOCRProcessor, VisionEncoderDecoderModel
3
  from PIL import Image
 
 
 
 
4
 
5
  # Load the processor and model
6
- st.title("MMSai Meeeting Image Tools")
7
-
8
  @st.cache_resource
9
  def load_model():
10
  processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
@@ -13,7 +15,34 @@ def load_model():
13
 
14
  processor, model = load_model()
15
 
16
- # File uploader
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  uploaded_file = st.file_uploader("Upload an Image (JPG, JPEG, PNG)", type=["jpg", "jpeg", "png"])
18
 
19
  if uploaded_file is not None:
@@ -24,21 +53,34 @@ if uploaded_file is not None:
24
 
25
  st.write("Processing the image...")
26
 
27
- # Prepare the image for OCR
28
- pixel_values = processor(images=image, return_tensors="pt").pixel_values
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
- # Generate text from the image
31
- generated_ids = model.generate(pixel_values)
32
- generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
33
 
34
- # Display the extracted text as multiline
35
  st.subheader("Extracted Text:")
36
- st.text_area("Output Text", generated_text, height=200)
37
 
38
- # Provide option to download the extracted text
39
  st.download_button(
40
  label="Download Text",
41
- data=generated_text,
42
  file_name="extracted_text.txt",
43
  mime="text/plain",
44
  )
 
1
  import streamlit as st
2
  from transformers import TrOCRProcessor, VisionEncoderDecoderModel
3
  from PIL import Image
4
+ import torch
5
+ import cv2
6
+ import numpy as np
7
+ import tempfile
8
 
9
  # Load the processor and model
 
 
10
  @st.cache_resource
11
  def load_model():
12
  processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
 
15
 
16
  processor, model = load_model()
17
 
18
+ # Helper function to preprocess the image and detect lines
19
def detect_lines(image, *, min_line_height=8, min_line_width=8, merge_width=None):
    """Split an image of text into one crop per detected text line.

    The image is binarised with Otsu's method (ink becomes foreground),
    the foreground is smeared horizontally so that the characters and
    words of one line fuse into a single blob, and the bounding box of
    each blob is cut out of the image.

    Args:
        image: A PIL image (anything exposing ``convert``) or an array-like
            accepted by ``np.array``. Arrays are assumed to be uint8 in
            either HxW (grayscale) or HxWx3 RGB layout -- TODO confirm
            against callers if arrays are ever passed directly.
        min_line_height: Boxes shorter than this many pixels are dropped
            as noise (specks, stray dots).
        min_line_width: Boxes narrower than this many pixels are dropped
            as noise.
        merge_width: Width in pixels of the horizontal dilation kernel
            used to fuse neighbouring characters. ``None`` picks a
            heuristic of one tenth of the image width (at least 15 px).

    Returns:
        A list of NumPy arrays, one per detected line, ordered top to
        bottom (ties broken left to right). Empty when the image has no
        pixels or nothing passes the size filter.
    """
    # Normalise PIL modes (RGBA, palette, grayscale, ...) to plain RGB so
    # the colour conversion below and the downstream crops always see
    # three channels.
    if hasattr(image, "convert"):
        image = image.convert("RGB")
    image_np = np.array(image)

    # Nothing to segment; bail out before OpenCV rejects an empty array.
    if image_np.size == 0:
        return []

    # A 2-D array is already single-channel; cvtColor would raise on it.
    if image_np.ndim == 2:
        gray = image_np
    else:
        gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)

    # With THRESH_OTSU the threshold is computed from the histogram and the
    # value passed here is ignored, so pass 0 rather than a misleading 128.
    # THRESH_BINARY_INV makes dark ink the white foreground.
    _, binary = cv2.threshold(
        gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )

    # Without this step every connected ink blob (a letter, a stroke) would
    # produce its own contour. Dilating with a wide, one-pixel-tall kernel
    # bridges the gaps between characters and words on the same row while
    # leaving the vertical gaps between rows open.
    if merge_width is None:
        merge_width = max(15, binary.shape[1] // 10)  # heuristic default
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (max(1, int(merge_width)), 1))
    merged = cv2.dilate(binary, kernel, iterations=1)

    contours, _ = cv2.findContours(
        merged, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    # boundingRect yields (x, y, w, h); order by top edge, then left edge.
    boxes = sorted(
        (cv2.boundingRect(contour) for contour in contours),
        key=lambda box: (box[1], box[0]),
    )

    # Crop from the un-dilated image so the recogniser sees the original ink.
    return [
        image_np[y:y + h, x:x + w]
        for (x, y, w, h) in boxes
        if h >= min_line_height and w >= min_line_width
    ]
43
+
44
+ # Streamlit app
45
+ st.title("MMSai Meeeting Image Tools 1.0")
46
  uploaded_file = st.file_uploader("Upload an Image (JPG, JPEG, PNG)", type=["jpg", "jpeg", "png"])
47
 
48
  if uploaded_file is not None:
 
53
 
54
  st.write("Processing the image...")
55
 
56
+ # Detect lines in the image
57
+ line_images = detect_lines(image)
58
+ st.write(f"Detected {len(line_images)} lines in the image.")
59
+
60
+ # Perform OCR on each detected line
61
+ extracted_text = ""
62
+ for idx, line_img in enumerate(line_images):
63
+ # Convert the line image to PIL format
64
+ line_pil = Image.fromarray(line_img)
65
+
66
+ # Prepare the image for OCR
67
+ pixel_values = processor(images=line_pil, return_tensors="pt").pixel_values
68
+
69
+ # Generate text from the line image
70
+ generated_ids = model.generate(pixel_values)
71
+ generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
72
 
73
+ # Append the extracted text
74
+ extracted_text += f"Line {idx + 1}: {generated_text}\n"
 
75
 
76
+ # Display the extracted text
77
  st.subheader("Extracted Text:")
78
+ st.text_area("Output Text", extracted_text, height=200)
79
 
80
+ # Provide an option to download the extracted text
81
  st.download_button(
82
  label="Download Text",
83
+ data=extracted_text,
84
  file_name="extracted_text.txt",
85
  mime="text/plain",
86
  )