Spaces:

badru
/

mmsocr

Build error

App Files Files Community

badru committed on Dec 23, 2024

Commit

3e85cab

verified ·

1 Parent(s): a54dc59

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -12

app.py CHANGED Viewed

@@ -1,10 +1,12 @@
 import streamlit as st
 from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 from PIL import Image
 # Load the processor and model
-st.title("MMSai Meeeting Image Tools")
 @st.cache_resource
 def load_model():
     processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
@@ -13,7 +15,34 @@ def load_model():
 processor, model = load_model()
-# File uploader
 uploaded_file = st.file_uploader("Upload an Image (JPG, JPEG, PNG)", type=["jpg", "jpeg", "png"])
 if uploaded_file is not None:
@@ -24,21 +53,34 @@ if uploaded_file is not None:
         st.write("Processing the image...")
-        # Prepare the image for OCR
-        pixel_values = processor(images=image, return_tensors="pt").pixel_values
-        # Generate text from the image
-        generated_ids = model.generate(pixel_values)
-        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-        # Display the extracted text as multiline
         st.subheader("Extracted Text:")
-        st.text_area("Output Text", generated_text, height=200)
-        # Provide option to download the extracted text
         st.download_button(
             label="Download Text",
-            data=generated_text,
             file_name="extracted_text.txt",
             mime="text/plain",
         )

 import streamlit as st
 from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 from PIL import Image
+import torch
+import cv2
+import numpy as np
+import tempfile
 # Load the processor and model
 @st.cache_resource
 def load_model():
     processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
 processor, model = load_model()
+# Helper that cuts an image into rectangular crops, one per external contour
+# found after binarisation, ordered top-to-bottom by the crop's top edge.
+#
+# Parameter:
+#   image -- the uploaded picture; it is passed straight to np.array().
+#            NOTE(review): the COLOR_RGB2GRAY conversion below presumably
+#            needs a colour (RGB-ordered) array; a grayscale or palette
+#            image may fail here -- confirm what callers pass in.
+# Returns:
+#   A list of NumPy arrays, each sliced from the original (un-thresholded)
+#   pixel data using one contour's bounding rectangle.
+#
+# NOTE(review): a contour is a single connected foreground region, so a crop
+# may be one character or stroke rather than a whole text line, even though
+# the caller reports the count as "lines" -- verify on real handwriting.
+def detect_lines(image):
+    # Convert the PIL image to a NumPy array so OpenCV can operate on it
+    image_np = np.array(image)
+    # Collapse the colour channels to a single grayscale channel
+    gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
+    # Inverted binary threshold: dark ink becomes white (255) foreground.
+    # NOTE(review): per the OpenCV docs, THRESH_OTSU picks the threshold
+    # automatically, so the literal 128 is presumably ignored -- confirm.
+    _, binary = cv2.threshold(gray, 128, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
+    # Find only the outermost contours of the foreground regions
+    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    # Reduce each contour to its (x, y, w, h) bounding rectangle
+    bounding_boxes = [cv2.boundingRect(c) for c in contours]
+    bounding_boxes = sorted(bounding_boxes, key=lambda b: b[1])  # Order by top edge (y) only; x is not considered
+    line_images = []
+    for (x, y, w, h) in bounding_boxes:
+        # Crop the rectangle out of the original array, not the binary mask
+        line = image_np[y:y+h, x:x+w]
+        line_images.append(line)
+    return line_images
+# Streamlit app
+st.title("MMSai Meeeting Image Tools 1.0")
 uploaded_file = st.file_uploader("Upload an Image (JPG, JPEG, PNG)", type=["jpg", "jpeg", "png"])
 if uploaded_file is not None:
         st.write("Processing the image...")
+        # Detect lines in the image
+        line_images = detect_lines(image)
+        st.write(f"Detected {len(line_images)} lines in the image.")
+        # Perform OCR on each detected line
+        extracted_text = ""
+        for idx, line_img in enumerate(line_images):
+            # Convert the line image to PIL format
+            line_pil = Image.fromarray(line_img)
+            # Prepare the image for OCR
+            pixel_values = processor(images=line_pil, return_tensors="pt").pixel_values
+            # Generate text from the line image
+            generated_ids = model.generate(pixel_values)
+            generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+            # Append the extracted text
+            extracted_text += f"Line {idx + 1}: {generated_text}\n"
+        # Display the extracted text
         st.subheader("Extracted Text:")
+        st.text_area("Output Text", extracted_text, height=200)
+        # Provide an option to download the extracted text
         st.download_button(
             label="Download Text",
+            data=extracted_text,
             file_name="extracted_text.txt",
             mime="text/plain",
         )