Spaces:

NaqchoAli
/

textExtractor

Runtime error

App Files Files Community

NaqchoAli committed on Dec 22, 2024

Commit

7914369

verified ·

1 Parent(s): 47ea833

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -2

app.py CHANGED Viewed

@@ -1,8 +1,9 @@
 import streamlit as st
-from PIL import Image
 import requests
 from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 import torch
 # Load pre-trained TrOCR model and processor
 processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
@@ -11,6 +12,9 @@ model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwrit
 # Function to perform OCR and extract text
 def extract_text_from_image(image):
     # Preprocess the image
     pixel_values = processor(images=image, return_tensors="pt").pixel_values
     # Ensure the model is in evaluation mode
@@ -24,6 +28,24 @@ def extract_text_from_image(image):
     text = processor.decode(generated_ids[0], skip_special_tokens=True)
     return text
 # Streamlit UI
 st.title("OCR Text Extraction from Image")
@@ -47,4 +69,3 @@ if uploaded_file is not None:
             extracted_text = extract_text_from_image(image)
             st.subheader("Extracted Text:")
             st.write(extracted_text)

 import streamlit as st
+from PIL import Image, ImageOps, ImageEnhance
 import requests
 from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 import torch
+import numpy as np
 # Load pre-trained TrOCR model and processor
 processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
 # Function to perform OCR and extract text
 def extract_text_from_image(image):
     # Preprocess the image
+    image = preprocess_image(image)
+    # Convert image to tensor
     pixel_values = processor(images=image, return_tensors="pt").pixel_values
     # Ensure the model is in evaluation mode
     text = processor.decode(generated_ids[0], skip_special_tokens=True)
     return text
+# Image preprocessing function to enhance OCR performance
+def preprocess_image(image):
+    # Convert image to grayscale
+    image = image.convert("RGB")
+    image = ImageOps.grayscale(image)
+    # Enhance the image for better contrast (optional)
+    enhancer = ImageEnhance.Contrast(image)
+    image = enhancer.enhance(2.0)  # Enhance contrast
+    # Apply thresholding to make text more distinct (optional)
+    image = image.point(lambda p: p > 200 and 255)  # Simple thresholding
+    # Resize the image to a size appropriate for OCR
+    image = image.resize((384, 384))  # Adjust this size as needed
+    return image
 # Streamlit UI
 st.title("OCR Text Extraction from Image")
             extracted_text = extract_text_from_image(image)
             st.subheader("Extracted Text:")
             st.write(extracted_text)