NaqchoAli committed on
Commit
7914369
·
verified ·
1 Parent(s): 47ea833

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -2
app.py CHANGED
@@ -1,8 +1,9 @@
1
  import streamlit as st
2
- from PIL import Image
3
  import requests
4
  from transformers import TrOCRProcessor, VisionEncoderDecoderModel
5
  import torch
 
6
 
7
  # Load pre-trained TrOCR model and processor
8
  processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
@@ -11,6 +12,9 @@ model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwrit
11
  # Function to perform OCR and extract text
12
  def extract_text_from_image(image):
13
  # Preprocess the image
 
 
 
14
  pixel_values = processor(images=image, return_tensors="pt").pixel_values
15
 
16
  # Ensure the model is in evaluation mode
@@ -24,6 +28,24 @@ def extract_text_from_image(image):
24
  text = processor.decode(generated_ids[0], skip_special_tokens=True)
25
  return text
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  # Streamlit UI
28
  st.title("OCR Text Extraction from Image")
29
 
@@ -47,4 +69,3 @@ if uploaded_file is not None:
47
  extracted_text = extract_text_from_image(image)
48
  st.subheader("Extracted Text:")
49
  st.write(extracted_text)
50
-
 
1
  import streamlit as st
2
+ from PIL import Image, ImageOps, ImageEnhance
3
  import requests
4
  from transformers import TrOCRProcessor, VisionEncoderDecoderModel
5
  import torch
6
+ import numpy as np
7
 
8
  # Load pre-trained TrOCR model and processor
9
  processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
 
12
  # Function to perform OCR and extract text
13
  def extract_text_from_image(image):
14
  # Preprocess the image
15
+ image = preprocess_image(image)
16
+
17
+ # Convert image to tensor
18
  pixel_values = processor(images=image, return_tensors="pt").pixel_values
19
 
20
  # Ensure the model is in evaluation mode
 
28
  text = processor.decode(generated_ids[0], skip_special_tokens=True)
29
  return text
30
 
31
# Image preprocessing function to enhance OCR performance
def preprocess_image(image):
    """Return a contrast-boosted, binarized 384x384 RGB copy of *image*.

    Steps, in order: normalize to RGB, reduce to grayscale, double the
    contrast, threshold at 200 (pixels above become white, all others
    black), resize to 384x384, and convert back to RGB.

    Args:
        image: A PIL image in any mode that ``convert("RGB")`` accepts.

    Returns:
        A new 384x384 PIL image in mode "RGB" whose pixels are pure black
        or pure white immediately after thresholding (resizing may
        reintroduce intermediate gray values along edges).
    """
    # Normalize the mode first so palette/alpha images reduce to grayscale
    # in a predictable way.
    gray = ImageOps.grayscale(image.convert("RGB"))

    # Enhance the image for better contrast (factor 2.0 doubles contrast)
    boosted = ImageEnhance.Contrast(gray).enhance(2.0)

    # Simple thresholding: explicit 255/0 mapping instead of relying on
    # ``p > 200 and 255`` evaluating to ``False`` for dark pixels.
    binary = boosted.point(lambda p: 255 if p > 200 else 0)

    # Resize the image to a size appropriate for OCR.
    # NOTE(review): this ignores aspect ratio, same as before - confirm
    # that stretching is acceptable for the expected inputs.
    resized = binary.resize((384, 384))

    # Hand back a 3-channel image: the TrOCR usage examples feed RGB
    # images to the processor, and a single-channel "L" image may not be
    # accepted by it.
    return resized.convert("RGB")
48
+
49
  # Streamlit UI
50
  st.title("OCR Text Extraction from Image")
51
 
 
69
  extracted_text = extract_text_from_image(image)
70
  st.subheader("Extracted Text:")
71
  st.write(extracted_text)