Zeeshan24 committed on
Commit
5b20f3a
·
verified ·
1 Parent(s): 421fc43

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -2
app.py CHANGED
@@ -14,11 +14,17 @@ qa_pipeline = pipeline("question-answering", model="distilbert-base-uncased-dist
14
 
15
  # Preprocess image for better OCR performance
16
  def preprocess_image(image_file):
 
17
  image = np.array(Image.open(image_file).convert("RGB"))
 
 
18
  gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) # Convert to grayscale
19
  blurred = cv2.GaussianBlur(gray, (5, 5), 0) # Remove noise
20
- thresh = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1] # Increase contrast
21
- return Image.fromarray(thresh) # Convert back to PIL format
 
 
 
22
 
23
  # Extract text using TrOCR
24
  def extract_text_from_handwriting(image):
 
14
 
15
  # Preprocess image for better OCR performance
16
  def preprocess_image(image_file):
17
+ # Convert image to OpenCV format (numpy array)
18
  image = np.array(Image.open(image_file).convert("RGB"))
19
+
20
+ # Preprocessing: Grayscale, blur, threshold (to clean up image)
21
  gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) # Convert to grayscale
22
  blurred = cv2.GaussianBlur(gray, (5, 5), 0) # Remove noise
23
+ thresh = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1] # Enhance contrast
24
+
25
+ # Convert back to RGB (3-channel) format for compatibility with TrOCR
26
+ preprocessed_image = cv2.cvtColor(thresh, cv2.COLOR_GRAY2RGB)
27
+ return Image.fromarray(preprocessed_image) # Convert back to PIL format
28
 
29
  # Extract text using TrOCR
30
  def extract_text_from_handwriting(image):