aryan365 committed on
Commit
3fe9bd1
·
verified ·
1 Parent(s): 6665e6c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -5
app.py CHANGED
@@ -2,13 +2,15 @@ import base64
2
  import cv2
3
  import numpy as np
4
  from flask import Flask, request, jsonify, render_template
5
- from tensorflow import keras
6
  import sys
7
  import io
8
  from ultralytics import YOLO
9
  import time
10
  import easyocr
11
  import pytesseract
 
 
 
12
 
13
 
14
  # Set the default encoding to utf-8
@@ -90,6 +92,11 @@ def detect():
90
  return jsonify({'error': str(e)}), 500
91
 
92
 
 
 
 
 
 
93
  @app.route('/ocr', methods=['POST'])
94
  def ocr():
95
  try:
@@ -103,11 +110,13 @@ def ocr():
103
  # Convert the NumPy array into an OpenCV image (BGR format)
104
  image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
105
 
106
- # Convert the image to RGB (as pytesseract expects RGB format)
107
- image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
108
 
109
- # Use pytesseract to extract text
110
- extracted_text = pytesseract.image_to_string(image_rgb)
 
 
111
 
112
  # Check if any text was detected
113
  text_detected = bool(extracted_text.strip()) # True if text is detected, False otherwise
 
2
  import cv2
3
  import numpy as np
4
  from flask import Flask, request, jsonify, render_template
 
5
  import sys
6
  import io
7
  from ultralytics import YOLO
8
  import time
9
  import easyocr
10
  import pytesseract
11
+ from transformers import TrOCRProcessor, VisionEncoderDecoderModel
12
+ from PIL import Image
13
+ import torch
14
 
15
 
16
  # Set the default encoding to utf-8
 
92
  return jsonify({'error': str(e)}), 500
93
 
94
 
95
+
96
+ # Load the processor and model
97
+ processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-printed')
98
+ model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-printed')
99
+
100
  @app.route('/ocr', methods=['POST'])
101
  def ocr():
102
  try:
 
110
  # Convert the NumPy array into an OpenCV image (BGR format)
111
  image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
112
 
113
+ # Convert the image to a PIL image (as required by TrOCR)
114
+ image_pil = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
115
 
116
+ # Use the processor and model to extract text
117
+ pixel_values = processor(images=image_pil, return_tensors="pt").pixel_values
118
+ generated_ids = model.generate(pixel_values)
119
+ extracted_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
120
 
121
  # Check if any text was detected
122
  text_detected = bool(extracted_text.strip()) # True if text is detected, False otherwise