Spaces:

aryan365
/

Robogrid

Runtime error

aryan365 committed on Oct 20, 2024

Commit

3fe9bd1

verified ·

1 Parent(s): 6665e6c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,13 +2,15 @@ import base64
 import cv2
 import numpy as np
 from flask import Flask, request, jsonify, render_template
-from tensorflow import keras
 import sys
 import io
 from ultralytics import YOLO
 import time
 import easyocr
 import pytesseract
 # Set the default encoding to utf-8
@@ -90,6 +92,11 @@ def detect():
         return jsonify({'error': str(e)}), 500
 @app.route('/ocr', methods=['POST'])
 def ocr():
     try:
@@ -103,11 +110,13 @@ def ocr():
         # Convert the NumPy array into an OpenCV image (BGR format)
         image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
-        # Convert the image to RGB (as pytesseract expects RGB format)
-        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-        # Use pytesseract to extract text
-        extracted_text = pytesseract.image_to_string(image_rgb)
         # Check if any text was detected
         text_detected = bool(extracted_text.strip())  # True if text is detected, False otherwise

 import cv2
 import numpy as np
 from flask import Flask, request, jsonify, render_template
 import sys
 import io
 from ultralytics import YOLO
 import time
 import easyocr
 import pytesseract
+from transformers import TrOCRProcessor, VisionEncoderDecoderModel
+from PIL import Image
+import torch
 # Set the default encoding to utf-8
         return jsonify({'error': str(e)}), 500
+# Load the processor and model
+processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-printed')
+model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-printed')
 @app.route('/ocr', methods=['POST'])
 def ocr():
     try:
         # Convert the NumPy array into an OpenCV image (BGR format)
         image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
+        # Convert the image to a PIL image (as required by TrOCR)
+        image_pil = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+        # Use the processor and model to extract text
+        pixel_values = processor(images=image_pil, return_tensors="pt").pixel_values
+        generated_ids = model.generate(pixel_values)
+        extracted_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
         # Check if any text was detected
         text_detected = bool(extracted_text.strip())  # True if text is detected, False otherwise