Update app.py
Browse files
app.py
CHANGED
|
@@ -2,13 +2,15 @@ import base64
|
|
| 2 |
import cv2
|
| 3 |
import numpy as np
|
| 4 |
from flask import Flask, request, jsonify, render_template
|
| 5 |
-
from tensorflow import keras
|
| 6 |
import sys
|
| 7 |
import io
|
| 8 |
from ultralytics import YOLO
|
| 9 |
import time
|
| 10 |
import easyocr
|
| 11 |
import pytesseract
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
|
| 14 |
# Set the default encoding to utf-8
|
|
@@ -90,6 +92,11 @@ def detect():
|
|
| 90 |
return jsonify({'error': str(e)}), 500
|
| 91 |
|
| 92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
@app.route('/ocr', methods=['POST'])
|
| 94 |
def ocr():
|
| 95 |
try:
|
|
@@ -103,11 +110,13 @@ def ocr():
|
|
| 103 |
# Convert the NumPy array into an OpenCV image (BGR format)
|
| 104 |
image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
|
| 105 |
|
| 106 |
-
# Convert the image to
|
| 107 |
-
|
| 108 |
|
| 109 |
-
# Use
|
| 110 |
-
|
|
|
|
|
|
|
| 111 |
|
| 112 |
# Check if any text was detected
|
| 113 |
text_detected = bool(extracted_text.strip()) # True if text is detected, False otherwise
|
|
|
|
| 2 |
import cv2
|
| 3 |
import numpy as np
|
| 4 |
from flask import Flask, request, jsonify, render_template
|
|
|
|
| 5 |
import sys
|
| 6 |
import io
|
| 7 |
from ultralytics import YOLO
|
| 8 |
import time
|
| 9 |
import easyocr
|
| 10 |
import pytesseract
|
| 11 |
+
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
|
| 12 |
+
from PIL import Image
|
| 13 |
+
import torch
|
| 14 |
|
| 15 |
|
| 16 |
# Set the default encoding to utf-8
|
|
|
|
| 92 |
return jsonify({'error': str(e)}), 500
|
| 93 |
|
| 94 |
|
| 95 |
+
|
| 96 |
+
# Load the TrOCR processor and model once at module level so every /ocr request reuses them
|
| 97 |
+
processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-printed')
|
| 98 |
+
model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-printed')
|
| 99 |
+
|
| 100 |
@app.route('/ocr', methods=['POST'])
|
| 101 |
def ocr():
|
| 102 |
try:
|
|
|
|
| 110 |
# Convert the NumPy array into an OpenCV image (BGR format)
|
| 111 |
image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
|
| 112 |
|
| 113 |
+
# Convert the BGR OpenCV image to an RGB PIL image before passing it to the TrOCR processor
|
| 114 |
+
image_pil = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
|
| 115 |
|
| 116 |
+
# Use the processor and model to extract text
|
| 117 |
+
pixel_values = processor(images=image_pil, return_tensors="pt").pixel_values
|
| 118 |
+
generated_ids = model.generate(pixel_values)
|
| 119 |
+
extracted_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
| 120 |
|
| 121 |
# Check if any text was detected
|
| 122 |
text_detected = bool(extracted_text.strip()) # True if text is detected, False otherwise
|