Update app.py
Browse files
app.py
CHANGED
|
@@ -7,7 +7,12 @@ import sys
|
|
| 7 |
import io
|
| 8 |
from ultralytics import YOLO
|
| 9 |
import time
|
| 10 |
-
import pytesseract
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
# Set the default encoding to utf-8
|
| 13 |
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
|
|
@@ -64,7 +69,8 @@ def predict():
|
|
| 64 |
return jsonify({'prediction': prediction_label})
|
| 65 |
except Exception as e:
|
| 66 |
return jsonify({'error': str(e)}), 500
|
| 67 |
-
|
|
|
|
| 68 |
@app.route('/textis', methods=['POST'])
|
| 69 |
def textis():
|
| 70 |
try:
|
|
@@ -78,16 +84,23 @@ def textis():
|
|
| 78 |
# Convert the NumPy array into an OpenCV image (BGR format)
|
| 79 |
image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
|
| 80 |
|
| 81 |
-
# Convert to grayscale
|
| 82 |
-
|
| 83 |
-
extracted_text = pytesseract.image_to_string(gray_image)
|
| 84 |
|
| 85 |
-
|
| 86 |
-
|
| 87 |
|
| 88 |
-
#
|
| 89 |
-
|
| 90 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
except Exception as e:
|
| 92 |
return jsonify({'error': str(e)}), 500
|
| 93 |
|
|
|
|
| 7 |
import io
|
| 8 |
from ultralytics import YOLO
|
| 9 |
import time
|
| 10 |
+
from transformers import DonutProcessor, VisionEncoderDecoderModel
|
| 11 |
+
from PIL import Image
|
| 12 |
+
|
| 13 |
+
# Load the Donut model and processor (adjust the model name if needed)
|
| 14 |
+
processor = DonutProcessor.from_pretrained('naver-clova-ix/donut-base')
|
| 15 |
+
model = VisionEncoderDecoderModel.from_pretrained('naver-clova-ix/donut-base')
|
| 16 |
|
| 17 |
# Set the default encoding to utf-8
|
| 18 |
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
|
|
|
|
| 69 |
return jsonify({'prediction': prediction_label})
|
| 70 |
except Exception as e:
|
| 71 |
return jsonify({'error': str(e)}), 500
|
| 72 |
+
|
| 73 |
+
|
| 74 |
@app.route('/textis', methods=['POST'])
|
| 75 |
def textis():
|
| 76 |
try:
|
|
|
|
| 84 |
# Convert the NumPy array into an OpenCV image (BGR format)
|
| 85 |
image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
|
| 86 |
|
| 87 |
+
# Convert OpenCV image to PIL Image (Donut requires PIL format)
|
| 88 |
+
image_pil = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
|
|
|
|
| 89 |
|
| 90 |
+
# Preprocess the image using Donut's processor
|
| 91 |
+
pixel_values = processor(image_pil, return_tensors="pt").pixel_values
|
| 92 |
|
| 93 |
+
# Perform inference with the Donut model
|
| 94 |
+
generated_ids = model.generate(pixel_values)
|
| 95 |
+
|
| 96 |
+
# Decode the generated IDs to extract text
|
| 97 |
+
extracted_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
| 98 |
+
|
| 99 |
+
# Check if any text is detected
|
| 100 |
+
istext = bool(extracted_text.strip())
|
| 101 |
+
|
| 102 |
+
# Return the result as JSON
|
| 103 |
+
return jsonify({'prediction': istext, 'extracted_text': extracted_text})
|
| 104 |
except Exception as e:
|
| 105 |
return jsonify({'error': str(e)}), 500
|
| 106 |
|