aryan365 committed on
Commit
b19be11
·
verified ·
1 Parent(s): db36ba2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -10
app.py CHANGED
@@ -7,7 +7,12 @@ import sys
7
  import io
8
  from ultralytics import YOLO
9
  import time
10
- import pytesseract
 
 
 
 
 
11
 
12
  # Set the default encoding to utf-8
13
  sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
@@ -64,7 +69,8 @@ def predict():
64
  return jsonify({'prediction': prediction_label})
65
  except Exception as e:
66
  return jsonify({'error': str(e)}), 500
67
-
 
68
  @app.route('/textis', methods=['POST'])
69
  def textis():
70
  try:
@@ -78,16 +84,23 @@ def textis():
78
  # Convert the NumPy array into an OpenCV image (BGR format)
79
  image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
80
 
81
- # Convert to grayscale
82
- image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
83
- extracted_text = pytesseract.image_to_string(gray_image)
84
 
85
- # Return True if any text was detected, otherwise False
86
- istext=bool(extracted_text.strip())
87
 
88
- # Resize the image to the expected input size of the model (e.g., 225x225)
89
- # Make a prediction using the vegetable classification model
90
- return jsonify({'prediction': istext})
 
 
 
 
 
 
 
 
91
  except Exception as e:
92
  return jsonify({'error': str(e)}), 500
93
 
 
7
  import io
8
  from ultralytics import YOLO
9
  import time
10
+ from transformers import DonutProcessor, VisionEncoderDecoderModel
11
+ from PIL import Image
12
+
13
+ # Load the Donut model and processor (adjust the model name if needed)
14
+ processor = DonutProcessor.from_pretrained('naver-clova-ix/donut-base')
15
+ model = VisionEncoderDecoderModel.from_pretrained('naver-clova-ix/donut-base')
16
 
17
  # Set the default encoding to utf-8
18
  sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
 
69
  return jsonify({'prediction': prediction_label})
70
  except Exception as e:
71
  return jsonify({'error': str(e)}), 500
72
+
73
+
74
  @app.route('/textis', methods=['POST'])
75
  def textis():
76
  try:
 
84
  # Convert the NumPy array into an OpenCV image (BGR format)
85
  image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
86
 
87
+ # Convert OpenCV image to PIL Image (Donut requires PIL format)
88
+ image_pil = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
 
89
 
90
+ # Preprocess the image using Donut's processor
91
+ pixel_values = processor(image_pil, return_tensors="pt").pixel_values
92
 
93
+ # Perform inference with the Donut model
94
+ generated_ids = model.generate(pixel_values)
95
+
96
+ # Decode the generated IDs to extract text
97
+ extracted_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
98
+
99
+ # Check if any text is detected
100
+ istext = bool(extracted_text.strip())
101
+
102
+ # Return the result as JSON
103
+ return jsonify({'prediction': istext, 'extracted_text': extracted_text})
104
  except Exception as e:
105
  return jsonify({'error': str(e)}), 500
106