Spaces:

aryan365
/

Robogrid

Runtime error

App Files Community

aryan365 committed on Oct 19, 2024

Commit

b19be11

verified ·

1 Parent(s): db36ba2

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -10

app.py CHANGED Viewed

@@ -7,7 +7,12 @@ import sys
 import io
 from ultralytics import YOLO
 import time
-import pytesseract
 # Set the default encoding to utf-8
 sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
@@ -64,7 +69,8 @@ def predict():
         return jsonify({'prediction': prediction_label})
     except Exception as e:
         return jsonify({'error': str(e)}), 500
 @app.route('/textis', methods=['POST'])
 def textis():
     try:
@@ -78,16 +84,23 @@ def textis():
         # Convert the NumPy array into an OpenCV image (BGR format)
         image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
-        # Convert to RGB format
-        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-        extracted_text = pytesseract.image_to_string(gray_image)
-    # Return True if any text was detected, otherwise False
-        istext=bool(extracted_text.strip())
-        # Resize the image to the expected input size of the model (e.g., 225x225)
-        # Make a prediction using the vegetable classification modia
-        return jsonify({'prediction': istext})
     except Exception as e:
         return jsonify({'error': str(e)}), 500

 import io
 from ultralytics import YOLO
 import time
+from transformers import DonutProcessor, VisionEncoderDecoderModel
+from PIL import Image
+# Load the Donut model and processor (adjust the model name if needed)
+processor = DonutProcessor.from_pretrained('naver-clova-ix/donut-base')
+model = VisionEncoderDecoderModel.from_pretrained('naver-clova-ix/donut-base')
 # Set the default encoding to utf-8
 sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
         return jsonify({'prediction': prediction_label})
     except Exception as e:
         return jsonify({'error': str(e)}), 500
 @app.route('/textis', methods=['POST'])
 def textis():
     try:
         # Convert the NumPy array into an OpenCV image (BGR format)
         image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
+        # Convert OpenCV image to PIL Image (Donut requires PIL format)
+        image_pil = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+        # Preprocess the image using Donut's processor
+        pixel_values = processor(image_pil, return_tensors="pt").pixel_values
+        # Perform inference with the Donut model
+        generated_ids = model.generate(pixel_values)
+        # Decode the generated IDs to extract text
+        extracted_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+        # Check if any text is detected
+        istext = bool(extracted_text.strip())
+        # Return the result as JSON
+        return jsonify({'prediction': istext, 'extracted_text': extracted_text})
     except Exception as e:
         return jsonify({'error': str(e)}), 500