Spaces:

kartai
/

CAD-AID

Runtime error

Update src/utils/text_ocr.py

by juliajo - opened May 14, 2025

←

Files changed (1) hide show

src/utils/text_ocr.py CHANGED Viewed

@@ -5,6 +5,7 @@ import cv2
 import io
 import re
 import pandas as pd
 from azure.ai.vision.imageanalysis import ImageAnalysisClient
 from azure.ai.vision.imageanalysis.models import VisualFeatures
@@ -100,6 +101,25 @@ def easy_ocr_detection(image_path):
     return detected_text
 def plot_text_bboxes(image_path,detected_text):
     img = cv2.imread(image_path)

 import io
 import re
 import pandas as pd
+import pytesseract
 from azure.ai.vision.imageanalysis import ImageAnalysisClient
 from azure.ai.vision.imageanalysis.models import VisualFeatures
     return detected_text
+def pytesseract_ocr_detection(image_path):
+    """Run Tesseract OCR on an image and collect every non-empty text box.
+
+    The image is read with OpenCV, converted to grayscale and binarised
+    (inverted binary + Otsu flags) before being passed to
+    ``pytesseract.image_to_data`` with ``lang='nor'``.
+
+    Args:
+        image_path: Path of the image file handed to ``cv2.imread``.
+
+    Returns:
+        A list of ``(text, (x_min, y_min, x_max, y_max))`` tuples, one for
+        each OCR entry whose stripped text is not empty.
+    """
+    bgr = cv2.imread(image_path)
+    grayscale = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
+    # NOTE(review): with THRESH_OTSU set, OpenCV is documented to choose the
+    # threshold itself, so the 180 passed here is presumably ignored - confirm.
+    binarised = cv2.threshold(
+        grayscale, 180, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
+    )[1]
+    ocr = pytesseract.image_to_data(
+        binarised, lang='nor', output_type=pytesseract.Output.DICT
+    )
+
+    boxes = []
+    for idx, raw in enumerate(ocr['text']):
+        word = raw.strip()
+        # Skip entries that carry no recognised text.
+        if word == "":
+            continue
+        left, top = ocr['left'][idx], ocr['top'][idx]
+        right = left + ocr['width'][idx]
+        bottom = top + ocr['height'][idx]
+        # Store the box as corner coordinates rather than origin + size.
+        boxes.append((word, (left, top, right, bottom)))
+    return boxes
 def plot_text_bboxes(image_path,detected_text):
     img = cv2.imread(image_path)