import cv2, time, os
import numpy as np
from edocr2 import tools
from pdf2image import convert_from_path

file_path = 'tests/test_samples/Candle_holder.jpg'
language = 'eng'

# Open the file: rasterize the first page of a PDF and binarize it, or read the image directly
if file_path.endswith('.pdf') or file_path.endswith('.PDF'):
    img = convert_from_path(file_path)
    img = np.array(img[0])
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, img = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
    img = cv2.merge([img, img, img])
else:
    img = cv2.imread(file_path)

filename = os.path.splitext(os.path.basename(file_path))[0]
output_path = os.path.join('.', filename)

#region ############ Segmentation Task ####################
# Split the drawing into its frame, GD&T boxes, tables and dimension box candidates
img_boxes, frame, gdt_boxes, tables, dim_boxes = tools.layer_segm.segment_img(
    img, autoframe=True, frame_thres=0.7, GDT_thres=0.02, binary_thres=127)
#endregion

#region ######## Set Session ##############################
start_time = time.time()
#os.environ['TF_XLA_FLAGS'] = '--tf_xla_enable_xla_devices'
import tensorflow as tf
from edocr2.keras_ocr.recognition import Recognizer
from edocr2.keras_ocr.detection import Detector

# Configure GPU memory growth
gpus = tf.config.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

# Load models
gdt_model = 'edocr2/models/recognizer_gdts.keras'
dim_model = 'edocr2/models/recognizer_dimensions_2.keras'
detector_model = None #'edocr2/models/detector_12_46.keras'

recognizer_gdt = None
if gdt_boxes:
    recognizer_gdt = Recognizer(alphabet=tools.ocr_pipelines.read_alphabet(gdt_model))
    recognizer_gdt.model.load_weights(gdt_model)

alphabet_dim = tools.ocr_pipelines.read_alphabet(dim_model)
recognizer_dim = Recognizer(alphabet=alphabet_dim)
recognizer_dim.model.load_weights(dim_model)

detector = Detector()
if detector_model:
    detector.model.load_weights(detector_model)

end_time = time.time()
print(f"\033[1;33mLoading session took {end_time - start_time:.6f} seconds to run.\033[0m")
#endregion

#region ############ OCR Tables ###########################
process_img = img.copy()
table_results, updated_tables, process_img = tools.ocr_pipelines.ocr_tables(tables, process_img, language)
#endregion

#region ############ OCR GD&T #############################
gdt_results, updated_gdt_boxes, process_img = tools.ocr_pipelines.ocr_gdt(process_img, gdt_boxes, recognizer_gdt)
#endregion

#region ############ OCR Dimensions #######################
if frame:
    process_img = process_img[frame.y : frame.y + frame.h, frame.x : frame.x + frame.w]

process_img_ = process_img.copy()
dimensions, other_info, process_img, dim_tess = tools.ocr_pipelines.ocr_dimensions(
    process_img, detector, recognizer_dim, alphabet_dim, frame, dim_boxes,
    cluster_thres=20, max_img_size=1048, language=language, backg_save=False)
#endregion

#region ############# Qwen for tables #####################
qwen = False
if qwen:
    model, processor = tools.llm_tools.load_VL(model_name="Qwen/Qwen2-VL-7B-Instruct")
    device = "cuda:1"
    query = ['Tolerance', 'material', 'Surface finish', 'weight']
    llm_tab_qwen = tools.llm_tools.llm_table(tables, llm=(model, processor), img=img, device=device, query=query)
    print(llm_tab_qwen)
#endregion

#region ########### Output ################################
mask_img = tools.output_tools.mask_img(img, updated_gdt_boxes, updated_tables, dimensions, frame, other_info)
table_results, gdt_results, dimensions, other_info = tools.output_tools.process_raw_output(
    output_path, table_results, gdt_results, dimensions, other_info, save=False)
#endregion

# Crop the information block from the first table (the crop of the last box is kept)
for b in tables[0]:
    infoblock_img = img[b.y : b.y + b.h, b.x : b.x + b.w]
# Encode the crops for the LLM prompts
infoblock_img = tools.llm_tools.convert_img(infoblock_img)
drw_img = tools.llm_tools.convert_img(process_img_)

manuf = False
quality = False

#region ########## Manufacturability ################
if manuf:
    messages = [
        {"role": "system",
         "content": [
            {"type": "text", "text": '''You are a specialized OCR system capable of reading mechanical drawings.'''},
         ]},
        {"role": "user",
         "content": [
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{infoblock_img}", "detail": "high"}},
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{drw_img}", "detail": "high"}},
            {"type": "text", "text": '''You are getting the information block of the drawing in the first image, and the views of the part in the second. I need you to return a PYTHON DICTIONARY with the manufacturing processes (keys) and a short description (values) that are best for this part.'''},
         ]},
    ]
    answer = tools.llm_tools.ask_gpt(messages)
    print('Manufacturing Answer: \n', answer)
#endregion

#region ######### Quality Control Check ##############
if quality:
    messages = [
        {"role": "system",
         "content": [
            {"type": "text", "text": '''You are a specialized OCR system capable of reading mechanical drawings.'''},
         ]},
        {"role": "user",
         "content": [
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{infoblock_img}", "detail": "high"}},
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{drw_img}", "detail": "high"}},
            {"type": "text", "text": '''You are getting the information block of the drawing in the first image, and the views of the part in the second. I need you to tell me IN A PYTHON LIST ONLY WHICH MEASUREMENTS -NUMERICAL VALUE AND TOLERANCE- need to be checked in the quality control process.'''},
         ]},
    ]
    answer = tools.llm_tools.ask_gpt(messages)
    print('Quality Control Answer: \n', answer)
#endregion

###################################################
#cv2.imwrite('liu.png', mask_img)
#cv2.imshow('Mask Image', mask_img)
#cv2.waitKey(0)
#cv2.destroyAllWindows()