Spaces:
Sleeping
Sleeping
| import cv2 | |
| import numpy as np | |
| import pytesseract | |
| import tensorflow as tf | |
| from tensorflow.keras.preprocessing.image import img_to_array, load_img | |
| from process import preprocess_image | |
| from PIL import Image | |
| from codecs import encode, decode | |
| import requests | |
def infer_text(im, timeout=60):
    """Recognise text in an image via the Hive text-recognition demo endpoint.

    The image is first written to ``converted.png`` in the current working
    directory and then uploaded from that file.

    Args:
        im: Image object exposing ``save(path)``; callers in this file pass
            the result of ``PIL.Image.open``.
        timeout: Seconds ``requests`` waits on the HTTP call before raising.
            Without a timeout a stalled endpoint would block forever.

    Returns:
        The ``block_text`` of every entry in ``response.output`` of the JSON
        reply, concatenated in order, with backslash escape sequences decoded.
    """
    im.save("converted.png")
    url = "https://ajax.thehive.ai/api/demo/classify?endpoint=text_recognition"
    headers = {"referer": "https://thehive.ai/"}

    # Keep the upload handle inside a context manager so it is always closed,
    # even when the request raises.
    with open("converted.png", "rb") as image_file:
        files = {
            "image": ("converted.png", image_file, "image/png"),
            "model_type": (None, "detection"),
            "media_type": (None, "photo"),
        }
        res = requests.post(url, headers=headers, files=files, timeout=timeout)

    text = "".join(
        output["block_text"] for output in res.json()["response"]["output"]
    )
    # Encode to latin-1 (non-latin-1 characters become backslash escapes) and
    # decode with unicode-escape so every backslash escape in the text is
    # turned into the character it denotes.
    return decode(encode(text, "latin-1", "backslashreplace"), "unicode-escape")
def find_order_id(uploaded_file, input_file, model, ocre):
    """Match an uploaded image to an order by OCR text and predicted font.

    Args:
        uploaded_file: Image source handed to ``Image.open`` /
            ``preprocess_image`` for OCR and to ``load_img`` for the font model.
        input_file: Binary file-like object usable as a context manager. Each
            non-blank line must be ``order_id,name,font``.
        model: Object with ``predict(batch)`` returning a 2-D array; column 0
            is treated as 'Pacifico' and column 1 as 'OpenSans-Light'.
        ocre: ``'Hive'`` selects the Hive endpoint; any other value uses
            pytesseract on the preprocessed image.

    Returns:
        dict with ``'status'`` (``'success'``, ``'warning'`` or ``'error'``)
        and a human-readable ``'message'``.

    Raises:
        ValueError: If a non-blank line does not have exactly three
            comma-separated fields.
    """
    if ocre == 'Hive':
        text = infer_text(Image.open(uploaded_file))
    else:
        text = pytesseract.image_to_string(preprocess_image(uploaded_file))

    with input_file as file:
        file_contents = file.read().decode()
    # Drop blank lines (e.g. from a trailing newline), which would otherwise
    # break the three-way unpack below.
    rows = [row for row in (raw.strip() for raw in file_contents.split('\n')) if row]

    # The font prediction depends only on the image, so run the model once
    # instead of once per matching line.
    image = img_to_array(load_img(uploaded_file, target_size=(64, 64)))
    image = np.expand_dims(image, axis=0) / 255.0
    prediction = model.predict(image)
    font_type = 'Pacifico' if prediction[0, 0] > prediction[0, 1] else 'OpenSans-Light'

    possible_order_ids = []

    # Pass 1: a name found in the OCR text AND a matching font is a definite
    # hit; a name match with another font is only a candidate.
    for row in rows:
        order_id, name, font = row.split(',')
        if name.strip() not in text:
            continue
        if font_type == font.strip():
            return {
                'status': 'success',
                'message': f'Detected Text: {text.strip()}\n, Order ID: {order_id}, Predicted Font Type: {font_type}'
            }
        possible_order_ids.append(order_id)

    # Pass 2: no definite hit, so add every order whose font matches the
    # prediction as a further candidate.
    for row in rows:
        order_id, name, font = row.split(',')
        if font.strip() == font_type:
            possible_order_ids.append(order_id)

    if possible_order_ids:
        return {
            'status': 'warning',
            'message': f'Detected Text: {text.strip()}\n, Possible Order IDs: {",".join(possible_order_ids)}, Predicted Font Type: {font_type}'
        }
    return {
        'status': 'error',
        'message': f'Detected Text: {text.strip()}\n, Could not find the Order ID and possible font matches.'
    }
def jaccard_similarity(s1, s2):
    """Return the Jaccard similarity of the whitespace-separated tokens of two strings.

    The score is ``|A & B| / |A | B|`` where ``A`` and ``B`` are the sets of
    tokens produced by ``str.split()``.

    Args:
        s1: First string.
        s2: Second string.

    Returns:
        A float in ``[0, 1]``. When neither string contains a token the union
        is empty; 0.0 is returned in that case so that two empty strings are
        never reported as a match (and no ``ZeroDivisionError`` is raised).
    """
    tokens1 = set(s1.split())
    tokens2 = set(s2.split())
    union = tokens1 | tokens2
    if not union:
        return 0.0
    return len(tokens1 & tokens2) / len(union)
def find_order_id_similarity(uploaded_file, input_file, similarity_method, ocre):
    """Match an uploaded image to an order by comparing OCR text with order names.

    Args:
        uploaded_file: Image source handed to ``Image.open`` or
            ``preprocess_image`` for OCR.
        input_file: Binary file-like object usable as a context manager. Each
            non-blank line must be ``order_id,name,font``.
        similarity_method: ``'exact_match'`` requires the stripped name to
            equal the stripped OCR text. ``'jaccard_similarity'`` accepts the
            first name scoring >= 0.8 and lists names scoring >= 0.5 as
            candidates.
        ocre: ``'Hive'`` selects the Hive endpoint; any other value uses
            pytesseract on the preprocessed image.

    Returns:
        dict with ``'status'`` (``'success'``, ``'warning'`` or ``'error'``)
        and a human-readable ``'message'``. An unrecognised
        ``similarity_method`` yields an ``'error'`` result instead of ``None``.

    Raises:
        ValueError: If a non-blank line does not have exactly three
            comma-separated fields.
    """
    if ocre == 'Hive':
        text = infer_text(Image.open(uploaded_file))
    else:
        text = pytesseract.image_to_string(preprocess_image(uploaded_file))

    with input_file as file:
        file_contents = file.read().decode()
    # Drop blank lines (e.g. from a trailing newline), which would otherwise
    # break the three-way unpack below.
    rows = [row for row in (raw.strip() for raw in file_contents.split('\n')) if row]

    detected = text.strip()
    not_found = {
        'status': 'error',
        'message': f'Detected Text: {detected}\n, Could not find the Order ID.'
    }

    if similarity_method == 'exact_match':
        for row in rows:
            order_id, name, font = row.split(',')
            if name.strip() == detected:
                return {
                    'status': 'success',
                    'message': f'Detected Text: {detected}\n, Order ID: {order_id}'
                }
        return not_found

    if similarity_method == 'jaccard_similarity':
        possible_order_ids = []
        for row in rows:
            order_id, name, font = row.split(',')
            jaccard_score = jaccard_similarity(name.strip(), detected)
            if jaccard_score >= 0.8:
                # A strong match wins immediately, regardless of candidates
                # collected so far.
                return {
                    'status': 'success',
                    'message': f'Detected Text: {detected}\n, Order ID: {order_id}'
                }
            if jaccard_score >= 0.5:
                possible_order_ids.append(order_id)
        if possible_order_ids:
            return {
                'status': 'warning',
                'message': f'Detected Text: {detected}\n, Possible Order IDs: {",".join(possible_order_ids)}'
            }
        return not_found

    # Unknown method: report it explicitly rather than returning None.
    return {
        'status': 'error',
        'message': f'Detected Text: {detected}\n, Unknown similarity method: {similarity_method}'
    }
def find_order_id_2(uploaded_file, input_file, model, ocre):
    """Match an uploaded image to an order by OCR text and a multi-class font model.

    Args:
        uploaded_file: Image source handed to ``Image.open`` /
            ``preprocess_image`` for OCR and to ``load_img`` for the font model.
        input_file: Binary file-like object usable as a context manager. Each
            non-blank line must be ``order_id,name,font``.
        model: Object with ``predict(batch)`` whose first output row is
            arg-maxed and used as an index into the font file names listed in
            ``class_names`` below.
        ocre: ``'Hive'`` selects the Hive endpoint; any other value uses
            pytesseract on the preprocessed image.

    Returns:
        dict with ``'status'`` (``'success'``, ``'warning'`` or ``'error'``)
        and a human-readable ``'message'``.

    Raises:
        ValueError: If a non-blank line does not have exactly three
            comma-separated fields.
    """
    # Font file names in the order of the model's output classes.
    class_names = (
        'Allibretto1.8.otf', 'Bella1.1.otf', 'Buffalo Nickel1.2.otf',
        'Cervanttis1.18.otf', 'Claster1.6.otf', 'Fairy4.5.otf',
        'Mon-Amour-April1.7.otf', 'Mon-Amour-Aug1.1.otf',
        'Mon-Amour-Dec1.2.otf', 'Mon-Amour-Feb1.1.otf',
        'Mon-Amour-January1.2.otf', 'Mon-Amour-July1.1.otf',
        'Mon-Amour-June1.1.otf', 'Mon-Amour-Mar1.2.otf',
        'Mon-Amour-May1.1.otf', 'Mon-Amour-Nov1.1.otf',
        'Mon-Amour-Oct1.1.otf', 'Mon-Amour-Sept1.1.otf',
        'Mon-Amour2.3.otf', 'Shelby1.3.otf', 'UKIJJ-Quill1.7.otf',
    )

    if ocre == 'Hive':
        text = infer_text(Image.open(uploaded_file))
    else:
        text = pytesseract.image_to_string(preprocess_image(uploaded_file))

    with input_file as file:
        file_contents = file.read().decode()
    # Drop blank lines (e.g. from a trailing newline), which would otherwise
    # break the three-way unpack below.
    rows = [row for row in (raw.strip() for raw in file_contents.split('\n')) if row]

    # The font prediction depends only on the image, so run the model once
    # instead of once per matching line.
    image = img_to_array(load_img(uploaded_file, target_size=(64, 64)))
    image = np.expand_dims(image, axis=0) / 255.0
    prediction = model.predict(image)
    predicted_class_name = class_names[np.argmax(prediction[0])].strip()

    possible_order_ids = []

    # Pass 1: a name found in the OCR text AND a matching font is a definite
    # hit; a name match with another font is only a candidate.
    for row in rows:
        order_id, name, font = row.split(',')
        if name.strip() not in text:
            continue
        if predicted_class_name == font.strip():
            return {
                'status': 'success',
                'message': f'Detected Text: {text.strip()}\n, Order ID: {order_id}, Predicted Font Type: {predicted_class_name}'
            }
        possible_order_ids.append(order_id)

    # Pass 2: no definite hit, so add every order whose font matches the
    # prediction as a further candidate.
    for row in rows:
        order_id, name, font = row.split(',')
        if font.strip() == predicted_class_name:
            possible_order_ids.append(order_id)

    if possible_order_ids:
        return {
            'status': 'warning',
            'message': f'Detected Text: {text.strip()}\n, Possible Order IDs: {",".join(possible_order_ids)}, Predicted Font Type: {predicted_class_name}'
        }
    return {
        'status': 'error',
        'message': f'Detected Text: {text.strip()}\n, Could not find the Order ID and possible font matches.'
    }