Spaces:

ethanrom
/

ocr-orderid3

Sleeping

App Files Files Community

ocr-orderid3 / button_click_alt.py

ethanrom

Upload 17 files

9ec9085 over 2 years ago

raw

history blame contribute delete

9.3 kB

	import cv2
	import numpy as np
	import pytesseract
	import tensorflow as tf
	from tensorflow.keras.preprocessing.image import img_to_array, load_img
	from process import preprocess_image
	from PIL import Image
	from codecs import encode, decode
	import requests

	def infer_text(im):
	    """Send an image to the Hive text-recognition demo endpoint and return its text.

	    The image is first written to ``converted.png`` in the current working
	    directory, and that file is uploaded as a multipart form.

	    Args:
	        im: Object exposing ``save(path)``; callers in this file pass a PIL
	            image.

	    Returns:
	        The ``block_text`` of every entry in ``response.output``, concatenated
	        in response order, after the escape-decoding step described below.

	    Raises:
	        KeyError: If the JSON body lacks ``response``, ``output`` or
	            ``block_text``.
	        Any exception raised by ``im.save``, ``requests.post`` or
	        ``res.json()`` propagates unchanged.
	    """
	    im.save("converted.png")
	    url = "https://ajax.thehive.ai/api/demo/classify?endpoint=text_recognition"
	    headers = {"referer": "https://thehive.ai/"}

	    # Keep the upload handle inside a context manager so it is closed even if
	    # the request raises; previously the handle was opened and never closed.
	    with open("converted.png", "rb") as image_file:
	        files = {
	            "image": ("converted.png", image_file, "image/png"),
	            "model_type": (None, "detection"),
	            "media_type": (None, "photo"),
	        }
	        res = requests.post(url, headers=headers, files=files)

	    text = "".join(
	        output["block_text"] for output in res.json()["response"]["output"]
	    )

	    # Encoding to latin-1 with ``backslashreplace`` turns characters outside
	    # latin-1 into backslash escapes; decoding with ``unicode-escape`` then
	    # resolves those together with any literal escape sequences in the text.
	    return decode(encode(text, "latin-1", "backslashreplace"), "unicode-escape")

	def find_order_id(uploaded_file, input_file, model, ocre):
	    """Look up an order from an image using OCR text and a two-class font model.

	    The order list is read from ``input_file`` as ``order_id,name,font``
	    records, one per line. An order matches when its name occurs in the OCR
	    text and its font equals the predicted font.

	    Args:
	        uploaded_file: Image source handed to ``Image.open``,
	            ``preprocess_image`` and ``load_img``.
	        input_file: Context manager whose ``read()`` returns bytes; it is
	            entered (and therefore closed) by this function.
	        model: Object with ``predict(batch)``; the result is indexed as
	            ``prediction[0, 0]`` and ``prediction[0, 1]``.
	        ocre: ``'Hive'`` selects ``infer_text``; any other value uses
	            pytesseract on the preprocessed image.

	    Returns:
	        dict with ``status`` and ``message``:
	        ``'success'`` for the first order whose name and font both match;
	        ``'warning'`` with candidate IDs (name matched with another font,
	        followed by every order using the predicted font) otherwise;
	        ``'error'`` when there are no candidates at all.

	    Raises:
	        ValueError: If a non-blank line does not have exactly three
	            comma-separated fields.
	    """
	    if ocre == 'Hive':
	        text = infer_text(Image.open(uploaded_file))
	    else:
	        text = pytesseract.image_to_string(preprocess_image(uploaded_file))

	    with input_file as file:
	        lines = file.read().decode().split('\n')

	    def iter_orders():
	        """Yield (order_id, name, font) per record, skipping blank lines."""
	        for raw_line in lines:
	            record = raw_line.strip()
	            # A trailing newline leaves an empty last element; unpacking it
	            # used to raise ValueError and abort the whole lookup.
	            if not record:
	                continue
	            order_id, name, font = record.split(',')
	            yield order_id, name.strip(), font.strip()

	    # The prediction depends only on the image, so compute it once instead of
	    # once per name match plus once more in the fallback path.
	    image = load_img(uploaded_file, target_size=(64, 64))
	    image = np.expand_dims(img_to_array(image), axis=0)
	    image = image / 255.0
	    prediction = model.predict(image)
	    font_type = 'Pacifico' if prediction[0, 0] > prediction[0, 1] else 'OpenSans-Light'

	    possible_order_ids = []
	    for order_id, name, font in iter_orders():
	        if name not in text:
	            continue
	        if font == font_type:
	            return {
	                'status': 'success',
	                'message': f'Detected Text: {text.strip()}\n, Order ID: {order_id}, Predicted Font Type: {font_type}'
	            }
	        # Name matched but the font did not: keep it as a candidate.
	        possible_order_ids.append(order_id)

	    # No full match: also offer every order that uses the predicted font.
	    possible_order_ids.extend(
	        order_id for order_id, _, font in iter_orders() if font == font_type
	    )

	    if possible_order_ids:
	        return {
	            'status': 'warning',
	            'message': f'Detected Text: {text.strip()}\n, Possible Order IDs: {",".join(possible_order_ids)}, Predicted Font Type: {font_type}'
	        }
	    return {
	        'status': 'error',
	        'message': f'Detected Text: {text.strip()}\n, Could not find the Order ID and possible font matches.'
	    }

	def jaccard_similarity(s1, s2):
	    """Return the Jaccard similarity of the whitespace-separated tokens of two strings.

	    Args:
	        s1: First string.
	        s2: Second string.

	    Returns:
	        ``len(A & B) / len(A | B)`` where A and B are the sets of tokens
	        produced by ``str.split()``. Returns 0.0 when neither string
	        contains a token.
	    """
	    tokens1 = set(s1.split())
	    tokens2 = set(s2.split())
	    union = tokens1 | tokens2
	    # Two token-less strings give an empty union; treat that as "no evidence
	    # of a match" rather than dividing by zero.
	    if not union:
	        return 0.0
	    return len(tokens1 & tokens2) / len(union)

	def find_order_id_similarity(uploaded_file, input_file, similarity_method, ocre):
	    """Look up an order by comparing OCR text with the names in an order list.

	    The order list is read from ``input_file`` as ``order_id,name,font``
	    records, one per line; the font field is not used here.

	    Args:
	        uploaded_file: Image source handed to ``Image.open`` or
	            ``preprocess_image``.
	        input_file: Context manager whose ``read()`` returns bytes; it is
	            entered (and therefore closed) by this function.
	        similarity_method: ``'exact_match'`` compares the stripped name with
	            the stripped OCR text; ``'jaccard_similarity'`` scores them with
	            ``jaccard_similarity``.
	        ocre: ``'Hive'`` selects ``infer_text``; any other value uses
	            pytesseract on the preprocessed image.

	    Returns:
	        dict with ``status`` and ``message``:
	        ``'success'`` for the first exact match, or the first order scoring
	        at least 0.8; ``'warning'`` listing orders scoring at least 0.5 when
	        none reached 0.8; ``'error'`` when nothing matched or when
	        ``similarity_method`` is not recognised.

	    Raises:
	        ValueError: If a non-blank line does not have exactly three
	            comma-separated fields.
	    """
	    if ocre == 'Hive':
	        text = infer_text(Image.open(uploaded_file))
	    else:
	        text = pytesseract.image_to_string(preprocess_image(uploaded_file))

	    with input_file as file:
	        lines = file.read().decode().split('\n')

	    detected = text.strip()

	    def iter_orders():
	        """Yield (order_id, name) per record, skipping blank lines."""
	        for raw_line in lines:
	            record = raw_line.strip()
	            # A trailing newline leaves an empty last element; unpacking it
	            # used to raise ValueError and abort the whole lookup.
	            if not record:
	                continue
	            order_id, name, _font = record.split(',')
	            yield order_id, name.strip()

	    if similarity_method == 'exact_match':
	        for order_id, name in iter_orders():
	            if name == detected:
	                return {
	                    'status': 'success',
	                    'message': f'Detected Text: {text.strip()}\n, Order ID: {order_id}'
	                }
	        return {
	            'status': 'error',
	            'message': f'Detected Text: {text.strip()}\n, Could not find the Order ID.'
	        }

	    if similarity_method == 'jaccard_similarity':
	        possible_order_ids = []
	        for order_id, name in iter_orders():
	            jaccard_score = jaccard_similarity(name, detected)
	            if jaccard_score >= 0.8:
	                return {
	                    'status': 'success',
	                    'message': f'Detected Text: {text.strip()}\n, Order ID: {order_id}'
	                }
	            if jaccard_score >= 0.5:
	                possible_order_ids.append(order_id)
	        if possible_order_ids:
	            return {
	                'status': 'warning',
	                'message': f'Detected Text: {text.strip()}\n, Possible Order IDs: {",".join(possible_order_ids)}'
	            }
	        return {
	            'status': 'error',
	            'message': f'Detected Text: {text.strip()}\n, Could not find the Order ID.'
	        }

	    # Previously an unrecognised method fell off the end and returned None,
	    # which breaks any caller that indexes the result; report it instead.
	    return {
	        'status': 'error',
	        'message': f'Detected Text: {text.strip()}\n, Unknown similarity method: {similarity_method!r}'
	    }

	def find_order_id_2(uploaded_file, input_file, model, ocre):
	    """Look up an order from an image using OCR text and a multi-class font model.

	    Works like ``find_order_id`` but maps the arg-max of the model output to
	    one of 21 font file names instead of choosing between two fonts.

	    Args:
	        uploaded_file: Image source handed to ``Image.open``,
	            ``preprocess_image`` and ``load_img``.
	        input_file: Context manager whose ``read()`` returns bytes holding
	            ``order_id,name,font`` records, one per line; it is entered (and
	            therefore closed) by this function.
	        model: Object with ``predict(batch)``; ``prediction[0]`` is passed to
	            ``np.argmax`` and the index is looked up in the class-name list
	            (presumably one score per listed font; confirm against the
	            trained model).
	        ocre: ``'Hive'`` selects ``infer_text``; any other value uses
	            pytesseract on the preprocessed image.

	    Returns:
	        dict with ``status`` and ``message``:
	        ``'success'`` for the first order whose name occurs in the OCR text
	        and whose font equals the predicted class name;
	        ``'warning'`` with candidate IDs (name matched with another font,
	        followed by every order using the predicted font) otherwise;
	        ``'error'`` when there are no candidates at all.

	    Raises:
	        ValueError: If a non-blank line does not have exactly three
	            comma-separated fields.
	    """
	    # Defined once here; the list used to be duplicated in both code paths.
	    class_names = [
	        'Allibretto1.8.otf', 'Bella1.1.otf', 'Buffalo Nickel1.2.otf',
	        'Cervanttis1.18.otf', 'Claster1.6.otf', 'Fairy4.5.otf',
	        'Mon-Amour-April1.7.otf', 'Mon-Amour-Aug1.1.otf',
	        'Mon-Amour-Dec1.2.otf', 'Mon-Amour-Feb1.1.otf',
	        'Mon-Amour-January1.2.otf', 'Mon-Amour-July1.1.otf',
	        'Mon-Amour-June1.1.otf', 'Mon-Amour-Mar1.2.otf',
	        'Mon-Amour-May1.1.otf', 'Mon-Amour-Nov1.1.otf',
	        'Mon-Amour-Oct1.1.otf', 'Mon-Amour-Sept1.1.otf',
	        'Mon-Amour2.3.otf', 'Shelby1.3.otf', 'UKIJJ-Quill1.7.otf',
	    ]

	    if ocre == 'Hive':
	        text = infer_text(Image.open(uploaded_file))
	    else:
	        text = pytesseract.image_to_string(preprocess_image(uploaded_file))

	    with input_file as file:
	        lines = file.read().decode().split('\n')

	    def iter_orders():
	        """Yield (order_id, name, font) per record, skipping blank lines."""
	        for raw_line in lines:
	            record = raw_line.strip()
	            # A trailing newline leaves an empty last element; unpacking it
	            # used to raise ValueError and abort the whole lookup.
	            if not record:
	                continue
	            order_id, name, font = record.split(',')
	            yield order_id, name.strip(), font.strip()

	    # The prediction depends only on the image, so compute it once instead of
	    # once per name match plus once more in the fallback path.
	    image = load_img(uploaded_file, target_size=(64, 64))
	    image = np.expand_dims(img_to_array(image), axis=0)
	    image = image / 255.0
	    prediction = model.predict(image)
	    predicted_class_name = class_names[np.argmax(prediction[0])].strip()

	    possible_order_ids = []
	    for order_id, name, font in iter_orders():
	        if name not in text:
	            continue
	        if font == predicted_class_name:
	            return {
	                'status': 'success',
	                'message': f'Detected Text: {text.strip()}\n, Order ID: {order_id}, Predicted Font Type: {predicted_class_name}'
	            }
	        # Name matched but the font did not: keep it as a candidate.
	        possible_order_ids.append(order_id)

	    # No full match: also offer every order that uses the predicted font.
	    possible_order_ids.extend(
	        order_id
	        for order_id, _, font in iter_orders()
	        if font == predicted_class_name
	    )

	    if possible_order_ids:
	        return {
	            'status': 'warning',
	            'message': f'Detected Text: {text.strip()}\n, Possible Order IDs: {",".join(possible_order_ids)}, Predicted Font Type: {predicted_class_name}'
	        }
	    return {
	        'status': 'error',
	        'message': f'Detected Text: {text.strip()}\n, Could not find the Order ID and possible font matches.'
	    }