Spaces:

sprakhil
/

OCRQuest

Sleeping

App Files Files Community

OCRQuest / app.py

sprakhil

Added keyword highlighter

ce1aae3 over 1 year ago

raw

history blame contribute delete

2.12 kB

	import html
	import os
	import re
	import tempfile

	import easyocr
	import gradio as gr
	from PIL import Image

	# Module-level EasyOCR reader configured for English ('en') and Hindi ('hi'),
	# created once at import and reused by extract_text; gpu=False is passed explicitly.
	reader = easyocr.Reader(['en', 'hi'], gpu=False)

	def convert_hindi_numerals_to_arabic(text):
	    """Return ``text`` with every Devanagari digit replaced by its ASCII digit.

	    Characters other than the ten digits ०-९ are passed through unchanged.
	    """
	    # One translation table applied in a single pass over the string.
	    digit_table = str.maketrans({
	        '०': '0', '१': '1', '२': '2', '३': '3', '४': '4',
	        '५': '5', '६': '6', '७': '7', '८': '8', '९': '9',
	    })
	    return text.translate(digit_table)

	def extract_text(image_path):
	    """Run OCR on the image at ``image_path`` and return the recognised text.

	    The image is re-encoded as an RGB JPEG in a temporary file before being
	    handed to the module-level ``reader`` (presumably to normalise the input
	    format for OCR). The recognised fragments are joined with single spaces
	    and Devanagari digits are converted to ASCII digits.

	    Args:
	        image_path: Path of an image file that PIL can open.

	    Returns:
	        The extracted text as a single string.
	    """
	    # A unique temp file per call: a fixed name in the working directory would
	    # be overwritten or deleted by another request running at the same time.
	    fd, temp_jpg_path = tempfile.mkstemp(suffix=".jpg")
	    os.close(fd)  # PIL reopens the file by path; only the name is needed.
	    try:
	        # The context manager releases the source image's file handle.
	        with Image.open(image_path) as img:
	            img.convert("RGB").save(temp_jpg_path, "JPEG")

	        result = reader.readtext(temp_jpg_path, detail=0)
	    finally:
	        # Remove the temp file even when opening, saving or OCR raises.
	        os.remove(temp_jpg_path)

	    extracted_text = " ".join(result)
	    return convert_hindi_numerals_to_arabic(extracted_text)

	def search_text(image_path, keyword):
	    """Run OCR on an image and report whether ``keyword`` occurs in the text.

	    Args:
	        image_path: Path of the uploaded image, or ``None`` when no image
	            was supplied.
	        keyword: Text to look for; matched literally and case-insensitively.

	    Returns:
	        A ``(status_message, html_text)`` tuple. ``html_text`` is the
	        HTML-escaped extracted text with every occurrence of the keyword
	        wrapped in a ``<mark>`` element.
	    """
	    if image_path is None:
	        # Nothing to OCR: answer with a message instead of failing in extract_text.
	        return "Please upload an image to search.", ""

	    extracted_text = extract_text(image_path)

	    if not keyword:
	        # An empty pattern matches at every position, which would insert an
	        # empty <mark> between all characters and report a bogus hit.
	        return "Please enter a keyword to search.", html.escape(extracted_text)

	    pattern = re.compile(re.escape(keyword), re.IGNORECASE)

	    # Build the HTML piece by piece so that the OCR text (untrusted image
	    # content) is escaped while the <mark> tags themselves stay as markup.
	    pieces = []
	    cursor = 0
	    match_count = 0
	    for match in pattern.finditer(extracted_text):
	        match_count += 1
	        pieces.append(html.escape(extracted_text[cursor:match.start()]))
	        pieces.append(
	            '<mark style="background-color: lightblue; color: black;">'
	            f'{html.escape(match.group(0))}</mark>'
	        )
	        cursor = match.end()
	    pieces.append(html.escape(extracted_text[cursor:]))
	    highlighted_text = "".join(pieces)

	    if match_count:
	        return f"Keyword '{keyword}' found in the extracted text.", highlighted_text
	    return f"Keyword '{keyword}' not found in the extracted text.", highlighted_text

	def create_interface():
	    """Assemble the Gradio interface that connects the image and keyword inputs to ``search_text``.

	    Returns:
	        The configured ``gr.Interface``; launching it is left to the caller.
	    """
	    # Inputs: the image is passed to search_text as a file path.
	    image_input = gr.Image(type="filepath", label="Upload Image")
	    keyword_input = gr.Textbox(lines=1, placeholder="Enter keyword to search", label="Keyword")

	    # Outputs: a plain status line and the extracted text rendered as HTML.
	    status_output = gr.Textbox(label="Search Result")
	    text_output = gr.HTML(label="Extracted Text")

	    return gr.Interface(
	        fn=search_text,
	        inputs=[image_input, keyword_input],
	        outputs=[status_output, text_output],
	        title="OCR and Keyword Search Application",
	        description="Upload an image containing text in English or Hindi. Enter a keyword to search within the extracted text.",
	    )

	# Build and launch the app only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    create_interface().launch(share=True)