Spaces:

mehreenmehreen
/

ArabicOCR

Build error

Mehreen Saeed

Added SFR link

ae2adec 10 months ago

3.8 kB

	import gradio as gr
	import sys
	import json
	import gradio as gr
	from PIL import Image, ImageDraw
	import os
	import io

	# Extend the import search path with the local 'arabic' and 'coord' directories.
	# This must run before the import below.
	# NOTE(review): page_htr is presumably located in one of these directories — confirm.
	sys.path.append('arabic')
	sys.path.append('coord')
	import page_htr

	# Relative directory used as Gradio's temp dir (exported via GRADIO_TEMP_DIR below).
	TEMP_DIR = "temp/"

	os.environ["GRADIO_TEMP_DIR"] = TEMP_DIR


	# NOTE(review): img_file is not referenced anywhere else in the visible source;
	# it looks like a leftover local test path — confirm before removing.
	img_file = "../../datasets/MoiseK/datasets_sfr/pretrain_images/MG1_045_01.JPG"
	# Model configuration handed to page_htr.page_htr_one_file() by show_image().
	config_file = "model/trial_31_A/set0/config_3100.yaml"



	def annotate_image(image, page_json):
	    """Outline each recognized text line on ``image`` and gather the line texts.

	    Only entries of ``page_json`` whose key starts with ``'line_'`` and whose
	    value contains a ``'coord'`` entry are used; everything else is skipped
	    (and its text is not collected).

	    Args:
	        image: Image to draw on. It is modified in place.
	        page_json: Mapping of string keys to per-line mappings holding
	            ``'text'`` and a flat ``'coord'`` list ``[x0, y0, x1, y1, ...]``.

	    Returns:
	        A ``(image, text)`` tuple where ``text`` is the collected line texts
	        joined with newlines, in the iteration order of ``page_json``.

	    Raises:
	        KeyError: If a line entry has ``'coord'`` but no ``'text'``.
	    """
	    texts = []
	    # One drawing context is enough for every polygon; create it lazily so a
	    # page without usable lines never touches the image at all.
	    draw = None
	    for line_key, line_obj in page_json.items():
	        if not line_key.startswith('line_') or 'coord' not in line_obj:
	            continue
	        texts.append(line_obj['text'])
	        flat = line_obj['coord']
	        # Pair up the flat coordinate list into (x, y) vertices.
	        polygon = list(zip(flat[::2], flat[1::2]))
	        if draw is None:
	            draw = ImageDraw.Draw(image)
	        draw.polygon(polygon, fill=None, outline="red", width=2)

	    return image, '\n'.join(texts)

	def save_text(text, image_file):
	    """Write ``text`` to a ``.txt`` file named after ``image_file`` and return it as a ``gr.File``.

	    The previous body returned ``gr.File.update(value=<bytes>, file_name=...)``.
	    NOTE(review): Gradio releases that ship ``gr.DownloadButton`` (used
	    elsewhere in this file) no longer provide per-component ``update()``
	    class methods, and ``gr.File`` takes a file path as ``value`` — confirm
	    against the pinned Gradio version.

	    Args:
	        text: Text to store; written as UTF-8.
	        image_file: Path of the source image; only its base name (without
	            extension) is used to name the text file.

	    Returns:
	        A ``gr.File`` component whose value is the path of the written file.
	    """
	    import tempfile

	    stem = os.path.splitext(os.path.basename(image_file))[0]
	    filename = stem + '.txt'
	    # A fresh directory per call keeps the download name stable without
	    # clobbering files produced for other uploads with the same name.
	    filepath = os.path.join(tempfile.mkdtemp(), filename)
	    with open(filepath, 'w', encoding='utf-8') as fout:
	        fout.write(text)
	    return gr.File(value=filepath)

	def get_text_file():
	    """Return a hidden ``gr.DownloadButton`` labelled "Download text"."""
	    return gr.DownloadButton(label="Download text", visible=False)


	def process_textfile(image_path, text):
	    """Save ``text`` next to the image as ``<image name>.txt``.

	    Args:
	        image_path: Path of the image; the text file is written to the same
	            directory with the image's extension replaced by ``.txt``.
	        text: Recognized text. A falsy value (``None`` or ``""``) produces an
	            empty file.

	    Returns:
	        A ``(filepath, filename)`` tuple: the full path of the written file
	        and its base name.
	    """
	    directory, image_name = os.path.split(image_path)
	    filename = os.path.splitext(image_name)[0] + '.txt'
	    filepath = os.path.join(directory, filename)

	    # Explicit UTF-8: the default encoding depends on the host locale and may
	    # be unable to represent Arabic text.
	    with open(filepath, 'w', encoding='utf-8') as fout:
	        fout.write(text if text else "")
	    return filepath, filename

	def show_image(image_path):
	    """Run HTR on an uploaded image and build the values for the four UI outputs.

	    Args:
	        image_path: Path of the uploaded image, or ``None`` when there is no
	            upload.

	    Returns:
	        A 4-tuple ``(image, text, download_button, upload_widget)``. Without
	        an upload this is an empty image component, a single-space string, a
	        hidden download button and a fresh file-upload component. With an
	        upload it is the annotated image, the recognized text, a visible
	        download button pointing at the saved text file, and a fresh
	        file-upload component.
	    """
	    if image_path is not None:
	        # Recognition runs on CPU with the module-level model configuration.
	        page_json = page_htr.page_htr_one_file(image_path, config_file, device="cpu")
	        annotated, recognized = annotate_image(Image.open(image_path), page_json)
	        txt_path, txt_name = process_textfile(image_path, recognized)
	        return (
	            annotated,
	            recognized,
	            gr.DownloadButton(label=f"Download {txt_name}", value=txt_path, visible=True),
	            gr.File(label="Upload JPG Image", file_types=[".jpg"]),
	        )

	    # Nothing uploaded: hand back blank/hidden components.
	    return (
	        gr.Image(type="pil", label="Image"),
	        ' ',
	        gr.DownloadButton("Download text", visible=False),
	        gr.File(label="Upload JPG Image", file_types=[".jpg"]),
	    )

	# Make sure the Gradio temp directory exists. exist_ok=True avoids the
	# check-then-create race of testing os.path.exists() before os.mkdir().
	os.makedirs(TEMP_DIR, exist_ok=True)

	# Markdown snippets used for the links and copyright line of the page.
	kclds = (
	    "[Khayrallah Center for Lebanese Diaspora Studies]"
	    "(https://lebanesestudies.ncsu.edu/)"
	)
	ece = (
	    "[Electrical and Computer Engineering at NC State]"
	    "(https://ece.ncsu.edu/)"
	)
	github = (
	    "Visit our [GitHub page]"
	    "(https://github.com/MehreenMehreen/start_follow_read_arabic)"
	    " for more details"
	)
	# Copyright line combining the two institution links.
	refs = "© 2025 {} and {}. All rights reserved.".format(kclds, ece)
	sfr_ref = (
	    "HTR code is modified from [Start Follow Read]"
	    "(https://github.com/cwig/start_follow_read)"
	    " to work on Arabic text"
	)

	# Page layout: heading and notes, a results row, an upload/download row,
	# then the event wiring that runs show_image() whenever the upload changes.
	# NOTE(review): the nesting below was reconstructed from a listing whose
	# indentation was flattened — confirm which widgets belong to each row.
	with gr.Blocks() as demo:
	    gr.HTML(value="<h2 style='color: red;'>📜🖋️ Arabic handwriting reader powered by Muharaf dataset📝</h2>")
	    gr.Markdown(value="### This is a demo app. Due to limited resources, upload a page image with 8 or less lines of text. Check out the [sample_images](https://huggingface.co/spaces/mehreenmehreen/ArabicOCR/) directory")

	    gr.Markdown(value=github)
	    gr.Markdown(value=sfr_ref)
	    gr.Markdown(value=refs)

	    # Results: recognized text beside the annotated page image.
	    with gr.Row():
	        image_text = gr.Textbox(label="Text from HTR Model", lines=15)
	        image_output = gr.Image(type="pil", label="Image")

	    # Controls: image upload and the (initially hidden) text download button.
	    with gr.Row():
	        upload_image = gr.File(label="Upload JPG Image", file_types=[".jpg"])
	        download_file = gr.DownloadButton(label="Download text", visible=False)

	    # The upload widget is also an output: show_image() returns a fresh one.
	    upload_image.change(
	        fn=show_image,
	        inputs=upload_image,
	        outputs=[image_output, image_text, download_file, upload_image],
	    )


	demo.launch()