Spaces:

heerjtdev
/

rocr

Sleeping

App Files Files Community

rocr / app.py

heerjtdev

Update app.py

4095636 verified 29 days ago

raw

history blame contribute delete

5.09 kB

	# import gradio as gr
	# from rapidocr import RapidOCR, OCRVersion

	# # 1. Initialize the OCR engine once with v5 defaults
	# # We use v5 for Detection/Recognition and v4 for Classification (most stable v5 setup)
	# engine = RapidOCR(params={
	# "Det.ocr_version": OCRVersion.PPOCRV5,
	# "Rec.ocr_version": OCRVersion.PPOCRV5,
	# "Cls.ocr_version": OCRVersion.PPOCRV4,
	# })

	# def perform_ocr(img):
	# if img is None:
	# return None, None, "0.0"

	# # 2. Run OCR. return_word_box=True provides the word/char level detail
	# ocr_result = engine(img, return_word_box=True)

	# # 3. Get the annotated preview image
	# vis_img = ocr_result.vis()

	# # 4. Format word-level results for the Dataframe
	# # We flatten the word_results list using the logic from your advanced script
	# word_list = []
	# if ocr_result.word_results:
	# flat_results = sum(ocr_result.word_results, ())
	# for i, (text, score, _) in enumerate(flat_results):
	# word_list.append([i + 1, text, round(float(score), 3)])

	# return vis_img, word_list, f"{ocr_result.elapse:.3f}s"

	# # 5. Build a clean, minimal UI
	# with gr.Blocks(title="Rapid⚡OCR Simple") as demo:
	# gr.Markdown("# Rapid⚡OCR v5")
	# gr.Markdown("Upload an image to extract text with word-level bounding boxes.")

	# with gr.Row():
	# with gr.Column():
	# input_img = gr.Image(label="Input Image", type="numpy")
	# run_btn = gr.Button("Run OCR", variant="primary")

	# with gr.Column():
	# output_img = gr.Image(label="Preview (Bounding Boxes)")
	# elapse_info = gr.Textbox(label="Processing Time")

	# result_table = gr.Dataframe(
	# headers=["ID", "Text", "Confidence"],
	# label="Detected Words",
	# interactive=False
	# )

	# run_btn.click(
	# fn=perform_ocr,
	# inputs=[input_img],
	# outputs=[output_img, result_table, elapse_info]
	# )

	# if __name__ == "__main__":
	# demo.launch()














	import gradio as gr
	from rapidocr import RapidOCR, OCRVersion
	import json
	import tempfile
	import os

	# Shared OCR engine, constructed once at import time and reused by every
	# request. Detection and recognition are pinned to the PP-OCRv5 models,
	# while the classifier is pinned to PP-OCRv4.
	_ocr_versions = {
	    "Det.ocr_version": OCRVersion.PPOCRV5,
	    "Rec.ocr_version": OCRVersion.PPOCRV5,
	    "Cls.ocr_version": OCRVersion.PPOCRV4,
	}
	engine = RapidOCR(params=_ocr_versions)

	def perform_ocr(img):
	    """Run OCR on an image and package the results for the Gradio UI.

	    Args:
	        img: The image handed over by the input component, or ``None``
	            when nothing has been uploaded.

	    Returns:
	        A 4-tuple ``(preview, table_rows, elapsed, json_path)``:

	        * ``preview`` -- whatever ``ocr_result.vis()`` returns (the
	          annotated image).
	        * ``table_rows`` -- ``[id, text, confidence]`` rows, ids starting
	          at 1 and confidence rounded to 3 decimals.
	        * ``elapsed`` -- processing time formatted as ``"<seconds>s"``.
	        * ``json_path`` -- path of a JSON file holding the page-based
	          export of every word and its ``[xmin, ymin, xmax, ymax]`` box.

	        When ``img`` is ``None`` the result is ``(None, None, "0.0", None)``.
	    """
	    if img is None:
	        return None, None, "0.0", None

	    # Run OCR with word-level detection enabled.
	    ocr_result = engine(img, return_word_box=True)

	    # Generate the annotated preview image.
	    vis_img = ocr_result.vis()

	    # Flatten the per-line word groups into one list. A comprehension is
	    # linear (unlike repeated tuple concatenation) and does not care
	    # whether each group is a tuple or a list.
	    flat_results = [
	        word
	        for line_words in (ocr_result.word_results or ())
	        for word in line_words
	    ]

	    word_list_for_table = []
	    json_data_list = []
	    for index, (text, score, bbox) in enumerate(flat_results, start=1):
	        # Table row: running id, recognised text, rounded confidence.
	        word_list_for_table.append([index, text, round(float(score), 3)])

	        # Collapse the point list into an axis-aligned rectangle.
	        # NOTE(review): bbox is presumably four [x, y] corner points --
	        # confirm against the installed rapidocr version.
	        xs = [point[0] for point in bbox]
	        ys = [point[1] for point in bbox]
	        json_data_list.append({
	            "word": text,
	            "bbox": [int(min(xs)), int(min(ys)), int(max(xs)), int(max(ys))],
	            "type": "text",
	        })

	    # Wrap in the page-based JSON structure expected downstream.
	    final_json = [{
	        "page_number": 1,
	        "data": json_data_list,
	        "column_separator_x": None,
	    }]

	    # Write into a fresh directory per call: a single shared path in the
	    # temp dir would let concurrent requests overwrite each other's file
	    # and serve one user's results to another. The file name itself is
	    # kept so the download is still called "ocr_results.json". The
	    # directory is intentionally not removed here because the caller
	    # still needs to read the file after this function returns.
	    output_dir = tempfile.mkdtemp(prefix="ocr_")
	    json_path = os.path.join(output_dir, "ocr_results.json")
	    with open(json_path, "w", encoding="utf-8") as f:
	        json.dump(final_json, f, indent=4, ensure_ascii=False)

	    return vis_img, word_list_for_table, f"{ocr_result.elapse:.3f}s", json_path

	# Gradio UI: image upload and trigger button on the left; preview,
	# timing and JSON download on the right; word table underneath.
	# NOTE(review): component nesting was reconstructed from the blank-line
	# grouping of the flattened source -- confirm against the deployed layout.
	with gr.Blocks(title="Rapid⚡OCR to JSON") as demo:
	    gr.Markdown("# Rapid⚡OCR v5 with JSON Export")
	    gr.Markdown("Extract word-level bounding boxes in the same format as your preprocessed data.")

	    with gr.Row():
	        # Left column: input image and the button that starts OCR.
	        with gr.Column():
	            source_image = gr.Image(label="Input Image", type="numpy")
	            ocr_button = gr.Button("Run OCR", variant="primary")

	        # Right column: everything produced by a run except the table.
	        with gr.Column():
	            preview_image = gr.Image(label="Preview")
	            timing_box = gr.Textbox(label="Processing Time")
	            json_file = gr.File(label="Download OCR JSON")

	    # Read-only table listing every detected word.
	    table_headers = ["ID", "Text", "Confidence"]
	    words_table = gr.Dataframe(
	        headers=table_headers,
	        label="Detected Words",
	        interactive=False,
	    )

	    # The output order must match the tuple returned by perform_ocr.
	    ocr_outputs = [preview_image, words_table, timing_box, json_file]
	    ocr_button.click(
	        fn=perform_ocr,
	        inputs=[source_image],
	        outputs=ocr_outputs,
	    )

	if __name__ == "__main__":
	    demo.launch()