Spaces:

DocUA
/

Local_OCR_Demo

Sleeping

App Files Files Community

Local_OCR_Demo / ocr_full_pdf12.py

DocUA

Initial commit: DeepSeek-OCR-2 & MedGemma-1.5 multimodal analysis app with ZeroGPU support

b752d16 about 1 month ago

raw

history blame contribute delete

2.87 kB

	from transformers import AutoModel, AutoTokenizer
	import torch
	import os
	from PIL import Image
	import time

	# Force CPU for stability on Mac
	device = "cpu"
	print(f"Using device: {device}")

	# Patch to avoid CUDA calls in custom code: redirect every ``.cuda(...)`` call
	# on tensors and modules to ``.to(device)``. The replacements accept and
	# ignore any positional/keyword arguments (e.g. a device index or
	# ``non_blocking=True``) so that both ``x.cuda()`` and ``x.cuda(0)`` work.
	torch.Tensor.cuda = lambda self, *args, **kwargs: self.to(device)
	torch.nn.Module.cuda = lambda self, *args, **kwargs: self.to(device)

	# Model identifier passed to ``from_pretrained`` when loading the OCR model.
	model_name = 'deepseek-ai/DeepSeek-OCR-2'

	def ocr_full_document():
	    """Run OCR over every page image and write Markdown results to disk.

	    Reads the ``.png`` files in ``doc_images_full`` ordered by the number in
	    their ``page_<n>`` name, calls ``model.infer`` on each one, writes
	    ``<image name>.md`` per page into ``ocr_results_pdf12`` and finally a
	    combined ``full_document.md`` in the same directory.

	    Any exception raised while processing a single page is printed and the
	    page is recorded as ``[OCR FAILED]`` in the combined file, so the
	    remaining pages are still processed.

	    The image directory is listed before the model is loaded, so a missing
	    directory fails immediately; when it contains no ``.png`` files the model
	    load is skipped and only the header of the combined file is written.

	    Raises:
	        FileNotFoundError: if ``doc_images_full`` does not exist.
	    """
	    import re

	    image_dir = "doc_images_full"
	    output_dir = "ocr_results_pdf12"
	    os.makedirs(output_dir, exist_ok=True)

	    def get_page_num(filename):
	        """Return the integer following ``page_`` in *filename*, or 0 if absent."""
	        match = re.search(r'page_(\d+)', filename)
	        return int(match.group(1)) if match else 0

	    # Get images sorted by page number. Done before the model load so that a
	    # missing or empty input directory is detected without the expensive load.
	    images = sorted(
	        (f for f in os.listdir(image_dir) if f.endswith(".png")),
	        key=get_page_num,
	    )

	    tokenizer = model = None
	    if images:
	        print("Loading tokenizer...")
	        tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

	        print("Loading model...")
	        model = AutoModel.from_pretrained(
	            model_name,
	            trust_remote_code=True,
	            use_safetensors=True,
	        )
	        model = model.eval()
	    else:
	        print(f"No .png images found in {image_dir!r}; skipping model load.")

	    # Overwrite bfloat16 to float32 for CPU compatibility.
	    # NOTE(review): this rebinds the attribute globally on the torch module and
	    # happens after the model has been loaded - presumably it targets dtype
	    # references inside the model's remote inference code; confirm.
	    torch.bfloat16 = torch.float32

	    prompt = "<image>\nFree OCR. "
	    total = len(images)
	    full_markdown = []

	    for i, img_name in enumerate(images, start=1):
	        page_num = get_page_num(img_name)
	        img_path = os.path.join(image_dir, img_name)
	        print(f"\n[{i}/{total}] Processing page {page_num}...")

	        start_time = time.time()
	        try:
	            with torch.no_grad():
	                # NOTE(review): ``infer`` comes from the model's remote code;
	                # the result is assumed to be the recognised text as ``str``.
	                res = model.infer(
	                    tokenizer,
	                    prompt=prompt,
	                    image_file=img_path,
	                    output_path=output_dir,
	                    base_size=1024,
	                    image_size=768,
	                    crop_mode=False,
	                    eval_mode=True,
	                )

	            elapsed = time.time() - start_time
	            print(f" Done in {elapsed:.2f}s")

	            # Save individual page result. UTF-8 is explicit so non-ASCII OCR
	            # text does not depend on the platform's default encoding.
	            page_file = os.path.join(output_dir, f"{img_name}.md")
	            with open(page_file, "w", encoding="utf-8") as f:
	                f.write(res)

	            full_markdown.append(f"## Page {page_num}\n\n{res}\n\n---\n")

	        except Exception as e:
	            # Deliberate best-effort: one bad page must not abort the whole run.
	            print(f" Failed: {e}")
	            full_markdown.append(f"## Page {page_num}\n\n[OCR FAILED]\n\n---\n")

	    # Save combined result
	    combined_file = os.path.join(output_dir, "full_document.md")
	    with open(combined_file, "w", encoding="utf-8") as f:
	        f.write("# OCR Result for pdf12_un.pdf\n\n")
	        f.write("".join(full_markdown))

	    print(f"\nCompleted! Full result saved to: {combined_file}")

	# Script entry point: run the full-document OCR only when this file is
	# executed directly, not when it is imported.
	if __name__ == '__main__':
	    ocr_full_document()