BabaK07
/

textract-ai

feature-extraction

vision-language

text-extraction

Model card Files Files and versions

textract-ai / examples /batch_processing.py

BabaK07's picture

Upload custom OCR model based on Qwen2.5-VL

b127e5d verified 7 months ago

1.41 kB


	"""
	Batch processing example for the Custom OCR Model.
	"""

	from transformers import AutoModel
	from PIL import Image
	import os
	from pathlib import Path

	def batch_ocr_example(image_directory: str) -> list:
	    """Run OCR over every ``.jpg`` and ``.png`` file directly inside a directory.

	    The model is loaded once; each image is then opened, passed to
	    ``model.generate_ocr_text(image, use_native=True)``, and its result is
	    printed and collected.

	    Args:
	        image_directory: Path of the directory to scan. Only files directly
	            inside it are considered (the search is not recursive).

	    Returns:
	        A list with one dict per processed image, each holding the keys
	        ``"filename"``, ``"text"`` and ``"confidence"``.
	    """
	    # Load model
	    model = AutoModel.from_pretrained("your-username/your-model-name", trust_remote_code=True)

	    # Get all image files. The patterns need the "*" wildcard: a bare ".jpg"
	    # would only match a file whose entire name is ".jpg". Each group is
	    # sorted so the processing order is deterministic across runs.
	    image_dir = Path(image_directory)
	    image_files = sorted(image_dir.glob("*.jpg")) + sorted(image_dir.glob("*.png"))

	    print(f"Processing {len(image_files)} images...")

	    results = []
	    for image_file in image_files:
	        print(f"Processing: {image_file.name}")

	        # Open the image in a context manager so its file handle is released
	        # as soon as this image is done, instead of piling up over the batch.
	        with Image.open(image_file) as image:
	            # Extract text
	            result = model.generate_ocr_text(image, use_native=True)

	        results.append({
	            "filename": image_file.name,
	            "text": result["text"],
	            "confidence": result["confidence"],
	        })

	        print(f" Text: {result['text'][:50]}...")
	        print(f" Confidence: {result['confidence']:.3f}")

	    return results

	if __name__ == "__main__":
	    import sys

	    # Everything after the script name; the first entry, if present, is the
	    # directory of images to process.
	    cli_args = sys.argv[1:]
	    if not cli_args:
	        print("Usage: python batch_processing.py <image_directory>")
	    else:
	        results = batch_ocr_example(cli_args[0])
	        print(f"\nProcessed {len(results)} images successfully!")