|
|
|
|
|
""" |
|
|
Advanced usage example for the Custom PaliGemma OCR Model. |
|
|
""" |
|
|
|
|
|
from transformers import AutoModel |
|
|
from PIL import Image |
|
|
import json |
|
|
|
|
|
def advanced_ocr_example():
    """Demonstrate custom-prompt OCR, batch OCR, and model introspection.

    The example performs three steps and prints the outcome of each:

    1. Runs ``model.generate_ocr_text`` on ``invoice.jpg`` with a custom
       prompt and ``max_length=1024``.
    2. Runs ``model.batch_ocr`` on ``doc1.jpg``, ``doc2.jpg`` and
       ``doc3.jpg``.
    3. Dumps ``model.get_model_info()`` as indented JSON.

    Every image is opened through a context manager so its underlying file
    handle is released as soon as the model has consumed it.

    Returns:
        Whatever ``model.batch_ocr`` returned for the three documents. This
        function iterates it and reads the ``'text'`` and ``'confidence'``
        keys of each item.
    """
    # Imported locally so the block brings its own dependency into scope.
    from contextlib import ExitStack

    # NOTE: trust_remote_code=True executes Python code shipped in the model
    # repository; only point this at a repository you trust.
    model = AutoModel.from_pretrained(
        "your-username/your-model-name", trust_remote_code=True
    )

    # --- Single image with a task-specific prompt ---
    with Image.open("invoice.jpg") as invoice_image:
        invoice_result = model.generate_ocr_text(
            image=invoice_image,
            prompt="<image>Extract all text and numbers from this invoice:",
            max_length=1024,
        )

    print("Invoice OCR Result:")
    print(f"Text: {invoice_result['text']}")
    print(f"Confidence: {invoice_result['confidence']:.3f}")

    # --- Batch processing ---
    document_paths = ("doc1.jpg", "doc2.jpg", "doc3.jpg")
    with ExitStack() as stack:
        # ExitStack closes every opened image on exit, including when a later
        # open or the OCR call itself raises.
        images = [
            stack.enter_context(Image.open(path)) for path in document_paths
        ]
        batch_results = model.batch_ocr(images)

        # Results are consumed while the images are still open, in case the
        # model produces them lazily.
        print("\nBatch Processing Results:")
        for i, result in enumerate(batch_results):
            print(f"Document {i+1}: {result['text'][:50]}...")
            print(f"Confidence: {result['confidence']:.3f}")

    # --- Model metadata ---
    info = model.get_model_info()
    print("\nModel Information:")
    print(json.dumps(info, indent=2))

    return batch_results
|
|
|
|
|
# Script entry point: run the demo only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    advanced_ocr_example()
|
|
|