File size: 1,415 Bytes
b8a8a54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51

"""
Advanced usage example for the Custom PaliGemma OCR Model.
"""

from transformers import AutoModel
from PIL import Image
import json

def advanced_ocr_example(
    model_name="your-username/your-model-name",
    invoice_path="invoice.jpg",
    document_paths=("doc1.jpg", "doc2.jpg", "doc3.jpg"),
):
    """Demonstrate custom-prompt OCR, batch OCR, and model introspection.

    The function loads the model, runs OCR on one invoice image with a
    custom prompt, runs batch OCR over several document images, prints the
    results of both, and finally prints the model's info as JSON.

    All image files are opened with context managers so their file handles
    are released even if an OCR call (or a later ``Image.open``) raises.

    Args:
        model_name: Model identifier passed to ``AutoModel.from_pretrained``.
            The default is a placeholder; replace it with a real repository.
        invoice_path: Path of the image used for the custom-prompt example.
        document_paths: Iterable of image paths used for the batch example.

    Returns:
        Whatever ``model.batch_ocr`` returned for the document images. This
        function iterates over it and reads the ``'text'`` and
        ``'confidence'`` keys of each item.

    Note:
        ``generate_ocr_text``, ``batch_ocr`` and ``get_model_info`` are not
        defined in this file; presumably they are supplied by the model
        repository's custom code (hence ``trust_remote_code=True``) --
        confirm against the model card.
    """
    # Local import keeps this function self-contained; the module's
    # top-level imports do not include contextlib.
    from contextlib import ExitStack

    # Load model
    model = AutoModel.from_pretrained(model_name, trust_remote_code=True)

    # Example 1: Custom prompt for invoice
    with Image.open(invoice_path) as invoice_image:
        invoice_result = model.generate_ocr_text(
            image=invoice_image,
            prompt="<image>Extract all text and numbers from this invoice:",
            max_length=1024,
        )

    print("Invoice OCR Result:")
    print(f"Text: {invoice_result['text']}")
    print(f"Confidence: {invoice_result['confidence']:.3f}")

    # Example 2: Batch processing
    # ExitStack closes every image opened so far, including when opening a
    # later file in the list fails part-way through.
    with ExitStack() as stack:
        images = [
            stack.enter_context(Image.open(path)) for path in document_paths
        ]

        batch_results = model.batch_ocr(images)

        # Results are consumed while the images are still open, in case the
        # model produces them lazily.
        print("\nBatch Processing Results:")
        for i, result in enumerate(batch_results):
            print(f"Document {i+1}: {result['text'][:50]}...")
            print(f"Confidence: {result['confidence']:.3f}")

    # Example 3: Model information
    info = model.get_model_info()
    print("\nModel Information:")
    print(json.dumps(info, indent=2))

    return batch_results

# Run the example only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    advanced_ocr_example()