File size: 1,000 Bytes
b5b608e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
from __future__ import annotations

import argparse

import torch
from transformers import AutoModel, AutoProcessor


def main() -> None:
    """Run OCR on one image and print the decoded text to stdout.

    Command-line arguments:
        --model-id: HF repo id or local model directory (required).
        --image: Path to the input image (required).
        --device: Device the model and pixel tensor are moved to
            (defaults to ``"cpu"``).

    Both the processor and the model are loaded with
    ``trust_remote_code=True``, so code shipped with the model repository
    is executed; only point this at sources you trust.
    """
    cli = argparse.ArgumentParser(description="Single-image OCR prediction.")
    argument_table = (
        ("--model-id", {"required": True, "help": "HF repo id or local model directory."}),
        ("--image", {"required": True, "help": "Path to input image."}),
        ("--device", {"default": "cpu", "help": "cpu or cuda"}),
    )
    for flag, options in argument_table:
        cli.add_argument(flag, **options)
    opts = cli.parse_args()
    target_device = opts.device

    processor = AutoProcessor.from_pretrained(opts.model_id, trust_remote_code=True)
    loaded = AutoModel.from_pretrained(opts.model_id, trust_remote_code=True)
    model = loaded.to(target_device)
    # Switch to inference behaviour before the forward pass.
    model.eval()

    # NOTE(review): the image *path* is handed to the processor unchanged;
    # presumably the remote-code processor opens the file itself -- confirm.
    encoded = processor(images=opts.image, return_tensors="pt")
    pixel_values = encoded["pixel_values"].to(target_device)

    # Gradients are not needed for prediction.
    with torch.no_grad():
        outputs = model(pixel_values=pixel_values)
        logits = outputs.logits

    # NOTE(review): raw logits go straight into batch_decode; this assumes the
    # processor performs its own decoding of logits -- confirm.
    decoded = processor.batch_decode(logits)
    print(decoded[0])


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()