from __future__ import annotations
import argparse
import torch
from transformers import AutoModel, AutoProcessor
def main() -> None:
    """Run OCR on a single image and print the decoded text.

    Command-line options:
        --model-id: model repo id or local directory, passed to both
            ``AutoProcessor.from_pretrained`` and ``AutoModel.from_pretrained``.
        --image: path to the input image.
        --device: device the model and pixel tensor are moved to
            (defaults to ``"cpu"``).

    The first entry of the processor's ``batch_decode`` result is written
    to stdout.
    """
    cli = argparse.ArgumentParser(description="Single-image OCR prediction.")
    cli.add_argument("--model-id", required=True, help="HF repo id or local model directory.")
    cli.add_argument("--image", required=True, help="Path to input image.")
    cli.add_argument("--device", default="cpu", help="cpu or cuda")
    opts = cli.parse_args()
    device = opts.device

    # Both loaders are called with trust_remote_code=True, so only point
    # --model-id at a source you trust.
    processor = AutoProcessor.from_pretrained(opts.model_id, trust_remote_code=True)
    model = AutoModel.from_pretrained(opts.model_id, trust_remote_code=True)
    model = model.to(device)
    model.eval()

    # The path string is handed to the processor unchanged.
    # NOTE(review): presumably the processor opens the file itself — confirm.
    encoded = processor(images=opts.image, return_tensors="pt")
    pixels = encoded["pixel_values"].to(device)

    # Inference only: no gradient tracking needed for the forward pass.
    with torch.no_grad():
        outputs = model(pixel_values=pixels)
        scores = outputs.logits

    # NOTE(review): logits are passed to batch_decode as-is; presumably the
    # processor performs the id selection/decoding internally — confirm.
    decoded = processor.batch_decode(scores)
    print(decoded[0])
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|