"""Run the checkpoint's custom ``infer`` routine on a sample document image.

The tokenizer and model are loaded from the current directory (``'.'``),
the model is switched to eval mode, moved to the CPU and cast to float16,
and ``model.infer`` is then called once on ``samples/paper.png`` with
``output_path='tmp'``.
"""
from transformers import AutoModel, AutoTokenizer
import torch

# Relative path: the checkpoint is expected in the current working directory.
model_name = '.'

# trust_remote_code=True lets transformers execute the Python modelling code
# that ships with the checkpoint, so only point this at a directory you trust.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModel.from_pretrained(
    model_name,
    trust_remote_code=True,
    use_safetensors=True,
)

# Inference only: eval mode, placed on the CPU, parameters cast to float16.
# NOTE(review): half precision on CPU can be slow or unsupported for some
# operators depending on the installed torch build -- confirm this dtype is
# intended for the CPU path.
model = model.eval().to("cpu").to(torch.float16)

# Alternative prompt kept for reference:
# prompt = "<image>\nFree OCR. "
prompt = "<image>\n<|grounding|>Convert the document to markdown. "
image_file = 'samples/paper.png'
output_path = 'tmp'

# The keyword options below are passed unchanged to the checkpoint's own
# ``infer`` implementation; their exact semantics are defined by that remote
# code, not by this script.
res = model.infer(
    tokenizer,
    prompt=prompt,
    image_file=image_file,
    output_path=output_path,
    base_size=1024,
    image_size=768,
    crop_mode=True,
    save_results=True,
    test_compress=True,
)