|
|
|
|
|
|
|
|
from transformers import TrOCRProcessor, VisionEncoderDecoderModel |
|
|
from PIL import Image |
|
|
import torch |
|
|
import sys |
|
|
|
|
|
|
|
|
# Pick the compute device up front: CUDA when torch reports it as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# The processor and the model are both loaded from the same checkpoint id.
# NOTE(review): "your-username/sindhi-ocr-model" looks like a placeholder
# repository name -- confirm the real checkpoint before running.
processor = TrOCRProcessor.from_pretrained("your-username/sindhi-ocr-model")
model = VisionEncoderDecoderModel.from_pretrained(
    "your-username/sindhi-ocr-model"
).to(device)
|
|
|
|
|
def recognize_text(image_path) -> str:
    """Run the OCR model on one image file and return the decoded text.

    Uses the module-level ``processor``, ``model`` and ``device``.

    Args:
        image_path: Location of the image; handed straight to ``Image.open``.

    Returns:
        The first string produced by ``processor.batch_decode`` for the
        generated token ids, with special tokens skipped.

    Raises:
        OSError: Propagated from ``Image.open`` when the file cannot be
            opened or is not a readable image.
    """
    # Open through a context manager so the underlying file handle is closed
    # as soon as the pixels have been read. ``convert`` returns a separate
    # in-memory image, so it stays usable after the source is closed.
    with Image.open(image_path) as source:
        image = source.convert("RGB")

    # Preprocess into a PyTorch tensor and move it to the model's device.
    encoded = processor(images=image, return_tensors="pt")
    pixel_values = encoded.pixel_values.to(device)

    generated_ids = model.generate(pixel_values)

    # Only one image was passed in, so take the first decoded entry.
    decoded = processor.batch_decode(generated_ids, skip_special_tokens=True)
    return decoded[0]
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: expects exactly one argument, the image path.
    if len(sys.argv) != 2:
        # Usage errors go to stderr so they never mix with recognized text
        # that a caller may be capturing from stdout.
        print("Usage: python inference.py <image_path>", file=sys.stderr)
        sys.exit(1)

    image_path = sys.argv[1]
    try:
        text = recognize_text(image_path)
    except OSError as err:
        # A missing, unreadable or unidentifiable image file surfaces as an
        # OSError; report it in one line instead of a traceback.
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit(f"Error processing {image_path!r}: {err}")

    print("Recognized Sindhi Text:", text)
|
|
|