Spaces:
Running
Running
| import gradio as gr | |
| from transformers import AutoTokenizer, TrOCRProcessor, VisionEncoderDecoderModel | |
| from PIL import Image | |
| import torch | |
# Hugging Face Hub id of the checkpoint served by this app.
model_name = "mohammadalihumayun/trocr-ur-v2"

# Run on the GPU when torch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# The processor supplies both the image preprocessing and the token decoding
# used by recognize_text; the model is the matching encoder-decoder network.
processor = TrOCRProcessor.from_pretrained(model_name)
model = VisionEncoderDecoderModel.from_pretrained(model_name)
model.to(device)
# Preprocessing geometry. Per the original author's notes these mirror the
# `size` and `crop_size` entries of the checkpoint's preprocessor config
# (NOTE(review): not verifiable from this file -- confirm against the config).
_RESIZE_EDGE = 438
_CROP_EDGE = 384


def _resize_and_center_crop(image: "Image.Image") -> "Image.Image":
    """Resize *image* to a 438x438 square (bicubic) and return its central 384x384 crop."""
    resized = image.resize(
        (_RESIZE_EDGE, _RESIZE_EDGE), resample=Image.Resampling.BICUBIC
    )
    margin = (_RESIZE_EDGE - _CROP_EDGE) // 2
    return resized.crop(
        (margin, margin, margin + _CROP_EDGE, margin + _CROP_EDGE)
    )


def recognize_text(image: "Image.Image | None") -> str:
    """Run the OCR model on one image and return the decoded text.

    The image is converted to RGB if needed, resized and center-cropped by
    hand, then handed to the processor's image processor with its own resize
    and center-crop steps disabled so the remaining preprocessing is applied
    exactly once. Text is generated with ``num_beams=1`` and
    ``do_sample=False`` and ``max_length=100``.

    Args:
        image: The uploaded picture, or ``None`` when the UI submits without
            an image.

    Returns:
        The decoded text of the first (only) sequence, a prompt to upload an
        image when *image* is ``None``, or a generic error message if
        anything fails during preprocessing or inference.
    """
    # The UI can call this with no image at all; answer that case directly
    # instead of letting it surface as an inference failure with a traceback.
    if image is None:
        return "⚠️ Please upload an image first."

    try:
        # Ensure image is in RGB mode
        if image.mode != "RGB":
            image = image.convert("RGB")

        image_cropped = _resize_and_center_crop(image)

        # Resizing and cropping were done above, so switch them off here and
        # let the image processor perform only its remaining steps.
        pixel_values = processor.image_processor(
            images=image_cropped,
            return_tensors="pt",
            do_resize=False,
            do_center_crop=False,
        ).pixel_values.to(device)

        generated_ids = model.generate(
            pixel_values,
            max_length=100,
            num_beams=1,
            do_sample=False,
        )
        # A single image was submitted, so the batch holds one sequence.
        return processor.batch_decode(
            generated_ids, skip_special_tokens=True
        )[0]
    except Exception as e:
        # Top-level UI boundary: report the failure on the console and show
        # the user a short message rather than crashing the request.
        print(f"[ERROR] {e}")
        import traceback

        traceback.print_exc()
        return "⚠️ Error during inference"
# Widgets for the single-input / single-output demo page. The image widget
# delivers a PIL image (type="pil") to the handler.
image_input = gr.Image(type="pil", label="Upload Urdu Handwriting Image")
text_output = gr.Textbox(label="Extracted Text (Urdu RTL)")

# Bind recognize_text to the widgets; flagging is switched off.
demo = gr.Interface(
    fn=recognize_text,
    inputs=image_input,
    outputs=text_output,
    title="Urdu OCR with TrOCR",
    description="Extract handwritten Urdu text using a fine-tuned TrOCR model.",
    allow_flagging="never",
)

# Start serving the interface.
demo.launch()