Spaces:
Sleeping
Sleeping
| import os | |
| os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0' | |
| from transformers import VisionEncoderDecoderModel, TrOCRProcessor | |
| from PIL import Image | |
| import io | |
| import base64 | |
# Single identifier passed to both ``from_pretrained`` calls so the processor
# and the model are always loaded from the same checkpoint.
_CAPTCHA_MODEL_ID = "anuashok/ocr-captcha-v3"

# Loaded once at import time and reused by every ``resolve_captcha`` call.
processor = TrOCRProcessor.from_pretrained(_CAPTCHA_MODEL_ID, use_fast=True)
model = VisionEncoderDecoderModel.from_pretrained(_CAPTCHA_MODEL_ID)
def resolve_captcha(image_path):
    """Read the text of a captcha image with the module-level OCR model.

    Args:
        image_path: Either a string starting with ``data:image`` (a data URI
            whose payload after the first comma is base64-encoded image
            bytes), or any value ``PIL.Image.open`` accepts, such as a file
            path.

    Returns:
        The first string returned by ``processor.batch_decode`` for the
        generated token ids, with special tokens skipped.

    Raises:
        IndexError: If a ``data:image`` string contains no comma.
        Exception: Errors from base64 decoding, ``PIL.Image.open`` or the
            model are propagated unchanged.
    """
    if isinstance(image_path, str) and image_path.startswith("data:image"):
        # Data URI: the base64 payload is the part after the comma.
        base64_data = image_path.split(",")[1]
        source = io.BytesIO(base64.b64decode(base64_data))
    else:
        # Anything else is handed to Pillow as-is (typically a file path).
        source = image_path

    # Open inside a context manager so a file handle Pillow opened itself is
    # released deterministically; ``convert`` returns a separate image, so
    # the result stays usable after the ``with`` block exits.
    with Image.open(source) as opened:
        image = opened.convert("RGBA")

    # Composite onto a white canvas so transparent regions do not reach the
    # model, then drop the alpha channel.
    background = Image.new("RGBA", image.size, (255, 255, 255))
    combined = Image.alpha_composite(background, image).convert("RGB")

    # Prepare the image for the model.
    pixel_values = processor(combined, return_tensors="pt").pixel_values

    # Generate token ids and decode them to text.
    generated_ids = model.generate(pixel_values)
    generated_text = processor.batch_decode(
        generated_ids, skip_special_tokens=True)[0]
    return generated_text