# Captured from a Hugging Face Spaces page (the page itself reported "Runtime error").
# Third-party dependencies for document AI (LayoutLMv2-based extraction).
import torch
from transformers import LayoutLMv2Processor, LayoutLMv2ForTokenClassification
from PIL import Image
import numpy as np

# Module-level cache for the heavyweight processor/model pair; both start
# unloaded and are populated lazily on first use.
processor = None
model = None
def get_document_ai_models():
    """Return the cached (processor, model) pair, loading each on first call.

    The module-level globals act as a process-wide cache so that the
    expensive ``from_pretrained`` downloads run at most once.
    """
    global processor, model
    checkpoint = "microsoft/layoutlmv2-base-uncased"
    if processor is None:
        processor = LayoutLMv2Processor.from_pretrained(checkpoint)
    if model is None:
        model = LayoutLMv2ForTokenClassification.from_pretrained(checkpoint)
    return processor, model
def extract_text_and_layout(image):
    """
    Extract text and layout information from a document image using LayoutLMv2.

    Args:
        image: PIL Image object, or a numpy array convertible via
            ``Image.fromarray`` (e.g. HxW or HxWxC uint8).

    Returns:
        Dictionary with:
            'words': words recovered by detokenizing the processor output
                and splitting on whitespace
            'boxes': per-token bounding boxes from the processor
                (``encoding.bbox[0]``) as a nested list
            'encoding': the full processor output, kept for future processing

    NOTE(review): 'words' and 'boxes' are not index-aligned — boxes are
    per token (including special tokens) while words come from a
    whitespace re-split of the detokenized string. Callers needing
    word-level boxes should align via the encoding; confirm downstream use.
    """
    # Lazily load the (cached) processor/model pair.
    processor, model = get_document_ai_models()

    # Normalize the input: accept numpy arrays, then force RGB for ANY
    # input mode (the original only converted the numpy path, so a
    # grayscale/RGBA PIL image reached the processor un-normalized).
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    image = image.convert("RGB")

    # Run the processor (built-in OCR + tokenization) on the image.
    encoding = processor(image, return_tensors="pt")

    # Recover the raw text from the token ids.
    input_ids = encoding.input_ids
    tokens = processor.tokenizer.convert_ids_to_tokens(input_ids[0])
    words = processor.tokenizer.convert_tokens_to_string(tokens).split()

    # Per-token bounding boxes for the single (batch-size-1) item.
    bbox = encoding.bbox[0]

    return {
        'words': words,
        'boxes': bbox.tolist(),
        'encoding': encoding,  # Keep for future processing
    }