File size: 1,486 Bytes
36255e5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
## TrOCR text recognition for cropped image cells
import numpy as np
from tqdm.auto import tqdm
from Model_loading import processor_tr_ocr, trocr_model
import torch
from PIL import Image



def _recognize_cell(cell_image, device=None):
    """
    Run TrOCR on a single cropped cell and return the decoded text.

    Parameters:
    - cell_image: The cropped cell. Normally a PIL image; any array-like
      accepted by ``Image.fromarray`` is also supported.
    - device: Device to move the pixel tensor to before generation, or
      None to leave the tensor where the processor created it.

    Returns:
    - The decoded string for the cell (may be empty).
    """
    # Convert the PIL crop directly. A round trip through NumPy drops the
    # palette of "P"-mode images, which turns their colours into grey index
    # values once converted to RGB. Only non-PIL crops need fromarray.
    if not isinstance(cell_image, Image.Image):
        cell_image = Image.fromarray(np.asarray(cell_image))
    rgb_image = cell_image.convert("RGB")

    pixel_values = processor_tr_ocr(images=rgb_image, return_tensors="pt").pixel_values
    if device is not None:
        # The model's weights may live on an accelerator; inputs must match.
        pixel_values = pixel_values.to(device)

    # Generate token ids with the TrOCR model and decode the single result.
    generated_ids = trocr_model.generate(pixel_values)
    return processor_tr_ocr.batch_decode(generated_ids, skip_special_tokens=True)[0]


def apply_TRocr(cropped_image,cell_coordinates):
    """
    Apply TrOCR to every cell described by the given cell coordinates.

    Parameters:
    - cropped_image: Image the cells are cut from. It must provide a
      ``crop(box)`` method (presumably a PIL image -- confirm with caller).
    - cell_coordinates: Iterable of rows. Each row is a mapping with a
      "cells" entry, which is an iterable of mappings whose "cell" entry is
      passed to ``cropped_image.crop`` (presumably a
      (left, upper, right, lower) box -- confirm with caller).

    Returns:
    - data: A list with one inner list per row, holding the extracted text
      of each cell in order. Cells with no decoded text yield "NAN".
    """
    # Look the device up once; fall back to None when the model object does
    # not expose one, so the tensor is then passed through unchanged.
    device = getattr(trocr_model, "device", None)

    data = []  # One list of cell texts per row
    for row in tqdm(cell_coordinates):
        row_text = [
            # An empty decode result is replaced by the "NAN" placeholder.
            _recognize_cell(cropped_image.crop(cell["cell"]), device) or "NAN"
            for cell in row["cells"]
        ]
        data.append(row_text)

    return data