# Preprocessing utilities for meme classification:
# OCR text extraction, text cleaning, and image resizing/normalization
# for model input.
import re
from functools import lru_cache

import easyocr
import matplotlib.pyplot as plt
from PIL import Image
from torchvision import transforms

# data={"image_path":"", "text":"", "preprocess_image":""}

# to preprocess the text extracted from the meme
# Compiled once at import time so repeated calls don't rebuild the pattern.
_EMOJI_RE = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F300-\U0001F5FF"  # symbols & pictographs
    "\U0001F680-\U0001F6FF"  # transport & map symbols
    "\U0001F1E0-\U0001F1FF"  # flags
    "\U00002700-\U000027BF"  # dingbats
    "\U0001F900-\U0001F9FF"  # supplemental symbols
    "\U00002600-\U000026FF"  # miscellaneous symbols
    "\U00002B00-\U00002BFF"  # arrows, etc.
    "\U0001FA70-\U0001FAFF"  # extended symbols
    "]+",
    flags=re.UNICODE,
)


def preprocess_text(text: str) -> str:
    """Normalize OCR-extracted meme text.

    Steps: strip emoji, lowercase and trim, keep only a-z/0-9/whitespace,
    drop single-character tokens (common OCR noise), and collapse runs of
    whitespace to single spaces.

    Args:
        text: raw text as returned by the OCR reader.

    Returns:
        The cleaned, space-normalized string (may be empty).
    """
    text = _EMOJI_RE.sub('', text)
    text = text.lower().strip()
    # Keep only lowercase letters, digits and whitespace.
    text = re.sub(r'[^a-z0-9\s]', '', text)
    text = re.sub(r'\s+', ' ', text)
    # Drop single-character tokens (stray letters from OCR).
    text = re.sub(r'\b\w\b', '', text)
    # BUG FIX: removing single-char tokens leaves double/leading spaces
    # behind; re-collapse and strip so e.g. "i am a cat" -> "am cat", not
    # " am  cat". (The old trailing r'[^\w\s]' pass was redundant after
    # the r'[^a-z0-9\s]' pass above and has been dropped.)
    text = re.sub(r'\s+', ' ', text).strip()
    return text

# to extract and preprocess text from image using OCR
@lru_cache(maxsize=1)
def _get_reader():
    """Build the EasyOCR reader once and reuse it.

    BUG FIX: the reader was previously constructed on every call, which
    reloads the detection/recognition models each time — an expensive
    operation. NOTE(review): gpu=True falls back to CPU with a warning
    when no GPU is available — confirm this is the intended setting.
    """
    return easyocr.Reader(['en', 'tl'], gpu=True)


def ocr_extract_text(image_path):
    """Run OCR on an image and return both raw and cleaned text.

    Args:
        image_path: path to the image file to read.

    Returns:
        (final_text, preprocess_txt): the space-joined raw OCR text and
        its preprocess_text()-normalized form.
    """
    # detail=0 makes readtext return plain strings instead of
    # (bbox, text, confidence) tuples.
    result = _get_reader().readtext(image_path, detail=0)
    final_text = " ".join(result)
    preprocess_txt = preprocess_text(final_text)
    return final_text, preprocess_txt

# to resize and normalize image for model input
@lru_cache(maxsize=None)
def _build_transform(target_size):
    """Build and cache the resize / ToTensor / normalize pipeline.

    BUG FIX: the Compose pipeline was previously reconstructed on every
    call; it is deterministic for a given target_size, so cache it.
    target_size must be hashable (a tuple), which the default is.
    """
    return transforms.Compose([
        transforms.Resize(target_size),
        transforms.ToTensor(),
        transforms.Normalize(
            # Standard ImageNet channel statistics.
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ])


def resize_normalize_image(image, target_size=(224, 224)):
    """Load an image and convert it to a normalized model-input tensor.

    Args:
        image: path (or file object) accepted by PIL.Image.open.
        target_size: (height, width) tuple to resize to.

    Returns:
        A float tensor of shape (1, 3, H, W), ImageNet-normalized.
    """
    pil_image = Image.open(image).convert('RGB')  # force 3-channel RGB
    tensor = _build_transform(target_size)(pil_image)
    return tensor.unsqueeze(0)  # add batch dimension for the model

# if __name__ == "__main__":
#     input_image= "backend/test_image.jpg"
#     data["image_path"]= input_image
#     data["text"]= ocr_extract_text(input_image)
#     data["preprocess_image"]= resize_normalize_image(input_image)
#     print(data)