File size: 2,116 Bytes
71deb6b
 
 
 
ce1aae3
71deb6b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ce1aae3
 
 
 
 
 
 
 
 
 
 
71deb6b
ce1aae3
71deb6b
 
 
 
 
 
 
 
 
 
ce1aae3
71deb6b
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import html
import os
import re
import tempfile

import easyocr
import gradio as gr
from PIL import Image

# Module-level EasyOCR reader configured for English ('en') and Hindi ('hi')
# with gpu=False. It is created once at import time and reused by
# extract_text() for every request.
reader = easyocr.Reader(['en', 'hi'], gpu=False)

# Translation table mapping each Devanagari digit to its ASCII counterpart.
# Built once at import time so each call is a single str.translate pass.
_HINDI_TO_ARABIC_DIGITS = str.maketrans("०१२३४५६७८९", "0123456789")


def convert_hindi_numerals_to_arabic(text: str) -> str:
    """Return *text* with Devanagari digits (०-९) replaced by ASCII digits.

    Every other character is left untouched.

    Args:
        text: The string to convert.

    Returns:
        A new string in which each of the ten Devanagari digits has been
        replaced by the corresponding digit ``0``-``9``.
    """
    return text.translate(_HINDI_TO_ARABIC_DIGITS)

def extract_text(image_path: str) -> str:
    """Run OCR on an image and return the recognised text as one string.

    The image is re-encoded as an RGB JPEG into a temporary file, that file is
    passed to the module-level EasyOCR ``reader``, the recognised fragments are
    joined with single spaces, and Devanagari digits are converted to ASCII
    digits via ``convert_hindi_numerals_to_arabic``.

    Args:
        image_path: Location of the image, in any form ``PIL.Image.open``
            accepts.

    Returns:
        The extracted text with fragments separated by single spaces.

    Raises:
        Any exception raised by ``PIL.Image.open``, the JPEG save, or
        ``reader.readtext`` propagates to the caller; the temporary file is
        removed in every case.
    """
    # Use a unique temp file per call. A fixed name in the working directory
    # would be overwritten or deleted by another request running concurrently.
    fd, temp_jpg_path = tempfile.mkstemp(suffix=".jpg")
    # Only the path is needed; PIL reopens the file itself when saving.
    os.close(fd)
    try:
        # The context manager closes the source image handle promptly.
        with Image.open(image_path) as img:
            img.convert("RGB").save(temp_jpg_path, "JPEG")
        result = reader.readtext(temp_jpg_path, detail=0)
    finally:
        # Clean up even when decoding or OCR fails.
        os.remove(temp_jpg_path)

    extracted_text = " ".join(result)
    return convert_hindi_numerals_to_arabic(extracted_text)

def search_text(image_path, keyword):
    """OCR an image and highlight every occurrence of *keyword* in its text.

    Matching is literal (the keyword is regex-escaped) and case-insensitive.
    The extracted text is HTML-escaped before being returned, because it is
    rendered as HTML; only the ``<mark>`` wrappers added here are real markup.

    Args:
        image_path: Path of the image to read. A falsy value (no image
            supplied) short-circuits with a prompt instead of failing.
        keyword: Text to look for. When empty, the extracted text is returned
            without any highlighting.

    Returns:
        A ``(status_message, html_text)`` tuple: a plain-text status line and
        the escaped extracted text with matches wrapped in ``<mark>`` tags.
    """
    if not image_path:
        return "Please upload an image.", ""

    extracted_text = extract_text(image_path)

    if not keyword:
        # An empty pattern matches at every position, which would report a
        # bogus "found" and scatter empty <mark> tags through the text.
        return (
            "No keyword entered; showing the extracted text without highlighting.",
            html.escape(extracted_text),
        )

    pattern = re.compile(f"({re.escape(keyword)})", flags=re.IGNORECASE)
    # With one capturing group, split() alternates non-matching text (even
    # indices) with matched text (odd indices).
    pieces = pattern.split(extracted_text)

    mark_open = '<mark style="background-color: lightblue; color: black;">'
    # Escape each piece separately so OCR output containing "<" or "&" cannot
    # inject markup, while matching still runs against the raw text.
    highlighted_text = "".join(
        f"{mark_open}{html.escape(piece)}</mark>" if index % 2 else html.escape(piece)
        for index, piece in enumerate(pieces)
    )

    # More than one piece means the pattern matched at least once.
    if len(pieces) > 1:
        return f"Keyword '{keyword}' found in the extracted text.", highlighted_text
    return f"Keyword '{keyword}' not found in the extracted text.", highlighted_text

def create_interface():
    """Assemble the Gradio interface that drives ``search_text``.

    The interface takes an uploaded image and a keyword, and shows a status
    line plus the extracted text rendered as HTML.

    Returns:
        The configured ``gr.Interface``; launching it is left to the caller.
    """
    # Inputs, in the positional order search_text(image_path, keyword) expects.
    # type="filepath" asks Gradio for the upload as a path.
    image_input = gr.Image(type="filepath", label="Upload Image")
    keyword_input = gr.Textbox(
        lines=1,
        placeholder="Enter keyword to search",
        label="Keyword",
    )

    # Outputs, in the order of search_text's returned (status, html) pair.
    status_output = gr.Textbox(label="Search Result")
    text_output = gr.HTML(label="Extracted Text")

    return gr.Interface(
        fn=search_text,
        inputs=[image_input, keyword_input],
        outputs=[status_output, text_output],
        title="OCR and Keyword Search Application",
        description="Upload an image containing text in English or Hindi. Enter a keyword to search within the extracted text.",
    )

if __name__ == "__main__":
    # Only when run as a script: build the interface and start serving it.
    interface = create_interface()
    # NOTE(review): share=True is passed to launch(); per the Gradio docs this
    # requests a publicly reachable share link — confirm that is intended.
    interface.launch(share=True)