"""OCR and keyword-search demo application.

Extracts English/Hindi text from an uploaded image with EasyOCR, normalises
Devanagari digits to ASCII digits, and reports (and highlights) occurrences
of a user-supplied keyword through a Gradio web interface.
"""

import html
import os
import re
import tempfile

import easyocr
import gradio as gr
from PIL import Image

# Built once at import time: constructing the reader loads the OCR models.
reader = easyocr.Reader(['en', 'hi'], gpu=False)

# Translation table mapping each Devanagari digit to its ASCII counterpart.
_HINDI_TO_ARABIC = str.maketrans({
    '०': '0', '१': '1', '२': '2', '३': '3', '४': '4',
    '५': '5', '६': '6', '७': '7', '८': '8', '९': '9',
})


def convert_hindi_numerals_to_arabic(text):
    """Return *text* with Devanagari digits replaced by ASCII digits.

    All other characters are left untouched.
    """
    # One pass over the string instead of ten chained ``replace`` calls.
    return text.translate(_HINDI_TO_ARABIC)


def extract_text(image_path):
    """Run OCR on the image at *image_path* and return the recognised text.

    The image is first re-encoded as an RGB JPEG, then passed to the
    module-level EasyOCR ``reader``. The recognised fragments are joined
    with single spaces and Devanagari digits are converted to ASCII digits.

    Args:
        image_path: Path of an image file that Pillow can open.

    Returns:
        The extracted text as a single string (empty if nothing was read).
    """
    # A private temporary directory gives every call its own JPEG path, so
    # concurrent requests cannot overwrite each other's file, and the
    # context manager removes it even when conversion or OCR raises.
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_jpg_path = os.path.join(temp_dir, "temp_image.jpg")
        # Close the source image deterministically once it has been saved.
        with Image.open(image_path) as img:
            img.convert("RGB").save(temp_jpg_path, "JPEG")
        # detail=0 is the same argument the reader was always called with;
        # the result is treated as a list of plain text fragments.
        result = reader.readtext(temp_jpg_path, detail=0)

    extracted_text = " ".join(result)
    return convert_hindi_numerals_to_arabic(extracted_text)


def _highlight_matches(text, pattern):
    """Return ``(markup, found)`` for *text* searched with compiled *pattern*.

    ``markup`` is *text* HTML-escaped, with every match wrapped in
    ``<mark>...</mark>``. ``found`` is True when at least one match exists.
    """
    pieces = []
    cursor = 0
    found = False
    for match in pattern.finditer(text):
        found = True
        # Escape the plain segments and the match separately so the <mark>
        # tags are the only real markup in the result.
        pieces.append(html.escape(text[cursor:match.start()]))
        pieces.append(f"<mark>{html.escape(match.group(0))}</mark>")
        cursor = match.end()
    pieces.append(html.escape(text[cursor:]))
    return "".join(pieces), found


def search_text(image_path, keyword):
    """Search the text extracted from an image for *keyword*.

    The search is case-insensitive and treats *keyword* literally (regex
    metacharacters are escaped).

    Args:
        image_path: Path of the uploaded image, or a falsy value when no
            image was supplied.
        keyword: The text to look for.

    Returns:
        A ``(status_message, html_text)`` tuple. ``html_text`` is the
        HTML-escaped extracted text with each match wrapped in ``<mark>``.
    """
    if not image_path:
        # No upload: report it instead of crashing inside Image.open.
        return "Please upload an image to search.", ""

    extracted_text = extract_text(image_path)

    if not keyword:
        # An empty pattern matches everywhere, which would misleadingly
        # report the keyword as "found".
        return "Please enter a keyword to search.", html.escape(extracted_text)

    pattern = re.compile(re.escape(keyword), re.IGNORECASE)
    highlighted_text, found = _highlight_matches(extracted_text, pattern)

    if found:
        return f"Keyword '{keyword}' found in the extracted text.", highlighted_text
    return f"Keyword '{keyword}' not found in the extracted text.", highlighted_text


def create_interface():
    """Build and return the Gradio interface wired to :func:`search_text`."""
    interface = gr.Interface(
        fn=search_text,
        inputs=[
            # type="filepath" hands search_text a path on disk rather than
            # image data.
            gr.Image(type="filepath", label="Upload Image"),
            gr.Textbox(lines=1, placeholder="Enter keyword to search", label="Keyword"),
        ],
        outputs=[
            gr.Textbox(label="Search Result"),
            gr.HTML(label="Extracted Text"),
        ],
        title="OCR and Keyword Search Application",
        description="Upload an image containing text in English or Hindi. Enter a keyword to search within the extracted text.",
    )
    return interface


if __name__ == "__main__":
    interface = create_interface()
    interface.launch(share=True)