Spaces:
Build error
Build error
import html
import re

import gradio as gr
from gradio_client import Client, handle_file
# Hugging Face Space clients, one per OCR backend offered in the UI.
# NOTE(review): both clients are constructed at import time; presumably this
# contacts the Spaces on startup, so an unavailable Space would stop the app
# from loading -- confirm.
surya_ocr_client = Client("artificialguybr/Surya-OCR")
got_ocr_client = Client("stepfun-ai/GOT_official_online_demo")

# Module-level store for the most recently extracted OCR text. It is written by
# ocr_extraction and read by search_keyword.
extracted_text = ""
# Matches a repr-style ``text=<quoted string>`` field. Python's repr switches to
# double quotes when the value contains an apostrophe, so both quote styles are
# accepted; backslash-escaped characters inside the value are stepped over
# instead of being mistaken for the closing quote.
_SURYA_TEXT_FIELD = re.compile(r"""text=(['"])((?:\\.|(?!\1).)*)\1""")


def _surya_text_lines(result):
    """Return the value of every ``text=...`` field found in a Surya OCR result."""
    if isinstance(result, (list, tuple)):
        # String elements are used verbatim: calling str() on the whole
        # sequence would repr() them a second time, which can backslash-escape
        # the very quotes the pattern relies on.
        chunks = [part if isinstance(part, str) else repr(part) for part in result]
    else:
        chunks = [str(result)]
    flattened = "\n".join(chunks)
    return [
        # Drop the backslash that repr puts in front of an embedded quote.
        re.sub(r"""\\(['"])""", r"\1", match.group(2))
        for match in _SURYA_TEXT_FIELD.finditer(flattened)
    ]


def ocr_extraction(image, ocr_model):
    """Run the selected hosted OCR model on an image and return its text.

    Args:
        image: Path of the image to process (the UI passes a file path), or
            ``None`` when nothing has been uploaded.
        ocr_model: ``"Surya OCR"`` or ``"GOT OCR"``.

    Returns:
        The extracted text on success; otherwise a user-facing message
        (missing image, unknown model, or the text of the raised exception).

    Side effects:
        Stores the extracted text in the module-level ``extracted_text``.
        The store is cleared at the start of every call, so a failed run never
        leaves the previous run's text available to the keyword search.
    """
    global extracted_text

    extracted_text = ""

    if image is None:
        return "Please upload an image first."

    try:
        if ocr_model == "Surya OCR":
            result = surya_ocr_client.predict(
                image=handle_file(image),
                langs="en",
                api_name="/ocr_workflow",
            )
            text = "\n".join(_surya_text_lines(result))
        elif ocr_model == "GOT OCR":
            result = got_ocr_client.predict(
                image=handle_file(image),
                got_mode="plain texts OCR",
                fine_grained_mode="box",
                ocr_color="red",
                ocr_box="Hello!!",
                api_name="/run_GOT",
            )
            # NOTE(review): the endpoint is expected to return a sequence whose
            # first item is the text; a bare string is passed through so that
            # indexing does not silently return only its first character.
            text = result if isinstance(result, str) else result[0]
        else:
            return "Invalid OCR model selected."
    except Exception as e:
        # UI boundary: report the failure in the output box instead of raising.
        return f"An error occurred: {str(e)}"

    extracted_text = text
    return extracted_text
def search_keyword(keyword):
    """Return the stored OCR text as HTML with every keyword match highlighted.

    Args:
        keyword: Text to look for, matched literally and case-insensitively.
            An empty value returns the whole text without highlights.

    Returns:
        An HTML string. The OCR text is HTML-escaped and its newlines become
        ``<br>`` so that it renders as plain text; each match is wrapped in a
        yellow-background ``<span>``. If no text has been extracted yet, a
        short instruction message is returned instead.

    Reads the module-level ``extracted_text``.
    NOTE(review): that store is a single module global, so it is presumably
    shared by every session of the app -- confirm whether per-session state
    is needed.
    """
    if not extracted_text:
        return "No OCR text found. Please extract text from an image first."

    def render(fragment):
        # The result is shown in an HTML component, so OCR output must not be
        # interpreted as markup, and line breaks need an explicit tag.
        return html.escape(fragment).replace("\n", "<br>")

    if not keyword:
        return render(extracted_text)

    # Splitting on a single capturing group yields alternating pieces:
    # even indexes are the text between matches, odd indexes are the matches.
    # Escaping each piece after the split keeps the keyword from matching
    # inside entities such as "&lt;".
    pieces = re.split(
        f"({re.escape(keyword)})", extracted_text, flags=re.IGNORECASE
    )
    return "".join(
        f'<span style="background-color: yellow;">{render(piece)}</span>'
        if index % 2
        else render(piece)
        for index, piece in enumerate(pieces)
    )
# Build the UI: the top row handles OCR extraction, the bottom row handles
# keyword search over the text produced by the most recent extraction.
with gr.Blocks(theme=gr.themes.Soft()) as gr_interface:
    # The two emoji in the title were mis-decoded in the original string
    # literal; restored here (best guess: camera and magnifying glass).
    gr.Markdown("# 📷 OCR Text Extraction and Direct Keyword Search 🔍")

    with gr.Row():
        with gr.Column(scale=1):
            # type="filepath" hands ocr_extraction a path string, which it
            # passes to handle_file().
            image_input = gr.Image(type="filepath", label="Upload Image")
            ocr_model_dropdown = gr.Dropdown(
                choices=["Surya OCR", "GOT OCR"],
                value="Surya OCR",
                label="Select OCR Model",
            )
            ocr_button = gr.Button("Extract Text", variant="primary")
        with gr.Column(scale=2):
            extracted_text_output = gr.Textbox(
                label="Extracted Text",
                placeholder="Text extracted from the image will appear here.",
                lines=10,
            )

    with gr.Row():
        with gr.Column(scale=1):
            keyword_input = gr.Textbox(label="Enter keyword to search")
            search_button = gr.Button("Search Keyword", variant="secondary")
        with gr.Column(scale=2):
            highlighted_output = gr.HTML(label="Highlighted Text")

    # Extraction: image + model choice in, extracted text (or a message) out.
    ocr_button.click(
        fn=ocr_extraction,
        inputs=[image_input, ocr_model_dropdown],
        outputs=extracted_text_output,
    )
    # Search: only the keyword is passed; search_keyword reads the text from
    # the module-level store filled by the last extraction.
    search_button.click(
        fn=search_keyword,
        inputs=[keyword_input],
        outputs=highlighted_output,
    )

gr_interface.launch(share=True)