Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from gradio_client import Client, handle_file | |
| import re | |
| from thefuzz import fuzz | |
# Hugging Face Space clients, one per OCR option offered in the UI.
# Both are created once at module import and reused by every call to
# ocr_extraction().
surya_ocr_client = Client("artificialguybr/Surya-OCR")
got_ocr_client = Client("stepfun-ai/GOT_official_online_demo")

# Most recently extracted OCR text. Written by ocr_extraction() and read by
# search_keyword(). It is a module-level global, so it is shared by every
# caller in this process rather than kept per user.
extracted_text = ""
def _surya_text_lines(raw):
    """Return the ``text=...`` payloads found in a stringified Surya result.

    The payloads are string reprs, so they may be delimited by either quote
    character: Python switches to double quotes when the text contains an
    apostrophe, and backslash-escapes a quote that matches the delimiter.
    Matching only ``text='...'`` would silently drop or truncate such lines.
    """
    pattern = r"""text=(['"])((?:\\.|(?!\1)[^\\\n])*)\1"""
    lines = []
    for match in re.finditer(pattern, raw):
        quote, body = match.group(1), match.group(2)
        # Undo the escaping of the delimiter itself; other escape sequences
        # are left exactly as they appear in the raw string.
        lines.append(body.replace("\\" + quote, quote))
    return lines


def ocr_extraction(image, ocr_model):
    """Run the selected remote OCR model on an image and return its text.

    Args:
        image: Path of the uploaded image, or ``None`` when nothing was
            uploaded.
        ocr_model: ``"Surya OCR"`` or ``"GOT OCR"``.

    Returns:
        The extracted text on success. Otherwise a human-readable message:
        a prompt to upload an image, an invalid-model notice, or
        ``"An error occurred: ..."`` for any exception raised while calling
        the remote model or reading its result.

    Side effects:
        On success the text is also stored in the module-level
        ``extracted_text`` so that ``search_keyword`` can use it. On any
        non-success return the previously stored text is left unchanged.
    """
    global extracted_text

    if image is None:
        return "Please upload an image first."

    # Deliberately broad: this is the UI boundary, and any failure of the
    # remote call should be shown to the user instead of crashing the handler.
    try:
        if ocr_model == "Surya OCR":
            result = surya_ocr_client.predict(
                image=handle_file(image),
                langs="en",
                api_name="/ocr_workflow",
            )
            # The result is only available as an object dump here, so the
            # recognised lines are recovered from its string form.
            extracted_text = "\n".join(_surya_text_lines(str(result)))
        elif ocr_model == "GOT OCR":
            result = got_ocr_client.predict(
                image=handle_file(image),
                got_mode="plain texts OCR",
                fine_grained_mode="box",
                ocr_color="red",
                ocr_box="Hello!!",
                api_name="/run_GOT",
            )
            # NOTE(review): assumes the first element of the result is the
            # plain OCR text - confirm against the Space's API page.
            extracted_text = result[0]
        else:
            return "Invalid OCR model selected."
        return extracted_text
    except Exception as e:
        return f"An error occurred: {str(e)}"
def search_keyword(keyword, search_type):
    """Return the stored OCR text as HTML with keyword matches highlighted.

    Reads the module-level ``extracted_text`` (it is never written here).

    Args:
        keyword: Text to look for. When empty, the OCR text is returned
            without any highlighting.
        search_type: ``"Direct Search"`` highlights every case-insensitive
            occurrence of ``keyword``. Any other value performs the
            "Nearest Search": the text is split on whitespace and each word
            whose ``fuzz.ratio`` against the keyword is at least 80 is
            highlighted; words are re-joined with single spaces.

    Returns:
        An HTML string, or a plain message when no OCR text is available.
        The OCR text comes from an uploaded image and is rendered by an HTML
        component, so every fragment of it is HTML-escaped; only the
        highlight ``<span>`` tags are emitted as markup.
    """
    import html  # local import keeps this fix independent of the file header

    if not extracted_text:
        return "No OCR text found. Please extract text from an image first."

    def escape(fragment):
        # quote=False: quotes are harmless in element content, so ordinary
        # text is left looking exactly as it was.
        return html.escape(fragment, quote=False)

    def highlight(fragment):
        return f'<span style="background-color: yellow;">{escape(fragment)}</span>'

    if not keyword:
        return escape(extracted_text)

    if search_type == "Direct Search":
        # Match on the raw text and escape afterwards, so the keyword can
        # never match inside an entity such as "&amp;". With one capturing
        # group, re.split alternates: even indexes are the gaps, odd indexes
        # are the matches.
        pieces = re.split(
            f"({re.escape(keyword)})", extracted_text, flags=re.IGNORECASE
        )
        return "".join(
            highlight(piece) if index % 2 else escape(piece)
            for index, piece in enumerate(pieces)
        )

    # Nearest Search
    target = keyword.lower()  # loop-invariant
    return " ".join(
        # 80 is the similarity threshold; adjust as needed.
        highlight(word) if fuzz.ratio(word.lower(), target) >= 80 else escape(word)
        for word in extracted_text.split()
    )
# UI definition. Components are laid out in the order they are created inside
# the nested Row/Column contexts, so the statement order below is significant.
with gr.Blocks(theme=gr.themes.Soft()) as gr_interface:
    # The emoji in this heading had been corrupted into mojibake.
    gr.Markdown("# 📷 OCR Text Extraction and Advanced Keyword Search 🔍")

    # Row 1: image upload and model choice on the left, OCR result on the right.
    with gr.Row():
        with gr.Column(scale=1):
            # type="filepath": ocr_extraction receives a path on disk, which
            # it hands to handle_file().
            image_input = gr.Image(type="filepath", label="Upload Image")
            ocr_model_dropdown = gr.Dropdown(
                choices=["Surya OCR", "GOT OCR"],
                value="Surya OCR",
                label="Select OCR Model"
            )
            ocr_button = gr.Button("Extract Text", variant="primary")
        with gr.Column(scale=2):
            extracted_text_output = gr.Textbox(
                label="Extracted Text",
                placeholder="Text extracted from the image will appear here.",
                lines=10
            )

    # Row 2: keyword and search mode on the left, highlighted HTML on the right.
    with gr.Row():
        with gr.Column(scale=1):
            keyword_input = gr.Textbox(label="Enter keyword to search")
            search_type = gr.Radio(["Direct Search", "Nearest Search"], label="Search Type", value="Direct Search")
            search_button = gr.Button("Search Keyword", variant="secondary")
        with gr.Column(scale=2):
            highlighted_output = gr.HTML(label="Highlighted Text")

    # Event wiring. The choice strings above must stay in sync with the
    # literals compared in ocr_extraction() and search_keyword().
    ocr_button.click(
        fn=ocr_extraction,
        inputs=[image_input, ocr_model_dropdown],
        outputs=extracted_text_output
    )
    search_button.click(
        fn=search_keyword,
        inputs=[keyword_input, search_type],
        outputs=highlighted_output
    )

# Start the app as soon as the module is executed.
gr_interface.launch(share=True)