| import gradio as gr |
| from byaldi import RAGMultiModalModel |
# Checkpoint identifier handed to byaldi when loading the model.
MODEL_ID = "vidore/colpali"

# Loaded once at module level; extract_and_search reads this global.
model = RAGMultiModalModel.from_pretrained(MODEL_ID)
def extract_and_search(image, keyword):
    """Extract text from an image and find the lines that mention a keyword.

    Args:
        image: Image to run through the module-level ``model``. ``None``
            (nothing supplied) is answered with empty results instead of
            being forwarded to the model.
        keyword: Text to look for. Matching is a case-insensitive substring
            test. ``None`` is treated as an empty string, and an empty
            keyword matches every line.

    Returns:
        An ``(extracted_text, matching_lines)`` tuple: the text returned by
        ``model.predict`` and the list of its lines that contain ``keyword``.
    """
    if image is None:
        return "", []

    # NOTE(review): byaldi's documented RAGMultiModalModel API is built around
    # index()/search(); confirm that predict() exists and returns a str.
    extracted_text = model.predict(image)

    # Lower-case the keyword once instead of once per line.
    needle = (keyword or "").lower()
    matching_lines = [
        line for line in extracted_text.splitlines() if needle in line.lower()
    ]
    return extracted_text, matching_lines
# Input widgets, listed in the order extract_and_search takes its arguments.
input_components = [
    gr.Image(type="pil", label="Upload Image"),
    gr.Textbox(label="Enter Keyword"),
]

# Output widgets, listed in the order of the values extract_and_search returns.
output_components = [
    gr.Textbox(label="Extracted Text"),
    gr.Textbox(label="Matching Lines"),
]

interface = gr.Interface(
    fn=extract_and_search,
    inputs=input_components,
    outputs=output_components,
    title="ColPali OCR with Keyword Search",
    description="Upload an image and enter a keyword to search within the extracted text.",
)

# Runs as soon as this file is executed or imported. share=True asks Gradio
# for a shareable link in addition to the local server.
interface.launch(share=True)