File size: 1,541 Bytes
9abeda0 379b35c 9abeda0 379b35c 381636a 89a3f00 381636a 89a3f00 381636a 89a3f00 379b35c 2286e1a 379b35c ffb2044 b922c7e 379b35c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
import streamlit as st
from main_got import extract_text
import re
def highlight_keywords(text: str, keywords: str) -> str:
    """Return *text* as HTML with keyword hits and their sentences highlighted.

    The text is split into sentences on the literal separator ``". "``.
    Every case-insensitive occurrence of a keyword is wrapped in a yellow
    ``<mark>`` tag, and any sentence containing at least one hit is wrapped
    in a red ``<span>``.  Sentences without hits are emitted unchanged
    (apart from HTML escaping), so the visible text is the same as the
    input text.

    Args:
        text: Plain text to search (for example OCR output).  It is
            HTML-escaped before being embedded in the result.
        keywords: Whitespace-separated search terms.  If it contains no
            terms, the escaped text is returned without any highlighting.

    Returns:
        An HTML fragment intended to be rendered with raw HTML enabled.
    """
    # Local import keeps this function self-contained; the module's
    # top-level imports do not include ``html``.
    import html

    # De-duplicate case-insensitively (keeping first-seen order) and try
    # longer terms first so "category" wins over "cat" at the same position.
    terms = sorted(
        dict.fromkeys(term.lower() for term in keywords.split()),
        key=len,
        reverse=True,
    )
    if not terms:
        return html.escape(text)

    # One combined pattern means every keyword is matched against the raw
    # sentence in a single pass, so a keyword such as "mark" or "color" can
    # never match inside markup inserted for an earlier keyword.
    pattern = re.compile("|".join(re.escape(term) for term in terms), re.IGNORECASE)

    sentences = text.split(". ")
    last_index = len(sentences) - 1
    rendered = []
    for index, sentence in enumerate(sentences):
        # split() consumed the ". " separators: restore the period here and
        # the space via the final join.  The last sentence had no separator
        # after it, so nothing is appended to it.
        terminator = "" if index == last_index else "."

        pieces = []
        cursor = 0
        for match in pattern.finditer(sentence):
            hit = html.escape(match.group())
            pieces.append(html.escape(sentence[cursor:match.start()]))
            pieces.append(f'<mark style="background-color: yellow">{hit}</mark>')
            cursor = match.end()

        if not pieces:
            # No keyword in this sentence: emit it as plain (escaped) text.
            rendered.append(html.escape(sentence) + terminator)
            continue

        pieces.append(html.escape(sentence[cursor:]))
        body = "".join(pieces)
        rendered.append(f'<span style="color: red">{body}{terminator}</span>')

    return " ".join(rendered)
# Streamlit UI
# Page flow: upload an image, run extract_text on it, show the result, then
# optionally highlight a user-supplied search query inside that text.
# NOTE(review): the original indentation was lost in this copy of the file;
# the nesting below is the conventional reading of the statements -- confirm
# against the real source (in particular where the spinner block ends).
st.title("OCR and Document Search Web App")
# Image upload
# Only jpg / png / jpeg files are accepted by the uploader widget.
uploaded_image = st.file_uploader("Upload an image for OCR", type=["jpg", "png", "jpeg"])
if uploaded_image is not None:
    with st.spinner("Processing..."):
        # Extract text from the uploaded image
        # extract_text comes from main_got; presumably it returns the
        # recognised text as a str -- TODO confirm against main_got.
        extracted_text = extract_text(uploaded_image)
    st.subheader("Extracted Text")
    st.write(extracted_text)
    # Search functionality
    search_query = st.text_input("Enter a keyword to search within the text")
    if search_query:
        # highlight_keywords returns HTML (<mark>/<span> tags), so raw HTML
        # rendering has to be enabled for the highlighting to show up.
        st.markdown(highlight_keywords(extracted_text, search_query), unsafe_allow_html=True)
|