|
|
import html
import re

import streamlit as st

from main_got import extract_text
|
|
|
|
|
|
|
|
def _mark_matches(sentence: str, pattern: "re.Pattern[str]") -> str:
    """Return *sentence* as HTML with every *pattern* match wrapped in <mark>.

    Literal text between matches is HTML-escaped; the matched text keeps the
    casing it has in *sentence*.
    """
    pieces = []
    cursor = 0
    for match in pattern.finditer(sentence):
        pieces.append(html.escape(sentence[cursor:match.start()], quote=False))
        pieces.append(
            '<mark style="background-color: yellow">'
            f"{html.escape(match.group(0), quote=False)}</mark>"
        )
        cursor = match.end()
    pieces.append(html.escape(sentence[cursor:], quote=False))
    return "".join(pieces)


def highlight_keywords(text: str, keywords: str) -> str:
    """Build an HTML fragment of *text* with the given keywords highlighted.

    *text* is split into sentences on the literal separator ``". "``. A
    sentence containing at least one keyword (case-insensitively) is wrapped
    in a red ``<span>`` and each keyword occurrence inside it is wrapped in a
    yellow ``<mark>``. Sentences without a match are emitted unchanged apart
    from HTML escaping.

    Args:
        text: Plain text to search.
        keywords: Whitespace-separated keywords; each is matched literally
            (regex metacharacters have no special meaning).

    Returns:
        An HTML string. Sentence separators are restored exactly where they
        were split, so no punctuation is added to or duplicated in the text.
    """
    # Longest first so a keyword that is a prefix of another cannot shadow it
    # in the alternation.
    terms = sorted(set(keywords.split()), key=len, reverse=True)
    if not terms:
        # Nothing to search for: return the text as safe HTML.
        return html.escape(text, quote=False)

    # One compiled alternation, applied in a single pass per sentence, so the
    # markup inserted for one keyword is never rewritten by a later keyword
    # (e.g. searching for "mark" or "style").
    pattern = re.compile(
        "|".join(re.escape(term) for term in terms),
        re.IGNORECASE,
    )

    sentences = text.split(". ")
    last_index = len(sentences) - 1
    rendered = []
    for index, sentence in enumerate(sentences):
        # The period consumed by split() belongs to every sentence except the
        # final one, which keeps whatever ending it already had.
        period = "" if index == last_index else "."
        if pattern.search(sentence):
            rendered.append(
                f'<span style="color: red">'
                f"{_mark_matches(sentence, pattern)}{period}</span>"
            )
        else:
            rendered.append(html.escape(sentence, quote=False) + period)
    return " ".join(rendered)
|
|
|
|
|
|
|
|
|
|
|
# ---- Page header ----
st.title("OCR and Document Search Web App")

# ---- Image upload ----
uploaded_image = st.file_uploader(
    "Upload an image for OCR",
    type=["jpg", "png", "jpeg"],
)

# NOTE(review): nesting below is reconstructed from data flow, since
# `extracted_text` only exists once an image has been uploaded — confirm
# against the original layout.
if uploaded_image is not None:
    # Show a spinner while extract_text (from main_got) processes the upload.
    with st.spinner("Processing..."):
        extracted_text = extract_text(uploaded_image)

    st.subheader("Extracted Text")
    st.write(extracted_text)

    # ---- Keyword search over the extracted text ----
    search_query = st.text_input("Enter a keyword to search within the text")
    if search_query:
        # highlight_keywords returns HTML, so raw HTML rendering is enabled.
        highlighted_html = highlight_keywords(extracted_text, search_query)
        st.markdown(highlighted_html, unsafe_allow_html=True)
|
|
|