Spaces:

Arch10
/

ocr-app

Sleeping

App Files Community

Arch10 committed on Sep 26, 2024

Commit

a041eb6

verified ·

1 Parent(s): aca06b8

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -28

app.py CHANGED Viewed

@@ -1,39 +1,44 @@
 import streamlit as st
-from transformers import AutoTokenizer, AutoModel
-import torch
 from PIL import Image
-# Load the pre-trained GOT OCR 2.0 model and tokenizer
-@st.cache_resource(show_spinner=True)
-def load_model():
-    tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # Check for GPU, fallback to CPU
-    model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, use_safetensors=True)
-    model = model.eval().to(device)  # Move the model to the appropriate device
-    return tokenizer, model, device
-# Streamlit interface
-st.title("OCR Application using General OCR Theory (GOT) 2.0")
-st.write("Upload an image to extract text using the GOT OCR 2.0 model.")
-# File upload handler
-uploaded_file = st.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])
-if uploaded_file is not None:
-    # Display the uploaded image
-    st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)
-    # Load model
-    tokenizer, model, device = load_model()
-    # Load the image
-    image = Image.open(uploaded_file)
-    image.save("temp_image.png")  # Save the uploaded image to a temporary file
     # Perform OCR
     with st.spinner("Extracting text..."):
-        res = model.chat(tokenizer, "temp_image.png", ocr_type='ocr')
-    # Display the result
-    st.write("Extracted Text:")
-    st.text(res)

 import streamlit as st
+import easyocr
 from PIL import Image
+import re
import html


def load_reader():
    """Build the EasyOCR reader for English and Hindi.

    Constructing a reader loads the detection/recognition models, which is
    slow; callers should cache the result instead of calling this on every
    Streamlit rerun.
    """
    return easyocr.Reader(['en', 'hi'])


def highlight_keyword(text: str, keyword: str) -> str:
    """Return *text* as HTML with every occurrence of *keyword* in <mark> tags.

    Matching is case-insensitive and literal: the keyword is passed through
    ``re.escape`` so characters such as ``(`` or ``+`` cannot break or alter
    the pattern. Every segment of the text is HTML-escaped individually, so
    markup-like characters coming out of the OCR step are displayed verbatim
    and a match can never land inside an escaped entity such as ``&amp;``.

    An empty keyword yields the escaped text with no highlighting.
    """
    if not keyword:
        return html.escape(text)

    pattern = re.compile(re.escape(keyword), flags=re.IGNORECASE)
    pieces = []
    cursor = 0
    for match in pattern.finditer(text):
        pieces.append(html.escape(text[cursor:match.start()]))
        pieces.append(f"<mark>{html.escape(match.group(0))}</mark>")
        cursor = match.end()
    pieces.append(html.escape(text[cursor:]))
    return "".join(pieces)


def main():
    """Render the OCR page: upload an image, extract text, search a keyword."""
    # Streamlit app title
    st.title("Image Text Extraction and Keyword Search using EasyOCR")

    # File uploader for image input
    uploaded_image = st.file_uploader("Upload an image", type=['png', 'jpg', 'jpeg'])
    if uploaded_image is None:
        return

    # Load and display the uploaded image
    image = Image.open(uploaded_image)
    st.image(image, caption='Uploaded Image', use_column_width=True)

    # Perform OCR
    with st.spinner("Extracting text..."):
        # The script is re-executed on every widget interaction; wrapping the
        # loader with st.cache_resource keeps a single reader alive across
        # reruns instead of reloading the models each time.
        reader = st.cache_resource(load_reader)()
        # EasyOCR accepts a path/URL, raw bytes or a numpy array. A PIL image
        # is only tolerated when it is a JPEG, so PNG uploads used to fail;
        # the raw uploaded bytes work for every allowed file type.
        results = reader.readtext(uploaded_image.getvalue())

    # Extract the text
    extracted_text = " ".join(text for (_, text, _) in results)
    if not extracted_text:
        st.warning("No text detected in the image.")
        return

    st.success("Extracted Text:")
    st.write(extracted_text)

    # Keyword search feature
    keyword = st.text_input("Enter a keyword to search in the extracted text:")
    if not keyword:
        st.info("Enter a keyword to search.")
        return

    # Highlight matches; both strings are escaped because they are rendered
    # with unsafe_allow_html=True.
    st.markdown(
        f"**Search Results for '{html.escape(keyword)}':**",
        unsafe_allow_html=True,
    )
    st.markdown(highlight_keyword(extracted_text, keyword), unsafe_allow_html=True)


# `streamlit run` executes the script with __name__ == "__main__", so the app
# still starts normally while the helpers above stay importable.
if __name__ == "__main__":
    main()