Spaces:

wahab5763
/

ImageScanner

Runtime error

File size: 2,144 Bytes

535cfc5
d421cca
b862961
d421cca
 
 
 
a68a28a
d421cca
 
535cfc5
d421cca
 
 
f08df26
d421cca
 
535cfc5
d421cca
 
 
 
 
 
535cfc5
d421cca
 
 
 
 
 
 
 
535cfc5
d421cca
 
 
 
 
 
b862961
d421cca
 
 
535cfc5
d421cca
 
ddd6099
d421cca
 
535cfc5
d421cca
 
535cfc5
d421cca

import html
import re
import subprocess

import numpy as np
import streamlit as st
from paddleocr import PaddleOCR
from PIL import Image
from pyngrok import ngrok

# Initialize PaddleOCR
@st.cache_resource
def _load_ocr_engine():
    """Build the PaddleOCR engine once and reuse it across Streamlit reruns.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Without caching, the OCR engine would be constructed again
    on each upload or camera capture.

    Returns:
        A PaddleOCR instance configured for English text with angle
        classification enabled.
    """
    # Enable angle classification for better accuracy
    return PaddleOCR(use_angle_cls=True, lang='en')


ocr = _load_ocr_engine()

# --- Page header ---
st.title("Real-Time Text Extraction from Images (PaddleOCR)")
st.markdown("Upload or capture an image to extract text using PaddleOCR.")

# --- Image sources: file upload first, then the camera widget ---
uploaded_file = st.file_uploader("Upload Image", type=['png', 'jpg', 'jpeg'])
st.markdown("OR")
captured_image = st.camera_input("Capture Image")

# Pick the first source that actually holds data; an uploaded file takes
# precedence over a camera capture. `image` stays None when neither is set.
image = None
for source, caption in (
    (uploaded_file, "Uploaded Image"),
    (captured_image, "Captured Image"),
):
    if source is None:
        continue
    # Open the chosen source and show a preview of it
    image = Image.open(source)
    st.image(image, caption=caption, use_container_width=True)
    break

if image is not None:
    # Run OCR on the selected image and render the recognized text.
    with st.spinner("Extracting text..."):
        # This is the top-level UI boundary: any failure while decoding the
        # image or running OCR is reported in the page instead of crashing
        # the script with a traceback.
        try:
            # Uploaded PNGs may be RGBA, palette or grayscale; convert so the
            # OCR engine always receives a 3-channel array.
            image_np = np.array(image.convert("RGB"))

            # Extract text from the image
            results = ocr.ocr(image_np, cls=True)

            # When nothing is detected the per-image entry can be None (or
            # the result list empty); treat that as "no text" rather than
            # letting iteration over None raise a TypeError.
            detected_lines = results[0] if results else None

            if not detected_lines:
                st.subheader("Extracted Text:")
                st.info("No text was detected in the image.")
            else:
                # Each entry is indexed as line[1][0] for the recognized text
                extracted_text = " ".join(line[1][0] for line in detected_lines)

                # Replace tabs or multiple spaces with a single space
                cleaned_text = re.sub(r'\s+', ' ', extracted_text).strip()

                # The text comes from an arbitrary image and is rendered with
                # unsafe_allow_html=True below, so neutralize any markup it
                # contains before inserting our own <br> tags.
                safe_text = html.escape(cleaned_text, quote=False)

                # Add HTML <br> tags for line breaks after numbers
                formatted_text = re.sub(r'(\b\d+\b)', r'\1<br>', safe_text)

                # Add line breaks for table columns or box-separated text
                formatted_text = re.sub(r'[\t|]', r'<br>', formatted_text)

                # Display the formatted text; HTML rendering is needed only
                # for the <br> tags inserted above.
                st.subheader("Extracted Text:")
                st.markdown(formatted_text, unsafe_allow_html=True)
        except Exception as e:
            st.error(f"Error during text extraction: {e}")