# Hosting-page residue (status banner, file size, commit hashes, line-number
# gutter) kept as comments so it cannot be parsed as program code:
# Spaces:
# Runtime error
# Runtime error
# File size: 2,144 Bytes
# 535cfc5 d421cca b862961 d421cca a68a28a d421cca 535cfc5 d421cca f08df26 d421cca 535cfc5 d421cca 535cfc5 d421cca 535cfc5 d421cca b862961 d421cca 535cfc5 d421cca ddd6099 d421cca 535cfc5 d421cca 535cfc5 d421cca |
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
import html
import re
import subprocess

import numpy as np
import streamlit as st
from paddleocr import PaddleOCR
from PIL import Image
from pyngrok import ngrok
# Initialize PaddleOCR
@st.cache_resource
def _load_ocr_engine():
    """Create the PaddleOCR engine once and reuse it on later script runs.

    Streamlit re-executes this script from the top on every widget
    interaction; caching the engine as a resource avoids rebuilding it (and
    reloading its models) on each rerun.

    Returns:
        A PaddleOCR instance configured for English text with angle
        classification enabled.
    """
    # Enable angle classification for better accuracy
    return PaddleOCR(use_angle_cls=True, lang='en')


ocr = _load_ocr_engine()
# --- Page header -------------------------------------------------------------
st.title("Real-Time Text Extraction from Images (PaddleOCR)")
st.markdown("Upload or capture an image to extract text using PaddleOCR.")

# --- Image sources: file picker, an "OR" separator, then the camera widget ---
# Widgets render in call order, so the sequence below is the on-page layout.
uploaded_file = st.file_uploader(
    label="Upload Image",
    type=["png", "jpg", "jpeg"],
)
st.markdown("OR")
captured_image = st.camera_input(label="Capture Image")
# Pick the image to process. An uploaded file takes priority over a camera
# capture; `image` stays None when neither is available or the data cannot be
# read as an image.
image = None
if uploaded_file is not None:
    _source, _caption = uploaded_file, "Uploaded Image"
elif captured_image is not None:
    _source, _caption = captured_image, "Captured Image"
else:
    _source, _caption = None, None

if _source is not None:
    try:
        image = Image.open(_source)
        # Image.open() is lazy; force decoding here so a corrupt or truncated
        # file is reported now instead of crashing a later step.
        image.load()
    except OSError as exc:  # includes PIL.UnidentifiedImageError
        image = None
        st.error(f"Could not read the image: {exc}")
    else:
        # Show a preview of whichever image was selected.
        st.image(image, caption=_caption, use_container_width=True)
# Run OCR on the selected image (if any) and render the recognized text.
if image is not None:
    # np.array() on palette ("P"), bilevel ("1") or CMYK images yields colour
    # indices, booleans or ink planes rather than pixel colours, so those modes
    # are converted to RGB first. RGB / RGBA / greyscale inputs are passed
    # through unchanged to preserve the existing behaviour for them.
    _ocr_input = image if image.mode in ("RGB", "RGBA", "L") else image.convert("RGB")
    # Convert image to numpy array
    image_np = np.array(_ocr_input)

    # Perform OCR with PaddleOCR
    with st.spinner("Extracting text..."):
        try:
            results = ocr.ocr(image_np, cls=True)
            # One result entry per input image; the entry for this image can be
            # None (or empty) when no text was detected.
            detected_lines = results[0] if results else None
            if not detected_lines:
                st.info("No text was detected in the image.")
            else:
                # Each detected line is indexed as line[1][0] to obtain its
                # recognized string; join them into one text.
                extracted_text = " ".join(line[1][0] for line in detected_lines)
                # Clean the extracted text: replace tabs or multiple spaces
                # with a single space
                cleaned_text = re.sub(r'\s+', ' ', extracted_text).strip()
                # The text comes from an arbitrary image and is rendered with
                # unsafe_allow_html=True below, so neutralise any markup in it
                # before adding our own <br> tags. quote=False keeps the
                # escapes digit-free so the number rule below cannot match
                # inside an entity.
                safe_text = html.escape(cleaned_text, quote=False)
                # Add HTML <br> tags for line breaks after numbers
                formatted_text = re.sub(r'(\b\d+\b)', r'\1<br>', safe_text)
                # Add line breaks for table columns or box-separated text
                formatted_text = re.sub(r'[\t|]', r'<br>', formatted_text)
                # Display the formatted text with HTML rendering
                st.subheader("Extracted Text:")
                st.markdown(formatted_text, unsafe_allow_html=True)
        except Exception as e:
            # UI boundary: surface any OCR/parsing failure to the user instead
            # of letting the script crash with a traceback.
            st.error(f"Error during text extraction: {e}")