Spaces:

wahab5763
/

ImageScanner

Runtime error

App Files Files Community

ImageScanner / app.py

wahab5763

Update app.py

d421cca verified about 1 year ago

raw

history blame contribute delete

2.14 kB

	import html
	import re
	import subprocess

	import numpy as np
	import streamlit as st
	from paddleocr import PaddleOCR
	from PIL import Image
	from pyngrok import ngrok

	# Initialize PaddleOCR
	@st.cache_resource
	def _load_ocr():
	    """Build the PaddleOCR engine once and share it across script reruns.

	    Streamlit re-executes this script on every widget interaction; without
	    caching, the engine would be constructed again on each rerun.
	    """
	    # Enable angle classification for better accuracy
	    return PaddleOCR(use_angle_cls=True, lang='en')


	ocr = _load_ocr()

	# Streamlit App
	# Page header: a title followed by a one-line instruction for the user.
	st.title("Real-Time Text Extraction from Images (PaddleOCR)")
	st.markdown("Upload or capture an image to extract text using PaddleOCR.")

	# Upload Image
	# File picker restricted to PNG/JPG/JPEG. The result is compared against
	# None further down to decide whether an upload is available.
	uploaded_file = st.file_uploader("Upload Image", type=['png', 'jpg', 'jpeg'])

	# Visual separator between the two alternative input methods.
	st.markdown("OR")

	# Capture Image
	# Camera widget offered as the alternative input; it is only consulted
	# further down when no file was uploaded.
	captured_image = st.camera_input("Capture Image")

	# Resolve which input to process. Sources are listed in priority order:
	# an uploaded file wins over a camera capture. `image` stays None when
	# neither input has been provided yet.
	image = None
	_candidates = (
	    (uploaded_file, "Uploaded Image"),
	    (captured_image, "Captured Image"),
	)
	for _source, _caption in _candidates:
	    if _source is None:
	        continue
	    # Open the first available source, preview it, and stop looking.
	    image = Image.open(_source)
	    st.image(image, caption=_caption, use_container_width=True)
	    break

	def _flatten_to_rgb(pil_image):
	    """Return *pil_image* as a 3-channel RGB image.

	    Images with transparency are composited onto a white background so
	    that transparent regions do not turn into arbitrary (often black)
	    pixels. Every other mode (palette, grayscale, CMYK, ...) is converted
	    directly, so the OCR engine always receives an H x W x 3 array.
	    """
	    if pil_image.mode in ("RGBA", "LA") or "transparency" in pil_image.info:
	        rgba = pil_image.convert("RGBA")
	        canvas = Image.new("RGB", rgba.size, (255, 255, 255))
	        # The alpha band is the last channel; use it as the paste mask.
	        canvas.paste(rgba, mask=rgba.split()[-1])
	        return canvas
	    return pil_image.convert("RGB")


	def _format_ocr_text(raw_text):
	    """Turn raw OCR text into an HTML fragment that is safe to render.

	    Steps: collapse whitespace runs to single spaces, escape HTML
	    metacharacters, insert ``<br>`` after every standalone number, and
	    replace ``|`` (table/box separators) with ``<br>``.
	    """
	    cleaned_text = re.sub(r'\s+', ' ', raw_text).strip()
	    # The text comes from an arbitrary image and is rendered with
	    # unsafe_allow_html=True, so it must be escaped before our own <br>
	    # tags are added. quote=False keeps the escapes free of digits, which
	    # the number rule below would otherwise match.
	    safe_text = html.escape(cleaned_text, quote=False)
	    formatted_text = re.sub(r'(\b\d+\b)', r'\1<br>', safe_text)
	    # Tabs were already collapsed to spaces above, so only '|' can remain.
	    return formatted_text.replace('|', '<br>')


	if image is not None:
	    # Convert image to a numpy array the OCR engine can consume
	    image_np = np.array(_flatten_to_rgb(image))

	    # Perform OCR with PaddleOCR
	    with st.spinner("Extracting text..."):
	        try:
	            results = ocr.ocr(image_np, cls=True)
	            # One entry per input image. The entry may be None/empty when
	            # no text was detected, so normalise it to an empty list.
	            detections = (results[0] if results else None) or []
	            # Each detection is [box, (text, confidence)]; keep the text.
	            extracted_text = " ".join(line[1][0] for line in detections)
	            formatted_text = _format_ocr_text(extracted_text)
	        except Exception as e:
	            # Top-level UI boundary: report the failure instead of crashing.
	            st.error(f"Error during text extraction: {e}")
	        else:
	            st.subheader("Extracted Text:")
	            if formatted_text:
	                # unsafe_allow_html=True is needed for the <br> tags; the
	                # OCR text itself has already been escaped.
	                st.markdown(formatted_text, unsafe_allow_html=True)
	            else:
	                st.info("No text detected in the image.")