Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| from paddleocr import PaddleOCR | |
| from PIL import Image | |
| import numpy as np | |
| import re | |
| from pyngrok import ngrok | |
| import subprocess | |
def load_ocr_engine():
    """Return the shared PaddleOCR engine, creating it on first use.

    Streamlit re-executes the whole script on every widget interaction, so
    the engine is wrapped in ``st.cache_resource`` to avoid rebuilding it
    on each rerun.
    """
    # The decorator is applied at call time (not at import time) so that the
    # pure helpers below can be imported without touching Streamlit.
    # NOTE(review): a cached resource is shared between sessions; confirm the
    # engine tolerates concurrent use if the app serves several users at once.
    @st.cache_resource
    def _build_engine():
        # Enable angle classification for better accuracy.
        return PaddleOCR(use_angle_cls=True, lang='en')

    return _build_engine()


def join_recognized_text(ocr_results):
    """Concatenate the recognized strings of the first image in *ocr_results*.

    Each detected line is indexed as ``line[1][0]`` to obtain its text
    (presumably ``[box, (text, confidence)]`` -- confirm against the
    PaddleOCR version in use).

    Returns an empty string when the result is missing, empty, or its first
    entry is ``None``/empty, i.e. when there is nothing to join.
    """
    if not ocr_results or not ocr_results[0]:
        return ""
    return " ".join(line[1][0] for line in ocr_results[0])


def format_extracted_text(extracted_text):
    """Turn raw OCR text into an HTML fragment that is safe to render.

    Steps:
      1. Collapse every run of whitespace into a single space and trim.
      2. Escape ``&``, ``<`` and ``>`` so text read from the image cannot
         inject markup when rendered with ``unsafe_allow_html=True``.
      3. Insert ``<br>`` after every standalone number.
      4. Replace tab and ``|`` separators (table columns / boxed text)
         with ``<br>``.
    """
    import html  # local import: the only new dependency of this block

    cleaned_text = re.sub(r'\s+', ' ', extracted_text).strip()
    # Escape before inserting our own <br> tags so those tags survive.
    # The entities produced (&amp; &lt; &gt;) contain no digits or pipes,
    # so they are not affected by the substitutions below.
    safe_text = html.escape(cleaned_text, quote=False)
    formatted_text = re.sub(r'(\b\d+\b)', r'\1<br>', safe_text)
    return re.sub(r'[\t|]', r'<br>', formatted_text)


def main():
    """Render the page: choose an image, run OCR on it, show the text."""
    ocr = load_ocr_engine()

    # Streamlit App
    st.title("Real-Time Text Extraction from Images (PaddleOCR)")
    st.markdown("Upload or capture an image to extract text using PaddleOCR.")

    # Upload Image
    uploaded_file = st.file_uploader("Upload Image", type=['png', 'jpg', 'jpeg'])
    st.markdown("OR")
    # Capture Image
    captured_image = st.camera_input("Capture Image")

    # An uploaded file takes precedence over a camera capture.
    if uploaded_file is not None:
        image = Image.open(uploaded_file)
        st.image(image, caption="Uploaded Image", use_container_width=True)
    elif captured_image is not None:
        image = Image.open(captured_image)
        st.image(image, caption="Captured Image", use_container_width=True)
    else:
        return  # Nothing to process yet.

    with st.spinner("Extracting text..."):
        try:
            # Normalize to 3-channel RGB so palette, grayscale and alpha
            # images all reach the OCR engine with the same array layout.
            image_np = np.array(image.convert("RGB"))
            results = ocr.ocr(image_np, cls=True)
            extracted_text = join_recognized_text(results)
        except Exception as e:
            # Top-level UI boundary: report the failure instead of crashing.
            st.error(f"Error during text extraction: {e}")
            return

    st.subheader("Extracted Text:")
    if not extracted_text.strip():
        st.info("No text was detected in the image.")
        return

    # unsafe_allow_html is needed for the <br> tags; the text itself has
    # already been escaped by format_extracted_text.
    st.markdown(format_extracted_text(extracted_text), unsafe_allow_html=True)


if __name__ == "__main__":
    main()