doctr / main.py
legacies's picture
initial files
0e17e4e
raw
history blame
5.03 kB
# Copyright (C) 2021-2024, Mindee.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
import cv2
import matplotlib.pyplot as plt
import numpy as np
import streamlit as st
import pymongo
import json
from doctr.file_utils import is_tf_available
from doctr.io import DocumentFile
from doctr.utils.visualization import visualize_page
from doctr.models import ocr_predictor
import torch
from backend.pytorch import DET_ARCHS, RECO_ARCHS, forward_image, load_predictor
# Device used for all model inference: first CUDA GPU when available, CPU otherwise.
forward_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def main(det_archs, reco_archs):
    """Build the Streamlit layout for the docTR OCR demo.

    Renders an upload sidebar, runs the selected detection/recognition models on
    each page of the uploaded document, and shows the segmentation heatmap, the
    OCR overlay, the synthesized page, and the JSON export. Results are also
    persisted (best-effort) to a local MongoDB instance.

    Args:
        det_archs: sequence of text-detection architecture names for the sidebar selector.
        reco_archs: sequence of text-recognition architecture names for the sidebar selector.
    """
    # Wide mode
    st.set_page_config(layout="wide")

    # Designing the interface
    st.title("docTR: Document Text Recognition")
    # For newline
    st.write("\n")
    # Instructions
    st.markdown("*Hint: click on the top-right corner of an image to enlarge it!*")
    # Set the columns
    cols = st.columns((1, 1, 1, 1))
    cols[0].subheader("Input page")
    cols[1].subheader("Segmentation heatmap")
    cols[2].subheader("OCR output")
    cols[3].subheader("Page reconstitution")

    # Sidebar: file selection
    st.sidebar.title("Document selection")
    uploaded_file = st.sidebar.file_uploader("Upload files", type=["pdf", "png", "jpeg", "jpg"])
    if uploaded_file is not None:
        if uploaded_file.name.endswith(".pdf"):
            doc = DocumentFile.from_pdf(uploaded_file.read())
        else:
            doc = DocumentFile.from_images(uploaded_file.read())
        cols[0].image(doc)

    # Sidebar: model selection
    st.sidebar.title("Model selection")
    st.sidebar.markdown("**Backend**: " + ("TensorFlow" if is_tf_available() else "PyTorch"))
    det_arch = st.sidebar.selectbox("Text detection model", det_archs)
    reco_arch = st.sidebar.selectbox("Text recognition model", reco_archs)

    # For newline
    st.sidebar.write("\n")
    # Only straight pages or possible rotation
    assume_straight_pages = True
    st.sidebar.write("\n")
    # Straighten pages
    straighten_pages = False
    st.sidebar.write("\n")
    # Binarization threshold
    bin_thresh = 0.3
    # Box threshold
    box_thresh = 0.1

    if st.sidebar.button("Analyze page"):
        if uploaded_file is None:
            st.sidebar.write("Please upload a document")
        else:
            with st.spinner("Loading model..."):
                predictor = load_predictor(
                    det_arch, reco_arch, assume_straight_pages, straighten_pages, bin_thresh, box_thresh, forward_device
                )

            # MongoDB target for storing the exports. MongoClient connects
            # lazily, so constructing it here (outside the page loop) is cheap
            # even when no server is running.
            collection = pymongo.MongoClient("mongodb://localhost:27017/")["OCRoutputs"]["jsonDatas"]

            with st.spinner("Analyzing..."):
                for page in doc:
                    # Forward the image through the detection model and resize
                    # the heatmap back to the page resolution for display.
                    seg_map = forward_image(predictor, page, forward_device)
                    seg_map = np.squeeze(seg_map)
                    seg_map = cv2.resize(seg_map, (page.shape[1], page.shape[0]), interpolation=cv2.INTER_LINEAR)

                    # Plot the raw heatmap
                    fig, ax = plt.subplots()
                    ax.imshow(seg_map)
                    ax.axis("off")
                    cols[1].pyplot(fig)

                    # Plot OCR output
                    out = predictor([page])
                    fig = visualize_page(out.pages[0].export(), out.pages[0].page, interactive=False, add_labels=False)
                    cols[2].pyplot(fig)

                    # Page reconstitution under input page
                    page_export = out.pages[0].export()
                    img = out.pages[0].synthesize()
                    cols[3].image(img, clamp=True)

                    # Display JSON
                    st.markdown("\nHere are your analysis results in JSON format:")
                    st.json(page_export, expanded=False)
                    # Download JSON
                    st.markdown("\nDownload here:")
                    st.download_button(
                        label="Download JSON",
                        data=json.dumps(page_export),
                        file_name="OCR.json",
                        mime="application/json",
                    )

                    # Persist a COPY of the export: insert_one injects an
                    # ObjectId under "_id" into the inserted document, which
                    # would otherwise corrupt page_export for later use.
                    # Persistence is best-effort — an unreachable MongoDB must
                    # not crash the analysis.
                    try:
                        collection.insert_one(dict(page_export))
                    except pymongo.errors.PyMongoError as err:
                        st.warning(f"Could not store results in MongoDB: {err}")
# Script entry point: launch the Streamlit app with the PyTorch architecture lists.
if __name__ == "__main__":
    main(DET_ARCHS, RECO_ARCHS)