Spaces:

tianchiguaixia
/

ocr

Sleeping

App Files Files Community

ocr / app.py

tianchiguaixia

Update app.py

d0c0cf0 over 2 years ago

raw

history blame contribute delete

2.18 kB

	# -- coding: utf-8 --
	# time: 2022/10/17 11:22
	# file: AI-医学图片OCR.py


	import streamlit as st

	from ocr.ocr import detect, recognize
	from ocr.utils import bytes_to_numpy
	import pandas as pd

	import os
	import cv2
	from paddleocr import PPStructure, draw_structure_result, save_structure_res


	# Render the page heading at the top of the Streamlit app.
	st.title("AI-医学图片OCR")



	def convert_df(df):
	    """Serialize a DataFrame to GBK-encoded CSV bytes.

	    Args:
	        df: Object exposing ``to_csv()`` (a pandas DataFrame in this app).

	    Returns:
	        bytes: The text returned by ``df.to_csv()`` encoded as GBK.
	        Characters that GBK cannot represent are emitted as ``?`` rather
	        than raising ``UnicodeEncodeError``.
	    """
	    # NOTE: no caching is applied here; the conversion runs on every call.
	    # NOTE(review): GBK is presumably chosen so the CSV opens cleanly in
	    # Excel on Chinese-locale Windows -- confirm before changing it.
	    return df.to_csv().encode("gbk", errors="replace")


	# Upload an image through the sidebar.
	uploaded_file = st.sidebar.file_uploader(
	    '请选择一张图片', type=['png', 'jpg', 'jpeg'])
	print('uploaded_file:', uploaded_file)
	# NOTE(review): built at module level, so it is constructed again on each
	# run of this script; consider a cached factory if the installed Streamlit
	# version provides one.
	table_engine = PPStructure(show_log=True)
	if uploaded_file is not None:
	    bytes_data = uploaded_file.getvalue()
	    # Convert the raw upload via bytes_to_numpy (RGB channels requested).
	    img = bytes_to_numpy(bytes_data, channels='RGB')
	    option_task = st.sidebar.radio('请选择要执行的任务', ('查看原图', '文本检测'))
	    if option_task == '查看原图':
	        st.image(img, caption='原图')
	    elif option_task == '文本检测':
	        im_show = detect(img)
	        st.image(im_show, caption='文本检测后的图片')

	    base_path = "streamlit_data"
	    # Strip only the final extension so "scan.v2.png" maps to "scan.v2"
	    # instead of "scan"; basename drops any directory part of the
	    # client-supplied file name.
	    img_name = os.path.splitext(os.path.basename(uploaded_file.name))[0]
	    local_path = os.path.join(base_path, img_name)

	    if st.button('✨ 启动!'):
	        result = table_engine(img)
	        save_structure_res(result, base_path, img_name)
	        with st.container():
	            with st.expander(label="json结果展示", expanded=False):
	                st.write(result)
	            # Show every Excel file found in this image's output folder
	            # and offer each one as a CSV download.
	            for entry in sorted(os.listdir(local_path)):
	                if not entry.endswith(".xlsx"):
	                    continue
	                df = pd.read_excel(os.path.join(local_path, entry))
	                df = df.fillna("")
	                st.write(df)
	                csv = convert_df(df)
	                st.download_button(
	                    label="Download data as csv",
	                    data=csv,
	                    file_name='large_df.csv',
	                    mime='text/csv',
	                    # A unique key keeps several otherwise-identical
	                    # buttons from colliding when more than one table
	                    # is listed.
	                    key=f"download_{entry}",
	                )