Spaces:
Sleeping
Sleeping
# -*- coding: utf-8 -*-
# time: 2022/10/17 11:22
# file: AI-医学图片OCR.py
| import streamlit as st | |
| from ocr.ocr import detect, recognize | |
| from ocr.utils import bytes_to_numpy | |
| import pandas as pd | |
| import os | |
| import cv2 | |
| from paddleocr import PPStructure, draw_structure_result, save_structure_res | |
# Page heading shown at the top of the Streamlit app.
st.title("AI-医学图片OCR")
def convert_df(df):
    """Serialize *df* to CSV text and return it as GBK-encoded bytes.

    The CSV is produced by ``df.to_csv()`` with its defaults, so the index
    column is included.

    NOTE: the original comment asked for this conversion to be cached, but
    no cache decorator is applied here, so it runs on every call.

    Args:
        df: The DataFrame to export.

    Returns:
        The CSV content as ``bytes`` encoded with GBK.
    """
    # OCR output can contain characters that GBK cannot represent; a strict
    # encode would raise UnicodeEncodeError and abort the page render.
    # Replace such characters with "?" instead.
    # NOTE(review): GBK is presumably chosen for spreadsheet tools on
    # Chinese-locale systems -- confirm before switching to another encoding.
    return df.to_csv().encode("gbk", errors="replace")
# Sidebar widget for uploading the image to process; only png/jpg/jpeg
# files are accepted.
uploaded_file = st.sidebar.file_uploader(
    label='请选择一张图片',
    type=['png', 'jpg', 'jpeg'],
)
# Debug trace of the current upload state, written to stdout.
print('uploaded_file:', uploaded_file)

# Structure-analysis engine, invoked further down on the uploaded image.
table_engine = PPStructure(show_log=True)
if uploaded_file is not None:
    # NOTE(review): the indentation of this script was lost in the source
    # listing. The nesting below is reconstructed from the syntax; in
    # particular the start button is assumed to sit at the upload level
    # rather than inside the text-detection branch -- confirm.

    # Convert the uploaded bytes with bytes_to_numpy, requesting 'RGB'.
    bytes_data = uploaded_file.getvalue()
    img = bytes_to_numpy(bytes_data, channels='RGB')

    # Sidebar choice: show the original image or the text-detection overlay.
    option_task = st.sidebar.radio('请选择要执行的任务', ('查看原图', '文本检测'))
    if option_task == '查看原图':
        st.image(img, caption='原图')
    elif option_task == '文本检测':
        im_show = detect(img)
        st.image(im_show, caption='文本检测后的图片')

    base_path = "streamlit_data"
    # Strip only the final extension so that names such as "scan.v1.png" and
    # "scan.v2.png" do not collapse onto the same output directory.
    image_name = os.path.splitext(uploaded_file.name)[0]
    local_path = os.path.join(base_path, image_name)

    if st.button('✨ 启动!'):
        result = table_engine(img)
        save_structure_res(result, base_path, image_name)
        with st.container():
            with st.expander(label="json结果展示", expanded=False):
                st.write(result)

            # The .xlsx results are expected under local_path after saving.
            # Guard against the directory being absent, and match on the
            # real suffix rather than a substring of the file name.
            if os.path.isdir(local_path):
                xlsx_names = sorted(
                    entry for entry in os.listdir(local_path)
                    if entry.endswith(".xlsx")
                )
            else:
                xlsx_names = []

            for xlsx_name in xlsx_names:
                df = pd.read_excel(os.path.join(local_path, xlsx_name))
                df = df.fillna("")
                st.write(df)
                csv = convert_df(df)
                # Each button needs a distinct key: otherwise several tables
                # yield identical widgets and Streamlit raises a
                # duplicate-widget error.
                st.download_button(
                    label="Download data as csv",
                    data=csv,
                    file_name='large_df.csv',
                    mime='text/csv',
                    key=f"download_{xlsx_name}",
                )