|
|
from model import OCRModel, DET_ARCHS, RECO_ARCHS |
|
|
from ui import OCRUI |
|
|
from doctr.io import DocumentFile |
|
|
from doctr.utils.visualization import visualize_page |
|
|
import matplotlib.pyplot as plt |
|
|
import time |
|
|
import streamlit as st |
|
|
import aiohttp |
|
|
import asyncio |
|
|
def main():
    """Run the Streamlit OCR step-by-step demo.

    Builds the sidebar through ``OCRUI.setup_sidebar`` and, once the sidebar
    analysis button is pressed, loads the model with the selected parameters,
    processes the current page and renders three views into ``ui.cols[1..3]``:
    the segmentation map, the annotated page and the reconstructed page.
    Afterwards the recognised words and the elapsed time are written to the
    main area.

    Any exception raised while processing is logged with its traceback and
    reported to the user instead of propagating.
    """
    import logging

    ui = OCRUI()
    model = OCRModel()

    uploaded_file, params, current_page = ui.setup_sidebar(DET_ARCHS, RECO_ARCHS)

    # Nothing to do until the user presses the analysis button.
    if not st.sidebar.button("分析步骤演示"):
        return

    if uploaded_file is None:
        st.sidebar.error("请上传一个文档")
        return

    if current_page is None:
        st.sidebar.error("页面加载失败")
        return

    try:
        with st.spinner('正在处理中...请稍等'):
            st.sidebar.info("开始处理...")

            # perf_counter is monotonic, so the reported duration cannot be
            # skewed by system clock adjustments.
            started = time.perf_counter()

            st.sidebar.info("加载模型...")
            model.load_model(**params)

            st.sidebar.info("分析页面...")
            seg_map, out = model.process_page(current_page)

            st.sidebar.info("显示结果...")

            # Segmentation map.
            seg_fig, seg_ax = plt.subplots()
            seg_ax.imshow(seg_map)
            seg_ax.axis("off")
            ui.cols[1].pyplot(seg_fig)
            plt.close(seg_fig)

            # Annotated page; the export is reused below for text extraction.
            first_page = out.pages[0]
            page_export = first_page.export()
            page_fig = visualize_page(
                page_export,
                first_page.page,
                interactive=False,
                add_labels=True,
            )
            ui.cols[2].pyplot(page_fig)
            plt.close(page_fig)

            # Reconstructed page is shown as an image, so no figure to close.
            reconstructed = model.get_reconstructed_page(out, current_page)
            ui.cols[3].image(reconstructed, clamp=True)

            total_time = time.perf_counter() - started
            st.success(f"处理完成! 用时: {total_time:.2f}秒")

            # Flatten blocks -> lines -> words into the recognised text.
            text_results = [
                word["value"]
                for block in page_export["blocks"]
                for line in block["lines"]
                for word in line["words"]
            ]

            st.write("识别结果:", " ".join(text_results))
            st.json({"total_time": total_time})

    except Exception as e:
        # Top-level UI boundary: keep the traceback in the server log and
        # show a short message to the user.
        logging.getLogger(__name__).exception("OCR page processing failed")
        st.error(f"处理失败: {e}")
        st.sidebar.error("发生错误,请查看详细信息")
|
|
|
|
|
# Script entry point: run the demo only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()