ocr

Runtime error

App Files Files Community

Toughen1 committed on Jul 13, 2025

Commit

aebed3f

verified ·

1 Parent(s): fd9728f

CPU

Browse files

Files changed (1) hide show

app.py +261 -65

app.py CHANGED Viewed

@@ -1,26 +1,43 @@
 import atexit
 import functools
 from queue import Queue
 from threading import Event, Thread
 from paddleocr import PaddleOCR, draw_ocr
 from PIL import Image
-from io import BytesIO
-import base64
 import gradio as gr
-from fastapi import FastAPI, UploadFile, Form
-from pydantic import BaseModel
-# ========== 模型配置 ==========
 LANG_CONFIG = {
     "ch": {"num_workers": 2},
     "en": {"num_workers": 2},
 }
 CONCURRENCY_LIMIT = 8
-# ========== 模型池管理类 ==========
 class PaddleOCRModelManager(object):
-    def __init__(self, num_workers, model_factory):
         super().__init__()
         self._model_factory = model_factory
         self._queue = Queue()
@@ -34,6 +51,7 @@ class PaddleOCRModelManager(object):
             self._workers.append(worker)
     def infer(self, *args, **kwargs):
         result_queue = Queue(maxsize=1)
         self._queue.put((args, kwargs, result_queue))
         success, payload = result_queue.get()
@@ -64,83 +82,261 @@ class PaddleOCRModelManager(object):
             finally:
                 self._queue.task_done()
-# ========== OCR 模型初始化 ==========
 def create_model(lang):
     return PaddleOCR(lang=lang, use_angle_cls=True, use_gpu=False)
-model_managers = {
-    lang: PaddleOCRModelManager(cfg["num_workers"], functools.partial(create_model, lang=lang))
-    for lang, cfg in LANG_CONFIG.items()
-}
 def close_model_managers():
     for manager in model_managers.values():
         manager.close()
 atexit.register(close_model_managers)
-# ========== 通用 OCR 推理函数 ==========
-def run_ocr(image: Image.Image, lang: str):
-    ocr = model_managers[lang]
-    buffered = BytesIO()
-    image.save(buffered, format="PNG")
-    buffered.seek(0)
-    result = ocr.infer(buffered, cls=True)[0]
     boxes = [line[0] for line in result]
     txts = [line[1][0] for line in result]
     scores = [line[1][1] for line in result]
-    im_show = draw_ocr(image, boxes, txts, scores, font_path="./simfang.ttf")
-    return im_show, txts
-# ========== Gradio UI ==========
-def gradio_inference(img_path, lang):
-    image = Image.open(img_path).convert("RGB")
-    result_image, _ = run_ocr(image, lang)
-    return result_image
-title = "PaddleOCR"
 description = '''
-- Gradio demo for PaddleOCR with multi-language support.
-- Supports Chinese, English, French, German, Korean, and Japanese.
-- Upload an image or use the RESTful API below.
 '''
 examples = [
-    ['en_example.jpg', 'en'],
-    ['cn_example.jpg', 'ch'],
-    ['jp_example.jpg', 'japan'],
 ]
-css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"
-gr_app = gr.Interface(
-    gradio_inference,
-    inputs=[
-        gr.Image(type='filepath', label='Input'),
-        gr.Dropdown(choices=list(LANG_CONFIG.keys()), value='en', label='language')
-    ],
-    outputs=gr.Image(type='pil', label='Output'),
-    title=title,
-    description=description,
-    examples=examples,
-    cache_examples=False,
-    css=css,
-    concurrency_limit=CONCURRENCY_LIMIT
-)
-# ========== FastAPI + REST OCR ==========
-app = FastAPI()
-@app.post("/api/ocr_base64")
-def ocr_base64(data: str = Form(...), lang: str = Form("ch")):
-    try:
-        content = base64.b64decode(data)
-        image = Image.open(BytesIO(content)).convert("RGB")
-        _, texts = run_ocr(image, lang)
-        return {"success": True, "text": texts}
-    except Exception as e:
-        return {"success": False, "error": str(e)}
-# 挂载 Gradio 到 FastAPI
-app = gr.mount_gradio_app(app, gr_app, path="/")

 import atexit
 import functools
+import base64
+import io
+import re
 from queue import Queue
 from threading import Event, Thread
+import numpy as np
+from langdetect import detect
 from paddleocr import PaddleOCR, draw_ocr
 from PIL import Image
 import gradio as gr
 LANG_CONFIG = {
     "ch": {"num_workers": 2},
     "en": {"num_workers": 2},
+    "fr": {"num_workers": 1},
+    "german": {"num_workers": 1},
+    "korean": {"num_workers": 1},
+    "japan": {"num_workers": 1},
+}
# Maps langdetect language codes to the OCR language keys used in LANG_CONFIG.
# langdetect reports Chinese with a region suffix ("zh-cn" / "zh-tw") rather
# than a bare "zh", so both variants are listed explicitly; the bare "zh" key
# is kept for backward compatibility. Only the "ch" model is configured, so
# both Chinese variants are routed to it.
LANG_DETECT_MAP = {
    "zh": "ch",
    "zh-cn": "ch",
    "zh-tw": "ch",
    "en": "en",
    "fr": "fr",
    "de": "german",
    "ko": "korean",
    "ja": "japan",
}
 CONCURRENCY_LIMIT = 8
 class PaddleOCRModelManager(object):
+    def __init__(self,
+                 num_workers,
+                 model_factory):
         super().__init__()
         self._model_factory = model_factory
         self._queue = Queue()
             self._workers.append(worker)
     def infer(self, *args, **kwargs):
+        # XXX: Should I use a more lightweight data structure, say, a future?
         result_queue = Queue(maxsize=1)
         self._queue.put((args, kwargs, result_queue))
         success, payload = result_queue.get()
             finally:
                 self._queue.task_done()
 def create_model(lang):
     """Build a PaddleOCR instance for ``lang`` with ``use_angle_cls`` on and GPU off."""
     ocr_options = {"lang": lang, "use_angle_cls": True, "use_gpu": False}
     return PaddleOCR(**ocr_options)
# One manager per configured language; each manager is given a factory that
# builds the OCR model for that language and the configured worker count.
model_managers = {
    lang: PaddleOCRModelManager(
        config["num_workers"],
        functools.partial(create_model, lang=lang),
    )
    for lang, config in LANG_CONFIG.items()
}
 def close_model_managers():
     """Call ``close()`` on every manager stored in ``model_managers``."""
     for _lang, ocr_manager in model_managers.items():
         ocr_manager.close()
+# XXX: Not sure if gradio allows adding custom teardown logic
 atexit.register(close_model_managers)
def detect_language_from_text(text):
    """Guess the OCR language key for a piece of recognised text.

    Args:
        text: The text to classify.

    Returns:
        The ``LANG_DETECT_MAP`` value for the detected language code, or
        ``"en"`` when the code is not mapped or detection fails.
    """
    try:
        detected = detect(text)
    except Exception:
        # Deliberate best effort: any detection failure falls back to
        # English. ``Exception`` (not a bare ``except``) keeps
        # KeyboardInterrupt / SystemExit propagating.
        return "en"
    if detected in LANG_DETECT_MAP:
        return LANG_DETECT_MAP[detected]
    # Region-qualified codes such as "zh-cn" fall back to their primary
    # subtag ("zh") before defaulting to English.
    return LANG_DETECT_MAP.get(detected.split("-")[0], "en")
def auto_detect_language(image):
    """Pick an OCR language key for ``image`` by reading it once in English.

    The image is first run through the ``"en"`` model manager; the text it
    yields is joined and passed to ``detect_language_from_text``.

    Args:
        image: The OCR input forwarded unchanged to the manager's ``infer``.

    Returns:
        A language key; ``"en"`` when no text is found or any step fails.
    """
    # NOTE(review): the first pass always uses the English model, so text in
    # non-Latin scripts may be extracted poorly before detection — confirm
    # this is acceptable for CJK inputs.
    ocr = model_managers["en"]
    try:
        result = ocr.infer(image, cls=True)[0]
        if not result:
            return "en"  # no text detected
        # Merge every recognised line into one string for detection.
        all_text = " ".join(line[1][0] for line in result)
    except Exception:
        # Deliberate best effort: fall back to English on any OCR or parsing
        # failure, without swallowing KeyboardInterrupt / SystemExit.
        return "en"
    if not all_text.strip():
        return "en"
    return detect_language_from_text(all_text)
def process_base64_image(base64_string):
    """Decode a Base64 image string into a PNG byte stream and a PIL image.

    Args:
        base64_string: Base64 text, optionally preceded by a data-URI header
            ending in ``base64,`` (for example ``data:image/png;base64,``).

    Returns:
        A ``(png_stream, image)`` tuple: an ``io.BytesIO`` holding the image
        re-encoded as PNG and rewound to position 0, and the image object
        returned by ``Image.open``.

    Raises:
        ValueError: If the input cannot be decoded or opened as an image. The
            underlying exception is chained as ``__cause__``.
    """
    try:
        # Strip a data-URI header if present. Split only once so everything
        # after the first marker is kept as the payload.
        if "base64," in base64_string:
            base64_string = base64_string.split("base64,", 1)[1]
        image_data = base64.b64decode(base64_string)
        image = Image.open(io.BytesIO(image_data))
        # Re-encode as PNG into an in-memory buffer for downstream consumers.
        temp_io = io.BytesIO()
        image.save(temp_io, format='PNG')
        temp_io.seek(0)
        return temp_io, image
    except Exception as e:
        # Chain the cause so the original traceback is not lost.
        raise ValueError(f"处理Base64图像时出错: {str(e)}") from e
def inference(img, return_text_only=True):
    """Run OCR on an image, choosing the recognition language automatically.

    Args:
        img: Either a path to an image file, or a Base64-encoded image given
            as a raw Base64 string or as a ``data:`` URI.
        return_text_only: When true, skip drawing the annotated image.

    Returns:
        ``(text, lang)`` when ``return_text_only`` is true, otherwise
        ``(annotated_image, text, lang)``. ``text`` is the recognised lines
        joined with newlines; ``lang`` is the ``model_managers`` key used.
    """
    import os  # local import: only needed for the path-vs-Base64 check

    # Only strings can be Base64, and the grouping keeps the regex from ever
    # being applied to non-string input. A string that names an existing file
    # is always treated as a path, because a dot-less path such as
    # "/tmp/scan" also consists solely of Base64-alphabet characters.
    looks_like_base64 = isinstance(img, str) and (
        img.startswith("data:")
        or (
            re.match(r'^[A-Za-z0-9+/=]+$', img) is not None
            and not os.path.exists(img)
        )
    )

    if looks_like_base64:
        _, pil_img = process_base64_image(img)
        pil_img = pil_img.convert("RGB")
        # Pass pixels as an array instead of the BytesIO stream: the same
        # input is consumed by two OCR passes below, and a stream would be
        # exhausted after the first one.
        # NOTE(review): assumes the manager forwards its arguments to
        # PaddleOCR's ocr(), which takes ndarray input but not file-like
        # objects, and that BGR order matches what path inputs produce —
        # confirm against the pinned paddleocr version.
        ocr_input = np.ascontiguousarray(np.asarray(pil_img)[:, :, ::-1])
    else:
        # File-path input.
        ocr_input = img
        pil_img = Image.open(img).convert("RGB")

    # First pass detects the language, second pass recognises with it.
    lang = auto_detect_language(ocr_input)
    ocr = model_managers[lang]
    # An image without text can yield None / an empty entry; treat it as
    # "no lines" instead of failing in the comprehensions below.
    result = ocr.infer(ocr_input, cls=True)[0] or []

    # Each line is (box, (text, score)).
    boxes = [line[0] for line in result]
    txts = [line[1][0] for line in result]
    scores = [line[1][1] for line in result]
    joined_text = "\n".join(txts)

    if return_text_only:
        return joined_text, lang

    im_show = draw_ocr(pil_img, boxes, txts, scores, font_path="./simfang.ttf")
    return im_show, joined_text, lang
def inference_with_image(img):
    """Run OCR on ``img`` and return ``(annotated_image, text, lang)``."""
    annotated, recognised_text, language = inference(img, return_text_only=False)
    return (annotated, recognised_text, language)
def inference_text_only(img):
    """Run OCR on ``img`` and return ``(text, lang)`` without an annotated image."""
    recognised_text, language = inference(img, return_text_only=True)
    return (recognised_text, language)
def inference_base64(base64_string):
    """Run text-only OCR on a Base64-encoded image for the Base64 tab.

    Errors are reported through the return value rather than raised.

    Args:
        base64_string: Base64 image data, raw or as a data URI. It may contain
            line breaks or spaces, which are removed before decoding.

    Returns:
        ``(text, lang)`` on success. On empty input or failure, a
        ``(message, "")`` tuple where ``message`` describes the problem.
    """
    if not base64_string or base64_string.strip() == "":
        return "请提供有效的Base64图像字符串", ""
    try:
        # Pasted Base64 is frequently line-wrapped; embedded newlines or
        # spaces would otherwise keep the string from being recognised as
        # Base64, so drop all whitespace first.
        compact = "".join(base64_string.split())
        text, lang = inference(compact, return_text_only=True)
        return text, lang
    except Exception as e:
        return f"处理Base64图像时出错: {str(e)}", ""
+title = '🔍 PaddleOCR 智能文字识别'
 description = '''
+### 功能特点
+- 支持中文、英文、法语、德语、韩语和日语的智能文字识别
+- 自动检测图像中的语言，无需手动选择
+- 支持Base64编码图像识别
+- 同时提供文本结果和标注图像
+### 使用方法
+- 上传图像或提供Base64编码的图像数据
+- 系统会自动检测语言并进行OCR识别
+- 查看识别结果和标注图像
 '''
 examples = [
+    ['en_example.jpg'],
+    ['cn_example.jpg'],
+    ['jp_example.jpg'],
 ]
+# 自定义CSS样式，优化界面
+css = """
+.gradio-container {
+    font-family: 'Roboto', 'Microsoft YaHei', sans-serif;
+}
+.output_image, .input_image {
+    height: 30rem !important;
+    width: 100% !important;
+    object-fit: contain;
+    border-radius: 8px;
+    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+}
+.tabs {
+    margin-top: 0.5rem;
+}
+.output-text {
+    font-family: 'Courier New', monospace;
+    line-height: 1.5;
+    padding: 1rem;
+    border-radius: 8px;
+    background-color: #f8f9fa;
+    border: 1px solid #e9ecef;
+}
+.detected-lang {
+    font-weight: bold;
+    color: #4285f4;
+    margin-bottom: 0.5rem;
+}
+"""
# Build the interface with Gradio Blocks: one tab for uploaded images and one
# for Base64 input, plus a collapsible API usage note.
with gr.Blocks(title=title, css=css) as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(description)
    with gr.Tabs():
        # Image upload tab
        with gr.TabItem("图像上传识别"):
            with gr.Row():
                with gr.Column(scale=1):
                    image_input = gr.Image(label="上传图像", type="filepath")
                    image_submit = gr.Button("开始识别", variant="primary")
                with gr.Column(scale=2):
                    with gr.Row():
                        image_output = gr.Image(label="标注结果", type="pil")
                    with gr.Row():
                        detected_lang = gr.Textbox(label="检测到的语言", lines=1)
                    with gr.Row():
                        text_output = gr.Textbox(label="识别文本", lines=10, elem_classes=["output-text"])
        # Base64 tab
        with gr.TabItem("Base64图像识别"):
            with gr.Row():
                with gr.Column(scale=1):
                    base64_input = gr.Textbox(
                        label="输入Base64编码的图像数据",
                        lines=8,
                        placeholder="在此粘贴Base64编码的图像数据..."
                    )
                    base64_submit = gr.Button("开始识别", variant="primary")
                with gr.Column(scale=2):
                    base64_lang = gr.Textbox(label="检测到的语言", lines=1)
                    base64_output = gr.Textbox(
                        label="识别文本",
                        lines=15,
                        elem_classes=["output-text"]
                    )
    # API usage notes
    with gr.Accordion("API使用说明", open=False):
        gr.Markdown("""
        ## API使用方法
        ### 1. 图像上传API
        ```bash
        curl -X POST "http://localhost:7860/api/predict" \\
             -F "fn_index=0" \\
             -F "data=@/path/to/your/image.jpg"
        ```
        ### 2. Base64图像API
        ```bash
        curl -X POST "http://localhost:7860/api/predict" \\
             -H "Content-Type: application/json" \\
             -d '{
                  "fn_index": 1,
                  "data": ["YOUR_BASE64_STRING_HERE"]
                }'
        ```
        """)
    # Event wiring. CONCURRENCY_LIMIT is applied per handler so that several
    # requests can be in flight at once and reach the pooled OCR workers.
    # NOTE(review): assumes a Gradio version whose event listeners accept
    # ``concurrency_limit`` (Gradio 4+) — confirm against the Space's pin.
    image_submit.click(
        fn=inference_with_image,
        inputs=[image_input],
        outputs=[image_output, text_output, detected_lang],
        concurrency_limit=CONCURRENCY_LIMIT,
    )
    base64_submit.click(
        fn=inference_base64,
        inputs=[base64_input],
        outputs=[base64_output, base64_lang],
        concurrency_limit=CONCURRENCY_LIMIT,
    )
# Launch the Gradio app
demo.launch(debug=False, share=False)