Spaces:

topdu
/

OpenOCR-UniRec-Demo

Running

App Files Community

topdu committed on Feb 7

Commit

640f03f

1 Parent(s): ead8146

Update app.py

Browse files

Files changed (1) hide show

app.py +2 -123

app.py CHANGED Viewed

@@ -1,123 +1,2 @@
-import gradio as gr
-import torch
-from threading import Thread
-import numpy as np
-import re
-from openrec.postprocess.unirec_postprocess import clean_special_tokens
-from openrec.preprocess import create_operators, transform
-from tools.engine.config import Config
-from tools.utils.ckpt import load_ckpt
-from tools.infer_rec import build_rec_process
-def set_device(device):
-    if device == 'gpu' and torch.cuda.is_available():
-        device = torch.device('cuda:0')
-    else:
-        device = torch.device('cpu')
-    return device
-cfg = Config('configs/rec/unirec/focalsvtr_ardecoder_unirec.yml')
-cfg = cfg.cfg
-global_config = cfg['Global']
-from openrec.modeling.transformers_modeling.modeling_unirec import UniRecForConditionalGenerationNew
-from openrec.modeling.transformers_modeling.configuration_unirec import UniRecConfig
-from transformers import AutoTokenizer, TextIteratorStreamer
-tokenizer = AutoTokenizer.from_pretrained(global_config['vlm_ocr_config'])
-cfg_model = UniRecConfig.from_pretrained(global_config['vlm_ocr_config'])
-# cfg_model._attn_implementation = "flash_attention_2"
-cfg_model._attn_implementation = 'eager'
-model = UniRecForConditionalGenerationNew(config=cfg_model)
-load_ckpt(model, cfg)
-device = set_device(cfg['Global']['device'])
-model.eval()
-model.to(device=device)
-transforms, ratio_resize_flag = build_rec_process(cfg)
-ops = create_operators(transforms, global_config)
-# --- 2. 定义流式生成函数 ---
-def stream_chat_with_image(input_image, history):
-    if input_image is None:
-        yield history + [('🖼️(空)', '请先上传一张图片。')]
-        return
-    # 创建 TextIteratorStreamer
-    streamer = TextIteratorStreamer(tokenizer,
-                                    skip_prompt=True,
-                                    skip_special_tokens=False)
-    data = {'image': input_image}
-    batch = transform(data, ops[1:])
-    images = np.expand_dims(batch[0], axis=0)
-    images = torch.from_numpy(images).to(device=device)
-    inputs = {
-        'pixel_values': images,
-        'input_ids': None,
-        'attention_mask': None
-    }
-    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=2048)
-    # 后台线程运行生成
-    thread = Thread(target=model.generate, kwargs=generation_kwargs)
-    thread.start()
-    # 流式输出
-    history = history + [('🖼️(图片)', '')]
-    generated_text_ori = ''
-    for new_text in streamer:
-        generated_text_ori += new_text
-        generated_text = clean_special_tokens(generated_text_ori.replace(' ', ''))
-        text = generated_text.replace('<tdcolspan=', '<td colspan=')
-        text = text.replace('<tdrowspan=', '<td rowspan=')
-        generated_text = text.replace('"colspan=', '" colspan=')
-        history[-1] = ('🖼️(图片)', generated_text)
-        yield history
-# --- 3. Gradio UI ---
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.HTML("""
-            <h1 style='text-align: center;'>
-                <a href="https://github.com/Topdu/OpenOCR">
-                    UniRec-0.1B: Unified Text and Formula Recognition with 0.1B Parameters
-                </a>
-            </h1>
-            <p style='text-align: center;'>
-               A ultralight unified text and formula recognition model
-                (Created by <a href="https://fvl.fudan.edu.cn">FVL Lab</a>,
-                <a href="https://github.com/Topdu/OpenOCR">OCR Team</a>)
-            </p>
-            <p style='text-align: center;'>
-                <a href="https://github.com/Topdu/OpenOCR/blob/main/docs/unirec.md">[Local GPU Deployment]</a>
-                for fast recognition experience
-            </p>"""
-            )
-    gr.Markdown('Upload an image, and the system will automatically recognize text and formulas.')
-    with gr.Row():
-        with gr.Column(scale=1):  # Left column: image + clear button
-            image_input = gr.Image(label='Upload Image or Paste Screenshot', type='pil')
-            clear = gr.ClearButton([image_input], value='Clear')
-        with gr.Column(scale=2):
-            chatbot = gr.Chatbot(
-                label='Result (Use LaTeX renderer to display formulas)',
-                show_copy_button=True,
-                height='auto'
-            )
-    clear.add([chatbot])
-    # Trigger after upload
-    # image_input.upload(stream_chat_with_image, [image_input, chatbot], chatbot)
-    image_input.change(
-        stream_chat_with_image,
-        [image_input, chatbot],
-        chatbot,
-        show_progress=False
-        )
-# --- 4. Launch app ---
-if __name__ == '__main__':
-    demo.queue().launch(share=True)


1	+ from openocr.demo_unirec import launch_demo
2	+ launch_demo()