Spaces:

saif0001
/

OCR_application

Build error

App Files Files Community

saif0001 committed on Sep 28, 2024

Commit

a2e157e

verified ·

1 Parent(s): 8ffa9a4

Update app.py

Browse files

Files changed (1) hide show

app.py +123 -198

app.py CHANGED Viewed

@@ -1,201 +1,126 @@
-import gradio as gr
-import spaces
-from transformers import AutoModel, AutoTokenizer
 import os
-import base64
-import io
-import uuid
-import time
-import shutil
 from pathlib import Path
-# Load tokenizer and model for CPU
-tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
-model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True)
-model = model.eval()  # No need for .cuda() since it's on CPU
-UPLOAD_FOLDER = "./uploads"
-RESULTS_FOLDER = "./results"
-for folder in [UPLOAD_FOLDER, RESULTS_FOLDER]:
-    if not os.path.exists(folder):
-        os.makedirs(folder)
-def image_to_base64(image):
-    buffered = io.BytesIO()
-    image.save(buffered, format="PNG")
-    return base64.b64encode(buffered.getvalue()).decode()
-def run_GOT(image, got_mode, fine_grained_mode="", ocr_color="", ocr_box=""):
-    unique_id = str(uuid.uuid4())
-    image_path = os.path.join(UPLOAD_FOLDER, f"{unique_id}.png")
-    result_path = os.path.join(RESULTS_FOLDER, f"{unique_id}.html")
-    shutil.copy(image, image_path)
-    try:
-        if got_mode == "plain texts OCR":
-            res = model.chat(tokenizer, image_path, ocr_type='ocr')
-            return res, None
-        elif got_mode == "format texts OCR":
-            res = model.chat(tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path)
-        elif got_mode == "plain multi-crop OCR":
-            res = model.chat_crop(tokenizer, image_path, ocr_type='ocr')
-            return res, None
-        elif got_mode == "format multi-crop OCR":
-            res = model.chat_crop(tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path)
-        elif got_mode == "plain fine-grained OCR":
-            res = model.chat(tokenizer, image_path, ocr_type='ocr', ocr_box=ocr_box, ocr_color=ocr_color)
-            return res, None
-        elif got_mode == "format fine-grained OCR":
-            res = model.chat(tokenizer, image_path, ocr_type='format', ocr_box=ocr_box, ocr_color=ocr_color, render=True, save_render_file=result_path)
-        res_markdown = res
-        if "format" in got_mode and os.path.exists(result_path):
-            with open(result_path, 'r') as f:
-                html_content = f.read()
-            encoded_html = base64.b64encode(html_content.encode('utf-8')).decode('utf-8')
-            iframe_src = f"data:text/html;base64,{encoded_html}"
-            iframe = f'<iframe src="{iframe_src}" width="100%" height="600px"></iframe>'
-            download_link = f'<a href="data:text/html;base64,{encoded_html}" download="result_{unique_id}.html">Download Full Result</a>'
-            return res_markdown, f"{download_link}<br>{iframe}"
-        else:
-            return res_markdown, None
-    except Exception as e:
-        return f"Error: {str(e)}", None
-    finally:
-        if os.path.exists(image_path):
-            os.remove(image_path)
-def task_update(task):
-    if "fine-grained" in task:
-        return [
-            gr.update(visible=True),
-            gr.update(visible=False),
-            gr.update(visible=False),
-        ]
-    else:
-        return [
-            gr.update(visible=False),
-            gr.update(visible=False),
-            gr.update(visible=False),
-        ]
-def fine_grained_update(task):
-    if task == "box":
-        return [
-            gr.update(visible=False, value=""),
-            gr.update(visible=True),
-        ]
-    elif task == 'color':
-        return [
-            gr.update(visible=True),
-            gr.update(visible=False, value=""),
-        ]
-def cleanup_old_files():
-    current_time = time.time()
-    for folder in [UPLOAD_FOLDER, RESULTS_FOLDER]:
-        for file_path in Path(folder).glob('*'):
-            if current_time - file_path.stat().st_mtime > 3600:  # 1 hour
-                file_path.unlink()
-title_html = """
-<h2> <span class="gradient-text" id="text">General OCR Theory</span><span class="plain-text">: Towards OCR-2.0 via a Unified End-to-end Model</span></h2>
-<a href="https://huggingface.co/ucaslcl/GOT-OCR2_0">[😊 Hugging Face]</a>
-<a href="https://arxiv.org/abs/2409.01704">[📜 Paper]</a>
-<a href="https://github.com/Ucas-HaoranWei/GOT-OCR2.0/">[🌟 GitHub]</a>
-"""
-with gr.Blocks() as demo:
-    gr.HTML(title_html)
-    gr.Markdown("""\
-    "🔥🔥🔥This is the official online demo of GOT-OCR-2.0 model!!!"
-    ### Demo Guidelines
-    You need to upload your image below and choose one mode of GOT, then click "Submit" to run GOT model. More characters will result in longer wait times.
-    - **plain texts OCR & format texts OCR**: The two modes are for the image-level OCR.
-    - **plain multi-crop OCR & format multi-crop OCR**: For images with more complex content, you can achieve higher-quality results with these modes.
-    - **plain fine-grained OCR & format fine-grained OCR**: In these modes, you can specify fine-grained regions on the input image for more flexible OCR. Fine-grained regions can be coordinates of the box, red color, blue color, or green color.
-    """)
-    with gr.Row():
-        with gr.Column():
-            image_input = gr.Image(type="filepath", label="upload your image")
-            task_dropdown = gr.Dropdown(
-                choices=[
-                    "plain texts OCR",
-                    "format texts OCR",
-                    "plain multi-crop OCR",
-                    "format multi-crop OCR",
-                    "plain fine-grained OCR",
-                    "format fine-grained OCR",
-                ],
-                label="Choose one mode of GOT",
-                value="plain texts OCR"
-            )
-            fine_grained_dropdown = gr.Dropdown(
-                choices=["box", "color"],
-                label="fine-grained type",
-                visible=False
-            )
-            color_dropdown = gr.Dropdown(
-                choices=["red", "green", "blue"],
-                label="color list",
-                visible=False
-            )
-            box_input = gr.Textbox(
-                label="input box: [x1,y1,x2,y2]",
-                placeholder="e.g., [0,0,100,100]",
-                visible=False
-            )
-            submit_button = gr.Button("Submit")
-        with gr.Column():
-            ocr_result = gr.Textbox(label="GOT output")
-    with gr.Column():
-        gr.Markdown("**If you choose the mode with format, the mathpix result will be automatically rendered as follows:**")
-        html_result = gr.HTML(label="rendered html", show_label=True)
-    # Removed examples section
-    """
-    gr.Examples(
-        examples=[
-            ["assets/coco.jpg", "plain texts OCR", "", "", ""],
-            ["assets/en_30.png", "plain texts OCR", "", "", ""],
-            ["assets/table.jpg", "format texts OCR", "", "", ""],
-            ["assets/eq.jpg", "format texts OCR", "", "", ""],
-            ["assets/exam.jpg", "format texts OCR", "", "", ""],
-            ["assets/giga.jpg", "format multi-crop OCR", "", "", ""],
-            ["assets/aff2.png", "plain fine-grained OCR", "box", "", "[409,763,756,891]"],
-            ["assets/color.png", "plain fine-grained OCR", "color", "red", ""],
-        ],
-        inputs=[image_input, task_dropdown, fine_grained_dropdown, color_dropdown, box_input],
-        outputs=[ocr_result, html_result],
-        fn=run_GOT,
-        label="examples",
-    )
-    """
-    task_dropdown.change(
-        task_update,
-        inputs=[task_dropdown],
-        outputs=[fine_grained_dropdown, color_dropdown, box_input]
-    )
-    fine_grained_dropdown.change(
-        fine_grained_update,
-        inputs=[fine_grained_dropdown],
-        outputs=[color_dropdown, box_input]
-    )
-    submit_button.click(
-        run_GOT,
-        inputs=[image_input, task_dropdown, fine_grained_dropdown, color_dropdown, box_input],
-        outputs=[ocr_result, html_result]
-    )
-if __name__ == "__main__":
-    cleanup_old_files()
-    demo.launch()

 import os
+import copy
+import tempfile
+import requests
+import re
+from argparse import ArgumentParser
 from pathlib import Path
+from byaldi import RAGMultiModalModel
+# Read at import time; subscripting os.environ raises KeyError when the
+# API_KEY variable is not set, so the app fails before any UI is built.
+# NOTE(review): API_KEY is not referenced anywhere else in this file —
+# confirm whether it should be handed to the model client or removed.
+API_KEY = os.environ['API_KEY']
+# Module-level model handle created from the "vidore/colpali-v1.2"
+# checkpoint; predict() sends every chat request through this object.
+RAG = RAGMultiModalModel.from_pretrained("vidore/colpali-v1.2")
+def _get_args():
+    """Parse the command line and return the resulting namespace.
+
+    The only recognised option is the boolean flag ``--share``, which is
+    False unless it is present on the command line.
+    """
+    cli = ArgumentParser()
+    cli.add_argument("--share", default=False, action="store_true")
+    return cli.parse_args()
+# Replacement entity for each character that is rewritten inside a fenced
+# code block. Wrapping the raw character as "&" + char + ";" does not form a
+# valid HTML entity, so every character gets an explicit one.
+_CODE_ENTITIES = {
+    "<": "&lt;",
+    ">": "&gt;",
+    "*": "&ast;",
+    "_": "&lowbar;",
+    "-": "&#45;",
+    ".": "&#46;",
+    "!": "&#33;",
+    "(": "&#40;",
+    ")": "&#41;",
+    "$": "&#36;",
+}
+_CODE_SPECIALS = re.compile(r'[<>\*_\-.\!\(\)\$]')
+def _parse_text(text):
+    """Convert chat text into the HTML fragment shown in the chatbot.
+
+    Empty lines are dropped. A line containing three backticks toggles a
+    code block: an opening fence becomes ``<pre><code class="language-X">``
+    (X is whatever follows the last backtick) and a closing fence becomes
+    ``<br></code></pre>``. Lines inside a code block are entity-escaped and
+    prefixed with ``<br>``; every other line after the first is prefixed
+    with ``<br>`` so that separate lines are not glued together.
+
+    Args:
+        text: The raw string to convert.
+
+    Returns:
+        The converted lines concatenated into one string.
+    """
+    lines = [line for line in text.split("\n") if line]
+    fence_count = 0
+    for i, line in enumerate(lines):
+        if "```" in line:
+            fence_count += 1
+            if fence_count % 2 == 1:
+                language = line.split("`")[-1]
+                lines[i] = f'<pre><code class="language-{language}">'
+            else:
+                lines[i] = "<br></code></pre>"
+        elif fence_count % 2 == 1:
+            # Escape "&" first so the entities produced below are not
+            # themselves re-escaped.
+            escaped = line.replace("&", "&amp;").replace(" ", "&nbsp;")
+            lines[i] = "<br>" + _CODE_SPECIALS.sub(lambda m: _CODE_ENTITIES[m.group(0)], escaped)
+        elif i > 0:
+            lines[i] = "<br>" + line
+    return "".join(lines)
+def _remove_image_special(text):
+    """Strip grounding markup from a model reply.
+
+    ``<ref>`` and ``</ref>`` tags are removed while the text between them is
+    kept; each ``<box>`` section is removed together with its content, up to
+    the closing ``</box>`` or, when that is missing, up to the end of the
+    string.
+    """
+    for tag in ('<ref>', '</ref>'):
+        text = text.replace(tag, '')
+    without_boxes = re.sub(r'<box>.*?(</box>|$)', '', text)
+    return without_boxes
+def _launch_demo(args):
+    """Build the Gradio chat UI, register its callbacks, and launch it.
+
+    Args:
+        args: Parsed command-line namespace; only ``args.share`` is read and
+            it is passed straight to ``launch(share=...)``.
+    """
+    # This version of the file has no module-level gradio import even though
+    # every ``gr.*`` call below needs it, so bring it into scope here.
+    import gradio as gr
+    def predict(_chatbot, task_history):
+        """Generator callback: stream the reply for the newest turn.
+
+        Yields the chatbot list after every streamed chunk and once more
+        after the final answer has been stored in both histories. An empty
+        newest query is removed from both histories instead of being sent.
+        """
+        chat_query = _chatbot[-1][0]
+        query = task_history[-1][0]
+        if not chat_query:
+            _chatbot.pop()
+            task_history.pop()
+            # A value returned from a generator never reaches the caller, so
+            # the trimmed history has to be yielded to update the UI.
+            yield _chatbot
+            return
+        history_cp = copy.deepcopy(task_history)
+        messages = []
+        content = []
+        for q, a in history_cp:
+            if isinstance(q, (tuple, list)):
+                # add_file stores uploads as a 1-tuple holding the path; the
+                # image is attached to the next text turn.
+                content.append({'image': f'file://{q[0]}'})
+            else:
+                # add_text stores plain strings; indexing them with [0] would
+                # send only their first character as a bogus file path.
+                content.append({'text': q})
+                messages.append({'role': 'user', 'content': content})
+                messages.append({'role': 'assistant', 'content': [{'text': a}]})
+                content = []
+        # Drop the assistant placeholder of the turn that is being answered.
+        messages.pop()
+        # NOTE(review): byaldi documents index/search style methods on
+        # RAGMultiModalModel; confirm that a chat-style ``call`` taking these
+        # arguments really exists on RAG before relying on this request.
+        responses = RAG.call(model='qwen-vl-max-0809', messages=messages, stream=True)
+        response_content = None
+        for response in responses:
+            response_content = response['output']['choices'][0]['message']['content']
+            response_text = ''.join(ele.get('text', ele.get('box', '')) for ele in response_content)
+            _chatbot[-1] = (_parse_text(chat_query), _remove_image_special(response_text))
+            yield _chatbot
+        if not response_content:
+            # Nothing was streamed, so there is no final answer to record.
+            return
+        response_text = response_content[0]['text']
+        _chatbot[-1] = (_parse_text(chat_query), response_text)
+        task_history[-1] = (query, _parse_text(response_text))
+        # Without this yield the final assignment above is never displayed.
+        yield _chatbot
+    def regenerate(_chatbot, task_history):
+        """Generator callback: discard the newest answer and stream a new one.
+
+        Yields the chatbot unchanged when there is no history or when the
+        newest turn has no answer yet.
+        """
+        if not task_history:
+            yield _chatbot
+            return
+        item = task_history[-1]
+        if item[1] is None:
+            yield _chatbot
+            return
+        task_history[-1] = (item[0], None)
+        chatbot_item = _chatbot.pop(-1)
+        if chatbot_item[0] is not None:
+            _chatbot.append((chatbot_item[0], None))
+        else:
+            _chatbot.append((_chatbot[-1][0], None))
+        # predict is a generator; returning it from a plain function would
+        # hand the UI a generator object instead of streamed updates.
+        yield from predict(_chatbot, task_history)
+    def add_text(history, task_history, text):
+        """Append a text turn to both histories.
+
+        The chatbot copy holds the HTML-converted text, the task history the
+        raw text. The third return value is an empty string; the submit
+        wiring below lists only two outputs and clears the input box through
+        a separate handler.
+        """
+        task_text = text
+        history = history if history is not None else []
+        task_history = task_history if task_history is not None else []
+        history.append((_parse_text(text), None))
+        task_history.append((task_text, None))
+        return history, task_history, ""
+    def add_file(history, task_history, file):
+        """Append an uploaded file to both histories as a 1-tuple of its path."""
+        history = history if history is not None else []
+        task_history = task_history if task_history is not None else []
+        history.append(((file.name,), None))
+        task_history.append(((file.name,), None))
+        return history, task_history
+    def reset_state(task_history):
+        """Empty the task history in place and return an empty chatbot list."""
+        task_history.clear()
+        return []
+    with gr.Blocks() as demo:
+        gr.Markdown("""<p align="center"><img src="https://modelscope.oss-cn-beijing.aliyuncs.com/resource/qwen.png" style="height: 80px"/><p>""")
+        gr.Markdown("<center><font size=8>Qwen2-VL-Max</center>")
+        chatbot = gr.Chatbot(label='Qwen2-VL-Max', height=500)
+        query = gr.Textbox(lines=2, label='Input')
+        task_history = gr.State([])
+        with gr.Row():
+            addfile_btn = gr.UploadButton("📁 Upload", file_types=["image"])
+            submit_btn = gr.Button("🚀 Submit")
+            regen_btn = gr.Button("🤔️ Regenerate")
+            empty_bin = gr.Button("🧹 Clear History")
+        # Record the turn first, then stream the answer into the chatbot.
+        submit_btn.click(add_text, [chatbot, task_history, query], [chatbot, task_history]).then(predict, [chatbot, task_history], [chatbot], show_progress=True)
+        submit_btn.click(lambda: gr.update(value=""), [], [query])
+        empty_bin.click(reset_state, [task_history], [chatbot], show_progress=True)
+        regen_btn.click(regenerate, [chatbot, task_history], [chatbot], show_progress=True)
+        addfile_btn.upload(add_file, [chatbot, task_history, addfile_btn], [chatbot, task_history], show_progress=True)
+    demo.queue().launch(share=args.share)
+def main():
+    """Script entry point: parse the command line, then start the demo."""
+    _launch_demo(_get_args())
+# Start the app only when the file is executed directly, not on import.
+if __name__ == '__main__':
+    main()