Spaces:

infly
/

Infinity-Parser2-Demo

Running

App Files Community

Update app.py

#11

by KexuanRen - opened Apr 28

base: refs/heads/main

←

from: refs/pr/11

Discussion Files changed

+19

-554

Files changed (1) hide show

app.py +19 -554

app.py CHANGED Viewed

@@ -1,565 +1,30 @@
 import os
-import re
 import sys
-import json
-import time
-import copy
-import base64
-import asyncio
-import tempfile
 import subprocess
-from pathlib import Path
-from datetime import datetime
-import zipfile
-import httpx, aiofiles, os, asyncio
-import numpy as np
-import gradio as gr
-from PIL import Image
-from pdf2image import convert_from_path
-from loguru import logger
-from openai import OpenAI, AsyncOpenAI
-from gradio_pdf import PDF
-import certifi
-import httpx
-import aiohttp
-import uuid
-import tqdm
-import base64, pathlib
-from io import BytesIO
-from pdf2image import convert_from_bytes, convert_from_path     # pip install pdf2image
-import requests
-def setup_poppler_linux():
-    poppler_dir = "/tmp/poppler"
-    if not os.path.exists(poppler_dir):
-        os.makedirs(poppler_dir, exist_ok=True)
-        subprocess.run(["bash", "-lc", "rm -f /etc/apt/sources.list.d/*nodesource*.list || true"], check=False)
-        subprocess.run([
-            "apt-get", "update"
-        ], check=True)
-        subprocess.run([
-            "apt-get", "install", "-y", "poppler-utils"
-        ], check=True)
-setup_poppler_linux()
-preset_prompts = [
-    "Please convert the document into Markdown format.",
-    "Generate a clean and structured Markdown version of the document.",
-    "Transform this content into Markdown with proper headings and bullet points.",
-    "Convert the text to Markdown, preserving structure and formatting.",
-    "Reformat this document as Markdown with clear sections and lists.",
-]
-openai_api_key = "EMPTY"
-openai_api_base = os.environ.get("infinity_parser1_api")
-Authorization =  os.environ.get("infinity_parser1_Authorization")
-AVAILABLE_MODELS = {
-    "Infinity-Parser-7B": {
-            "name": os.environ.get("infinity_parser1_name"),
-            "client": AsyncOpenAI(
-                api_key=openai_api_key,
-                base_url=os.environ.get("infinity_parser1_api") + "/v1",
-            ),
-            "Authorization": os.environ.get("infinity_parser1_Authorization")
-        },
-    "Infinity-Parser2-30B-A3B-Preview": {
-            "name": os.environ.get("infinity_parser2_name"),
-            "client": AsyncOpenAI(
-                api_key=openai_api_key,
-                base_url=os.environ.get("infinity_parser2_api") + "/v1",
-            ),
-            "Authorization": os.environ.get("infinity_parser2_Authorization")
-        }
-}
-def send_pdf_to_parse(file_path, server_ip, port, route="/upload", api_key=None):
-    url = f"{openai_api_base}{route}"
-    headers = {}
-    if api_key:
-        headers["Authorization"] = f"Bearer {api_key}"
-    with open(file_path, "rb") as f:
-        files = {"file": (os.path.basename(file_path), f, "application/pdf")}
-        response = requests.post(url, files=files, headers=headers)
-    return response
-async def send_pdf_async_aiohttp(file_path, server_ip, route="/upload", Authorization=None):
-    """使用aiohttp异步发送PDF"""
-    url = f"{server_ip}{route}"
-    headers = {}
-    if Authorization:
-        headers["Authorization"] = f"Bearer {Authorization}"
-    try:
-        async with aiohttp.ClientSession() as session:
-            with open(file_path, "rb") as f:
-                data = aiohttp.FormData()
-                data.add_field('file', f, filename=os.path.basename(file_path), content_type='application/pdf')
-                async with session.post(url, data=data, headers=headers) as response:
-                    print(f"PDF发送成功: {file_path}, 状态码: {response.status}")
-                    return response
-    except Exception as e:
-        print(f"PDF发送失败: {file_path}, 错误: {e}")
-        return None
-def extract_makrdown(text):
-    m = re.search(r'```markdown\s*([\s\S]*?)```', text, re.MULTILINE)
-    if m:
-        return m.group(1).strip()
     else:
-        return text
-async def request(messages, model_name, client, Authorization):
-    chat_completion_from_base64 = await client.chat.completions.create(
-        messages=messages,
-        extra_headers={
-            "Authorization": f"Bearer {Authorization}"
-        },
-        model=model_name,
-        max_completion_tokens=4096,
-        stream=True,
-        temperature=0.0,
-        top_p=0.95
-    )
-    page = ""
-    async for chunk in chat_completion_from_base64:
-        if chunk.choices[0].delta.content:
-            content = chunk.choices[0].delta.content
-            choice = chunk.choices[0]
-            if choice.finish_reason is not None:
-                print(f"end reason = {choice.finish_reason}")
-                break
-            page += content
-            yield content
-def images_to_pdf(img_paths, pdf_path):
-    if isinstance(img_paths, (str, Path)):
-        img_paths = [img_paths]
-    if not img_paths:
-        raise ValueError("img_paths is empty")
-    images = []
-    for p in img_paths:
-        p = Path(p)
-        if not p.is_file():
-            raise FileNotFoundError(p)
-        img = Image.open(p)
-        if img.mode in ("RGBA", "P"):
-            img = img.convert("RGB")
-        images.append(img)
-    pdf_path = Path(pdf_path)
-    pdf_path.parent.mkdir(parents=True, exist_ok=True)
-    images[0].save(pdf_path,
-                   save_all=True,
-                   append_images=images[1:],
-                   resolution=300.0)
-    return pdf_path
-def encode_image(image_path):
-    with open(image_path, "rb") as image_file:
-        return base64.b64encode(image_file.read()).decode("utf-8")
-def build_message(image_path, prompt):
-    content = [
-        {
-            "type": "image_url",
-            "image_url": {
-                "url": f"data:image/jpeg;base64,{encode_image(image_path)}"
-            }
-        },
-        {"type": "text", 'text': prompt}
-    ]
-    messages = [
-        {"role": "system", "content": "You are a helpful assistant."},
-        {'role': 'user', 'content': content}
-    ]
-    return messages
-def download_markdown_file(md_text):
-    filename = f"markdown_{uuid.uuid4().hex[:8]}.md"
-    filepath = Path("downloads") / filename
-    filepath.parent.mkdir(exist_ok=True)
-    with open(filepath, "w", encoding="utf-8") as f:
-        f.write(md_text)
-    return str(filepath)
-async def doc_parser(doc_path, prompt, model_id):
-    model_name = AVAILABLE_MODELS[model_id]["name"]
-    client = AVAILABLE_MODELS[model_id]["client"]
-    Authorization = AVAILABLE_MODELS[model_id]["Authorization"]
-    doc_path = Path(doc_path)
-    if not doc_path.is_file():
-        raise FileNotFoundError(doc_path)
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-        queries = []
-        if doc_path.suffix.lower() == ".pdf":
-            pages: List[Image.Image] = convert_from_path(doc_path, dpi=300)
-            for idx, page in enumerate(pages, start=1):
-                img_path = tmpdir / f"page_{idx}.png"
-                page.save(img_path, "PNG")
-                messages = build_message(img_path, prompt)
-                queries.append(messages)
-        else:
-            messages = build_message(doc_path, prompt)
-            queries.append(messages)
-    all_pages = []
-    all_pages_raw = []
-    for query in queries:
-        pages = ""
-        async for chunk in request(query, model_name, client, Authorization):
-            pages += chunk
-            yield extract_makrdown(pages), pages
-        all_pages.append(extract_makrdown(pages))
-        all_pages_raw.append(pages)
-        print(all_pages)
-        yield "\n---\n".join(all_pages), "\n\n".join(all_pages_raw)
-def compress_directory_to_zip(directory_path, output_zip_path):
     try:
-        with zipfile.ZipFile(output_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
-            for root, dirs, files in os.walk(directory_path):
-                for file in files:
-                    file_path = os.path.join(root, file)
-                    arcname = os.path.relpath(file_path, directory_path)
-                    zipf.write(file_path, arcname)
-        return 0
-    except Exception as e:
-        logger.exception(e)
-        return -1
-latex_delimiters = [
-    {'left': '$$', 'right': '$$', 'display': True},
-    {'left': '$', 'right': '$', 'display': False},
-    {'left': '\\(', 'right': '\\)', 'display': False},
-    {'left': '\\[', 'right': '\\]', 'display': True},
-]
-def check_prompt(prompt):
-    if not prompt or prompt.strip() == "":
-        raise gr.Error("Please select or enter a prompt before parsing.")
-    return prompt
-def to_file(image_path):
-    if image_path.endswith("Academic_Papers.png"):
-        image_path = image_path.replace("Academic_Papers.png", "Academic_Papers.pdf")
-    return image_path
-def render_img(b64_list, idx, scale):
-    """根据当前索引 idx 和缩放倍数 scale 渲染 HTML。"""
-    if not b64_list:
-        return "<p style='color:gray'>请先上传图片</p>"
-    idx %= len(b64_list)
-    src = b64_list[idx]
-    # return (
-    #     f'<div style="overflow:auto;border:1px solid #ccc;'
-    #     f'display:flex;justify-content:center;align-items:center;'   # ① 横纵向居中
-    #     f'width:100%;height:800px;">'                               # ② 容器尺寸
-    #     f'<img src="{src}" '
-    #     f'style="transform:scale({scale});transform-origin:center center;" />'  # ③ 以中心缩放
-    #     f'</div>'
-    # )
-    # 以百分比形式设置 width，height 自动等比
-    percent = scale * 100
-    if scale <= 1:
-        # ---------- 居中模式 ----------
-        return f"""
-            <div style="
-                width:100%;
-                height:800px;
-                overflow:auto;
-                border:1px solid #ccc;
-            ">
-              <div style="
-                  min-width:100%;           /* 保证外层 div 至少跟容器一样宽 */
-                  display:flex;
-                  justify-content:center;   /* 小图水平居中 */
-              ">
-                <img src="{src}" style="
-                    width:{percent}%;
-                    height:auto;
-                    display:block;
-                ">
-              </div>
-            </div>
-            """
-    else:
-        # ---------- 放大模式 ----------
-        return (
-            f'<div style="overflow:auto;border:1px solid #ccc;'
-            f'width:100%;height:800px;">'
-            f'  <img src="{src}" '
-            f'       style="width:{percent}%;max-width:none;'
-            f'              height:auto;display:block;" />'
-            f'</div>'
-        )
-def files_to_b64(file, pdf_dpi: int = 200):
-    out: list[str] = []
-    if hasattr(file, "data"):
-        raw_bytes = file.data
-        suffix    = pathlib.Path(file.name).suffix.lower()
-        # -- PDF --
-        if suffix == ".pdf":
-            pages = convert_from_bytes(raw_bytes, dpi=pdf_dpi)
-            for page in pages:
-                buf = BytesIO()
-                page.save(buf, format="PNG")
-                b64 = base64.b64encode(buf.getvalue()).decode()
-                out.append(f"data:image/png;base64,{b64}")
-        else:
-            b64 = base64.b64encode(raw_bytes).decode()
-            out.append(f"data:image/{suffix[1:]};base64,{b64}")
-    else:
-        path   = pathlib.Path(file)
-        suffix = path.suffix.lower()
-        if suffix == ".pdf":
-            pages = convert_from_path(str(path), dpi=pdf_dpi)
-            for page in pages:
-                buf = BytesIO()
-                page.save(buf, format="PNG")
-                b64 = base64.b64encode(buf.getvalue()).decode()
-                out.append(f"data:image/png;base64,{b64}")
-        else:
-            raw_bytes = path.read_bytes()
-            b64 = base64.b64encode(raw_bytes).decode()
-            out.append(f"data:image/{suffix[1:]};base64,{b64}")
-    return out
-async def process_file(file_path):
-    """使用asyncio的异步方案"""
-    if file_path is None:
-        return None
-    if not file_path.endswith(".pdf"):
-        tmp_file_path = Path(file_path)
-        tmp_file_path = tmp_file_path.with_suffix(".pdf")
-        images_to_pdf(file_path, tmp_file_path)
-    else:
-        tmp_file_path = file_path
-        asyncio.create_task(send_pdf_async_aiohttp(tmp_file_path, server_ip=openai_api_base, Authorization=Authorization))
-    return str(tmp_file_path)
-def check_file(f):
-    if f is None:
-        raise gr.Error("Please upload a PDF or image before parsing.")
-    return f
-if __name__ == '__main__':
-    with gr.Blocks() as demo:
-        with gr.Row():
-            with gr.Column(variant='panel', scale=5):
-                file = gr.File(label='Please upload a PDF or image', file_types=['.pdf', '.png', '.jpeg', '.jpg'], type="filepath")
-                prompts = gr.Dropdown(
-                    choices=preset_prompts,
-                    label="Prompt",
-                    info="Enter or select prompts...",
-                    value=preset_prompts[0],
-                    multiselect=False,
-                    interactive=True,
-                    allow_custom_value=True,
-                )
-                with gr.Row():
-                    change_bu = gr.Button('Parse')
-                    clear_bu = gr.ClearButton(value='Clear')
-                zoom = gr.Slider(0.5, 3, value=1, step=0.1, label="Image Scale")
-                with gr.Row():
-                    prev_btn = gr.Button("⬅️ Pre")
-                    next_btn = gr.Button("Next ➡️")
-                viewer = gr.HTML()
-                example_root = os.path.join(os.path.dirname(__file__), 'examples')
-                images = [
-                    os.path.join(example_root, f)
-                    for f in os.listdir(example_root)
-                    if f.lower().endswith(('png', 'jpg', 'jpeg'))
-                ]
-            with gr.Column(variant='panel', scale=5):
-                model_selector = gr.Dropdown(
-                    choices=[(k, k) for k, v in AVAILABLE_MODELS.items()],
-                    value=list(AVAILABLE_MODELS.keys())[0],  # 默认选择第一个模型
-                    label="Model Selection",
-                    info="Select the model to use for parsing",
-                    interactive=True,
-                )
-                with gr.Accordion("Examples", open=True):
-                    example_root = "examples"
-                    file_path = [
-                        os.path.join(example_root, f)
-                        for f in ["Financial_Reports.png", "Books.png", "Magazines.png", "Academic_Papers.png"]
-                    ]
-                    with gr.Row():
-                        for i, label in enumerate(["Financial Reports(IMG)", "Books(IMG)", "Magazines(IMG)", "Academic Papers(PDF)"]):
-                            with gr.Column(scale=1, min_width=120):
-                                gr.Image(
-                                    value=file_path[i],
-                                    width=120,
-                                    height=90,
-                                    show_label=False,
-                                    show_download_button=False
-                                )
-                                gr.Button(label).click(fn=to_file, inputs=gr.State(file_path[i]), outputs=file)
-                download_btn = gr.Button("⬇️ Generate download link", size="sm")
-                output_file = gr.File(label='Parse result', interactive=False, elem_id="down-file-box",visible=False)
-                gr.HTML("""
-                <style>
-                #down-file-box {
-                    max-height: 300px;
-                }
-                </style>
-                """)
-                with gr.Tabs():
-                    with gr.Tab('Markdown rendering'):
-                        md = gr.Markdown(label='Markdown rendering', height=1100, show_copy_button=True,
-                             latex_delimiters=latex_delimiters,
-                             line_breaks=True)
-                    with gr.Tab('Markdown text'):
-                        md_text = gr.TextArea(lines=45, show_copy_button=True)
-        img_list_state = gr.State([])
-        idx_state = gr.State(0)
-        async def upload_handler(files):
-            if files is None:
-                return [], 0, ""
-            if files.lower().endswith(".pdf"):
-                asyncio.create_task(send_pdf_async_aiohttp(files, server_ip=openai_api_base, Authorization=Authorization))
-            b64s = files_to_b64(files)
-            return b64s, 0, render_img(b64s, 0, 1)
-        file.change(
-            upload_handler,
-            inputs=file,
-            outputs=[img_list_state, idx_state, viewer],
-        ).then(
-            lambda: gr.update(value=1),   # 无输入，直接把 zoom 设为 1
-            None,                                # inputs=None
-            zoom                                 # outputs=[zoom]
-        )
-        def show_prev(b64s, idx, scale):
-            idx -= 1
-            return idx, render_img(b64s, idx, scale)
-        prev_btn.click(
-            show_prev,
-            inputs=[img_list_state, idx_state, zoom],
-            outputs=[idx_state, viewer],
-        )
-        def show_next(b64s, idx, scale):
-            idx += 1
-            return idx, render_img(b64s, idx, scale)
-        next_btn.click(
-            show_next,
-            inputs=[img_list_state, idx_state, zoom],
-            outputs=[idx_state, viewer],
-        )
-        zoom.change(
-            lambda b64s, idx, scale: render_img(b64s, idx, scale),
-            inputs=[img_list_state, idx_state, zoom],
-            outputs=viewer,
-        )
-        change_bu.click(
-            fn=check_prompt,
-            inputs=prompts,
-            outputs=prompts
-        ).then(
-            lambda f: gr.update(visible=False),
-            inputs=output_file,
-            outputs=output_file
-        ).then(
-            fn=check_file,
-            inputs=file,
-            outputs=file
-        ).then(
-            fn=doc_parser,
-            inputs=[file, prompts, model_selector],
-            outputs=[md, md_text]
-        )
-        clear_bu.add([file, md, md_text])
-        download_btn.click(
-            fn=download_markdown_file,
-            inputs=md_text,
-            outputs=output_file
-        ).then(
-            lambda f: gr.update(visible=True),
-            inputs=output_file,
-            outputs=output_file
-        )
-    demo.launch(server_name='0.0.0.0',share=True)

 import os
 import sys
 import subprocess
+from loguru import logger
+def setup_and_run():
+    """Fetch the INF-MLLM repository and launch its Gradio demo.
+
+    Clones ``repo_url`` into ``clone_dir`` when that directory does not
+    exist yet, otherwise runs ``git pull`` inside it.  It then starts
+    ``gradio_app.py`` with the current interpreter, using the
+    Infinity-Parser2 package directory as the working directory.
+
+    Raises:
+        subprocess.CalledProcessError: If ``git clone`` or ``git pull``
+            exits with a non-zero status.  A non-zero exit of the Gradio
+            process is logged instead of raised.
+    """
+    repo_url = "https://github.com/infly-ai/INF-MLLM.git"
+    clone_dir = "INF-MLLM"
+    # Derive the app directory from clone_dir so the two cannot drift apart.
+    repo_dir = os.path.join(clone_dir, "Infinity-Parser2", "infinity_parser2")
+    # Single source of truth for the script name used by both the log line
+    # and the launch command (the log previously named a different file).
+    entry_script = "gradio_app.py"
+
+    if not os.path.exists(clone_dir):
+        logger.info(f"clone: {repo_url}")
+        # Name the destination explicitly rather than relying on git's
+        # default directory name happening to equal clone_dir.
+        subprocess.run(["git", "clone", repo_url, clone_dir], check=True)
+    else:
+        logger.info("pulling...")
+        subprocess.run(["git", "pull"], cwd=clone_dir, check=True)
+
+    # Set in this process so the child launched below inherits them.
+    os.environ["GRADIO_SSR_MODE"] = "false"
+    os.environ["GRADIO_SERVER_PORT"] = "7860"
+    os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
+
+    logger.info(f"run {entry_script} ...")
+    try:
+        subprocess.run([sys.executable, entry_script], cwd=repo_dir, check=True)
+    except subprocess.CalledProcessError as e:
+        # The failure is reported, not re-raised, so the caller returns normally.
+        logger.error(f"Gradio exit: {e}")
+# Run the clone/pull-and-launch sequence only when this file is executed as a
+# script, not when it is imported.
+if __name__ == "__main__":
+    setup_and_run()