Spaces:

DocTron
/

DocTron-Formula

Runtime error

App Files Files Community

zhongyufeng02 committed on Jul 31, 2025

Commit

cb03203

1 Parent(s): ed112a9

Add application file

Browse files

Files changed (5) hide show

app.py +221 -0
asset/test_jsons/line-level.json +20 -0
asset/test_jsons/page-level.json +20 -0
asset/test_jsons/paragraph-level.json +20 -0
requirements.txt +268 -0

app.py ADDED Viewed

	@@ -0,0 +1,221 @@

+import argparse
+import json
+import os
+import torch
+from PIL import Image
+from qwen_vl_utils import process_vision_info
+from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
+import gradio as gr
+# Instruction sent to the model together with every image (see ImageProcessor.process_single_image).
+user_prompt = (
+    "Analyze the image. "
+    "Extract and output only the LaTeX formulas present in the image, "
+    "in LaTeX code format. "
+    "Ignore inline formulas, all other text, "
+    "and do not include any explanations."
+)
+def read_input_file(input_file):
+    """Read the first sample from a test JSON file.
+
+    The file is expected to hold a JSON list whose first element has an
+    ``images`` list and a ``messages`` list with at least two entries.
+
+    Args:
+        input_file: Path of the JSON file to read.
+
+    Returns:
+        A ``(image_path, gt_latex_code)`` tuple: the first entry of
+        ``images`` and the ``content`` of the second message.
+
+    Raises:
+        OSError: If the file cannot be opened.
+        json.JSONDecodeError: If the file is not valid JSON.
+        IndexError, KeyError: If the expected entries are missing.
+    """
+    # Decode explicitly as UTF-8 so the result does not depend on the locale's default encoding.
+    with open(input_file, 'r', encoding='utf-8') as file:
+        data = json.load(file)
+    first_sample = data[0]
+    image_path = first_sample['images'][0]
+    gt_latex_code = first_sample['messages'][1]['content']
+    return image_path, gt_latex_code
+class ImageProcessor:
+    """Wrap a Qwen2.5-VL checkpoint and its processor for single-image formula extraction."""
+
+    def __init__(self, args):
+        """Load the model and processor named by ``args.ckpt`` and set the generation options.
+
+        Args:
+            args: Object exposing a ``ckpt`` attribute (checkpoint name or path).
+        """
+        self.args = args
+        loaded = self.load_model_and_processor()
+        self.model, self.vis_processor = loaded
+        # Keyword arguments forwarded verbatim to ``model.generate``.
+        # NOTE(review): top_k=1 with a very small temperature/top_p presumably aims at
+        # near-deterministic decoding - confirm against the generation config.
+        self.generate_kwargs = {
+            "max_new_tokens": 2048,
+            "top_p": 0.001,
+            "top_k": 1,
+            "temperature": 0.01,
+            "repetition_penalty": 1.0,
+        }
+
+    def load_model_and_processor(self):
+        """Load the processor and the model from ``self.args.ckpt``.
+
+        Returns:
+            A ``(model, vis_processor)`` tuple; the model has been switched to eval mode.
+        """
+        ckpt = self.args.ckpt
+        processor_obj = AutoProcessor.from_pretrained(ckpt)
+        net = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+            ckpt,
+            torch_dtype="auto",
+            device_map="auto",
+        )
+        net.eval()
+        return net, processor_obj
+
+    def process_single_image(self, image_path):
+        """Run the model on one image file and return the decoded answer.
+
+        Args:
+            image_path: Path of the image; it is passed to the model as a ``file://`` URI.
+
+        Returns:
+            The decoded text of the first generated sequence, or the string
+            ``"None"`` if any exception is raised along the way (the exception
+            is printed, not propagated).
+        """
+        question = user_prompt
+        try:
+            image_part = {
+                "type": "image",
+                "image": "file://" + image_path,
+                "min_pixels": 32 * 32,
+                "max_pixels": 512 * 512,
+            }
+            text_part = {"type": "text", "text": question}
+            messages = [{"role": "user", "content": [image_part, text_part]}]
+            text = self.vis_processor.apply_chat_template(
+                messages, tokenize=False, add_generation_prompt=True
+            )
+            images, videos = process_vision_info([messages])
+            inputs = self.vis_processor(
+                text=text, images=images, videos=videos, padding=True, return_tensors='pt'
+            )
+            inputs = inputs.to(self.model.device)
+            with torch.no_grad():
+                generated_ids = self.model.generate(**inputs, **self.generate_kwargs)
+            # Drop the leading len(input_ids) tokens of every output so only the continuation is decoded.
+            continuations = []
+            for input_ids, output_ids in zip(inputs.input_ids, generated_ids):
+                continuations.append(output_ids[len(input_ids):])
+            decoded = self.vis_processor.tokenizer.batch_decode(
+                continuations, skip_special_tokens=True, clean_up_tokenization_spaces=False
+            )
+            return decoded[0]
+        except Exception as e:
+            # Best effort: report the failure and hand back a placeholder string.
+            print(e, flush=True)
+            return "None"
+def save_image_with_auto_naming(image, save_dir="./tmp"):
+    """Save ``image`` as ``<n>.png`` in ``save_dir``, using the next free number.
+
+    The number is one more than the largest numeric prefix among the existing
+    ``.png`` files in ``save_dir`` (0 when there is none). The file is created
+    exclusively, so a name claimed by another request in the meantime is
+    skipped instead of being overwritten.
+
+    Args:
+        image: Object with a PIL-style ``save(fp, format=...)`` method.
+        save_dir: Target directory; created if it does not exist.
+
+    Returns:
+        Path of the newly written PNG file.
+
+    Raises:
+        OSError: If the directory or the file cannot be created.
+    """
+    # Make sure the target directory exists.
+    os.makedirs(save_dir, exist_ok=True)
+    # Collect the numbers already taken by files named "<digits>[...].png".
+    used_numbers = []
+    for file_name in os.listdir(save_dir):
+        stem = file_name.split('.')[0]
+        # isdecimal() only accepts characters int() can parse; isdigit() also
+        # accepts e.g. superscript digits, on which int() raises ValueError.
+        if file_name.endswith('.png') and stem.isdecimal():
+            used_numbers.append(int(stem))
+    next_num = max(used_numbers, default=-1) + 1
+    while True:
+        temp_path = os.path.join(save_dir, f"{next_num}.png")
+        try:
+            # Exclusive creation fails if the name already exists, which closes
+            # the gap between listing the directory and writing the file.
+            handle = open(temp_path, "xb")
+        except FileExistsError:
+            next_num += 1
+            continue
+        try:
+            with handle:
+                # Saving to a file object needs an explicit format.
+                image.save(handle, format="PNG")
+        except Exception:
+            # Do not leave a truncated file behind when encoding fails.
+            os.remove(temp_path)
+            raise
+        return temp_path
+# Gradio callbacks: run inference on an uploaded image and load the bundled examples.
+def process_image_for_gradio(image):
+    """Run formula recognition on an uploaded image and return the predicted LaTeX.
+
+    Args:
+        image: Image object coming from the Gradio image input, or ``None``
+            when nothing has been uploaded.
+
+    Returns:
+        ``""`` when ``image`` is ``None``; otherwise whatever
+        ``processor.process_single_image`` returns for the saved image.
+    """
+    if image is None:
+        return ""
+    # The model wrapper works on a file path, so write the upload to disk first.
+    temp_path = save_image_with_auto_naming(image)
+    try:
+        # ``processor`` is looked up as a module-level global (created in the ``__main__`` block).
+        return processor.process_single_image(temp_path)
+    finally:
+        # Remove the temporary file even if inference raises.
+        try:
+            os.remove(temp_path)
+        except FileNotFoundError:
+            # Already gone; nothing left to clean up.
+            pass
+def load_example(example_name):
+    """Open the image of a bundled example.
+
+    Args:
+        example_name: Base name (without ``.json``) of a file in ``./asset/test_jsons``.
+
+    Returns:
+        A ``(image, example_name)`` tuple, where ``image`` is the opened PIL image.
+    """
+    json_path = os.path.join('./asset/test_jsons', f"{example_name}.json")
+    # Only the image path is needed here; the reference LaTeX is ignored.
+    # NOTE(review): the bundled JSON files list absolute /mnt/... image paths - confirm
+    # those files exist in the deployment environment.
+    sample = read_input_file(input_file=json_path)
+    return Image.open(sample[0]), example_name
+# Gradio UI construction.
+def create_gradio_interface(processor):
+    """Build the Gradio Blocks demo for formula recognition.
+
+    Layout: the left column has the image input, Clear/Submit buttons and three
+    example buttons; the right column has a read-only textbox for the predicted LaTeX.
+
+    Args:
+        processor: Not used inside this function; the event handlers call
+            ``process_image_for_gradio``, which reads the module-level ``processor``.
+
+    Returns:
+        The assembled ``gr.Blocks`` instance (not yet launched).
+    """
+    with gr.Blocks(title="DocTron-Formula") as demo:
+        gr.Markdown("# DocTron-Formula LaTeX公式识别")
+        gr.Markdown("上传图片或选择示例来识别LaTeX公式")
+        with gr.Row():
+            with gr.Column():
+                # Left column: input image and controls.
+                image_input = gr.Image(type="pil", label="上传图片")
+                with gr.Row():
+                    clear_btn = gr.Button("Clear")
+                    submit_btn = gr.Button("Submit", variant="primary")
+                gr.Markdown("### 示例图片")
+                with gr.Row():
+                    # Each button is paired with the base name of its example JSON file.
+                    example_buttons = [
+                        (gr.Button("Line-level"), "line-level"),
+                        (gr.Button("Paragraph-level"), "paragraph-level"),
+                        (gr.Button("Page-level"), "page-level"),
+                    ]
+                # Holds the name of the example that was loaded last.
+                example_name = gr.State()
+            with gr.Column():
+                # Right column: prediction result.
+                latex_output = gr.Textbox(label="预测的LaTeX公式", lines=10, interactive=False)
+        # Submit runs recognition on the current image.
+        submit_btn.click(
+            fn=process_image_for_gradio,
+            inputs=[image_input],
+            outputs=[latex_output],
+        )
+        # Clear resets both the image and the result.
+        clear_btn.click(
+            fn=lambda: (None, ""),
+            inputs=[],
+            outputs=[image_input, latex_output],
+        )
+        # Example buttons: load the example image first, then run recognition on it.
+        for button, example_key in example_buttons:
+            # A hidden textbox feeds the fixed example name to load_example.
+            hidden_key = gr.Textbox(value=example_key, visible=False)
+            load_event = button.click(
+                fn=load_example,
+                inputs=hidden_key,
+                outputs=[image_input, example_name],
+            )
+            load_event.then(
+                fn=lambda img: process_image_for_gradio(img),
+                inputs=[image_input],
+                outputs=[latex_output],
+            )
+    return demo
+if __name__ == "__main__":
+    # Command-line options. --input_file is accepted but not read by the code shown here.
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--ckpt", default="DocTron/DocTron-Formula", type=str)
+    parser.add_argument("--input_file", default="line-level", type=str)
+    args = parser.parse_args()
+    # Load the model once. The name must stay module-level: process_image_for_gradio
+    # looks up ``processor`` as a global.
+    processor = ImageProcessor(args)
+    # Build the UI and start it with Gradio's default launch settings; pass
+    # server_name / server_port / share to launch() to override them.
+    demo = create_gradio_interface(processor)
+    demo.launch()

asset/test_jsons/line-level.json ADDED Viewed

	@@ -0,0 +1,20 @@

+[
+    {
+        "qid": "part-00014-8bdd594e-bf2f-4dee-8277-9c33e8468d28-c000_9402_single_15",
+        "messages": [
+            {
+                "content": "<image> Analyze the image. Extract and output only the LaTeX formulas present in the image, in LaTeX code format. Ignore inline formulas, all other text, and do not include any explanations.",
+                "role": "user"
+            },
+            {
+                "content": "\\begin{align}\n\\frac{d}{dt}X_1+\\frac{d}{dt}X_2 &=\\left(AX_1+B\\begin{bmatrix}c_1\\cos{\\omega_1t}\\\\ d_1\\cos{\\omega_1t}\\end{bmatrix}\\right)+ \\left(AX_2+B\\begin{bmatrix}c_2\\cos{\\omega_2t}\\\\ d_2\\cos{\\omega_2t}\\end{bmatrix}\\right) \\\\ &=AX_1+AX_2+B\\begin{bmatrix}c_1\\cos{\\omega_1t}\\\\ d_1\\cos{\\omega_1t}\\end{bmatrix}+B\\begin{bmatrix}c_2\\cos{\\omega_2t}\\\\ d_2\\cos{\\omega_2t}\\end{bmatrix} \\\\ &= A(X_1+X_2) + B\\left(\\begin{bmatrix}c_1\\cos{\\omega_1t}\\\\ d_1\\cos{\\omega_1t}\\end{bmatrix}+\\begin{bmatrix}c_2\\cos{\\omega_2t}\\\\ d_2\\cos{\\omega_2t}\\end{bmatrix}\\right) \\\\ &= AX+B\\begin{bmatrix}u_1(t) \\\\ u_2(t)\\end{bmatrix} \\\\ &=\\frac{d}{dt}X.\n\\end{align}",
+                "role": "assistant"
+            }
+        ],
+        "images": [
+            "/mnt/dolphinfs/ssd_pool/docker/user/hadoop-basecv-hl/hadoop-basecv/zhongyufeng02/data/OCR/math_stackexchange_single/se_20_host_prepared_20240926/part-00014-8bdd594e-bf2f-4dee-8277-9c33e8468d28-c000/images/question_part-00014-8bdd594e-bf2f-4dee-8277-9c33e8468d28-c000_9402_single_15.png"
+        ],
+        "model_answer": "\\begin{align}\n\\frac{d}{dt}X_1 + \\frac{d}{dt}X_2 &= \\left(AX_1 + B\\begin{bmatrix}c_1\\cos\\omega_1 t\\\\d_1\\cos\\omega_1 t\\end{bmatrix}\\right) + \\left(AX_2 + B\\begin{bmatrix}c_2\\cos\\omega_2 t\\\\d_2\\cos\\omega_2 t\\end{bmatrix}\\right)\\\\ &= AX_1 + AX_2 + B\\begin{bmatrix}c_1\\cos\\omega_1 t\\\\d_1\\cos\\omega_1 t\\end{bmatrix} + B\\begin{bmatrix}c_2\\cos\\omega_2 t\\\\d_2\\cos\\omega_2 t\\end{bmatrix}\\\\ &= A(X_1 + X_2) + B\\left(\\begin{bmatrix}c_1\\cos\\omega_1 t\\\\d_1\\cos\\omega_1 t\\end{bmatrix} + \\begin{bmatrix}c_2\\cos\\omega_2 t\\\\d_2\\cos\\omega_2 t\\end{bmatrix}\\right)\\\\ &= AX + B\\begin{bmatrix}u_1(t)\\\\u_2(t)\\end{bmatrix}\\\\ &= \\frac{d}{dt}X.\n\\end{align}",
+        "edit_acc": 0.1109350237717908
+    }
+]

asset/test_jsons/page-level.json ADDED Viewed

	@@ -0,0 +1,20 @@

+[
+    {
+        "qid": "part-00014-8bdd594e-bf2f-4dee-8277-9c33e8468d28-c000_27959_all",
+        "messages": [
+            {
+                "content": "<image> Analyze the image. Extract and output only the LaTeX formulas present in the image, in LaTeX code format. Ignore inline formulas, all other text, and do not include any explanations.",
+                "role": "user"
+            },
+            {
+                "content": "$$\n\\sigma^2 = \\int_{-\\infty}^\\infty \\frac{N_0}{2}|H(f)|^2\\,\\mathrm df\n$$\n\\begin{align}\n\\sigma^2 &= \\int_{-\\infty}^\\infty \\frac{N_0}{2}|H(f)|^2\\,df \\\\ &= \\int_{-\\infty}^\\infty \\frac{N_0}{2}|H(\\omega/2\\pi)|^2\\,d(\\omega/2\\pi) \\\\ &= \\frac{1}{2\\pi}\\int_{-\\infty}^\\infty \\frac{N_0}{2}|H(\\omega)|^2\\,d\\omega \\\\ &= \\frac{1}{2\\pi}\\int_{-\\infty}^\\infty \\frac{N_0}{2}\\frac{1}{(\\omega\\tau)^2+1}\\,d\\omega \\\\ &= \\frac{1}{2\\pi\\tau}\\int_{-\\infty}^\\infty \\frac{N_0}{2}\\frac{1}{(\\omega\\tau)^2+1}\\,d(\\omega\\tau) \\\\ &= \\frac{1}{2\\pi\\tau}\\int_{-\\infty}^\\infty \\frac{N_0}{2}\\frac{1}{u^2+1}\\, du \\\\ &= \\frac{N_0}{4\\pi\\tau}\\left[\\tan^{-1} u\\right]_{-\\infty}^{\\infty} \\\\ &= \\frac{N_0}{4\\pi\\tau}\\left[\\pi/2 - (-\\pi/2)\\right] \\\\ &= \\frac{N_0}{4\\tau},\n\\end{align}\n$$\ny[k]=x(kT)\\;\\Longleftrightarrow \\;R_y[k]=R_x(kT)\\tag{1}\n$$\n$$\nS_y(e^{j\\omega T})=\\frac{1}{T}\\sum_{k=-\\infty}^{\\infty}S_x\\left(\\omega-\\frac{2\\pi k}{T}\\right)\\tag{2}\n$$",
+                "role": "assistant"
+            }
+        ],
+        "images": [
+            "/mnt/dolphinfs/ssd_pool/docker/user/hadoop-basecv-hl/hadoop-basecv/zhongyufeng02/data/OCR/math_stackexchange_new/se_20_host_prepared_20240926/part-00014-8bdd594e-bf2f-4dee-8277-9c33e8468d28-c000/images/question_part-00014-8bdd594e-bf2f-4dee-8277-9c33e8468d28-c000_27959_all.png"
+        ],
+        "model_answer": "$$\n\\sigma^2 = \\int_{-\\infty}^{\\infty} \\frac{N_0}{2} |H(f)|^2 \\, \\mathrm{d}f\n$$\n\\begin{align}\n\\sigma^2 &= \\int_{-\\infty}^{\\infty} \\frac{N_0}{2} |H(f)|^2 \\, \\mathrm{d}f \\\\ &= \\int_{-\\infty}^{\\infty} \\frac{N_0}{2} |H(\\omega/2\\pi)|^2 \\, \\mathrm{d}(\\omega/2\\pi) \\\\ &= \\frac{1}{2\\pi} \\int_{-\\infty}^{\\infty} \\frac{N_0}{2} |H(\\omega)|^2 \\, \\mathrm{d}\\omega \\\\ &= \\frac{1}{2\\pi} \\int_{-\\infty}^{\\infty} \\frac{N_0}{2} \\frac{1}{(\\omega\\tau)^2 + 1} \\, \\mathrm{d}\\omega \\\\ &= \\frac{2\\pi\\tau}{2\\pi} \\int_{-\\infty}^{\\infty} \\frac{N_0}{2} \\frac{1}{(\\omega\\tau)^2 + 1} \\, \\mathrm{d}(\\omega\\tau) \\\\ &= \\frac{1}{2\\pi\\tau} \\int_{-\\infty}^{\\infty} \\frac{N_0}{2} \\frac{1}{u^2 + 1} \\, \\mathrm{d}u \\\\ &= \\frac{N_0}{4\\pi} \\left[ \\tan^{-1} u \\right]_{-\\infty}^{\\infty} \\\\ &= \\frac{N_0}{4\\pi} \\left[ \\pi/2 - (-\\pi/2) \\right] \\\\ &= \\frac{N_0}{4\\pi}\n\\end{align}\n$$\ny[k]=x(kT)\\iff R_y[k]=R_x(kT)\\tag{1}\n$$\n$$\nS_y(e^{j\\omega T})=\\frac{1}{T}\\sum_{k=-\\infty}^{\\infty}S_x\\left(\\omega-\\frac{2\\pi k}{T}\\right)\\tag{2}\n$$",
+        "edit_acc": 0.15040650406504066
+    }
+]

asset/test_jsons/paragraph-level.json ADDED Viewed

	@@ -0,0 +1,20 @@

+[
+    {
+        "qid": "part-00025-8bdd594e-bf2f-4dee-8277-9c33e8468d28-c000_4210_multi",
+        "messages": [
+            {
+                "content": "<image> Analyze the image. Extract and output only the LaTeX formulas present in the image, in LaTeX code format. Ignore inline formulas, all other text, and do not include any explanations.",
+                "role": "user"
+            },
+            {
+                "content": "\\begin{align}\n&{\\text{Start with defining $2N$-Point DFT of $x_{2N}[n]\\ldots$}}\\\\ X_{2N}[k] &= \\sum_{n=0}^{2N-1} {x_{2N}[n] e^{-j\\frac{2\\pi}{2N}nk}} \\\\ \\\\ &{\\text{first split the sum and then substitude $n=n-N$ in the $2^{nd}$}}\\\\ &= \\sum_{n=0}^{N-1} {x_{2N}[n] e^{-j\\frac{2\\pi}{N}n(k/2)}} + \\sum_{n=N}^{2N-1} {x_{2N}[n] e^{-j\\frac{2\\pi}{2N}nk}} \\\\ \\\\ &{\\text{recognise $x_{2N}[n]=x_N[n]$, $x_{2N}[n+N]=x_N[n]$ }}\\\\ &= \\sum_{n=0}^{N-1} {x_{2N}[n] e^{-j\\frac{2\\pi}{N}n(k/2)}} + \\sum_{n=0}^{N-1} {x_{2N}[n+N] e^{-j\\frac{2\\pi}{2N}(n+N)k}} \\\\ \\\\ &{\\text{Expand the 2nd sum's multiplier factor}}\\\\ &= \\sum_{n=0}^{N-1} {x_N[n] e^{-j\\frac{2\\pi}{N}n(k/2)}} + \\sum_{n=0}^{N-1} {x_N[n] e^{-j\\frac{2\\pi}{N}(n+N)(k/2)}} \\\\ \\\\ &{\\text{Replace $e^{-2j\\frac\\pi N}$ with $W_N$ for simplicity }} \\\\ &= \\sum_{n=0}^{N-1} {x_N[n] W_N^{nk/2}} + W_N^{Nk/2}\\sum_{n=0}^{N-1} {x_N[n] W_N^{nk/2}} \\\\ \\\\ &{\\text{Recognise the sums as $N$-Point DFT $X[k]$ of $x[n]$, at $\\frac k2$}}\\\\ &= X_N[k/2] + e^{-j\\pi k} X_N[k/2] \\\\ \\\\ &= X_N[k/2] \\cdot \\big( 1 + (-1)^k \\big) \\\\ \\\\ &= \\begin{cases} 2 X_N[k/2] , &\\scriptstyle{\\text{k=0,2,4,...,2N-2 }}\\\\ 0 , &\\scriptstyle{\\text{k=1,3,5,...,2N-1 }}\\\\ \\end{cases}\n\\end{align}\n$$\nX_{2N}[k] = 0 ~~,~~ \\text{for}~~ k=1,3,5...,2N-1\n$$",
+                "role": "assistant"
+            }
+        ],
+        "images": [
+            "/mnt/dolphinfs/ssd_pool/docker/user/hadoop-basecv-hl/hadoop-basecv/zhongyufeng02/data/OCR/math_stackexchange_multi/se_20_host_prepared_20240926/part-00025-8bdd594e-bf2f-4dee-8277-9c33e8468d28-c000/images/question_part-00025-8bdd594e-bf2f-4dee-8277-9c33e8468d28-c000_4210_multi.png"
+        ],
+        "model_answer": "\\begin{align}\n\\text{Start with defining 2N-Point DFT of } x_{2N}[n] \\ldots \\\\ X_{2N}[k] &= \\sum_{n=0}^{2N-1} x_{2N}[n] e^{-j \\frac{2\\pi}{2N} nk} \\\\ \\\\ &\\text{first split the sum and then substitute } n = n-N \\text{ in the } 2^{nd} \\\\ &= \\sum_{n=0}^{N-1} x_{2N}[n] e^{-j \\frac{2\\pi}{2N} n (k/2)} + \\sum_{n=N}^{2N-1} x_{2N}[n] e^{-j \\frac{2\\pi}{2N} nk} \\\\ \\\\ &\\text{recognise } x_{2N}[n] = x_N[n], \\ x_{2N}[n+N] = x_N[n] \\\\ &= \\sum_{n=0}^{N-1} x_{2N}[n] e^{-j \\frac{2\\pi}{2N} n (k/2)} + \\sum_{n=0}^{N-1} x_{2N}[n+N] e^{-j \\frac{2\\pi}{2N} (n+N) k} \\\\ \\\\ &\\text{Expand the 2nd sum's multiplier factor} \\\\ &= \\sum_{n=0}^{N-1} x_N[n] e^{-j \\frac{2\\pi}{N} n (k/2)} + \\sum_{n=0}^{N-1} x_N[n] e^{-j \\frac{2\\pi}{N} (n+N) (k/2)} \\\\ \\\\ &\\text{Replace } e^{-2j\\frac{\\pi}{N}} \\text{ with } W_N \\text{ for simplicity} \\\\ &= \\sum_{n=0}^{N-1} x_N[n] W_N^{nk/2} + W_N^{Nk/2} \\sum_{n=0}^{N-1} x_N[n] W_N^{nk/2} \\\\ \\\\ &\\text{Recognise the sums as N-Point DFT } X[k] \\text{ of } x[n] \\text{, at } \\frac{k}{2} \\\\ &= X_N[k/2] + e^{-j \\pi k} X_N[k/2] \\\\ \\\\ &= X_N[k/2] \\cdot \\big( 1 + (-1)^k \\big) \\\\ \\\\ &= \\begin{cases} 2 X_N[k/2], & \\text{k = 0,2,4,...2N2} \\\\ 0, & \\text{k = 1,3,5,...2N-1} \\end{cases} \\\\\n\\end{align}\n$$\nX_{2N}[k] = 0 ~~,~~ \\text{for}~~ k = 1,3,5...,2N-1\n$$",
+        "edit_acc": 0.14628297362110312
+    }
+]

requirements.txt ADDED Viewed

	@@ -0,0 +1,268 @@

+absl-py==2.2.2
+accelerate==1.0.1
+aiofiles==23.2.1
+aiohappyeyeballs==2.4.4
+aiohttp==3.11.11
+aiosignal==1.3.2
+annotated-types==0.7.0
+antlr4-python3-runtime==4.11.1
+anyio==4.7.0
+asttokens==3.0.0
+async-timeout==5.0.1
+attrs==24.3.0
+audioread==3.0.1
+av==14.0.1
+blis==1.3.0
+boto3==1.35.99
+botocore==1.35.99
+Brotli==1.1.0
+catalogue==2.0.10
+certifi==2024.12.14
+cffi==1.17.1
+charset-normalizer==3.4.1
+choreographer==1.0.9
+click==8.1.8
+cloudpathlib==0.21.1
+cloudpickle==3.1.0
+colorama
+colormath==3.0.0
+confection==0.1.5
+contourpy==1.3.1
+cycler==0.12.1
+cymem==2.0.11
+dataclasses
+datasets==3.1.0
+decorator==5.2.1
+decord==0.6.0
+deepspeed==0.16.7
+dill==0.3.8
+diskcache==5.6.3
+distro==1.9.0
+docopt==0.6.2
+docstring_parser==0.16
+editdistance==0.8.1
+einops==0.8.0
+en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl#sha256=1932429db727d4bff3deed6b34cfc05df17794f4a52eeb26cf8928f7c1a0fb85
+et_xmlfile==2.0.0
+evaluate==0.4.3
+exceptiongroup==1.2.2
+executing==2.2.0
+fastapi==0.115.6
+ffmpy==0.5.0
+filelock==3.16.1
+fire==0.7.0
+flash-attn==2.7.2.post1
+fonttools==4.55.3
+frozenlist==1.5.0
+fsspec==2024.9.0
+gguf==0.9.1
+gradio==5.38.2
+gradio_client==1.11.0
+groovy==0.1.2
+h11==0.14.0
+h5py==3.12.1
+hf-xet==1.1.5
+hjson==3.1.0
+httpcore==1.0.7
+httptools==0.6.4
+httpx==0.28.1
+huggingface-hub==0.33.2
+icecream==2.1.4
+idna==3.10
+imageio==2.37.0
+importlib_metadata==8.5.0
+importlib_resources==6.4.5
+interegular==0.3.3
+iopath
+ipdb==0.13.13
+ipython==8.37.0
+isort==6.0.1
+jedi==0.19.2
+jieba==0.42.1
+Jinja2==3.1.5
+jiter==0.8.2
+jmespath==1.0.1
+joblib==1.4.2
+jsonlines==4.0.0
+jsonschema==4.23.0
+jsonschema-specifications==2024.10.1
+kaleido==1.0.0
+kiwisolver==1.4.8
+langcodes==3.5.0
+language_data==1.3.0
+lark==1.2.2
+latex2sympy2_extended==1.10.2
+lazy_loader==0.4
+Levenshtein==0.26.1
+librosa==0.11.0
+-e git+https://github.com/hiyouga/LLaMA-Factory.git@2e4b94e8a287493dee0bf739794fa084810c23c1#egg=llamafactory
+llvmlite==0.43.0
+lm-format-enforcer==0.10.6
+logistro==1.1.0
+loguru==0.7.3
+marisa-trie==1.2.1
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+math-verify==0.8.0
+matplotlib==3.8.4
+matplotlib-inline==0.1.6
+matplotlib-venn==0.11.10
+mdurl==0.1.2
+mistral_common==1.5.1
+mpmath==1.3.0
+mpxpy==0.0.5
+msgpack==1.1.0
+msgspec==0.18.6
+mssapi==1.1
+mt-longcat==0.1.7
+multidict==6.1.0
+multiprocess==0.70.16
+murmurhash==1.0.13
+nest-asyncio==1.6.0
+networkx==3.4.2
+ninja==1.11.1.3
+nltk==3.9.1
+numba==0.60.0
+numpy==1.26.4
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==9.1.0.70
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-ml-py==12.560.30
+nvidia-nccl-cu12==2.20.5
+nvidia-nvjitlink-cu12==12.6.85
+nvidia-nvtx-cu12==12.1.105
+omegaconf==2.3.0
+openai==0.28.0
+opencv-python==4.11.0.86
+opencv-python-headless==4.11.0.86
+openpyxl==3.1.5
+orjson==3.10.18
+outlines==0.0.46
+packaging==24.2
+pandas==2.2.3
+parso==0.8.4
+partial-json-parser==0.2.1.1.post4
+pdf2image==1.17.0
+peft==0.15.1
+pexpect==4.9.0
+pillow==10.4.0
+platformdirs==4.3.8
+pooch==1.8.2
+portalocker
+preshed==3.0.9
+prometheus-fastapi-instrumentator==7.0.0
+prometheus_client==0.21.1
+prompt_toolkit==3.0.51
+propcache==0.2.1
+protobuf==5.29.2
+psutil==6.1.1
+ptyprocess==0.7.0
+pure_eval==0.2.3
+py-cpuinfo==9.0.0
+pyairports==2.1.1
+pyarrow==18.1.0
+pycocoevalcap==1.2
+pycocotools==2.0.8
+pycountry==24.6.1
+pycparser==2.22
+pydantic==2.11.7
+pydantic_core==2.33.2
+pydub==0.25.1
+Pygments==2.18.0
+pyparsing==3.2.0
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+python-Levenshtein==0.26.1
+python-magic==0.4.27
+python-multipart==0.0.20
+pytz==2024.2
+PyYAML==6.0.2
+pyzmq==26.2.0
+qwen-vl-utils==0.0.8
+RapidFuzz==3.11.0
+ray==2.40.0
+referencing==0.35.1
+regex==2024.11.6
+requests==2.32.3
+requests-toolbelt==1.0.0
+retrying==1.3.4
+rich==13.9.4
+rouge-chinese==1.0.3
+rouge_score==0.1.2
+rpds-py==0.22.3
+ruff==0.11.9
+s3transfer==0.10.4
+safehttpx==0.1.6
+safetensors==0.4.5
+scikit-learn==1.6.1
+scipy==1.14.1
+seaborn==0.13.2
+semantic-version==2.10.0
+sentencepiece==0.2.0
+shellingham==1.5.4
+shtab==1.7.1
+simplejson==3.20.1
+six==1.17.0
+smart-open==7.1.0
+sniffio==1.3.1
+soundfile==0.13.1
+soxr==0.5.0.post1
+spacy==3.8.7
+spacy-legacy==3.0.12
+spacy-loggers==1.0.5
+spacy_pkuseg==1.0.0
+squarify==0.4.4
+srsly==2.5.1
+sse-starlette==2.2.1
+stack-data==0.6.3
+starlette==0.41.3
+sty==1.0.6
+sympy==1.13.3
+tabulate==0.9.0
+termcolor==2.5.0
+textdistance==4.6.3
+thinc==8.3.6
+threadpoolctl==3.6.0
+tiktoken==0.7.0
+timeout-decorator==0.5.0
+timm==1.0.15
+tokenizers==0.21.1
+tomli==2.2.1
+tomlkit==0.12.0
+torch==2.4.0
+torchvision==0.19.0
+tqdm
+traitlets==5.14.3
+transformers==4.51.3
+triton==3.0.0
+trl==0.9.6
+typer==0.15.1
+typing-inspection==0.4.1
+typing_extensions==4.12.2
+tyro==0.8.14
+tzdata==2024.2
+urllib3==2.3.0
+uvicorn==0.34.0
+uvloop==0.21.0
+validators==0.34.0
+vllm==0.6.1
+vllm-flash-attn==2.6.1
+-e git+https://github.com/open-compass/VLMEvalKit.git@502578411f6cead71ef83930dcffea95e79d2356#egg=vlmeval
+wasabi==1.1.3
+watchfiles==1.0.3
+wcwidth==0.2.13
+weasel==0.4.1
+websockets==12.0
+wrapt==1.17.2
+xformers==0.0.27.post2
+XlsxWriter==3.2.2
+xxhash==3.5.0
+yarl==1.18.3
+zh_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/zh_core_web_sm-3.8.0/zh_core_web_sm-3.8.0-py3-none-any.whl#sha256=7de3bd267176b9b2a8defb6997c1cd296da16c57b5e712f72ea44a51755421c8
+zipp==3.21.0