MarvelCQ committed on
Commit
c48e6ea
·
verified ·
1 Parent(s): 836954a

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +93 -3
README.md CHANGED
@@ -1,3 +1,93 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
---
license: mit
---

## 简介

## 推理
8
+ ```python
9
+ from transformers import AutoProcessor
10
+ from vllm import LLM, SamplingParams
11
+ from qwen_vl_utils import process_vision_info
12
+
13
+ # 模型路径
14
+ model_path = "path/DianJin-OCR-R1/seal_sft"
15
+ # 图片路径
16
+ image_path = "example.jpg"
17
+
18
+ instruction = "请识别图片中的印章抬头。"
19
+ tool1 = "IXMTD5JPXGG9FEG10N 发票专用章 湄潭县何彬私房菜店"
20
+ tool2 = "上海鸿路何彬私房菜连锁店 发票专用章"
21
+ tools = """<tool>
22
+ 以下是其它工具对该印章的识别内容:
23
+ {{
24
+ "ocr_tool_1": "{tool1}",
25
+ "ocr_tool_2": "{tool2}"
26
+ }}
27
+ </tool>
28
+ """
29
+
30
+ llm = LLM(
31
+ model=model_path,
32
+ limit_mm_per_prompt={"image": 10, "video": 10},
33
+ gpu_memory_utilization=0.4,
34
+ )
35
+ processor = AutoProcessor.from_pretrained(model_path)
36
+
37
+ messages = [
38
+ {
39
+ "role": "user",
40
+ "content": [
41
+ {"type": "image", "image": image_path},
42
+ {"type": "text", "text": instruction},
43
+ ],
44
+ },
45
+ ]
46
+
47
+ prompt = processor.apply_chat_template(
48
+ messages,
49
+ tokenize=False,
50
+ add_generation_prompt=True,
51
+ )
52
+ image_inputs, video_inputs, _ = process_vision_info(messages, return_video_kwargs=True)
53
+
54
+ mm_data = {}
55
+ if image_inputs is not None:
56
+ mm_data["image"] = image_inputs
57
+
58
+ sampling_params = SamplingParams(
59
+ temperature=0.0,
60
+ top_p=1.0,
61
+ repetition_penalty=1.05,
62
+ max_tokens=4096,
63
+ stop=["<tool>"],
64
+ )
65
+
66
+ llm_inputs = [
67
+ {
68
+ "prompt": prompt,
69
+ "multi_modal_data": mm_data
70
+ }
71
+ ]
72
+
73
+ outputs = llm.generate(llm_inputs, sampling_params=sampling_params)
74
+ think_content = outputs[0].outputs[0].text.strip()
75
+ print("#" * 20 + " think " + "#" * 20)
76
+ print(think_content)
77
+
78
+ llm_inputs[0]["prompt"] = (
79
+ llm_inputs[0]["prompt"].strip()
80
+ + "\n"
81
+ + think_content
82
+ + "\n"
83
+ + tools.format(tool1=tool1, tool2=tool2)
84
+ )
85
+ sampling_params = SamplingParams(
86
+ temperature=0.0, top_p=1.0, repetition_penalty=1.05, max_tokens=4096
87
+ )
88
+ outputs = llm.generate(llm_inputs, sampling_params=sampling_params)
89
+ rethink_content = outputs[0].outputs[0].text.strip()
90
+ print("#" * 20 + " rethink " + "#" * 20)
91
+ print(rethink_content)
92
+
93
+ ```