zezeze97 commited on
Commit
b92e464
·
1 Parent(s): 86d1da2
Files changed (2) hide show
  1. README.md +21 -110
  2. sample/DFE-GPS.png +0 -0
README.md CHANGED
@@ -1,111 +1,25 @@
1
  ---
2
  license: apache-2.0
3
  ---
4
- # 基于FormalGeo7K的推理模型
 
5
 
6
- ## 快速开始
7
- 在运行脚本之前,首先安装如下必要的依赖。
8
 
9
- ```shell
10
- pip install torch transformers==4.40.0 accelerate pillow sentencepiece
11
- ```
12
-
13
- ```python
14
- import torch
15
- import transformers
16
- from transformers import AutoModelForCausalLM, AutoTokenizer
17
- from PIL import Image
18
- import warnings
19
- import numpy as np
20
-
21
- # set device
22
- device = 'cuda' # or cpu
23
- torch.set_default_device(device)
24
-
25
- # create model
26
- model = AutoModelForCausalLM.from_pretrained(
27
- 'NaughtyDog97/FormalEnhencedGPS-9B',
28
- torch_dtype=torch.float16, # float32 for cpu
29
- device_map='auto',
30
- trust_remote_code=True)
31
- tokenizer = AutoTokenizer.from_pretrained(
32
- 'NaughtyDog97/FormalEnhencedGPS-9B',
33
- use_fast=False,
34
- trust_remote_code=True)
35
-
36
- # text prompt
37
- img_path = 'sample/4927.png'
38
- qs = 'As shown in the diagram, AE/AB=1/4, M is the midpoint of segment AC, BE is parallel to CP, EA is parallel to CP. Find the ratio of the length of line BC to the length of line CD.'
39
- prompt = f'Using the provided geometric image and question, first predict the construction_cdl and image_cdl. Then, give a detailed step-by-step solution.\nThe question is:\n{qs}'
40
- text = f'<|im_start|>user\n<image>\n{prompt}<|im_end|>\n<|im_start|>assistant\n'
41
-
42
- def tokenizer_image_token(prompt, tokenizer, image_token_index, return_tensors=None):
43
- prompt_chunks = [tokenizer(chunk).input_ids for chunk in prompt.split('<image>')]
44
-
45
- def insert_separator(X, sep):
46
- return [ele for sublist in zip(X, [sep] * len(X)) for ele in sublist][:-1]
47
 
48
- input_ids = []
49
- offset = 0
50
- if len(prompt_chunks) > 0 and len(prompt_chunks[0]) > 0 and prompt_chunks[0][0] == tokenizer.bos_token_id:
51
- offset = 1
52
- input_ids.append(prompt_chunks[0][0])
53
 
54
- for x in insert_separator(prompt_chunks, [image_token_index] * (offset + 1)):
55
- input_ids.extend(x[offset:])
56
-
57
- if return_tensors is not None:
58
- if return_tensors == 'pt':
59
- return torch.tensor(input_ids, dtype=torch.long)
60
- raise ValueError(f'Unsupported tensor type: {return_tensors}')
61
- return input_ids
62
-
63
- input_ids = tokenizer_image_token(text, tokenizer, -200, return_tensors='pt').unsqueeze(0).cuda()
64
-
65
- # image, sample images can be found in images folder
66
- image = Image.open(img_path).convert('RGB')
67
-
68
- image_tensor = model.process_images([image], model.config).to(dtype=model.dtype, device=device)
69
-
70
- # generate
71
- with torch.inference_mode():
72
- output_ids = model.generate(
73
- input_ids,
74
- images=image_tensor,
75
- do_sample=False,
76
- temperature=None,
77
- top_p=None,
78
- top_k=None,
79
- num_beams=1,
80
- max_new_tokens=3500,
81
- eos_token_id=tokenizer.eos_token_id,
82
- repetition_penalty=None,
83
- use_cache=True
84
- )[0]
85
-
86
-
87
- respones = tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
88
- print(respones)
89
 
 
 
90
  ```
91
 
92
- 我们的模型支持的求解方式有如下三种:
93
- ```python
94
- # Q => Predicted CDL + CoT Answer
95
- prompt = f'Using the provided geometric image and question, first predict the construction_cdl and image_cdl. Then, give a detailed step-by-step solution.\nThe question is:\n{qs}'
96
-
97
-
98
- # Q + Predicted CDL => CoT Answer
99
- prompt = f'Using the provided geometric image, construction_cdl, image_cdl, and question, give a detailed step-by-step solution. Note that there may be minor errors in the construction_cdl and image_cdl.\nThe construction_cdl is:\n{predict_consCDL}\nThe image_cdl is:\n{predict_imgCDL}\nThe question is:\n{qs}'
100
-
101
 
102
- # Q + Predicted CDL => Calibrated CDL + CoT Answer
103
- prompt = f'Using the provided geometric image and the possibly erroneous construction_cdl and image_cdl, first calibrate the construction_cdl and image_cdl, then give a detailed step-by-step solution to the question.\nThe initial construction_cdl is:\n{predict_consCDL}\nThe initial image_cdl is:\n{predict_imgCDL}\nThe question is:\n{qs}'
104
-
105
-
106
- ```
107
-
108
- ## 结合Formalization模型的推理
109
  ```python
110
  import torch
111
  import transformers
@@ -137,7 +51,6 @@ def tokenizer_image_token(prompt, tokenizer, image_token_index, return_tensors=N
137
  return input_ids
138
 
139
  def parse_cdl(input_string):
140
- # 使用正则表达式查找各个部分
141
  patterns = {
142
  'construction_cdl': r'(?:The )?(?:calibrate )?construction_cdl(?: is)?:\n(.*?)(?=\n(?:The )?(?:calibrate )?\w+_cdl is:|\n(?:The )?(?:calibrate )?\w+_cdl:|\nSolution is:|\Z)',
143
  'image_cdl': r'(?:The )?(?:calibrate )?image_cdl(?: is)?:\n(.*?)(?=\n(?:The )?(?:calibrate )?\w+_cdl is:|\n(?:The )?(?:calibrate )?\w+_cdl:|\nSolution is:|\Z)',
@@ -146,8 +59,6 @@ def parse_cdl(input_string):
146
  }
147
 
148
  results = {}
149
-
150
- # 优先匹配包含"calibrate"的版本
151
  for key, pattern in patterns.items():
152
  pattern = pattern.replace("(?:calibrate )?", "(?:calibrate )")
153
  match = re.search(pattern, input_string, re.DOTALL)
@@ -169,25 +80,25 @@ torch.set_default_device(device)
169
 
170
  # create model
171
  formalization_model = AutoModelForCausalLM.from_pretrained(
172
- 'NaughtyDog97/GeoFormalizer',
173
  torch_dtype=torch.float16, # float32 for cpu
174
  device_map='auto',
175
  trust_remote_code=True)
176
 
177
  formalization_tokenizer = AutoTokenizer.from_pretrained(
178
- 'NaughtyDog97/GeoFormalizer',
179
  use_fast=True,
180
  padding_side="right",
181
  trust_remote_code=True)
182
 
183
 
184
  reason_model = AutoModelForCausalLM.from_pretrained(
185
- 'NaughtyDog97/FormalEnhencedGPS-9B',
186
  torch_dtype=torch.float16, # float32 for cpu
187
  device_map='auto',
188
  trust_remote_code=True)
189
  reason_tokenizer = AutoTokenizer.from_pretrained(
190
- 'NaughtyDog97/FormalEnhencedGPS-9B',
191
  use_fast=False,
192
  trust_remote_code=True)
193
 
@@ -257,13 +168,13 @@ with torch.inference_mode():
257
  respones = reason_tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
258
  print(f'Reasoning steps is\n{respones}')
259
 
260
-
261
-
262
  ```
263
 
264
 
265
 
266
- ## Performance
267
- | | Q => Predicted CDL + CoT Answer | Q + Predicted CDL => CoT Answer | Q + Predicted CDL => Calibrated CDL + CoT Answer |
268
- |-----|-------------------------------------|--------------------------------------|------------------------------------------------------|
269
- | siglip-0.4B-yi1.5-9B | 63.92/73.30 | 63.59/74.27 | 65.05/75.24 |
 
 
 
1
  ---
2
  license: apache-2.0
3
  ---
4
+ # Diagram Formalization Enhanced Multi-Modal Geometry Problem Solver
5
+ ## Model Structure
6
 
7
+ <img src="sample/DFE-GPS.png" alt="Alt text" width="30%" height="auto">
 
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
+ - **Diagram Encoder**: [siglip-so400m-patch14-384](https://huggingface.co/google/siglip-so400m-patch14-384)
11
+ - **Lightweight LLM**: [Qwen2-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2-0.5B-Instruct)
12
+ - **LLM**: [Yi-1.5-9B-Chat](https://huggingface.co/01-ai/Yi-1.5-9B-Chat)
 
 
13
 
14
+ ## Quick Start
15
+ Before running the script, install the following necessary dependencies.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
+ ```shell
18
+ pip install torch transformers==4.40.0 accelerate pillow sentencepiece
19
  ```
20
 
21
+ You can solve geometric problems using the following script. First, formalize the geometric images with the Diagram Formalizer, and then use the multi-modal reasoning model for problem-solving:
 
 
 
 
 
 
 
 
22
 
 
 
 
 
 
 
 
23
  ```python
24
  import torch
25
  import transformers
 
51
  return input_ids
52
 
53
  def parse_cdl(input_string):
 
54
  patterns = {
55
  'construction_cdl': r'(?:The )?(?:calibrate )?construction_cdl(?: is)?:\n(.*?)(?=\n(?:The )?(?:calibrate )?\w+_cdl is:|\n(?:The )?(?:calibrate )?\w+_cdl:|\nSolution is:|\Z)',
56
  'image_cdl': r'(?:The )?(?:calibrate )?image_cdl(?: is)?:\n(.*?)(?=\n(?:The )?(?:calibrate )?\w+_cdl is:|\n(?:The )?(?:calibrate )?\w+_cdl:|\nSolution is:|\Z)',
 
59
  }
60
 
61
  results = {}
 
 
62
  for key, pattern in patterns.items():
63
  pattern = pattern.replace("(?:calibrate )?", "(?:calibrate )")
64
  match = re.search(pattern, input_string, re.DOTALL)
 
80
 
81
  # create model
82
  formalization_model = AutoModelForCausalLM.from_pretrained(
83
+ 'NaughtyDog97/DiagramFormalizer',
84
  torch_dtype=torch.float16, # float32 for cpu
85
  device_map='auto',
86
  trust_remote_code=True)
87
 
88
  formalization_tokenizer = AutoTokenizer.from_pretrained(
89
+ 'NaughtyDog97/DiagramFormalizer',
90
  use_fast=True,
91
  padding_side="right",
92
  trust_remote_code=True)
93
 
94
 
95
  reason_model = AutoModelForCausalLM.from_pretrained(
96
+ 'NaughtyDog97/DFE-GPS-9B',
97
  torch_dtype=torch.float16, # float32 for cpu
98
  device_map='auto',
99
  trust_remote_code=True)
100
  reason_tokenizer = AutoTokenizer.from_pretrained(
101
+ 'NaughtyDog97/DFE-GPS-9B',
102
  use_fast=False,
103
  trust_remote_code=True)
104
 
 
168
  respones = reason_tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
169
  print(f'Reasoning steps is\n{respones}')
170
 
 
 
171
  ```
172
 
173
 
174
 
175
+ ## Performance of DFE-GPS on the FormalGeo7K test set
176
+
177
+ | Model | Choice Acc | OpenEnd Acc | Process Evaluation Score |
178
+ |-------|------------|-------------|--------------------------|
179
+ | DFE-GPS-9B | 77.05 | 68.67 | 76.00 |
180
+ | DFE-GPS-34B | **82.38** | **75.33** | **79.07** |
sample/DFE-GPS.png ADDED