zezeze97 committed on
Commit
0fa8057
·
1 Parent(s): 5f96533
Files changed (1) hide show
  1. README.md +44 -6
README.md CHANGED
@@ -24,6 +24,27 @@ from PIL import Image
24
  import warnings
25
  import numpy as np
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  # set device
28
  device = 'cuda' # or cpu
29
  torch.set_default_device(device)
@@ -45,8 +66,7 @@ img_path = 'sample/4927.png'
45
  qs = 'As shown in the diagram, AE/AB=1/4, M is the midpoint of segment AC, BE is parallel to CP, EA is parallel to CP. Find the ratio of the length of line BC to the length of line CD.'
46
  prompt = f'Using the provided geometric image and question, first predict the construction_cdl and image_cdl. Then, give a detailed step-by-step solution.\nThe question is:\n{qs}'
47
  text = f'<|im_start|>user\n<image>\n{prompt}<|im_end|>\n<|im_start|>assistant\n'
48
- text_chunks = [tokenizer(chunk).input_ids for chunk in text.split('<image>')]
49
- input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1][1:], dtype=torch.long).unsqueeze(0).to(device)
50
 
51
  # image, sample images can be found in images folder
52
  image = Image.open(img_path).convert('RGB')
@@ -101,6 +121,26 @@ import warnings
101
  import numpy as np
102
  import re
103
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
  def parse_cdl(input_string):
106
  # Use regular expressions to locate each section
@@ -166,8 +206,7 @@ image = Image.open(img_path).convert('RGB')
166
  # formalization
167
  prompt = 'Based on the image, first describe what you see in the figure, then predict the construction_cdl and image_cdl and calibrate it.'
168
  text = f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n<image>\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
169
- text_chunks = [formalization_tokenizer(chunk).input_ids for chunk in text.split('<image>')]
170
- input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1][1:], dtype=torch.long).unsqueeze(0).to(device)
171
 
172
  # generate
173
  image_tensor = formalization_model.process_images([image], formalization_model.config).to(dtype=formalization_model.dtype, device=device)
@@ -200,8 +239,7 @@ predict_imgCDL = cdl_info['image_cdl']
200
  qs = 'As shown in the diagram, AE/AB=1/4, M is the midpoint of segment AC, BE is parallel to CP, EA is parallel to CP. Find the ratio of the length of line BC to the length of line CD.'
201
  prompt = f'Using the provided geometric image and the possibly erroneous construction_cdl and image_cdl, first calibrate the construction_cdl and image_cdl, then give a detailed step-by-step solution to the question.\nThe initial construction_cdl is:\n{predict_consCDL}\nThe initial image_cdl is:\n{predict_imgCDL}\nThe question is:\n{qs}'
202
  text = f'<|im_start|>user\n<image>\n{prompt}<|im_end|>\n<|im_start|>assistant\n'
203
- text_chunks = [reason_tokenizer(chunk).input_ids for chunk in text.split('<image>')]
204
- input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1][1:], dtype=torch.long).unsqueeze(0).to(device)
205
 
206
 
207
 
 
24
  import warnings
25
  import numpy as np
26
 
27
def tokenizer_image_token(prompt, tokenizer, image_token_index, return_tensors=None):
    """Tokenize ``prompt``, placing ``image_token_index`` wherever ``<image>`` appears.

    The prompt is split on the literal ``<image>`` marker and every piece is
    tokenized on its own via ``tokenizer(piece).input_ids``. The pieces are
    then joined with exactly one ``image_token_index`` between each
    consecutive pair.

    If the ids of the first piece start with ``tokenizer.bos_token_id``, that
    id is emitted once at the front and the leading id of *every* piece is
    skipped (the code assumes the tokenizer prepends the same BOS id on each
    call).

    Args:
        prompt: Text that may contain zero or more ``<image>`` markers.
        tokenizer: Callable returning an object with an ``input_ids`` list;
            must also expose ``bos_token_id``.
        image_token_index: Id inserted in place of each ``<image>`` marker.
        return_tensors: ``None`` for a plain list, or ``'pt'`` for a
            ``torch.long`` tensor built from that list.

    Returns:
        The combined ids, as a list or as a 1-D ``torch.long`` tensor.

    Raises:
        ValueError: If ``return_tensors`` is neither ``None`` nor ``'pt'``.
    """
    # Reject an unsupported output format before doing any tokenization work.
    if return_tensors not in (None, 'pt'):
        raise ValueError(f'Unsupported tensor type: {return_tensors}')

    # str.split always yields at least one piece, so prompt_chunks[0] exists.
    prompt_chunks = [tokenizer(chunk).input_ids for chunk in prompt.split('<image>')]

    first_chunk = prompt_chunks[0]
    has_bos = bool(first_chunk) and first_chunk[0] == tokenizer.bos_token_id
    # With a BOS-prepending tokenizer, skip one leading id per piece.
    offset = 1 if has_bos else 0

    # Keep a single BOS at the very front when the tokenizer adds one.
    input_ids = [first_chunk[0]] if has_bos else []
    for position, chunk in enumerate(prompt_chunks):
        if position > 0:
            # One placeholder id per ``<image>`` marker between pieces.
            input_ids.append(image_token_index)
        input_ids.extend(chunk[offset:])

    if return_tensors == 'pt':
        return torch.tensor(input_ids, dtype=torch.long)
    return input_ids
47
+
48
  # set device
49
  device = 'cuda' # or cpu
50
  torch.set_default_device(device)
 
66
  qs = 'As shown in the diagram, AE/AB=1/4, M is the midpoint of segment AC, BE is parallel to CP, EA is parallel to CP. Find the ratio of the length of line BC to the length of line CD.'
67
  prompt = f'Using the provided geometric image and question, first predict the construction_cdl and image_cdl. Then, give a detailed step-by-step solution.\nThe question is:\n{qs}'
68
  text = f'<|im_start|>user\n<image>\n{prompt}<|im_end|>\n<|im_start|>assistant\n'
69
# Move to the configured `device` rather than forcing CUDA, so CPU runs also work.
input_ids = tokenizer_image_token(text, tokenizer, -200, return_tensors='pt').unsqueeze(0).to(device)
 
70
 
71
  # image, sample images can be found in images folder
72
  image = Image.open(img_path).convert('RGB')
 
121
  import numpy as np
122
  import re
123
 
124
def tokenizer_image_token(prompt, tokenizer, image_token_index, return_tensors=None):
    """Tokenize ``prompt``, placing ``image_token_index`` wherever ``<image>`` appears.

    The prompt is split on the literal ``<image>`` marker and every piece is
    tokenized on its own via ``tokenizer(piece).input_ids``. The pieces are
    then joined with exactly one ``image_token_index`` between each
    consecutive pair.

    If the ids of the first piece start with ``tokenizer.bos_token_id``, that
    id is emitted once at the front and the leading id of *every* piece is
    skipped (the code assumes the tokenizer prepends the same BOS id on each
    call).

    Args:
        prompt: Text that may contain zero or more ``<image>`` markers.
        tokenizer: Callable returning an object with an ``input_ids`` list;
            must also expose ``bos_token_id``.
        image_token_index: Id inserted in place of each ``<image>`` marker.
        return_tensors: ``None`` for a plain list, or ``'pt'`` for a
            ``torch.long`` tensor built from that list.

    Returns:
        The combined ids, as a list or as a 1-D ``torch.long`` tensor.

    Raises:
        ValueError: If ``return_tensors`` is neither ``None`` nor ``'pt'``.
    """
    # Reject an unsupported output format before doing any tokenization work.
    if return_tensors not in (None, 'pt'):
        raise ValueError(f'Unsupported tensor type: {return_tensors}')

    # str.split always yields at least one piece, so prompt_chunks[0] exists.
    prompt_chunks = [tokenizer(chunk).input_ids for chunk in prompt.split('<image>')]

    first_chunk = prompt_chunks[0]
    has_bos = bool(first_chunk) and first_chunk[0] == tokenizer.bos_token_id
    # With a BOS-prepending tokenizer, skip one leading id per piece.
    offset = 1 if has_bos else 0

    # Keep a single BOS at the very front when the tokenizer adds one.
    input_ids = [first_chunk[0]] if has_bos else []
    for position, chunk in enumerate(prompt_chunks):
        if position > 0:
            # One placeholder id per ``<image>`` marker between pieces.
            input_ids.append(image_token_index)
        input_ids.extend(chunk[offset:])

    if return_tensors == 'pt':
        return torch.tensor(input_ids, dtype=torch.long)
    return input_ids
144
 
145
  def parse_cdl(input_string):
146
  # Use regular expressions to locate each section
 
206
  # formalization
207
  prompt = 'Based on the image, first describe what you see in the figure, then predict the construction_cdl and image_cdl and calibrate it.'
208
  text = f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n<image>\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
209
# Move to the configured `device` rather than forcing CUDA, so CPU runs also work.
input_ids = tokenizer_image_token(text, formalization_tokenizer, -200, return_tensors='pt').unsqueeze(0).to(device)
 
210
 
211
  # generate
212
  image_tensor = formalization_model.process_images([image], formalization_model.config).to(dtype=formalization_model.dtype, device=device)
 
239
  qs = 'As shown in the diagram, AE/AB=1/4, M is the midpoint of segment AC, BE is parallel to CP, EA is parallel to CP. Find the ratio of the length of line BC to the length of line CD.'
240
  prompt = f'Using the provided geometric image and the possibly erroneous construction_cdl and image_cdl, first calibrate the construction_cdl and image_cdl, then give a detailed step-by-step solution to the question.\nThe initial construction_cdl is:\n{predict_consCDL}\nThe initial image_cdl is:\n{predict_imgCDL}\nThe question is:\n{qs}'
241
  text = f'<|im_start|>user\n<image>\n{prompt}<|im_end|>\n<|im_start|>assistant\n'
242
# Move to the configured `device` rather than forcing CUDA, so CPU runs also work.
input_ids = tokenizer_image_token(text, reason_tokenizer, -200, return_tensors='pt').unsqueeze(0).to(device)
 
243
 
244
 
245