zezeze97 committed on
Commit
82367b7
·
1 Parent(s): be6070c
Files changed (1) hide show
  1. README.md +45 -6
README.md CHANGED
@@ -24,6 +24,28 @@ from PIL import Image
24
  import warnings
25
  import numpy as np
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  # set device
28
  device = 'cuda' # or cpu
29
  torch.set_default_device(device)
@@ -44,8 +66,7 @@ img_path = 'sample/4927.png'
44
  qs = 'As shown in the diagram, AE/AB=1/4, M is the midpoint of segment AC, BE is parallel to CP, EA is parallel to CP. Find the ratio of the length of line BC to the length of line CD.'
45
  prompt = f'Using the provided geometric image and question, first predict the construction_cdl and image_cdl. Then, give a detailed step-by-step solution.\nThe question is:\n{qs}'
46
  text = f'<|im_start|>user\n<image>\n{prompt}<|im_end|>\n<|im_start|>assistant\n'
47
- text_chunks = [tokenizer(chunk).input_ids for chunk in text.split('<image>')]
48
- input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1][1:], dtype=torch.long).unsqueeze(0).to(device)
49
 
50
  # image, sample images can be found in images folder
51
  image = Image.open(img_path).convert('RGB')
@@ -100,6 +121,26 @@ import warnings
100
  import numpy as np
101
  import re
102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
  def parse_cdl(input_string):
105
  # 使用正则表达式查找各个部分
@@ -165,8 +206,7 @@ image = Image.open(img_path).convert('RGB')
165
  # formalization
166
  prompt = 'Based on the image, first describe what you see in the figure, then predict the construction_cdl and image_cdl and calibrate it.'
167
  text = f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n<image>\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
168
- text_chunks = [formalization_tokenizer(chunk).input_ids for chunk in text.split('<image>')]
169
- input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1][1:], dtype=torch.long).unsqueeze(0).to(device)
170
 
171
  # generate
172
  image_tensor = formalization_model.process_images([image], formalization_model.config).to(dtype=formalization_model.dtype, device=device)
@@ -199,8 +239,7 @@ predict_imgCDL = cdl_info['image_cdl']
199
  qs = 'As shown in the diagram, AE/AB=1/4, M is the midpoint of segment AC, BE is parallel to CP, EA is parallel to CP. Find the ratio of the length of line BC to the length of line CD.'
200
  prompt = f'Using the provided geometric image and the possibly erroneous construction_cdl and image_cdl, first calibrate the construction_cdl and image_cdl, then give a detailed step-by-step solution to the question.\nThe initial construction_cdl is:\n{predict_consCDL}\nThe initial image_cdl is:\n{predict_imgCDL}\nThe question is:\n{qs}'
201
  text = f'<|im_start|>user\n<image>\n{prompt}<|im_end|>\n<|im_start|>assistant\n'
202
- text_chunks = [reason_tokenizer(chunk).input_ids for chunk in text.split('<image>')]
203
- input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1][1:], dtype=torch.long).unsqueeze(0).to(device)
204
 
205
 
206
 
 
24
  import warnings
25
  import numpy as np
26
 
27
+
28
def tokenizer_image_token(prompt, tokenizer, image_token_index, return_tensors=None,
                          *, image_placeholder='<image>'):
    """Tokenize ``prompt`` and put ``image_token_index`` where each image placeholder was.

    The prompt is split on ``image_placeholder``; every piece is tokenized on
    its own with ``tokenizer(piece).input_ids`` and the pieces are joined back
    together with a single ``image_token_index`` between neighbours.

    If the first piece starts with ``tokenizer.bos_token_id``, that token is
    kept once at the very beginning and the first token of *every* piece is
    dropped, on the assumption that the tokenizer prepended it to each piece.
    When no such leading token is found, the pieces are used unchanged.

    Args:
        prompt: Text containing zero or more occurrences of ``image_placeholder``.
        tokenizer: Callable returning an object with an ``input_ids`` list; it
            must also expose a ``bos_token_id`` attribute (which may be ``None``).
        image_token_index: Integer id inserted in place of each placeholder.
        return_tensors: ``None`` to get a plain list of ints, or ``'pt'`` to get
            a 1-D ``torch.long`` tensor.
        image_placeholder: Marker string to split on (keyword-only).

    Returns:
        A list of token ids, or a ``torch.Tensor`` when ``return_tensors == 'pt'``.

    Raises:
        ValueError: If ``return_tensors`` is neither ``None`` nor ``'pt'``.
    """
    prompt_chunks = [tokenizer(chunk).input_ids for chunk in prompt.split(image_placeholder)]

    input_ids = []
    offset = 0
    if prompt_chunks and prompt_chunks[0] and prompt_chunks[0][0] == tokenizer.bos_token_id:
        # Keep one leading BOS and skip the copy at the start of each chunk.
        offset = 1
        input_ids.append(prompt_chunks[0][0])

    for position, chunk in enumerate(prompt_chunks):
        if position > 0:
            # Exactly one image token between consecutive text chunks.
            input_ids.append(image_token_index)
        input_ids.extend(chunk[offset:])

    if return_tensors is None:
        return input_ids
    if return_tensors == 'pt':
        return torch.tensor(input_ids, dtype=torch.long)
    raise ValueError(f'Unsupported tensor type: {return_tensors}')
48
+
49
  # set device
50
  device = 'cuda' # or cpu
51
  torch.set_default_device(device)
 
66
  qs = 'As shown in the diagram, AE/AB=1/4, M is the midpoint of segment AC, BE is parallel to CP, EA is parallel to CP. Find the ratio of the length of line BC to the length of line CD.'
67
  prompt = f'Using the provided geometric image and question, first predict the construction_cdl and image_cdl. Then, give a detailed step-by-step solution.\nThe question is:\n{qs}'
68
  text = f'<|im_start|>user\n<image>\n{prompt}<|im_end|>\n<|im_start|>assistant\n'
69
# Place the ids on the configured `device` so the example also runs with device = 'cpu'.
input_ids = tokenizer_image_token(text, tokenizer, -200, return_tensors='pt').unsqueeze(0).to(device)
 
70
 
71
  # image, sample images can be found in images folder
72
  image = Image.open(img_path).convert('RGB')
 
121
  import numpy as np
122
  import re
123
 
124
def tokenizer_image_token(prompt, tokenizer, image_token_index, return_tensors=None,
                          *, image_placeholder='<image>'):
    """Tokenize ``prompt`` and put ``image_token_index`` where each image placeholder was.

    The prompt is split on ``image_placeholder``; every piece is tokenized on
    its own with ``tokenizer(piece).input_ids`` and the pieces are joined back
    together with a single ``image_token_index`` between neighbours.

    If the first piece starts with ``tokenizer.bos_token_id``, that token is
    kept once at the very beginning and the first token of *every* piece is
    dropped, on the assumption that the tokenizer prepended it to each piece.
    When no such leading token is found, the pieces are used unchanged.

    Args:
        prompt: Text containing zero or more occurrences of ``image_placeholder``.
        tokenizer: Callable returning an object with an ``input_ids`` list; it
            must also expose a ``bos_token_id`` attribute (which may be ``None``).
        image_token_index: Integer id inserted in place of each placeholder.
        return_tensors: ``None`` to get a plain list of ints, or ``'pt'`` to get
            a 1-D ``torch.long`` tensor.
        image_placeholder: Marker string to split on (keyword-only).

    Returns:
        A list of token ids, or a ``torch.Tensor`` when ``return_tensors == 'pt'``.

    Raises:
        ValueError: If ``return_tensors`` is neither ``None`` nor ``'pt'``.
    """
    prompt_chunks = [tokenizer(chunk).input_ids for chunk in prompt.split(image_placeholder)]

    input_ids = []
    offset = 0
    if prompt_chunks and prompt_chunks[0] and prompt_chunks[0][0] == tokenizer.bos_token_id:
        # Keep one leading BOS and skip the copy at the start of each chunk.
        offset = 1
        input_ids.append(prompt_chunks[0][0])

    for position, chunk in enumerate(prompt_chunks):
        if position > 0:
            # Exactly one image token between consecutive text chunks.
            input_ids.append(image_token_index)
        input_ids.extend(chunk[offset:])

    if return_tensors is None:
        return input_ids
    if return_tensors == 'pt':
        return torch.tensor(input_ids, dtype=torch.long)
    raise ValueError(f'Unsupported tensor type: {return_tensors}')
144
 
145
  def parse_cdl(input_string):
146
  # 使用正则表达式查找各个部分
 
206
  # formalization
207
  prompt = 'Based on the image, first describe what you see in the figure, then predict the construction_cdl and image_cdl and calibrate it.'
208
  text = f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n<image>\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
209
# Place the ids on the same `device` the image tensor below is moved to, instead of forcing CUDA.
input_ids = tokenizer_image_token(text, formalization_tokenizer, -200, return_tensors='pt').unsqueeze(0).to(device)
 
210
 
211
  # generate
212
  image_tensor = formalization_model.process_images([image], formalization_model.config).to(dtype=formalization_model.dtype, device=device)
 
239
  qs = 'As shown in the diagram, AE/AB=1/4, M is the midpoint of segment AC, BE is parallel to CP, EA is parallel to CP. Find the ratio of the length of line BC to the length of line CD.'
240
  prompt = f'Using the provided geometric image and the possibly erroneous construction_cdl and image_cdl, first calibrate the construction_cdl and image_cdl, then give a detailed step-by-step solution to the question.\nThe initial construction_cdl is:\n{predict_consCDL}\nThe initial image_cdl is:\n{predict_imgCDL}\nThe question is:\n{qs}'
241
  text = f'<|im_start|>user\n<image>\n{prompt}<|im_end|>\n<|im_start|>assistant\n'
242
# Place the ids on the configured `device` rather than hard-coding CUDA.
input_ids = tokenizer_image_token(text, reason_tokenizer, -200, return_tensors='pt').unsqueeze(0).to(device)
 
243
 
244
 
245