GSheep committed on
Commit
2d3c5df
·
1 Parent(s): cb9f2b6

add score:0.224 and score:0.26

Browse files
LLaVA-MOSS2/add_RUC.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import jsonlines
3
+ import os
4
+
5
+ dir_path = '/root/.cache/huggingface/hub/datasets--RUCAIBox--gaokao-bench/snapshots/49877cf53b6db9c24d7d285161fc12bba2f85d29/test'
6
+ files = os.listdir(dir_path)
7
+
8
+ subject = [ 'Chemistry', 'Geography', 'Math', 'History', 'Biology', 'Political', 'Chinese', 'Physics' ]
9
+
10
+ len = 0
11
+ data = []
12
+ for file in files:
13
+ if 'English' in file:
14
+ continue
15
+ data_item = {}
16
+ for sub in subject:
17
+ if sub in file:
18
+ data_item['keyword'] = sub
19
+ data_item_list = []
20
+ path = os.path.join(dir_path, file)
21
+ with open(path, 'r+', encoding='utf-8') as file:
22
+ for line in jsonlines.Reader(file):
23
+ dict_item = {}
24
+ dict_item['question'] = line['question']
25
+ dict_item['answer'] = line['answer']
26
+ dict_item['analysis'] = line['analysis']
27
+ # len += 1
28
+
29
+ # dict_item['image'] = ""
30
+
31
+ # conversion = []
32
+ # human = {}
33
+ # human['from'] = 'human'
34
+ # human['value'] = line['question']
35
+ # gpt = {}
36
+ # gpt['from'] = 'gpt'
37
+ # result = line['analysis']
38
+ # result += "答案是:" + ''.join(line['answer'])
39
+ # gpt['value'] = result
40
+ # conversion.append(human)
41
+ # conversion.append(gpt)
42
+ # dict_item['conversations'] = conversion
43
+
44
+ print(dict_item)
45
+
46
+ data_item_list.append(dict_item)
47
+ data_item['question'] = data_item_list
48
+ data.append(data_item)
49
+
50
+ with open('RUC_RAG.json', 'w', encoding='utf-8') as file:
51
+ # 使用json.dump()函数将字典写入文件
52
+ json.dump(data, file, ensure_ascii=False, indent=4)
LLaVA-MOSS2/add_cmmlu.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import os
3
+ import json
4
+
5
+ with open('./political_data_with_extra_data.json', 'r', encoding='utf-8') as file:
6
+ data = json.load(file)
7
+ len = len(data)
8
+
9
+ final_folder = 'playground/data/cmmlu'
10
+
11
+ files = os.listdir(final_folder)
12
+
13
+ selected_files = ['combined_anatomy.csv','combined_ancient_chinese.csv','combined_arts.csv','combined_chinese_civil_service_exam.csv','combined_chinese_foreign_policy.csv',
14
+ 'combined_chinese_history.csv','combined_college_education.csv', 'combined_college_engineering_hydrology.csv', 'combined_college_mathematics.csv', 'combined_college_medicine.csv',
15
+ 'combined_conceptual_physics.csv','combined_electrical_engineering.csv','combined_elementary_mathematics.csv','combined_food_science.csv',
16
+ 'combined_genetics.csv', 'combined_high_school_biology.csv', 'combined_high_school_chemistry.csv','combined_high_school_geography.csv','combined_high_school_mathematics.csv',
17
+ 'combined_high_school_physics.csv','combined_high_school_politics.csv','combined_legal_and_moral_basis.csv','combined_management.csv','combined_marxist_theory.csv',
18
+ 'combined_modern_chinese.csv','combined_philosophy.csv','combined_virology.csv','combined_world_history.csv']
19
+ cmmlu_list = []
20
+ for file_name in selected_files:
21
+ path = os.path.join(final_folder, file_name)
22
+ df = pd.read_csv(path)
23
+
24
+ for index, row in df.iterrows():
25
+ dict_item = {}
26
+ dict_item['id'] = str(len)
27
+ len+=1
28
+
29
+ dict_item['image'] = ""
30
+
31
+ conversion = []
32
+ human = {}
33
+ human['from'] = 'human'
34
+ question = row['Question'] + '\nA.' + row['A'] + '\nB.' + row['B'] + '\nC.' + row['C'] + '\nD' + row['D'] + '\n'
35
+ human['value'] = question
36
+ gpt = {}
37
+ gpt['from'] = 'gpt'
38
+ result = "答案是:" + row['Answer']
39
+ gpt['value'] = result
40
+ conversion.append(human)
41
+ conversion.append(gpt)
42
+ dict_item['conversations'] = conversion
43
+
44
+ print(dict_item)
45
+
46
+ cmmlu_list.append(dict_item)
47
+
48
+ data = cmmlu_list + data
49
+
50
+ with open('cmmlu_political_data_gaokao.json', 'w', encoding='utf-8') as file:
51
+ # 使用json.dump()函数将字典写入文件
52
+ json.dump(data, file, ensure_ascii=False, indent=4)
LLaVA-MOSS2/llava/serve/submit.py CHANGED
@@ -57,29 +57,14 @@ def get_prompt(key, question, len_of_pictures, image_token):
57
  for _ in range(len_of_pictures):
58
  question = image_token + question
59
 
60
- prompt = f"""你是一个{key}专家,擅长解决{key}问题。以下是一个{key}的题目,形式为单项选择题。所有的问题都是(close-world assumption)闭世界假设,即未观测事实都为假。请逐步分析问题并在最后一行输出答案,最后一行的格式为"答案是:A"。
61
-
62
- ## 示例
63
- ### 题目:
64
- 根据欧几里得算法,计算6和7的最大公约数
65
-
66
- ### 选项:
67
- A.1
68
- B.2
69
- C.3
70
- D.4
71
-
72
- ### 回答:
73
- 答案是:A.
74
-
75
- 题目如下:
76
- ### 问题:
77
  {question}
78
 
79
- ### 选项:
80
  {options}
81
 
82
- ### 回答:
83
  """
84
  return prompt
85
 
@@ -139,7 +124,7 @@ def main(args):
139
 
140
  answers = []
141
 
142
- for i in tqdm.tqdm(range(0, 5), desc="Voting Processing"):
143
  questions = copy.deepcopy(questions_origin)
144
  for subject in questions:
145
  example = subject['example']
@@ -212,9 +197,9 @@ def main(args):
212
  outputs = tokenizer.decode(output_ids[0]).strip()
213
  outputs = re.sub(r'\([^()]*\)', '', outputs)
214
  outputs = re.sub(r'<s>|</s>', '', outputs)
215
- outputs = extract(outputs, answer_dic)
216
  conv.messages[-1][-1] = outputs
217
- question_itme['model_answer'] = [outputs]
218
  question_itme.pop('picture')
219
  question_itme.pop('question')
220
 
@@ -223,11 +208,14 @@ def main(args):
223
  answers.append(questions)
224
 
225
  final_ans = answers[0]
226
- for ans in answers:
 
 
 
227
  for i, sub in enumerate(ans):
228
  example = sub['example']
229
  for j, item in enumerate(example):
230
- item_ans = item['model_answer']
231
  index = ord(item_ans[0]) - 65
232
  if 'count' not in final_ans:
233
  final_ans[i]['example'][j]['count'] = [0] * 4
@@ -297,7 +285,7 @@ def main(args):
297
 
298
  if __name__ == "__main__":
299
  parser = argparse.ArgumentParser()
300
- parser.add_argument("--model-path", type=str, default="checkpoints/llava-moss2-2_5b-chat-finetune")
301
  parser.add_argument("--model-base", type=str, default=None)
302
  # parser.add_argument("--image-file", type=str, required=True)
303
  parser.add_argument("--device", type=str, default="cuda")
 
57
  for _ in range(len_of_pictures):
58
  question = image_token + question
59
 
60
+ prompt = f"""你是一个{key}专家,擅长解决{key}问题。以下是一个{key}的题目,形式为单项选择题。请逐步分析问题并在最后一行输出答案,最后一行的格式为"答案是:A"。
61
+ 问题:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  {question}
63
 
64
+ 选项:
65
  {options}
66
 
67
+ 回答:
68
  """
69
  return prompt
70
 
 
124
 
125
  answers = []
126
 
127
+ for i in tqdm.tqdm(range(0, 1), desc="Voting Processing"):
128
  questions = copy.deepcopy(questions_origin)
129
  for subject in questions:
130
  example = subject['example']
 
197
  outputs = tokenizer.decode(output_ids[0]).strip()
198
  outputs = re.sub(r'\([^()]*\)', '', outputs)
199
  outputs = re.sub(r'<s>|</s>', '', outputs)
200
+ # outputs = extract(outputs, answer_dic)
201
  conv.messages[-1][-1] = outputs
202
+ question_itme['model_answer'] = outputs
203
  question_itme.pop('picture')
204
  question_itme.pop('question')
205
 
 
208
  answers.append(questions)
209
 
210
  final_ans = answers[0]
211
+ for i, ans in enumerate(answers):
212
+ file_name = f'output_{i}.json'
213
+ with open(file_name, 'w', encoding='utf-8') as file:
214
+ json.dump(ans, file, ensure_ascii=False, indent=4)
215
  for i, sub in enumerate(ans):
216
  example = sub['example']
217
  for j, item in enumerate(example):
218
+ item_ans = extract(item['model_answer'], answer_dic)
219
  index = ord(item_ans[0]) - 65
220
  if 'count' not in final_ans:
221
  final_ans[i]['example'][j]['count'] = [0] * 4
 
285
 
286
  if __name__ == "__main__":
287
  parser = argparse.ArgumentParser()
288
+ parser.add_argument("--model-path", type=str, default="checkpoints/llava-moss2-2_5b-chat-finetune-224")
289
  parser.add_argument("--model-base", type=str, default=None)
290
  # parser.add_argument("--image-file", type=str, required=True)
291
  parser.add_argument("--device", type=str, default="cuda")
LLaVA-MOSS2/scripts/finetune.sh CHANGED
@@ -15,7 +15,7 @@ deepspeed llava/train/train_mem.py \
15
  --deepspeed ./scripts/zero2.json \
16
  --model_name_or_path /root/.cache/huggingface/hub/models--fnlp--moss2-2_5b-chat/snapshots/3eda5a066c519990bf5f9ba056f5f8ef81531c83 \
17
  --version $PROMPT_VERSION \
18
- --data_path ./data_with_extra_data_half.json\
19
  --image_folder ./playground/data \
20
  --vision_tower openai/clip-vit-large-patch14 \
21
  --pretrain_mm_mlp_adapter ./checkpoints/llava-moss2-2_5b-chat-pretrain/mm_projector.bin \
@@ -23,13 +23,13 @@ deepspeed llava/train/train_mem.py \
23
  --mm_use_im_start_end False \
24
  --mm_use_im_patch_token False \
25
  --bf16 True \
26
- --max_steps 40000 \
27
- --per_device_train_batch_size 2 \
28
- --per_device_eval_batch_size 2 \
29
  --gradient_accumulation_steps 2 \
30
  --evaluation_strategy "no" \
31
  --save_strategy "steps" \
32
- --save_steps 2000 \
33
  --save_total_limit 5 \
34
  --learning_rate 2e-5 \
35
  --weight_decay 0. \
@@ -43,5 +43,4 @@ deepspeed llava/train/train_mem.py \
43
  --lazy_preprocess True \
44
  --report_to wandb \
45
  --run_name llava-moss2-finetune\
46
- --output_dir ./checkpoints/llava-$MODEL_VERSION-finetune \
47
-
 
15
  --deepspeed ./scripts/zero2.json \
16
  --model_name_or_path /root/.cache/huggingface/hub/models--fnlp--moss2-2_5b-chat/snapshots/3eda5a066c519990bf5f9ba056f5f8ef81531c83 \
17
  --version $PROMPT_VERSION \
18
+ --data_path ./playground/data/llava_v1_5_mix665k.json\
19
  --image_folder ./playground/data \
20
  --vision_tower openai/clip-vit-large-patch14 \
21
  --pretrain_mm_mlp_adapter ./checkpoints/llava-moss2-2_5b-chat-pretrain/mm_projector.bin \
 
23
  --mm_use_im_start_end False \
24
  --mm_use_im_patch_token False \
25
  --bf16 True \
26
+ --max_steps 20000 \
27
+ --per_device_train_batch_size 4 \
28
+ --per_device_eval_batch_size 4 \
29
  --gradient_accumulation_steps 2 \
30
  --evaluation_strategy "no" \
31
  --save_strategy "steps" \
32
+ --save_steps 5000 \
33
  --save_total_limit 5 \
34
  --learning_rate 2e-5 \
35
  --weight_decay 0. \
 
43
  --lazy_preprocess True \
44
  --report_to wandb \
45
  --run_name llava-moss2-finetune\
46
+ --output_dir ./checkpoints/llava-$MODEL_VERSION-finetune \
 
LLaVA-MOSS2/scripts/pretrain.sh CHANGED
@@ -25,13 +25,13 @@ deepspeed llava/train/train_mem.py \
25
  --mm_use_im_patch_token False \
26
  --bf16 True \
27
  --output_dir ./checkpoints/llava-$MODEL_VERSION-pretrain \
28
- --max_steps 5000 \
29
  --per_device_train_batch_size 16 \
30
  --per_device_eval_batch_size 4 \
31
  --gradient_accumulation_steps 2 \
32
  --evaluation_strategy "no" \
33
  --save_strategy "steps" \
34
- --save_steps 1000 \
35
  --save_total_limit 5 \
36
  --learning_rate 2e-3 \
37
  --weight_decay 0. \
 
25
  --mm_use_im_patch_token False \
26
  --bf16 True \
27
  --output_dir ./checkpoints/llava-$MODEL_VERSION-pretrain \
28
+ --max_steps 9000 \
29
  --per_device_train_batch_size 16 \
30
  --per_device_eval_batch_size 4 \
31
  --gradient_accumulation_steps 2 \
32
  --evaluation_strategy "no" \
33
  --save_strategy "steps" \
34
+ --save_steps 3000 \
35
  --save_total_limit 5 \
36
  --learning_rate 2e-3 \
37
  --weight_decay 0. \
LLaVA-MOSS2/test.py CHANGED
@@ -1,52 +1,94 @@
1
- import pandas as pd
2
- import os
 
3
  import json
 
 
4
 
5
- with open('./political_data_with_extra_data.json', 'r', encoding='utf-8') as file:
6
- data = json.load(file)
7
- len = len(data)
8
-
9
- final_folder = 'playground/data/cmmlu'
10
-
11
- files = os.listdir(final_folder)
12
-
13
- selected_files = ['combined_anatomy.csv','combined_ancient_chinese.csv','combined_arts.csv','combined_chinese_civil_service_exam.csv','combined_chinese_foreign_policy.csv',
14
- 'combined_chinese_history.csv','combined_college_education.csv', 'combined_college_engineering_hydrology.csv', 'combined_college_mathematics.csv', 'combined_college_medicine.csv',
15
- 'combined_conceptual_physics.csv','combined_electrical_engineering.csv','combined_elementary_mathematics.csv','combined_food_science.csv',
16
- 'combined_genetics.csv', 'combined_high_school_biology.csv', 'combined_high_school_chemistry.csv','combined_high_school_geography.csv','combined_high_school_mathematics.csv',
17
- 'combined_high_school_physics.csv','combined_high_school_politics.csv','combined_legal_and_moral_basis.csv','combined_management.csv','combined_marxist_theory.csv',
18
- 'combined_modern_chinese.csv','combined_philosophy.csv','combined_virology.csv','combined_world_history.csv']
19
- cmmlu_list = []
20
- for file_name in selected_files:
21
- path = os.path.join(final_folder, file_name)
22
- df = pd.read_csv(path)
23
-
24
- for index, row in df.iterrows():
25
- dict_item = {}
26
- dict_item['id'] = str(len)
27
- len+=1
28
-
29
- dict_item['image'] = ""
30
-
31
- conversion = []
32
- human = {}
33
- human['from'] = 'human'
34
- question = row['Question'] + '\nA.' + row['A'] + '\nB.' + row['B'] + '\nC.' + row['C'] + '\nD' + row['D'] + '\n'
35
- human['value'] = question
36
- gpt = {}
37
- gpt['from'] = 'gpt'
38
- result = "答案是:" + row['Answer']
39
- gpt['value'] = result
40
- conversion.append(human)
41
- conversion.append(gpt)
42
- dict_item['conversations'] = conversion
43
-
44
- print(dict_item)
45
-
46
- cmmlu_list.append(dict_item)
47
-
48
- data = cmmlu_list + data
49
-
50
- with open('cmmlu_political_data_gaokao.json', 'w', encoding='utf-8') as file:
51
- # 使用json.dump()函数将字典写入文件
52
- json.dump(data, file, ensure_ascii=False, indent=4)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Load model directly
2
+ from transformers import AutoTokenizer, AutoModel
3
+ import numpy as np
4
  import json
5
+ import heapq
6
+ import re
7
 
8
+ from FlagEmbedding import FlagModel
9
+
10
+
11
+ # Function to retrieve similar questions
12
+ def retrieve_similar_questions(bge_model, subject, input_question, question_pool, top_k=5):
13
+ # Encode the input question and question pool
14
+ min_heap = []
15
+ input_embedding = bge_model.encode(input_question)
16
+ for i, pool in enumerate(question_pool):
17
+ if pool['keyword'] == subject:
18
+ for j, question in enumerate(pool['question']):
19
+ question_embedding = bge_model.encode(question['question'])
20
+ similarity = input_embedding @ question_embedding
21
+ heapq.heappush(min_heap, (similarity, i, j))
22
+
23
+ if len(min_heap) > top_k:
24
+ heapq.heappop(min_heap)
25
+
26
+ result = []
27
+ while len(min_heap) != 0:
28
+ top = heapq.heappop(min_heap)
29
+ i = top[1]
30
+ j = top[2]
31
+ result.append(question_pool[i]['question'][j])
32
+ return result
33
+
34
+
35
+ def generate_prompt(subject, input_question, len_of_pictures, image_token):
36
+ bge_model = FlagModel('BAAI/bge-large-zh-v1.5',
37
+ query_instruction_for_retrieval="Represent this sentence for searching relevant passages:",
38
+ use_fp16=True)
39
+
40
+ # Sample question pool
41
+ with open('./RUC_RAG.json', 'r', encoding='utf-8') as file:
42
+ question_pool = json.load(file)
43
+
44
+ # Example usage
45
+ similar_questions = retrieve_similar_questions(bge_model, subject, input_question, question_pool)
46
+
47
+ similar_questions_prompt = ''
48
+ for i, question in enumerate(similar_questions):
49
+ answer = ''.join(question['answer'])
50
+ item = f"""
51
+ {i}.
52
+ 问题:{question['question']}
53
+ 回答:{question['analysis']}
54
+ 答案是:{answer}
55
+
56
+ """
57
+ similar_questions_prompt += item
58
+
59
+ pattern = re.compile(r'\s([A-D]\.\s.*[^\n])')
60
+ # 使用findall查找所有匹配的选项
61
+ options = pattern.findall(input_question)
62
+ if len_of_pictures >= 4:
63
+ options = '\n'.join(f"{'ABCDEFG'[i]}. {image_token}" for i in range(0, 4))
64
+ len_of_pictures -= 4
65
+ else:
66
+ options = '\n'.join(options)
67
+ input_question = input_question.split('A.')[0]
68
+ for _ in range(len_of_pictures):
69
+ input_question = image_token + input_question
70
+ input_question += options
71
+
72
+ prompt = f"""
73
+ 你将参与一个{subject}学科的高中选择题测试,这些题目将涵盖{subject}学科。每个题目都可能包含以下类型的图像:示意图、折线图、地图、照片和几何图形等,以增��题目的多模态特性。
74
+
75
+ 你的任务是:
76
+ 1. 仔细阅读每个题目的描述性问题,这些问题将涉及学科知识和图像分析。
77
+ 2. 分析提供的图像,它们将帮助你更好地理解问题并指导你选择答案。
78
+ 3. 从四个选项(A, B, C, D)中,选择最合适的答案。
79
+
80
+ 为了帮助你准备,这里有一些相似的示例题目:
81
+ {similar_questions_prompt}
82
+
83
+ 例如,对于一个{subject}题目,你可能需要识别图片中的相关信息,并根据图像中的信息选择正确的答案。
84
+
85
+ 现在,让我们开始提供一些示例题目,以便你能够熟悉测试的格式和要求。
86
+ 问题:{input_question}
87
+ """
88
+ return prompt
89
+
90
+ # Generate prompt with similar questions
91
+ subject = "化学"
92
+ image_type = "示意图"
93
+ prompt = generate_prompt('Geography', '日本某汽车公司在中国建有多个整车生产厂和零件生产厂.2011 年 3 月 11 日东 日本大地震及随后的海啸、核辐射灾难,使该公司在灾区的工厂停产.受其 影响,该公司在中国的整车生产厂也被迫减产.据此完成 1~2 题. 1.(4 分)该公司在中国建零部件生产厂,主要目的是( ) A.避免自然灾害对本土汽车生产的影响 B.为其中国整车厂配套,降低整车生产成本 C.利用中国廉价劳动力,为其日本整车厂服务 D.建立其全球整车生产的零部件工业基地 2.(4 分)中国整车生产厂被迫减产是由于该公司在灾区有( ) A.研发中心 B.一般零部件厂 C.核心零部件厂 D.整车厂 ', 0, "<Image>")
94
+ print(prompt)