GSheep committed on
Commit
2d3c5df
·
1 Parent(s): cb9f2b6

add score:0.224 and score:0.26

Browse files
LLaVA-MOSS2/add_RUC.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import jsonlines
3
+ import os
4
+
5
+ dir_path = '/root/.cache/huggingface/hub/datasets--RUCAIBox--gaokao-bench/snapshots/49877cf53b6db9c24d7d285161fc12bba2f85d29/test'
6
+ files = os.listdir(dir_path)
7
+
8
+ subject = [ 'Chemistry', 'Geography', 'Math', 'History', 'Biology', 'Political', 'Chinese', 'Physics' ]
9
+
10
+ len = 0
11
+ data = []
12
+ for file in files:
13
+ if 'English' in file:
14
+ continue
15
+ data_item = {}
16
+ for sub in subject:
17
+ if sub in file:
18
+ data_item['keyword'] = sub
19
+ data_item_list = []
20
+ path = os.path.join(dir_path, file)
21
+ with open(path, 'r+', encoding='utf-8') as file:
22
+ for line in jsonlines.Reader(file):
23
+ dict_item = {}
24
+ dict_item['question'] = line['question']
25
+ dict_item['answer'] = line['answer']
26
+ dict_item['analysis'] = line['analysis']
27
+ # len += 1
28
+
29
+ # dict_item['image'] = ""
30
+
31
+ # conversion = []
32
+ # human = {}
33
+ # human['from'] = 'human'
34
+ # human['value'] = line['question']
35
+ # gpt = {}
36
+ # gpt['from'] = 'gpt'
37
+ # result = line['analysis']
38
+ # result += "答案是:" + ''.join(line['answer'])
39
+ # gpt['value'] = result
40
+ # conversion.append(human)
41
+ # conversion.append(gpt)
42
+ # dict_item['conversations'] = conversion
43
+
44
+ print(dict_item)
45
+
46
+ data_item_list.append(dict_item)
47
+ data_item['question'] = data_item_list
48
+ data.append(data_item)
49
+
50
+ with open('RUC_RAG.json', 'w', encoding='utf-8') as file:
51
+ # 使用json.dump()函数将字典写入文件
52
+ json.dump(data, file, ensure_ascii=False, indent=4)
LLaVA-MOSS2/add_cmmlu.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import os
3
+ import json
4
+
5
+ with open('./political_data_with_extra_data.json', 'r', encoding='utf-8') as file:
6
+ data = json.load(file)
7
+ len = len(data)
8
+
9
+ final_folder = 'playground/data/cmmlu'
10
+
11
+ files = os.listdir(final_folder)
12
+
13
+ selected_files = ['combined_anatomy.csv','combined_ancient_chinese.csv','combined_arts.csv','combined_chinese_civil_service_exam.csv','combined_chinese_foreign_policy.csv',
14
+ 'combined_chinese_history.csv','combined_college_education.csv', 'combined_college_engineering_hydrology.csv', 'combined_college_mathematics.csv', 'combined_college_medicine.csv',
15
+ 'combined_conceptual_physics.csv','combined_electrical_engineering.csv','combined_elementary_mathematics.csv','combined_food_science.csv',
16
+ 'combined_genetics.csv', 'combined_high_school_biology.csv', 'combined_high_school_chemistry.csv','combined_high_school_geography.csv','combined_high_school_mathematics.csv',
17
+ 'combined_high_school_physics.csv','combined_high_school_politics.csv','combined_legal_and_moral_basis.csv','combined_management.csv','combined_marxist_theory.csv',
18
+ 'combined_modern_chinese.csv','combined_philosophy.csv','combined_virology.csv','combined_world_history.csv']
19
+ cmmlu_list = []
20
+ for file_name in selected_files:
21
+ path = os.path.join(final_folder, file_name)
22
+ df = pd.read_csv(path)
23
+
24
+ for index, row in df.iterrows():
25
+ dict_item = {}
26
+ dict_item['id'] = str(len)
27
+ len+=1
28
+
29
+ dict_item['image'] = ""
30
+
31
+ conversion = []
32
+ human = {}
33
+ human['from'] = 'human'
34
+ question = row['Question'] + '\nA.' + row['A'] + '\nB.' + row['B'] + '\nC.' + row['C'] + '\nD' + row['D'] + '\n'
35
+ human['value'] = question
36
+ gpt = {}
37
+ gpt['from'] = 'gpt'
38
+ result = "答案是:" + row['Answer']
39
+ gpt['value'] = result
40
+ conversion.append(human)
41
+ conversion.append(gpt)
42
+ dict_item['conversations'] = conversion
43
+
44
+ print(dict_item)
45
+
46
+ cmmlu_list.append(dict_item)
47
+
48
+ data = cmmlu_list + data
49
+
50
+ with open('cmmlu_political_data_gaokao.json', 'w', encoding='utf-8') as file:
51
+ # 使用json.dump()函数将字典写入文件
52
+ json.dump(data, file, ensure_ascii=False, indent=4)
LLaVA-MOSS2/llava/serve/submit.py CHANGED
@@ -57,29 +57,14 @@ def get_prompt(key, question, len_of_pictures, image_token):
57
  for _ in range(len_of_pictures):
58
  question = image_token + question
59
 
60
- prompt = f"""你是一个{key}专家,擅长解决{key}问题。以下是一个{key}的题目,形式为单项选择题。所有的问题都是(close-world assumption)闭世界假设,即未观测事实都为假。请逐步分析问题并在最后一行输出答案,最后一行的格式为"答案是:A"。
61
-
62
- ## 示例
63
- ### 题目:
64
- 根据欧几里得算法,计算6和7的最大公约数
65
-
66
- ### 选项:
67
- A.1
68
- B.2
69
- C.3
70
- D.4
71
-
72
- ### 回答:
73
- 答案是:A.
74
-
75
- 题目如下:
76
- ### 问题:
77
  {question}
78
 
79
- ### 选项:
80
  {options}
81
 
82
- ### 回答:
83
  """
84
  return prompt
85
 
@@ -139,7 +124,7 @@ def main(args):
139
 
140
  answers = []
141
 
142
- for i in tqdm.tqdm(range(0, 5), desc="Voting Processing"):
143
  questions = copy.deepcopy(questions_origin)
144
  for subject in questions:
145
  example = subject['example']
@@ -212,9 +197,9 @@ def main(args):
212
  outputs = tokenizer.decode(output_ids[0]).strip()
213
  outputs = re.sub(r'\([^()]*\)', '', outputs)
214
  outputs = re.sub(r'<s>|</s>', '', outputs)
215
- outputs = extract(outputs, answer_dic)
216
  conv.messages[-1][-1] = outputs
217
- question_itme['model_answer'] = [outputs]
218
  question_itme.pop('picture')
219
  question_itme.pop('question')
220
 
@@ -223,11 +208,14 @@ def main(args):
223
  answers.append(questions)
224
 
225
  final_ans = answers[0]
226
- for ans in answers:
 
 
 
227
  for i, sub in enumerate(ans):
228
  example = sub['example']
229
  for j, item in enumerate(example):
230
- item_ans = item['model_answer']
231
  index = ord(item_ans[0]) - 65
232
  if 'count' not in final_ans:
233
  final_ans[i]['example'][j]['count'] = [0] * 4
@@ -297,7 +285,7 @@ def main(args):
297
 
298
  if __name__ == "__main__":
299
  parser = argparse.ArgumentParser()
300
- parser.add_argument("--model-path", type=str, default="checkpoints/llava-moss2-2_5b-chat-finetune")
301
  parser.add_argument("--model-base", type=str, default=None)
302
  # parser.add_argument("--image-file", type=str, required=True)
303
  parser.add_argument("--device", type=str, default="cuda")
 
57
  for _ in range(len_of_pictures):
58
  question = image_token + question
59
 
60
+ prompt = f"""你是一个{key}专家,擅长解决{key}问题。以下是一个{key}的题目,形式为单项选择题。请逐步分析问题并在最后一行输出答案,最后一行的格式为"答案是:A"。
61
+ 问题:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  {question}
63
 
64
+ 选项:
65
  {options}
66
 
67
+ 回答:
68
  """
69
  return prompt
70
 
 
124
 
125
  answers = []
126
 
127
+ for i in tqdm.tqdm(range(0, 1), desc="Voting Processing"):
128
  questions = copy.deepcopy(questions_origin)
129
  for subject in questions:
130
  example = subject['example']
 
197
  outputs = tokenizer.decode(output_ids[0]).strip()
198
  outputs = re.sub(r'\([^()]*\)', '', outputs)
199
  outputs = re.sub(r'<s>|</s>', '', outputs)
200
+ # outputs = extract(outputs, answer_dic)
201
  conv.messages[-1][-1] = outputs
202
+ question_itme['model_answer'] = outputs
203
  question_itme.pop('picture')
204
  question_itme.pop('question')
205
 
 
208
  answers.append(questions)
209
 
210
  final_ans = answers[0]
211
+ for i, ans in enumerate(answers):
212
+ file_name = f'output_{i}.json'
213
+ with open(file_name, 'w', encoding='utf-8') as file:
214
+ json.dump(ans, file, ensure_ascii=False, indent=4)
215
  for i, sub in enumerate(ans):
216
  example = sub['example']
217
  for j, item in enumerate(example):
218
+ item_ans = extract(item['model_answer'], answer_dic)
219
  index = ord(item_ans[0]) - 65
220
  if 'count' not in final_ans:
221
  final_ans[i]['example'][j]['count'] = [0] * 4
 
285
 
286
  if __name__ == "__main__":
287
  parser = argparse.ArgumentParser()
288
+ parser.add_argument("--model-path", type=str, default="checkpoints/llava-moss2-2_5b-chat-finetune-224")
289
  parser.add_argument("--model-base", type=str, default=None)
290
  # parser.add_argument("--image-file", type=str, required=True)
291
  parser.add_argument("--device", type=str, default="cuda")
LLaVA-MOSS2/scripts/finetune.sh CHANGED
@@ -15,7 +15,7 @@ deepspeed llava/train/train_mem.py \
15
  --deepspeed ./scripts/zero2.json \
16
  --model_name_or_path /root/.cache/huggingface/hub/models--fnlp--moss2-2_5b-chat/snapshots/3eda5a066c519990bf5f9ba056f5f8ef81531c83 \
17
  --version $PROMPT_VERSION \
18
- --data_path ./data_with_extra_data_half.json\
19
  --image_folder ./playground/data \
20
  --vision_tower openai/clip-vit-large-patch14 \
21
  --pretrain_mm_mlp_adapter ./checkpoints/llava-moss2-2_5b-chat-pretrain/mm_projector.bin \
@@ -23,13 +23,13 @@ deepspeed llava/train/train_mem.py \
23
  --mm_use_im_start_end False \
24
  --mm_use_im_patch_token False \
25
  --bf16 True \
26
- --max_steps 40000 \
27
- --per_device_train_batch_size 2 \
28
- --per_device_eval_batch_size 2 \
29
  --gradient_accumulation_steps 2 \
30
  --evaluation_strategy "no" \
31
  --save_strategy "steps" \
32
- --save_steps 2000 \
33
  --save_total_limit 5 \
34
  --learning_rate 2e-5 \
35
  --weight_decay 0. \
@@ -43,5 +43,4 @@ deepspeed llava/train/train_mem.py \
43
  --lazy_preprocess True \
44
  --report_to wandb \
45
  --run_name llava-moss2-finetune\
46
- --output_dir ./checkpoints/llava-$MODEL_VERSION-finetune \
47
-
 
15
  --deepspeed ./scripts/zero2.json \
16
  --model_name_or_path /root/.cache/huggingface/hub/models--fnlp--moss2-2_5b-chat/snapshots/3eda5a066c519990bf5f9ba056f5f8ef81531c83 \
17
  --version $PROMPT_VERSION \
18
+ --data_path ./playground/data/llava_v1_5_mix665k.json\
19
  --image_folder ./playground/data \
20
  --vision_tower openai/clip-vit-large-patch14 \
21
  --pretrain_mm_mlp_adapter ./checkpoints/llava-moss2-2_5b-chat-pretrain/mm_projector.bin \
 
23
  --mm_use_im_start_end False \
24
  --mm_use_im_patch_token False \
25
  --bf16 True \
26
+ --max_steps 20000 \
27
+ --per_device_train_batch_size 4 \
28
+ --per_device_eval_batch_size 4 \
29
  --gradient_accumulation_steps 2 \
30
  --evaluation_strategy "no" \
31
  --save_strategy "steps" \
32
+ --save_steps 5000 \
33
  --save_total_limit 5 \
34
  --learning_rate 2e-5 \
35
  --weight_decay 0. \
 
43
  --lazy_preprocess True \
44
  --report_to wandb \
45
  --run_name llava-moss2-finetune\
46
+ --output_dir ./checkpoints/llava-$MODEL_VERSION-finetune \
 
LLaVA-MOSS2/scripts/pretrain.sh CHANGED
@@ -25,13 +25,13 @@ deepspeed llava/train/train_mem.py \
25
  --mm_use_im_patch_token False \
26
  --bf16 True \
27
  --output_dir ./checkpoints/llava-$MODEL_VERSION-pretrain \
28
- --max_steps 5000 \
29
  --per_device_train_batch_size 16 \
30
  --per_device_eval_batch_size 4 \
31
  --gradient_accumulation_steps 2 \
32
  --evaluation_strategy "no" \
33
  --save_strategy "steps" \
34
- --save_steps 1000 \
35
  --save_total_limit 5 \
36
  --learning_rate 2e-3 \
37
  --weight_decay 0. \
 
25
  --mm_use_im_patch_token False \
26
  --bf16 True \
27
  --output_dir ./checkpoints/llava-$MODEL_VERSION-pretrain \
28
+ --max_steps 9000 \
29
  --per_device_train_batch_size 16 \
30
  --per_device_eval_batch_size 4 \
31
  --gradient_accumulation_steps 2 \
32
  --evaluation_strategy "no" \
33
  --save_strategy "steps" \
34
+ --save_steps 3000 \
35
  --save_total_limit 5 \
36
  --learning_rate 2e-3 \
37
  --weight_decay 0. \
LLaVA-MOSS2/test.py CHANGED
@@ -1,52 +1,94 @@
1
- import pandas as pd
2
- import os
 
3
  import json
 
 
4
 
5
- with open('./political_data_with_extra_data.json', 'r', encoding='utf-8') as file:
6
- data = json.load(file)
7
- len = len(data)
8
-
9
- final_folder = 'playground/data/cmmlu'
10
-
11
- files = os.listdir(final_folder)
12
-
13
- selected_files = ['combined_anatomy.csv','combined_ancient_chinese.csv','combined_arts.csv','combined_chinese_civil_service_exam.csv','combined_chinese_foreign_policy.csv',
14
- 'combined_chinese_history.csv','combined_college_education.csv', 'combined_college_engineering_hydrology.csv', 'combined_college_mathematics.csv', 'combined_college_medicine.csv',
15
- 'combined_conceptual_physics.csv','combined_electrical_engineering.csv','combined_elementary_mathematics.csv','combined_food_science.csv',
16
- 'combined_genetics.csv', 'combined_high_school_biology.csv', 'combined_high_school_chemistry.csv','combined_high_school_geography.csv','combined_high_school_mathematics.csv',
17
- 'combined_high_school_physics.csv','combined_high_school_politics.csv','combined_legal_and_moral_basis.csv','combined_management.csv','combined_marxist_theory.csv',
18
- 'combined_modern_chinese.csv','combined_philosophy.csv','combined_virology.csv','combined_world_history.csv']
19
- cmmlu_list = []
20
- for file_name in selected_files:
21
- path = os.path.join(final_folder, file_name)
22
- df = pd.read_csv(path)
23
-
24
- for index, row in df.iterrows():
25
- dict_item = {}
26
- dict_item['id'] = str(len)
27
- len+=1
28
-
29
- dict_item['image'] = ""
30
-
31
- conversion = []
32
- human = {}
33
- human['from'] = 'human'
34
- question = row['Question'] + '\nA.' + row['A'] + '\nB.' + row['B'] + '\nC.' + row['C'] + '\nD' + row['D'] + '\n'
35
- human['value'] = question
36
- gpt = {}
37
- gpt['from'] = 'gpt'
38
- result = "答案是:" + row['Answer']
39
- gpt['value'] = result
40
- conversion.append(human)
41
- conversion.append(gpt)
42
- dict_item['conversations'] = conversion
43
-
44
- print(dict_item)
45
-
46
- cmmlu_list.append(dict_item)
47
-
48
- data = cmmlu_list + data
49
-
50
- with open('cmmlu_political_data_gaokao.json', 'w', encoding='utf-8') as file:
51
- # 使用json.dump()函数将字典写入文件
52
- json.dump(data, file, ensure_ascii=False, indent=4)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Load model directly
2
+ from transformers import AutoTokenizer, AutoModel
3
+ import numpy as np
4
  import json
5
+ import heapq
6
+ import re
7
 
8
+ from FlagEmbedding import FlagModel
9
+
10
+
11
+ # Function to retrieve similar questions
12
+ def retrieve_similar_questions(bge_model, subject, input_question, question_pool, top_k=5):
13
+ # Encode the input question and question pool
14
+ min_heap = []
15
+ input_embedding = bge_model.encode(input_question)
16
+ for i, pool in enumerate(question_pool):
17
+ if pool['keyword'] == subject:
18
+ for j, question in enumerate(pool['question']):
19
+ question_embedding = bge_model.encode(question['question'])
20
+ similarity = input_embedding @ question_embedding
21
+ heapq.heappush(min_heap, (similarity, i, j))
22
+
23
+ if len(min_heap) > top_k:
24
+ heapq.heappop(min_heap)
25
+
26
+ result = []
27
+ while len(min_heap) != 0:
28
+ top = heapq.heappop(min_heap)
29
+ i = top[1]
30
+ j = top[2]
31
+ result.append(question_pool[i]['question'][j])
32
+ return result
33
+
34
+
35
+ def generate_prompt(subject, input_question, len_of_pictures, image_token):
36
+ bge_model = FlagModel('BAAI/bge-large-zh-v1.5',
37
+ query_instruction_for_retrieval="Represent this sentence for searching relevant passages:",
38
+ use_fp16=True)
39
+
40
+ # Sample question pool
41
+ with open('./RUC_RAG.json', 'r', encoding='utf-8') as file:
42
+ question_pool = json.load(file)
43
+
44
+ # Example usage
45
+ similar_questions = retrieve_similar_questions(bge_model, subject, input_question, question_pool)
46
+
47
+ similar_questions_prompt = ''
48
+ for i, question in enumerate(similar_questions):
49
+ answer = ''.join(question['answer'])
50
+ item = f"""
51
+ {i}.
52
+ 问题:{question['question']}
53
+ 回答:{question['analysis']}
54
+ 答案是:{answer}
55
+
56
+ """
57
+ similar_questions_prompt += item
58
+
59
+ pattern = re.compile(r'\s([A-D]\.\s.*[^\n])')
60
+ # 使用findall查找所有匹配的选项
61
+ options = pattern.findall(input_question)
62
+ if len_of_pictures >= 4:
63
+ options = '\n'.join(f"{'ABCDEFG'[i]}. {image_token}" for i in range(0, 4))
64
+ len_of_pictures -= 4
65
+ else:
66
+ options = '\n'.join(options)
67
+ input_question = input_question.split('A.')[0]
68
+ for _ in range(len_of_pictures):
69
+ input_question = image_token + input_question
70
+ input_question += options
71
+
72
+ prompt = f"""
73
+ 你将参与一个{subject}学科的高中选择题测试,这些题目将涵盖{subject}学科。每个题目都可能包含以下类型的图像:示意图、折线图、地图、照片和几何图形等,以增��题目的多模态特性。
74
+
75
+ 你的任务是:
76
+ 1. 仔细阅读每个题目的描述性问题,这些问题将涉及学科知识和图像分析。
77
+ 2. 分析提供的图像,它们将帮助你更好地理解问题并指导你选择答案。
78
+ 3. 从四个选项(A, B, C, D)中,选择最合适的答案。
79
+
80
+ 为了帮助你准备,这里有一些相似的示例题目:
81
+ {similar_questions_prompt}
82
+
83
+ 例如,对于一个{subject}题目,你可能需要识别图片中的相关信息,并根据图像中的信息选择正确的答案。
84
+
85
+ 现在,让我们开始提供一些示例题目,以便你能够熟悉测试的格式和要求。
86
+ 问题:{input_question}
87
+ """
88
+ return prompt
89
+
90
+ # Generate prompt with similar questions
91
+ subject = "化学"
92
+ image_type = "示意图"
93
+ prompt = generate_prompt('Geography', '日本某汽车公司在中国建有多个整车生产厂和零件生产厂.2011 年 3 月 11 日东 日本大地震及随后的海啸、核辐射灾难,使该公司在灾区的工厂停产.受其 影响,该公司在中国的整车生产厂也被迫减产.据此完成 1~2 题. 1.(4 分)该公司在中国建零部件生产厂,主要目的是( ) A.避免自然灾害对本土汽车生产的影响 B.为其中国整车厂配套,降低整车生产成本 C.利用中国廉价劳动力,为其日本整车厂服务 D.建立其全球整车生产的零部件工业基地 2.(4 分)中国整车生产厂被迫减产是由于该公司在灾区有( ) A.研发中心 B.一般零部件厂 C.核心零部件厂 D.整车厂 ', 0, "<Image>")
94
+ print(prompt)