File size: 6,039 Bytes
77320e4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
import os
import re
import argparse
import asyncio
import logging
import sys
import json
import io

import openai


import infiagent
from infiagent.utils import get_logger, upload_files, get_file_name_and_path
from infiagent.services.chat_complete_service import predict


# Module-level logger shared by this script, obtained from infiagent.utils.get_logger().
logger = get_logger()


class UploadedFile(io.BytesIO):
    """File-like object holding the bytes of a file on disk.

    The whole file is read eagerly into the underlying ``BytesIO`` buffer, and
    ``name``, ``type`` and ``size`` attributes are attached alongside it.
    """

    def __init__(self, path):
        """Load the file at *path* into memory.

        :param path: Path of the file to read (opened in binary mode).
        :raises OSError: If the file cannot be opened or read.
        """
        with open(path, 'rb') as file:
            data = file.read()

        super().__init__(data)

        # basename() follows the platform's path rules and accepts
        # os.PathLike objects, which a plain split("/") does not.
        self.name = os.path.basename(path)  # file name without directories
        self.type = 'application/octet-stream'  # generic binary MIME type
        self.size = len(data)

    def __repr__(self):
        """Return a debug representation that carries the real class name."""
        return f"{type(self).__name__}(name={self.name}, size={self.size}, type={self.type})"

    def __len__(self):
        """Return the number of bytes that were read from disk."""
        return self.size

# # Usage example
# file_path = "path/to/your/file"
# uploaded_file = UploadedFile(file_path)

# print(uploaded_file)


def _get_script_params():
    """Parse the command-line options of this script.

    Recognised options are ``--llm``, ``--api_key`` and ``--config_path``;
    all are optional strings and only ``--config_path`` has a default.

    :return: The parsed ``argparse.Namespace``, or ``None`` when an
        ``Exception`` was raised while parsing (it is logged, not propagated).
    """
    # (flag, extra keyword arguments) for every supported option, in the
    # order they are registered with the parser.
    option_specs = (
        ('--llm', {'help': 'LLM Model for demo'}),
        ('--api_key', {'help': 'Open API token key.'}),
        ('--config_path', {
            'help': 'Config path for demo',
            'default': "configs/agent_configs/react_agent_llama_async.yaml",
        }),
    )

    try:
        cli = argparse.ArgumentParser()
        for flag, extra in option_specs:
            cli.add_argument(flag, required=False, type=str, **extra)
        return cli.parse_args()
    except Exception as e:
        logger.error("Failed to get script input arguments: {}".format(str(e)), exc_info=True)
        return None


def extract_questions_and_concepts(file_path):
    """Collect question/concept pairs from a LaTeX-like text file.

    Every ``\\Question{...}`` that is followed (optionally after whitespace)
    by ``\\Concepts{...}`` produces one entry.

    :param file_path: Path of the text file to scan.
    :return: List of dicts with keys ``'question'`` (stripped text) and
        ``'concepts'`` (the comma-separated items, each stripped).
    """
    with open(file_path, 'r') as source:
        text = source.read()

    # DOTALL lets a question or a concept list span several lines.
    pair_re = re.compile(r'\\Question{(.*?)}\s*\\Concepts{(.*?)}', re.DOTALL)

    return [
        {
            'question': found.group(1).strip(),
            'concepts': [item.strip() for item in found.group(2).split(',')],
        }
        for found in pair_re.finditer(text)
    ]

def read_dicts_from_file(file_name):
    """
    Read a file with one JSON document per line and return the decoded records.

    Blank or whitespace-only lines (for example a trailing empty line) are
    skipped instead of aborting the whole load with a ``json.JSONDecodeError``.

    :param file_name: Name of the file to read from.
    :return: List of the decoded objects (dictionaries, for the files this
        script reads), in file order.
    :raises json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # JSON text is exchanged as UTF-8, so do not rely on the locale default.
    with open(file_name, 'r', encoding='utf-8') as file:
        # json.loads tolerates surrounding whitespace, so the newline at the
        # end of each line does not need to be stripped first.
        return [json.loads(line) for line in file if line.strip()]

def read_questions(file_path):
    """Load the JSON document stored at *file_path*.

    The path is echoed to stdout before the file is opened.

    :param file_path: Path of a JSON file.
    :return: The decoded JSON content.
    """
    print(file_path)
    with open(file_path) as handle:
        raw = handle.read()
    return json.loads(raw)

def extract_data_from_folder(folder_path):
    """Load every ``.questions`` file found directly inside *folder_path*.

    The folder path is echoed to stdout first. Sub-directories are not
    descended into.

    :param folder_path: Directory to scan.
    :return: Dict mapping each matching file name, minus its extension, to
        the data ``read_questions`` returns for that file.
    """
    print(f'folder_path {folder_path}')

    # Only entries carrying the '.questions' suffix are of interest.
    question_files = (
        entry for entry in os.listdir(folder_path)
        if entry.endswith('.questions')
    )
    return {
        os.path.splitext(entry)[0]: read_questions(os.path.join(folder_path, entry))
        for entry in question_files
    }


async def main():
    """Run every dev-set question through the agent and save the responses.

    Steps:
      1. Parse the CLI options and require ``--llm``.
      2. For model names containing ``OPEN_AI``, make sure an API key is
         available (environment variable or ``--api_key``).
      3. For each record of ``./data/da-dev-questions.jsonl``, load its table
         file from ``data/da-dev-tables``, await ``predict`` and collect the
         response.
      4. Dump the collected results to ``results_<llm>.json`` after every
         10th question and once more at the end.

    :raises ValueError: If ``--llm`` was not supplied (or the arguments could
        not be parsed), or if an OPEN_AI model is requested without any API
        key being available.
    """
    args = _get_script_params()

    model_name = getattr(args, "llm", None)
    open_ai_key = getattr(args, "api_key", None)

    # Without this guard a missing --llm (or a failed argument parse, which
    # yields args=None) only surfaced later as
    # "TypeError: argument of type 'NoneType' is not iterable".
    if model_name is None:
        raise ValueError("LLM model name is None, please provide it with '--llm'")

    if "OPEN_AI" in model_name:
        logger.info("setup open ai ")
        if os.environ.get("OPENAI_API_KEY") is None:
            if open_ai_key:
                openai.api_key = open_ai_key
                os.environ["OPENAI_API_KEY"] = open_ai_key
            else:
                raise ValueError("OPENAI_API_KEY is None, please provide open ai key to use open ai model. Adding "
                                 "'--api_key' to set it up")

        # Raise the 'openai' logger to WARNING so that its INFO-level
        # records are no longer printed.
        openai_logger = logging.getLogger('openai')
        openai_logger.setLevel(logging.WARNING)
    else:
        logger.info("use local model ")

    extracted_data = read_dicts_from_file('./data/da-dev-questions.jsonl')

    table_path = 'data/da-dev-tables'
    results_path = 'results_{}.json'.format(model_name)
    results = []

    for index, q in enumerate(extracted_data, start=1):
        input_text = q['question']
        concepts = q['concepts']
        constraints = q['constraints']
        answer_format = q['format']  # named so the builtin format() is not shadowed
        file_path = os.path.join(table_path, q['file_name'])

        print(f'input_text: {input_text}')
        print(f'concepts: {concepts}')
        print(f'file_path: {file_path}')

        uploaded_file = UploadedFile(file_path)
        print(uploaded_file)

        prompt = f"Question: {input_text}\n{constraints}\n"

        response = await predict(
            prompt=prompt,
            model_name=model_name,
            config_path=args.config_path,
            uploaded_files=[uploaded_file]
        )

        results.append({
            'id': q['id'],
            'input_text': prompt,
            'concepts': concepts,
            'file_path': file_path,
            'response': response,
            'format': answer_format
        })
        print(f"response: {response}")

        # Checkpoint every 10 questions so an interrupted run keeps its
        # partial results.
        if index % 10 == 0:
            with open(results_path, 'w') as outfile:
                json.dump(results, outfile, indent=4)

    with open(results_path, 'w') as outfile:
        json.dump(results, outfile, indent=4)


if __name__ == '__main__':
    # main() is a coroutine function, so hand its coroutine to asyncio.run()
    # rather than calling it like a plain function.
    entry_point = main()
    asyncio.run(entry_point)