# GeoLLM / Task2 / save_response.py
# Author: Pengfa Li
# Uploaded via huggingface_hub (commit badcf3c, verified)
import json
import pandas as pd
import os
def save_responses(type, index, response, output_file, raw_output_file):
    """Append one model answer and its raw response to two JSON files.

    Looks up the question text for ``index`` in the training sheet of
    ``./data/data.xlsx``, extracts the answer text from ``response``, and
    appends ``{question, answer}`` to ``output_file`` and
    ``{question, raw_response}`` to ``raw_output_file`` (both files hold a
    JSON list; they are created if missing).

    Parameters
    ----------
    type : str
        Question category, 'yes_no' or 'factoid' — selects the Excel sheet.
        NOTE: shadows the builtin ``type``; kept unchanged for caller
        compatibility.
    index : int
        Row index of the question in the selected training sheet.
    response : object
        Either an object exposing a ``.content`` attribute (e.g. an OpenAI
        ChatCompletionMessage) or a dict with a 'content' key.
    output_file : str
        Path of the JSON file accumulating parsed answers.
    raw_output_file : str
        Path of the JSON file accumulating stringified raw responses.

    Raises
    ------
    ValueError
        If ``type`` is not a recognized category (the original code fell
        through and later crashed with a NameError on ``sheet``).
    """
    data_path = './data/data.xlsx'
    if type == 'yes_no':
        sheet = 'Yes or No Train'
    elif type == 'factoid':
        sheet = 'Factoid Train'
    else:
        # Fail fast with a clear message instead of a NameError below.
        raise ValueError(f"unknown question type: {type!r}")
    data_train = pd.read_excel(data_path, sheet_name=sheet)
    question_train = data_train['Question']
    # Determine the response shape and extract the answer text.
    if hasattr(response, 'content'):  # ChatCompletionMessage-like object
        content = response.content
    elif isinstance(response, dict) and 'content' in response:  # dict format
        content = response['content']
    else:
        content = "无效的响应格式"  # runtime marker: invalid response format
    # Record to be appended to the answers file.
    result = {
        "question": question_train[index],
        "answer": content
    }
    # Read existing answers if the file is already present; start fresh otherwise.
    if os.path.exists(output_file):
        with open(output_file, 'r', encoding='utf-8') as f:
            existing_data = json.load(f)
    else:
        existing_data = []
    existing_data.append(result)
    # Rewrite the whole list, pretty-printed, keeping non-ASCII text readable.
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(existing_data, f, ensure_ascii=False, indent=4)
    # Also persist the raw response (stringified) for later debugging.
    raw_response = {
        "question": question_train[index],
        "raw_response": str(response)  # object -> string
    }
    if os.path.exists(raw_output_file):
        with open(raw_output_file, 'r', encoding='utf-8') as f:
            existing_raw_data = json.load(f)
    else:
        existing_raw_data = []
    existing_raw_data.append(raw_response)
    with open(raw_output_file, 'w', encoding='utf-8') as f:
        json.dump(existing_raw_data, f, ensure_ascii=False, indent=4)
def save_responses_knn(prompt, type, index, response, output_file, raw_output_file):
    """Append one KNN-prompted model answer and its raw response to JSON files.

    Same flow as ``save_responses``, except the raw-response record stores
    the full KNN ``prompt`` instead of the question text. Appends
    ``{question, answer}`` to ``output_file`` and ``{prompt, raw_response}``
    to ``raw_output_file`` (both files hold a JSON list; created if missing).

    Parameters
    ----------
    prompt : str
        The full prompt (with KNN examples) sent to the model; saved with
        the raw response.
    type : str
        Question category, 'yes_no' or 'factoid' — selects the Excel sheet.
        NOTE: shadows the builtin ``type``; kept unchanged for caller
        compatibility.
    index : int
        Row index of the question in the selected training sheet.
    response : object
        Either an object exposing a ``.content`` attribute (e.g. an OpenAI
        ChatCompletionMessage) or a dict with a 'content' key.
    output_file : str
        Path of the JSON file accumulating parsed answers.
    raw_output_file : str
        Path of the JSON file accumulating prompts and raw responses.

    Raises
    ------
    ValueError
        If ``type`` is not a recognized category (the original code fell
        through and later crashed with a NameError on ``sheet``).
    """
    data_path = './data/data.xlsx'
    if type == 'yes_no':
        sheet = 'Yes or No Train'
    elif type == 'factoid':
        sheet = 'Factoid Train'
    else:
        # Fail fast with a clear message instead of a NameError below.
        raise ValueError(f"unknown question type: {type!r}")
    data_train = pd.read_excel(data_path, sheet_name=sheet)
    question_train = data_train['Question']
    # Determine the response shape and extract the answer text.
    if hasattr(response, 'content'):  # ChatCompletionMessage-like object
        content = response.content
    elif isinstance(response, dict) and 'content' in response:  # dict format
        content = response['content']
    else:
        content = "无效的响应格式"  # runtime marker: invalid response format
    # Record to be appended to the answers file.
    result = {
        "question": question_train[index],
        "answer": content
    }
    # Read existing answers if the file is already present; start fresh otherwise.
    if os.path.exists(output_file):
        with open(output_file, 'r', encoding='utf-8') as f:
            existing_data = json.load(f)
    else:
        existing_data = []
    existing_data.append(result)
    # Rewrite the whole list, pretty-printed, keeping non-ASCII text readable.
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(existing_data, f, ensure_ascii=False, indent=4)
    # Persist the prompt together with the raw (stringified) response.
    raw_response = {
        "prompt": prompt,
        "raw_response": str(response)  # object -> string
    }
    if os.path.exists(raw_output_file):
        with open(raw_output_file, 'r', encoding='utf-8') as f:
            existing_raw_data = json.load(f)
    else:
        existing_raw_data = []
    existing_raw_data.append(raw_response)
    with open(raw_output_file, 'w', encoding='utf-8') as f:
        json.dump(existing_raw_data, f, ensure_ascii=False, indent=4)