Spaces:

pengfali
/

GeoLLM

Runtime error

File size: 4,230 Bytes

badcf3c

import json
import pandas as pd
import os

def save_responses(type, index, response, output_file, raw_output_file):
    """Append one model answer and its raw response to two JSON list files.

    The question text is read from the ``Question`` column of
    ``./data/data.xlsx``, on the sheet selected by ``type``. Each output
    file holds a JSON list; the new record is appended to whatever list is
    already stored there (a missing or empty file starts a new list).

    Args:
        type: ``'yes_no'`` selects the ``'Yes or No Train'`` sheet and
            ``'factoid'`` selects ``'Factoid Train'``. (The name shadows the
            builtin but is kept so keyword callers keep working.)
        index: Key used to index the ``Question`` column
            (presumably the row number -- confirm against the caller).
        response: An object exposing a ``content`` attribute, or a dict with
            a ``'content'`` key. Anything else is stored with a placeholder
            answer instead of raising.
        output_file: Path of the JSON file receiving
            ``{"question", "answer"}`` records.
        raw_output_file: Path of the JSON file receiving
            ``{"question", "raw_response"}`` records, where the raw response
            is ``str(response)``.

    Raises:
        ValueError: If ``type`` is not ``'yes_no'`` or ``'factoid'``.
    """
    # Validate up front: previously an unknown type left `sheet` unassigned
    # and surfaced as an opaque UnboundLocalError at the read_excel call.
    if type == 'yes_no':
        sheet = 'Yes or No Train'
    elif type == 'factoid':
        sheet = 'Factoid Train'
    else:
        raise ValueError(
            f"unsupported type {type!r}; expected 'yes_no' or 'factoid'"
        )

    data_path = './data/data.xlsx'
    data_train = pd.read_excel(data_path, sheet_name=sheet)
    question = data_train['Question'][index]

    # Extract the answer text from whichever response shape was passed in.
    if hasattr(response, 'content'):  # e.g. a ChatCompletionMessage object
        content = response.content
    elif isinstance(response, dict) and 'content' in response:  # dict form
        content = response['content']
    else:
        content = "无效的响应格式"  # placeholder stored for unrecognised responses

    result = {
        "question": question,
        "answer": content,
    }
    raw_response = {
        "question": question,
        "raw_response": str(response),  # stringified so any object is serialisable
    }

    # Both files get the same read-append-write treatment, answers first.
    for path, record in ((output_file, result), (raw_output_file, raw_response)):
        try:
            with open(path, 'r', encoding='utf-8') as f:
                text = f.read()
        except FileNotFoundError:
            text = ''
        # An empty/whitespace-only file is treated as "no records yet";
        # malformed non-empty JSON still raises so data is never discarded.
        records = json.loads(text) if text.strip() else []
        records.append(record)

        with open(path, 'w', encoding='utf-8') as f:
            json.dump(records, f, ensure_ascii=False, indent=4)

def save_responses_knn(prompt, type, index, response, output_file, raw_output_file):
    """Append one model answer and its prompt/raw response to two JSON list files.

    The question text is read from the ``Question`` column of
    ``./data/data.xlsx``, on the sheet selected by ``type``. Each output
    file holds a JSON list; the new record is appended to whatever list is
    already stored there (a missing or empty file starts a new list).

    Args:
        prompt: The prompt to store alongside the raw response.
        type: ``'yes_no'`` selects the ``'Yes or No Train'`` sheet and
            ``'factoid'`` selects ``'Factoid Train'``. (The name shadows the
            builtin but is kept so keyword callers keep working.)
        index: Key used to index the ``Question`` column
            (presumably the row number -- confirm against the caller).
        response: An object exposing a ``content`` attribute, or a dict with
            a ``'content'`` key. Anything else is stored with a placeholder
            answer instead of raising.
        output_file: Path of the JSON file receiving
            ``{"question", "answer"}`` records.
        raw_output_file: Path of the JSON file receiving
            ``{"prompt", "raw_response"}`` records, where the raw response
            is ``str(response)``.

    Raises:
        ValueError: If ``type`` is not ``'yes_no'`` or ``'factoid'``.
    """
    # Validate up front: previously an unknown type left `sheet` unassigned
    # and surfaced as an opaque UnboundLocalError at the read_excel call.
    if type == 'yes_no':
        sheet = 'Yes or No Train'
    elif type == 'factoid':
        sheet = 'Factoid Train'
    else:
        raise ValueError(
            f"unsupported type {type!r}; expected 'yes_no' or 'factoid'"
        )

    data_path = './data/data.xlsx'
    data_train = pd.read_excel(data_path, sheet_name=sheet)
    question = data_train['Question'][index]

    # Extract the answer text from whichever response shape was passed in.
    if hasattr(response, 'content'):  # e.g. a ChatCompletionMessage object
        content = response.content
    elif isinstance(response, dict) and 'content' in response:  # dict form
        content = response['content']
    else:
        content = "无效的响应格式"  # placeholder stored for unrecognised responses

    result = {
        "question": question,
        "answer": content,
    }
    raw_response = {
        "prompt": prompt,
        "raw_response": str(response),  # stringified so any object is serialisable
    }

    # Both files get the same read-append-write treatment, answers first.
    for path, record in ((output_file, result), (raw_output_file, raw_response)):
        try:
            with open(path, 'r', encoding='utf-8') as f:
                text = f.read()
        except FileNotFoundError:
            text = ''
        # An empty/whitespace-only file is treated as "no records yet";
        # malformed non-empty JSON still raises so data is never discarded.
        records = json.loads(text) if text.strip() else []
        records.append(record)

        with open(path, 'w', encoding='utf-8') as f:
            json.dump(records, f, ensure_ascii=False, indent=4)