|
|
import json
|
|
|
import numpy as np
|
|
|
from metrics.graph_matching import (
|
|
|
get_triple_match_f1,
|
|
|
get_graph_match_accuracy,
|
|
|
get_bert_score,
|
|
|
get_bleu_rouge,
|
|
|
split_to_edges,
|
|
|
get_tokens,
|
|
|
get_ged
|
|
|
)
|
|
|
|
|
|
def load_data(gold_path, pred_path):
    """Load gold and predicted triples and align them by source text.

    Both files are parsed as JSON and iterated as sequences of records; each
    record is read through its ``'text'`` and ``'triple_list'`` keys. Only
    texts that appear in the prediction file are kept, so gold records
    without a prediction are ignored, and predictions whose text has no gold
    counterpart are skipped.

    The aligned lists are intended as input for the metric helpers imported
    at the top of this file (triple match F1, graph match accuracy, BERT
    score, BLEU/ROUGE, graph edit distance).

    Args:
        gold_path: Path to the JSON file holding the reference records.
        pred_path: Path to the JSON file holding the predicted records.

    Returns:
        A ``(gold_graphs, pred_graphs)`` tuple of two equally long lists.
        ``gold_graphs[i]`` and ``pred_graphs[i]`` are the ``'triple_list'``
        values of the gold and predicted record that share the same text,
        in the order the predictions appear in the prediction file.

    Raises:
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If either file is not valid JSON.
        KeyError: If a record lacks the ``'text'`` key, or a matched record
            lacks the ``'triple_list'`` key.
    """
    with open(gold_path, 'r', encoding='utf-8') as f:
        gold_data = json.load(f)

    with open(pred_path, 'r', encoding='utf-8') as f:
        pred_data = json.load(f)

    # Index the gold records once instead of rescanning the whole list for
    # every prediction. setdefault keeps the FIRST record for a repeated
    # text, which is what a linear scan with an early break would pick.
    # NOTE(review): assumes 'text' values are hashable (JSON strings).
    gold_by_text = {}
    for gold_item in gold_data:
        gold_by_text.setdefault(gold_item['text'], gold_item)

    gold_graphs = []
    pred_graphs = []
    for pred_item in pred_data:
        gold_item = gold_by_text.get(pred_item['text'])
        if gold_item is None:
            # No reference for this text, so there is nothing to score.
            continue
        gold_graphs.append(gold_item['triple_list'])
        pred_graphs.append(pred_item['triple_list'])

    return gold_graphs, pred_graphs
|
|
|
|
|
|
def evaluate_triples(gold_graphs, pred_graphs):
    """Score predicted graphs against gold graphs and print a short report.

    Two metrics are computed: the triple-match precision/recall/F1 returned
    by ``get_triple_match_f1``, and a BERT score obtained by passing both
    graph lists through ``split_to_edges`` and then ``get_bert_score``. Each
    of the three BERT score results is reduced with ``.mean()``.

    Args:
        gold_graphs: Reference graphs, forwarded unchanged to the metric
            helpers (presumably one triple list per text; confirm against
            the helpers in ``metrics.graph_matching``).
        pred_graphs: Predicted graphs, aligned index-by-index with
            ``gold_graphs``.

    Returns:
        A dict with two entries, ``'triple_match'`` and ``'bert_score'``,
        each mapping ``'precision'``, ``'recall'`` and ``'f1'`` to the
        corresponding value.
    """
    print("开始评估...")
    print("=" * 50)

    # Exact triple matching.
    tm_precision, tm_recall, tm_f1 = get_triple_match_f1(gold_graphs, pred_graphs)
    print("Triple Match")
    print(f"精确率: {tm_precision:.4f}, 召回率: {tm_recall:.4f}, F1: {tm_f1:.4f}")

    # BERT score is computed on the edge-level view of both graph lists and
    # then averaged; the means are taken once and reused for the report and
    # the returned summary.
    edge_scores = get_bert_score(
        split_to_edges(gold_graphs),
        split_to_edges(pred_graphs),
    )
    bs_precision, bs_recall, bs_f1 = (scores.mean() for scores in edge_scores)
    print("BERT Score:")
    print(f"- Precision: {bs_precision:.4f}")
    print(f"- Recall: {bs_recall:.4f}")
    print(f"- F1: {bs_f1:.4f}")

    triple_match_summary = {
        'precision': tm_precision,
        'recall': tm_recall,
        'f1': tm_f1,
    }
    bert_score_summary = {
        'precision': bs_precision,
        'recall': bs_recall,
        'f1': bs_f1,
    }
    return {
        'triple_match': triple_match_summary,
        'bert_score': bert_score_summary,
    }
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: align the predictions with the gold annotations,
    # then run the evaluation, which prints its own report.
    input_paths = {
        'gold_path': './data/train_triples.json',
        'pred_path': './output/gpt.json',
    }
    results = evaluate_triples(*load_data(**input_paths))