# GraphGen/graphgen/models/evaluator/triple/accuracy_evaluator.py
# github-actions[bot]
# Auto-sync from demo at Thu Jan 29 12:51:48 UTC 2026
# 0bd1b0f
import json
import re
from typing import Any, Dict
from graphgen.bases import BaseLLMWrapper, BaseTripleEvaluator
from graphgen.templates import ACCURACY_EVALUATION_PROMPT
from graphgen.utils import detect_main_language, logger
class AccuracyEvaluator(BaseTripleEvaluator):
    """Evaluates accuracy of entity recognition and relation extraction using LLM-as-a-Judge.

    For each chunk, uses LLM to evaluate the quality of extracted entities and relations
    by comparing them with the original chunk content. Provides multi-dimensional quality
    scores (accuracy, completeness, precision).
    """

    def __init__(
        self,
        llm_client: BaseLLMWrapper,
    ):
        """Store the LLM client used as the judge.

        Args:
            llm_client: Wrapper whose ``generate_answer(prompt)`` coroutine is
                awaited once per entity evaluation and once per relation
                evaluation.
        """
        self.llm_client = llm_client

    @staticmethod
    def _default_evaluation() -> Dict[str, Any]:
        """Return a fresh all-zero evaluation used when the judge output is unusable.

        A new dict is built on every call so that the entity and relation
        results never share (and cannot accidentally co-mutate) one object.
        """
        return {
            "accuracy": 0.0,
            "completeness": 0.0,
            "precision": 0.0,
            "overall_score": 0.0,
            "accuracy_reasoning": "Failed to parse LLM response",
            "completeness_reasoning": "",
            "precision_reasoning": "",
            "issues": ["LLM response parsing failed"],
        }

    @classmethod
    def _parse_evaluation_response(cls, response: Any) -> Any:
        """Decode the judge's reply, falling back to the default evaluation.

        Parsing is attempted in two steps:

        1. The whole response is decoded as JSON.
        2. If that fails, the span from the first ``{`` to the last ``}`` is
           decoded instead, which covers replies wrapped in markdown code
           fences or surrounded by extra prose.

        If neither step yields valid JSON, a warning is logged and the default
        (all-zero) evaluation is returned instead of raising.

        Args:
            response: Raw value returned by the LLM client.

        Returns:
            The decoded JSON value, or the default evaluation dict when the
            response cannot be parsed.
        """
        try:
            return json.loads(response)
        except (json.JSONDecodeError, TypeError):
            # TypeError covers a non-string response such as None; whether the
            # client can actually return one is not visible from this file.
            pass

        if isinstance(response, str):
            # Greedy match with DOTALL: first "{" through the last "}" across lines.
            json_match = re.search(r"\{.*\}", response, re.DOTALL)
            if json_match:
                try:
                    return json.loads(json_match.group(0))
                except json.JSONDecodeError:
                    # The brace-delimited span is not valid JSON either; fall
                    # through to the default rather than propagating the error.
                    pass

        logger.warning("Failed to parse LLM response.")
        return cls._default_evaluation()

    async def _judge(self, prompt: str) -> Any:
        """Send one prompt to the LLM client and parse its reply."""
        response = await self.llm_client.generate_answer(prompt)
        return self._parse_evaluation_response(response)

    async def evaluate(self, unit: tuple) -> Dict[str, Any]:
        """Evaluate entity and relation extraction quality using LLM-as-a-Judge.

        Args:
            unit: A 3-tuple ``(chunk_content, nodes, edges)``. ``chunk_content``
                is the source text; ``nodes`` and ``edges`` are serialized with
                ``json.dumps`` and embedded in the entity and relation prompts
                respectively.

        Returns:
            Dictionary containing entity_accuracy and relation_accuracy metrics.
            Each value is the parsed judge output, or the default all-zero
            evaluation when the judge output could not be parsed.
        """
        chunk_content, nodes, edges = unit
        # Prompt templates are keyed by the detected language of the chunk.
        lang = detect_main_language(chunk_content)
        prompts = ACCURACY_EVALUATION_PROMPT[lang]

        # node
        entity_prompt = prompts["ENTITY"].format(
            chunk_content=chunk_content,
            extracted_entities=json.dumps(nodes, ensure_ascii=False, indent=2),
        )
        node_evaluation_result = await self._judge(entity_prompt)

        # edge
        relation_prompt = prompts["RELATION"].format(
            chunk_content=chunk_content,
            extracted_relations=json.dumps(edges, ensure_ascii=False, indent=2),
        )
        edge_evaluation_result = await self._judge(relation_prompt)

        return {
            "entity_accuracy": node_evaluation_result,
            "relation_accuracy": edge_evaluation_result,
        }