import json
import re
from typing import Any, Dict

from graphgen.bases import BaseLLMWrapper, BaseTripleEvaluator
from graphgen.templates import ACCURACY_EVALUATION_PROMPT
from graphgen.utils import detect_main_language, logger


def _failed_evaluation() -> Dict[str, Any]:
    """Return a fresh all-zero evaluation used when the LLM reply is unparseable."""
    # Built on every call so callers never share (and mutate) one dict/list.
    return {
        "accuracy": 0.0,
        "completeness": 0.0,
        "precision": 0.0,
        "overall_score": 0.0,
        "accuracy_reasoning": "Failed to parse LLM response",
        "completeness_reasoning": "",
        "precision_reasoning": "",
        "issues": ["LLM response parsing failed"],
    }


def _parse_evaluation(response: str) -> Any:
    """Decode the JSON evaluation contained in an LLM reply.

    The reply is first parsed as-is. If that fails, the span from the first
    ``{`` to the last ``}`` is parsed instead, which covers replies that wrap
    the JSON in a markdown code block or surrounding prose. When neither
    attempt yields valid JSON, a warning is logged and the default
    "parsing failed" evaluation is returned.
    """
    try:
        return json.loads(response)
    except json.JSONDecodeError:
        pass

    # Greedy match with DOTALL: spans from the first "{" to the last "}".
    json_match = re.search(r"\{.*\}", response, re.DOTALL)
    if json_match:
        try:
            return json.loads(json_match.group(0))
        except json.JSONDecodeError:
            # The braces did not enclose valid JSON; fall through to the
            # default result instead of letting the error escape.
            pass

    logger.warning("Failed to parse LLM response.")
    return _failed_evaluation()


class AccuracyEvaluator(BaseTripleEvaluator):
    """Evaluates accuracy of entity recognition and relation extraction using LLM-as-a-Judge.

    For each chunk, uses LLM to evaluate the quality of extracted entities and
    relations by comparing them with the original chunk content. Provides
    multi-dimensional quality scores (accuracy, completeness, precision).
    """

    def __init__(
        self,
        llm_client: BaseLLMWrapper,
    ):
        """Store the LLM client used to judge extraction quality."""
        self.llm_client = llm_client

    async def evaluate(self, unit: tuple) -> Dict[str, Any]:
        """Evaluate entity and relation extraction quality using LLM-as-a-Judge.

        Args:
            unit: A ``(chunk_content, nodes, edges)`` tuple. ``nodes`` and
                ``edges`` are embedded in the prompts via ``json.dumps`` and
                must therefore be JSON-serializable.

        Returns:
            Dictionary containing entity_accuracy and relation_accuracy
            metrics. Each value is the decoded LLM evaluation, or the default
            all-zero evaluation when the LLM reply could not be parsed.

        Note:
            The prompt is selected by the language detected from
            ``chunk_content``; a lookup error propagates if
            ``ACCURACY_EVALUATION_PROMPT`` has no entry for that language.
        """
        chunk_content, nodes, edges = unit
        lang = detect_main_language(chunk_content)

        # Entities: ask the LLM to judge the extracted nodes against the chunk.
        prompt = ACCURACY_EVALUATION_PROMPT[lang]["ENTITY"].format(
            chunk_content=chunk_content,
            extracted_entities=json.dumps(nodes, ensure_ascii=False, indent=2),
        )
        response = await self.llm_client.generate_answer(prompt)
        node_evaluation_result = _parse_evaluation(response)

        # Relations: same procedure for the extracted edges.
        prompt = ACCURACY_EVALUATION_PROMPT[lang]["RELATION"].format(
            chunk_content=chunk_content,
            extracted_relations=json.dumps(edges, ensure_ascii=False, indent=2),
        )
        response = await self.llm_client.generate_answer(prompt)
        edge_evaluation_result = _parse_evaluation(response)

        return {
            "entity_accuracy": node_evaluation_result,
            "relation_accuracy": edge_evaluation_result,
        }