github-actions[bot] committed on
Commit
0bd1b0f
·
1 Parent(s): b275e29

Auto-sync from demo at Thu Jan 29 12:51:48 UTC 2026

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. graphgen/bases/__init__.py +1 -1
  2. graphgen/bases/base_evaluator.py +21 -2
  3. graphgen/bases/base_generator.py +34 -49
  4. graphgen/bases/base_operator.py +110 -9
  5. graphgen/bases/base_storage.py +6 -0
  6. graphgen/bases/datatypes.py +7 -0
  7. graphgen/common/init_storage.py +16 -4
  8. graphgen/models/__init__.py +0 -8
  9. graphgen/models/evaluator/__init__.py +2 -1
  10. graphgen/models/evaluator/kg/__init__.py +0 -17
  11. graphgen/models/evaluator/kg/accuracy_evaluator.py +0 -350
  12. graphgen/models/evaluator/kg/consistency_evaluator.py +0 -388
  13. graphgen/models/evaluator/kg/structure_evaluator.py +15 -15
  14. graphgen/models/evaluator/qa/length_evaluator.py +8 -7
  15. graphgen/models/evaluator/qa/mtld_evaluator.py +7 -6
  16. graphgen/models/evaluator/qa/reward_evaluator.py +12 -8
  17. graphgen/models/evaluator/qa/uni_evaluator.py +17 -9
  18. graphgen/models/evaluator/triple/__init__.py +1 -0
  19. graphgen/models/evaluator/triple/accuracy_evaluator.py +94 -0
  20. graphgen/models/extractor/schema_guided_extractor.py +5 -33
  21. graphgen/models/generator/aggregated_generator.py +7 -11
  22. graphgen/models/generator/atomic_generator.py +4 -9
  23. graphgen/models/generator/cot_generator.py +6 -9
  24. graphgen/models/generator/fill_in_blank_generator.py +11 -11
  25. graphgen/models/generator/multi_answer_generator.py +14 -12
  26. graphgen/models/generator/multi_choice_generator.py +11 -11
  27. graphgen/models/generator/multi_hop_generator.py +4 -9
  28. graphgen/models/generator/quiz_generator.py +18 -14
  29. graphgen/models/generator/true_false_generator.py +10 -10
  30. graphgen/models/generator/vqa_generator.py +50 -65
  31. graphgen/models/kg_builder/light_rag_kg_builder.py +14 -3
  32. graphgen/models/kg_builder/mm_kg_builder.py +2 -0
  33. graphgen/models/reader/csv_reader.py +1 -1
  34. graphgen/models/reader/json_reader.py +4 -1
  35. graphgen/models/reader/parquet_reader.py +1 -1
  36. graphgen/models/reader/rdf_reader.py +1 -1
  37. graphgen/models/reader/txt_reader.py +2 -1
  38. graphgen/models/storage/__init__.py +0 -6
  39. graphgen/models/storage/rocksdb_cache.py +0 -43
  40. graphgen/models/vis/__init__.py +0 -0
  41. graphgen/models/vis/community_visualizer.py +0 -48
  42. graphgen/operators/build_kg/build_kg_service.py +46 -18
  43. graphgen/operators/build_kg/build_text_kg.py +1 -0
  44. graphgen/operators/chunk/chunk_service.py +39 -45
  45. graphgen/operators/evaluate/evaluate_kg.py +15 -0
  46. graphgen/operators/evaluate/evaluate_qa.py +107 -0
  47. graphgen/operators/evaluate/evaluate_service.py +120 -150
  48. graphgen/operators/evaluate/evaluate_triple.py +39 -0
  49. graphgen/operators/extract/extract_service.py +31 -20
  50. graphgen/operators/generate/generate_service.py +30 -28
graphgen/bases/__init__.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from .base_extractor import BaseExtractor
2
  from .base_generator import BaseGenerator
3
  from .base_kg_builder import BaseKGBuilder
@@ -9,5 +10,4 @@ from .base_searcher import BaseSearcher
9
  from .base_splitter import BaseSplitter
10
  from .base_storage import BaseGraphStorage, BaseKVStorage, StorageNameSpace
11
  from .base_tokenizer import BaseTokenizer
12
- from .base_evaluator import BaseEvaluator
13
  from .datatypes import Chunk, Config, Node, QAPair, Token
 
1
+ from .base_evaluator import BaseKGEvaluator, BaseQAEvaluator, BaseTripleEvaluator
2
  from .base_extractor import BaseExtractor
3
  from .base_generator import BaseGenerator
4
  from .base_kg_builder import BaseKGBuilder
 
10
  from .base_splitter import BaseSplitter
11
  from .base_storage import BaseGraphStorage, BaseKVStorage, StorageNameSpace
12
  from .base_tokenizer import BaseTokenizer
 
13
  from .datatypes import Chunk, Config, Node, QAPair, Token
graphgen/bases/base_evaluator.py CHANGED
@@ -1,10 +1,29 @@
1
  from abc import ABC, abstractmethod
 
 
 
2
  from .datatypes import QAPair
3
 
4
 
5
- class BaseEvaluator(ABC):
6
  @abstractmethod
7
- def evaluate(self, pair: QAPair) -> float:
8
  """
9
  Evaluate the text and return a score.
10
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from abc import ABC, abstractmethod
2
+ from typing import Any
3
+
4
+ from .base_storage import BaseGraphStorage
5
  from .datatypes import QAPair
6
 
7
 
8
+ class BaseQAEvaluator(ABC):
9
  @abstractmethod
10
+ async def evaluate(self, pair: QAPair) -> dict[str, float]:
11
  """
12
  Evaluate the text and return a score.
13
  """
14
+
15
+
16
+ class BaseKGEvaluator(ABC):
17
+ @abstractmethod
18
+ def evaluate(self, kg: BaseGraphStorage) -> dict[str, Any]:
19
+ """
20
+ Evaluate the whole graph and return a dict of scores.
21
+ """
22
+
23
+
24
+ class BaseTripleEvaluator(ABC):
25
+ @abstractmethod
26
+ async def evaluate(self, unit: dict) -> dict[str, float]:
27
+ """
28
+ Evaluate a node/edge and return a score.
29
+ """
graphgen/bases/base_generator.py CHANGED
@@ -21,7 +21,7 @@ class BaseGenerator(ABC):
21
 
22
  @staticmethod
23
  @abstractmethod
24
- def parse_response(response: str) -> Any:
25
  """Parse the LLM response and return the generated QAs"""
26
 
27
  async def generate(
@@ -29,64 +29,49 @@ class BaseGenerator(ABC):
29
  batch: tuple[
30
  list[tuple[str, dict]], list[tuple[Any, Any, dict] | tuple[Any, Any, Any]]
31
  ],
32
- ) -> dict[str, Any]:
33
  """
34
  Generate QAs based on a given batch.
35
  :param batch
36
  :return: QA pairs
37
  """
38
- result = {}
39
  prompt = self.build_prompt(batch)
40
  response = await self.llm_client.generate_answer(prompt)
41
  qa_pairs = self.parse_response(response) # generate one or more QA pairs
42
- result.update(qa_pairs)
43
- return result
44
 
45
  @staticmethod
46
  def format_generation_results(
47
- results: list[dict], output_data_format: str
48
- ) -> list[dict[str, Any]]:
 
 
 
 
 
 
 
 
49
 
50
- flat_results = []
51
- for item in results:
52
- for _, qa_data in item.items():
53
- question = qa_data.get("question", "")
54
- answer = qa_data.get("answer", "")
55
- if "options" in qa_data and qa_data["options"]:
56
- options = qa_data["options"]
57
- options_str = "\n".join(
58
- [f"{key}. {options[key]}" for key in sorted(options.keys())]
59
- )
60
- question += f"\nOptions:\n{options_str}"
61
 
62
- if output_data_format == "Alpaca":
63
- flat_results.append(
64
- {
65
- "instruction": question,
66
- "input": "",
67
- "output": answer,
68
- }
69
- )
70
- elif output_data_format == "Sharegpt":
71
- flat_results.append(
72
- {
73
- "conversations": [
74
- {"from": "human", "value": question},
75
- {"from": "gpt", "value": answer},
76
- ]
77
- }
78
- )
79
- elif output_data_format == "ChatML":
80
- flat_results.append(
81
- {
82
- "messages": [
83
- {"role": "user", "content": question},
84
- {"role": "assistant", "content": answer},
85
- ]
86
- }
87
- )
88
- else:
89
- raise ValueError(
90
- f"Unknown output data format: {output_data_format}"
91
- )
92
- return flat_results
 
21
 
22
  @staticmethod
23
  @abstractmethod
24
+ def parse_response(response: str) -> list[dict]:
25
  """Parse the LLM response and return the generated QAs"""
26
 
27
  async def generate(
 
29
  batch: tuple[
30
  list[tuple[str, dict]], list[tuple[Any, Any, dict] | tuple[Any, Any, Any]]
31
  ],
32
+ ) -> list[dict]:
33
  """
34
  Generate QAs based on a given batch.
35
  :param batch
36
  :return: QA pairs
37
  """
 
38
  prompt = self.build_prompt(batch)
39
  response = await self.llm_client.generate_answer(prompt)
40
  qa_pairs = self.parse_response(response) # generate one or more QA pairs
41
+ return qa_pairs
 
42
 
43
  @staticmethod
44
  def format_generation_results(
45
+ result: dict, output_data_format: str
46
+ ) -> dict[str, Any]:
47
+ question = result.get("question", "")
48
+ answer = result.get("answer", "")
49
+ if "options" in result and result["options"]:
50
+ options = result["options"]
51
+ options_str = "\n".join(
52
+ [f"{key}. {options[key]}" for key in sorted(options.keys())]
53
+ )
54
+ question += f"\nOptions:\n{options_str}"
55
 
56
+ if output_data_format == "Alpaca":
57
+ return {
58
+ "instruction": question,
59
+ "input": "",
60
+ "output": answer,
61
+ }
 
 
 
 
 
62
 
63
+ if output_data_format == "Sharegpt":
64
+ return {
65
+ "conversations": [
66
+ {"from": "human", "value": question},
67
+ {"from": "gpt", "value": answer},
68
+ ]
69
+ }
70
+ if output_data_format == "ChatML":
71
+ return {
72
+ "messages": [
73
+ {"role": "user", "content": question},
74
+ {"role": "assistant", "content": answer},
75
+ ]
76
+ }
77
+ raise ValueError(f"Unknown output data format: {output_data_format}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
graphgen/bases/base_operator.py CHANGED
@@ -1,19 +1,43 @@
1
  import inspect
2
  import os
3
  from abc import ABC, abstractmethod
4
- from typing import Iterable, Union
5
 
 
6
  import pandas as pd
7
  import ray
8
 
9
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  class BaseOperator(ABC):
11
- def __init__(self, working_dir: str = "cache", op_name: str = None):
 
 
 
 
 
12
  # lazy import to avoid circular import
 
13
  from graphgen.utils import set_logger
14
 
15
  log_dir = os.path.join(working_dir, "logs")
16
  self.op_name = op_name or self.__class__.__name__
 
 
 
 
 
17
 
18
  try:
19
  ctx = ray.get_runtime_context()
@@ -45,17 +69,94 @@ class BaseOperator(ABC):
45
 
46
  logger_token = CURRENT_LOGGER_VAR.set(self.logger)
47
  try:
48
- result = self.process(batch)
 
 
 
 
 
 
 
 
 
 
49
  if inspect.isgenerator(result):
50
- yield from result
 
 
 
 
51
  else:
52
- yield result
 
53
  finally:
54
  CURRENT_LOGGER_VAR.reset(logger_token)
55
 
56
- @abstractmethod
57
- def process(self, batch):
58
- raise NotImplementedError("Subclasses must implement the process method.")
59
-
60
  def get_logger(self):
61
  return self.logger
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import inspect
2
  import os
3
  from abc import ABC, abstractmethod
4
+ from typing import Iterable, Tuple, Union
5
 
6
+ import numpy as np
7
  import pandas as pd
8
  import ray
9
 
10
 
11
+ def convert_to_serializable(obj):
12
+ if isinstance(obj, np.ndarray):
13
+ return obj.tolist()
14
+ if isinstance(obj, np.generic):
15
+ return obj.item()
16
+ if isinstance(obj, dict):
17
+ return {k: convert_to_serializable(v) for k, v in obj.items()}
18
+ if isinstance(obj, list):
19
+ return [convert_to_serializable(v) for v in obj]
20
+ return obj
21
+
22
+
23
  class BaseOperator(ABC):
24
+ def __init__(
25
+ self,
26
+ working_dir: str = "cache",
27
+ kv_backend: str = "rocksdb",
28
+ op_name: str = None,
29
+ ):
30
  # lazy import to avoid circular import
31
+ from graphgen.common import init_storage
32
  from graphgen.utils import set_logger
33
 
34
  log_dir = os.path.join(working_dir, "logs")
35
  self.op_name = op_name or self.__class__.__name__
36
+ self.working_dir = working_dir
37
+ self.kv_backend = kv_backend
38
+ self.kv_storage = init_storage(
39
+ backend=kv_backend, working_dir=working_dir, namespace=self.op_name
40
+ )
41
 
42
  try:
43
  ctx = ray.get_runtime_context()
 
69
 
70
  logger_token = CURRENT_LOGGER_VAR.set(self.logger)
71
  try:
72
+ self.kv_storage.reload()
73
+ to_process, recovered = self.split(batch)
74
+ # yield recovered chunks first
75
+ if not recovered.empty:
76
+ yield recovered
77
+
78
+ if to_process.empty:
79
+ return
80
+
81
+ data = to_process.to_dict(orient="records")
82
+ result, meta_update = self.process(data)
83
  if inspect.isgenerator(result):
84
+ is_first = True
85
+ for res in result:
86
+ yield pd.DataFrame([res])
87
+ self.store([res], meta_update if is_first else {})
88
+ is_first = False
89
  else:
90
+ yield pd.DataFrame(result)
91
+ self.store(result, meta_update)
92
  finally:
93
  CURRENT_LOGGER_VAR.reset(logger_token)
94
 
 
 
 
 
95
  def get_logger(self):
96
  return self.logger
97
+
98
+ def get_meta_forward(self):
99
+ return self.kv_storage.get_by_id("_meta_forward") or {}
100
+
101
+ def get_meta_inverse(self):
102
+ return self.kv_storage.get_by_id("_meta_inverse") or {}
103
+
104
+ def get_trace_id(self, content: dict) -> str:
105
+ from graphgen.utils import compute_dict_hash
106
+
107
+ return compute_dict_hash(content, prefix=f"{self.op_name}-")
108
+
109
+ def split(self, batch: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
110
+ """
111
+ Split the input batch into to_process & processed based on _meta data in KV_storage
112
+ :param batch
113
+ :return:
114
+ to_process: DataFrame of documents to be chunked
115
+ recovered: Result DataFrame of already chunked documents
116
+ """
117
+ meta_forward = self.get_meta_forward()
118
+ meta_ids = set(meta_forward.keys())
119
+ mask = batch["_trace_id"].isin(meta_ids)
120
+ to_process = batch[~mask]
121
+ processed = batch[mask]
122
+
123
+ if processed.empty:
124
+ return to_process, pd.DataFrame()
125
+
126
+ all_ids = [
127
+ pid for tid in processed["_trace_id"] for pid in meta_forward.get(tid, [])
128
+ ]
129
+
130
+ recovered_chunks = self.kv_storage.get_by_ids(all_ids)
131
+ recovered_chunks = [c for c in recovered_chunks if c is not None]
132
+ return to_process, pd.DataFrame(recovered_chunks)
133
+
134
+ def store(self, results: list, meta_update: dict):
135
+ results = convert_to_serializable(results)
136
+ meta_update = convert_to_serializable(meta_update)
137
+
138
+ batch = {res["_trace_id"]: res for res in results}
139
+ self.kv_storage.upsert(batch)
140
+
141
+ # update forward meta
142
+ forward_meta = self.get_meta_forward()
143
+ forward_meta.update(meta_update)
144
+ self.kv_storage.update({"_meta_forward": forward_meta})
145
+
146
+ # update inverse meta
147
+ inverse_meta = self.get_meta_inverse()
148
+ for k, v_list in meta_update.items():
149
+ for v in v_list:
150
+ inverse_meta[v] = k
151
+ self.kv_storage.update({"_meta_inverse": inverse_meta})
152
+ self.kv_storage.index_done_callback()
153
+
154
+ @abstractmethod
155
+ def process(self, batch: list) -> Tuple[Union[list, Iterable[dict]], dict]:
156
+ """
157
+ Process the input batch and return the result.
158
+ :param batch
159
+ :return:
160
+ result: DataFrame of processed documents
161
+ meta_update: dict of meta data to be updated
162
+ """
graphgen/bases/base_storage.py CHANGED
@@ -39,6 +39,12 @@ class BaseKVStorage(Generic[T], StorageNameSpace):
39
  def upsert(self, data: dict[str, T]):
40
  raise NotImplementedError
41
 
 
 
 
 
 
 
42
  def drop(self):
43
  raise NotImplementedError
44
 
 
39
  def upsert(self, data: dict[str, T]):
40
  raise NotImplementedError
41
 
42
+ def update(self, data: dict[str, T]):
43
+ raise NotImplementedError
44
+
45
+ def delete(self, ids: list[str]):
46
+ raise NotImplementedError
47
+
48
  def drop(self):
49
  raise NotImplementedError
50
 
graphgen/bases/datatypes.py CHANGED
@@ -31,6 +31,13 @@ class QAPair:
31
  question: str
32
  answer: str
33
 
 
 
 
 
 
 
 
34
 
35
  @dataclass
36
  class Token:
 
31
  question: str
32
  answer: str
33
 
34
+ @staticmethod
35
+ def from_dict(data: dict) -> "QAPair":
36
+ return QAPair(
37
+ question=data.get("question", ""),
38
+ answer=data.get("answer", ""),
39
+ )
40
+
41
 
42
  @dataclass
43
  class Token:
graphgen/common/init_storage.py CHANGED
@@ -8,11 +8,11 @@ from graphgen.bases.base_storage import BaseGraphStorage, BaseKVStorage
8
  class KVStorageActor:
9
  def __init__(self, backend: str, working_dir: str, namespace: str):
10
  if backend == "json_kv":
11
- from graphgen.models import JsonKVStorage
12
 
13
  self.kv = JsonKVStorage(working_dir, namespace)
14
  elif backend == "rocksdb":
15
- from graphgen.models import RocksDBKVStorage
16
 
17
  self.kv = RocksDBKVStorage(working_dir, namespace)
18
  else:
@@ -42,6 +42,12 @@ class KVStorageActor:
42
  def upsert(self, data: dict) -> dict:
43
  return self.kv.upsert(data)
44
 
 
 
 
 
 
 
45
  def drop(self):
46
  return self.kv.drop()
47
 
@@ -55,11 +61,11 @@ class KVStorageActor:
55
  class GraphStorageActor:
56
  def __init__(self, backend: str, working_dir: str, namespace: str):
57
  if backend == "networkx":
58
- from graphgen.models import NetworkXStorage
59
 
60
  self.graph = NetworkXStorage(working_dir, namespace)
61
  elif backend == "kuzu":
62
- from graphgen.models import KuzuStorage
63
 
64
  self.graph = KuzuStorage(working_dir, namespace)
65
  else:
@@ -168,6 +174,12 @@ class RemoteKVStorageProxy(BaseKVStorage):
168
  def upsert(self, data: Dict[str, Any]):
169
  return ray.get(self.actor.upsert.remote(data))
170
 
 
 
 
 
 
 
171
  def drop(self):
172
  return ray.get(self.actor.drop.remote())
173
 
 
8
  class KVStorageActor:
9
  def __init__(self, backend: str, working_dir: str, namespace: str):
10
  if backend == "json_kv":
11
+ from graphgen.storage import JsonKVStorage
12
 
13
  self.kv = JsonKVStorage(working_dir, namespace)
14
  elif backend == "rocksdb":
15
+ from graphgen.storage import RocksDBKVStorage
16
 
17
  self.kv = RocksDBKVStorage(working_dir, namespace)
18
  else:
 
42
  def upsert(self, data: dict) -> dict:
43
  return self.kv.upsert(data)
44
 
45
+ def update(self, data: dict):
46
+ return self.kv.update(data)
47
+
48
+ def delete(self, ids: list[str]):
49
+ return self.kv.delete(ids)
50
+
51
  def drop(self):
52
  return self.kv.drop()
53
 
 
61
  class GraphStorageActor:
62
  def __init__(self, backend: str, working_dir: str, namespace: str):
63
  if backend == "networkx":
64
+ from graphgen.storage import NetworkXStorage
65
 
66
  self.graph = NetworkXStorage(working_dir, namespace)
67
  elif backend == "kuzu":
68
+ from graphgen.storage import KuzuStorage
69
 
70
  self.graph = KuzuStorage(working_dir, namespace)
71
  else:
 
174
  def upsert(self, data: Dict[str, Any]):
175
  return ray.get(self.actor.upsert.remote(data))
176
 
177
+ def update(self, data: Dict[str, Any]):
178
+ return ray.get(self.actor.update.remote(data))
179
+
180
+ def delete(self, ids: list[str]):
181
+ return ray.get(self.actor.delete.remote(ids))
182
+
183
  def drop(self):
184
  return ray.get(self.actor.drop.remote())
185
 
graphgen/models/__init__.py CHANGED
@@ -1,6 +1,5 @@
1
  from .evaluator import (
2
  AccuracyEvaluator,
3
- ConsistencyEvaluator,
4
  LengthEvaluator,
5
  MTLDEvaluator,
6
  RewardEvaluator,
@@ -44,11 +43,4 @@ from .searcher.kg.wiki_search import WikiSearch
44
  from .searcher.web.bing_search import BingSearch
45
  from .searcher.web.google_search import GoogleSearch
46
  from .splitter import ChineseRecursiveTextSplitter, RecursiveCharacterSplitter
47
- from .storage import (
48
- JsonKVStorage,
49
- KuzuStorage,
50
- NetworkXStorage,
51
- RocksDBCache,
52
- RocksDBKVStorage,
53
- )
54
  from .tokenizer import Tokenizer
 
1
  from .evaluator import (
2
  AccuracyEvaluator,
 
3
  LengthEvaluator,
4
  MTLDEvaluator,
5
  RewardEvaluator,
 
43
  from .searcher.web.bing_search import BingSearch
44
  from .searcher.web.google_search import GoogleSearch
45
  from .splitter import ChineseRecursiveTextSplitter, RecursiveCharacterSplitter
 
 
 
 
 
 
 
46
  from .tokenizer import Tokenizer
graphgen/models/evaluator/__init__.py CHANGED
@@ -1,2 +1,3 @@
1
- from .kg import AccuracyEvaluator, ConsistencyEvaluator, StructureEvaluator
2
  from .qa import LengthEvaluator, MTLDEvaluator, RewardEvaluator, UniEvaluator
 
 
1
+ from .kg import StructureEvaluator
2
  from .qa import LengthEvaluator, MTLDEvaluator, RewardEvaluator, UniEvaluator
3
+ from .triple import AccuracyEvaluator
graphgen/models/evaluator/kg/__init__.py CHANGED
@@ -1,18 +1 @@
1
- """
2
- Knowledge Graph Quality Evaluator
3
-
4
- This module provides comprehensive quality evaluation for knowledge graphs,
5
- 1. accuracy assessment (entity/relation/triple validation),
6
- 2. consistency assessment (attribute conflict detection), and structural
7
- 3. robustness assessment (noise ratio, connectivity, degree distribution).
8
- """
9
-
10
- from .accuracy_evaluator import AccuracyEvaluator
11
- from .consistency_evaluator import ConsistencyEvaluator
12
  from .structure_evaluator import StructureEvaluator
13
-
14
- __all__ = [
15
- "AccuracyEvaluator",
16
- "ConsistencyEvaluator",
17
- "StructureEvaluator",
18
- ]
 
 
 
 
 
 
 
 
 
 
 
 
1
  from .structure_evaluator import StructureEvaluator
 
 
 
 
 
 
graphgen/models/evaluator/kg/accuracy_evaluator.py DELETED
@@ -1,350 +0,0 @@
1
- import asyncio
2
- import json
3
- import re
4
- from typing import Any, Dict, List
5
-
6
- from graphgen.bases import BaseGraphStorage, BaseKVStorage, BaseLLMWrapper
7
- from graphgen.bases.datatypes import Chunk
8
- from graphgen.templates import ACCURACY_EVALUATION_PROMPT
9
- from graphgen.utils import detect_main_language, logger
10
-
11
-
12
- class AccuracyEvaluator:
13
- """Evaluates accuracy of entity recognition and relation extraction using LLM-as-a-Judge.
14
-
15
- For each chunk, uses LLM to evaluate the quality of extracted entities and relations
16
- by comparing them with the original chunk content. Provides multi-dimensional quality
17
- scores (accuracy, completeness, precision).
18
- """
19
-
20
- def __init__(
21
- self,
22
- graph_storage: BaseGraphStorage,
23
- chunk_storage: BaseKVStorage,
24
- llm_client: BaseLLMWrapper,
25
- ):
26
- self.graph_storage = graph_storage
27
- self.chunk_storage = chunk_storage
28
- self.llm_client = llm_client
29
-
30
- def evaluate(self) -> Dict[str, Any]:
31
- """Evaluate entity and relation extraction quality using LLM-as-a-Judge.
32
-
33
- Returns:
34
- Dictionary containing entity_accuracy and relation_accuracy metrics.
35
- """
36
- # 1. Load all chunks from storage
37
- chunks = self._load_chunks_from_storage()
38
-
39
- if not chunks:
40
- logger.warning("No chunks found in storage")
41
- return {"error": "No chunks found in storage"}
42
-
43
- logger.info(f"Found {len(chunks)} chunks to evaluate")
44
-
45
- # 2. Evaluate each chunk
46
- entity_evaluations, relation_evaluations = self._evaluate_all_chunks(chunks)
47
-
48
- # 3. Aggregate results
49
- return self._aggregate_evaluation_results(
50
- entity_evaluations, relation_evaluations
51
- )
52
-
53
- def _load_chunks_from_storage(self) -> List[Chunk]:
54
- """Load all chunks from chunk storage."""
55
- chunks = []
56
- all_chunk_data = self.chunk_storage.get_all()
57
-
58
- for chunk_id, chunk_data in all_chunk_data.items():
59
- try:
60
- chunk = Chunk.from_dict(chunk_id, chunk_data)
61
- chunks.append(chunk)
62
- except Exception as e:
63
- logger.warning(f"Failed to load chunk {chunk_id}: {e}")
64
- continue
65
-
66
- return chunks
67
-
68
- def _get_extracted_entities_for_chunk(self, chunk_id: str) -> List[Dict]:
69
- """Get all entities extracted from the specified chunk."""
70
- entities = []
71
- all_nodes = self.graph_storage.get_all_nodes() or []
72
-
73
- for node_id, node_data in all_nodes:
74
- if not isinstance(node_data, dict):
75
- continue
76
- source_ids = node_data.get("source_id", "").split("<SEP>")
77
- # Check if this chunk_id is in the source_ids
78
- if chunk_id in [sid.strip() for sid in source_ids if sid.strip()]:
79
- entities.append(
80
- {
81
- "entity_name": node_data.get("entity_name", node_id),
82
- "entity_type": node_data.get("entity_type", ""),
83
- "description": node_data.get("description", ""),
84
- }
85
- )
86
-
87
- return entities
88
-
89
- def _get_extracted_relations_for_chunk(self, chunk_id: str) -> List[Dict]:
90
- """Get all relations extracted from the specified chunk."""
91
- relations = []
92
- all_edges = self.graph_storage.get_all_edges() or []
93
-
94
- for src_id, dst_id, edge_data in all_edges:
95
- if not isinstance(edge_data, dict):
96
- continue
97
- source_ids = edge_data.get("source_id", "").split("<SEP>")
98
- # Check if this chunk_id is in the source_ids
99
- if chunk_id in [sid.strip() for sid in source_ids if sid.strip()]:
100
- src_node = self.graph_storage.get_node(src_id) or {}
101
- dst_node = self.graph_storage.get_node(dst_id) or {}
102
- relations.append(
103
- {
104
- "source_entity": src_node.get("entity_name", src_id),
105
- "target_entity": dst_node.get("entity_name", dst_id),
106
- "relationship_summary": edge_data.get("description", ""),
107
- }
108
- )
109
-
110
- return relations
111
-
112
- def _evaluate_all_chunks(
113
- self, chunks: List[Chunk]
114
- ) -> tuple[List[Dict], List[Dict]]:
115
- """Evaluate all chunks sequentially."""
116
- entity_evaluations = []
117
- relation_evaluations = []
118
-
119
- for chunk in chunks:
120
- try:
121
- entities = self._get_extracted_entities_for_chunk(chunk.id)
122
- relations = self._get_extracted_relations_for_chunk(chunk.id)
123
-
124
- entity_eval = self._evaluate_entity_extraction(chunk, entities)
125
- relation_eval = self._evaluate_relation_extraction(chunk, relations)
126
-
127
- entity_evaluations.append(entity_eval)
128
- relation_evaluations.append(relation_eval)
129
- except Exception as e:
130
- logger.error(f"Failed to evaluate chunk {chunk.id}: {e}")
131
- continue
132
-
133
- return entity_evaluations, relation_evaluations
134
-
135
- def _evaluate_entity_extraction(
136
- self, chunk: Chunk, extracted_entities: List[Dict]
137
- ) -> Dict[str, Any]:
138
- """Use LLM to evaluate entity extraction quality."""
139
- try:
140
- lang = detect_main_language(chunk.content)
141
-
142
- prompt = ACCURACY_EVALUATION_PROMPT[lang]["ENTITY"].format(
143
- chunk_content=chunk.content,
144
- extracted_entities=json.dumps(
145
- extracted_entities, ensure_ascii=False, indent=2
146
- ),
147
- )
148
-
149
- response = asyncio.run(self.llm_client.generate_answer(prompt))
150
-
151
- # Try to parse JSON response
152
- try:
153
- evaluation_result = json.loads(response)
154
- except json.JSONDecodeError:
155
- # Try to extract JSON from markdown code blocks or other formats
156
- json_match = re.search(r"\{.*\}", response, re.DOTALL)
157
- if json_match:
158
- evaluation_result = json.loads(json_match.group(0))
159
- else:
160
- logger.warning(
161
- f"Failed to parse LLM response for chunk {chunk.id}: {response[:200]}"
162
- )
163
- # Return default evaluation
164
- evaluation_result = {
165
- "accuracy": 0.0,
166
- "completeness": 0.0,
167
- "precision": 0.0,
168
- "overall_score": 0.0,
169
- "accuracy_reasoning": "Failed to parse LLM response",
170
- "completeness_reasoning": "",
171
- "precision_reasoning": "",
172
- "issues": ["LLM response parsing failed"],
173
- }
174
-
175
- # Validate and calculate overall_score if not provided
176
- if "overall_score" not in evaluation_result:
177
- accuracy = float(evaluation_result.get("accuracy", 0.0))
178
- completeness = float(evaluation_result.get("completeness", 0.0))
179
- precision = float(evaluation_result.get("precision", 0.0))
180
- evaluation_result["overall_score"] = (
181
- 0.4 * accuracy + 0.4 * completeness + 0.2 * precision
182
- )
183
-
184
- return {
185
- "chunk_id": chunk.id,
186
- "chunk_content": chunk.content[:200]
187
- if chunk.content
188
- else "", # First 200 chars for debugging
189
- "extracted_entities_count": len(extracted_entities),
190
- **evaluation_result,
191
- }
192
- except Exception as e:
193
- logger.error(
194
- f"Error evaluating entity extraction for chunk {chunk.id}: {e}"
195
- )
196
- return {
197
- "chunk_id": chunk.id,
198
- "chunk_content": chunk.content[:200] if chunk.content else "",
199
- "extracted_entities_count": len(extracted_entities),
200
- "accuracy": 0.0,
201
- "completeness": 0.0,
202
- "precision": 0.0,
203
- "overall_score": 0.0,
204
- "accuracy_reasoning": f"Evaluation failed: {str(e)}",
205
- "completeness_reasoning": "",
206
- "precision_reasoning": "",
207
- "issues": [f"Evaluation error: {str(e)}"],
208
- }
209
-
210
- def _evaluate_relation_extraction(
211
- self, chunk: Chunk, extracted_relations: List[Dict]
212
- ) -> Dict[str, Any]:
213
- """Use LLM to evaluate relation extraction quality."""
214
- try:
215
- lang = detect_main_language(chunk.content)
216
- prompt = ACCURACY_EVALUATION_PROMPT[lang]["RELATION"].format(
217
- chunk_content=chunk.content,
218
- extracted_relations=json.dumps(
219
- extracted_relations, ensure_ascii=False, indent=2
220
- ),
221
- )
222
-
223
- response = asyncio.run(self.llm_client.generate_answer(prompt))
224
-
225
- # Try to parse JSON response
226
- try:
227
- evaluation_result = json.loads(response)
228
- except json.JSONDecodeError:
229
- # Try to extract JSON from markdown code blocks or other formats
230
- json_match = re.search(r"\{.*\}", response, re.DOTALL)
231
- if json_match:
232
- evaluation_result = json.loads(json_match.group(0))
233
- else:
234
- logger.warning(
235
- f"Failed to parse LLM response for chunk {chunk.id}: {response[:200]}"
236
- )
237
- # Return default evaluation
238
- evaluation_result = {
239
- "accuracy": 0.0,
240
- "completeness": 0.0,
241
- "precision": 0.0,
242
- "overall_score": 0.0,
243
- "accuracy_reasoning": "Failed to parse LLM response",
244
- "completeness_reasoning": "",
245
- "precision_reasoning": "",
246
- "issues": ["LLM response parsing failed"],
247
- }
248
-
249
- # Validate and calculate overall_score if not provided
250
- if "overall_score" not in evaluation_result:
251
- accuracy = float(evaluation_result.get("accuracy", 0.0))
252
- completeness = float(evaluation_result.get("completeness", 0.0))
253
- precision = float(evaluation_result.get("precision", 0.0))
254
- evaluation_result["overall_score"] = (
255
- 0.4 * accuracy + 0.4 * completeness + 0.2 * precision
256
- )
257
-
258
- return {
259
- "chunk_id": chunk.id,
260
- "chunk_content": chunk.content[:200] if chunk.content else "",
261
- "extracted_relations_count": len(extracted_relations),
262
- **evaluation_result,
263
- }
264
- except Exception as e:
265
- logger.error(
266
- f"Error evaluating relation extraction for chunk {chunk.id}: {e}"
267
- )
268
- return {
269
- "chunk_id": chunk.id,
270
- "chunk_content": chunk.content[:200] if chunk.content else "",
271
- "extracted_relations_count": len(extracted_relations),
272
- "accuracy": 0.0,
273
- "completeness": 0.0,
274
- "precision": 0.0,
275
- "overall_score": 0.0,
276
- "accuracy_reasoning": f"Evaluation failed: {str(e)}",
277
- "completeness_reasoning": "",
278
- "precision_reasoning": "",
279
- "issues": [f"Evaluation error: {str(e)}"],
280
- }
281
-
282
- @staticmethod
283
- def _aggregate_evaluation_results(
284
- entity_evaluations: List[Dict], relation_evaluations: List[Dict]
285
- ) -> Dict[str, Any]:
286
- """Aggregate evaluation results from all chunks."""
287
-
288
- def calculate_stats(scores: List[float]) -> Dict[str, float]:
289
- if not scores:
290
- return {"mean": 0.0, "median": 0.0, "min": 0.0, "max": 0.0, "std": 0.0}
291
- sorted_scores = sorted(scores)
292
- n = len(scores)
293
- mean = sum(scores) / n
294
- median = (
295
- sorted_scores[n // 2]
296
- if n % 2 == 1
297
- else (sorted_scores[n // 2 - 1] + sorted_scores[n // 2]) / 2
298
- )
299
- variance = sum((x - mean) ** 2 for x in scores) / n
300
- std = variance**0.5
301
-
302
- return {
303
- "mean": mean,
304
- "median": median,
305
- "min": min(scores),
306
- "max": max(scores),
307
- "std": std,
308
- }
309
-
310
- # Extract scores
311
- entity_overall_scores = [
312
- e.get("overall_score", 0.0) for e in entity_evaluations
313
- ]
314
- entity_accuracy_scores = [e.get("accuracy", 0.0) for e in entity_evaluations]
315
- entity_completeness_scores = [
316
- e.get("completeness", 0.0) for e in entity_evaluations
317
- ]
318
- entity_precision_scores = [e.get("precision", 0.0) for e in entity_evaluations]
319
-
320
- relation_overall_scores = [
321
- r.get("overall_score", 0.0) for r in relation_evaluations
322
- ]
323
- relation_accuracy_scores = [
324
- r.get("accuracy", 0.0) for r in relation_evaluations
325
- ]
326
- relation_completeness_scores = [
327
- r.get("completeness", 0.0) for r in relation_evaluations
328
- ]
329
- relation_precision_scores = [
330
- r.get("precision", 0.0) for r in relation_evaluations
331
- ]
332
-
333
- return {
334
- "entity_accuracy": {
335
- "overall_score": calculate_stats(entity_overall_scores),
336
- "accuracy": calculate_stats(entity_accuracy_scores),
337
- "completeness": calculate_stats(entity_completeness_scores),
338
- "precision": calculate_stats(entity_precision_scores),
339
- "total_chunks": len(entity_evaluations),
340
- "detailed_results": entity_evaluations,
341
- },
342
- "relation_accuracy": {
343
- "overall_score": calculate_stats(relation_overall_scores),
344
- "accuracy": calculate_stats(relation_accuracy_scores),
345
- "completeness": calculate_stats(relation_completeness_scores),
346
- "precision": calculate_stats(relation_precision_scores),
347
- "total_chunks": len(relation_evaluations),
348
- "detailed_results": relation_evaluations,
349
- },
350
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
graphgen/models/evaluator/kg/consistency_evaluator.py DELETED
@@ -1,388 +0,0 @@
1
- import asyncio
2
- import json
3
- import re
4
- from typing import Any, Dict, List
5
-
6
- from graphgen.bases import BaseGraphStorage, BaseKVStorage, BaseLLMWrapper
7
- from graphgen.bases.datatypes import Chunk
8
- from graphgen.templates.evaluation.kg.consistency_evaluation import (
9
- CONSISTENCY_EVALUATION_PROMPT,
10
- )
11
- from graphgen.utils import detect_main_language, logger
12
-
13
-
14
- class ConsistencyEvaluator:
15
- """Evaluates consistency by detecting semantic conflicts using LLM-as-a-Judge.
16
-
17
- For entities with multiple source chunks, compares entity_type and description
18
- extracted from different chunks to detect semantic conflicts.
19
- """
20
-
21
- def __init__(
22
- self,
23
- graph_storage: BaseGraphStorage,
24
- chunk_storage: BaseKVStorage,
25
- llm_client: BaseLLMWrapper,
26
- ):
27
- self.graph_storage = graph_storage
28
- self.chunk_storage = chunk_storage
29
- self.llm_client = llm_client
30
-
31
- def evaluate(self) -> Dict[str, Any]:
32
- """Evaluate consistency by detecting semantic conflicts."""
33
- all_nodes = self.graph_storage.get_all_nodes() or []
34
- if not all_nodes:
35
- return {"error": "Empty graph"}
36
-
37
- return self._evaluate_consistency(all_nodes)
38
-
39
- def _evaluate_consistency(self, all_nodes: List) -> Dict[str, Any]:
40
- """Evaluate consistency by detecting semantic conflicts."""
41
- # Filter entities with multiple source chunks
42
- entities_with_multiple_sources = []
43
- for node_id, node_data in all_nodes:
44
- if not isinstance(node_data, dict):
45
- continue
46
- source_ids = node_data.get("source_id", "").split("<SEP>")
47
- source_ids = [sid.strip() for sid in source_ids if sid.strip()]
48
- if len(source_ids) > 1: # Only check entities from multiple chunks
49
- entities_with_multiple_sources.append((node_id, node_data, source_ids))
50
-
51
- if not entities_with_multiple_sources:
52
- logger.info(
53
- "No entities with multiple sources found, skipping consistency check"
54
- )
55
- return {
56
- "conflict_rate": 0.0,
57
- "conflict_entities_count": 0,
58
- "total_entities": len(all_nodes),
59
- "conflicts": [],
60
- }
61
-
62
- logger.info(
63
- f"Checking consistency for {len(entities_with_multiple_sources)} entities with multiple sources"
64
- )
65
-
66
- # Evaluate entities sequentially
67
- conflicts = []
68
- conflict_entities = set()
69
-
70
- for entity_info in entities_with_multiple_sources:
71
- try:
72
- entity_id, entity_conflicts = self._evaluate_entity_consistency(entity_info)
73
- if entity_conflicts:
74
- conflicts.extend(entity_conflicts)
75
- conflict_entities.add(entity_id)
76
- except Exception as e:
77
- logger.error(
78
- f"Failed to evaluate entity {entity_info[0]}: {e}"
79
- )
80
- continue
81
-
82
- total_entities = len(all_nodes)
83
- conflict_rate = (
84
- len(conflict_entities) / total_entities if total_entities > 0 else 0
85
- )
86
-
87
- return {
88
- "conflict_rate": conflict_rate,
89
- "conflict_entities_count": len(conflict_entities),
90
- "total_entities": total_entities,
91
- "entities_checked": len(entities_with_multiple_sources),
92
- "conflicts": conflicts[:100], # Limit to first 100 conflicts
93
- }
94
-
95
- def _clean_entity_id(self, entity_id: str) -> str:
96
- """Clean entity ID by removing surrounding quotes."""
97
- clean_id = entity_id.strip()
98
- if (clean_id.startswith('"') and clean_id.endswith('"')) or (
99
- clean_id.startswith("'") and clean_id.endswith("'")
100
- ):
101
- clean_id = clean_id[1:-1].strip()
102
- return clean_id
103
-
104
- def _evaluate_entity_consistency(
105
- self, entity_info: tuple
106
- ) -> tuple[str, List[Dict]]:
107
- """Evaluate consistency for a single entity."""
108
- entity_id, _node_data, source_ids = entity_info
109
- # Clean entity_id for display
110
- clean_entity_id = self._clean_entity_id(entity_id)
111
- conflicts = []
112
-
113
- # Get chunks for this entity
114
- chunks = self._get_entity_chunks(source_ids)
115
- if len(chunks) < 2:
116
- return entity_id, []
117
-
118
- # Extract entity attributes from each chunk
119
- entity_extractions = {}
120
- for chunk in chunks:
121
- extraction = self._extract_entity_from_chunk(entity_id, chunk)
122
- if extraction:
123
- entity_extractions[chunk.id] = extraction
124
-
125
- if len(entity_extractions) < 2:
126
- return entity_id, []
127
-
128
- # Check entity type consistency
129
- type_extractions = {
130
- chunk_id: ext.get("entity_type", "")
131
- for chunk_id, ext in entity_extractions.items()
132
- }
133
- type_conflict = self._check_entity_type_consistency(
134
- entity_id, type_extractions
135
- )
136
- if type_conflict and type_conflict.get("has_conflict", False):
137
- conflicts.append(
138
- {
139
- "entity_id": clean_entity_id,
140
- "conflict_type": "entity_type",
141
- "conflict_severity": type_conflict.get("conflict_severity", 0.0),
142
- "conflict_reasoning": type_conflict.get("conflict_reasoning", ""),
143
- "conflicting_values": type_conflict.get("conflicting_types", []),
144
- "recommended_value": type_conflict.get("recommended_type", ""),
145
- }
146
- )
147
-
148
- # Check entity description consistency
149
- descriptions = {
150
- chunk_id: ext.get("description", "")
151
- for chunk_id, ext in entity_extractions.items()
152
- }
153
- desc_conflict = self._check_entity_description_consistency(
154
- entity_id, descriptions
155
- )
156
- if desc_conflict and desc_conflict.get("has_conflict", False):
157
- conflicts.append(
158
- {
159
- "entity_id": clean_entity_id,
160
- "conflict_type": "description",
161
- "conflict_severity": desc_conflict.get("conflict_severity", 0.0),
162
- "conflict_reasoning": desc_conflict.get("conflict_reasoning", ""),
163
- "conflicting_values": desc_conflict.get(
164
- "conflicting_descriptions", []
165
- ),
166
- "conflict_details": desc_conflict.get("conflict_details", ""),
167
- }
168
- )
169
-
170
- return entity_id, conflicts
171
-
172
- def _get_entity_chunks(self, source_ids: List[str]) -> List[Chunk]:
173
- """Get all chunks related to an entity."""
174
- chunks = []
175
- for chunk_id in source_ids:
176
- chunk_data = self.chunk_storage.get_by_id(chunk_id)
177
- if chunk_data:
178
- try:
179
- chunk = Chunk.from_dict(chunk_id, chunk_data)
180
- chunks.append(chunk)
181
- except Exception as e:
182
- logger.warning(f"Failed to load chunk {chunk_id}: {e}")
183
- continue
184
- return chunks
185
-
186
- def _extract_entity_from_chunk(
187
- self, entity_id: str, chunk: Chunk
188
- ) -> Dict[str, str]:
189
- """Extract entity attributes from a chunk using LLM."""
190
- try:
191
- # Clean entity_id: remove surrounding quotes if present
192
- clean_entity_id = self._clean_entity_id(entity_id)
193
-
194
- # Detect language and get appropriate prompt
195
- lang = detect_main_language(chunk.content)
196
- prompt = CONSISTENCY_EVALUATION_PROMPT[lang]["ENTITY_EXTRACTION"].format(
197
- entity_name=clean_entity_id,
198
- chunk_content=chunk.content[:2000]
199
- if chunk.content
200
- else "", # Limit content length
201
- )
202
-
203
- response = asyncio.run(self.llm_client.generate_answer(prompt))
204
-
205
- # Try to parse JSON response
206
- try:
207
- extraction = json.loads(response)
208
- except json.JSONDecodeError:
209
- # Try to extract JSON from markdown code blocks
210
- json_match = re.search(r"\{.*\}", response, re.DOTALL)
211
- if json_match:
212
- extraction = json.loads(json_match.group(0))
213
- else:
214
- logger.warning(
215
- f"Failed to parse extraction response for {entity_id} in chunk {chunk.id}"
216
- )
217
- return {}
218
-
219
- # Normalize entity_type to lowercase and validate
220
- entity_type = extraction.get("entity_type", "").lower().strip()
221
- # Valid preset types
222
- valid_types = {
223
- "concept",
224
- "date",
225
- "location",
226
- "keyword",
227
- "organization",
228
- "person",
229
- "event",
230
- "work",
231
- "nature",
232
- "artificial",
233
- "science",
234
- "technology",
235
- "mission",
236
- "gene",
237
- }
238
- # If entity_type is not in valid types, default to "concept"
239
- if entity_type not in valid_types:
240
- if entity_type: # If LLM provided a type but it's invalid
241
- logger.warning(
242
- f"Invalid entity_type '{entity_type}' for entity {clean_entity_id} in chunk {chunk.id}, "
243
- f"defaulting to 'concept'"
244
- )
245
- entity_type = "concept"
246
-
247
- return {
248
- "entity_type": entity_type,
249
- "description": extraction.get("description", ""),
250
- }
251
- except Exception as e:
252
- logger.error(
253
- f"Error extracting entity {entity_id} from chunk {chunk.id}: {e}"
254
- )
255
- return {}
256
-
257
- def _check_entity_type_consistency(
258
- self, entity_id: str, type_extractions: Dict[str, str]
259
- ) -> Dict[str, Any]:
260
- """Check entity type consistency using LLM."""
261
- if len(set(type_extractions.values())) <= 1:
262
- # All types are the same, no conflict
263
- return {"has_conflict": False}
264
-
265
- try:
266
- type_list = [
267
- f"Chunk {chunk_id}: {entity_type}"
268
- for chunk_id, entity_type in type_extractions.items()
269
- if entity_type
270
- ]
271
-
272
- # Detect language from type extraction text
273
- type_text = "\n".join(type_list)
274
- lang = detect_main_language(type_text)
275
- prompt = CONSISTENCY_EVALUATION_PROMPT[lang]["ENTITY_TYPE_CONFLICT"].format(
276
- entity_name=entity_id, type_extractions=type_text
277
- )
278
-
279
- response = asyncio.run(self.llm_client.generate_answer(prompt))
280
-
281
- # Parse JSON response
282
- try:
283
- result = json.loads(response)
284
- except json.JSONDecodeError:
285
- json_match = re.search(r"\{.*\}", response, re.DOTALL)
286
- if json_match:
287
- result = json.loads(json_match.group(0))
288
- else:
289
- logger.warning(
290
- f"Failed to parse conflict detection response for {entity_id}"
291
- )
292
- return {"has_conflict": False}
293
-
294
- return result
295
- except Exception as e:
296
- logger.error(f"Error checking type consistency for {entity_id}: {e}")
297
- return {"has_conflict": False}
298
-
299
- def _check_entity_description_consistency(
300
- self, entity_id: str, descriptions: Dict[str, str]
301
- ) -> Dict[str, Any]:
302
- """Check entity description consistency using LLM."""
303
- # Filter out empty descriptions
304
- valid_descriptions = {k: v for k, v in descriptions.items() if v}
305
- if len(valid_descriptions) < 2:
306
- return {"has_conflict": False}
307
-
308
- if len(set(valid_descriptions.values())) <= 1:
309
- # All descriptions are the same, no conflict
310
- return {"has_conflict": False}
311
-
312
- try:
313
- desc_list = [
314
- f"Chunk {chunk_id}: {description}"
315
- for chunk_id, description in valid_descriptions.items()
316
- ]
317
-
318
- # Detect language from description text
319
- desc_text = "\n".join(desc_list)
320
- lang = detect_main_language(desc_text)
321
- prompt = CONSISTENCY_EVALUATION_PROMPT[lang]["ENTITY_DESCRIPTION_CONFLICT"].format(
322
- entity_name=entity_id, descriptions=desc_text
323
- )
324
-
325
- response = asyncio.run(self.llm_client.generate_answer(prompt))
326
-
327
- # Parse JSON response
328
- try:
329
- result = json.loads(response)
330
- except json.JSONDecodeError:
331
- json_match = re.search(r"\{.*\}", response, re.DOTALL)
332
- if json_match:
333
- result = json.loads(json_match.group(0))
334
- else:
335
- logger.warning(
336
- f"Failed to parse conflict detection response for {entity_id}"
337
- )
338
- return {"has_conflict": False}
339
-
340
- return result
341
- except Exception as e:
342
- logger.error(f"Error checking description consistency for {entity_id}: {e}")
343
- return {"has_conflict": False}
344
-
345
- def _check_relation_consistency(
346
- self, src_id: str, dst_id: str, relation_extractions: Dict[str, str]
347
- ) -> Dict[str, Any]:
348
- """Check relation consistency using LLM."""
349
- if len(set(relation_extractions.values())) <= 1:
350
- return {"has_conflict": False}
351
-
352
- try:
353
- rel_list = [
354
- f"Chunk {chunk_id}: {relation}"
355
- for chunk_id, relation in relation_extractions.items()
356
- if relation
357
- ]
358
-
359
- # Detect language from relation description text
360
- rel_text = "\n".join(rel_list)
361
- lang = detect_main_language(rel_text)
362
- prompt = CONSISTENCY_EVALUATION_PROMPT[lang]["RELATION_CONFLICT"].format(
363
- source_entity=src_id,
364
- target_entity=dst_id,
365
- relation_descriptions=rel_text,
366
- )
367
-
368
- response = asyncio.run(self.llm_client.generate_answer(prompt))
369
-
370
- # Parse JSON response
371
- try:
372
- result = json.loads(response)
373
- except json.JSONDecodeError:
374
- json_match = re.search(r"\{.*\}", response, re.DOTALL)
375
- if json_match:
376
- result = json.loads(json_match.group(0))
377
- else:
378
- logger.warning(
379
- f"Failed to parse relation conflict response for {src_id}->{dst_id}"
380
- )
381
- return {"has_conflict": False}
382
-
383
- return result
384
- except Exception as e:
385
- logger.error(
386
- f"Error checking relation consistency for {src_id}->{dst_id}: {e}"
387
- )
388
- return {"has_conflict": False}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
graphgen/models/evaluator/kg/structure_evaluator.py CHANGED
@@ -4,49 +4,49 @@ from typing import Any, Dict, Optional
4
  import numpy as np
5
  from scipy import stats
6
 
7
- from graphgen.bases import BaseGraphStorage
8
  from graphgen.utils import logger
9
 
10
 
11
- class StructureEvaluator:
12
  """Evaluates structural robustness of the graph."""
13
 
14
  def __init__(
15
  self,
16
- graph_storage: BaseGraphStorage,
17
  noise_ratio_threshold: float = 0.15,
18
  largest_cc_ratio_threshold: float = 0.90,
19
  avg_degree_min: float = 2.0,
20
  avg_degree_max: float = 5.0,
21
  powerlaw_r2_threshold: float = 0.75,
22
  ):
23
- self.graph_storage = graph_storage
24
  self.noise_ratio_threshold = noise_ratio_threshold
25
  self.largest_cc_ratio_threshold = largest_cc_ratio_threshold
26
  self.avg_degree_min = avg_degree_min
27
  self.avg_degree_max = avg_degree_max
28
  self.powerlaw_r2_threshold = powerlaw_r2_threshold
29
 
30
- def evaluate(self) -> Dict[str, Any]:
31
  """
32
  Evaluate the structural robustness of the graph.
33
- :return:
 
 
 
 
 
 
 
34
  """
35
- storage = self.graph_storage
36
-
37
- total_nodes = storage.get_node_count()
38
- if total_nodes == 0:
39
- return {"error": "Empty graph"}
40
-
41
- total_edges = storage.get_edge_count()
42
- degree_map = storage.get_all_node_degrees()
43
 
44
  # Noise ratio: isolated nodes / total nodes
45
  isolated_nodes = [nid for nid, deg in degree_map.items() if deg == 0]
46
  noise_ratio = len(isolated_nodes) / total_nodes
47
 
48
  # Largest connected component
49
- components = storage.get_connected_components(undirected=True)
50
  largest_cc_ratio = (
51
  len(max(components, key=len)) / total_nodes if components else 0
52
  )
 
4
  import numpy as np
5
  from scipy import stats
6
 
7
+ from graphgen.bases import BaseGraphStorage, BaseKGEvaluator
8
  from graphgen.utils import logger
9
 
10
 
11
+ class StructureEvaluator(BaseKGEvaluator):
12
  """Evaluates structural robustness of the graph."""
13
 
14
  def __init__(
15
  self,
 
16
  noise_ratio_threshold: float = 0.15,
17
  largest_cc_ratio_threshold: float = 0.90,
18
  avg_degree_min: float = 2.0,
19
  avg_degree_max: float = 5.0,
20
  powerlaw_r2_threshold: float = 0.75,
21
  ):
 
22
  self.noise_ratio_threshold = noise_ratio_threshold
23
  self.largest_cc_ratio_threshold = largest_cc_ratio_threshold
24
  self.avg_degree_min = avg_degree_min
25
  self.avg_degree_max = avg_degree_max
26
  self.powerlaw_r2_threshold = powerlaw_r2_threshold
27
 
28
+ def evaluate(self, kg: BaseGraphStorage) -> Dict[str, Any]:
29
  """
30
  Evaluate the structural robustness of the graph.
31
+ :return: Dictionary of structural metrics and robustness verdict. The keys include:
32
+ - total_nodes: Total number of nodes in the graph
33
+ - total_edges: Total number of edges in the graph
34
+ - noise_ratio: Ratio of isolated nodes to total nodes
35
+ - largest_cc_ratio: Ratio of largest connected component size to total nodes
36
+ - avg_degree: Average node degree
37
+ - powerlaw_r2: R² value of power law fit to degree distribution
38
+ - is_robust: Boolean indicating if the graph is structurally robust
39
  """
40
+ total_nodes = kg.get_node_count()
41
+ total_edges = kg.get_edge_count()
42
+ degree_map = kg.get_all_node_degrees()
 
 
 
 
 
43
 
44
  # Noise ratio: isolated nodes / total nodes
45
  isolated_nodes = [nid for nid, deg in degree_map.items() if deg == 0]
46
  noise_ratio = len(isolated_nodes) / total_nodes
47
 
48
  # Largest connected component
49
+ components = kg.get_connected_components(undirected=True)
50
  largest_cc_ratio = (
51
  len(max(components, key=len)) / total_nodes if components else 0
52
  )
graphgen/models/evaluator/qa/length_evaluator.py CHANGED
@@ -1,18 +1,19 @@
1
-
2
  import os
3
- from graphgen.bases import BaseEvaluator, QAPair
 
4
  from graphgen.models.tokenizer import Tokenizer
5
 
6
 
7
- class LengthEvaluator(BaseEvaluator):
8
  def __init__(self, tokenizer_name: str = None):
9
- tokenizer_model = tokenizer_name or os.environ.get("TOKENIZER_MODEL", "cl100k_base")
 
 
10
  self.tokenizer: Tokenizer = Tokenizer(tokenizer_model)
11
 
12
- def evaluate(self, pair: QAPair) -> float:
13
  """
14
  Evaluate the length of the qa pair.
15
  """
16
  content = pair.question + pair.answer
17
- tokens = self.tokenizer.encode(content)
18
- return len(tokens)
 
 
1
  import os
2
+
3
+ from graphgen.bases import BaseQAEvaluator, QAPair
4
  from graphgen.models.tokenizer import Tokenizer
5
 
6
 
7
+ class LengthEvaluator(BaseQAEvaluator):
8
  def __init__(self, tokenizer_name: str = None):
9
+ tokenizer_model = tokenizer_name or os.environ.get(
10
+ "TOKENIZER_MODEL", "cl100k_base"
11
+ )
12
  self.tokenizer: Tokenizer = Tokenizer(tokenizer_model)
13
 
14
+ async def evaluate(self, pair: QAPair) -> dict[str, float]:
15
  """
16
  Evaluate the length of the qa pair.
17
  """
18
  content = pair.question + pair.answer
19
+ return {"length": self.tokenizer.count_tokens(content)}
 
graphgen/models/evaluator/qa/mtld_evaluator.py CHANGED
@@ -1,10 +1,10 @@
1
  from typing import Set
2
 
3
- from graphgen.bases import BaseEvaluator, QAPair
4
  from graphgen.utils import NLTKHelper, detect_main_language
5
 
6
 
7
- class MTLDEvaluator(BaseEvaluator):
8
  """
9
  Metrics for measuring the lexical diversity of text.
10
  """
@@ -15,7 +15,7 @@ class MTLDEvaluator(BaseEvaluator):
15
  self.stopwords_zh: Set[str] = set(self.nltk_helper.get_stopwords("zh"))
16
  self.threshold = threshold
17
 
18
- def evaluate(self, pair: QAPair) -> float:
19
  """
20
  Calculate the MTLD (Mean Token Length Diversity) score for a given text.
21
 
@@ -24,7 +24,7 @@ class MTLDEvaluator(BaseEvaluator):
24
  """
25
  text = pair.answer
26
  if not text or not text.strip():
27
- return 0.0
28
 
29
  lang = detect_main_language(text)
30
  tokens = self.nltk_helper.word_tokenize(text, lang)
@@ -34,7 +34,7 @@ class MTLDEvaluator(BaseEvaluator):
34
  filtered_tokens = [word for word in filtered_tokens if word.isalnum()]
35
 
36
  if not filtered_tokens:
37
- return 0
38
 
39
  # Compute forward factors
40
  forward_factors = self._compute_factors(filtered_tokens, self.threshold)
@@ -43,7 +43,8 @@ class MTLDEvaluator(BaseEvaluator):
43
  backward_factors = self._compute_factors(filtered_tokens[::-1], self.threshold)
44
 
45
  # Compute average factors
46
- return (forward_factors + backward_factors) / 2
 
47
 
48
  @staticmethod
49
  def _compute_factors(tokens: list, threshold: float) -> float:
 
1
  from typing import Set
2
 
3
+ from graphgen.bases import BaseQAEvaluator, QAPair
4
  from graphgen.utils import NLTKHelper, detect_main_language
5
 
6
 
7
+ class MTLDEvaluator(BaseQAEvaluator):
8
  """
9
  Metrics for measuring the lexical diversity of text.
10
  """
 
15
  self.stopwords_zh: Set[str] = set(self.nltk_helper.get_stopwords("zh"))
16
  self.threshold = threshold
17
 
18
+ async def evaluate(self, pair: QAPair) -> dict[str, float]:
19
  """
20
  Calculate the MTLD (Mean Token Length Diversity) score for a given text.
21
 
 
24
  """
25
  text = pair.answer
26
  if not text or not text.strip():
27
+ return {"mtld": 0}
28
 
29
  lang = detect_main_language(text)
30
  tokens = self.nltk_helper.word_tokenize(text, lang)
 
34
  filtered_tokens = [word for word in filtered_tokens if word.isalnum()]
35
 
36
  if not filtered_tokens:
37
+ return {"mtld": 0}
38
 
39
  # Compute forward factors
40
  forward_factors = self._compute_factors(filtered_tokens, self.threshold)
 
43
  backward_factors = self._compute_factors(filtered_tokens[::-1], self.threshold)
44
 
45
  # Compute average factors
46
+ mtld_score = (forward_factors + backward_factors) / 2
47
+ return {"mtld": mtld_score}
48
 
49
  @staticmethod
50
  def _compute_factors(tokens: list, threshold: float) -> float:
graphgen/models/evaluator/qa/reward_evaluator.py CHANGED
@@ -1,8 +1,9 @@
1
  from typing import Optional
2
- from graphgen.bases import BaseEvaluator, QAPair
3
 
 
4
 
5
- class RewardEvaluator(BaseEvaluator):
 
6
  """
7
  Reward Model Evaluator for single QAPair evaluation.
8
  """
@@ -15,7 +16,7 @@ class RewardEvaluator(BaseEvaluator):
15
  ):
16
  """
17
  Initialize the reward evaluator.
18
-
19
  Args:
20
  reward_name: Model name or path on HuggingFace Hub
21
  max_length: Maximum token length for the model
@@ -26,6 +27,7 @@ class RewardEvaluator(BaseEvaluator):
26
 
27
  import torch
28
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
 
29
  self.torch = torch
30
 
31
  # Set device (auto-detect if not specified)
@@ -37,15 +39,17 @@ class RewardEvaluator(BaseEvaluator):
37
  self.model.to(self.device)
38
  self.model.eval()
39
  except Exception as e:
40
- raise RuntimeError(f"Failed to load reward model '{reward_name}': {e}") from e
 
 
41
 
42
- def evaluate(self, pair: QAPair) -> float:
43
  """
44
  Evaluate a single question-answer pair using the reward model.
45
-
46
  Args:
47
  pair: QAPair containing question and answer strings
48
-
49
  Returns:
50
  Score as a float
51
  """
@@ -63,4 +67,4 @@ class RewardEvaluator(BaseEvaluator):
63
  with self.torch.no_grad():
64
  score = self.model(**inputs).logits[0].item()
65
 
66
- return score
 
1
  from typing import Optional
 
2
 
3
+ from graphgen.bases import BaseQAEvaluator, QAPair
4
 
5
+
6
+ class RewardEvaluator(BaseQAEvaluator):
7
  """
8
  Reward Model Evaluator for single QAPair evaluation.
9
  """
 
16
  ):
17
  """
18
  Initialize the reward evaluator.
19
+
20
  Args:
21
  reward_name: Model name or path on HuggingFace Hub
22
  max_length: Maximum token length for the model
 
27
 
28
  import torch
29
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
30
+
31
  self.torch = torch
32
 
33
  # Set device (auto-detect if not specified)
 
39
  self.model.to(self.device)
40
  self.model.eval()
41
  except Exception as e:
42
+ raise RuntimeError(
43
+ f"Failed to load reward model '{reward_name}': {e}"
44
+ ) from e
45
 
46
+ async def evaluate(self, pair: QAPair) -> dict[str, float]:
47
  """
48
  Evaluate a single question-answer pair using the reward model.
49
+
50
  Args:
51
  pair: QAPair containing question and answer strings
52
+
53
  Returns:
54
  Score as a float
55
  """
 
67
  with self.torch.no_grad():
68
  score = self.model(**inputs).logits[0].item()
69
 
70
+ return {"reward_score": score}
graphgen/models/evaluator/qa/uni_evaluator.py CHANGED
@@ -1,14 +1,15 @@
1
  # https://github.com/maszhongming/UniEval/tree/main
2
- from typing import Optional, List
3
- from graphgen.bases import BaseEvaluator, QAPair
4
 
 
5
 
6
- class UniEvaluator(BaseEvaluator):
 
7
  """
8
  UniEvaluator for single QAPair evaluation across quality dimensions.
9
-
10
  Dimensions: naturalness, coherence, understandability
11
-
12
  Usage:
13
  evaluator = UniEvaluator()
14
  pair = QAPair(question="...", answer="...")
@@ -34,6 +35,7 @@ class UniEvaluator(BaseEvaluator):
34
  """
35
  import torch
36
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 
37
  self.torch = torch
38
 
39
  self.model_name = model_name or self.DEFAULT_MODEL
@@ -58,10 +60,12 @@ class UniEvaluator(BaseEvaluator):
58
  if dimension == "coherence":
59
  return f"question: Is this a coherent response? </s> response: {answer} </s> history: {question}"
60
  if dimension == "understandability":
61
- return f"question: Is this an understandable response? </s> response: {answer}"
 
 
62
  raise NotImplementedError(f"Unsupported dimension '{dimension}'")
63
 
64
- def evaluate(
65
  self,
66
  pair: QAPair,
67
  dimensions: Optional[List[str]] = None,
@@ -72,7 +76,9 @@ class UniEvaluator(BaseEvaluator):
72
  # Validate dimensions
73
  invalid = set(dimensions) - set(self.DEFAULT_DIMS)
74
  if invalid:
75
- raise ValueError(f"Invalid dimensions: {invalid}. Available: {self.DEFAULT_DIMS}")
 
 
76
 
77
  results = {}
78
  no_token = self.torch.tensor([[self._no_id]], device=self.device)
@@ -95,7 +101,9 @@ class UniEvaluator(BaseEvaluator):
95
  attention_mask=src_mask,
96
  labels=no_token,
97
  use_cache=False,
98
- ).logits[:, 0, :] # [1, vocab_size]
 
 
99
 
100
  probs = self.torch.softmax(logits, dim=-1)[0]
101
  score = probs[self._yes_id] / (probs[self._yes_id] + probs[self._no_id])
 
1
  # https://github.com/maszhongming/UniEval/tree/main
2
+ from typing import List, Optional
 
3
 
4
+ from graphgen.bases import BaseQAEvaluator, QAPair
5
 
6
+
7
+ class UniEvaluator(BaseQAEvaluator):
8
  """
9
  UniEvaluator for single QAPair evaluation across quality dimensions.
10
+
11
  Dimensions: naturalness, coherence, understandability
12
+
13
  Usage:
14
  evaluator = UniEvaluator()
15
  pair = QAPair(question="...", answer="...")
 
35
  """
36
  import torch
37
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
38
+
39
  self.torch = torch
40
 
41
  self.model_name = model_name or self.DEFAULT_MODEL
 
60
  if dimension == "coherence":
61
  return f"question: Is this a coherent response? </s> response: {answer} </s> history: {question}"
62
  if dimension == "understandability":
63
+ return (
64
+ f"question: Is this an understandable response? </s> response: {answer}"
65
+ )
66
  raise NotImplementedError(f"Unsupported dimension '{dimension}'")
67
 
68
+ async def evaluate(
69
  self,
70
  pair: QAPair,
71
  dimensions: Optional[List[str]] = None,
 
76
  # Validate dimensions
77
  invalid = set(dimensions) - set(self.DEFAULT_DIMS)
78
  if invalid:
79
+ raise ValueError(
80
+ f"Invalid dimensions: {invalid}. Available: {self.DEFAULT_DIMS}"
81
+ )
82
 
83
  results = {}
84
  no_token = self.torch.tensor([[self._no_id]], device=self.device)
 
101
  attention_mask=src_mask,
102
  labels=no_token,
103
  use_cache=False,
104
+ ).logits[
105
+ :, 0, :
106
+ ] # [1, vocab_size]
107
 
108
  probs = self.torch.softmax(logits, dim=-1)[0]
109
  score = probs[self._yes_id] / (probs[self._yes_id] + probs[self._no_id])
graphgen/models/evaluator/triple/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .accuracy_evaluator import AccuracyEvaluator
graphgen/models/evaluator/triple/accuracy_evaluator.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import re
3
+ from typing import Any, Dict
4
+
5
+ from graphgen.bases import BaseLLMWrapper, BaseTripleEvaluator
6
+ from graphgen.templates import ACCURACY_EVALUATION_PROMPT
7
+ from graphgen.utils import detect_main_language, logger
8
+
9
+
10
+ class AccuracyEvaluator(BaseTripleEvaluator):
11
+ """Evaluates accuracy of entity recognition and relation extraction using LLM-as-a-Judge.
12
+
13
+ For each chunk, uses LLM to evaluate the quality of extracted entities and relations
14
+ by comparing them with the original chunk content. Provides multi-dimensional quality
15
+ scores (accuracy, completeness, precision).
16
+ """
17
+
18
+ def __init__(
19
+ self,
20
+ llm_client: BaseLLMWrapper,
21
+ ):
22
+ self.llm_client = llm_client
23
+
24
+ async def evaluate(self, unit: tuple) -> Dict[str, Any]:
25
+ """Evaluate entity and relation extraction quality using LLM-as-a-Judge.
26
+
27
+ Returns:
28
+ Dictionary containing entity_accuracy and relation_accuracy metrics.
29
+ """
30
+ chunk_content, nodes, edges = unit
31
+ lang = detect_main_language(chunk_content)
32
+
33
+ # node
34
+ prompt = ACCURACY_EVALUATION_PROMPT[lang]["ENTITY"].format(
35
+ chunk_content=chunk_content,
36
+ extracted_entities=json.dumps(nodes, ensure_ascii=False, indent=2),
37
+ )
38
+
39
+ response = await self.llm_client.generate_answer(prompt)
40
+
41
+ # Try to parse JSON response
42
+ try:
43
+ node_evaluation_result = json.loads(response)
44
+ except json.JSONDecodeError:
45
+ # Try to extract JSON from markdown code blocks or other formats
46
+ json_match = re.search(r"\{.*\}", response, re.DOTALL)
47
+ if json_match:
48
+ node_evaluation_result = json.loads(json_match.group(0))
49
+ else:
50
+ logger.warning("Failed to parse LLM response.")
51
+ # default evaluation
52
+ node_evaluation_result = {
53
+ "accuracy": 0.0,
54
+ "completeness": 0.0,
55
+ "precision": 0.0,
56
+ "overall_score": 0.0,
57
+ "accuracy_reasoning": "Failed to parse LLM response",
58
+ "completeness_reasoning": "",
59
+ "precision_reasoning": "",
60
+ "issues": ["LLM response parsing failed"],
61
+ }
62
+
63
+ # edge
64
+ prompt = ACCURACY_EVALUATION_PROMPT[lang]["RELATION"].format(
65
+ chunk_content=chunk_content,
66
+ extracted_relations=json.dumps(edges, ensure_ascii=False, indent=2),
67
+ )
68
+ response = await self.llm_client.generate_answer(prompt)
69
+ # Try to parse JSON response
70
+ try:
71
+ edge_evaluation_result = json.loads(response)
72
+ except json.JSONDecodeError:
73
+ # Try to extract JSON from markdown code blocks or other formats
74
+ json_match = re.search(r"\{.*\}", response, re.DOTALL)
75
+ if json_match:
76
+ edge_evaluation_result = json.loads(json_match.group(0))
77
+ else:
78
+ logger.warning("Failed to parse LLM response.")
79
+ # default evaluation
80
+ edge_evaluation_result = {
81
+ "accuracy": 0.0,
82
+ "completeness": 0.0,
83
+ "precision": 0.0,
84
+ "overall_score": 0.0,
85
+ "accuracy_reasoning": "Failed to parse LLM response",
86
+ "completeness_reasoning": "",
87
+ "precision_reasoning": "",
88
+ "issues": ["LLM response parsing failed"],
89
+ }
90
+
91
+ return {
92
+ "entity_accuracy": node_evaluation_result,
93
+ "relation_accuracy": edge_evaluation_result,
94
+ }
graphgen/models/extractor/schema_guided_extractor.py CHANGED
@@ -1,9 +1,8 @@
1
  import json
2
- from typing import Dict, List
3
 
4
- from graphgen.bases import BaseExtractor, BaseLLMWrapper
5
  from graphgen.templates import SCHEMA_GUIDED_EXTRACTION_PROMPT
6
- from graphgen.utils import compute_dict_hash, detect_main_language, logger
7
 
8
 
9
  class SchemaGuidedExtractor(BaseExtractor):
@@ -59,9 +58,8 @@ class SchemaGuidedExtractor(BaseExtractor):
59
  )
60
  return prompt
61
 
62
- async def extract(self, chunk: dict) -> dict:
63
- _chunk_id = chunk.get("_chunk_id", "")
64
- text = chunk.get("content", "")
65
 
66
  prompt = self.build_prompt(text)
67
  response = await self.llm_client.generate_answer(prompt)
@@ -74,35 +72,9 @@ class SchemaGuidedExtractor(BaseExtractor):
74
  if any(extracted_info[key] == "" for key in self.required_keys):
75
  logger.debug("Missing required keys in extraction: %s", extracted_info)
76
  return {}
77
- main_keys_info = {key: extracted_info[key] for key in self.required_keys}
78
  logger.debug("Extracted info: %s", extracted_info)
 
79
 
80
- # add chunk metadata
81
- extracted_info["_chunk_id"] = _chunk_id
82
-
83
- return {
84
- compute_dict_hash(main_keys_info, prefix="extract-"): extracted_info
85
- }
86
  except json.JSONDecodeError:
87
  logger.error("Failed to parse extraction response: %s", response)
88
  return {}
89
-
90
- @staticmethod
91
- def merge_extractions(extraction_list: List[Dict[str, dict]]) -> Dict[str, dict]:
92
- """
93
- Merge multiple extraction results based on their hashes.
94
- :param extraction_list: List of extraction results, each is a dict with hash as key and record as value.
95
- :return: Merged extraction results.
96
- """
97
- merged: Dict[str, dict] = {}
98
- for ext in extraction_list:
99
- for h, rec in ext.items():
100
- if h not in merged:
101
- merged[h] = rec.copy()
102
- else:
103
- for k, v in rec.items():
104
- if k not in merged[h] or merged[h][k] == v:
105
- merged[h][k] = v
106
- else:
107
- merged[h][k] = f"{merged[h][k]}<SEP>{v}"
108
- return merged
 
1
  import json
 
2
 
3
+ from graphgen.bases import BaseExtractor, BaseLLMWrapper, Chunk
4
  from graphgen.templates import SCHEMA_GUIDED_EXTRACTION_PROMPT
5
+ from graphgen.utils import detect_main_language, logger
6
 
7
 
8
  class SchemaGuidedExtractor(BaseExtractor):
 
58
  )
59
  return prompt
60
 
61
+ async def extract(self, chunk: Chunk) -> dict:
62
+ text = chunk.content
 
63
 
64
  prompt = self.build_prompt(text)
65
  response = await self.llm_client.generate_answer(prompt)
 
72
  if any(extracted_info[key] == "" for key in self.required_keys):
73
  logger.debug("Missing required keys in extraction: %s", extracted_info)
74
  return {}
 
75
  logger.debug("Extracted info: %s", extracted_info)
76
+ return extracted_info
77
 
 
 
 
 
 
 
78
  except json.JSONDecodeError:
79
  logger.error("Failed to parse extraction response: %s", response)
80
  return {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
graphgen/models/generator/aggregated_generator.py CHANGED
@@ -3,7 +3,7 @@ from typing import Any, Optional
3
 
4
  from graphgen.bases import BaseGenerator
5
  from graphgen.templates import AGGREGATED_GENERATION_PROMPT
6
- from graphgen.utils import compute_content_hash, detect_main_language, logger
7
 
8
 
9
  class AggregatedGenerator(BaseGenerator):
@@ -101,30 +101,26 @@ class AggregatedGenerator(BaseGenerator):
101
  batch: tuple[
102
  list[tuple[str, dict]], list[tuple[Any, Any, dict] | tuple[Any, Any, Any]]
103
  ],
104
- ) -> dict[str, Any]:
105
  """
106
  Generate QAs based on a given batch.
107
  :param batch
108
  :return: QA pairs
109
  """
110
- result = {}
111
  rephrasing_prompt = self.build_prompt(batch)
112
  response = await self.llm_client.generate_answer(rephrasing_prompt)
113
  context = self.parse_rephrased_text(response)
114
  if not context:
115
- return result
116
  question_generation_prompt = self._build_prompt_for_question_generation(context)
117
  response = await self.llm_client.generate_answer(question_generation_prompt)
118
  question = self.parse_response(response)["question"]
119
  if not question:
120
- return result
121
  logger.debug("Question: %s", question)
122
  logger.debug("Answer: %s", context)
123
  qa_pairs = {
124
- compute_content_hash(question): {
125
- "question": question,
126
- "answer": context,
127
- }
128
  }
129
- result.update(qa_pairs)
130
- return result
 
3
 
4
  from graphgen.bases import BaseGenerator
5
  from graphgen.templates import AGGREGATED_GENERATION_PROMPT
6
+ from graphgen.utils import detect_main_language, logger
7
 
8
 
9
  class AggregatedGenerator(BaseGenerator):
 
101
  batch: tuple[
102
  list[tuple[str, dict]], list[tuple[Any, Any, dict] | tuple[Any, Any, Any]]
103
  ],
104
+ ) -> list[dict]:
105
  """
106
  Generate QAs based on a given batch.
107
  :param batch
108
  :return: QA pairs
109
  """
 
110
  rephrasing_prompt = self.build_prompt(batch)
111
  response = await self.llm_client.generate_answer(rephrasing_prompt)
112
  context = self.parse_rephrased_text(response)
113
  if not context:
114
+ return []
115
  question_generation_prompt = self._build_prompt_for_question_generation(context)
116
  response = await self.llm_client.generate_answer(question_generation_prompt)
117
  question = self.parse_response(response)["question"]
118
  if not question:
119
+ return []
120
  logger.debug("Question: %s", question)
121
  logger.debug("Answer: %s", context)
122
  qa_pairs = {
123
+ "question": question,
124
+ "answer": context,
 
 
125
  }
126
+ return [qa_pairs]
 
graphgen/models/generator/atomic_generator.py CHANGED
@@ -3,7 +3,7 @@ from typing import Any
3
 
4
  from graphgen.bases import BaseGenerator
5
  from graphgen.templates import ATOMIC_GENERATION_PROMPT
6
- from graphgen.utils import compute_content_hash, detect_main_language, logger
7
 
8
 
9
  class AtomicGenerator(BaseGenerator):
@@ -23,7 +23,7 @@ class AtomicGenerator(BaseGenerator):
23
  return prompt
24
 
25
  @staticmethod
26
- def parse_response(response: str) -> dict:
27
  """
28
  AtomicGenerator normally generates one QA pair per response.
29
  So we just need to parse one QA pair from the response.
@@ -38,15 +38,10 @@ class AtomicGenerator(BaseGenerator):
38
  answer = answer_match.group(1).strip()
39
  else:
40
  logger.warning("Failed to parse response: %s", response)
41
- return {}
42
 
43
  question = question.strip('"').strip("'")
44
  answer = answer.strip('"').strip("'")
45
  logger.debug("Question: %s", question)
46
  logger.debug("Answer: %s", answer)
47
- return {
48
- compute_content_hash(question): {
49
- "question": question,
50
- "answer": answer,
51
- }
52
- }
 
3
 
4
  from graphgen.bases import BaseGenerator
5
  from graphgen.templates import ATOMIC_GENERATION_PROMPT
6
+ from graphgen.utils import detect_main_language, logger
7
 
8
 
9
  class AtomicGenerator(BaseGenerator):
 
23
  return prompt
24
 
25
  @staticmethod
26
+ def parse_response(response: str) -> list[dict]:
27
  """
28
  AtomicGenerator normally generates one QA pair per response.
29
  So we just need to parse one QA pair from the response.
 
38
  answer = answer_match.group(1).strip()
39
  else:
40
  logger.warning("Failed to parse response: %s", response)
41
+ return []
42
 
43
  question = question.strip('"').strip("'")
44
  answer = answer.strip('"').strip("'")
45
  logger.debug("Question: %s", question)
46
  logger.debug("Answer: %s", answer)
47
+ return [{"question": question, "answer": answer}]
 
 
 
 
 
graphgen/models/generator/cot_generator.py CHANGED
@@ -3,7 +3,7 @@ from typing import Any
3
 
4
  from graphgen.bases import BaseGenerator
5
  from graphgen.templates import COT_GENERATION_PROMPT
6
- from graphgen.utils import compute_content_hash, detect_main_language, logger
7
 
8
 
9
  class CoTGenerator(BaseGenerator):
@@ -100,28 +100,25 @@ class CoTGenerator(BaseGenerator):
100
  batch: tuple[
101
  list[tuple[str, dict]], list[tuple[Any, Any, dict] | tuple[Any, Any, Any]]
102
  ],
103
- ) -> dict[str, Any]:
104
  """
105
  Generate QAs based on a given batch.
106
  :param batch
107
  :return: QA pairs
108
  """
109
- result = {}
110
  prompt = self.build_prompt(batch)
111
  response = await self.llm_client.generate_answer(prompt)
112
  response = self.parse_response(response)
113
  if not response:
114
- return result
115
  question, reasoning_path = response["question"], response["reasoning_path"]
116
  prompt = self.build_prompt_for_cot_generation(batch, question, reasoning_path)
117
  cot_answer = await self.llm_client.generate_answer(prompt)
118
  logger.debug("CoT Answer: %s", cot_answer)
119
- qa_pairs = {
120
- compute_content_hash(question): {
121
  "question": question,
122
  "answer": cot_answer,
123
  "reasoning_path": reasoning_path,
124
  }
125
- }
126
- result.update(qa_pairs)
127
- return result
 
3
 
4
  from graphgen.bases import BaseGenerator
5
  from graphgen.templates import COT_GENERATION_PROMPT
6
+ from graphgen.utils import detect_main_language, logger
7
 
8
 
9
  class CoTGenerator(BaseGenerator):
 
100
  batch: tuple[
101
  list[tuple[str, dict]], list[tuple[Any, Any, dict] | tuple[Any, Any, Any]]
102
  ],
103
+ ) -> list[dict]:
104
  """
105
  Generate QAs based on a given batch.
106
  :param batch
107
  :return: QA pairs
108
  """
 
109
  prompt = self.build_prompt(batch)
110
  response = await self.llm_client.generate_answer(prompt)
111
  response = self.parse_response(response)
112
  if not response:
113
+ return []
114
  question, reasoning_path = response["question"], response["reasoning_path"]
115
  prompt = self.build_prompt_for_cot_generation(batch, question, reasoning_path)
116
  cot_answer = await self.llm_client.generate_answer(prompt)
117
  logger.debug("CoT Answer: %s", cot_answer)
118
+ return [
119
+ {
120
  "question": question,
121
  "answer": cot_answer,
122
  "reasoning_path": reasoning_path,
123
  }
124
+ ]
 
 
graphgen/models/generator/fill_in_blank_generator.py CHANGED
@@ -3,7 +3,7 @@ from typing import Any
3
 
4
  from graphgen.bases import BaseGenerator
5
  from graphgen.templates import FILL_IN_BLANK_GENERATION_PROMPT
6
- from graphgen.utils import compute_content_hash, detect_main_language, logger
7
 
8
 
9
  class FillInBlankGenerator(BaseGenerator):
@@ -12,7 +12,7 @@ class FillInBlankGenerator(BaseGenerator):
12
  self.num_of_questions = num_of_questions
13
 
14
  @staticmethod
15
- def parse_response(response: str) -> Any:
16
  """
17
  Parse fill-in-the-blank QA pairs from the LLM response.
18
  Each QA pair contains question text with placeholders and the correct answer(s).
@@ -21,14 +21,14 @@ class FillInBlankGenerator(BaseGenerator):
21
  :return: Dictionary mapping question hash to question data, where each
22
  value is a dict with "question", "answer", and "answers" keys
23
  """
24
- qa_pairs = {}
25
 
26
  # Extract all QA pair blocks
27
  qa_blocks = re.findall(r"<qa_pair>(.*?)</qa_pair>", response, re.DOTALL)
28
 
29
  if not qa_blocks:
30
  logger.warning("No QA pairs found in response: %s", response)
31
- return {}
32
 
33
  for block in qa_blocks:
34
  # Extract and clean question text
@@ -55,13 +55,13 @@ class FillInBlankGenerator(BaseGenerator):
55
  logger.warning("No valid answers found in: %s", answer_text)
56
  continue
57
 
58
- # Build result entry with question hash as key
59
- question_hash = compute_content_hash(question)
60
- qa_pairs[question_hash] = {
61
- "question": question,
62
- "answer": answer_text, # Original answer text with commas
63
- "answers": answers, # List of individual answers: ["A8X"] or ["A8X", "八百万"]
64
- }
65
 
66
  logger.debug(
67
  "Successfully parsed fill-in-the-blank question: %s", question[:50]
 
3
 
4
  from graphgen.bases import BaseGenerator
5
  from graphgen.templates import FILL_IN_BLANK_GENERATION_PROMPT
6
+ from graphgen.utils import detect_main_language, logger
7
 
8
 
9
  class FillInBlankGenerator(BaseGenerator):
 
12
  self.num_of_questions = num_of_questions
13
 
14
  @staticmethod
15
+ def parse_response(response: str) -> list[dict]:
16
  """
17
  Parse fill-in-the-blank QA pairs from the LLM response.
18
  Each QA pair contains question text with placeholders and the correct answer(s).
 
21
  :return: Dictionary mapping question hash to question data, where each
22
  value is a dict with "question", "answer", and "answers" keys
23
  """
24
+ qa_pairs = []
25
 
26
  # Extract all QA pair blocks
27
  qa_blocks = re.findall(r"<qa_pair>(.*?)</qa_pair>", response, re.DOTALL)
28
 
29
  if not qa_blocks:
30
  logger.warning("No QA pairs found in response: %s", response)
31
+ return qa_pairs
32
 
33
  for block in qa_blocks:
34
  # Extract and clean question text
 
55
  logger.warning("No valid answers found in: %s", answer_text)
56
  continue
57
 
58
+ qa_pairs.append(
59
+ {
60
+ "question": question,
61
+ "answer": answer_text, # Original answer text with commas
62
+ "answers": answers, # List of individual answers: ["A8X"] or ["A8X", "八百万"]
63
+ }
64
+ )
65
 
66
  logger.debug(
67
  "Successfully parsed fill-in-the-blank question: %s", question[:50]
graphgen/models/generator/multi_answer_generator.py CHANGED
@@ -3,7 +3,7 @@ from typing import Any
3
 
4
  from graphgen.bases import BaseGenerator
5
  from graphgen.templates import MAQ_GENERATION_PROMPT
6
- from graphgen.utils import compute_content_hash, detect_main_language, logger
7
 
8
 
9
  class MultiAnswerGenerator(BaseGenerator):
@@ -12,7 +12,7 @@ class MultiAnswerGenerator(BaseGenerator):
12
  self.num_of_questions = num_of_questions
13
 
14
  @staticmethod
15
- def parse_response(response: str) -> Any:
16
  """
17
  Parse multiple-answer QA pairs from the LLM response.
18
  Each QA pair contains question text, four options, and the correct answers (one or more).
@@ -21,14 +21,14 @@ class MultiAnswerGenerator(BaseGenerator):
21
  :return: Dictionary mapping question hash to question data, where each
22
  value is a dict with "question", "options", and "answer" keys
23
  """
24
- qa_pairs = {}
25
 
26
  # Extract all QA pair blocks
27
  qa_blocks = re.findall(r"<qa_pair>(.*?)</qa_pair>", response, re.DOTALL)
28
 
29
  if not qa_blocks:
30
  logger.warning("No QA pairs found in response: %s", response)
31
- return {}
32
 
33
  for block in qa_blocks:
34
  # Extract and clean question text
@@ -61,7 +61,9 @@ class MultiAnswerGenerator(BaseGenerator):
61
  logger.warning("Failed to parse answer from block: %s", block)
62
  continue
63
  answer_text = ans_match.group(1).strip().strip('"').strip("'")
64
- answers = [ans.strip().upper() for ans in answer_text.split(",") if ans.strip()]
 
 
65
  invalid_answers = [ans for ans in answers if ans not in options]
66
  if invalid_answers:
67
  logger.warning(
@@ -76,13 +78,13 @@ class MultiAnswerGenerator(BaseGenerator):
76
  logger.warning("No valid answers found in: %s", answer_text)
77
  continue
78
 
79
- # Build result entry with question hash as key
80
- question_hash = compute_content_hash(question)
81
- qa_pairs[question_hash] = {
82
- "question": question,
83
- "options": options, # Dict like {"A": "text", "B": "text", ...}
84
- "answer": ", ".join(answers),
85
- }
86
 
87
  logger.debug("Successfully parsed MAQ: %s", question[:50])
88
 
 
3
 
4
  from graphgen.bases import BaseGenerator
5
  from graphgen.templates import MAQ_GENERATION_PROMPT
6
+ from graphgen.utils import detect_main_language, logger
7
 
8
 
9
  class MultiAnswerGenerator(BaseGenerator):
 
12
  self.num_of_questions = num_of_questions
13
 
14
  @staticmethod
15
+ def parse_response(response: str) -> list[dict]:
16
  """
17
  Parse multiple-answer QA pairs from the LLM response.
18
  Each QA pair contains question text, four options, and the correct answers (one or more).
 
21
  :return: Dictionary mapping question hash to question data, where each
22
  value is a dict with "question", "options", and "answer" keys
23
  """
24
+ qa_pairs = []
25
 
26
  # Extract all QA pair blocks
27
  qa_blocks = re.findall(r"<qa_pair>(.*?)</qa_pair>", response, re.DOTALL)
28
 
29
  if not qa_blocks:
30
  logger.warning("No QA pairs found in response: %s", response)
31
+ return qa_pairs
32
 
33
  for block in qa_blocks:
34
  # Extract and clean question text
 
61
  logger.warning("Failed to parse answer from block: %s", block)
62
  continue
63
  answer_text = ans_match.group(1).strip().strip('"').strip("'")
64
+ answers = [
65
+ ans.strip().upper() for ans in answer_text.split(",") if ans.strip()
66
+ ]
67
  invalid_answers = [ans for ans in answers if ans not in options]
68
  if invalid_answers:
69
  logger.warning(
 
78
  logger.warning("No valid answers found in: %s", answer_text)
79
  continue
80
 
81
+ qa_pairs.append(
82
+ {
83
+ "question": question,
84
+ "options": options, # Dict like {"A": "text", "B": "text", ...}
85
+ "answers": answers, # List of correct answers: ["A", "C"]
86
+ }
87
+ )
88
 
89
  logger.debug("Successfully parsed MAQ: %s", question[:50])
90
 
graphgen/models/generator/multi_choice_generator.py CHANGED
@@ -3,7 +3,7 @@ from typing import Any
3
 
4
  from graphgen.bases import BaseGenerator
5
  from graphgen.templates import MCQ_GENERATION_PROMPT
6
- from graphgen.utils import compute_content_hash, detect_main_language, logger
7
 
8
 
9
  class MultiChoiceGenerator(BaseGenerator):
@@ -12,7 +12,7 @@ class MultiChoiceGenerator(BaseGenerator):
12
  self.num_of_questions = num_of_questions
13
 
14
  @staticmethod
15
- def parse_response(response: str) -> Any:
16
  """
17
  Parse multiple choice QA pairs from the LLM response.
18
  Each QA pair contains question text, four options, and the correct answer.
@@ -21,14 +21,14 @@ class MultiChoiceGenerator(BaseGenerator):
21
  :return: Dictionary mapping question hash to question data, where each
22
  value is a dict with "question", "options", and "answer" keys
23
  """
24
- qa_pairs = {}
25
 
26
  # Extract all QA pair blocks
27
  qa_blocks = re.findall(r"<qa_pair>(.*?)</qa_pair>", response, re.DOTALL)
28
 
29
  if not qa_blocks:
30
  logger.warning("No QA pairs found in response: %s", response)
31
- return {}
32
 
33
  for block in qa_blocks:
34
  # Extract and clean question text
@@ -76,13 +76,13 @@ class MultiChoiceGenerator(BaseGenerator):
76
  )
77
  continue
78
 
79
- # Build result entry with question hash as key
80
- question_hash = compute_content_hash(question)
81
- qa_pairs[question_hash] = {
82
- "question": question,
83
- "options": options, # Dict like {"A": "text", "B": "text", ...}
84
- "answer": answer, # Single letter: "A", "B", "C", or "D"
85
- }
86
 
87
  logger.debug("Successfully parsed MCQ: %s", question[:50])
88
 
 
3
 
4
  from graphgen.bases import BaseGenerator
5
  from graphgen.templates import MCQ_GENERATION_PROMPT
6
+ from graphgen.utils import detect_main_language, logger
7
 
8
 
9
  class MultiChoiceGenerator(BaseGenerator):
 
12
  self.num_of_questions = num_of_questions
13
 
14
  @staticmethod
15
+ def parse_response(response: str) -> list[dict]:
16
  """
17
  Parse multiple choice QA pairs from the LLM response.
18
  Each QA pair contains question text, four options, and the correct answer.
 
21
  :return: Dictionary mapping question hash to question data, where each
22
  value is a dict with "question", "options", and "answer" keys
23
  """
24
+ qa_pairs = []
25
 
26
  # Extract all QA pair blocks
27
  qa_blocks = re.findall(r"<qa_pair>(.*?)</qa_pair>", response, re.DOTALL)
28
 
29
  if not qa_blocks:
30
  logger.warning("No QA pairs found in response: %s", response)
31
+ return qa_pairs
32
 
33
  for block in qa_blocks:
34
  # Extract and clean question text
 
76
  )
77
  continue
78
 
79
+ qa_pairs.append(
80
+ {
81
+ "question": question,
82
+ "options": options, # Dict like {"A": "text", "B": "text", ...}
83
+ "answer": answer, # Single letter: "A", "B", "C", or "D"
84
+ }
85
+ )
86
 
87
  logger.debug("Successfully parsed MCQ: %s", question[:50])
88
 
graphgen/models/generator/multi_hop_generator.py CHANGED
@@ -3,7 +3,7 @@ from typing import Any
3
 
4
  from graphgen.bases import BaseGenerator
5
  from graphgen.templates import MULTI_HOP_GENERATION_PROMPT
6
- from graphgen.utils import compute_content_hash, detect_main_language, logger
7
 
8
 
9
  class MultiHopGenerator(BaseGenerator):
@@ -32,7 +32,7 @@ class MultiHopGenerator(BaseGenerator):
32
  return prompt
33
 
34
  @staticmethod
35
- def parse_response(response: str) -> dict:
36
  question_match = re.search(r"<question>(.*?)</question>", response, re.DOTALL)
37
  answer_match = re.search(r"<answer>(.*?)</answer>", response, re.DOTALL)
38
 
@@ -41,15 +41,10 @@ class MultiHopGenerator(BaseGenerator):
41
  answer = answer_match.group(1).strip()
42
  else:
43
  logger.warning("Failed to parse response: %s", response)
44
- return {}
45
 
46
  question = question.strip('"').strip("'")
47
  answer = answer.strip('"').strip("'")
48
  logger.debug("Question: %s", question)
49
  logger.debug("Answer: %s", answer)
50
- return {
51
- compute_content_hash(question): {
52
- "question": question,
53
- "answer": answer,
54
- }
55
- }
 
3
 
4
  from graphgen.bases import BaseGenerator
5
  from graphgen.templates import MULTI_HOP_GENERATION_PROMPT
6
+ from graphgen.utils import detect_main_language, logger
7
 
8
 
9
  class MultiHopGenerator(BaseGenerator):
 
32
  return prompt
33
 
34
  @staticmethod
35
+ def parse_response(response: str) -> list[dict]:
36
  question_match = re.search(r"<question>(.*?)</question>", response, re.DOTALL)
37
  answer_match = re.search(r"<answer>(.*?)</answer>", response, re.DOTALL)
38
 
 
41
  answer = answer_match.group(1).strip()
42
  else:
43
  logger.warning("Failed to parse response: %s", response)
44
+ return []
45
 
46
  question = question.strip('"').strip("'")
47
  answer = answer.strip('"').strip("'")
48
  logger.debug("Question: %s", question)
49
  logger.debug("Answer: %s", answer)
50
+ return [{"question": question, "answer": answer}]
 
 
 
 
 
graphgen/models/generator/quiz_generator.py CHANGED
@@ -31,12 +31,16 @@ class QuizGenerator(BaseGenerator):
31
  description = edges[0][2].get("description", "")
32
  template_type = edges[0][2].get("template_type", "TEMPLATE")
33
  else:
34
- raise ValueError("Batch must contain at least one node or edge with description")
 
 
35
 
36
  return QuizGenerator.build_prompt_for_description(description, template_type)
37
 
38
  @staticmethod
39
- def build_prompt_for_description(description: str, template_type: str = "TEMPLATE") -> str:
 
 
40
  """
41
  Build prompt for rephrasing a single description.
42
  :param description: The description to rephrase
@@ -49,17 +53,6 @@ class QuizGenerator(BaseGenerator):
49
  )
50
  return prompt
51
 
52
- @staticmethod
53
- def parse_rephrased_text(response: str) -> str:
54
- """
55
- Parse the rephrased text from the response.
56
- :param response:
57
- :return:
58
- """
59
- rephrased_text = response.strip().strip('"')
60
- logger.debug("Rephrased Text: %s", rephrased_text)
61
- return rephrased_text
62
-
63
  @staticmethod
64
  def parse_response(response: str) -> Any:
65
  """
@@ -67,4 +60,15 @@ class QuizGenerator(BaseGenerator):
67
  :param response: LLM response
68
  :return: Rephrased text
69
  """
70
- return QuizGenerator.parse_rephrased_text(response)
 
 
 
 
 
 
 
 
 
 
 
 
31
  description = edges[0][2].get("description", "")
32
  template_type = edges[0][2].get("template_type", "TEMPLATE")
33
  else:
34
+ raise ValueError(
35
+ "Batch must contain at least one node or edge with description"
36
+ )
37
 
38
  return QuizGenerator.build_prompt_for_description(description, template_type)
39
 
40
  @staticmethod
41
+ def build_prompt_for_description(
42
+ description: str, template_type: str = "TEMPLATE"
43
+ ) -> str:
44
  """
45
  Build prompt for rephrasing a single description.
46
  :param description: The description to rephrase
 
53
  )
54
  return prompt
55
 
 
 
 
 
 
 
 
 
 
 
 
56
  @staticmethod
57
  def parse_response(response: str) -> Any:
58
  """
 
60
  :param response: LLM response
61
  :return: Rephrased text
62
  """
63
+
64
+ def parse_rephrased_text(content: str) -> str:
65
+ """
66
+ Parse the rephrased text from the response.
67
+ :param content: LLM response content
68
+ :return:
69
+ """
70
+ rephrased_text = content.strip().strip('"')
71
+ logger.debug("Rephrased Text: %s", rephrased_text)
72
+ return rephrased_text
73
+
74
+ return parse_rephrased_text(response)
graphgen/models/generator/true_false_generator.py CHANGED
@@ -3,7 +3,7 @@ from typing import Any
3
 
4
  from graphgen.bases import BaseGenerator
5
  from graphgen.templates import TF_GENERATION_PROMPT
6
- from graphgen.utils import compute_content_hash, detect_main_language, logger
7
 
8
 
9
  class TrueFalseGenerator(BaseGenerator):
@@ -12,7 +12,7 @@ class TrueFalseGenerator(BaseGenerator):
12
  self.num_of_questions = num_of_questions
13
 
14
  @staticmethod
15
- def parse_response(response: str) -> Any:
16
  """
17
  Parse true/false QA pairs from the LLM response.
18
  Each QA pair contains a statement question and True/False answer.
@@ -21,14 +21,14 @@ class TrueFalseGenerator(BaseGenerator):
21
  :return: Dictionary mapping question hash to question data, where each
22
  value is a dict with "question", "options", and "answer" keys
23
  """
24
- qa_pairs: dict[str, dict[str, Any]] = {}
25
 
26
  # Extract all QA pair blocks
27
  qa_blocks = re.findall(r"<qa_pair>(.*?)</qa_pair>", response, re.DOTALL)
28
 
29
  if not qa_blocks:
30
  logger.warning("No QA pairs found in response: %s", response)
31
- return {}
32
 
33
  for block in qa_blocks:
34
  # Extract and clean question text
@@ -50,12 +50,12 @@ class TrueFalseGenerator(BaseGenerator):
50
  logger.warning("Invalid answer '%s' in block: %s", answer, block)
51
  continue
52
 
53
- # Build result entry with question hash as key
54
- question_hash = compute_content_hash(question)
55
- qa_pairs[question_hash] = {
56
- "question": question,
57
- "answer": answer, # "True" or "False"
58
- }
59
 
60
  logger.debug("Successfully parsed TF question: %s", question[:50])
61
 
 
3
 
4
  from graphgen.bases import BaseGenerator
5
  from graphgen.templates import TF_GENERATION_PROMPT
6
+ from graphgen.utils import detect_main_language, logger
7
 
8
 
9
  class TrueFalseGenerator(BaseGenerator):
 
12
  self.num_of_questions = num_of_questions
13
 
14
  @staticmethod
15
+ def parse_response(response: str) -> list[dict]:
16
  """
17
  Parse true/false QA pairs from the LLM response.
18
  Each QA pair contains a statement question and True/False answer.
 
21
  :return: Dictionary mapping question hash to question data, where each
22
  value is a dict with "question", "options", and "answer" keys
23
  """
24
+ qa_pairs: list[dict[str, str]] = []
25
 
26
  # Extract all QA pair blocks
27
  qa_blocks = re.findall(r"<qa_pair>(.*?)</qa_pair>", response, re.DOTALL)
28
 
29
  if not qa_blocks:
30
  logger.warning("No QA pairs found in response: %s", response)
31
+ return qa_pairs
32
 
33
  for block in qa_blocks:
34
  # Extract and clean question text
 
50
  logger.warning("Invalid answer '%s' in block: %s", answer, block)
51
  continue
52
 
53
+ qa_pairs.append(
54
+ {
55
+ "question": question,
56
+ "answer": answer, # "True" or "False"
57
+ }
58
+ )
59
 
60
  logger.debug("Successfully parsed TF question: %s", question[:50])
61
 
graphgen/models/generator/vqa_generator.py CHANGED
@@ -1,9 +1,10 @@
 
1
  import re
2
  from typing import Any
3
 
4
  from graphgen.bases import BaseGenerator
5
  from graphgen.templates import VQA_GENERATION_PROMPT
6
- from graphgen.utils import compute_content_hash, detect_main_language, logger
7
 
8
 
9
  class VQAGenerator(BaseGenerator):
@@ -32,13 +33,13 @@ class VQAGenerator(BaseGenerator):
32
  return prompt
33
 
34
  @staticmethod
35
- def parse_response(response: str) -> Any:
36
  """
37
  Parse the LLM response and return the generated QAs
38
  :param response
39
  :return: QA pairs
40
  """
41
- qa_pairs = {}
42
  pattern = r"<question>(.*?)</question>\s*<answer>(.*?)</answer>"
43
  matches = re.findall(pattern, response, re.DOTALL)
44
 
@@ -48,10 +49,12 @@ class VQAGenerator(BaseGenerator):
48
  answer = answer.strip().strip('"').strip("'")
49
  logger.debug("Question: %s", question)
50
  logger.debug("Answer: %s", answer)
51
- qa_pairs[compute_content_hash(question)] = {
52
- "question": question,
53
- "answer": answer,
54
- }
 
 
55
  else:
56
  logger.warning("Error parsing the response %s", response)
57
  return qa_pairs
@@ -61,76 +64,58 @@ class VQAGenerator(BaseGenerator):
61
  batch: tuple[
62
  list[tuple[str, dict]], list[tuple[Any, Any, dict] | tuple[Any, Any, Any]]
63
  ],
64
- ) -> dict[str, Any]:
65
  """
66
  Generate QAs based on a given batch.
67
  :param batch
68
  :return: QA pairs
69
  """
70
- result = {}
71
  prompt = self.build_prompt(batch)
72
  response = await self.llm_client.generate_answer(prompt)
73
  qa_pairs = self.parse_response(response) # generate one or more QA pairs
74
  nodes, _ = batch
75
  for node in nodes:
76
  node_data = node[1]
77
- if "image_data" in node_data and node_data["image_data"]:
78
- img_path = node_data["image_data"]["img_path"]
79
- for qa in qa_pairs.values():
 
80
  qa["img_path"] = img_path
81
- result.update(qa_pairs)
82
- return result
83
 
84
  @staticmethod
85
- def format_generation_results(
86
- results: list[dict], output_data_format: str
87
- ) -> list[dict[str, Any]]:
 
88
  if output_data_format == "Alpaca":
89
- results = [
90
- {
91
- "instruction": v["question"],
92
- "input": "",
93
- "output": v["answer"],
94
- "image": v.get("img_path", ""),
95
- }
96
- for item in results
97
- for k, v in item.items()
98
- ]
99
- elif output_data_format == "Sharegpt":
100
- results = [
101
- {
102
- "conversations": [
103
- {
104
- "from": "human",
105
- "value": [
106
- {"text": v["question"], "image": v.get("img_path", "")}
107
- ],
108
- },
109
- {"from": "gpt", "value": [{"text": v["answer"]}]},
110
- ]
111
- }
112
- for item in results
113
- for k, v in item.items()
114
- ]
115
- elif output_data_format == "ChatML":
116
- results = [
117
- {
118
- "messages": [
119
- {
120
- "role": "user",
121
- "content": [
122
- {"text": v["question"], "image": v.get("img_path", "")}
123
- ],
124
- },
125
- {
126
- "role": "assistant",
127
- "content": [{"type": "text", "text": v["answer"]}],
128
- },
129
- ]
130
- }
131
- for item in results
132
- for k, v in item.items()
133
- ]
134
- else:
135
- raise ValueError(f"Unknown output data format: {output_data_format}")
136
- return results
 
1
+ import json
2
  import re
3
  from typing import Any
4
 
5
  from graphgen.bases import BaseGenerator
6
  from graphgen.templates import VQA_GENERATION_PROMPT
7
+ from graphgen.utils import detect_main_language, logger
8
 
9
 
10
  class VQAGenerator(BaseGenerator):
 
33
  return prompt
34
 
35
  @staticmethod
36
+ def parse_response(response: str) -> list[dict]:
37
  """
38
  Parse the LLM response and return the generated QAs
39
  :param response
40
  :return: QA pairs
41
  """
42
+ qa_pairs = []
43
  pattern = r"<question>(.*?)</question>\s*<answer>(.*?)</answer>"
44
  matches = re.findall(pattern, response, re.DOTALL)
45
 
 
49
  answer = answer.strip().strip('"').strip("'")
50
  logger.debug("Question: %s", question)
51
  logger.debug("Answer: %s", answer)
52
+ qa_pairs.append(
53
+ {
54
+ "question": question,
55
+ "answer": answer,
56
+ }
57
+ )
58
  else:
59
  logger.warning("Error parsing the response %s", response)
60
  return qa_pairs
 
64
  batch: tuple[
65
  list[tuple[str, dict]], list[tuple[Any, Any, dict] | tuple[Any, Any, Any]]
66
  ],
67
+ ) -> list[dict]:
68
  """
69
  Generate QAs based on a given batch.
70
  :param batch
71
  :return: QA pairs
72
  """
 
73
  prompt = self.build_prompt(batch)
74
  response = await self.llm_client.generate_answer(prompt)
75
  qa_pairs = self.parse_response(response) # generate one or more QA pairs
76
  nodes, _ = batch
77
  for node in nodes:
78
  node_data = node[1]
79
+ if "metadata" in node_data and node_data["metadata"]:
80
+ metadata = json.loads(node_data["metadata"])["metadata"]
81
+ img_path = metadata.get("path", "")
82
+ for qa in qa_pairs:
83
  qa["img_path"] = img_path
84
+ return qa_pairs
 
85
 
86
  @staticmethod
87
+ def format_generation_results(result: dict, output_data_format: str) -> dict:
88
+ question = result.get("question", "")
89
+ answer = result.get("answer", "")
90
+ img_path = result.get("img_path", "")
91
  if output_data_format == "Alpaca":
92
+ return {
93
+ "instruction": question,
94
+ "input": "",
95
+ "output": answer,
96
+ "image": img_path,
97
+ }
98
+ if output_data_format == "Sharegpt":
99
+ return {
100
+ "conversations": [
101
+ {
102
+ "from": "human",
103
+ "value": [{"text": question, "image": img_path}],
104
+ },
105
+ {"from": "gpt", "value": [{"text": answer}]},
106
+ ]
107
+ }
108
+ if output_data_format == "ChatML":
109
+ return {
110
+ "messages": [
111
+ {
112
+ "role": "user",
113
+ "content": [{"text": question, "image": img_path}],
114
+ },
115
+ {
116
+ "role": "assistant",
117
+ "content": [{"type": "text", "text": answer}],
118
+ },
119
+ ]
120
+ }
121
+ raise ValueError(f"Unknown output data format: {output_data_format}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
graphgen/models/kg_builder/light_rag_kg_builder.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import re
2
  from collections import Counter, defaultdict
3
  from typing import Dict, List, Tuple
@@ -130,15 +131,25 @@ class LightRAGKGBuilder(BaseKGBuilder):
130
  set([dp["source_id"] for dp in node_data] + source_ids)
131
  )
132
 
133
- node_data = {
134
  "entity_type": entity_type,
135
  "entity_name": entity_name,
136
  "description": description,
137
  "source_id": source_id,
138
  "length": self.tokenizer.count_tokens(description),
139
  }
140
- kg_instance.upsert_node(entity_name, node_data=node_data)
141
- return node_data
 
 
 
 
 
 
 
 
 
 
142
 
143
  async def merge_edges(
144
  self,
 
1
+ import json
2
  import re
3
  from collections import Counter, defaultdict
4
  from typing import Dict, List, Tuple
 
131
  set([dp["source_id"] for dp in node_data] + source_ids)
132
  )
133
 
134
+ node_data_dict = {
135
  "entity_type": entity_type,
136
  "entity_name": entity_name,
137
  "description": description,
138
  "source_id": source_id,
139
  "length": self.tokenizer.count_tokens(description),
140
  }
141
+
142
+ if entity_type in ("IMAGE", "TABLE", "FORMULA"):
143
+ metadata = next(
144
+ (dp["metadata"] for dp in node_data if dp.get("metadata")), None
145
+ )
146
+ if metadata:
147
+ node_data_dict["metadata"] = json.dumps(
148
+ metadata, ensure_ascii=False, default=str
149
+ )
150
+
151
+ kg_instance.upsert_node(entity_name, node_data=node_data_dict)
152
+ return node_data_dict
153
 
154
  async def merge_edges(
155
  self,
graphgen/models/kg_builder/mm_kg_builder.py CHANGED
@@ -70,6 +70,8 @@ class MMKGBuilder(LightRAGKGBuilder):
70
 
71
  entity = await handle_single_entity_extraction(attributes, chunk_id)
72
  if entity is not None:
 
 
73
  nodes[entity["entity_name"]].append(entity)
74
  continue
75
 
 
70
 
71
  entity = await handle_single_entity_extraction(attributes, chunk_id)
72
  if entity is not None:
73
+ if entity["entity_type"] == "IMAGE":
74
+ entity["metadata"] = chunk.metadata
75
  nodes[entity["entity_name"]].append(entity)
76
  continue
77
 
graphgen/models/reader/csv_reader.py CHANGED
@@ -22,7 +22,7 @@ class CSVReader(BaseReader):
22
  :return: Ray Dataset containing validated and filtered data.
23
  """
24
 
25
- ds = ray.data.read_csv(input_path)
26
  ds = ds.map_batches(self._validate_batch, batch_format="pandas")
27
  ds = ds.filter(self._should_keep_item)
28
  return ds
 
22
  :return: Ray Dataset containing validated and filtered data.
23
  """
24
 
25
+ ds = ray.data.read_csv(input_path, include_paths=True)
26
  ds = ds.map_batches(self._validate_batch, batch_format="pandas")
27
  ds = ds.filter(self._should_keep_item)
28
  return ds
graphgen/models/reader/json_reader.py CHANGED
@@ -34,10 +34,13 @@ class JSONReader(BaseReader):
34
  with open(file, "r", encoding="utf-8") as f:
35
  data = json.load(f)
36
  data = self._unify_schema(data)
 
 
 
37
  file_ds: ray.data.Dataset = ray.data.from_items(data)
38
  ds = ds.union(file_ds) # type: ignore
39
  else:
40
- ds = ray.data.read_json(input_path)
41
  ds = ds.map_batches(self._validate_batch, batch_format="pandas")
42
  ds = ds.filter(self._should_keep_item)
43
  return ds
 
34
  with open(file, "r", encoding="utf-8") as f:
35
  data = json.load(f)
36
  data = self._unify_schema(data)
37
+ # add path
38
+ for item in data:
39
+ item["path"] = file
40
  file_ds: ray.data.Dataset = ray.data.from_items(data)
41
  ds = ds.union(file_ds) # type: ignore
42
  else:
43
+ ds = ray.data.read_json(input_path, include_paths=True)
44
  ds = ds.map_batches(self._validate_batch, batch_format="pandas")
45
  ds = ds.filter(self._should_keep_item)
46
  return ds
graphgen/models/reader/parquet_reader.py CHANGED
@@ -24,7 +24,7 @@ class ParquetReader(BaseReader):
24
  if not ray.is_initialized():
25
  ray.init()
26
 
27
- ds = ray.data.read_parquet(input_path)
28
  ds = ds.map_batches(self._validate_batch, batch_format="pandas")
29
  ds = ds.filter(self._should_keep_item)
30
  return ds
 
24
  if not ray.is_initialized():
25
  ray.init()
26
 
27
+ ds = ray.data.read_parquet(input_path, include_paths=True)
28
  ds = ds.map_batches(self._validate_batch, batch_format="pandas")
29
  ds = ds.filter(self._should_keep_item)
30
  return ds
graphgen/models/reader/rdf_reader.py CHANGED
@@ -118,7 +118,7 @@ class RDFReader(BaseReader):
118
  "id": str(subj),
119
  self.text_column: text,
120
  "properties": props,
121
- "source_file": str(file_path),
122
  }
123
  docs.append(doc)
124
 
 
118
  "id": str(subj),
119
  self.text_column: text,
120
  "properties": props,
121
+ "path": str(file_path),
122
  }
123
  docs.append(doc)
124
 
graphgen/models/reader/txt_reader.py CHANGED
@@ -18,13 +18,14 @@ class TXTReader(BaseReader):
18
  """
19
  docs_ds = ray.data.read_binary_files(
20
  input_path,
21
- include_paths=False,
22
  )
23
 
24
  docs_ds = docs_ds.map(
25
  lambda row: {
26
  "type": "text",
27
  self.text_column: row["bytes"].decode("utf-8"),
 
28
  }
29
  )
30
 
 
18
  """
19
  docs_ds = ray.data.read_binary_files(
20
  input_path,
21
+ include_paths=True,
22
  )
23
 
24
  docs_ds = docs_ds.map(
25
  lambda row: {
26
  "type": "text",
27
  self.text_column: row["bytes"].decode("utf-8"),
28
+ "path": row["path"],
29
  }
30
  )
31
 
graphgen/models/storage/__init__.py DELETED
@@ -1,6 +0,0 @@
1
- from graphgen.models.storage.graph.kuzu_storage import KuzuStorage
2
- from graphgen.models.storage.graph.networkx_storage import NetworkXStorage
3
- from graphgen.models.storage.kv.json_storage import JsonKVStorage
4
- from graphgen.models.storage.kv.rocksdb_storage import RocksDBKVStorage
5
-
6
- from .rocksdb_cache import RocksDBCache
 
 
 
 
 
 
 
graphgen/models/storage/rocksdb_cache.py DELETED
@@ -1,43 +0,0 @@
1
- from pathlib import Path
2
- from typing import Any, Iterator, Optional
3
-
4
- # rocksdict is a lightweight C wrapper around RocksDB for Python, pylint may not recognize it
5
- # pylint: disable=no-name-in-module
6
- from rocksdict import Rdict
7
-
8
-
9
- class RocksDBCache:
10
- def __init__(self, cache_dir: str):
11
- self.db_path = Path(cache_dir)
12
- self.db = Rdict(str(self.db_path))
13
-
14
- def get(self, key: str) -> Optional[Any]:
15
- return self.db.get(key)
16
-
17
- def set(self, key: str, value: Any):
18
- self.db[key] = value
19
-
20
- def delete(self, key: str):
21
- try:
22
- del self.db[key]
23
- except KeyError:
24
- # If the key does not exist, do nothing (deletion is idempotent for caches)
25
- pass
26
-
27
- def close(self):
28
- if hasattr(self, "db") and self.db is not None:
29
- self.db.close()
30
- self.db = None
31
-
32
- def __del__(self):
33
- # Ensure the database is closed when the object is destroyed
34
- self.close()
35
-
36
- def __enter__(self):
37
- return self
38
-
39
- def __exit__(self, exc_type, exc_val, exc_tb):
40
- self.close()
41
-
42
- def __iter__(self) -> Iterator[str]:
43
- return iter(self.db.keys())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
graphgen/models/vis/__init__.py DELETED
File without changes
graphgen/models/vis/community_visualizer.py DELETED
@@ -1,48 +0,0 @@
1
- from dataclasses import dataclass
2
- from typing import Dict
3
-
4
- import matplotlib.pyplot as plt
5
- import networkx as nx
6
-
7
-
8
- @dataclass
9
- class Visualizer:
10
- """
11
- Class for visualizing graphs using NetworkX and Matplotlib.
12
- """
13
-
14
- graph: nx.Graph = None
15
- communities: Dict[str, int] = None
16
- layout: str = "spring"
17
- max_nodes: int = 1000
18
- node_size: int = 10
19
- alpha: float = 0.6
20
-
21
- def visualize(self, save_path: str = None):
22
- n = self.graph.number_of_nodes()
23
- if self.layout == "spring":
24
- k = max(0.1, 1.0 / (n**0.5))
25
- pos = nx.spring_layout(self.graph, k=k, seed=42)
26
- else:
27
- raise ValueError(f"Unknown layout: {self.layout}")
28
-
29
- plt.figure(figsize=(10, 10))
30
-
31
- node_colors = [self.communities.get(node, 0) for node in self.graph.nodes()]
32
-
33
- nx.draw_networkx_nodes(
34
- self.graph,
35
- pos,
36
- node_size=self.node_size,
37
- node_color=node_colors,
38
- cmap=plt.cm.tab20,
39
- alpha=self.alpha,
40
- )
41
- nx.draw_networkx_edges(self.graph, pos, alpha=0.3, width=0.2)
42
- plt.axis("off")
43
-
44
- if save_path:
45
- plt.savefig(save_path, dpi=300, bbox_inches="tight")
46
- print("Saved to", save_path)
47
- else:
48
- plt.show()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
graphgen/operators/build_kg/build_kg_service.py CHANGED
@@ -1,6 +1,4 @@
1
- from typing import List
2
-
3
- import pandas as pd
4
 
5
  from graphgen.bases import BaseGraphStorage, BaseLLMWrapper, BaseOperator
6
  from graphgen.bases.datatypes import Chunk
@@ -13,9 +11,15 @@ from .build_text_kg import build_text_kg
13
 
14
  class BuildKGService(BaseOperator):
15
  def __init__(
16
- self, working_dir: str = "cache", graph_backend: str = "kuzu", **build_kwargs
 
 
 
 
17
  ):
18
- super().__init__(working_dir=working_dir, op_name="build_kg_service")
 
 
19
  self.llm_client: BaseLLMWrapper = init_llm("synthesizer")
20
  self.graph_storage: BaseGraphStorage = init_storage(
21
  backend=graph_backend, working_dir=working_dir, namespace="graph"
@@ -23,21 +27,15 @@ class BuildKGService(BaseOperator):
23
  self.build_kwargs = build_kwargs
24
  self.max_loop: int = int(self.build_kwargs.get("max_loop", 3))
25
 
26
- def process(self, batch: pd.DataFrame) -> pd.DataFrame:
27
- docs = batch.to_dict(orient="records")
28
- docs = [Chunk.from_dict(doc["_chunk_id"], doc) for doc in docs]
29
-
30
- # consume the chunks and build kg
31
- nodes, edges = self.build_kg(docs)
32
- return pd.DataFrame(
33
- [{"node": node, "edge": []} for node in nodes]
34
- + [{"node": [], "edge": edge} for edge in edges]
35
- )
36
-
37
- def build_kg(self, chunks: List[Chunk]) -> tuple:
38
  """
39
  Build knowledge graph (KG) and merge into kg_instance
 
 
 
 
40
  """
 
41
  text_chunks = [chunk for chunk in chunks if chunk.type == "text"]
42
  mm_chunks = [
43
  chunk
@@ -75,4 +73,34 @@ class BuildKGService(BaseOperator):
75
  self.graph_storage.index_done_callback()
76
  logger.info("Knowledge graph building completed.")
77
 
78
- return nodes, edges
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Tuple
 
 
2
 
3
  from graphgen.bases import BaseGraphStorage, BaseLLMWrapper, BaseOperator
4
  from graphgen.bases.datatypes import Chunk
 
11
 
12
  class BuildKGService(BaseOperator):
13
  def __init__(
14
+ self,
15
+ working_dir: str = "cache",
16
+ kv_backend: str = "rocksdb",
17
+ graph_backend: str = "kuzu",
18
+ **build_kwargs
19
  ):
20
+ super().__init__(
21
+ working_dir=working_dir, kv_backend=kv_backend, op_name="build_kg"
22
+ )
23
  self.llm_client: BaseLLMWrapper = init_llm("synthesizer")
24
  self.graph_storage: BaseGraphStorage = init_storage(
25
  backend=graph_backend, working_dir=working_dir, namespace="graph"
 
27
  self.build_kwargs = build_kwargs
28
  self.max_loop: int = int(self.build_kwargs.get("max_loop", 3))
29
 
30
+ def process(self, batch: list) -> Tuple[list, dict]:
 
 
 
 
 
 
 
 
 
 
 
31
  """
32
  Build knowledge graph (KG) and merge into kg_instance
33
+ :return: A tuple of (results, meta_updates)
34
+ results: A list of dicts containing nodes and edges added to the KG. Each dict has the structure:
35
+ {"_trace_id": str, "node": dict, "edge": dict}
36
+ meta_updates: A dict mapping source IDs to lists of trace IDs for nodes and edges added.
37
  """
38
+ chunks = [Chunk.from_dict(doc["_trace_id"], doc) for doc in batch]
39
  text_chunks = [chunk for chunk in chunks if chunk.type == "text"]
40
  mm_chunks = [
41
  chunk
 
73
  self.graph_storage.index_done_callback()
74
  logger.info("Knowledge graph building completed.")
75
 
76
+ meta_updates = {}
77
+ results = []
78
+ for node in nodes:
79
+ if not node:
80
+ continue
81
+ trace_id = node["entity_name"]
82
+ results.append(
83
+ {
84
+ "_trace_id": trace_id,
85
+ "node": node,
86
+ "edge": {},
87
+ }
88
+ )
89
+ source_ids = node.get("source_id", "").split("<SEP>")
90
+ for source_id in source_ids:
91
+ meta_updates.setdefault(source_id, []).append(trace_id)
92
+ for edge in edges:
93
+ if not edge:
94
+ continue
95
+ trace_id = frozenset((edge["src_id"], edge["tgt_id"]))
96
+ results.append(
97
+ {
98
+ "_trace_id": str(trace_id),
99
+ "node": {},
100
+ "edge": edge,
101
+ }
102
+ )
103
+ source_ids = edge.get("source_id", "").split("<SEP>")
104
+ for source_id in source_ids:
105
+ meta_updates.setdefault(source_id, []).append(str(trace_id))
106
+ return results, meta_updates
graphgen/operators/build_kg/build_text_kg.py CHANGED
@@ -30,6 +30,7 @@ def build_text_kg(
30
  desc="[2/4]Extracting entities and relationships from chunks",
31
  unit="chunk",
32
  )
 
33
 
34
  nodes = defaultdict(list)
35
  edges = defaultdict(list)
 
30
  desc="[2/4]Extracting entities and relationships from chunks",
31
  unit="chunk",
32
  )
33
+ results = [res for res in results if res]
34
 
35
  nodes = defaultdict(list)
36
  edges = defaultdict(list)
graphgen/operators/chunk/chunk_service.py CHANGED
@@ -1,17 +1,14 @@
1
  import os
2
  from functools import lru_cache
3
- from typing import Union
4
-
5
- import pandas as pd
6
 
7
  from graphgen.bases import BaseOperator
8
- from graphgen.common import init_storage
9
  from graphgen.models import (
10
  ChineseRecursiveTextSplitter,
11
  RecursiveCharacterSplitter,
12
  Tokenizer,
13
  )
14
- from graphgen.utils import compute_content_hash, detect_main_language
15
 
16
  _MAPPING = {
17
  "en": RecursiveCharacterSplitter,
@@ -45,26 +42,25 @@ class ChunkService(BaseOperator):
45
  def __init__(
46
  self, working_dir: str = "cache", kv_backend: str = "rocksdb", **chunk_kwargs
47
  ):
48
- super().__init__(working_dir=working_dir, op_name="chunk_service")
 
 
49
  tokenizer_model = os.getenv("TOKENIZER_MODEL", "cl100k_base")
50
  self.tokenizer_instance: Tokenizer = Tokenizer(model_name=tokenizer_model)
51
- self.chunk_storage = init_storage(
52
- backend=kv_backend,
53
- working_dir=working_dir,
54
- namespace="chunk",
55
- )
56
  self.chunk_kwargs = chunk_kwargs
57
 
58
- def process(self, batch: pd.DataFrame) -> pd.DataFrame:
59
- docs = batch.to_dict(orient="records")
60
- return pd.DataFrame(self.chunk_documents(docs))
61
-
62
- def chunk_documents(self, new_docs: list) -> list:
63
- chunks = []
64
- for doc in new_docs:
65
- doc_id = doc.get("_doc_id")
 
 
 
66
  doc_type = doc.get("type")
67
-
68
  if doc_type == "text":
69
  doc_language = detect_main_language(doc["content"])
70
  text_chunks = split_chunks(
@@ -72,32 +68,30 @@ class ChunkService(BaseOperator):
72
  language=doc_language,
73
  **self.chunk_kwargs,
74
  )
75
-
76
- chunks.extend(
77
- [
78
- {
79
- "_chunk_id": compute_content_hash(
80
- chunk_text, prefix="chunk-"
81
- ),
82
- "content": chunk_text,
83
- "type": "text",
84
- "_doc_id": doc_id,
85
- "length": len(self.tokenizer_instance.encode(chunk_text))
86
  if self.tokenizer_instance
87
- else len(chunk_text),
88
  "language": doc_language,
89
- }
90
- for chunk_text in text_chunks
91
- ]
92
- )
 
 
 
93
  else:
94
  # other types of documents(images, sequences) are not chunked
95
- chunks.append(
96
- {
97
- "_chunk_id": doc_id.replace("doc-", f"{doc_type}-"),
98
- **doc,
99
- }
100
- )
101
- self.chunk_storage.upsert({chunk["_chunk_id"]: chunk for chunk in chunks})
102
- self.chunk_storage.index_done_callback()
103
- return chunks
 
1
  import os
2
  from functools import lru_cache
3
+ from typing import Tuple, Union
 
 
4
 
5
  from graphgen.bases import BaseOperator
 
6
  from graphgen.models import (
7
  ChineseRecursiveTextSplitter,
8
  RecursiveCharacterSplitter,
9
  Tokenizer,
10
  )
11
+ from graphgen.utils import detect_main_language
12
 
13
  _MAPPING = {
14
  "en": RecursiveCharacterSplitter,
 
42
  def __init__(
43
  self, working_dir: str = "cache", kv_backend: str = "rocksdb", **chunk_kwargs
44
  ):
45
+ super().__init__(
46
+ working_dir=working_dir, kv_backend=kv_backend, op_name="chunk"
47
+ )
48
  tokenizer_model = os.getenv("TOKENIZER_MODEL", "cl100k_base")
49
  self.tokenizer_instance: Tokenizer = Tokenizer(model_name=tokenizer_model)
 
 
 
 
 
50
  self.chunk_kwargs = chunk_kwargs
51
 
52
+ def process(self, batch: list) -> Tuple[list, dict]:
53
+ """
54
+ Chunk the documents in the batch.
55
+ :return: A tuple of (results, meta_updates)
56
+ results: A list of chunked documents. Each chunked document is a dict with the structure:
57
+ {"_trace_id": str, "content": str, "type": str, "metadata": {"length": int, "language": str, ...}
58
+ meta_updates: A dict mapping source document IDs to lists of trace IDs for the chunked documents.
59
+ """
60
+ results = []
61
+ meta_updates = {}
62
+ for doc in batch:
63
  doc_type = doc.get("type")
 
64
  if doc_type == "text":
65
  doc_language = detect_main_language(doc["content"])
66
  text_chunks = split_chunks(
 
68
  language=doc_language,
69
  **self.chunk_kwargs,
70
  )
71
+ for text_chunk in text_chunks:
72
+ chunk = {
73
+ "content": text_chunk,
74
+ "type": "text",
75
+ "metadata": {
76
+ "length": len(self.tokenizer_instance.encode(text_chunk))
 
 
 
 
 
77
  if self.tokenizer_instance
78
+ else len(text_chunk),
79
  "language": doc_language,
80
+ },
81
+ }
82
+ chunk["_trace_id"] = self.get_trace_id(chunk)
83
+ results.append(chunk)
84
+ meta_updates.setdefault(doc["_trace_id"], []).append(
85
+ chunk["_trace_id"]
86
+ )
87
  else:
88
  # other types of documents(images, sequences) are not chunked
89
+ data = doc.copy()
90
+ input_trace_id = data.pop("_trace_id")
91
+ content = data.pop("content") if "content" in data else ""
92
+ doc_type = data.pop("type")
93
+ chunk = {"content": content, "type": doc_type, "metadata": data}
94
+ chunk["_trace_id"] = self.get_trace_id(chunk)
95
+ results.append(chunk)
96
+ meta_updates.setdefault(input_trace_id, []).append(chunk["_trace_id"])
97
+ return results, meta_updates
graphgen/operators/evaluate/evaluate_kg.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Dict
2
+
3
+ from graphgen.bases import BaseGraphStorage
4
+ from graphgen.utils import logger
5
+
6
+
7
+ def evaluate_kg(
8
+ kg_evaluators: Dict[str, Any],
9
+ kg_instance: BaseGraphStorage,
10
+ ) -> Dict[str, Any]:
11
+ results = {}
12
+ for key, kg_evaluator in kg_evaluators.items():
13
+ results[key] = kg_evaluator.evaluate(kg_instance)
14
+ logger.info(f"KG Evaluation result for {key}: {results[key]}")
15
+ return results
graphgen/operators/evaluate/evaluate_qa.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any
2
+
3
+ from graphgen.bases import QAPair
4
+ from graphgen.utils import run_concurrent
5
+
6
+
7
+ def transform_to_qa_format(
8
+ items: list[dict], format_hint: str = "auto"
9
+ ) -> list[dict[str, str]]:
10
+ extractors = {
11
+ "ChatML": lambda x: (
12
+ next(
13
+ (
14
+ m["content"]
15
+ for m in x.get("messages", [])
16
+ if m.get("role") == "user"
17
+ ),
18
+ "",
19
+ ),
20
+ next(
21
+ (
22
+ m["content"]
23
+ for m in x.get("messages", [])
24
+ if m.get("role") == "assistant"
25
+ ),
26
+ "",
27
+ ),
28
+ ),
29
+ "Alpaca": lambda x: (
30
+ f"{x.get('instruction', '')}\n\n{x['input']}".strip()
31
+ if x.get("input")
32
+ else x.get("instruction", ""),
33
+ x.get("output", ""),
34
+ ),
35
+ "Sharegpt": lambda x: (
36
+ next(
37
+ (
38
+ c["value"]
39
+ for c in x.get("conversations", [])
40
+ if c.get("from") == "human"
41
+ ),
42
+ "",
43
+ ),
44
+ next(
45
+ (
46
+ c["value"]
47
+ for c in x.get("conversations", [])
48
+ if c.get("from") in ("gpt", "assistant")
49
+ ),
50
+ "",
51
+ ),
52
+ ),
53
+ }
54
+
55
+ auto_detect = {
56
+ "messages": "ChatML",
57
+ "conversations": "Sharegpt",
58
+ "instruction": "Alpaca",
59
+ }
60
+
61
+ transformed = []
62
+ for item in items:
63
+ fmt = format_hint
64
+ if fmt == "auto":
65
+ fmt = next(
66
+ (fmt_name for key, fmt_name in auto_detect.items() if key in item), None
67
+ )
68
+ if not fmt:
69
+ raise ValueError(
70
+ "Could not auto-detect format. Please specify format_hint."
71
+ )
72
+
73
+ question, answer = extractors[fmt](item)
74
+ options = None
75
+ if "\nOptions:\n" in question:
76
+ q_part, opt_part = question.split("\nOptions:\n", 1)
77
+ question = q_part
78
+ options = {
79
+ k.strip(): v.strip()
80
+ for line in opt_part.strip().split("\n")
81
+ if "." in line
82
+ for k, v in [line.split(".", 1)]
83
+ }
84
+
85
+ result = {"question": question.strip(), "answer": answer.strip()}
86
+ if options:
87
+ result["options"] = options
88
+ transformed.append(result)
89
+
90
+ return transformed
91
+
92
+
93
+ def evaluate_qa(
94
+ qa_evaluators: dict[str, Any], items: list[dict[str, Any]]
95
+ ) -> dict[str, Any]:
96
+ items = transform_to_qa_format(items)
97
+ items = [QAPair.from_dict(item) for item in items]
98
+
99
+ results = {}
100
+ for key, qa_evaluator in qa_evaluators.items():
101
+ result = run_concurrent(
102
+ qa_evaluator.evaluate,
103
+ items,
104
+ desc=f"Evaluating QA with {key}",
105
+ )
106
+ results[key] = result
107
+ return results
graphgen/operators/evaluate/evaluate_service.py CHANGED
@@ -1,10 +1,12 @@
1
- from typing import Any, Dict
2
 
3
- import pandas as pd
4
-
5
- from graphgen.bases import BaseLLMWrapper, BaseOperator, QAPair
6
  from graphgen.common import init_llm, init_storage
7
- from graphgen.utils import logger, run_concurrent
 
 
 
 
8
 
9
 
10
  class EvaluateService(BaseOperator):
@@ -15,167 +17,135 @@ class EvaluateService(BaseOperator):
15
 
16
  def __init__(
17
  self,
 
 
18
  working_dir: str = "cache",
19
- metrics: list[str] = None,
20
  graph_backend: str = "kuzu",
21
  kv_backend: str = "rocksdb",
22
  **kwargs,
23
  ):
24
- super().__init__(working_dir=working_dir, op_name="evaluate_service")
 
 
25
  self.llm_client: BaseLLMWrapper = init_llm("synthesizer")
26
  self.metrics = metrics or []
27
  self.kwargs = kwargs
28
  self.graph_storage = init_storage(
29
  backend=graph_backend, working_dir=working_dir, namespace="graph"
30
  )
31
- self.chunk_storage = init_storage(
32
- backend=kv_backend, working_dir=working_dir, namespace="chunk"
33
- )
34
 
35
  # Initialize evaluators
36
- self.qa_evaluators = {}
37
- self.kg_evaluators = {}
38
- self._init_evaluators()
39
-
40
- def _init_evaluators(self):
41
- """Initialize QA and KG evaluators based on metrics."""
42
- for metric in self.metrics:
43
- if metric == "qa_length":
44
- from graphgen.models import LengthEvaluator
45
-
46
- self.qa_evaluators[metric] = LengthEvaluator()
47
- elif metric == "qa_mtld":
48
- from graphgen.models import MTLDEvaluator
49
-
50
- self.qa_evaluators[metric] = MTLDEvaluator(
51
- **self.kwargs.get("mtld_params", {})
52
- )
53
- elif metric == "qa_reward_score":
54
- from graphgen.models import RewardEvaluator
55
-
56
- self.qa_evaluators[metric] = RewardEvaluator(
57
- **self.kwargs.get("reward_params", {})
58
- )
59
- elif metric == "qa_uni_score":
60
- from graphgen.models import UniEvaluator
61
-
62
- self.qa_evaluators[metric] = UniEvaluator(
63
- **self.kwargs.get("uni_params", {})
64
- )
65
- elif metric == "kg_accuracy":
66
- from graphgen.models import AccuracyEvaluator
67
-
68
- self.kg_evaluators[metric] = AccuracyEvaluator(
69
- graph_storage=self.graph_storage,
70
- chunk_storage=self.chunk_storage,
71
- llm_client=self.llm_client,
72
- )
73
- elif metric == "kg_consistency":
74
- from graphgen.models import ConsistencyEvaluator
75
-
76
- self.kg_evaluators[metric] = ConsistencyEvaluator(
77
- graph_storage=self.graph_storage,
78
- chunk_storage=self.chunk_storage,
79
- llm_client=self.llm_client,
80
- )
81
- elif metric == "kg_structure":
82
- from graphgen.models import StructureEvaluator
83
-
84
- self.kg_evaluators[metric] = StructureEvaluator(
85
- graph_storage=self.graph_storage,
86
- **self.kwargs.get("structure_params", {}),
87
- )
88
- else:
89
- raise ValueError(f"Unknown QA metric: {metric}")
90
-
91
- async def _process_single_qa(self, item: dict[str, Any]) -> dict[str, Any]:
92
- try:
93
- qa_pair = QAPair(
94
- question=str(item.get("question", "")),
95
- answer=str(item.get("answer", "")),
96
  )
97
- if not qa_pair.question or not qa_pair.answer:
98
- logger.error("Empty question or answer, skipping.")
99
- return {}
100
- except Exception as e:
101
- logger.error("Error in QAPair creation: %s", str(e))
102
- return {}
103
-
104
- for metric, evaluator in self.qa_evaluators.items():
105
- try:
106
- score = evaluator.evaluate(qa_pair)
107
- if isinstance(score, dict):
108
- for sub_metric, sub_score in score.items():
109
- item[f"{metric}_{sub_metric}"] = float(sub_score)
110
- else:
111
- item[metric] = float(score)
112
- except Exception as e:
113
- logger.error("Error in %s evaluation: %s", metric, str(e))
114
- item[metric] = None
115
- return item
116
-
117
- def _evaluate_qa(self, items: list[dict[str, Any]]) -> list[dict[str, Any]]:
118
- def transform_messages_format(items: list[dict]) -> list[dict]:
119
- """
120
- Transform from [{'messages': [...]}, ...] to [{'question': '...', 'answer': '...'}, ...]
121
- """
122
- transformed = []
123
- for item in items:
124
- messages = item.get("messages", [])
125
- question = next(
126
- (m["content"] for m in messages if m.get("role") == "user"), ""
127
- )
128
- answer = next(
129
- (m["content"] for m in messages if m.get("role") == "assistant"), ""
130
- )
131
-
132
- transformed.append({"question": question, "answer": answer})
133
- return transformed
134
-
135
- if not items:
136
- return []
137
-
138
- if not self.qa_evaluators:
139
- logger.warning("No QA evaluators initialized, skipping QA evaluation")
140
- return []
141
-
142
- items = transform_messages_format(items)
143
- results = run_concurrent(
144
- self._process_single_qa,
145
- items,
146
- desc="Evaluating QA items",
147
- unit="item",
148
  )
149
 
150
- results = [item for item in results if item]
151
- return results
152
-
153
- def _evaluate_kg(self) -> Dict[str, Any]:
154
- results = {}
155
-
156
- for metric, evaluator in self.kg_evaluators.items():
157
- try:
158
- logger.info("Running %s evaluation...", metric)
159
- score = evaluator.evaluate()
160
- results[metric] = score
161
- except Exception as e:
162
- logger.error("Error in %s evaluation: %s", metric, str(e))
163
- results[metric] = {"error": str(e)}
164
- return results
165
-
166
- def process(self, batch: pd.DataFrame) -> pd.DataFrame:
167
- # QA evaluation
168
- if len(self.qa_evaluators) > 0:
169
- items = batch.to_dict(orient="records")
170
- results = self._evaluate_qa(items)
171
- return pd.DataFrame(results)
172
-
173
- # KG evaluation
174
- if len(self.kg_evaluators) > 0:
175
- results = self._evaluate_kg()
176
- # Convert dict to DataFrame (single row)
177
- return pd.DataFrame([results])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
 
179
  # No metrics specified
180
  logger.warning("No metrics specified, returning empty DataFrame")
181
- return pd.DataFrame()
 
1
+ from typing import Tuple
2
 
3
+ from graphgen.bases import BaseLLMWrapper, BaseOperator
 
 
4
  from graphgen.common import init_llm, init_storage
5
+ from graphgen.utils import logger
6
+
7
+ from .evaluate_kg import evaluate_kg
8
+ from .evaluate_qa import evaluate_qa
9
+ from .evaluate_triple import evaluate_triple
10
 
11
 
12
  class EvaluateService(BaseOperator):
 
17
 
18
  def __init__(
19
  self,
20
+ target: str,
21
+ metrics: list[str],
22
  working_dir: str = "cache",
 
23
  graph_backend: str = "kuzu",
24
  kv_backend: str = "rocksdb",
25
  **kwargs,
26
  ):
27
+ super().__init__(
28
+ working_dir=working_dir, kv_backend=kv_backend, op_name="evaluate"
29
+ )
30
  self.llm_client: BaseLLMWrapper = init_llm("synthesizer")
31
  self.metrics = metrics or []
32
  self.kwargs = kwargs
33
  self.graph_storage = init_storage(
34
  backend=graph_backend, working_dir=working_dir, namespace="graph"
35
  )
 
 
 
36
 
37
  # Initialize evaluators
38
+ self.target = target
39
+ self.src_storage = None
40
+ self.tgt_storage = None
41
+ self.evaluators = {}
42
+ self._init_evaluators(self.target, metrics)
43
+
44
+ def _init_evaluators(self, target: str, metrics: list[str]):
45
+ """Initialize evaluators based on target and metrics."""
46
+ if target not in {"qa", "kg", "triple"}:
47
+ raise ValueError(f"Unknown evaluation target: {target}")
48
+
49
+ # Delegate to target-specific initializer
50
+ getattr(self, f"_init_{target}_evaluators")(metrics)
51
+
52
+ def _init_qa_evaluators(self, metrics: list[str]):
53
+ """Initialize QA evaluators."""
54
+ for metric in metrics:
55
+ self.evaluators[metric] = self._create_qa_evaluator(metric)
56
+
57
+ def _create_qa_evaluator(self, metric: str):
58
+ """Factory method for QA evaluator instances."""
59
+ if metric == "length":
60
+ from graphgen.models import LengthEvaluator
61
+
62
+ return LengthEvaluator()
63
+ if metric == "mtld":
64
+ from graphgen.models import MTLDEvaluator
65
+
66
+ return MTLDEvaluator(**self.kwargs.get("mtld_params", {}))
67
+ if metric == "reward_score":
68
+ from graphgen.models import RewardEvaluator
69
+
70
+ return RewardEvaluator(**self.kwargs.get("reward_params", {}))
71
+ if metric == "uni_score":
72
+ from graphgen.models import UniEvaluator
73
+
74
+ return UniEvaluator(**self.kwargs.get("uni_params", {}))
75
+ raise ValueError(f"Unknown QA metric: {metric}")
76
+
77
+ def _init_kg_evaluators(self, metrics: list[str]):
78
+ """Initialize KG evaluators."""
79
+ for metric in metrics:
80
+ if metric != "structure":
81
+ raise ValueError(f"Unknown KG metric: {metric}")
82
+ from graphgen.models import StructureEvaluator
83
+
84
+ self.evaluators[metric] = StructureEvaluator(
85
+ **self.kwargs.get("structure_params", {})
 
 
 
 
 
 
 
 
 
 
 
 
86
  )
87
+
88
+ def _init_triple_evaluators(self, metrics: list[str]):
89
+ """Initialize Triple evaluators."""
90
+ self.src_storage = init_storage(
91
+ backend=self.kv_backend,
92
+ working_dir=self.working_dir,
93
+ namespace=self.kwargs["src_namespace"],
94
+ )
95
+ self.tgt_storage = init_storage(
96
+ backend=self.kv_backend,
97
+ working_dir=self.working_dir,
98
+ namespace=self.kwargs["tgt_namespace"],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  )
100
 
101
+ for metric in metrics:
102
+ if metric != "accuracy":
103
+ raise ValueError(f"Unknown Triple metric: {metric}")
104
+ from graphgen.models import AccuracyEvaluator
105
+
106
+ self.evaluators[metric] = AccuracyEvaluator(llm_client=self.llm_client)
107
+
108
+ def process(self, batch: list) -> Tuple[list, dict]:
109
+ final_results = []
110
+ meta_updates = {}
111
+
112
+ # 1. QA Evaluation (per item)
113
+ if self.target == "qa" and self.evaluators:
114
+ results: dict = evaluate_qa(self.evaluators, batch)
115
+ for i, item in enumerate(batch):
116
+ metrics = {}
117
+ for _, scores in results.items():
118
+ metrics.update(scores[i])
119
+ item.update({"metrics": metrics})
120
+ input_trace_id = item.pop("_trace_id")
121
+ item["_trace_id"] = self.get_trace_id(item)
122
+ final_results.append(item)
123
+ meta_updates.setdefault(input_trace_id, []).append(item["_trace_id"])
124
+
125
+ return final_results, meta_updates
126
+
127
+ # 2. KG evaluation
128
+ if self.target == "kg" and self.evaluators:
129
+ results = evaluate_kg(
130
+ self.evaluators,
131
+ self.graph_storage,
132
+ )
133
+ if not results:
134
+ logger.warning("No KG evaluation results, returning empty DataFrame")
135
+ return [], {}
136
+ results["_trace_id"] = self.get_trace_id(results)
137
+ final_results.append(results)
138
+ return final_results, {}
139
+
140
+ # 3. Triple evaluation
141
+ if self.target == "triple" and self.evaluators:
142
+ results = evaluate_triple(
143
+ self.evaluators, self.src_storage, self.tgt_storage
144
+ )
145
+ results["_trace_id"] = "evaluate-triple-result"
146
+ final_results.append(results)
147
+ return final_results, {}
148
 
149
  # No metrics specified
150
  logger.warning("No metrics specified, returning empty DataFrame")
151
+ return [], {}
graphgen/operators/evaluate/evaluate_triple.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any
2
+
3
+ from graphgen.bases import BaseKVStorage
4
+ from graphgen.utils import logger, run_concurrent
5
+
6
+
7
+ def evaluate_triple(
8
+ triple_evaluators: dict[str, Any],
9
+ src_storage: BaseKVStorage,
10
+ tgt_storage: BaseKVStorage,
11
+ ) -> dict[str, Any]:
12
+ forward_meta = tgt_storage.get_by_id("_meta_forward")
13
+
14
+ tasks = []
15
+ for chunk_id, unit_ids in forward_meta.items():
16
+ chunk_content = str(src_storage.get_by_id(chunk_id))
17
+
18
+ nodes = []
19
+ edges = []
20
+
21
+ for unit_id in unit_ids:
22
+ unit_data = tgt_storage.get_by_id(unit_id)
23
+ if "node" in unit_data and unit_data["node"]:
24
+ nodes.append(unit_data["node"])
25
+ if "edge" in unit_data and unit_data["edge"]:
26
+ edges.append(unit_data["edge"])
27
+
28
+ tasks.append((chunk_content, nodes, edges))
29
+
30
+ results = {}
31
+ for key, triple_evaluator in triple_evaluators.items():
32
+ logger.info(f"Evaluating Triples with metric: {key}...")
33
+ result = run_concurrent(
34
+ triple_evaluator.evaluate,
35
+ tasks,
36
+ desc=f"Evaluating Triples with {key}",
37
+ )
38
+ results[key] = result
39
+ return results
graphgen/operators/extract/extract_service.py CHANGED
@@ -1,16 +1,19 @@
1
  import json
 
2
 
3
- import pandas as pd
4
-
5
- from graphgen.bases import BaseLLMWrapper, BaseOperator
6
  from graphgen.common import init_llm
7
  from graphgen.models.extractor import SchemaGuidedExtractor
8
  from graphgen.utils import logger, run_concurrent
9
 
10
 
11
  class ExtractService(BaseOperator):
12
- def __init__(self, working_dir: str = "cache", **extract_kwargs):
13
- super().__init__(working_dir=working_dir, op_name="extract_service")
 
 
 
 
14
  self.llm_client: BaseLLMWrapper = init_llm("synthesizer")
15
  self.extract_kwargs = extract_kwargs
16
  self.method = self.extract_kwargs.get("method")
@@ -22,24 +25,32 @@ class ExtractService(BaseOperator):
22
  else:
23
  raise ValueError(f"Unsupported extraction method: {self.method}")
24
 
25
- def process(self, batch: pd.DataFrame) -> pd.DataFrame:
26
- items = batch.to_dict(orient="records")
27
- return pd.DataFrame(self.extract(items))
28
-
29
- def extract(self, items: list[dict]) -> list[dict]:
30
-
31
- logger.info("Start extracting information from %d items", len(items))
32
-
 
 
33
  results = run_concurrent(
34
  self.extractor.extract,
35
- items,
36
  desc="Extracting information",
37
  unit="item",
38
  )
39
- results = self.extractor.merge_extractions(results)
40
 
41
- results = [
42
- {"_extract_id": key, "extracted_data": value}
43
- for key, value in results.items()
44
- ]
45
- return results
 
 
 
 
 
 
 
 
1
  import json
2
+ from typing import Tuple
3
 
4
+ from graphgen.bases import BaseLLMWrapper, BaseOperator, Chunk
 
 
5
  from graphgen.common import init_llm
6
  from graphgen.models.extractor import SchemaGuidedExtractor
7
  from graphgen.utils import logger, run_concurrent
8
 
9
 
10
  class ExtractService(BaseOperator):
11
+ def __init__(
12
+ self, working_dir: str = "cache", kv_backend: str = "rocksdb", **extract_kwargs
13
+ ):
14
+ super().__init__(
15
+ working_dir=working_dir, kv_backend=kv_backend, op_name="extract"
16
+ )
17
  self.llm_client: BaseLLMWrapper = init_llm("synthesizer")
18
  self.extract_kwargs = extract_kwargs
19
  self.method = self.extract_kwargs.get("method")
 
25
  else:
26
  raise ValueError(f"Unsupported extraction method: {self.method}")
27
 
28
+ def process(self, batch: list) -> Tuple[list, dict]:
29
+ """
30
+ Extract information from the batch of chunks.
31
+ :return: A tuple of (results, meta_updates)
32
+ results: A list of dicts containing extracted information. Each dict has the structure:
33
+ {"_trace_id": str, "content": dict}
34
+ meta_updates: A dict mapping source IDs to lists of trace IDs for the extracted information.
35
+ """
36
+ logger.info("Start extracting information from %d items", len(batch))
37
+ chunks = [Chunk.from_dict(item["_trace_id"], item) for item in batch]
38
  results = run_concurrent(
39
  self.extractor.extract,
40
+ chunks,
41
  desc="Extracting information",
42
  unit="item",
43
  )
 
44
 
45
+ meta_updates = {}
46
+ final_results = []
47
+ # chunk -> extracted info
48
+ for input_trace_id, result in zip(
49
+ [item["_trace_id"] for item in batch], results
50
+ ):
51
+ if not result:
52
+ continue
53
+ result = {"_trace_id": self.get_trace_id(result), "content": result}
54
+ meta_updates.setdefault(input_trace_id, []).append(result["_trace_id"])
55
+ final_results.append(result)
56
+ return final_results, meta_updates
graphgen/operators/generate/generate_service.py CHANGED
@@ -1,9 +1,6 @@
1
- import json
2
-
3
- import pandas as pd
4
-
5
- from graphgen.bases import BaseLLMWrapper, BaseOperator
6
- from graphgen.common import init_llm
7
  from graphgen.utils import logger, run_concurrent
8
 
9
 
@@ -15,12 +12,18 @@ class GenerateService(BaseOperator):
15
  def __init__(
16
  self,
17
  working_dir: str = "cache",
 
18
  method: str = "aggregated",
19
  data_format: str = "ChatML",
20
  **generate_kwargs,
21
  ):
22
- super().__init__(working_dir=working_dir, op_name="generate_service")
 
 
23
  self.llm_client: BaseLLMWrapper = init_llm("synthesizer")
 
 
 
24
 
25
  self.method = method
26
  self.data_format = data_format
@@ -76,32 +79,31 @@ class GenerateService(BaseOperator):
76
  else:
77
  raise ValueError(f"Unsupported generation mode: {method}")
78
 
79
- def process(self, batch: pd.DataFrame) -> pd.DataFrame:
80
- items = batch.to_dict(orient="records")
81
- return pd.DataFrame(self.generate(items))
82
-
83
- def generate(self, items: list[dict]) -> list[dict]:
84
  """
85
  Generate question-answer pairs based on nodes and edges.
86
- :param items
87
- :return: QA pairs
88
  """
89
- logger.info("[Generation] mode: %s, batches: %d", self.method, len(items))
90
- items = [
91
- (json.loads(item["nodes"]), json.loads(item["edges"])) for item in items
92
- ]
93
  results = run_concurrent(
94
  self.generator.generate,
95
- items,
96
- desc="[4/4]Generating QAs",
97
  unit="batch",
98
  )
99
 
100
- # Filter out empty results
101
- results = [res for res in results if res]
102
-
103
- results = self.generator.format_generation_results(
104
- results, output_data_format=self.data_format
105
- )
106
-
107
- return results
 
 
 
 
 
 
 
 
1
+ from typing import Tuple
2
+ from graphgen.bases import BaseKVStorage, BaseLLMWrapper, BaseOperator
3
+ from graphgen.common import init_llm, init_storage
 
 
 
4
  from graphgen.utils import logger, run_concurrent
5
 
6
 
 
12
  def __init__(
13
  self,
14
  working_dir: str = "cache",
15
+ kv_backend: str = "rocksdb",
16
  method: str = "aggregated",
17
  data_format: str = "ChatML",
18
  **generate_kwargs,
19
  ):
20
+ super().__init__(
21
+ working_dir=working_dir, kv_backend=kv_backend, op_name="generate"
22
+ )
23
  self.llm_client: BaseLLMWrapper = init_llm("synthesizer")
24
+ self.generate_storage: BaseKVStorage = init_storage(
25
+ backend=kv_backend, working_dir=working_dir, namespace="generate"
26
+ )
27
 
28
  self.method = method
29
  self.data_format = data_format
 
79
  else:
80
  raise ValueError(f"Unsupported generation mode: {method}")
81
 
82
+ def process(self, batch: list) -> Tuple[list, dict]:
 
 
 
 
83
  """
84
  Generate question-answer pairs based on nodes and edges.
 
 
85
  """
86
+ logger.info("[Generation] mode: %s, batches: %d", self.method, len(batch))
87
+ triples = [(item["nodes"], item["edges"]) for item in batch]
 
 
88
  results = run_concurrent(
89
  self.generator.generate,
90
+ triples,
91
+ desc="Generating QAs",
92
  unit="batch",
93
  )
94
 
95
+ meta_updates = {}
96
+ final_results = []
97
+ for input_trace_id, qa_pairs in zip(
98
+ [item["_trace_id"] for item in batch], results
99
+ ):
100
+ if not qa_pairs:
101
+ continue
102
+ for qa_pair in qa_pairs:
103
+ res = self.generator.format_generation_results(
104
+ qa_pair, output_data_format=self.data_format
105
+ )
106
+ res["_trace_id"] = self.get_trace_id(res)
107
+ final_results.append(res)
108
+ meta_updates.setdefault(input_trace_id, []).append(res["_trace_id"])
109
+ return final_results, meta_updates