Spaces:
Sleeping
Sleeping
github-actions[bot]
committed on
Commit
·
61172af
1
Parent(s):
f19b7a9
Auto-sync from demo at Mon Dec 22 11:46:22 UTC 2025
Browse files
graphgen/operators/build_kg/build_kg_service.py
CHANGED
|
@@ -12,12 +12,16 @@ from .build_text_kg import build_text_kg
|
|
| 12 |
|
| 13 |
|
| 14 |
class BuildKGService(BaseOperator):
|
| 15 |
-
def __init__(
|
|
|
|
|
|
|
| 16 |
super().__init__(working_dir=working_dir, op_name="build_kg_service")
|
| 17 |
self.llm_client: BaseLLMWrapper = init_llm("synthesizer")
|
| 18 |
self.graph_storage: BaseGraphStorage = init_storage(
|
| 19 |
backend=graph_backend, working_dir=working_dir, namespace="graph"
|
| 20 |
)
|
|
|
|
|
|
|
| 21 |
|
| 22 |
def process(self, batch: pd.DataFrame) -> pd.DataFrame:
|
| 23 |
docs = batch.to_dict(orient="records")
|
|
@@ -46,6 +50,7 @@ class BuildKGService(BaseOperator):
|
|
| 46 |
llm_client=self.llm_client,
|
| 47 |
kg_instance=self.graph_storage,
|
| 48 |
chunks=text_chunks,
|
|
|
|
| 49 |
)
|
| 50 |
if len(mm_chunks) == 0:
|
| 51 |
logger.info("All multi-modal chunks are already in the storage")
|
|
|
|
| 12 |
|
| 13 |
|
| 14 |
class BuildKGService(BaseOperator):
|
| 15 |
+
def __init__(
|
| 16 |
+
self, working_dir: str = "cache", graph_backend: str = "kuzu", **build_kwargs
|
| 17 |
+
):
|
| 18 |
super().__init__(working_dir=working_dir, op_name="build_kg_service")
|
| 19 |
self.llm_client: BaseLLMWrapper = init_llm("synthesizer")
|
| 20 |
self.graph_storage: BaseGraphStorage = init_storage(
|
| 21 |
backend=graph_backend, working_dir=working_dir, namespace="graph"
|
| 22 |
)
|
| 23 |
+
self.build_kwargs = build_kwargs
|
| 24 |
+
self.max_loop: int = int(self.build_kwargs.get("max_loop", 3))
|
| 25 |
|
| 26 |
def process(self, batch: pd.DataFrame) -> pd.DataFrame:
|
| 27 |
docs = batch.to_dict(orient="records")
|
|
|
|
| 50 |
llm_client=self.llm_client,
|
| 51 |
kg_instance=self.graph_storage,
|
| 52 |
chunks=text_chunks,
|
| 53 |
+
max_loop=self.max_loop,
|
| 54 |
)
|
| 55 |
if len(mm_chunks) == 0:
|
| 56 |
logger.info("All multi-modal chunks are already in the storage")
|
graphgen/operators/build_kg/build_text_kg.py
CHANGED
|
@@ -12,15 +12,17 @@ def build_text_kg(
|
|
| 12 |
llm_client: BaseLLMWrapper,
|
| 13 |
kg_instance: BaseGraphStorage,
|
| 14 |
chunks: List[Chunk],
|
|
|
|
| 15 |
):
|
| 16 |
"""
|
| 17 |
:param llm_client: Synthesizer LLM model to extract entities and relationships
|
| 18 |
:param kg_instance
|
| 19 |
:param chunks
|
|
|
|
| 20 |
:return:
|
| 21 |
"""
|
| 22 |
|
| 23 |
-
kg_builder = LightRAGKGBuilder(llm_client=llm_client, max_loop=
|
| 24 |
|
| 25 |
results = run_concurrent(
|
| 26 |
kg_builder.extract,
|
|
|
|
| 12 |
llm_client: BaseLLMWrapper,
|
| 13 |
kg_instance: BaseGraphStorage,
|
| 14 |
chunks: List[Chunk],
|
| 15 |
+
max_loop: int = 3,
|
| 16 |
):
|
| 17 |
"""
|
| 18 |
:param llm_client: Synthesizer LLM model to extract entities and relationships
|
| 19 |
:param kg_instance
|
| 20 |
:param chunks
|
| 21 |
+
:param max_loop: Maximum number of loops for entity and relationship extraction
|
| 22 |
:return:
|
| 23 |
"""
|
| 24 |
|
| 25 |
+
kg_builder = LightRAGKGBuilder(llm_client=llm_client, max_loop=max_loop)
|
| 26 |
|
| 27 |
results = run_concurrent(
|
| 28 |
kg_builder.extract,
|