github-actions[bot] committed on
Commit
0a18089
·
1 Parent(s): 9a57b42

Auto-sync from demo at Fri Jan 30 08:46:58 UTC 2026

Browse files
Files changed (32) hide show
  1. graphgen/bases/__init__.py +1 -0
  2. graphgen/bases/base_operator.py +1 -1
  3. graphgen/bases/base_rephraser.py +31 -0
  4. graphgen/common/__init__.py +2 -2
  5. graphgen/engine.py +3 -1
  6. graphgen/models/__init__.py +1 -0
  7. graphgen/models/rephraser/__init__.py +1 -0
  8. graphgen/models/rephraser/style_controlled_rephraser.py +32 -0
  9. graphgen/operators/__init__.py +2 -0
  10. graphgen/operators/build_kg/build_kg_service.py +2 -1
  11. graphgen/operators/evaluate/evaluate_service.py +2 -1
  12. graphgen/operators/extract/extract_service.py +1 -1
  13. graphgen/operators/generate/generate_service.py +3 -1
  14. graphgen/operators/judge/judge_service.py +3 -2
  15. graphgen/operators/partition/partition_service.py +1 -1
  16. graphgen/operators/quiz/quiz_service.py +2 -1
  17. graphgen/operators/read/read.py +1 -1
  18. graphgen/operators/rephrase/__init__.py +1 -0
  19. graphgen/operators/rephrase/rephrase_service.py +62 -0
  20. graphgen/operators/search/search_service.py +1 -1
  21. graphgen/templates/__init__.py +1 -0
  22. graphgen/templates/rephrasing/__init__.py +1 -0
  23. graphgen/templates/rephrasing/style_controlled_rephrasing/__init__.py +21 -0
  24. graphgen/templates/rephrasing/style_controlled_rephrasing/critical_analysis_rephrasing.py +52 -0
  25. graphgen/templates/rephrasing/style_controlled_rephrasing/cross_domain_analogy_rephrasing.py +62 -0
  26. graphgen/templates/rephrasing/style_controlled_rephrasing/executive_summary_rephrasing.py +64 -0
  27. graphgen/templates/rephrasing/style_controlled_rephrasing/first_person_narrative_rephrasing.py +60 -0
  28. graphgen/templates/rephrasing/style_controlled_rephrasing/historical_evolution_perspective_rephrasing.py +68 -0
  29. graphgen/templates/rephrasing/style_controlled_rephrasing/popular_science_rephrasing.py +46 -0
  30. graphgen/templates/rephrasing/style_controlled_rephrasing/qa_dialogue_format_rephrasing.py +73 -0
  31. graphgen/templates/rephrasing/style_controlled_rephrasing/technical_deep_dive_rephrasing.py +66 -0
  32. requirements.txt +2 -3
graphgen/bases/__init__.py CHANGED
@@ -7,6 +7,7 @@ from .base_llm_wrapper import BaseLLMWrapper
7
  from .base_operator import BaseOperator
8
  from .base_partitioner import BasePartitioner
9
  from .base_reader import BaseReader
 
10
  from .base_searcher import BaseSearcher
11
  from .base_splitter import BaseSplitter
12
  from .base_storage import BaseGraphStorage, BaseKVStorage, StorageNameSpace
 
7
  from .base_operator import BaseOperator
8
  from .base_partitioner import BasePartitioner
9
  from .base_reader import BaseReader
10
+ from .base_rephraser import BaseRephraser
11
  from .base_searcher import BaseSearcher
12
  from .base_splitter import BaseSplitter
13
  from .base_storage import BaseGraphStorage, BaseKVStorage, StorageNameSpace
graphgen/bases/base_operator.py CHANGED
@@ -28,7 +28,7 @@ class BaseOperator(ABC):
28
  op_name: str = None,
29
  ):
30
  # lazy import to avoid circular import
31
- from graphgen.common import init_storage
32
  from graphgen.utils import set_logger
33
 
34
  log_dir = os.path.join(working_dir, "logs")
 
28
  op_name: str = None,
29
  ):
30
  # lazy import to avoid circular import
31
+ from graphgen.common.init_storage import init_storage
32
  from graphgen.utils import set_logger
33
 
34
  log_dir = os.path.join(working_dir, "logs")
graphgen/bases/base_rephraser.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC, abstractmethod
2
+ from typing import Any
3
+
4
+ from graphgen.bases.base_llm_wrapper import BaseLLMWrapper
5
+
6
+
7
class BaseRephraser(ABC):
    """
    Abstract base for LLM-backed rephrasers.

    Subclasses provide the prompt construction (``build_prompt``) and the
    response parsing (``parse_response``); ``rephrase`` wires the two around
    a single call to the LLM client.
    """

    def __init__(self, llm_client: BaseLLMWrapper):
        # Client whose ``generate_answer`` coroutine is awaited in ``rephrase``.
        self.llm_client = llm_client

    @abstractmethod
    def build_prompt(self, text: str) -> str:
        """Return the prompt to send to the LLM for ``text``."""

    @staticmethod
    @abstractmethod
    def parse_response(response: str) -> Any:
        """Convert the raw LLM response into the rephrased result."""

    async def rephrase(self, item: dict) -> dict:
        """
        Rephrase a single item.

        :param item: mapping that holds the source text under the ``"content"`` key.
        :return: whatever ``parse_response`` produces for the LLM's answer.
        """
        prompt = self.build_prompt(item["content"])
        answer = await self.llm_client.generate_answer(prompt)
        return self.parse_response(answer)
graphgen/common/__init__.py CHANGED
@@ -1,2 +1,2 @@
1
- from .init_llm import init_llm
2
- from .init_storage import init_storage
 
1
+ # from .init_llm import init_llm
2
+ # from .init_storage import init_storage
graphgen/engine.py CHANGED
@@ -11,7 +11,8 @@ from ray.data.block import Block
11
  from ray.data.datasource.filename_provider import FilenameProvider
12
 
13
  from graphgen.bases import Config, Node
14
- from graphgen.common import init_llm, init_storage
 
15
  from graphgen.utils import logger
16
 
17
 
@@ -70,6 +71,7 @@ class Engine:
70
 
71
  if not ray.is_initialized():
72
  context = ray.init(
 
73
  ignore_reinit_error=True,
74
  logging_level=logging.ERROR,
75
  log_to_driver=True,
 
11
  from ray.data.datasource.filename_provider import FilenameProvider
12
 
13
  from graphgen.bases import Config, Node
14
+ from graphgen.common.init_llm import init_llm
15
+ from graphgen.common.init_storage import init_storage
16
  from graphgen.utils import logger
17
 
18
 
 
71
 
72
  if not ray.is_initialized():
73
  context = ray.init(
74
+ include_dashboard=True,
75
  ignore_reinit_error=True,
76
  logging_level=logging.ERROR,
77
  log_to_driver=True,
graphgen/models/__init__.py CHANGED
@@ -37,6 +37,7 @@ from .reader import (
37
  RDFReader,
38
  TXTReader,
39
  )
 
40
  from .searcher.db.ncbi_searcher import NCBISearch
41
  from .searcher.db.rnacentral_searcher import RNACentralSearch
42
  from .searcher.db.uniprot_searcher import UniProtSearch
 
37
  RDFReader,
38
  TXTReader,
39
  )
40
+ from .rephraser import StyleControlledRephraser
41
  from .searcher.db.ncbi_searcher import NCBISearch
42
  from .searcher.db.rnacentral_searcher import RNACentralSearch
43
  from .searcher.db.uniprot_searcher import UniProtSearch
graphgen/models/rephraser/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .style_controlled_rephraser import StyleControlledRephraser
graphgen/models/rephraser/style_controlled_rephraser.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Optional
2
+
3
+ from graphgen.bases import BaseRephraser
4
+ from graphgen.templates import STYLE_CONTROLLED_REPHRASING_PROMPTS
5
+ from graphgen.utils import detect_main_language, logger
6
+
7
+
8
class StyleControlledRephraser(BaseRephraser):
    """
    Rephrase input text in one of the styles registered in
    ``STYLE_CONTROLLED_REPHRASING_PROMPTS``.
    """

    def __init__(self, llm_client: Any, style: str = "critical_analysis"):
        """
        :param llm_client: client handed to ``BaseRephraser``.
        :param style: key into ``STYLE_CONTROLLED_REPHRASING_PROMPTS``.
        :raises ValueError: if ``style`` is not a registered style.
        """
        # Fail fast on a bad style instead of raising a bare KeyError from
        # build_prompt once per item.
        if style not in STYLE_CONTROLLED_REPHRASING_PROMPTS:
            supported = ", ".join(sorted(STYLE_CONTROLLED_REPHRASING_PROMPTS))
            raise ValueError(
                f"Unsupported rephrase style: {style!r}. "
                f"Supported styles: {supported}"
            )
        super().__init__(llm_client)
        self.style = style

    def build_prompt(self, text: str) -> str:
        """Fill the template for the configured style and the text's language."""
        logger.debug("Text to be rephrased: %s", text)
        language = detect_main_language(text)
        # NOTE(review): assumes detect_main_language only returns keys present
        # in the per-style template dicts ("zh" / "en") -- confirm; any other
        # value raises KeyError here.
        prompt_template = STYLE_CONTROLLED_REPHRASING_PROMPTS[self.style][language]
        return prompt_template.format(text=text)

    @staticmethod
    def parse_response(response: str) -> Optional[dict]:
        """
        Strip the raw response and wrap it as ``{"content": ...}``.

        :return: ``None`` when the stripped response is empty.
        """
        result = response.strip()
        logger.debug("Raw rephrased response: %s", result)
        if not result:
            return None
        return {
            "content": result,
        }
graphgen/operators/__init__.py CHANGED
@@ -8,6 +8,7 @@ from .judge import JudgeService
8
  from .partition import PartitionService
9
  from .quiz import QuizService
10
  from .read import read
 
11
  from .search import SearchService
12
 
13
  operators = {
@@ -21,5 +22,6 @@ operators = {
21
  "partition": PartitionService,
22
  "generate": GenerateService,
23
  "evaluate": EvaluateService,
 
24
  "filter": FilterService,
25
  }
 
8
  from .partition import PartitionService
9
  from .quiz import QuizService
10
  from .read import read
11
+ from .rephrase import RephraseService
12
  from .search import SearchService
13
 
14
  operators = {
 
22
  "partition": PartitionService,
23
  "generate": GenerateService,
24
  "evaluate": EvaluateService,
25
+ "rephrase": RephraseService,
26
  "filter": FilterService,
27
  }
graphgen/operators/build_kg/build_kg_service.py CHANGED
@@ -2,7 +2,8 @@ from typing import Tuple
2
 
3
  from graphgen.bases import BaseGraphStorage, BaseLLMWrapper, BaseOperator
4
  from graphgen.bases.datatypes import Chunk
5
- from graphgen.common import init_llm, init_storage
 
6
  from graphgen.utils import logger
7
 
8
  from .build_mm_kg import build_mm_kg
 
2
 
3
  from graphgen.bases import BaseGraphStorage, BaseLLMWrapper, BaseOperator
4
  from graphgen.bases.datatypes import Chunk
5
+ from graphgen.common.init_llm import init_llm
6
+ from graphgen.common.init_storage import init_storage
7
  from graphgen.utils import logger
8
 
9
  from .build_mm_kg import build_mm_kg
graphgen/operators/evaluate/evaluate_service.py CHANGED
@@ -1,7 +1,8 @@
1
  from typing import Tuple
2
 
3
  from graphgen.bases import BaseLLMWrapper, BaseOperator
4
- from graphgen.common import init_llm, init_storage
 
5
  from graphgen.utils import logger
6
 
7
  from .evaluate_kg import evaluate_kg
 
1
  from typing import Tuple
2
 
3
  from graphgen.bases import BaseLLMWrapper, BaseOperator
4
+ from graphgen.common.init_llm import init_llm
5
+ from graphgen.common.init_storage import init_storage
6
  from graphgen.utils import logger
7
 
8
  from .evaluate_kg import evaluate_kg
graphgen/operators/extract/extract_service.py CHANGED
@@ -2,7 +2,7 @@ import json
2
  from typing import Tuple
3
 
4
  from graphgen.bases import BaseLLMWrapper, BaseOperator, Chunk
5
- from graphgen.common import init_llm
6
  from graphgen.models.extractor import SchemaGuidedExtractor
7
  from graphgen.utils import logger, run_concurrent
8
 
 
2
  from typing import Tuple
3
 
4
  from graphgen.bases import BaseLLMWrapper, BaseOperator, Chunk
5
+ from graphgen.common.init_llm import init_llm
6
  from graphgen.models.extractor import SchemaGuidedExtractor
7
  from graphgen.utils import logger, run_concurrent
8
 
graphgen/operators/generate/generate_service.py CHANGED
@@ -1,6 +1,8 @@
1
  from typing import Tuple
 
2
  from graphgen.bases import BaseKVStorage, BaseLLMWrapper, BaseOperator
3
- from graphgen.common import init_llm, init_storage
 
4
  from graphgen.utils import logger, run_concurrent
5
 
6
 
 
1
  from typing import Tuple
2
+
3
  from graphgen.bases import BaseKVStorage, BaseLLMWrapper, BaseOperator
4
+ from graphgen.common.init_llm import init_llm
5
+ from graphgen.common.init_storage import init_storage
6
  from graphgen.utils import logger, run_concurrent
7
 
8
 
graphgen/operators/judge/judge_service.py CHANGED
@@ -1,8 +1,9 @@
1
- from typing import Tuple
2
  import math
 
3
 
4
  from graphgen.bases import BaseGraphStorage, BaseLLMWrapper, BaseOperator
5
- from graphgen.common import init_llm, init_storage
 
6
  from graphgen.templates import STATEMENT_JUDGEMENT_PROMPT
7
  from graphgen.utils import logger, run_concurrent, yes_no_loss_entropy
8
 
 
 
1
  import math
2
+ from typing import Tuple
3
 
4
  from graphgen.bases import BaseGraphStorage, BaseLLMWrapper, BaseOperator
5
+ from graphgen.common.init_llm import init_llm
6
+ from graphgen.common.init_storage import init_storage
7
  from graphgen.templates import STATEMENT_JUDGEMENT_PROMPT
8
  from graphgen.utils import logger, run_concurrent, yes_no_loss_entropy
9
 
graphgen/operators/partition/partition_service.py CHANGED
@@ -2,7 +2,7 @@ import os
2
  from typing import Iterable, Tuple
3
 
4
  from graphgen.bases import BaseGraphStorage, BaseOperator, BaseTokenizer
5
- from graphgen.common import init_storage
6
  from graphgen.models import (
7
  AnchorBFSPartitioner,
8
  BFSPartitioner,
 
2
  from typing import Iterable, Tuple
3
 
4
  from graphgen.bases import BaseGraphStorage, BaseOperator, BaseTokenizer
5
+ from graphgen.common.init_storage import init_storage
6
  from graphgen.models import (
7
  AnchorBFSPartitioner,
8
  BFSPartitioner,
graphgen/operators/quiz/quiz_service.py CHANGED
@@ -1,7 +1,8 @@
1
  from typing import Tuple
2
 
3
  from graphgen.bases import BaseGraphStorage, BaseLLMWrapper, BaseOperator
4
- from graphgen.common import init_llm, init_storage
 
5
  from graphgen.models import QuizGenerator
6
  from graphgen.utils import logger, run_concurrent
7
 
 
1
  from typing import Tuple
2
 
3
  from graphgen.bases import BaseGraphStorage, BaseLLMWrapper, BaseOperator
4
+ from graphgen.common.init_llm import init_llm
5
+ from graphgen.common.init_storage import init_storage
6
  from graphgen.models import QuizGenerator
7
  from graphgen.utils import logger, run_concurrent
8
 
graphgen/operators/read/read.py CHANGED
@@ -3,7 +3,7 @@ from typing import Any, List, Optional, Union
3
 
4
  import ray
5
 
6
- from graphgen.common import init_storage
7
  from graphgen.models import (
8
  CSVReader,
9
  JSONReader,
 
3
 
4
  import ray
5
 
6
+ from graphgen.common.init_storage import init_storage
7
  from graphgen.models import (
8
  CSVReader,
9
  JSONReader,
graphgen/operators/rephrase/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .rephrase_service import RephraseService
graphgen/operators/rephrase/rephrase_service.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Tuple
2
+
3
+ from graphgen.bases import BaseLLMWrapper, BaseOperator
4
+ from graphgen.common.init_llm import init_llm
5
+ from graphgen.utils import run_concurrent
6
+
7
+
8
class RephraseService(BaseOperator):
    """
    Rephrase the ``content`` of each input item with an LLM-backed rephraser.
    """

    def __init__(
        self,
        working_dir: str = "cache",
        method: str = "style_controlled",
        **rephrase_kwargs,
    ):
        """
        :param working_dir: forwarded to ``BaseOperator``.
        :param method: rephrasing method; only ``"style_controlled"`` is
            supported. The previous default, ``"aggregated"``, was never
            supported, so constructing with defaults always raised.
        :param rephrase_kwargs: method-specific options; ``style`` is read for
            the style-controlled rephraser (default ``"critical_analysis"``).
        :raises ValueError: if ``method`` is not supported.
        """
        # Reject an unsupported method before the base operator and LLM client
        # are initialised.
        if method != "style_controlled":
            raise ValueError(f"Unsupported rephrase method: {method}")

        super().__init__(working_dir=working_dir, op_name="rephrase_service")
        self.llm_client: BaseLLMWrapper = init_llm("synthesizer")
        self.method = method
        self.rephrase_kwargs = rephrase_kwargs

        # Imported lazily, as in the original implementation.
        from graphgen.models import StyleControlledRephraser

        self.rephraser = StyleControlledRephraser(
            self.llm_client,
            style=rephrase_kwargs.get("style", "critical_analysis"),
        )

    def process(self, batch: list) -> Tuple[list, dict]:
        """
        Rephrase the texts in the batch.
        :return: A tuple of (results, meta_updates)
            results: A list of dicts containing rephrased texts. Each dict has the structure:
                {"_trace_id": str, "content": str}
            meta_updates: A dict mapping source IDs to lists of trace IDs for the rephrased texts.
        """
        final_results = []
        meta_updates = {}

        results = run_concurrent(
            self.rephraser.rephrase,
            batch,
            desc="Rephrasing texts",
            unit="batch",
        )

        # NOTE(review): pairing by position assumes run_concurrent returns one
        # result per input, in input order -- confirm against its implementation.
        for item, rephrased in zip(batch, results):
            if not rephrased:
                # Empty / failed rephrasing: nothing to emit for this input.
                continue
            rephrased["_trace_id"] = self.get_trace_id(rephrased)
            # Do not append to ``results`` here: it is being iterated, and the
            # extra entries were never part of the returned value.
            meta_updates.setdefault(item["_trace_id"], []).append(
                rephrased["_trace_id"]
            )
            final_results.append(rephrased)

        return final_results, meta_updates
graphgen/operators/search/search_service.py CHANGED
@@ -4,7 +4,7 @@ from typing import Optional
4
  import pandas as pd
5
 
6
  from graphgen.bases import BaseOperator
7
- from graphgen.common import init_storage
8
  from graphgen.utils import compute_content_hash, logger, run_concurrent
9
 
10
 
 
4
  import pandas as pd
5
 
6
  from graphgen.bases import BaseOperator
7
+ from graphgen.common.init_storage import init_storage
8
  from graphgen.utils import compute_content_hash, logger, run_concurrent
9
 
10
 
graphgen/templates/__init__.py CHANGED
@@ -14,5 +14,6 @@ from .generation import (
14
  VQA_GENERATION_PROMPT,
15
  )
16
  from .kg import KG_EXTRACTION_PROMPT, KG_SUMMARIZATION_PROMPT, MMKG_EXTRACTION_PROMPT
 
17
  from .search_judgement import SEARCH_JUDGEMENT_PROMPT
18
  from .statement_judgement import STATEMENT_JUDGEMENT_PROMPT
 
14
  VQA_GENERATION_PROMPT,
15
  )
16
  from .kg import KG_EXTRACTION_PROMPT, KG_SUMMARIZATION_PROMPT, MMKG_EXTRACTION_PROMPT
17
+ from .rephrasing import STYLE_CONTROLLED_REPHRASING_PROMPTS
18
  from .search_judgement import SEARCH_JUDGEMENT_PROMPT
19
  from .statement_judgement import STATEMENT_JUDGEMENT_PROMPT
graphgen/templates/rephrasing/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .style_controlled_rephrasing import STYLE_CONTROLLED_REPHRASING_PROMPTS
graphgen/templates/rephrasing/style_controlled_rephrasing/__init__.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .critical_analysis_rephrasing import CRITICAL_ANALYSIS_REPHRASING_PROMPTS
2
+ from .cross_domain_analogy_rephrasing import CROSS_DOMAIN_ANALOGY_REPHRASING_PROMPTS
3
+ from .executive_summary_rephrasing import EXECUTIVE_SUMMARY_REPHRASING_PROMPTS
4
+ from .first_person_narrative_rephrasing import FIRST_PERSON_NARRATIVE_REPHRASING_PROMPTS
5
+ from .historical_evolution_perspective_rephrasing import (
6
+ HISTORICAL_EVOLUTION_PERSPECTIVE_REPHRASING_PROMPTS,
7
+ )
8
+ from .popular_science_rephrasing import POPULAR_SCIENCE_REPHRASING_PROMPTS
9
+ from .qa_dialogue_format_rephrasing import QA_DIALOGUE_FORMAT_REPHRASING_PROMPTS
10
+ from .technical_deep_dive_rephrasing import TECHNICAL_DEEP_DIVE_REPHRASING_PROMPTS
11
+
12
+ STYLE_CONTROLLED_REPHRASING_PROMPTS = {
13
+ "popular_science": POPULAR_SCIENCE_REPHRASING_PROMPTS,
14
+ "critical_analysis": CRITICAL_ANALYSIS_REPHRASING_PROMPTS,
15
+ "cross_domain_analogy": CROSS_DOMAIN_ANALOGY_REPHRASING_PROMPTS,
16
+ "technical_deep_dive": TECHNICAL_DEEP_DIVE_REPHRASING_PROMPTS,
17
+ "executive_summary": EXECUTIVE_SUMMARY_REPHRASING_PROMPTS,
18
+ "first_person_narrative": FIRST_PERSON_NARRATIVE_REPHRASING_PROMPTS,
19
+ "historical_evolution_perspective": HISTORICAL_EVOLUTION_PERSPECTIVE_REPHRASING_PROMPTS,
20
+ "qa_dialogue_format": QA_DIALOGUE_FORMAT_REPHRASING_PROMPTS,
21
+ }
graphgen/templates/rephrasing/style_controlled_rephrasing/critical_analysis_rephrasing.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ TEMPLATE_ZH = """
2
+ 【任务】以学术批判视角改写以下内容,形成技术评论文章。
3
+
4
+ 【核心要求】
5
+ 1. 语气风格:客观理性,第三人称学术视角,使用规范学术用语
6
+ 2. 内容结构:
7
+ - 准确总结原文核心方法/发现(占比40%)
8
+ - 分析技术优势与创新点(占比20%)
9
+ - 指出潜在局限性与假设条件(占比20%)
10
+ - 提出可能的改进方向或未来工作(占比20%)
11
+ 3. 引用规范:保留原文所有关键引用,采用标准学术引用格式
12
+ 4. 事实准确性:不得歪曲或误读原文技术细节
13
+
14
+ 【输出格式】
15
+ - 标题:原标题 + ":一项批判性分析"
16
+ - 段落:标准学术论文章节结构
17
+ - 字数:与原文相当或略长
18
+
19
+ 原文内容:
20
+ {text}
21
+
22
+ 请输出批判性分析改写版本:
23
+ """
24
+
25
+ TEMPLATE_EN = """
26
+ 【Task】Rewrite the following content from an academic critical perspective as a technical commentary.
27
+
28
+ 【Core Requirements】
29
+ 1. Tone: Objective and rational, third-person academic perspective, using standard academic terminology
30
+ 2. Structure:
31
+ - Accurately summarize core methods/findings (40% of content)
32
+ - Analyze technical advantages and innovations (20%)
33
+ - Identify potential limitations and assumptions (20%)
34
+ - Propose possible improvements or future work (20%)
35
+ 3. Citations: Retain all key references from original, using standard academic citation format
36
+ 4. Factual Accuracy: Do not distort or misinterpret technical details
37
+
38
+ 【Output Format】
39
+ - Title: Original Title + ": A Critical Analysis"
40
+ - Paragraphs: Standard academic paper structure
41
+ - Length: Similar to or slightly longer than original
42
+
43
+ Original Content:
44
+ {text}
45
+
46
+ Please output the critically analyzed rewrite:
47
+ """
48
+
49
+ CRITICAL_ANALYSIS_REPHRASING_PROMPTS = {
50
+ "zh": TEMPLATE_ZH,
51
+ "en": TEMPLATE_EN,
52
+ }
graphgen/templates/rephrasing/style_controlled_rephrasing/cross_domain_analogy_rephrasing.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ TEMPLATE_ZH = """
2
+ 【任务】通过跨领域类比解释技术概念。
3
+
4
+ 【类比原则】
5
+ - 类比源领域:生物学、物理学、建筑学、经济学、烹饪等领域
6
+ - 类比强度:类比关系需直观且深刻,避免牵强附会
7
+ - 目标:降低理解门槛,同时保持技术严谨性
8
+
9
+ 【核心要求】
10
+ 1. 双轨并行:每个技术概念配一个恰当类比
11
+ 2. 类比结构:
12
+ - 先介绍技术概念(准确、完整)
13
+ - 再引入类比对象及其映射关系
14
+ - 最后说明类比局限性和适用范围
15
+ 3. 保真红线:技术部分必须与原文完全一致,不得因类比而简化
16
+ 4. 创新性:鼓励使用新颖、出人意料但合理的类比
17
+ 5. 篇幅:可比原文扩展20-40%
18
+
19
+ 【评估标准】
20
+ - 类比恰当性(技术概念与类比对象的核心机制必须同构)
21
+ - 技术准确性(不得扭曲事实)
22
+ - 启发性(帮助读者建立深层理解)
23
+
24
+ 原文内容:
25
+ {text}
26
+
27
+ 请输出跨领域类比版本:
28
+ """
29
+
30
+ TEMPLATE_EN = """
31
+ 【Task】Explain technical concepts through cross-domain analogies.
32
+
33
+ 【Analogy Principles】
34
+ - Source Domains: Biology, physics, architecture, economics, cooking, etc.
35
+ - Strength: Analogy should be intuitive yet profound, avoid forced comparisons
36
+ - Goal: Lower understanding barrier while maintaining technical rigor
37
+
38
+ 【Core Requirements】
39
+ 1. Dual Track: Pair each technical concept with an appropriate analogy
40
+ 2. Analogy Structure:
41
+ - First introduce technical concept (accurate and complete)
42
+ - Then introduce analogy object and mapping relationship
43
+ - Finally explain analogy limitations and applicable scope
44
+ 3. Fidelity Baseline: Technical parts must be identical to original, no simplification for analogy's sake
45
+ 4. Innovation: Encourage novel, surprising but reasonable analogies
46
+ 5. Length: May expand 20-40% beyond original
47
+
48
+ 【Evaluation Criteria】
49
+ - Analogy Appropriateness (core mechanisms must be isomorphic)
50
+ - Technical Accuracy (no factual distortion)
51
+ - Heuristic Value (helps build deep understanding)
52
+
53
+ Original Content:
54
+ {text}
55
+
56
+ Please output the cross-domain analogy version:
57
+ """
58
+
59
+ CROSS_DOMAIN_ANALOGY_REPHRASING_PROMPTS = {
60
+ "zh": TEMPLATE_ZH,
61
+ "en": TEMPLATE_EN,
62
+ }
graphgen/templates/rephrasing/style_controlled_rephrasing/executive_summary_rephrasing.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ TEMPLATE_ZH = """
2
+ 【任务】为高管层撰写决策摘要。
3
+
4
+ 【读者假设】
5
+ - 职位:CTO/技术VP/产品总监
6
+ - 核心关切:技术价值、资源投入、竞争壁垒、商业影响
7
+
8
+ 【核心要求】
9
+ 1. 信息密度:每句话必须传达战略价值
10
+ 2. 内容优先级:
11
+ - 核心技术突破与创新价值(必须)
12
+ - 与竞品的差异化优势(必须)
13
+ - 实施成本与资源需求(必须)
14
+ - 潜在商业应用场景(必须)
15
+ - 技术风险评估(可选)
16
+ 3. 语言风格:金字塔原理,结论先行,数据支撑
17
+ 4. 简洁性:控制在原文长度的30-50%
18
+ 5. 事实准确性:所有数据、性能指标必须与原文完全一致
19
+
20
+ 【禁用表达】
21
+ - 避免"可能"、"也许"等不确定表述
22
+ - 禁用技术细节描述(除非直接影响决策)
23
+ - 避免行话和缩写
24
+
25
+ 原文内容:
26
+ {text}
27
+
28
+ 请直接输出高管决策摘要:
29
+ """
30
+
31
+ TEMPLATE_EN = """
32
+ 【Task】Write an executive summary for C-suite decision-making.
33
+
34
+ 【Audience Assumption】
35
+ - Position: CTO/VP of Engineering/Product Director
36
+ - Core Concerns: Technical value, resource investment, competitive moats, business impact
37
+
38
+ 【Core Requirements】
39
+ 1. Information Density: Every sentence must convey strategic value
40
+ 2. Content Priority:
41
+ - Core technical breakthrough and innovation value (MUST)
42
+ - Differentiated advantages over competitors (MUST)
43
+ - Implementation cost and resource requirements (MUST)
44
+ - Potential business application scenarios (MUST)
45
+ - Technical risk assessment (OPTIONAL)
46
+ 3. Language Style: Pyramid principle - lead with conclusions, support with data
47
+ 4. Conciseness: 30-50% of original length
48
+ 5. Factual Accuracy: All data and performance metrics must be identical to original
49
+
50
+ 【Prohibited Expressions】
51
+ - Avoid uncertain terms like "maybe," "perhaps"
52
+ - No deep technical details (unless directly impacting decision)
53
+ - No jargon or unexplained acronyms
54
+
55
+ Original Content:
56
+ {text}
57
+
58
+ Please output the executive summary directly:
59
+ """
60
+
61
+ EXECUTIVE_SUMMARY_REPHRASING_PROMPTS = {
62
+ "zh": TEMPLATE_ZH,
63
+ "en": TEMPLATE_EN,
64
+ }
graphgen/templates/rephrasing/style_controlled_rephrasing/first_person_narrative_rephrasing.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ TEMPLATE_ZH = """
2
+ 【任务】将技术文档改写为第一人称实践经验分享。
3
+
4
+ 【角色设定】
5
+ - 身份:资深技术实践者/研究员
6
+ - 场景:技术博客/内部经验分享会
7
+ - 目标读者:同行从业者
8
+
9
+ 【核心要求】
10
+ 1. 视角:全程使用"我/我们"第一人称
11
+ 2. 内容融合:
12
+ - 保留原文所有技术事实(代码、数据、架构)
13
+ - 添加个人实践中的观察、挑战与解决思路
14
+ - 分享真实应用场景和效果数据
15
+ 3. 语言风格:专业但亲和,避免过度口语化
16
+ 4. 叙事元素:可包含"最初尝试-遇到问题-调整思路-最终效果"的故事线
17
+ 5. 事实红线:技术细节必须与原文完全一致,不得虚构数据
18
+
19
+ 【禁止】
20
+ - 不得编造不存在的个人经历
21
+ - 不得改变技术实现细节
22
+
23
+ 原文内容:
24
+ {text}
25
+
26
+ 请直接输出第一人称叙事版本:
27
+ """
28
+
29
+ TEMPLATE_EN = """
30
+ 【Task】Rewrite the technical document as a first-person practical experience sharing.
31
+
32
+ 【Role Setting】
33
+ - Identity: Senior practitioner/researcher
34
+ - Scenario: Technical blog/internal sharing session
35
+ - Target Audience: Peer professionals
36
+
37
+ 【Core Requirements】
38
+ 1. Perspective: Use first-person "I/we" throughout
39
+ 2. Content Integration:
40
+ - Retain ALL technical facts (code, data, architecture) from original
41
+ - Add personal observations, challenges, and solution approaches from practice
42
+ - Share real application scenarios and performance data
43
+ 3. Language Style: Professional yet approachable, avoid excessive colloquialism
44
+ 4. Narrative: May include "initial attempt-encountered problem-adjusted approach-final result" storyline
45
+ 5. Factual Baseline: Technical details must be identical to original, no fabricated data
46
+
47
+ 【Prohibited】
48
+ - Do not invent non-existent personal experiences
49
+ - Do not alter technical implementation details
50
+
51
+ Original Content:
52
+ {text}
53
+
54
+ Please output the first-person narrative version directly:
55
+ """
56
+
57
+ FIRST_PERSON_NARRATIVE_REPHRASING_PROMPTS = {
58
+ "zh": TEMPLATE_ZH,
59
+ "en": TEMPLATE_EN,
60
+ }
graphgen/templates/rephrasing/style_controlled_rephrasing/historical_evolution_perspective_rephrasing.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ TEMPLATE_ZH = """
2
+ 【任务】按技术发展史视角重构内容。
3
+
4
+ 【叙事框架】
5
+ - 时间轴线:从起源→关键突破→当前状态→未来趋势
6
+ - 演进逻辑:揭示"技术瓶颈突破→新范式建立→新问题出现"的循环
7
+
8
+ 【核心要求】
9
+ 1. 时间准确性:所有时间点、版本号、发布顺序必须核实准确
10
+ 2. 因果链:
11
+ - 明确每个演进阶段的驱动力(理论突破/工程需求/硬件进步)
12
+ - 指出技术演进的必然性与偶然性
13
+ 3. 内容结构:
14
+ - 背景与起源(技术诞生前的状态)
15
+ - 关键里程碑(带具体时间)
16
+ - 范式转移(革命性变化)
17
+ - 当前成熟形态
18
+ - 未来展望(基于原文技术路径)
19
+ 4. 技术保真:所有技术描述必须与原文事实一致
20
+ 5. 分析深度:不能仅罗列事实,必须揭示演进逻辑
21
+
22
+ 【输出规范】
23
+ - 使用时间轴标记(如[2017]、[2020])增强可读性
24
+ - 关键人物/团队需保留原名
25
+ - 禁止编造不存在的技术演进路径
26
+
27
+ 原文内容:
28
+ {text}
29
+
30
+ 请输出历史演进视角版本:
31
+ """
32
+
33
+ TEMPLATE_EN = """
34
+ 【Task】Reconstruct content from a technological history evolution perspective.
35
+
36
+ 【Narrative Framework】
37
+ - Timeline: Origin → Key Breakthroughs → Current State → Future Trends
38
+ - Evolution Logic: Reveal the cycle of "technical bottleneck breakthrough → new paradigm establishment → new problems emerge"
39
+
40
+ 【Core Requirements】
41
+ 1. Temporal Accuracy: ALL dates, version numbers, and release sequences must be verified and accurate
42
+ 2. Causality Chain:
43
+ - Identify drivers of each evolution stage (theoretical breakthrough/engineering needs/hardware advances)
44
+ - Point out inevitability and contingency of technical evolution
45
+ 3. Content Structure:
46
+ - Background & Origin (state before technology birth)
47
+ - Key Milestones (with specific dates)
48
+ - Paradigm Shifts (revolutionary changes)
49
+ - Current Mature Form
50
+ - Future Outlook (based on original's technical trajectory)
51
+ 4. Technical Fidelity: ALL technical descriptions must be factually consistent with original
52
+ 5. Analytical Depth: Must reveal evolution logic, not just list facts
53
+
54
+ 【Output Specification】
55
+ - Use timeline markers ([2017], [2020]) for readability
56
+ - Keep original names of key people/teams
57
+ - DO NOT invent non-existent evolution paths
58
+
59
+ Original Content:
60
+ {text}
61
+
62
+ Please output the historical evolution version:
63
+ """
64
+
65
+ HISTORICAL_EVOLUTION_PERSPECTIVE_REPHRASING_PROMPTS = {
66
+ "zh": TEMPLATE_ZH,
67
+ "en": TEMPLATE_EN,
68
+ }
graphgen/templates/rephrasing/style_controlled_rephrasing/popular_science_rephrasing.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ TEMPLATE_ZH = """
2
+ 【任务】将以下技术文档改写为面向普通读者的科普文章。
3
+
4
+ 【核心要求】
5
+ 1. 语言风格:生动活泼,避免冷僻专业术语;必须使用术语时,需用生活化比喻或类比解释
6
+ 2. 内容保真:所有核心事实、数据和技术结论必须准确无误,不得篡改或过度简化
7
+ 3. 叙事结构:采用"问题-发现-应用"的故事线,增强可读性
8
+ 4. 读者定位:假设读者具有高中文化水平,无专业背景
9
+ 5. 篇幅控制:可适当扩展,但每段聚焦一个核心概念
10
+
11
+ 【禁止行为】
12
+ - 不得删除关键技术细节
13
+ - 不得改变原意或事实
14
+ - 避免使用"这个东西"、"那个技术"等模糊指代
15
+
16
+ 原文内容:
17
+ {text}
18
+
19
+ 请直接输出改写后的科普文章:
20
+ """
21
+
22
+ TEMPLATE_EN = """
23
+ 【Task】Rewrite the following technical document as a popular science article for general readers.
24
+
25
+ 【Core Requirements】
26
+ 1. Language Style: Lively and engaging; avoid jargon; when technical terms are necessary, explain with everyday analogies or metaphors
27
+ 2. Content Fidelity: All core facts, data, and technical conclusions must be accurate. Do not distort or oversimplify
28
+ 3. Narrative Structure: Use a "problem-discovery-application" storyline to enhance readability
29
+ 4. Audience: Assume high school education level, no technical background
30
+ 5. Length: May expand moderately, but each paragraph should focus on one core concept
31
+
32
+ 【Prohibited】
33
+ - Do not remove key technical details
34
+ - Do not change original meaning or facts
35
+ - Avoid vague references like "this thing" or "that technology"
36
+
37
+ Original Content:
38
+ {text}
39
+
40
+ Please output the rewritten popular science article directly:
41
+ """
42
+
43
+ POPULAR_SCIENCE_REPHRASING_PROMPTS = {
44
+ "zh": TEMPLATE_ZH,
45
+ "en": TEMPLATE_EN,
46
+ }
graphgen/templates/rephrasing/style_controlled_rephrasing/qa_dialogue_format_rephrasing.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ TEMPLATE_ZH = """
2
+ 【任务】将技术文档重构为自然问答对话。
3
+
4
+ 【对话设计原则】
5
+ - 对话角色:提问者(好奇心驱动的学习者) vs 解答者(专家)
6
+ - 问题序列:从基础概念→技术细节→应用实践→深度追问,逻辑递进
7
+
8
+ 【核心要求】
9
+ 1. 问题设计:
10
+ - 每个问题必须源于原文知识点
11
+ - 问题要具体、明确,避免空泛
12
+ - 体现真实学习过程中的疑惑点
13
+ 2. 回答规范:
14
+ - 回答必须准确、完整,引用原文事实
15
+ - 保持专家解答的权威性
16
+ - 可适当补充背景信息帮助理解
17
+ 3. 对话流畅性:问题间有自然过渡,避免跳跃
18
+ 4. 覆盖度:确保原文所有重要知识点都被至少一个问题覆盖
19
+ 5. 事实核查:回答中的技术细节、数据必须与原文完全一致
20
+
21
+ 【输出格式】
22
+ Q1: [问题1]
23
+ A1: [回答1]
24
+
25
+ Q2: [问题2]
26
+ A2: [回答2]
27
+ ...
28
+
29
+ 原文内容:
30
+ {text}
31
+
32
+ 请输出问答对话版本:
33
+ """
34
+
35
+ TEMPLATE_EN = """
36
+ 【Task】Reconstruct the technical document as a natural Q&A dialogue.
37
+
38
+ 【Dialogue Design Principles】
39
+ - Roles: Inquirer (curious learner) vs. Expert (domain specialist)
40
+ - Question Flow: From basic concepts → technical details → practical applications → deep follow-ups, logically progressive
41
+
42
+ 【Core Requirements】
43
+ 1. Question Design:
44
+ - Each question must originate from original content knowledge points
45
+ - Questions should be specific and clear, avoid vagueness
46
+ - Reflect points of confusion in the real learning process
47
+ 2. Answer Specification:
48
+ - Answers must be accurate and complete, citing original facts
49
+ - Maintain authoritative expert tone
50
+ - May supplement background information when helpful
51
+ 3. Dialogue Fluency: Natural transition between questions, avoid jumping
52
+ 4. Coverage: Ensure ALL important knowledge points from original are covered by at least one question
53
+ 5. Fact Check: Technical details and data in answers must be identical to original
54
+
55
+ 【Output Format】
56
+ Q1: [Question 1]
57
+ A1: [Answer 1]
58
+
59
+ Q2: [Question 2]
60
+ A2: [Answer 2]
61
+ ...
62
+
63
+ Original Content:
64
+ {text}
65
+
66
+ Please output the Q&A dialogue version:
67
+ """
68
+
69
+
70
+ QA_DIALOGUE_FORMAT_REPHRASING_PROMPTS = {
71
+ "zh": TEMPLATE_ZH,
72
+ "en": TEMPLATE_EN,
73
+ }
graphgen/templates/rephrasing/style_controlled_rephrasing/technical_deep_dive_rephrasing.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ TEMPLATE_ZH = """
2
+ 【任务】以领域专家视角进行深度技术剖析。
3
+
4
+ 【读者定位】
5
+ - 目标读者:同领域高级工程师/研究员
6
+ - 预期效果:揭示技术细节、设计权衡与实现原理
7
+
8
+ 【核心要求】
9
+ 1. 技术精确性:
10
+ - 使用精确的专业术语和符号表示
11
+ - 补充技术背景、相关工作和理论基础
12
+ - 必要时用公式或代码片段说明
13
+ 2. 深度维度:
14
+ - 算法复杂度分析
15
+ - 系统架构设计权衡
16
+ - 性能瓶颈与优化空间
17
+ - 边界条件和异常情况处理
18
+ 3. 内容扩展:可在原文基础上增加30-50%的技术细节
19
+ 4. 语气:权威、严谨、逻辑严密
20
+
21
+ 【输出规范】
22
+ - 保持原文所有事实准确无误
23
+ - 新增细节需符合领域常识
24
+ - 使用标准技术文档格式
25
+
26
+ 原文内容:
27
+ {text}
28
+
29
+ 请输出技术深度剖析版本:
30
+ """
31
+
32
+ TEMPLATE_EN = """
33
+ 【Task】Conduct an in-depth technical analysis from a domain expert perspective.
34
+
35
+ 【Audience】
36
+ - Target: Senior engineers/researchers in the same field
37
+ - Goal: Reveal technical details, design trade-offs, and implementation principles
38
+
39
+ 【Core Requirements】
40
+ 1. Technical Precision:
41
+ - Use precise technical terminology and notation
42
+ - Supplement with technical background, related work, and theoretical foundations
43
+ - Include formulas or code snippets when necessary
44
+ 2. Depth Dimensions:
45
+ - Algorithmic complexity analysis
46
+ - System architecture design trade-offs
47
+ - Performance bottlenecks and optimization opportunities
48
+ - Edge cases and exception handling
49
+ 3. Content Expansion: May add 30-50% more technical details than original
50
+ 4. Tone: Authoritative, rigorous, logically sound
51
+
52
+ 【Output Specification】
53
+ - Maintain 100% factual accuracy from original
54
+ - Added details must align with domain common knowledge
55
+ - Use standard technical documentation format
56
+
57
+ Original Content:
58
+ {text}
59
+
60
+ Please output the technical deep-dive version:
61
+ """
62
+
63
+ TECHNICAL_DEEP_DIVE_REPHRASING_PROMPTS = {
64
+ "zh": TEMPLATE_ZH,
65
+ "en": TEMPLATE_EN,
66
+ }
requirements.txt CHANGED
@@ -12,7 +12,7 @@ nltk
12
  jieba
13
  plotly
14
  pandas
15
- gradio==5.44.1
16
  kaleido
17
  pyyaml
18
  langcodes
@@ -21,8 +21,7 @@ fastapi
21
  trafilatura
22
  aiohttp
23
  socksio
24
- pydantic
25
- ray==2.53.0
26
  pyarrow
27
 
28
  leidenalg
 
12
  jieba
13
  plotly
14
  pandas
15
+ gradio==5.50.0
16
  kaleido
17
  pyyaml
18
  langcodes
 
21
  trafilatura
22
  aiohttp
23
  socksio
24
+ ray[default]==2.53.0
 
25
  pyarrow
26
 
27
  leidenalg