Spaces:
Runtime error
Runtime error
github-actions[bot] committed on
Commit ·
0a18089
1
Parent(s): 9a57b42
Auto-sync from demo at Fri Jan 30 08:46:58 UTC 2026
Browse files- graphgen/bases/__init__.py +1 -0
- graphgen/bases/base_operator.py +1 -1
- graphgen/bases/base_rephraser.py +31 -0
- graphgen/common/__init__.py +2 -2
- graphgen/engine.py +3 -1
- graphgen/models/__init__.py +1 -0
- graphgen/models/rephraser/__init__.py +1 -0
- graphgen/models/rephraser/style_controlled_rephraser.py +32 -0
- graphgen/operators/__init__.py +2 -0
- graphgen/operators/build_kg/build_kg_service.py +2 -1
- graphgen/operators/evaluate/evaluate_service.py +2 -1
- graphgen/operators/extract/extract_service.py +1 -1
- graphgen/operators/generate/generate_service.py +3 -1
- graphgen/operators/judge/judge_service.py +3 -2
- graphgen/operators/partition/partition_service.py +1 -1
- graphgen/operators/quiz/quiz_service.py +2 -1
- graphgen/operators/read/read.py +1 -1
- graphgen/operators/rephrase/__init__.py +1 -0
- graphgen/operators/rephrase/rephrase_service.py +62 -0
- graphgen/operators/search/search_service.py +1 -1
- graphgen/templates/__init__.py +1 -0
- graphgen/templates/rephrasing/__init__.py +1 -0
- graphgen/templates/rephrasing/style_controlled_rephrasing/__init__.py +21 -0
- graphgen/templates/rephrasing/style_controlled_rephrasing/critical_analysis_rephrasing.py +52 -0
- graphgen/templates/rephrasing/style_controlled_rephrasing/cross_domain_analogy_rephrasing.py +62 -0
- graphgen/templates/rephrasing/style_controlled_rephrasing/executive_summary_rephrasing.py +64 -0
- graphgen/templates/rephrasing/style_controlled_rephrasing/first_person_narrative_rephrasing.py +60 -0
- graphgen/templates/rephrasing/style_controlled_rephrasing/historical_evolution_perspective_rephrasing.py +68 -0
- graphgen/templates/rephrasing/style_controlled_rephrasing/popular_science_rephrasing.py +46 -0
- graphgen/templates/rephrasing/style_controlled_rephrasing/qa_dialogue_format_rephrasing.py +73 -0
- graphgen/templates/rephrasing/style_controlled_rephrasing/technical_deep_dive_rephrasing.py +66 -0
- requirements.txt +2 -3
graphgen/bases/__init__.py
CHANGED
|
@@ -7,6 +7,7 @@ from .base_llm_wrapper import BaseLLMWrapper
|
|
| 7 |
from .base_operator import BaseOperator
|
| 8 |
from .base_partitioner import BasePartitioner
|
| 9 |
from .base_reader import BaseReader
|
|
|
|
| 10 |
from .base_searcher import BaseSearcher
|
| 11 |
from .base_splitter import BaseSplitter
|
| 12 |
from .base_storage import BaseGraphStorage, BaseKVStorage, StorageNameSpace
|
|
|
|
| 7 |
from .base_operator import BaseOperator
|
| 8 |
from .base_partitioner import BasePartitioner
|
| 9 |
from .base_reader import BaseReader
|
| 10 |
+
from .base_rephraser import BaseRephraser
|
| 11 |
from .base_searcher import BaseSearcher
|
| 12 |
from .base_splitter import BaseSplitter
|
| 13 |
from .base_storage import BaseGraphStorage, BaseKVStorage, StorageNameSpace
|
graphgen/bases/base_operator.py
CHANGED
|
@@ -28,7 +28,7 @@ class BaseOperator(ABC):
|
|
| 28 |
op_name: str = None,
|
| 29 |
):
|
| 30 |
# lazy import to avoid circular import
|
| 31 |
-
from graphgen.common import init_storage
|
| 32 |
from graphgen.utils import set_logger
|
| 33 |
|
| 34 |
log_dir = os.path.join(working_dir, "logs")
|
|
|
|
| 28 |
op_name: str = None,
|
| 29 |
):
|
| 30 |
# lazy import to avoid circular import
|
| 31 |
+
from graphgen.common.init_storage import init_storage
|
| 32 |
from graphgen.utils import set_logger
|
| 33 |
|
| 34 |
log_dir = os.path.join(working_dir, "logs")
|
graphgen/bases/base_rephraser.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from abc import ABC, abstractmethod
|
| 2 |
+
from typing import Any
|
| 3 |
+
|
| 4 |
+
from graphgen.bases.base_llm_wrapper import BaseLLMWrapper
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class BaseRephraser(ABC):
    """
    Abstract base for components that rewrite text through an LLM client.

    Subclasses provide prompt construction (``build_prompt``) and response
    parsing (``parse_response``); ``rephrase`` chains them around a single
    call to the LLM client.
    """

    def __init__(self, llm_client: BaseLLMWrapper):
        # Client used by ``rephrase`` to obtain the LLM answer.
        self.llm_client = llm_client

    @abstractmethod
    def build_prompt(self, text: str) -> str:
        """Return the prompt to send to the LLM for ``text``."""

    @staticmethod
    @abstractmethod
    def parse_response(response: str) -> Any:
        """Turn the raw LLM response into the rephrased result."""

    async def rephrase(
        self,
        item: dict,
    ) -> dict:
        """
        Rephrase one item.

        :param item: A dict whose ``"content"`` entry holds the text to rephrase.
        :return: Whatever ``parse_response`` produces for the LLM answer.
        """
        llm_output = await self.llm_client.generate_answer(
            self.build_prompt(item["content"])
        )
        return self.parse_response(llm_output)
|
graphgen/common/__init__.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
| 1 |
-
from .init_llm import init_llm
|
| 2 |
-
from .init_storage import init_storage
|
|
|
|
| 1 |
+
# from .init_llm import init_llm
|
| 2 |
+
# from .init_storage import init_storage
|
graphgen/engine.py
CHANGED
|
@@ -11,7 +11,8 @@ from ray.data.block import Block
|
|
| 11 |
from ray.data.datasource.filename_provider import FilenameProvider
|
| 12 |
|
| 13 |
from graphgen.bases import Config, Node
|
| 14 |
-
from graphgen.common import init_llm
|
|
|
|
| 15 |
from graphgen.utils import logger
|
| 16 |
|
| 17 |
|
|
@@ -70,6 +71,7 @@ class Engine:
|
|
| 70 |
|
| 71 |
if not ray.is_initialized():
|
| 72 |
context = ray.init(
|
|
|
|
| 73 |
ignore_reinit_error=True,
|
| 74 |
logging_level=logging.ERROR,
|
| 75 |
log_to_driver=True,
|
|
|
|
| 11 |
from ray.data.datasource.filename_provider import FilenameProvider
|
| 12 |
|
| 13 |
from graphgen.bases import Config, Node
|
| 14 |
+
from graphgen.common.init_llm import init_llm
|
| 15 |
+
from graphgen.common.init_storage import init_storage
|
| 16 |
from graphgen.utils import logger
|
| 17 |
|
| 18 |
|
|
|
|
| 71 |
|
| 72 |
if not ray.is_initialized():
|
| 73 |
context = ray.init(
|
| 74 |
+
include_dashboard=True,
|
| 75 |
ignore_reinit_error=True,
|
| 76 |
logging_level=logging.ERROR,
|
| 77 |
log_to_driver=True,
|
graphgen/models/__init__.py
CHANGED
|
@@ -37,6 +37,7 @@ from .reader import (
|
|
| 37 |
RDFReader,
|
| 38 |
TXTReader,
|
| 39 |
)
|
|
|
|
| 40 |
from .searcher.db.ncbi_searcher import NCBISearch
|
| 41 |
from .searcher.db.rnacentral_searcher import RNACentralSearch
|
| 42 |
from .searcher.db.uniprot_searcher import UniProtSearch
|
|
|
|
| 37 |
RDFReader,
|
| 38 |
TXTReader,
|
| 39 |
)
|
| 40 |
+
from .rephraser import StyleControlledRephraser
|
| 41 |
from .searcher.db.ncbi_searcher import NCBISearch
|
| 42 |
from .searcher.db.rnacentral_searcher import RNACentralSearch
|
| 43 |
from .searcher.db.uniprot_searcher import UniProtSearch
|
graphgen/models/rephraser/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .style_controlled_rephraser import StyleControlledRephraser
|
graphgen/models/rephraser/style_controlled_rephraser.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, Optional
|
| 2 |
+
|
| 3 |
+
from graphgen.bases import BaseRephraser
|
| 4 |
+
from graphgen.templates import STYLE_CONTROLLED_REPHRASING_PROMPTS
|
| 5 |
+
from graphgen.utils import detect_main_language, logger
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class StyleControlledRephraser(BaseRephraser):
    """
    Rephraser that rewrites input text using a style-specific prompt template.

    The template is looked up in ``STYLE_CONTROLLED_REPHRASING_PROMPTS`` first
    by ``style`` and then by the language detected for the input text.
    """

    def __init__(self, llm_client: Any, style: str = "critical_analysis"):
        super().__init__(llm_client)
        # Key into STYLE_CONTROLLED_REPHRASING_PROMPTS selecting the rewrite style.
        self.style = style

    def build_prompt(self, text: str) -> str:
        """Fill the template for the configured style and detected language with ``text``."""
        logger.debug("Text to be rephrased: %s", text)
        # Detect the language before the lookup, matching the original evaluation order.
        detected_language = detect_main_language(text)
        templates_by_language = STYLE_CONTROLLED_REPHRASING_PROMPTS[self.style]
        return templates_by_language[detected_language].format(text=text)

    @staticmethod
    def parse_response(response: str) -> Optional[dict]:
        """Return ``{"content": <stripped response>}``, or ``None`` if it is empty after stripping."""
        cleaned = response.strip()
        logger.debug("Raw rephrased response: %s", cleaned)
        return {"content": cleaned} if cleaned else None
|
graphgen/operators/__init__.py
CHANGED
|
@@ -8,6 +8,7 @@ from .judge import JudgeService
|
|
| 8 |
from .partition import PartitionService
|
| 9 |
from .quiz import QuizService
|
| 10 |
from .read import read
|
|
|
|
| 11 |
from .search import SearchService
|
| 12 |
|
| 13 |
operators = {
|
|
@@ -21,5 +22,6 @@ operators = {
|
|
| 21 |
"partition": PartitionService,
|
| 22 |
"generate": GenerateService,
|
| 23 |
"evaluate": EvaluateService,
|
|
|
|
| 24 |
"filter": FilterService,
|
| 25 |
}
|
|
|
|
| 8 |
from .partition import PartitionService
|
| 9 |
from .quiz import QuizService
|
| 10 |
from .read import read
|
| 11 |
+
from .rephrase import RephraseService
|
| 12 |
from .search import SearchService
|
| 13 |
|
| 14 |
operators = {
|
|
|
|
| 22 |
"partition": PartitionService,
|
| 23 |
"generate": GenerateService,
|
| 24 |
"evaluate": EvaluateService,
|
| 25 |
+
"rephrase": RephraseService,
|
| 26 |
"filter": FilterService,
|
| 27 |
}
|
graphgen/operators/build_kg/build_kg_service.py
CHANGED
|
@@ -2,7 +2,8 @@ from typing import Tuple
|
|
| 2 |
|
| 3 |
from graphgen.bases import BaseGraphStorage, BaseLLMWrapper, BaseOperator
|
| 4 |
from graphgen.bases.datatypes import Chunk
|
| 5 |
-
from graphgen.common import init_llm
|
|
|
|
| 6 |
from graphgen.utils import logger
|
| 7 |
|
| 8 |
from .build_mm_kg import build_mm_kg
|
|
|
|
| 2 |
|
| 3 |
from graphgen.bases import BaseGraphStorage, BaseLLMWrapper, BaseOperator
|
| 4 |
from graphgen.bases.datatypes import Chunk
|
| 5 |
+
from graphgen.common.init_llm import init_llm
|
| 6 |
+
from graphgen.common.init_storage import init_storage
|
| 7 |
from graphgen.utils import logger
|
| 8 |
|
| 9 |
from .build_mm_kg import build_mm_kg
|
graphgen/operators/evaluate/evaluate_service.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
| 1 |
from typing import Tuple
|
| 2 |
|
| 3 |
from graphgen.bases import BaseLLMWrapper, BaseOperator
|
| 4 |
-
from graphgen.common import init_llm
|
|
|
|
| 5 |
from graphgen.utils import logger
|
| 6 |
|
| 7 |
from .evaluate_kg import evaluate_kg
|
|
|
|
| 1 |
from typing import Tuple
|
| 2 |
|
| 3 |
from graphgen.bases import BaseLLMWrapper, BaseOperator
|
| 4 |
+
from graphgen.common.init_llm import init_llm
|
| 5 |
+
from graphgen.common.init_storage import init_storage
|
| 6 |
from graphgen.utils import logger
|
| 7 |
|
| 8 |
from .evaluate_kg import evaluate_kg
|
graphgen/operators/extract/extract_service.py
CHANGED
|
@@ -2,7 +2,7 @@ import json
|
|
| 2 |
from typing import Tuple
|
| 3 |
|
| 4 |
from graphgen.bases import BaseLLMWrapper, BaseOperator, Chunk
|
| 5 |
-
from graphgen.common import init_llm
|
| 6 |
from graphgen.models.extractor import SchemaGuidedExtractor
|
| 7 |
from graphgen.utils import logger, run_concurrent
|
| 8 |
|
|
|
|
| 2 |
from typing import Tuple
|
| 3 |
|
| 4 |
from graphgen.bases import BaseLLMWrapper, BaseOperator, Chunk
|
| 5 |
+
from graphgen.common.init_llm import init_llm
|
| 6 |
from graphgen.models.extractor import SchemaGuidedExtractor
|
| 7 |
from graphgen.utils import logger, run_concurrent
|
| 8 |
|
graphgen/operators/generate/generate_service.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
| 1 |
from typing import Tuple
|
|
|
|
| 2 |
from graphgen.bases import BaseKVStorage, BaseLLMWrapper, BaseOperator
|
| 3 |
-
from graphgen.common import init_llm
|
|
|
|
| 4 |
from graphgen.utils import logger, run_concurrent
|
| 5 |
|
| 6 |
|
|
|
|
| 1 |
from typing import Tuple
|
| 2 |
+
|
| 3 |
from graphgen.bases import BaseKVStorage, BaseLLMWrapper, BaseOperator
|
| 4 |
+
from graphgen.common.init_llm import init_llm
|
| 5 |
+
from graphgen.common.init_storage import init_storage
|
| 6 |
from graphgen.utils import logger, run_concurrent
|
| 7 |
|
| 8 |
|
graphgen/operators/judge/judge_service.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
| 1 |
-
from typing import Tuple
|
| 2 |
import math
|
|
|
|
| 3 |
|
| 4 |
from graphgen.bases import BaseGraphStorage, BaseLLMWrapper, BaseOperator
|
| 5 |
-
from graphgen.common import init_llm
|
|
|
|
| 6 |
from graphgen.templates import STATEMENT_JUDGEMENT_PROMPT
|
| 7 |
from graphgen.utils import logger, run_concurrent, yes_no_loss_entropy
|
| 8 |
|
|
|
|
|
|
|
| 1 |
import math
|
| 2 |
+
from typing import Tuple
|
| 3 |
|
| 4 |
from graphgen.bases import BaseGraphStorage, BaseLLMWrapper, BaseOperator
|
| 5 |
+
from graphgen.common.init_llm import init_llm
|
| 6 |
+
from graphgen.common.init_storage import init_storage
|
| 7 |
from graphgen.templates import STATEMENT_JUDGEMENT_PROMPT
|
| 8 |
from graphgen.utils import logger, run_concurrent, yes_no_loss_entropy
|
| 9 |
|
graphgen/operators/partition/partition_service.py
CHANGED
|
@@ -2,7 +2,7 @@ import os
|
|
| 2 |
from typing import Iterable, Tuple
|
| 3 |
|
| 4 |
from graphgen.bases import BaseGraphStorage, BaseOperator, BaseTokenizer
|
| 5 |
-
from graphgen.common import init_storage
|
| 6 |
from graphgen.models import (
|
| 7 |
AnchorBFSPartitioner,
|
| 8 |
BFSPartitioner,
|
|
|
|
| 2 |
from typing import Iterable, Tuple
|
| 3 |
|
| 4 |
from graphgen.bases import BaseGraphStorage, BaseOperator, BaseTokenizer
|
| 5 |
+
from graphgen.common.init_storage import init_storage
|
| 6 |
from graphgen.models import (
|
| 7 |
AnchorBFSPartitioner,
|
| 8 |
BFSPartitioner,
|
graphgen/operators/quiz/quiz_service.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
| 1 |
from typing import Tuple
|
| 2 |
|
| 3 |
from graphgen.bases import BaseGraphStorage, BaseLLMWrapper, BaseOperator
|
| 4 |
-
from graphgen.common import init_llm
|
|
|
|
| 5 |
from graphgen.models import QuizGenerator
|
| 6 |
from graphgen.utils import logger, run_concurrent
|
| 7 |
|
|
|
|
| 1 |
from typing import Tuple
|
| 2 |
|
| 3 |
from graphgen.bases import BaseGraphStorage, BaseLLMWrapper, BaseOperator
|
| 4 |
+
from graphgen.common.init_llm import init_llm
|
| 5 |
+
from graphgen.common.init_storage import init_storage
|
| 6 |
from graphgen.models import QuizGenerator
|
| 7 |
from graphgen.utils import logger, run_concurrent
|
| 8 |
|
graphgen/operators/read/read.py
CHANGED
|
@@ -3,7 +3,7 @@ from typing import Any, List, Optional, Union
|
|
| 3 |
|
| 4 |
import ray
|
| 5 |
|
| 6 |
-
from graphgen.common import init_storage
|
| 7 |
from graphgen.models import (
|
| 8 |
CSVReader,
|
| 9 |
JSONReader,
|
|
|
|
| 3 |
|
| 4 |
import ray
|
| 5 |
|
| 6 |
+
from graphgen.common.init_storage import init_storage
|
| 7 |
from graphgen.models import (
|
| 8 |
CSVReader,
|
| 9 |
JSONReader,
|
graphgen/operators/rephrase/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .rephrase_service import RephraseService
|
graphgen/operators/rephrase/rephrase_service.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Tuple
|
| 2 |
+
|
| 3 |
+
from graphgen.bases import BaseLLMWrapper, BaseOperator
|
| 4 |
+
from graphgen.common.init_llm import init_llm
|
| 5 |
+
from graphgen.utils import run_concurrent
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class RephraseService(BaseOperator):
    """
    Operator that rephrases the text of each batch item with an LLM.

    Only ``method="style_controlled"`` is currently supported; any other
    value -- including the default ``"aggregated"`` -- raises ``ValueError``
    at construction time.
    """

    def __init__(
        self,
        working_dir: str = "cache",
        method: str = "aggregated",
        **rephrase_kwargs,
    ):
        """
        :param working_dir: Working directory forwarded to ``BaseOperator``.
        :param method: Rephrasing strategy; must be ``"style_controlled"``.
        :param rephrase_kwargs: Extra options. ``style`` (default
            ``"critical_analysis"``) is passed to ``StyleControlledRephraser``.
        :raises ValueError: If ``method`` is not a supported strategy.
        """
        super().__init__(working_dir=working_dir, op_name="rephrase_service")
        self.llm_client: BaseLLMWrapper = init_llm("synthesizer")
        self.method = method
        self.rephrase_kwargs = rephrase_kwargs

        if self.method == "style_controlled":
            # Imported here rather than at module level, as in the original code.
            from graphgen.models import StyleControlledRephraser

            self.rephraser = StyleControlledRephraser(
                self.llm_client,
                style=rephrase_kwargs.get("style", "critical_analysis"),
            )
        else:
            raise ValueError(f"Unsupported rephrase method: {self.method}")

    def process(self, batch: list) -> Tuple[list, dict]:
        """
        Rephrase the texts in the batch.

        :param batch: A list of dicts; each must carry ``"_trace_id"`` and the
            ``"content"`` text read by the rephraser.
        :return: A tuple of (results, meta_updates)
            results: A list of dicts containing rephrased texts. Each dict has the structure:
                {"_trace_id": str, "content": str}
            meta_updates: A dict mapping source IDs to lists of trace IDs for the rephrased texts.
        """
        final_results = []
        meta_updates = {}

        # Collect the source ids up front so a malformed item fails before any
        # LLM call is made.
        input_trace_ids = [item["_trace_id"] for item in batch]

        # NOTE(review): assumes run_concurrent returns one result per batch
        # item, in batch order -- confirm against graphgen.utils.
        results = run_concurrent(
            self.rephraser.rephrase,
            batch,
            desc="Rephrasing texts",
            unit="batch",
        )

        for input_trace_id, rephrased in zip(input_trace_ids, results):
            # Items whose response parsed to nothing are dropped.
            if not rephrased:
                continue
            rephrased["_trace_id"] = self.get_trace_id(rephrased)
            # The raw ``results`` list is no longer appended to while it is
            # being iterated; accepted outputs go only into ``final_results``.
            meta_updates.setdefault(input_trace_id, []).append(rephrased["_trace_id"])
            final_results.append(rephrased)

        return final_results, meta_updates
|
graphgen/operators/search/search_service.py
CHANGED
|
@@ -4,7 +4,7 @@ from typing import Optional
|
|
| 4 |
import pandas as pd
|
| 5 |
|
| 6 |
from graphgen.bases import BaseOperator
|
| 7 |
-
from graphgen.common import init_storage
|
| 8 |
from graphgen.utils import compute_content_hash, logger, run_concurrent
|
| 9 |
|
| 10 |
|
|
|
|
| 4 |
import pandas as pd
|
| 5 |
|
| 6 |
from graphgen.bases import BaseOperator
|
| 7 |
+
from graphgen.common.init_storage import init_storage
|
| 8 |
from graphgen.utils import compute_content_hash, logger, run_concurrent
|
| 9 |
|
| 10 |
|
graphgen/templates/__init__.py
CHANGED
|
@@ -14,5 +14,6 @@ from .generation import (
|
|
| 14 |
VQA_GENERATION_PROMPT,
|
| 15 |
)
|
| 16 |
from .kg import KG_EXTRACTION_PROMPT, KG_SUMMARIZATION_PROMPT, MMKG_EXTRACTION_PROMPT
|
|
|
|
| 17 |
from .search_judgement import SEARCH_JUDGEMENT_PROMPT
|
| 18 |
from .statement_judgement import STATEMENT_JUDGEMENT_PROMPT
|
|
|
|
| 14 |
VQA_GENERATION_PROMPT,
|
| 15 |
)
|
| 16 |
from .kg import KG_EXTRACTION_PROMPT, KG_SUMMARIZATION_PROMPT, MMKG_EXTRACTION_PROMPT
|
| 17 |
+
from .rephrasing import STYLE_CONTROLLED_REPHRASING_PROMPTS
|
| 18 |
from .search_judgement import SEARCH_JUDGEMENT_PROMPT
|
| 19 |
from .statement_judgement import STATEMENT_JUDGEMENT_PROMPT
|
graphgen/templates/rephrasing/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .style_controlled_rephrasing import STYLE_CONTROLLED_REPHRASING_PROMPTS
|
graphgen/templates/rephrasing/style_controlled_rephrasing/__init__.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .critical_analysis_rephrasing import CRITICAL_ANALYSIS_REPHRASING_PROMPTS
|
| 2 |
+
from .cross_domain_analogy_rephrasing import CROSS_DOMAIN_ANALOGY_REPHRASING_PROMPTS
|
| 3 |
+
from .executive_summary_rephrasing import EXECUTIVE_SUMMARY_REPHRASING_PROMPTS
|
| 4 |
+
from .first_person_narrative_rephrasing import FIRST_PERSON_NARRATIVE_REPHRASING_PROMPTS
|
| 5 |
+
from .historical_evolution_perspective_rephrasing import (
|
| 6 |
+
HISTORICAL_EVOLUTION_PERSPECTIVE_REPHRASING_PROMPTS,
|
| 7 |
+
)
|
| 8 |
+
from .popular_science_rephrasing import POPULAR_SCIENCE_REPHRASING_PROMPTS
|
| 9 |
+
from .qa_dialogue_format_rephrasing import QA_DIALOGUE_FORMAT_REPHRASING_PROMPTS
|
| 10 |
+
from .technical_deep_dive_rephrasing import TECHNICAL_DEEP_DIVE_REPHRASING_PROMPTS
|
| 11 |
+
|
| 12 |
+
STYLE_CONTROLLED_REPHRASING_PROMPTS = {
|
| 13 |
+
"popular_science": POPULAR_SCIENCE_REPHRASING_PROMPTS,
|
| 14 |
+
"critical_analysis": CRITICAL_ANALYSIS_REPHRASING_PROMPTS,
|
| 15 |
+
"cross_domain_analogy": CROSS_DOMAIN_ANALOGY_REPHRASING_PROMPTS,
|
| 16 |
+
"technical_deep_dive": TECHNICAL_DEEP_DIVE_REPHRASING_PROMPTS,
|
| 17 |
+
"executive_summary": EXECUTIVE_SUMMARY_REPHRASING_PROMPTS,
|
| 18 |
+
"first_person_narrative": FIRST_PERSON_NARRATIVE_REPHRASING_PROMPTS,
|
| 19 |
+
"historical_evolution_perspective": HISTORICAL_EVOLUTION_PERSPECTIVE_REPHRASING_PROMPTS,
|
| 20 |
+
"qa_dialogue_format": QA_DIALOGUE_FORMAT_REPHRASING_PROMPTS,
|
| 21 |
+
}
|
graphgen/templates/rephrasing/style_controlled_rephrasing/critical_analysis_rephrasing.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
TEMPLATE_ZH = """
|
| 2 |
+
【任务】以学术批判视角改写以下内容,形成技术评论文章。
|
| 3 |
+
|
| 4 |
+
【核心要求】
|
| 5 |
+
1. 语气风格:客观理性,第三人称学术视角,使用规范学术用语
|
| 6 |
+
2. 内容结构:
|
| 7 |
+
- 准确总结原文核心方法/发现(占比40%)
|
| 8 |
+
- 分析技术优势与创新点(占比20%)
|
| 9 |
+
- 指出潜在局限性与假设条件(占比20%)
|
| 10 |
+
- 提出可能的改进方向或未来工作(占比20%)
|
| 11 |
+
3. 引用规范:保留原文所有关键引用,采用标准学术引用格式
|
| 12 |
+
4. 事实准确性:不得歪曲或误读原文技术细节
|
| 13 |
+
|
| 14 |
+
【输出格式】
|
| 15 |
+
- 标题:原标题 + ":一项批判性分析"
|
| 16 |
+
- 段落:标准学术论文章节结构
|
| 17 |
+
- 字数:与原文相当或略长
|
| 18 |
+
|
| 19 |
+
原文内容:
|
| 20 |
+
{text}
|
| 21 |
+
|
| 22 |
+
请输出批判性分析改写版本:
|
| 23 |
+
"""
|
| 24 |
+
|
| 25 |
+
TEMPLATE_EN = """
|
| 26 |
+
【Task】Rewrite the following content from an academic critical perspective as a technical commentary.
|
| 27 |
+
|
| 28 |
+
【Core Requirements】
|
| 29 |
+
1. Tone: Objective and rational, third-person academic perspective, using standard academic terminology
|
| 30 |
+
2. Structure:
|
| 31 |
+
- Accurately summarize core methods/findings (40% of content)
|
| 32 |
+
- Analyze technical advantages and innovations (20%)
|
| 33 |
+
- Identify potential limitations and assumptions (20%)
|
| 34 |
+
- Propose possible improvements or future work (20%)
|
| 35 |
+
3. Citations: Retain all key references from original, using standard academic citation format
|
| 36 |
+
4. Factual Accuracy: Do not distort or misinterpret technical details
|
| 37 |
+
|
| 38 |
+
【Output Format】
|
| 39 |
+
- Title: Original Title + ": A Critical Analysis"
|
| 40 |
+
- Paragraphs: Standard academic paper structure
|
| 41 |
+
- Length: Similar to or slightly longer than original
|
| 42 |
+
|
| 43 |
+
Original Content:
|
| 44 |
+
{text}
|
| 45 |
+
|
| 46 |
+
Please output the critically analyzed rewrite:
|
| 47 |
+
"""
|
| 48 |
+
|
| 49 |
+
CRITICAL_ANALYSIS_REPHRASING_PROMPTS = {
|
| 50 |
+
"zh": TEMPLATE_ZH,
|
| 51 |
+
"en": TEMPLATE_EN,
|
| 52 |
+
}
|
graphgen/templates/rephrasing/style_controlled_rephrasing/cross_domain_analogy_rephrasing.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
TEMPLATE_ZH = """
|
| 2 |
+
【任务】通过跨领域类比解释技术概念。
|
| 3 |
+
|
| 4 |
+
【类比原则】
|
| 5 |
+
- 类比源领域:生物学、物理学、建筑学、经济学、烹饪等领域
|
| 6 |
+
- 类比强度:类比关系需直观且深刻,避免牵强附会
|
| 7 |
+
- 目标:降低理解门槛,同时保持技术严谨性
|
| 8 |
+
|
| 9 |
+
【核心要求】
|
| 10 |
+
1. 双轨并行:每个技术概念配一个恰当类比
|
| 11 |
+
2. 类比结构:
|
| 12 |
+
- 先介绍技术概念(准确、完整)
|
| 13 |
+
- 再引入类比对象及其映射关系
|
| 14 |
+
- 最后说明类比局限性和适用范围
|
| 15 |
+
3. 保真红线:技术部分必须与原文完全一致,不得因类比而简化
|
| 16 |
+
4. 创新性:鼓励使用新颖、出人意料但合理的类比
|
| 17 |
+
5. 篇幅:可比原文扩展20-40%
|
| 18 |
+
|
| 19 |
+
【评估标准】
|
| 20 |
+
- 类比恰当性(技术概念与类比对象的核心机制必须同构)
|
| 21 |
+
- 技术准确性(不得扭曲事实)
|
| 22 |
+
- 启发性(帮助读者建立深层理解)
|
| 23 |
+
|
| 24 |
+
原文内容:
|
| 25 |
+
{text}
|
| 26 |
+
|
| 27 |
+
请输出跨领域类比版本:
|
| 28 |
+
"""
|
| 29 |
+
|
| 30 |
+
TEMPLATE_EN = """
|
| 31 |
+
【Task】Explain technical concepts through cross-domain analogies.
|
| 32 |
+
|
| 33 |
+
【Analogy Principles】
|
| 34 |
+
- Source Domains: Biology, physics, architecture, economics, cooking, etc.
|
| 35 |
+
- Strength: Analogy should be intuitive yet profound, avoid forced comparisons
|
| 36 |
+
- Goal: Lower understanding barrier while maintaining technical rigor
|
| 37 |
+
|
| 38 |
+
【Core Requirements】
|
| 39 |
+
1. Dual Track: Pair each technical concept with an appropriate analogy
|
| 40 |
+
2. Analogy Structure:
|
| 41 |
+
- First introduce technical concept (accurate and complete)
|
| 42 |
+
- Then introduce analogy object and mapping relationship
|
| 43 |
+
- Finally explain analogy limitations and applicable scope
|
| 44 |
+
3. Fidelity Baseline: Technical parts must be identical to original, no simplification for analogy's sake
|
| 45 |
+
4. Innovation: Encourage novel, surprising but reasonable analogies
|
| 46 |
+
5. Length: May expand 20-40% beyond original
|
| 47 |
+
|
| 48 |
+
【Evaluation Criteria】
|
| 49 |
+
- Analogy Appropriateness (core mechanisms must be isomorphic)
|
| 50 |
+
- Technical Accuracy (no factual distortion)
|
| 51 |
+
- Heuristic Value (helps build deep understanding)
|
| 52 |
+
|
| 53 |
+
Original Content:
|
| 54 |
+
{text}
|
| 55 |
+
|
| 56 |
+
Please output the cross-domain analogy version:
|
| 57 |
+
"""
|
| 58 |
+
|
| 59 |
+
CROSS_DOMAIN_ANALOGY_REPHRASING_PROMPTS = {
|
| 60 |
+
"zh": TEMPLATE_ZH,
|
| 61 |
+
"en": TEMPLATE_EN,
|
| 62 |
+
}
|
graphgen/templates/rephrasing/style_controlled_rephrasing/executive_summary_rephrasing.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
TEMPLATE_ZH = """
|
| 2 |
+
【任务】为高管层撰写决策摘要。
|
| 3 |
+
|
| 4 |
+
【读者假设】
|
| 5 |
+
- 职位:CTO/技术VP/产品总监
|
| 6 |
+
- 核心关切:技术价值、资源投入、竞争壁垒、商业影响
|
| 7 |
+
|
| 8 |
+
【核心要求】
|
| 9 |
+
1. 信息密度:每句话必须传达战略价值
|
| 10 |
+
2. 内容优先级:
|
| 11 |
+
- 核心技术突破与创新价值(必须)
|
| 12 |
+
- 与竞品的差异化优势(必须)
|
| 13 |
+
- 实施成本与资源需求(必须)
|
| 14 |
+
- 潜在商业应用场景(必须)
|
| 15 |
+
- 技术风险评估(可选)
|
| 16 |
+
3. 语言风格:金字塔原理,结论先行,数据支撑
|
| 17 |
+
4. 简洁性:控制在原文长度的30-50%
|
| 18 |
+
5. 事实准确性:所有数据、性能指标必须与原文完全一致
|
| 19 |
+
|
| 20 |
+
【禁用表达】
|
| 21 |
+
- 避免"可能"、"也许"等不确定表述
|
| 22 |
+
- 禁用技术细节描述(除非直接影响决策)
|
| 23 |
+
- 避免行话和缩写
|
| 24 |
+
|
| 25 |
+
原文内容:
|
| 26 |
+
{text}
|
| 27 |
+
|
| 28 |
+
请直接输出高管决策摘要:
|
| 29 |
+
"""
|
| 30 |
+
|
| 31 |
+
TEMPLATE_EN = """
|
| 32 |
+
【Task】Write an executive summary for C-suite decision-making.
|
| 33 |
+
|
| 34 |
+
【Audience Assumption】
|
| 35 |
+
- Position: CTO/VP of Engineering/Product Director
|
| 36 |
+
- Core Concerns: Technical value, resource investment, competitive moats, business impact
|
| 37 |
+
|
| 38 |
+
【Core Requirements】
|
| 39 |
+
1. Information Density: Every sentence must convey strategic value
|
| 40 |
+
2. Content Priority:
|
| 41 |
+
- Core technical breakthrough and innovation value (MUST)
|
| 42 |
+
- Differentiated advantages over competitors (MUST)
|
| 43 |
+
- Implementation cost and resource requirements (MUST)
|
| 44 |
+
- Potential business application scenarios (MUST)
|
| 45 |
+
- Technical risk assessment (OPTIONAL)
|
| 46 |
+
3. Language Style: Pyramid principle - lead with conclusions, support with data
|
| 47 |
+
4. Conciseness: 30-50% of original length
|
| 48 |
+
5. Factual Accuracy: All data and performance metrics must be identical to original
|
| 49 |
+
|
| 50 |
+
【Prohibited Expressions】
|
| 51 |
+
- Avoid uncertain terms like "maybe," "perhaps"
|
| 52 |
+
- No deep technical details (unless directly impacting decision)
|
| 53 |
+
- No jargon or unexplained acronyms
|
| 54 |
+
|
| 55 |
+
Original Content:
|
| 56 |
+
{text}
|
| 57 |
+
|
| 58 |
+
Please output the executive summary directly:
|
| 59 |
+
"""
|
| 60 |
+
|
| 61 |
+
EXECUTIVE_SUMMARY_REPHRASING_PROMPTS = {
|
| 62 |
+
"zh": TEMPLATE_ZH,
|
| 63 |
+
"en": TEMPLATE_EN,
|
| 64 |
+
}
|
graphgen/templates/rephrasing/style_controlled_rephrasing/first_person_narrative_rephrasing.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
TEMPLATE_ZH = """
|
| 2 |
+
【任务】将技术文档改写为第一人称实践经验分享。
|
| 3 |
+
|
| 4 |
+
【角色设定】
|
| 5 |
+
- 身份:资深技术实践者/研究员
|
| 6 |
+
- 场景:技术博客/内部经验分享会
|
| 7 |
+
- 目标读者:同行从业者
|
| 8 |
+
|
| 9 |
+
【核心要求】
|
| 10 |
+
1. 视角:全程使用"我/我们"第一人称
|
| 11 |
+
2. 内容融合:
|
| 12 |
+
- 保留原文所有技术事实(代码、数据、架构)
|
| 13 |
+
- 添加个人实践中的观察、挑战与解决思路
|
| 14 |
+
- 分享真实应用场景和效果数据
|
| 15 |
+
3. 语言风格:专业但亲和,避免过度口语化
|
| 16 |
+
4. 叙事元素:可包含"最初尝试-遇到问题-调整思路-最终效果"的故事线
|
| 17 |
+
5. 事实红线:技术细节必须与原文完全一致,不得虚构数据
|
| 18 |
+
|
| 19 |
+
【禁止】
|
| 20 |
+
- 不得编造不存在的个人经历
|
| 21 |
+
- 不得改变技术实现细节
|
| 22 |
+
|
| 23 |
+
原文内容:
|
| 24 |
+
{text}
|
| 25 |
+
|
| 26 |
+
请直接输出第一人称叙事版本:
|
| 27 |
+
"""
|
| 28 |
+
|
| 29 |
+
TEMPLATE_EN = """
|
| 30 |
+
【Task】Rewrite the technical document as a first-person practical experience sharing.
|
| 31 |
+
|
| 32 |
+
【Role Setting】
|
| 33 |
+
- Identity: Senior practitioner/researcher
|
| 34 |
+
- Scenario: Technical blog/internal sharing session
|
| 35 |
+
- Target Audience: Peer professionals
|
| 36 |
+
|
| 37 |
+
【Core Requirements】
|
| 38 |
+
1. Perspective: Use first-person "I/we" throughout
|
| 39 |
+
2. Content Integration:
|
| 40 |
+
- Retain ALL technical facts (code, data, architecture) from original
|
| 41 |
+
- Add personal observations, challenges, and solution approaches from practice
|
| 42 |
+
- Share real application scenarios and performance data
|
| 43 |
+
3. Language Style: Professional yet approachable, avoid excessive colloquialism
|
| 44 |
+
4. Narrative: May include "initial attempt-encountered problem-adjusted approach-final result" storyline
|
| 45 |
+
5. Factual Baseline: Technical details must be identical to original, no fabricated data
|
| 46 |
+
|
| 47 |
+
【Prohibited】
|
| 48 |
+
- Do not invent non-existent personal experiences
|
| 49 |
+
- Do not alter technical implementation details
|
| 50 |
+
|
| 51 |
+
Original Content:
|
| 52 |
+
{text}
|
| 53 |
+
|
| 54 |
+
Please output the first-person narrative version directly:
|
| 55 |
+
"""
|
| 56 |
+
|
| 57 |
+
FIRST_PERSON_NARRATIVE_REPHRASING_PROMPTS = {
|
| 58 |
+
"zh": TEMPLATE_ZH,
|
| 59 |
+
"en": TEMPLATE_EN,
|
| 60 |
+
}
|
graphgen/templates/rephrasing/style_controlled_rephrasing/historical_evolution_perspective_rephrasing.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
TEMPLATE_ZH = """
|
| 2 |
+
【任务】按技术发展史视角重构内容。
|
| 3 |
+
|
| 4 |
+
【叙事框架】
|
| 5 |
+
- 时间轴线:从起源→关键突破→当前状态→未来趋势
|
| 6 |
+
- 演进逻辑:揭示"技术瓶颈突破→新范式建立→新问题出现"的循环
|
| 7 |
+
|
| 8 |
+
【核心要求】
|
| 9 |
+
1. 时间准确性:所有时间点、版本号、发布顺序必须核实准确
|
| 10 |
+
2. 因果链:
|
| 11 |
+
- 明确每个演进阶段的驱动力(理论突破/工程需求/硬件进步)
|
| 12 |
+
- 指出技术演进的必然性与偶然性
|
| 13 |
+
3. 内容结构:
|
| 14 |
+
- 背景与起源(技术诞生前的状态)
|
| 15 |
+
- 关键里程碑(带具体时间)
|
| 16 |
+
- 范式转移(革命性变化)
|
| 17 |
+
- 当前成熟形态
|
| 18 |
+
- 未来展望(基于原文技术路径)
|
| 19 |
+
4. 技术保真:所有技术描述必须与原文事实一致
|
| 20 |
+
5. 分析深度:不能仅罗列事实,必须揭示演进逻辑
|
| 21 |
+
|
| 22 |
+
【输出规范】
|
| 23 |
+
- 使用时间轴标记(如[2017]、[2020])增强可读性
|
| 24 |
+
- 关键人物/团队需保留原名
|
| 25 |
+
- 禁止编造不存在的技术演进路径
|
| 26 |
+
|
| 27 |
+
原文内容:
|
| 28 |
+
{text}
|
| 29 |
+
|
| 30 |
+
请输出历史演进视角版本:
|
| 31 |
+
"""
|
| 32 |
+
|
| 33 |
+
TEMPLATE_EN = """
|
| 34 |
+
【Task】Reconstruct content from a technological history evolution perspective.
|
| 35 |
+
|
| 36 |
+
【Narrative Framework】
|
| 37 |
+
- Timeline: Origin → Key Breakthroughs → Current State → Future Trends
|
| 38 |
+
- Evolution Logic: Reveal the cycle of "technical bottleneck breakthrough → new paradigm establishment → new problems emerge"
|
| 39 |
+
|
| 40 |
+
【Core Requirements】
|
| 41 |
+
1. Temporal Accuracy: ALL dates, version numbers, and release sequences must be verified and accurate
|
| 42 |
+
2. Causality Chain:
|
| 43 |
+
- Identify drivers of each evolution stage (theoretical breakthrough/engineering needs/hardware advances)
|
| 44 |
+
- Point out inevitability and contingency of technical evolution
|
| 45 |
+
3. Content Structure:
|
| 46 |
+
- Background & Origin (the state of the field before the technology was born)
|
| 47 |
+
- Key Milestones (with specific dates)
|
| 48 |
+
- Paradigm Shifts (revolutionary changes)
|
| 49 |
+
- Current Mature Form
|
| 50 |
+
- Future Outlook (based on original's technical trajectory)
|
| 51 |
+
4. Technical Fidelity: ALL technical descriptions must be factually consistent with original
|
| 52 |
+
5. Analytical Depth: Must reveal evolution logic, not just list facts
|
| 53 |
+
|
| 54 |
+
【Output Specification】
|
| 55 |
+
- Use timeline markers ([2017], [2020]) for readability
|
| 56 |
+
- Keep original names of key people/teams
|
| 57 |
+
- DO NOT invent non-existent evolution paths
|
| 58 |
+
|
| 59 |
+
Original Content:
|
| 60 |
+
{text}
|
| 61 |
+
|
| 62 |
+
Please output the historical evolution version:
|
| 63 |
+
"""
|
| 64 |
+
|
| 65 |
+
HISTORICAL_EVOLUTION_PERSPECTIVE_REPHRASING_PROMPTS = {
|
| 66 |
+
"zh": TEMPLATE_ZH,
|
| 67 |
+
"en": TEMPLATE_EN,
|
| 68 |
+
}
|
graphgen/templates/rephrasing/style_controlled_rephrasing/popular_science_rephrasing.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
TEMPLATE_ZH = """
|
| 2 |
+
【任务】将以下技术文档改写为面向普通读者的科普文章。
|
| 3 |
+
|
| 4 |
+
【核心要求】
|
| 5 |
+
1. 语言风格:生动活泼,避免冷僻专业术语;必须使用术语时,需用生活化比喻或类比解释
|
| 6 |
+
2. 内容保真:所有核心事实、数据和技术结论必须准确无误,不得篡改或过度简化
|
| 7 |
+
3. 叙事结构:采用"问题-发现-应用"的故事线,增强可读性
|
| 8 |
+
4. 读者定位:假设读者具有高中文化水平,无专业背景
|
| 9 |
+
5. 篇幅控制:可适当扩展,但每段聚焦一个核心概念
|
| 10 |
+
|
| 11 |
+
【禁止行为】
|
| 12 |
+
- 不得删除关键技术细节
|
| 13 |
+
- 不得改变原意或事实
|
| 14 |
+
- 避免使用"这个东西"、"那个技术"等模糊指代
|
| 15 |
+
|
| 16 |
+
原文内容:
|
| 17 |
+
{text}
|
| 18 |
+
|
| 19 |
+
请直接输出改写后的科普文章:
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
TEMPLATE_EN = """
|
| 23 |
+
【Task】Rewrite the following technical document as a popular science article for general readers.
|
| 24 |
+
|
| 25 |
+
【Core Requirements】
|
| 26 |
+
1. Language Style: Lively and engaging; avoid jargon; when technical terms are necessary, explain with everyday analogies or metaphors
|
| 27 |
+
2. Content Fidelity: All core facts, data, and technical conclusions must be accurate. Do not distort or oversimplify
|
| 28 |
+
3. Narrative Structure: Use a "problem-discovery-application" storyline to enhance readability
|
| 29 |
+
4. Audience: Assume high school education level, no technical background
|
| 30 |
+
5. Length: May expand moderately, but each paragraph should focus on one core concept
|
| 31 |
+
|
| 32 |
+
【Prohibited】
|
| 33 |
+
- Do not remove key technical details
|
| 34 |
+
- Do not change original meaning or facts
|
| 35 |
+
- Avoid vague references like "this thing" or "that technology"
|
| 36 |
+
|
| 37 |
+
Original Content:
|
| 38 |
+
{text}
|
| 39 |
+
|
| 40 |
+
Please output the rewritten popular science article directly:
|
| 41 |
+
"""
|
| 42 |
+
|
| 43 |
+
POPULAR_SCIENCE_REPHRASING_PROMPTS = {
|
| 44 |
+
"zh": TEMPLATE_ZH,
|
| 45 |
+
"en": TEMPLATE_EN,
|
| 46 |
+
}
|
graphgen/templates/rephrasing/style_controlled_rephrasing/qa_dialogue_format_rephrasing.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
TEMPLATE_ZH = """
|
| 2 |
+
【任务】将技术文档重构为自然问答对话。
|
| 3 |
+
|
| 4 |
+
【对话设计原则】
|
| 5 |
+
- 对话角色:提问者(好奇心驱动的学习者) vs 解答者(专家)
|
| 6 |
+
- 问题序列:从基础概念→技术细节→应用实践→深度追问,逻辑递进
|
| 7 |
+
|
| 8 |
+
【核心要求】
|
| 9 |
+
1. 问题设计:
|
| 10 |
+
- 每个问题必须源于原文知识点
|
| 11 |
+
- 问题要具体、明确,避免空泛
|
| 12 |
+
- 体现真实学习过程中的疑惑点
|
| 13 |
+
2. 回答规范:
|
| 14 |
+
- 回答必须准确、完整,引用原文事实
|
| 15 |
+
- 保持专家解答的权威性
|
| 16 |
+
- 可适当补充背景信息帮助理解
|
| 17 |
+
3. 对话流畅性:问题间有自然过渡,避免跳跃
|
| 18 |
+
4. 覆盖度:确保原文所有重要知识点都被至少一个问题覆盖
|
| 19 |
+
5. 事实核查:回答中的技术细节、数据必须与原文完全一致
|
| 20 |
+
|
| 21 |
+
【输出格式】
|
| 22 |
+
Q1: [问题1]
|
| 23 |
+
A1: [回答1]
|
| 24 |
+
|
| 25 |
+
Q2: [问题2]
|
| 26 |
+
A2: [回答2]
|
| 27 |
+
...
|
| 28 |
+
|
| 29 |
+
原文内容:
|
| 30 |
+
{text}
|
| 31 |
+
|
| 32 |
+
请输出问答对话版本:
|
| 33 |
+
"""
|
| 34 |
+
|
| 35 |
+
TEMPLATE_EN = """
|
| 36 |
+
【Task】Reconstruct the technical document as a natural Q&A dialogue.
|
| 37 |
+
|
| 38 |
+
【Dialogue Design Principles】
|
| 39 |
+
- Roles: Inquirer (curious learner) vs. Expert (domain specialist)
|
| 40 |
+
- Question Flow: From basic concepts → technical details → practical applications → deep follow-ups, logically progressive
|
| 41 |
+
|
| 42 |
+
【Core Requirements】
|
| 43 |
+
1. Question Design:
|
| 44 |
+
- Each question must originate from a knowledge point in the original content
|
| 45 |
+
- Questions should be specific and clear, avoid vagueness
|
| 46 |
+
- Reflect points of confusion in the real learning process
|
| 47 |
+
2. Answer Specification:
|
| 48 |
+
- Answers must be accurate and complete, citing original facts
|
| 49 |
+
- Maintain authoritative expert tone
|
| 50 |
+
- May supplement background information when helpful
|
| 51 |
+
3. Dialogue Fluency: Natural transitions between questions; avoid abrupt topic jumps
|
| 52 |
+
4. Coverage: Ensure ALL important knowledge points from original are covered by at least one question
|
| 53 |
+
5. Fact Check: Technical details and data in answers must be identical to original
|
| 54 |
+
|
| 55 |
+
【Output Format】
|
| 56 |
+
Q1: [Question 1]
|
| 57 |
+
A1: [Answer 1]
|
| 58 |
+
|
| 59 |
+
Q2: [Question 2]
|
| 60 |
+
A2: [Answer 2]
|
| 61 |
+
...
|
| 62 |
+
|
| 63 |
+
Original Content:
|
| 64 |
+
{text}
|
| 65 |
+
|
| 66 |
+
Please output the Q&A dialogue version:
|
| 67 |
+
"""
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
QA_DIALOGUE_FORMAT_REPHRASING_PROMPTS = {
|
| 71 |
+
"zh": TEMPLATE_ZH,
|
| 72 |
+
"en": TEMPLATE_EN,
|
| 73 |
+
}
|
graphgen/templates/rephrasing/style_controlled_rephrasing/technical_deep_dive_rephrasing.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
TEMPLATE_ZH = """
|
| 2 |
+
【任务】以领域专家视角进行深度技术剖析。
|
| 3 |
+
|
| 4 |
+
【读者定位】
|
| 5 |
+
- 目标读者:同领域高级工程师/研究员
|
| 6 |
+
- 预期效果:揭示技术细节、设计权衡与实现原理
|
| 7 |
+
|
| 8 |
+
【核心要求】
|
| 9 |
+
1. 技术精确性:
|
| 10 |
+
- 使用精确的专业术语和符号表示
|
| 11 |
+
- 补充技术背景、相关工作和理论基础
|
| 12 |
+
- 必要时用公式或代码片段说明
|
| 13 |
+
2. 深度维度:
|
| 14 |
+
- 算法复杂度分析
|
| 15 |
+
- 系统架构设计权衡
|
| 16 |
+
- 性能瓶颈与优化空间
|
| 17 |
+
- 边界条件和异常情况处理
|
| 18 |
+
3. 内容扩展:可在原文基础上增加30-50%的技术细节
|
| 19 |
+
4. 语气:权威、严谨、逻辑严密
|
| 20 |
+
|
| 21 |
+
【输出规范】
|
| 22 |
+
- 保持原文所有事实准确无误
|
| 23 |
+
- 新增细节需符合领域常识
|
| 24 |
+
- 使用标准技术文档格式
|
| 25 |
+
|
| 26 |
+
原文内容:
|
| 27 |
+
{text}
|
| 28 |
+
|
| 29 |
+
请输出技术深度剖析版本:
|
| 30 |
+
"""
|
| 31 |
+
|
| 32 |
+
TEMPLATE_EN = """
|
| 33 |
+
【Task】Conduct an in-depth technical analysis from a domain expert perspective.
|
| 34 |
+
|
| 35 |
+
【Audience】
|
| 36 |
+
- Target: Senior engineers/researchers in the same field
|
| 37 |
+
- Goal: Reveal technical details, design trade-offs, and implementation principles
|
| 38 |
+
|
| 39 |
+
【Core Requirements】
|
| 40 |
+
1. Technical Precision:
|
| 41 |
+
- Use precise technical terminology and notation
|
| 42 |
+
- Supplement with technical background, related work, and theoretical foundations
|
| 43 |
+
- Include formulas or code snippets when necessary
|
| 44 |
+
2. Depth Dimensions:
|
| 45 |
+
- Algorithmic complexity analysis
|
| 46 |
+
- System architecture design trade-offs
|
| 47 |
+
- Performance bottlenecks and optimization opportunities
|
| 48 |
+
- Edge cases and exception handling
|
| 49 |
+
3. Content Expansion: May add 30-50% more technical details than original
|
| 50 |
+
4. Tone: Authoritative, rigorous, logically sound
|
| 51 |
+
|
| 52 |
+
【Output Specification】
|
| 53 |
+
- Keep every fact from the original 100% accurate
|
| 54 |
+
- Added details must align with domain common knowledge
|
| 55 |
+
- Use standard technical documentation format
|
| 56 |
+
|
| 57 |
+
Original Content:
|
| 58 |
+
{text}
|
| 59 |
+
|
| 60 |
+
Please output the technical deep-dive version:
|
| 61 |
+
"""
|
| 62 |
+
|
| 63 |
+
TECHNICAL_DEEP_DIVE_REPHRASING_PROMPTS = {
|
| 64 |
+
"zh": TEMPLATE_ZH,
|
| 65 |
+
"en": TEMPLATE_EN,
|
| 66 |
+
}
|
requirements.txt
CHANGED
|
@@ -12,7 +12,7 @@ nltk
|
|
| 12 |
jieba
|
| 13 |
plotly
|
| 14 |
pandas
|
| 15 |
-
gradio==5.
|
| 16 |
kaleido
|
| 17 |
pyyaml
|
| 18 |
langcodes
|
|
@@ -21,8 +21,7 @@ fastapi
|
|
| 21 |
trafilatura
|
| 22 |
aiohttp
|
| 23 |
socksio
|
| 24 |
-
|
| 25 |
-
ray==2.53.0
|
| 26 |
pyarrow
|
| 27 |
|
| 28 |
leidenalg
|
|
|
|
| 12 |
jieba
|
| 13 |
plotly
|
| 14 |
pandas
|
| 15 |
+
gradio==5.50.0
|
| 16 |
kaleido
|
| 17 |
pyyaml
|
| 18 |
langcodes
|
|
|
|
| 21 |
trafilatura
|
| 22 |
aiohttp
|
| 23 |
socksio
|
| 24 |
+
ray[default]==2.53.0
|
|
|
|
| 25 |
pyarrow
|
| 26 |
|
| 27 |
leidenalg
|