Spaces:

chenzihong
/

GraphGen

Runtime error

App Files Files Community

github-actions[bot] committed on Jan 30

Commit

0a18089

1 Parent(s): 9a57b42

Auto-sync from demo at Fri Jan 30 08:46:58 UTC 2026

Browse files

Files changed (32) hide show

graphgen/bases/__init__.py +1 -0
graphgen/bases/base_operator.py +1 -1
graphgen/bases/base_rephraser.py +31 -0
graphgen/common/__init__.py +2 -2
graphgen/engine.py +3 -1
graphgen/models/__init__.py +1 -0
graphgen/models/rephraser/__init__.py +1 -0
graphgen/models/rephraser/style_controlled_rephraser.py +32 -0
graphgen/operators/__init__.py +2 -0
graphgen/operators/build_kg/build_kg_service.py +2 -1
graphgen/operators/evaluate/evaluate_service.py +2 -1
graphgen/operators/extract/extract_service.py +1 -1
graphgen/operators/generate/generate_service.py +3 -1
graphgen/operators/judge/judge_service.py +3 -2
graphgen/operators/partition/partition_service.py +1 -1
graphgen/operators/quiz/quiz_service.py +2 -1
graphgen/operators/read/read.py +1 -1
graphgen/operators/rephrase/__init__.py +1 -0
graphgen/operators/rephrase/rephrase_service.py +62 -0
graphgen/operators/search/search_service.py +1 -1
graphgen/templates/__init__.py +1 -0
graphgen/templates/rephrasing/__init__.py +1 -0
graphgen/templates/rephrasing/style_controlled_rephrasing/__init__.py +21 -0
graphgen/templates/rephrasing/style_controlled_rephrasing/critical_analysis_rephrasing.py +52 -0
graphgen/templates/rephrasing/style_controlled_rephrasing/cross_domain_analogy_rephrasing.py +62 -0
graphgen/templates/rephrasing/style_controlled_rephrasing/executive_summary_rephrasing.py +64 -0
graphgen/templates/rephrasing/style_controlled_rephrasing/first_person_narrative_rephrasing.py +60 -0
graphgen/templates/rephrasing/style_controlled_rephrasing/historical_evolution_perspective_rephrasing.py +68 -0
graphgen/templates/rephrasing/style_controlled_rephrasing/popular_science_rephrasing.py +46 -0
graphgen/templates/rephrasing/style_controlled_rephrasing/qa_dialogue_format_rephrasing.py +73 -0
graphgen/templates/rephrasing/style_controlled_rephrasing/technical_deep_dive_rephrasing.py +66 -0
requirements.txt +2 -3

graphgen/bases/__init__.py CHANGED Viewed

@@ -7,6 +7,7 @@ from .base_llm_wrapper import BaseLLMWrapper
 from .base_operator import BaseOperator
 from .base_partitioner import BasePartitioner
 from .base_reader import BaseReader
 from .base_searcher import BaseSearcher
 from .base_splitter import BaseSplitter
 from .base_storage import BaseGraphStorage, BaseKVStorage, StorageNameSpace

 from .base_operator import BaseOperator
 from .base_partitioner import BasePartitioner
 from .base_reader import BaseReader
+from .base_rephraser import BaseRephraser
 from .base_searcher import BaseSearcher
 from .base_splitter import BaseSplitter
 from .base_storage import BaseGraphStorage, BaseKVStorage, StorageNameSpace

graphgen/bases/base_operator.py CHANGED Viewed

@@ -28,7 +28,7 @@ class BaseOperator(ABC):
         op_name: str = None,
     ):
         # lazy import to avoid circular import
-        from graphgen.common import init_storage
         from graphgen.utils import set_logger
         log_dir = os.path.join(working_dir, "logs")

         op_name: str = None,
     ):
         # lazy import to avoid circular import
+        from graphgen.common.init_storage import init_storage
         from graphgen.utils import set_logger
         log_dir = os.path.join(working_dir, "logs")

graphgen/bases/base_rephraser.py ADDED Viewed

	@@ -0,0 +1,31 @@

+from abc import ABC, abstractmethod
+from typing import Any
+from graphgen.bases.base_llm_wrapper import BaseLLMWrapper
+class BaseRephraser(ABC):
+    """
+    Rephrase text based on given prompts.
+    """
+    def __init__(self, llm_client: BaseLLMWrapper):
+        self.llm_client = llm_client
+    @abstractmethod
+    def build_prompt(self, text: str) -> str:
+        """Build prompt for LLM based on the given text"""
+    @staticmethod
+    @abstractmethod
+    def parse_response(response: str) -> Any:
+        """Parse the LLM response and return the rephrased text"""
+    async def rephrase(
+        self,
+        item: dict,
+    ) -> dict:
+        text = item["content"]
+        prompt = self.build_prompt(text)
+        response = await self.llm_client.generate_answer(prompt)
+        return self.parse_response(response)

graphgen/common/__init__.py CHANGED Viewed

	@@ -1,2 +1,2 @@
1	- from .init_llm import init_llm
2	- from .init_storage import init_storage


1	+ # from .init_llm import init_llm
2	+ # from .init_storage import init_storage

graphgen/engine.py CHANGED Viewed

@@ -11,7 +11,8 @@ from ray.data.block import Block
 from ray.data.datasource.filename_provider import FilenameProvider
 from graphgen.bases import Config, Node
-from graphgen.common import init_llm, init_storage
 from graphgen.utils import logger
@@ -70,6 +71,7 @@ class Engine:
         if not ray.is_initialized():
             context = ray.init(
                 ignore_reinit_error=True,
                 logging_level=logging.ERROR,
                 log_to_driver=True,

 from ray.data.datasource.filename_provider import FilenameProvider
 from graphgen.bases import Config, Node
+from graphgen.common.init_llm import init_llm
+from graphgen.common.init_storage import init_storage
 from graphgen.utils import logger
         if not ray.is_initialized():
             context = ray.init(
+                include_dashboard=True,
                 ignore_reinit_error=True,
                 logging_level=logging.ERROR,
                 log_to_driver=True,

graphgen/models/__init__.py CHANGED Viewed

@@ -37,6 +37,7 @@ from .reader import (
     RDFReader,
     TXTReader,
 )
 from .searcher.db.ncbi_searcher import NCBISearch
 from .searcher.db.rnacentral_searcher import RNACentralSearch
 from .searcher.db.uniprot_searcher import UniProtSearch

     RDFReader,
     TXTReader,
 )
+from .rephraser import StyleControlledRephraser
 from .searcher.db.ncbi_searcher import NCBISearch
 from .searcher.db.rnacentral_searcher import RNACentralSearch
 from .searcher.db.uniprot_searcher import UniProtSearch

graphgen/models/rephraser/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .style_controlled_rephraser import StyleControlledRephraser

graphgen/models/rephraser/style_controlled_rephraser.py ADDED Viewed

	@@ -0,0 +1,32 @@

+from typing import Any, Optional
+from graphgen.bases import BaseRephraser
+from graphgen.templates import STYLE_CONTROLLED_REPHRASING_PROMPTS
+from graphgen.utils import detect_main_language, logger
+class StyleControlledRephraser(BaseRephraser):
+    """
+    Style Controlled Rephraser rephrases the input text based on a specified style.
+    """
+    def __init__(self, llm_client: Any, style: str = "critical_analysis"):
+        super().__init__(llm_client)
+        self.style = style
+    def build_prompt(self, text: str) -> str:
+        logger.debug("Text to be rephrased: %s", text)
+        language = detect_main_language(text)
+        prompt_template = STYLE_CONTROLLED_REPHRASING_PROMPTS[self.style][language]
+        prompt = prompt_template.format(text=text)
+        return prompt
+    @staticmethod
+    def parse_response(response: str) -> Optional[dict]:
+        result = response.strip()
+        logger.debug("Raw rephrased response: %s", result)
+        if not result:
+            return None
+        return {
+            "content": result,
+        }

graphgen/operators/__init__.py CHANGED Viewed

@@ -8,6 +8,7 @@ from .judge import JudgeService
 from .partition import PartitionService
 from .quiz import QuizService
 from .read import read
 from .search import SearchService
 operators = {
@@ -21,5 +22,6 @@ operators = {
     "partition": PartitionService,
     "generate": GenerateService,
     "evaluate": EvaluateService,
     "filter": FilterService,
 }

 from .partition import PartitionService
 from .quiz import QuizService
 from .read import read
+from .rephrase import RephraseService
 from .search import SearchService
 operators = {
     "partition": PartitionService,
     "generate": GenerateService,
     "evaluate": EvaluateService,
+    "rephrase": RephraseService,
     "filter": FilterService,
 }

graphgen/operators/build_kg/build_kg_service.py CHANGED Viewed

@@ -2,7 +2,8 @@ from typing import Tuple
 from graphgen.bases import BaseGraphStorage, BaseLLMWrapper, BaseOperator
 from graphgen.bases.datatypes import Chunk
-from graphgen.common import init_llm, init_storage
 from graphgen.utils import logger
 from .build_mm_kg import build_mm_kg

 from graphgen.bases import BaseGraphStorage, BaseLLMWrapper, BaseOperator
 from graphgen.bases.datatypes import Chunk
+from graphgen.common.init_llm import init_llm
+from graphgen.common.init_storage import init_storage
 from graphgen.utils import logger
 from .build_mm_kg import build_mm_kg

graphgen/operators/evaluate/evaluate_service.py CHANGED Viewed

@@ -1,7 +1,8 @@
 from typing import Tuple
 from graphgen.bases import BaseLLMWrapper, BaseOperator
-from graphgen.common import init_llm, init_storage
 from graphgen.utils import logger
 from .evaluate_kg import evaluate_kg

 from typing import Tuple
 from graphgen.bases import BaseLLMWrapper, BaseOperator
+from graphgen.common.init_llm import init_llm
+from graphgen.common.init_storage import init_storage
 from graphgen.utils import logger
 from .evaluate_kg import evaluate_kg

graphgen/operators/extract/extract_service.py CHANGED Viewed

@@ -2,7 +2,7 @@ import json
 from typing import Tuple
 from graphgen.bases import BaseLLMWrapper, BaseOperator, Chunk
-from graphgen.common import init_llm
 from graphgen.models.extractor import SchemaGuidedExtractor
 from graphgen.utils import logger, run_concurrent

 from typing import Tuple
 from graphgen.bases import BaseLLMWrapper, BaseOperator, Chunk
+from graphgen.common.init_llm import init_llm
 from graphgen.models.extractor import SchemaGuidedExtractor
 from graphgen.utils import logger, run_concurrent

graphgen/operators/generate/generate_service.py CHANGED Viewed

@@ -1,6 +1,8 @@
 from typing import Tuple
 from graphgen.bases import BaseKVStorage, BaseLLMWrapper, BaseOperator
-from graphgen.common import init_llm, init_storage
 from graphgen.utils import logger, run_concurrent

 from typing import Tuple
 from graphgen.bases import BaseKVStorage, BaseLLMWrapper, BaseOperator
+from graphgen.common.init_llm import init_llm
+from graphgen.common.init_storage import init_storage
 from graphgen.utils import logger, run_concurrent

graphgen/operators/judge/judge_service.py CHANGED Viewed

@@ -1,8 +1,9 @@
-from typing import Tuple
 import math
 from graphgen.bases import BaseGraphStorage, BaseLLMWrapper, BaseOperator
-from graphgen.common import init_llm, init_storage
 from graphgen.templates import STATEMENT_JUDGEMENT_PROMPT
 from graphgen.utils import logger, run_concurrent, yes_no_loss_entropy

 import math
+from typing import Tuple
 from graphgen.bases import BaseGraphStorage, BaseLLMWrapper, BaseOperator
+from graphgen.common.init_llm import init_llm
+from graphgen.common.init_storage import init_storage
 from graphgen.templates import STATEMENT_JUDGEMENT_PROMPT
 from graphgen.utils import logger, run_concurrent, yes_no_loss_entropy

graphgen/operators/partition/partition_service.py CHANGED Viewed

@@ -2,7 +2,7 @@ import os
 from typing import Iterable, Tuple
 from graphgen.bases import BaseGraphStorage, BaseOperator, BaseTokenizer
-from graphgen.common import init_storage
 from graphgen.models import (
     AnchorBFSPartitioner,
     BFSPartitioner,

 from typing import Iterable, Tuple
 from graphgen.bases import BaseGraphStorage, BaseOperator, BaseTokenizer
+from graphgen.common.init_storage import init_storage
 from graphgen.models import (
     AnchorBFSPartitioner,
     BFSPartitioner,

graphgen/operators/quiz/quiz_service.py CHANGED Viewed

@@ -1,7 +1,8 @@
 from typing import Tuple
 from graphgen.bases import BaseGraphStorage, BaseLLMWrapper, BaseOperator
-from graphgen.common import init_llm, init_storage
 from graphgen.models import QuizGenerator
 from graphgen.utils import logger, run_concurrent

 from typing import Tuple
 from graphgen.bases import BaseGraphStorage, BaseLLMWrapper, BaseOperator
+from graphgen.common.init_llm import init_llm
+from graphgen.common.init_storage import init_storage
 from graphgen.models import QuizGenerator
 from graphgen.utils import logger, run_concurrent

graphgen/operators/read/read.py CHANGED Viewed

@@ -3,7 +3,7 @@ from typing import Any, List, Optional, Union
 import ray
-from graphgen.common import init_storage
 from graphgen.models import (
     CSVReader,
     JSONReader,

 import ray
+from graphgen.common.init_storage import init_storage
 from graphgen.models import (
     CSVReader,
     JSONReader,

graphgen/operators/rephrase/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .rephrase_service import RephraseService

graphgen/operators/rephrase/rephrase_service.py ADDED Viewed

	@@ -0,0 +1,62 @@

+from typing import Tuple
+from graphgen.bases import BaseLLMWrapper, BaseOperator
+from graphgen.common.init_llm import init_llm
+from graphgen.utils import run_concurrent
+class RephraseService(BaseOperator):
+    """
+    Generate question-answer pairs based on nodes and edges.
+    """
+    def __init__(
+        self,
+        working_dir: str = "cache",
+        method: str = "aggregated",
+        **rephrase_kwargs,
+    ):
+        super().__init__(working_dir=working_dir, op_name="rephrase_service")
+        self.llm_client: BaseLLMWrapper = init_llm("synthesizer")
+        self.method = method
+        self.rephrase_kwargs = rephrase_kwargs
+        if self.method == "style_controlled":
+            from graphgen.models import StyleControlledRephraser
+            self.rephraser = StyleControlledRephraser(
+                self.llm_client,
+                style=rephrase_kwargs.get("style", "critical_analysis"),
+            )
+        else:
+            raise ValueError(f"Unsupported rephrase method: {self.method}")
+    def process(self, batch: list) -> Tuple[list, dict]:
+        """
+        Rephrase the texts in the batch.
+        :return: A tuple of (results, meta_updates)
+            results: A list of dicts containing rephrased texts. Each dict has the structure:
+                {"_trace_id": str, "content": str}
+            meta_updates: A dict mapping source IDs to lists of trace IDs for the rephrased texts.
+        """
+        final_results = []
+        meta_updates = {}
+        results = run_concurrent(
+            self.rephraser.rephrase,
+            batch,
+            desc="Rephrasing texts",
+            unit="batch",
+        )
+        for input_trace_id, rephrased in zip(
+            [item["_trace_id"] for item in batch], results
+        ):
+            if not rephrased:
+                continue
+            rephrased["_trace_id"] = self.get_trace_id(rephrased)
+            results.append(rephrased)
+            meta_updates.setdefault(input_trace_id, []).append(rephrased["_trace_id"])
+            final_results.append(rephrased)
+        return final_results, meta_updates

graphgen/operators/search/search_service.py CHANGED Viewed

@@ -4,7 +4,7 @@ from typing import Optional
 import pandas as pd
 from graphgen.bases import BaseOperator
-from graphgen.common import init_storage
 from graphgen.utils import compute_content_hash, logger, run_concurrent

 import pandas as pd
 from graphgen.bases import BaseOperator
+from graphgen.common.init_storage import init_storage
 from graphgen.utils import compute_content_hash, logger, run_concurrent

graphgen/templates/__init__.py CHANGED Viewed

@@ -14,5 +14,6 @@ from .generation import (
     VQA_GENERATION_PROMPT,
 )
 from .kg import KG_EXTRACTION_PROMPT, KG_SUMMARIZATION_PROMPT, MMKG_EXTRACTION_PROMPT
 from .search_judgement import SEARCH_JUDGEMENT_PROMPT
 from .statement_judgement import STATEMENT_JUDGEMENT_PROMPT

     VQA_GENERATION_PROMPT,
 )
 from .kg import KG_EXTRACTION_PROMPT, KG_SUMMARIZATION_PROMPT, MMKG_EXTRACTION_PROMPT
+from .rephrasing import STYLE_CONTROLLED_REPHRASING_PROMPTS
 from .search_judgement import SEARCH_JUDGEMENT_PROMPT
 from .statement_judgement import STATEMENT_JUDGEMENT_PROMPT

graphgen/templates/rephrasing/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .style_controlled_rephrasing import STYLE_CONTROLLED_REPHRASING_PROMPTS

graphgen/templates/rephrasing/style_controlled_rephrasing/__init__.py ADDED Viewed

	@@ -0,0 +1,21 @@

+from .critical_analysis_rephrasing import CRITICAL_ANALYSIS_REPHRASING_PROMPTS
+from .cross_domain_analogy_rephrasing import CROSS_DOMAIN_ANALOGY_REPHRASING_PROMPTS
+from .executive_summary_rephrasing import EXECUTIVE_SUMMARY_REPHRASING_PROMPTS
+from .first_person_narrative_rephrasing import FIRST_PERSON_NARRATIVE_REPHRASING_PROMPTS
+from .historical_evolution_perspective_rephrasing import (
+    HISTORICAL_EVOLUTION_PERSPECTIVE_REPHRASING_PROMPTS,
+)
+from .popular_science_rephrasing import POPULAR_SCIENCE_REPHRASING_PROMPTS
+from .qa_dialogue_format_rephrasing import QA_DIALOGUE_FORMAT_REPHRASING_PROMPTS
+from .technical_deep_dive_rephrasing import TECHNICAL_DEEP_DIVE_REPHRASING_PROMPTS
+# Registry of style name -> per-language prompt templates. StyleControlledRephraser
+# indexes it as [style][language] and renders the template with str.format(text=...).
+STYLE_CONTROLLED_REPHRASING_PROMPTS = {
+    "popular_science": POPULAR_SCIENCE_REPHRASING_PROMPTS,
+    "critical_analysis": CRITICAL_ANALYSIS_REPHRASING_PROMPTS,
+    "cross_domain_analogy": CROSS_DOMAIN_ANALOGY_REPHRASING_PROMPTS,
+    "technical_deep_dive": TECHNICAL_DEEP_DIVE_REPHRASING_PROMPTS,
+    "executive_summary": EXECUTIVE_SUMMARY_REPHRASING_PROMPTS,
+    "first_person_narrative": FIRST_PERSON_NARRATIVE_REPHRASING_PROMPTS,
+    "historical_evolution_perspective": HISTORICAL_EVOLUTION_PERSPECTIVE_REPHRASING_PROMPTS,
+    "qa_dialogue_format": QA_DIALOGUE_FORMAT_REPHRASING_PROMPTS,
+}

graphgen/templates/rephrasing/style_controlled_rephrasing/critical_analysis_rephrasing.py ADDED Viewed

	@@ -0,0 +1,52 @@

+TEMPLATE_ZH = """
+【任务】以学术批判视角改写以下内容，形成技术评论文章。
+【核心要求】
+1. 语气风格：客观理性，第三人称学术视角，使用规范学术用语
+2. 内容结构：
+   - 准确总结原文核心方法/发现（占比40%）
+   - 分析技术优势与创新点（占比20%）
+   - 指出潜在局限性与假设条件（占比20%）
+   - 提出可能的改进方向或未来工作（占比20%）
+3. 引用规范：保留原文所有关键引用，采用标准学术引用格式
+4. 事实准确性：不得歪曲或误读原文技术细节
+【输出格式】
+- 标题：原标题 + "：一项批判性分析"
+- 段落：标准学术论文章节结构
+- 字数：与原文相当或略长
+原文内容：
+{text}
+请输出批判性分析改写版本：
+"""
+TEMPLATE_EN = """
+【Task】Rewrite the following content from an academic critical perspective as a technical commentary.
+【Core Requirements】
+1. Tone: Objective and rational, third-person academic perspective, using standard academic terminology
+2. Structure:
+   - Accurately summarize core methods/findings (40% of content)
+   - Analyze technical advantages and innovations (20%)
+   - Identify potential limitations and assumptions (20%)
+   - Propose possible improvements or future work (20%)
+3. Citations: Retain all key references from original, using standard academic citation format
+4. Factual Accuracy: Do not distort or misinterpret technical details
+【Output Format】
+- Title: Original Title + ": A Critical Analysis"
+- Paragraphs: Standard academic paper structure
+- Length: Similar to or slightly longer than original
+Original Content:
+{text}
+Please output the critically analyzed rewrite:
+"""
+# Critical-analysis templates keyed by language code. They are rendered with
+# str.format(text=...), so any literal brace added to a template must be doubled.
+CRITICAL_ANALYSIS_REPHRASING_PROMPTS = {
+    "zh": TEMPLATE_ZH,
+    "en": TEMPLATE_EN,
+}

graphgen/templates/rephrasing/style_controlled_rephrasing/cross_domain_analogy_rephrasing.py ADDED Viewed

	@@ -0,0 +1,62 @@

+TEMPLATE_ZH = """
+【任务】通过跨领域类比解释技术概念。
+【类比原则】
+- 类比源领域：生物学、物理学、建筑学、经济学、烹饪等领域
+- 类比强度：类比关系需直观且深刻，避免牵强附会
+- 目标：降低理解门槛，同时保持技术严谨性
+【核心要求】
+1. 双轨并行：每个技术概念配一个恰当类比
+2. 类比结构：
+   - 先介绍技术概念（准确、完整）
+   - 再引入类比对象及其映射关系
+   - 最后说明类比局限性和适用范围
+3. 保真红线：技术部分必须与原文完全一致，不得因类比而简化
+4. 创新性：鼓励使用新颖、出人意料但合理的类比
+5. 篇幅：可比原文扩展20-40%
+【评估标准】
+- 类比恰当性（技术概念与类比对象的核心机制必须同构）
+- 技术准确性（不得扭曲事实）
+- 启发性（帮助读者建立深层理解）
+原文内容：
+{text}
+请输出跨领域类比版本：
+"""
+TEMPLATE_EN = """
+【Task】Explain technical concepts through cross-domain analogies.
+【Analogy Principles】
+- Source Domains: Biology, physics, architecture, economics, cooking, etc.
+- Strength: Analogy should be intuitive yet profound, avoid forced comparisons
+- Goal: Lower understanding barrier while maintaining technical rigor
+【Core Requirements】
+1. Dual Track: Pair each technical concept with an appropriate analogy
+2. Analogy Structure:
+   - First introduce technical concept (accurate and complete)
+   - Then introduce analogy object and mapping relationship
+   - Finally explain analogy limitations and applicable scope
+3. Fidelity Baseline: Technical parts must be identical to original, no simplification for analogy's sake
+4. Innovation: Encourage novel, surprising but reasonable analogies
+5. Length: May expand 20-40% beyond original
+【Evaluation Criteria】
+- Analogy Appropriateness (core mechanisms must be isomorphic)
+- Technical Accuracy (no factual distortion)
+- Heuristic Value (helps build deep understanding)
+Original Content:
+{text}
+Please output the cross-domain analogy version:
+"""
+# Cross-domain-analogy templates keyed by language code. They are rendered with
+# str.format(text=...), so any literal brace added to a template must be doubled.
+CROSS_DOMAIN_ANALOGY_REPHRASING_PROMPTS = {
+    "zh": TEMPLATE_ZH,
+    "en": TEMPLATE_EN,
+}

graphgen/templates/rephrasing/style_controlled_rephrasing/executive_summary_rephrasing.py ADDED Viewed

	@@ -0,0 +1,64 @@

+TEMPLATE_ZH = """
+【任务】为高管层撰写决策摘要。
+【读者假设】
+- 职位：CTO/技术VP/产品总监
+- 核心关切：技术价值、资源投入、竞争壁垒、商业影响
+【核心要求】
+1. 信息密度：每句话必须传达战略价值
+2. 内容优先级：
+   - 核心技术突破与创新价值（必须）
+   - 与竞品的差异化优势（必须）
+   - 实施成本与资源需求（必须）
+   - 潜在商业应用场景（必须）
+   - 技术风险评估（可选）
+3. 语言风格：金字塔原理，结论先行，数据支撑
+4. 简洁性：控制在原文长度的30-50%
+5. 事实准确性：所有数据、性能指标必须与原文完全一致
+【禁用表达】
+- 避免"可能"、"也许"等不确定表述
+- 禁用技术细节描述（除非直接影响决策）
+- 避免行话和缩写
+原文内容：
+{text}
+请直接输出高管决策摘要：
+"""
+TEMPLATE_EN = """
+【Task】Write an executive summary for C-suite decision-making.
+【Audience Assumption】
+- Position: CTO/VP of Engineering/Product Director
+- Core Concerns: Technical value, resource investment, competitive moats, business impact
+【Core Requirements】
+1. Information Density: Every sentence must convey strategic value
+2. Content Priority:
+   - Core technical breakthrough and innovation value (MUST)
+   - Differentiated advantages over competitors (MUST)
+   - Implementation cost and resource requirements (MUST)
+   - Potential business application scenarios (MUST)
+   - Technical risk assessment (OPTIONAL)
+3. Language Style: Pyramid principle - lead with conclusions, support with data
+4. Conciseness: 30-50% of original length
+5. Factual Accuracy: All data and performance metrics must be identical to original
+【Prohibited Expressions】
+- Avoid uncertain terms like "maybe," "perhaps"
+- No deep technical details (unless directly impacting decision)
+- No jargon or unexplained acronyms
+Original Content:
+{text}
+Please output the executive summary directly:
+"""
+# Executive-summary templates keyed by language code. They are rendered with
+# str.format(text=...), so any literal brace added to a template must be doubled.
+EXECUTIVE_SUMMARY_REPHRASING_PROMPTS = {
+    "zh": TEMPLATE_ZH,
+    "en": TEMPLATE_EN,
+}

graphgen/templates/rephrasing/style_controlled_rephrasing/first_person_narrative_rephrasing.py ADDED Viewed

	@@ -0,0 +1,60 @@

+TEMPLATE_ZH = """
+【任务】将技术文档改写为第一人称实践经验分享。
+【角色设定】
+- 身份：资深技术实践者/研究员
+- 场景：技术博客/内部经验分享会
+- 目标读者：同行从业者
+【核心要求】
+1. 视角：全程使用"我/我们"第一人称
+2. 内容融合：
+   - 保留原文所有技术事实（代码、数据、架构）
+   - 添加个人实践中的观察、挑战与解决思路
+   - 分享真实应用场景和效果数据
+3. 语言风格：专业但亲和，避免过度口语化
+4. 叙事元素：可包含"最初尝试-遇到问题-调整思路-最终效果"的故事线
+5. 事实红线：技术细节必须与原文完全一致，不得虚构数据
+【禁止】
+- 不得编造不存在的个人经历
+- 不得改变技术实现细节
+原文内容：
+{text}
+请直接输出第一人称叙事版本：
+"""
+TEMPLATE_EN = """
+【Task】Rewrite the technical document as a first-person practical experience sharing.
+【Role Setting】
+- Identity: Senior practitioner/researcher
+- Scenario: Technical blog/internal sharing session
+- Target Audience: Peer professionals
+【Core Requirements】
+1. Perspective: Use first-person "I/we" throughout
+2. Content Integration:
+   - Retain ALL technical facts (code, data, architecture) from original
+   - Add personal observations, challenges, and solution approaches from practice
+   - Share real application scenarios and performance data
+3. Language Style: Professional yet approachable, avoid excessive colloquialism
+4. Narrative: May include "initial attempt-encountered problem-adjusted approach-final result" storyline
+5. Factual Baseline: Technical details must be identical to original, no fabricated data
+【Prohibited】
+- Do not invent non-existent personal experiences
+- Do not alter technical implementation details
+Original Content:
+{text}
+Please output the first-person narrative version directly:
+"""
+# First-person-narrative templates keyed by language code. They are rendered with
+# str.format(text=...), so any literal brace added to a template must be doubled.
+FIRST_PERSON_NARRATIVE_REPHRASING_PROMPTS = {
+    "zh": TEMPLATE_ZH,
+    "en": TEMPLATE_EN,
+}

graphgen/templates/rephrasing/style_controlled_rephrasing/historical_evolution_perspective_rephrasing.py ADDED Viewed

	@@ -0,0 +1,68 @@

+TEMPLATE_ZH = """
+【任务】按技术发展史视角重构内容。
+【叙事框架】
+- 时间轴线：从起源→关键突破→当前状态→未来趋势
+- 演进逻辑：揭示"技术瓶颈突破→新范式建立→新问题出现"的循环
+【核心要求】
+1. 时间准确性：所有时间点、版本号、发布顺序必须核实准确
+2. 因果链：
+   - 明确每个演进阶段的驱动力（理论突破/工程需求/硬件进步）
+   - 指出技术演进的必然性与偶然性
+3. 内容结构：
+   - 背景与起源（技术诞生前的状态）
+   - 关键里程碑（带具体时间）
+   - 范式转移（革命性变化）
+   - 当前成熟形态
+   - 未来展望（基于原文技术路径）
+4. 技术保真：所有技术描述必须与原文事实一致
+5. 分析深度：不能仅罗列事实，必须揭示演进逻辑
+【输出规范】
+- 使用时间轴标记（如[2017]、[2020]）增强可读性
+- 关键人物/团队需保留原名
+- 禁止编造不存在的技术演进路径
+原文内容：
+{text}
+请输出历史演进视角版本：
+"""
+TEMPLATE_EN = """
+【Task】Reconstruct content from a technological history evolution perspective.
+【Narrative Framework】
+- Timeline: Origin → Key Breakthroughs → Current State → Future Trends
+- Evolution Logic: Reveal the cycle of "technical bottleneck breakthrough → new paradigm establishment → new problems emerge"
+【Core Requirements】
+1. Temporal Accuracy: ALL dates, version numbers, and release sequences must be verified and accurate
+2. Causality Chain:
+   - Identify drivers of each evolution stage (theoretical breakthrough/engineering needs/hardware advances)
+   - Point out inevitability and contingency of technical evolution
+3. Content Structure:
+   - Background & Origin (state before technology birth)
+   - Key Milestones (with specific dates)
+   - Paradigm Shifts (revolutionary changes)
+   - Current Mature Form
+   - Future Outlook (based on original's technical trajectory)
+4. Technical Fidelity: ALL technical descriptions must be factually consistent with original
+5. Analytical Depth: Must reveal evolution logic, not just list facts
+【Output Specification】
+- Use timeline markers ([2017], [2020]) for readability
+- Keep original names of key people/teams
+- DO NOT invent non-existent evolution paths
+Original Content:
+{text}
+Please output the historical evolution version:
+"""
+# Historical-evolution templates keyed by language code. They are rendered with
+# str.format(text=...), so any literal brace added to a template must be doubled.
+HISTORICAL_EVOLUTION_PERSPECTIVE_REPHRASING_PROMPTS = {
+    "zh": TEMPLATE_ZH,
+    "en": TEMPLATE_EN,
+}

graphgen/templates/rephrasing/style_controlled_rephrasing/popular_science_rephrasing.py ADDED Viewed

	@@ -0,0 +1,46 @@

+TEMPLATE_ZH = """
+【任务】将以下技术文档改写为面向普通读者的科普文章。
+【核心要求】
+1. 语言风格：生动活泼，避免冷僻专业术语；必须使用术语时，需用生活化比喻或类比解释
+2. 内容保真：所有核心事实、数据和技术结论必须准确无误，不得篡改或过度简化
+3. 叙事结构：采用"问题-发现-应用"的故事线，增强可读性
+4. 读者定位：假设读者具有高中文化水平，无专业背景
+5. 篇幅控制：可适当扩展，但每段聚焦一个核心概念
+【禁止行为】
+- 不得删除关键技术细节
+- 不得改变原意或事实
+- 避免使用"这个东西"、"那个技术"等模糊指代
+原文内容：
+{text}
+请直接输出改写后的科普文章：
+"""
+TEMPLATE_EN = """
+【Task】Rewrite the following technical document as a popular science article for general readers.
+【Core Requirements】
+1. Language Style: Lively and engaging; avoid jargon; when technical terms are necessary, explain with everyday analogies or metaphors
+2. Content Fidelity: All core facts, data, and technical conclusions must be accurate. Do not distort or oversimplify
+3. Narrative Structure: Use a "problem-discovery-application" storyline to enhance readability
+4. Audience: Assume high school education level, no technical background
+5. Length: May expand moderately, but each paragraph should focus on one core concept
+【Prohibited】
+- Do not remove key technical details
+- Do not change original meaning or facts
+- Avoid vague references like "this thing" or "that technology"
+Original Content:
+{text}
+Please output the rewritten popular science article directly:
+"""
+# Popular-science templates keyed by language code. They are rendered with
+# str.format(text=...), so any literal brace added to a template must be doubled.
+POPULAR_SCIENCE_REPHRASING_PROMPTS = {
+    "zh": TEMPLATE_ZH,
+    "en": TEMPLATE_EN,
+}

graphgen/templates/rephrasing/style_controlled_rephrasing/qa_dialogue_format_rephrasing.py ADDED Viewed

	@@ -0,0 +1,73 @@

+TEMPLATE_ZH = """
+【任务】将技术文档重构为自然问答对话。
+【对话设计原则】
+- 对话角色：提问者（好奇心驱动的学习者） vs 解答者（专家）
+- 问题序列：从基础概念→技术细节→应用实践→深度追问，逻辑递进
+【核心要求】
+1. 问题设计：
+   - 每个问题必须源于原文知识点
+   - 问题要具体、明确，避免空泛
+   - 体现真实学习过程中的疑惑点
+2. 回答规范：
+   - 回答必须准确、完整，引用原文事实
+   - 保持专家解答的权威性
+   - 可适当补充背景信息帮助理解
+3. 对话流畅性：问题间有自然过渡，避免跳跃
+4. 覆盖度：确保原文所有重要知识点都被至少一个问题覆盖
+5. 事实核查：回答中的技术细节、数据必须与原文完全一致
+【输出格式】
+Q1: [问题1]
+A1: [回答1]
+Q2: [问题2]
+A2: [回答2]
+...
+原文内容：
+{text}
+请输出问答对话版本：
+"""
+TEMPLATE_EN = """
+【Task】Reconstruct the technical document as a natural Q&A dialogue.
+【Dialogue Design Principles】
+- Roles: Inquirer (curious learner) vs. Expert (domain specialist)
+- Question Flow: From basic concepts → technical details → practical applications → deep follow-ups, logically progressive
+【Core Requirements】
+1. Question Design:
+   - Each question must originate from original content knowledge points
+   - Questions should be specific and clear, avoid vagueness
+   - Reflect points of confusion in the real learning process
+2. Answer Specification:
+   - Answers must be accurate and complete, citing original facts
+   - Maintain authoritative expert tone
+   - May supplement background information when helpful
+3. Dialogue Fluency: Natural transition between questions, avoid jumping
+4. Coverage: Ensure ALL important knowledge points from original are covered by at least one question
+5. Fact Check: Technical details and data in answers must be identical to original
+【Output Format】
+Q1: [Question 1]
+A1: [Answer 1]
+Q2: [Question 2]
+A2: [Answer 2]
+...
+Original Content:
+{text}
+Please output the Q&A dialogue version:
+"""
+# Q&A-dialogue templates keyed by language code. They are rendered with
+# str.format(text=...), so any literal brace added to a template must be doubled.
+QA_DIALOGUE_FORMAT_REPHRASING_PROMPTS = {
+    "zh": TEMPLATE_ZH,
+    "en": TEMPLATE_EN,
+}

graphgen/templates/rephrasing/style_controlled_rephrasing/technical_deep_dive_rephrasing.py ADDED Viewed

	@@ -0,0 +1,66 @@

+TEMPLATE_ZH = """
+【任务】以领域专家视角进行深度技术剖析。
+【读者定位】
+- 目标读者：同领域高级工程师/研究员
+- 预期效果：揭示技术细节、设计权衡与实现原理
+【核心要求】
+1. 技术精确性：
+   - 使用精确的专业术语和符号表示
+   - 补充技术背景、相关工作和理论基础
+   - 必要时用公式或代码片段说明
+2. 深度维度：
+   - 算法复杂度分析
+   - 系统架构设计权衡
+   - 性能瓶颈与优化空间
+   - 边界条件和异常情况处理
+3. 内容扩展：可在原文基础上增加30-50%的技术细节
+4. 语气：权威、严谨、逻辑严密
+【输出规范】
+- 保持原文所有事实准确无误
+- 新增细节需符合领域常识
+- 使用标准技术文档格式
+原文内容：
+{text}
+请输出技术深度剖析版本：
+"""
+TEMPLATE_EN = """
+【Task】Conduct an in-depth technical analysis from a domain expert perspective.
+【Audience】
+- Target: Senior engineers/researchers in the same field
+- Goal: Reveal technical details, design trade-offs, and implementation principles
+【Core Requirements】
+1. Technical Precision:
+   - Use precise technical terminology and notation
+   - Supplement with technical background, related work, and theoretical foundations
+   - Include formulas or code snippets when necessary
+2. Depth Dimensions:
+   - Algorithmic complexity analysis
+   - System architecture design trade-offs
+   - Performance bottlenecks and optimization opportunities
+   - Edge cases and exception handling
+3. Content Expansion: May add 30-50% more technical details than original
+4. Tone: Authoritative, rigorous, logically sound
+【Output Specification】
+- Maintain 100% factual accuracy from original
+- Added details must align with domain common knowledge
+- Use standard technical documentation format
+Original Content:
+{text}
+Please output the technical deep-dive version:
+"""
+# Technical-deep-dive templates keyed by language code. They are rendered with
+# str.format(text=...), so any literal brace added to a template must be doubled.
+TECHNICAL_DEEP_DIVE_REPHRASING_PROMPTS = {
+    "zh": TEMPLATE_ZH,
+    "en": TEMPLATE_EN,
+}

requirements.txt CHANGED Viewed

@@ -12,7 +12,7 @@ nltk
 jieba
 plotly
 pandas
-gradio==5.44.1
 kaleido
 pyyaml
 langcodes
@@ -21,8 +21,7 @@ fastapi
 trafilatura
 aiohttp
 socksio
-pydantic
-ray==2.53.0
 pyarrow
 leidenalg

 jieba
 plotly
 pandas
+gradio==5.50.0
 kaleido
 pyyaml
 langcodes
 trafilatura
 aiohttp
 socksio
+ray[default]==2.53.0
 pyarrow
 leidenalg