github-actions[bot] committed on
Commit
a8c3e2a
·
1 Parent(s): bf63ef4

Auto-sync from demo at Tue Dec 23 13:00:55 UTC 2025

Browse files
graphgen/models/generator/aggregated_generator.py CHANGED
@@ -1,4 +1,5 @@
1
- from typing import Any
 
2
 
3
  from graphgen.bases import BaseGenerator
4
  from graphgen.templates import AGGREGATED_GENERATION_PROMPT
@@ -56,19 +57,21 @@ class AggregatedGenerator(BaseGenerator):
56
  return prompt
57
 
58
  @staticmethod
59
- def parse_rephrased_text(response: str) -> str:
60
  """
61
  Parse the rephrased text from the response.
62
  :param response:
63
  :return: rephrased text
64
  """
65
- if "Rephrased Text:" in response:
66
- rephrased_text = response.split("Rephrased Text:")[1].strip()
67
- elif "重述文本:" in response:
68
- rephrased_text = response.split("重述文本:")[1].strip()
 
69
  else:
70
- rephrased_text = response.strip()
71
- return rephrased_text.strip('"')
 
72
 
73
  @staticmethod
74
  def _build_prompt_for_question_generation(answer: str) -> str:
@@ -85,15 +88,13 @@ class AggregatedGenerator(BaseGenerator):
85
 
86
  @staticmethod
87
  def parse_response(response: str) -> dict:
88
- if response.startswith("Question:"):
89
- question = response[len("Question:") :].strip()
90
- elif response.startswith("问题:"):
91
- question = response[len("问题:") :].strip()
92
  else:
93
- question = response.strip()
94
- return {
95
- "question": question,
96
- }
97
 
98
  async def generate(
99
  self,
@@ -110,9 +111,13 @@ class AggregatedGenerator(BaseGenerator):
110
  rephrasing_prompt = self.build_prompt(batch)
111
  response = await self.llm_client.generate_answer(rephrasing_prompt)
112
  context = self.parse_rephrased_text(response)
 
 
113
  question_generation_prompt = self._build_prompt_for_question_generation(context)
114
  response = await self.llm_client.generate_answer(question_generation_prompt)
115
  question = self.parse_response(response)["question"]
 
 
116
  logger.debug("Question: %s", question)
117
  logger.debug("Answer: %s", context)
118
  qa_pairs = {
 
1
+ import re
2
+ from typing import Any, Optional
3
 
4
  from graphgen.bases import BaseGenerator
5
  from graphgen.templates import AGGREGATED_GENERATION_PROMPT
 
57
  return prompt
58
 
59
  @staticmethod
60
+ def parse_rephrased_text(response: str) -> Optional[str]:
61
  """
62
  Parse the rephrased text from the response.
63
  :param response:
64
  :return: rephrased text
65
  """
66
+ rephrased_match = re.search(
67
+ r"<rephrased_text>(.*?)</rephrased_text>", response, re.DOTALL
68
+ )
69
+ if rephrased_match:
70
+ rephrased_text = rephrased_match.group(1).strip()
71
  else:
72
+ logger.warning("Failed to parse rephrased text from response: %s", response)
73
+ return None
74
+ return rephrased_text.strip('"').strip("'")
75
 
76
  @staticmethod
77
  def _build_prompt_for_question_generation(answer: str) -> str:
 
88
 
89
  @staticmethod
90
  def parse_response(response: str) -> dict:
91
+ question_match = re.search(r"<question>(.*?)</question>", response, re.DOTALL)
92
+ if question_match:
93
+ question = question_match.group(1).strip()
 
94
  else:
95
+ logger.warning("Failed to parse question from response: %s", response)
96
+ return {"question": ""}
97
+ return {"question": question.strip('"').strip("'")}
 
98
 
99
  async def generate(
100
  self,
 
111
  rephrasing_prompt = self.build_prompt(batch)
112
  response = await self.llm_client.generate_answer(rephrasing_prompt)
113
  context = self.parse_rephrased_text(response)
114
+ if not context:
115
+ return result
116
  question_generation_prompt = self._build_prompt_for_question_generation(context)
117
  response = await self.llm_client.generate_answer(question_generation_prompt)
118
  question = self.parse_response(response)["question"]
119
+ if not question:
120
+ return result
121
  logger.debug("Question: %s", question)
122
  logger.debug("Answer: %s", context)
123
  qa_pairs = {
graphgen/models/generator/atomic_generator.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from typing import Any
2
 
3
  from graphgen.bases import BaseGenerator
@@ -29,17 +30,18 @@ class AtomicGenerator(BaseGenerator):
29
  :param response:
30
  :return:
31
  """
32
- if "Question:" in response and "Answer:" in response:
33
- question = response.split("Question:")[1].split("Answer:")[0].strip()
34
- answer = response.split("Answer:")[1].strip()
35
- elif "问题:" in response and "答案:" in response:
36
- question = response.split("问题:")[1].split("答案:")[0].strip()
37
- answer = response.split("答案:")[1].strip()
38
  else:
39
  logger.warning("Failed to parse response: %s", response)
40
  return {}
41
- question = question.strip('"')
42
- answer = answer.strip('"')
 
43
  logger.debug("Question: %s", question)
44
  logger.debug("Answer: %s", answer)
45
  return {
 
1
+ import re
2
  from typing import Any
3
 
4
  from graphgen.bases import BaseGenerator
 
30
  :param response:
31
  :return:
32
  """
33
+ question_match = re.search(r"<question>(.*?)</question>", response, re.DOTALL)
34
+ answer_match = re.search(r"<answer>(.*?)</answer>", response, re.DOTALL)
35
+
36
+ if question_match and answer_match:
37
+ question = question_match.group(1).strip()
38
+ answer = answer_match.group(1).strip()
39
  else:
40
  logger.warning("Failed to parse response: %s", response)
41
  return {}
42
+
43
+ question = question.strip('"').strip("'")
44
+ answer = answer.strip('"').strip("'")
45
  logger.debug("Question: %s", question)
46
  logger.debug("Answer: %s", answer)
47
  return {
graphgen/models/generator/cot_generator.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from typing import Any
2
 
3
  from graphgen.bases import BaseGenerator
@@ -67,22 +68,26 @@ class CoTGenerator(BaseGenerator):
67
 
68
  @staticmethod
69
  def parse_response(response: str) -> dict:
70
- if "Question:" in response and "Reasoning-Path Design:" in response:
71
- question = (
72
- response.split("Question:")[1]
73
- .split("Reasoning-Path Design:")[0]
74
- .strip()
75
- )
76
- reasoning_path = response.split("Reasoning-Path Design:")[1].strip()
77
- elif "问题:" in response and "推理路径设计:" in response:
78
- question = response.split("问题:")[1].split("推理路径设计:")[0].strip()
79
- reasoning_path = response.split("推理路径设计:")[1].strip()
 
 
 
80
  else:
81
- logger.warning("Failed to parse CoT template: %s", response)
82
  return {}
83
 
84
- question = question.strip('"')
85
- reasoning_path = reasoning_path.strip('"')
 
86
  logger.debug("CoT Question: %s", question)
87
  logger.debug("CoT Reasoning Path: %s", reasoning_path)
88
  return {
@@ -105,6 +110,8 @@ class CoTGenerator(BaseGenerator):
105
  prompt = self.build_prompt(batch)
106
  response = await self.llm_client.generate_answer(prompt)
107
  response = self.parse_response(response)
 
 
108
  question, reasoning_path = response["question"], response["reasoning_path"]
109
  prompt = self.build_prompt_for_cot_generation(batch, question, reasoning_path)
110
  cot_answer = await self.llm_client.generate_answer(prompt)
 
1
+ import re
2
  from typing import Any
3
 
4
  from graphgen.bases import BaseGenerator
 
68
 
69
  @staticmethod
70
  def parse_response(response: str) -> dict:
71
+ """
72
+ Parse CoT template from response.
73
+ :param response:
74
+ :return: dict with question and reasoning_path
75
+ """
76
+ question_match = re.search(r"<question>(.*?)</question>", response, re.DOTALL)
77
+ reasoning_path_match = re.search(
78
+ r"<reasoning_path>(.*?)</reasoning_path>", response, re.DOTALL
79
+ )
80
+
81
+ if question_match and reasoning_path_match:
82
+ question = question_match.group(1).strip()
83
+ reasoning_path = reasoning_path_match.group(1).strip()
84
  else:
85
+ logger.warning("Failed to parse response: %s", response)
86
  return {}
87
 
88
+ question = question.strip('"').strip("'")
89
+ reasoning_path = reasoning_path.strip('"').strip("'")
90
+
91
  logger.debug("CoT Question: %s", question)
92
  logger.debug("CoT Reasoning Path: %s", reasoning_path)
93
  return {
 
110
  prompt = self.build_prompt(batch)
111
  response = await self.llm_client.generate_answer(prompt)
112
  response = self.parse_response(response)
113
+ if not response:
114
+ return result
115
  question, reasoning_path = response["question"], response["reasoning_path"]
116
  prompt = self.build_prompt_for_cot_generation(batch, question, reasoning_path)
117
  cot_answer = await self.llm_client.generate_answer(prompt)
graphgen/models/generator/multi_hop_generator.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from typing import Any
2
 
3
  from graphgen.bases import BaseGenerator
@@ -32,17 +33,18 @@ class MultiHopGenerator(BaseGenerator):
32
 
33
  @staticmethod
34
  def parse_response(response: str) -> dict:
35
- if "Question:" in response and "Answer:" in response:
36
- question = response.split("Question:")[1].split("Answer:")[0].strip()
37
- answer = response.split("Answer:")[1].strip()
38
- elif "问题:" in response and "答案:" in response:
39
- question = response.split("问题:")[1].split("答案:")[0].strip()
40
- answer = response.split("答案:")[1].strip()
41
  else:
42
  logger.warning("Failed to parse response: %s", response)
43
  return {}
44
- question = question.strip('"')
45
- answer = answer.strip('"')
 
46
  logger.debug("Question: %s", question)
47
  logger.debug("Answer: %s", answer)
48
  return {
 
1
+ import re
2
  from typing import Any
3
 
4
  from graphgen.bases import BaseGenerator
 
33
 
34
  @staticmethod
35
  def parse_response(response: str) -> dict:
36
+ question_match = re.search(r"<question>(.*?)</question>", response, re.DOTALL)
37
+ answer_match = re.search(r"<answer>(.*?)</answer>", response, re.DOTALL)
38
+
39
+ if question_match and answer_match:
40
+ question = question_match.group(1).strip()
41
+ answer = answer_match.group(1).strip()
42
  else:
43
  logger.warning("Failed to parse response: %s", response)
44
  return {}
45
+
46
+ question = question.strip('"').strip("'")
47
+ answer = answer.strip('"').strip("'")
48
  logger.debug("Question: %s", question)
49
  logger.debug("Answer: %s", answer)
50
  return {
graphgen/models/generator/vqa_generator.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from typing import Any
2
 
3
  from graphgen.bases import BaseGenerator
@@ -38,25 +39,21 @@ class VQAGenerator(BaseGenerator):
38
  :return: QA pairs
39
  """
40
  qa_pairs = {}
41
- qa_list = response.strip().split("\n\n")
42
- for qa in qa_list:
43
- if "Question:" in qa and "Answer:" in qa:
44
- question = qa.split("Question:")[1].split("Answer:")[0].strip()
45
- answer = qa.split("Answer:")[1].strip()
46
- elif "问题:" in qa and "答案:" in qa:
47
- question = qa.split("问题:")[1].split("答案:")[0].strip()
48
- answer = qa.split("答案:")[1].strip()
49
- else:
50
- logger.error("Failed to parse QA pair: %s", qa)
51
- continue
52
- question = question.strip('"')
53
- answer = answer.strip('"')
54
- logger.debug("Question: %s", question)
55
- logger.debug("Answer: %s", answer)
56
- qa_pairs[compute_content_hash(question)] = {
57
- "question": question,
58
- "answer": answer,
59
- }
60
  return qa_pairs
61
 
62
  async def generate(
 
1
+ import re
2
  from typing import Any
3
 
4
  from graphgen.bases import BaseGenerator
 
39
  :return: QA pairs
40
  """
41
  qa_pairs = {}
42
+ pattern = r"<question>(.*?)</question>\s*<answer>(.*?)</answer>"
43
+ matches = re.findall(pattern, response, re.DOTALL)
44
+
45
+ if matches:
46
+ for question, answer in matches:
47
+ question = question.strip().strip('"').strip("'")
48
+ answer = answer.strip().strip('"').strip("'")
49
+ logger.debug("Question: %s", question)
50
+ logger.debug("Answer: %s", answer)
51
+ qa_pairs[compute_content_hash(question)] = {
52
+ "question": question,
53
+ "answer": answer,
54
+ }
55
+ else:
56
+ logger.warning("Error parsing the response %s", response)
 
 
 
 
57
  return qa_pairs
58
 
59
  async def generate(
graphgen/models/llm/local/vllm_wrapper.py CHANGED
@@ -16,7 +16,7 @@ class VLLMWrapper(BaseLLMWrapper):
16
  model: str,
17
  tensor_parallel_size: int = 1,
18
  gpu_memory_utilization: float = 0.9,
19
- temperature: float = 0.0,
20
  top_p: float = 1.0,
21
  topk: int = 5,
22
  **kwargs: Any,
@@ -66,7 +66,7 @@ class VLLMWrapper(BaseLLMWrapper):
66
  sp = self.SamplingParams(
67
  temperature=self.temperature if self.temperature > 0 else 1.0,
68
  top_p=self.top_p if self.temperature > 0 else 1.0,
69
- max_tokens=extra.get("max_new_tokens", 512),
70
  )
71
 
72
  result_generator = self.engine.generate(full_prompt, sp, request_id=request_id)
@@ -82,7 +82,7 @@ class VLLMWrapper(BaseLLMWrapper):
82
 
83
  async def generate_topk_per_token(
84
  self, text: str, history: Optional[List[str]] = None, **extra: Any
85
- ) -> List[Token]:
86
  full_prompt = self._build_inputs(text, history)
87
  request_id = f"graphgen_topk_{uuid.uuid4()}"
88
 
@@ -110,7 +110,9 @@ class VLLMWrapper(BaseLLMWrapper):
110
 
111
  candidate_tokens = []
112
  for _, logprob_obj in top_logprobs.items():
113
- tok_str = logprob_obj.decoded_token.strip() if logprob_obj.decoded_token else ""
 
 
114
  prob = float(math.exp(logprob_obj.logprob))
115
  candidate_tokens.append(Token(tok_str, prob))
116
 
@@ -120,7 +122,7 @@ class VLLMWrapper(BaseLLMWrapper):
120
  main_token = Token(
121
  text=candidate_tokens[0].text,
122
  prob=candidate_tokens[0].prob,
123
- top_candidates=candidate_tokens
124
  )
125
  return [main_token]
126
  return []
 
16
  model: str,
17
  tensor_parallel_size: int = 1,
18
  gpu_memory_utilization: float = 0.9,
19
+ temperature: float = 0.6,
20
  top_p: float = 1.0,
21
  topk: int = 5,
22
  **kwargs: Any,
 
66
  sp = self.SamplingParams(
67
  temperature=self.temperature if self.temperature > 0 else 1.0,
68
  top_p=self.top_p if self.temperature > 0 else 1.0,
69
+ max_tokens=extra.get("max_new_tokens", 2048),
70
  )
71
 
72
  result_generator = self.engine.generate(full_prompt, sp, request_id=request_id)
 
82
 
83
  async def generate_topk_per_token(
84
  self, text: str, history: Optional[List[str]] = None, **extra: Any
85
+ ) -> List[Token]:
86
  full_prompt = self._build_inputs(text, history)
87
  request_id = f"graphgen_topk_{uuid.uuid4()}"
88
 
 
110
 
111
  candidate_tokens = []
112
  for _, logprob_obj in top_logprobs.items():
113
+ tok_str = (
114
+ logprob_obj.decoded_token.strip() if logprob_obj.decoded_token else ""
115
+ )
116
  prob = float(math.exp(logprob_obj.logprob))
117
  candidate_tokens.append(Token(tok_str, prob))
118
 
 
122
  main_token = Token(
123
  text=candidate_tokens[0].text,
124
  prob=candidate_tokens[0].prob,
125
+ top_candidates=candidate_tokens,
126
  )
127
  return [main_token]
128
  return []
graphgen/operators/generate/generate_service.py CHANGED
@@ -61,6 +61,9 @@ class GenerateService(BaseOperator):
61
  unit="batch",
62
  )
63
 
 
 
 
64
  results = self.generator.format_generation_results(
65
  results, output_data_format=self.data_format
66
  )
 
61
  unit="batch",
62
  )
63
 
64
+ # Filter out empty results
65
+ results = [res for res in results if res]
66
+
67
  results = self.generator.format_generation_results(
68
  results, output_data_format=self.data_format
69
  )
graphgen/templates/generation/aggregated_generation.py CHANGED
@@ -132,6 +132,8 @@ To generate a version of the text that is rephrased and conveys the same meaning
132
  - Logical consistency throughout
133
  - Clear cause-and-effect relationships
134
 
 
 
135
  ################
136
  -ENTITIES-
137
  ################
@@ -175,6 +177,8 @@ ANSWER_REPHRASING_ZH: str = """---角色---
175
  - 整体逻辑一致性
176
  - 清晰的因果关系
177
 
 
 
178
  ################
179
  -实体-
180
  ################
@@ -191,6 +195,9 @@ REQUIREMENT_ZH = """
191
  ################
192
  请在下方直接输出连贯的重述文本,不要输出任何额外的内容。
193
 
 
 
 
194
  重述文本:
195
  """
196
 
@@ -198,25 +205,42 @@ REQUIREMENT_EN = """
198
  ################
199
  Please directly output the coherent rephrased text below, without any additional content.
200
 
 
 
 
201
  Rephrased Text:
202
  """
203
 
204
  QUESTION_GENERATION_EN: str = """The answer to a question is provided. Please generate a question that corresponds to the answer.
205
 
206
- ################
207
- Answer:
208
- {answer}
209
- ################
 
 
 
 
 
 
 
210
  Question:
211
  """
212
 
213
  QUESTION_GENERATION_ZH: str = """下面提供了一个问题的答案,请生成一个与答案对应的问题。
214
 
215
- ################
216
- 答案:
217
- {answer}
218
- ################
219
- 问题:
 
 
 
 
 
 
 
220
  """
221
 
222
  AGGREGATED_GENERATION_PROMPT = {
 
132
  - Logical consistency throughout
133
  - Clear cause-and-effect relationships
134
 
135
+ **Attention: Please directly provide the rephrased text without any additional content or analysis.**
136
+
137
  ################
138
  -ENTITIES-
139
  ################
 
177
  - 整体逻辑一致性
178
  - 清晰的因果关系
179
 
180
+ **注意: 请你直接给出重述文本,不要输出任何额外的内容,也不要进行任何分析。**
181
+
182
  ################
183
  -实体-
184
  ################
 
195
  ################
196
  请在下方直接输出连贯的重述文本,不要输出任何额外的内容。
197
 
198
+ 输出格式:
199
+ <rephrased_text>rephrased_text_here</rephrased_text>
200
+
201
  重述文本:
202
  """
203
 
 
205
  ################
206
  Please directly output the coherent rephrased text below, without any additional content.
207
 
208
+ Output format:
209
+ <rephrased_text>rephrased_text_here</rephrased_text>
210
+
211
  Rephrased Text:
212
  """
213
 
214
  QUESTION_GENERATION_EN: str = """The answer to a question is provided. Please generate a question that corresponds to the answer.
215
 
216
+ The answer for which a question needs to be generated is as follows:
217
+ <answer>{answer}</answer>
218
+
219
+ Please note the following requirements:
220
+ 1. Only output one question text without any additional explanations or analysis.
221
+ 2. Do not repeat the content of the answer or any fragments of it.
222
+ 3. The question must be independently understandable and fully match the answer.
223
+
224
+ Output format:
225
+ <question>question_text</question>
226
+
227
  Question:
228
  """
229
 
230
  QUESTION_GENERATION_ZH: str = """下面提供了一个问题的答案,请生成一个与答案对应的问题。
231
 
232
+ 需要生成问题的答案如下:
233
+ <answer>{answer}</answer>
234
+
235
+ 请注意下列要求:
236
+ 1. 仅输出一个问题文本,不得包含任何额外说明或分析
237
+ 2. 不得重复答案内容或其中任何片段
238
+ 3. 问题必须可独立理解且与答案完全匹配
239
+
240
+ 输出格式:
241
+ <question>question_text</question>
242
+
243
+ 问题:
244
  """
245
 
246
  AGGREGATED_GENERATION_PROMPT = {
graphgen/templates/generation/atomic_generation.py CHANGED
@@ -1,28 +1,44 @@
1
  # pylint: disable=C0301
2
  TEMPLATE_EN: str = """You are given a text passage. Your task is to generate a question and answer (QA) pair based on the content of that text.
3
- The answer should be accurate and directly derived from the text. Make sure the QA pair is relevant to the main theme or important details of the given text.
4
- For example:
5
- Question: What is the effect of overexpressing the BG1 gene on grain size and development?
6
- Answer: Overexpression of the BG1 gene leads to significantly increased grain size, demonstrating its role in grain development.
7
 
8
- Question: What role does TAC4 play in the gravitropism of rice shoots?
9
- Answer: TAC4 is a key regulator of gravitropism in rice shoots, promoting the bending of shoots towards the gravity vector.
 
 
 
 
 
 
 
 
 
 
10
 
11
  Here is the text passage you need to generate a QA pair for:
12
  {context}
 
 
13
  """
14
 
15
  TEMPLATE_ZH: str = """给定一个文本段落。你的任务是根据该文本的内容生成一个问答(QA)对。
16
- 答案应准确且直接从文本中得出。确保QA对与给定文本的主题或重要细节相关。
17
- 例如:
18
- 问题:过表达BG1基因对谷粒大小和发育有什么影响?
19
- 答案:BG1基因的过表达显著增加了谷粒大小,表明其在谷物发育中的作用。
20
 
21
- 问题:TAC4在水稻茎的重力性状中扮演什么角色?
22
- 答案:TAC4是水稻茎重力性状的关键调节因子,促进茎向重力矢量弯曲。
 
 
 
 
 
 
 
 
 
 
23
 
24
  以下是你需要为其生成QA对的文本段落:
25
  {context}
 
 
26
  """
27
 
28
 
 
1
  # pylint: disable=C0301
2
  TEMPLATE_EN: str = """You are given a text passage. Your task is to generate a question and answer (QA) pair based on the content of that text.
 
 
 
 
3
 
4
+ Please note the following requirements:
5
+ 1. Output only one QA pair without any additional explanations or analysis.
6
+ 2. Do not repeat the content of the answer or any part of it.
7
+ 3. The answer should be accurate and directly derived from the text. Make sure the QA pair is relevant to the main theme or important details of the given text.
8
+
9
+ Output format:
10
+ <question>question_text</question>
11
+ <answer>answer_text</answer>
12
+
13
+ For example:
14
+ <question>What is the effect of overexpressing the BG1 gene on grain size and development?</question>
15
+ <answer>Overexpression of the BG1 gene leads to significantly increased grain size, demonstrating its role in grain development.</answer>
16
 
17
  Here is the text passage you need to generate a QA pair for:
18
  {context}
19
+
20
+ Output:
21
  """
22
 
23
  TEMPLATE_ZH: str = """给定一个文本段落。你的任务是根据该文本的内容生成一个问答(QA)对。
 
 
 
 
24
 
25
+ 请注意下列要求:
26
+ 1. 仅输出一个问答(QA)对,不得包含任何额外说明或分析
27
+ 2. 不得重复答案内容或其中任何片段
28
+ 3. 答案应准确且直接从文本中得出。确保QA对与给定文本的主题或重要细节相关。
29
+
30
+ 输出格式如下:
31
+ <question>question_text</question>
32
+ <answer>answer_text</answer>
33
+
34
+ 例如:
35
+ <question>过表达BG1基因对谷粒大小和发育有什么影响?</question>
36
+ <answer>BG1基因的过表达显著增加了谷粒大小,表明其在谷物发育中的作用。</answer>
37
 
38
  以下是你需要为其生成QA对的文本段落:
39
  {context}
40
+
41
+ 输出:
42
  """
43
 
44
 
graphgen/templates/generation/cot_generation.py CHANGED
@@ -81,7 +81,7 @@ Input:
81
  Output:
82
  """
83
 
84
- COT_TEMPLATE_DESIGN_ZH = """你是一位“元推理架构师”。你的任务不是回答问题,\
85
  而是根据给定的知识图谱中的实体和关系的名称以及描述信息,设计一条可复用、可泛化的 CoT 推理路径模板。\
86
 
87
  -步骤-
@@ -115,8 +115,8 @@ COT_TEMPLATE_DESIGN_ZH = """你是一位“元推理架构师”。你的任务
115
  4. 不要出现具体数值或结论,不要出现“识别实体”、“识别关系”这类无意义的操作描述。
116
  5. 使用中文作为输出语言。
117
  6. 输出格式为:
118
- 问题:
119
- 推理路径设计:
120
 
121
  -真实数据-
122
  输入:
@@ -130,7 +130,7 @@ COT_TEMPLATE_DESIGN_ZH = """你是一位“元推理架构师”。你的任务
130
  """
131
 
132
 
133
- COT_TEMPLATE_DESIGN_EN = """You are a “meta-reasoning architect”. \
134
  Your task is NOT to answer the question, but to design a reusable, generalizable CoT reasoning-path \
135
  template based solely on the names and descriptions of entities and \
136
  relationships in the provided knowledge graph.
@@ -168,8 +168,8 @@ relationships in the provided knowledge graph.
168
  and DO NOT describing meaningless operations like "Identify the entity" or "Identify the relationship".
169
  5. Use English as the output language.
170
  6. The output format is:
171
- Question:
172
- Reasoning-Path Design:
173
 
174
  Please summarize the information expressed by the knowledge graph based on the following [Entities:] and [Relationships:] provided.
175
 
 
81
  Output:
82
  """
83
 
84
+ COT_TEMPLATE_DESIGN_ZH: str = """你是一位“元推理架构师”。你的任务不是回答问题,\
85
  而是根据给定的知识图谱中的实体和关系的名称以及描述信息,设计一条可复用、可泛化的 CoT 推理路径模板。\
86
 
87
  -步骤-
 
115
  4. 不要出现具体数值或结论,不要出现“识别实体”、“识别关系”这类无意义的操作描述。
116
  5. 使用中文作为输出语言。
117
  6. 输出格式为:
118
+ <question>问题文本</question>
119
+ <reasoning_path>推理路径设计文本</reasoning_path>
120
 
121
  -真实数据-
122
  输入:
 
130
  """
131
 
132
 
133
+ COT_TEMPLATE_DESIGN_EN: str = """You are a “meta-reasoning architect”. \
134
  Your task is NOT to answer the question, but to design a reusable, generalizable CoT reasoning-path \
135
  template based solely on the names and descriptions of entities and \
136
  relationships in the provided knowledge graph.
 
168
  and DO NOT describing meaningless operations like "Identify the entity" or "Identify the relationship".
169
  5. Use English as the output language.
170
  6. The output format is:
171
+ <question>question text</question>
172
+ <reasoning_path>reasoning path design text</reasoning_path>
173
 
174
  Please summarize the information expressed by the knowledge graph based on the following [Entities:] and [Relationships:] provided.
175
 
graphgen/templates/generation/multi_hop_generation.py CHANGED
@@ -1,56 +1,68 @@
1
  # pylint: disable=C0301
2
- TEMPLATE_ZH: str = """请基于以下知识子图生成多跳推理问题和答案。你将获得一个知识子图,其中包含一系列实体、关系和事实。你的任务是提出一个问题,该问题需要经过多次推理才能回答。问题的答案应该是从给定的知识子图中推断出来的。确保问题的难度适中,需要多步推理才能回答。
 
 
 
 
 
 
 
 
 
 
3
 
4
  例如:
5
- ########
6
  --实体--
7
  1. 苹果
8
  2. 水果
9
  3. 维生素C
10
- ########
11
  --关系--
12
  1. 苹果-水果:苹果是一种水果
13
  2. 水果-维生素C:水果中富含维生素C
14
- ########
15
- 问题:通过吃苹果补充的什么物质,有助于维持健康?
16
- 答案:维生素C
17
- ########
18
 
19
- #########
 
 
 
 
20
  --实体--
21
  {entities}
22
- #########
23
  --关系--
24
  {relationships}
25
- #########
26
- 直接输出生成的问题和答案,请不要直接复制示例问题和答案,不要输出无关内容。
27
  """
28
 
29
- TEMPLATE_EN: str = """Please generate a multi-hop reasoning question and answer based on the following knowledge subgraph. You will be provided with a knowledge subgraph that contains a series of entities, relations, and facts. Your task is to generate a question that requires multiple steps of reasoning to answer. The answer to the question should be inferred from the given knowledge subgraph. Ensure that the question is of moderate difficulty and requires multiple steps of reasoning to answer.
 
 
 
 
 
 
30
 
31
  For example:
32
- ########
33
  --Entities--
34
  1. Apple
35
  2. Fruit
36
  3. Vitamin C
37
- ########
38
  --Relations--
39
  1. Apple-Fruit: Apple is a type of fruit
40
  2. Fruit-Vitamin C: Fruits are rich in Vitamin C
41
- ########
42
- Question: What substance, obtained through eating apples, helps maintain health?
43
- Answer: Vitamin C
44
- ########
45
 
46
- ########
 
 
 
 
47
  --Entities--
48
  {entities}
49
- ########
50
  --Relations--
51
  {relationships}
52
- ########
53
- Output the generated question and answer directly, please do not copy the example question and answer directly, and do not provide irrelevant information.
54
  """
55
 
56
  MULTI_HOP_GENERATION_PROMPT = {"en": TEMPLATE_EN, "zh": TEMPLATE_ZH}
 
1
  # pylint: disable=C0301
2
+ TEMPLATE_ZH: str = """请基于以下知识子图生成多跳推理问题和答案。你将获得一个知识子图,其中包含一系列实体、关系和事实。
3
+ 你的任务是生成一个问答对,其中问题需要经过多次推理才能回答。问题的答案应该是从给定的知识子图中推断出来的。确保问题的难度适中,需要多步推理才能回答。
4
+
5
+ 请注意下列要求:
6
+ 1. 仅输出一个问答(QA)对,不得包含任何额外说明或分析
7
+ 2. 不得重复答案内容或其中任何片段,不要直接复制示例问题和答案
8
+ 3. 答案应准确且直接从文本中得出。确保QA对与给定文本的主题或重要细节相关。
9
+
10
+ 输出格式:
11
+ <question>question_text</question>
12
+ <answer>answer_text</answer>
13
 
14
  例如:
15
+ 输入为:
16
  --实体--
17
  1. 苹果
18
  2. 水果
19
  3. 维生素C
 
20
  --关系--
21
  1. 苹果-水果:苹果是一种水果
22
  2. 水果-维生素C:水果中富含维生素C
 
 
 
 
23
 
24
+ 输出:
25
+ <question>通过吃苹果补充的什么物质,有助于维持健康?</question>
26
+ <answer>维生素C</answer>
27
+
28
+ 真实输入如下:
29
  --实体--
30
  {entities}
 
31
  --关系--
32
  {relationships}
33
+
34
+ 输出:
35
  """
36
 
37
+ TEMPLATE_EN: str = """Please generate a multi-hop reasoning question and answer based on the following knowledge subgraph. You will be provided with a knowledge subgraph that contains a series of entities, relations, and facts.
38
+ Your task is to generate a question-answer (QA) pair where the question requires multiple steps of reasoning to answer. The answer to the question should be inferred from the given knowledge subgraph. Ensure that the question is of moderate difficulty and requires multiple steps of reasoning to answer.
39
+
40
+ Please note the following requirements:
41
+ 1. Output only one QA pair without any additional explanations or analysis.
42
+ 2. Do not repeat the content of the answer or any part of it. Do not directly copy the example question and answer.
43
+ 3. The answer should be accurate and directly derived from the text. Make sure the QA pair is relevant to the main theme or important details of the given text.
44
 
45
  For example:
46
+ Input:
47
  --Entities--
48
  1. Apple
49
  2. Fruit
50
  3. Vitamin C
 
51
  --Relations--
52
  1. Apple-Fruit: Apple is a type of fruit
53
  2. Fruit-Vitamin C: Fruits are rich in Vitamin C
 
 
 
 
54
 
55
+ Output:
56
+ <question>What substance, obtained by eating apples, helps maintain health?</question>
57
+ <answer>Vitamin C</answer>
58
+
59
+ Real input:
60
  --Entities--
61
  {entities}
 
62
  --Relations--
63
  {relationships}
64
+
65
+ Output:
66
  """
67
 
68
  MULTI_HOP_GENERATION_PROMPT = {"en": TEMPLATE_EN, "zh": TEMPLATE_ZH}
graphgen/templates/generation/vqa_generation.py CHANGED
@@ -39,14 +39,16 @@ Create multiple sets of VQA question-answer pairs that satisfy the following:
39
  ################
40
  {relationships}
41
  ################
42
- Directly output the generated questions and answers, please do not directly copy the example questions and answers, and do not provide irrelevant information.
43
- Here is the response format you should follow:
44
- Question: <Question1>
45
- Answer: <Answer1>
46
 
47
- Question: <Question2>
48
- Answer: <Answer2>
 
 
 
 
 
49
 
 
50
  """
51
 
52
  TEMPLATE_ZH: str = """---角色---
@@ -91,14 +93,15 @@ TEMPLATE_ZH: str = """---角色---
91
  ################
92
  {relationships}
93
  ################
94
- 直接输出生成的问题和答案,请不要直接复制示例问题和答案,不要输出无关内容。
95
- 以下是你应该遵循的响应格式:
96
- 问题: <问题1>
97
- 答案: <答案1>
98
 
99
- 问题: <问题2>
100
- 答案: <答案2>
 
 
 
 
101
 
 
102
  """
103
 
104
  VQA_GENERATION_PROMPT = {"en": TEMPLATE_EN, "zh": TEMPLATE_ZH}
 
39
  ################
40
  {relationships}
41
  ################
 
 
 
 
42
 
43
+ Please directly output the generated questions and answers, do not directly copy the example questions and answers, and do not provide irrelevant information.
44
+
45
+ Here is the response format you should follow:
46
+ <question>question1</question>
47
+ <answer>answer1</answer>
48
+ <question>question2</question>
49
+ <answer>answer2</answer>
50
 
51
+ Output:
52
  """
53
 
54
  TEMPLATE_ZH: str = """---角色---
 
93
  ################
94
  {relationships}
95
  ################
 
 
 
 
96
 
97
+ 请直接输出生成的问题和答案,不要直接复制示例问题和答案,也不要提供无关信息。
98
+ 以下是你应遵循的响应格式:
99
+ <question>question1</question>
100
+ <answer>answer1</answer>
101
+ <question>question2</question>
102
+ <answer>answer2</answer>
103
 
104
+ 输出:
105
  """
106
 
107
  VQA_GENERATION_PROMPT = {"en": TEMPLATE_EN, "zh": TEMPLATE_ZH}