Spaces:
Running
Running
github-actions[bot]
committed on
Commit
·
caa2d9c
1
Parent(s):
f57aa27
Auto-sync from demo at Fri Jan 16 06:05:51 UTC 2026
Browse files
- app.py +1 -0
- graphgen/models/__init__.py +1 -0
- graphgen/models/generator/__init__.py +1 -0
- graphgen/models/generator/true_false_generator.py +91 -0
- graphgen/operators/generate/generate_service.py +7 -0
- graphgen/templates/__init__.py +1 -0
- graphgen/templates/generation/__init__.py +1 -0
- graphgen/templates/generation/true_false_generation.py +75 -0
- webui/app.py +1 -0
app.py
CHANGED
|
@@ -524,6 +524,7 @@ with gr.Blocks(title="GraphGen Demo", theme=gr.themes.Glass(), css=css) as demo:
|
|
| 524 |
"multi_choice",
|
| 525 |
"multi_answer",
|
| 526 |
"fill_in_blank",
|
|
|
|
| 527 |
],
|
| 528 |
label=_("Mode"),
|
| 529 |
value="aggregated",
|
|
|
|
| 524 |
"multi_choice",
|
| 525 |
"multi_answer",
|
| 526 |
"fill_in_blank",
|
| 527 |
+
"true_false",
|
| 528 |
],
|
| 529 |
label=_("Mode"),
|
| 530 |
value="aggregated",
|
graphgen/models/__init__.py
CHANGED
|
@@ -16,6 +16,7 @@ from .generator import (
|
|
| 16 |
MultiChoiceGenerator,
|
| 17 |
MultiHopGenerator,
|
| 18 |
QuizGenerator,
|
|
|
|
| 19 |
VQAGenerator,
|
| 20 |
)
|
| 21 |
from .kg_builder import LightRAGKGBuilder, MMKGBuilder
|
|
|
|
| 16 |
MultiChoiceGenerator,
|
| 17 |
MultiHopGenerator,
|
| 18 |
QuizGenerator,
|
| 19 |
+
TrueFalseGenerator,
|
| 20 |
VQAGenerator,
|
| 21 |
)
|
| 22 |
from .kg_builder import LightRAGKGBuilder, MMKGBuilder
|
graphgen/models/generator/__init__.py
CHANGED
|
@@ -6,4 +6,5 @@ from .multi_answer_generator import MultiAnswerGenerator
|
|
| 6 |
from .multi_choice_generator import MultiChoiceGenerator
|
| 7 |
from .multi_hop_generator import MultiHopGenerator
|
| 8 |
from .quiz_generator import QuizGenerator
|
|
|
|
| 9 |
from .vqa_generator import VQAGenerator
|
|
|
|
| 6 |
from .multi_choice_generator import MultiChoiceGenerator
|
| 7 |
from .multi_hop_generator import MultiHopGenerator
|
| 8 |
from .quiz_generator import QuizGenerator
|
| 9 |
+
from .true_false_generator import TrueFalseGenerator
|
| 10 |
from .vqa_generator import VQAGenerator
|
graphgen/models/generator/true_false_generator.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
from typing import Any
|
| 3 |
+
|
| 4 |
+
from graphgen.bases import BaseGenerator
|
| 5 |
+
from graphgen.templates import TF_GENERATION_PROMPT
|
| 6 |
+
from graphgen.utils import compute_content_hash, detect_main_language, logger
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class TrueFalseGenerator(BaseGenerator):
|
| 10 |
+
def __init__(self, llm_client, num_of_questions) -> None:
|
| 11 |
+
super().__init__(llm_client)
|
| 12 |
+
self.num_of_questions = num_of_questions
|
| 13 |
+
|
| 14 |
+
@staticmethod
|
| 15 |
+
def parse_response(response: str) -> Any:
|
| 16 |
+
"""
|
| 17 |
+
Parse true/false QA pairs from the LLM response.
|
| 18 |
+
Each QA pair contains a statement question and True/False answer.
|
| 19 |
+
|
| 20 |
+
:param response: The LLM response containing XML-formatted QA pairs
|
| 21 |
+
:return: Dictionary mapping question hash to question data, where each
|
| 22 |
+
value is a dict with "question", "options", and "answer" keys
|
| 23 |
+
"""
|
| 24 |
+
qa_pairs: dict[str, dict[str, Any]] = {}
|
| 25 |
+
|
| 26 |
+
# Extract all QA pair blocks
|
| 27 |
+
qa_blocks = re.findall(r"<qa_pair>(.*?)</qa_pair>", response, re.DOTALL)
|
| 28 |
+
|
| 29 |
+
if not qa_blocks:
|
| 30 |
+
logger.warning("No QA pairs found in response: %s", response)
|
| 31 |
+
return {}
|
| 32 |
+
|
| 33 |
+
for block in qa_blocks:
|
| 34 |
+
# Extract and clean question text
|
| 35 |
+
q_match = re.search(r"<question>(.*?)</question>", block, re.DOTALL)
|
| 36 |
+
if not q_match:
|
| 37 |
+
logger.warning("Failed to parse question from block: %s", block)
|
| 38 |
+
continue
|
| 39 |
+
question = q_match.group(1).strip().strip('"').strip("'")
|
| 40 |
+
|
| 41 |
+
# Extract and validate answer
|
| 42 |
+
ans_match = re.search(r"<answer>(.*?)</answer>", block, re.DOTALL)
|
| 43 |
+
if not ans_match:
|
| 44 |
+
logger.warning("Failed to parse answer from block: %s", block)
|
| 45 |
+
continue
|
| 46 |
+
answer = ans_match.group(1).strip().strip('"').strip("'")
|
| 47 |
+
|
| 48 |
+
# Ensure answer exists in options
|
| 49 |
+
if answer.lower() not in ["true", "false"]:
|
| 50 |
+
logger.warning("Invalid answer '%s' in block: %s", answer, block)
|
| 51 |
+
continue
|
| 52 |
+
|
| 53 |
+
# Build result entry with question hash as key
|
| 54 |
+
question_hash = compute_content_hash(question)
|
| 55 |
+
qa_pairs[question_hash] = {
|
| 56 |
+
"question": question,
|
| 57 |
+
"answer": answer, # "True" or "False"
|
| 58 |
+
}
|
| 59 |
+
|
| 60 |
+
logger.debug("Successfully parsed TF question: %s", question[:50])
|
| 61 |
+
|
| 62 |
+
if not qa_pairs:
|
| 63 |
+
logger.error("Failed to parse any valid true/false pairs from response")
|
| 64 |
+
|
| 65 |
+
return qa_pairs
|
| 66 |
+
|
| 67 |
+
# pylint: disable=W0221
|
| 68 |
+
def build_prompt(
|
| 69 |
+
self, batch: tuple[list[tuple[str, dict]], list[tuple[Any, Any, dict]]]
|
| 70 |
+
) -> str:
|
| 71 |
+
nodes, edges = batch
|
| 72 |
+
entities_str = "\n".join(
|
| 73 |
+
[
|
| 74 |
+
f"{index + 1}. {node[0]}: {node[1]['description']}"
|
| 75 |
+
for index, node in enumerate(nodes)
|
| 76 |
+
]
|
| 77 |
+
)
|
| 78 |
+
|
| 79 |
+
relationships_str = "\n".join(
|
| 80 |
+
[
|
| 81 |
+
f"{index + 1}. {edge[0]} -- {edge[1]}: {edge[2]['description']}"
|
| 82 |
+
for index, edge in enumerate(edges)
|
| 83 |
+
]
|
| 84 |
+
)
|
| 85 |
+
context = entities_str + "\n" + relationships_str
|
| 86 |
+
language = detect_main_language(entities_str + relationships_str)
|
| 87 |
+
prompt = TF_GENERATION_PROMPT[language].format(
|
| 88 |
+
context=context,
|
| 89 |
+
num_of_questions=self.num_of_questions,
|
| 90 |
+
)
|
| 91 |
+
return prompt
|
graphgen/operators/generate/generate_service.py
CHANGED
|
@@ -64,6 +64,13 @@ class GenerateService(BaseOperator):
|
|
| 64 |
self.llm_client,
|
| 65 |
num_of_questions=generate_kwargs.get("num_of_questions", 5),
|
| 66 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
else:
|
| 68 |
raise ValueError(f"Unsupported generation mode: {method}")
|
| 69 |
|
|
|
|
| 64 |
self.llm_client,
|
| 65 |
num_of_questions=generate_kwargs.get("num_of_questions", 5),
|
| 66 |
)
|
| 67 |
+
elif self.method == "true_false":
|
| 68 |
+
from graphgen.models import TrueFalseGenerator
|
| 69 |
+
|
| 70 |
+
self.generator = TrueFalseGenerator(
|
| 71 |
+
self.llm_client,
|
| 72 |
+
num_of_questions=generate_kwargs.get("num_of_questions", 5),
|
| 73 |
+
)
|
| 74 |
else:
|
| 75 |
raise ValueError(f"Unsupported generation mode: {method}")
|
| 76 |
|
graphgen/templates/__init__.py
CHANGED
|
@@ -10,6 +10,7 @@ from .generation import (
|
|
| 10 |
MAQ_GENERATION_PROMPT,
|
| 11 |
MCQ_GENERATION_PROMPT,
|
| 12 |
MULTI_HOP_GENERATION_PROMPT,
|
|
|
|
| 13 |
VQA_GENERATION_PROMPT,
|
| 14 |
)
|
| 15 |
from .kg import KG_EXTRACTION_PROMPT, KG_SUMMARIZATION_PROMPT, MMKG_EXTRACTION_PROMPT
|
|
|
|
| 10 |
MAQ_GENERATION_PROMPT,
|
| 11 |
MCQ_GENERATION_PROMPT,
|
| 12 |
MULTI_HOP_GENERATION_PROMPT,
|
| 13 |
+
TF_GENERATION_PROMPT,
|
| 14 |
VQA_GENERATION_PROMPT,
|
| 15 |
)
|
| 16 |
from .kg import KG_EXTRACTION_PROMPT, KG_SUMMARIZATION_PROMPT, MMKG_EXTRACTION_PROMPT
|
graphgen/templates/generation/__init__.py
CHANGED
|
@@ -5,4 +5,5 @@ from .fill_in_blank_generation import FILL_IN_BLANK_GENERATION_PROMPT
|
|
| 5 |
from .multi_answer_generation import MAQ_GENERATION_PROMPT
|
| 6 |
from .multi_choice_generation import MCQ_GENERATION_PROMPT
|
| 7 |
from .multi_hop_generation import MULTI_HOP_GENERATION_PROMPT
|
|
|
|
| 8 |
from .vqa_generation import VQA_GENERATION_PROMPT
|
|
|
|
| 5 |
from .multi_answer_generation import MAQ_GENERATION_PROMPT
|
| 6 |
from .multi_choice_generation import MCQ_GENERATION_PROMPT
|
| 7 |
from .multi_hop_generation import MULTI_HOP_GENERATION_PROMPT
|
| 8 |
+
from .true_false_generation import TF_GENERATION_PROMPT
|
| 9 |
from .vqa_generation import VQA_GENERATION_PROMPT
|
graphgen/templates/generation/true_false_generation.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
TEMPLATE_TF_ZH: str = """请根据上下文资料生成独立的知识判断题,每个判断题包含一个陈述句,答案只能是正确(True)或错误(False)。
|
| 2 |
+
|
| 3 |
+
生成要求:
|
| 4 |
+
1. **语言一致性**:若上下文资料为中文,则生成中文问题;若为英文,则生成英文问题
|
| 5 |
+
2. **数量**:每个上下文资料生成{num_of_questions}个判断题
|
| 6 |
+
3. **独立性**:每个问题必须完整独立,不依赖其他问题
|
| 7 |
+
4. **准确性**:正确答案必须能从原文直接得出,陈述需有明确的判断依据
|
| 8 |
+
|
| 9 |
+
输出格式:
|
| 10 |
+
<qa_pairs>
|
| 11 |
+
<qa_pair>
|
| 12 |
+
<question>陈述句文本</question>
|
| 13 |
+
<answer>True或False</answer>
|
| 14 |
+
</qa_pair>
|
| 15 |
+
</qa_pairs>
|
| 16 |
+
|
| 17 |
+
示例(根据iPad Air 2生成2题):
|
| 18 |
+
<qa_pairs>
|
| 19 |
+
<qa_pair>
|
| 20 |
+
<question>iPad Air 2于2014年发布。</question>
|
| 21 |
+
<answer>True</answer>
|
| 22 |
+
</qa_pair>
|
| 23 |
+
<qa_pair>
|
| 24 |
+
<question>iPad Air 2搭载的是A10处理器。</question>
|
| 25 |
+
<answer>False</answer>
|
| 26 |
+
</qa_pair>
|
| 27 |
+
</qa_pairs>
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
上下文资料:
|
| 31 |
+
{context}
|
| 32 |
+
|
| 33 |
+
请为以下资料生成{num_of_questions}个判断题:
|
| 34 |
+
"""
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
TEMPLATE_TF_EN: str = """Generate independent true/false questions based on the provided context. \
|
| 38 |
+
Each question should be a factual statement that can be clearly determined as true or false.
|
| 39 |
+
|
| 40 |
+
Requirements:
|
| 41 |
+
1. **Language Consistency**: Generate in the same language as the context (Chinese/English)
|
| 42 |
+
2. **Quantity**: Generate {num_of_questions} true/false questions per context
|
| 43 |
+
3. **Independence**: Each question must be self-contained
|
| 44 |
+
4. **Accuracy**: Correct answer must be directly derivable from the text with clear evidence
|
| 45 |
+
|
| 46 |
+
Output Format:
|
| 47 |
+
<qa_pairs>
|
| 48 |
+
<qa_pair>
|
| 49 |
+
<question>Statement text</question>
|
| 50 |
+
<answer>True or False</answer>
|
| 51 |
+
</qa_pair>
|
| 52 |
+
</qa_pairs>
|
| 53 |
+
|
| 54 |
+
Example (2 questions):
|
| 55 |
+
<qa_pairs>
|
| 56 |
+
<qa_pair>
|
| 57 |
+
<question>The iPad Air 2 was released in 2014.</question>
|
| 58 |
+
<answer>True</answer>
|
| 59 |
+
</qa_pair>
|
| 60 |
+
<qa_pair>
|
| 61 |
+
<question>The iPad Air 2 uses an A10 processor.</question>
|
| 62 |
+
<options>True
|
| 63 |
+
False</options>
|
| 64 |
+
<answer>False</answer>
|
| 65 |
+
</qa_pair>
|
| 66 |
+
</qa_pairs>
|
| 67 |
+
|
| 68 |
+
Context:
|
| 69 |
+
{context}
|
| 70 |
+
|
| 71 |
+
Please generate {num_of_questions} true/false questions for the following context:
|
| 72 |
+
"""
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
TF_GENERATION_PROMPT = {"zh": TEMPLATE_TF_ZH, "en": TEMPLATE_TF_EN}
|
webui/app.py
CHANGED
|
@@ -524,6 +524,7 @@ with gr.Blocks(title="GraphGen Demo", theme=gr.themes.Glass(), css=css) as demo:
|
|
| 524 |
"multi_choice",
|
| 525 |
"multi_answer",
|
| 526 |
"fill_in_blank",
|
|
|
|
| 527 |
],
|
| 528 |
label=_("Mode"),
|
| 529 |
value="aggregated",
|
|
|
|
| 524 |
"multi_choice",
|
| 525 |
"multi_answer",
|
| 526 |
"fill_in_blank",
|
| 527 |
+
"true_false",
|
| 528 |
],
|
| 529 |
label=_("Mode"),
|
| 530 |
value="aggregated",
|