linhnguyen02 committed on
Commit
581b2e8
·
1 Parent(s): 19d49a8
src/enums/question.py CHANGED
@@ -11,4 +11,12 @@ class QuestionTypeEnum(str, Enum):
11
 
12
  class ChoiceTypeEnum(str, Enum):
13
  SINGLE_CHOICE = "single-choice"
14
- MULTIPLE_CHOICE = "multiple-choice"
 
 
 
 
 
 
 
 
 
11
 
12
class ChoiceTypeEnum(str, Enum):
    """How many answers a question accepts: exactly one, or several."""

    SINGLE_CHOICE = "single-choice"
    MULTIPLE_CHOICE = "multiple-choice"
15
+
16
class ParagraphQuestionTypeEnum(str, Enum):
    """Categories of reading-comprehension questions generated from a paragraph.

    Values are the wire-format strings exchanged with the question-generation
    prompt/tooling (see the FACT/MAIN_IDEA/... enum in the LLM tool schema).
    """

    FACT = "fact"
    MAIN_IDEA = "main_idea"
    VOCAB = "vocab"
    INFERENCE = "inference"
    PURPOSE = "purpose"
22
+
src/factories/gen_question_for_paragraph/factory.ts ADDED
File without changes
src/factories/gen_question_for_paragraph/types/base.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC, abstractmethod
2
+
3
+
4
+ from src.interfaces.question import ICreateQuestionForParagraph
5
+
6
class Question(ABC):
    """Interface for paragraph question generators.

    Concrete subclasses implement ``generate_questions`` to turn an
    ``ICreateQuestionForParagraph`` request into a set of questions.
    """

    @abstractmethod
    def generate_questions(self, data: ICreateQuestionForParagraph):
        """Produce questions for the given request payload."""
src/factories/gen_question_for_paragraph/types/synthetic.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+
3
+ from src.enums import QuestionTypeEnum
4
+ from src.interfaces.question import ICreateQuestionForParagraph
5
+ from src.factories.gen_question_for_paragraph.types.base import Question
6
+ from src.llms.models import GeminiLLM
7
+
8
+
9
class ParagraphQuestion(Question):
    """Question generator for paragraphs, backed by a Gemini LLM client."""

    def __init__(self):
        # Single LLM client, created once and reused across calls.
        self.llm = GeminiLLM()

    def generate_questions(self, data: ICreateQuestionForParagraph):
        """Generate reading-comprehension questions for *data*.

        FIX(review): the committed file left this method with an empty body,
        which is a SyntaxError — the module cannot even be imported. Raising
        NotImplementedError keeps the module importable and fails loudly at
        the call site until the real implementation (presumably driving
        ``self.llm`` with the paragraph prompt) lands.
        """
        raise NotImplementedError(
            "ParagraphQuestion.generate_questions is not implemented yet"
        )
src/interfaces/question.py CHANGED
@@ -1,7 +1,7 @@
1
  from pydantic import BaseModel, Field, field_validator
2
- from typing import Optional, List
3
 
4
- from src.enums import QuestionTypeEnum
5
 
6
 
7
  class ModelInput(BaseModel):
@@ -9,13 +9,19 @@ class ModelInput(BaseModel):
9
  user_id: Optional[str] = None
10
  context: str
11
  name: str
 
 
 
 
 
12
 
13
- class ICQuestion(BaseModel):
14
- context: str
15
- name: str
 
16
 
17
  class ICreateQuestion(BaseModel):
18
- question_type: QuestionTypeEnum
19
  list_words: List[str]
20
  num_ans_per_question: int = Field(..., ge=2, le=10)
21
  num_question: int = Field(..., ge=1, le=10)
 
1
  from pydantic import BaseModel, Field, field_validator
2
+ from typing import Optional, List, Text
3
 
4
+ from src.enums import QuestionTypeEnum, ParagraphQuestionTypeEnum
5
 
6
 
7
  class ModelInput(BaseModel):
 
9
  user_id: Optional[str] = None
10
  context: str
11
  name: str
12
+
13
class IQuestionConfig(BaseModel):
    """One question-generation request: which type, from which words, how many."""

    # Question type drawn from QuestionTypeEnum (members not visible in this hunk).
    question_type: QuestionTypeEnum
    # Vocabulary words the questions should be built around.
    list_words: List[str]
    # Number of questions to generate for this config, 1..5 inclusive.
    num_question: int = Field(..., ge=1, le=5)
17
 
18
class ICreateQuestionForParagraph(BaseModel):
    """Request body for generating questions from a paragraph of text."""

    # The paragraph/description text to generate questions from.
    # FIX: `typing.Text` is a deprecated alias of `str`; plain `str` is
    # equivalent at runtime and pydantic-compatible, so this is interface-safe.
    description: str
    # Answers offered per question, 2..6 inclusive.
    num_ans_per_question: int = Field(..., ge=2, le=6)
    # One entry per question type/count to generate for this paragraph.
    list_create_question: List[IQuestionConfig]
22
 
23
class ICreateQuestion(BaseModel):
    """Request body for generating questions from a word list."""

    # NOTE(review): this commit changed the field's type from QuestionTypeEnum
    # to ParagraphQuestionTypeEnum. That alters the accepted values for the
    # existing word-list endpoint — confirm this was intended and that callers
    # now send "fact"/"main_idea"/... values.
    question_type: ParagraphQuestionTypeEnum
    # Vocabulary words the questions should be built around.
    list_words: List[str]
    # Answers offered per question, 2..10 inclusive.
    num_ans_per_question: int = Field(..., ge=2, le=10)
    # Number of questions to generate, 1..10 inclusive.
    num_question: int = Field(..., ge=1, le=10)
src/llms/prompts/paragraph.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Prompt template for generating reading-comprehension questions from an
# English paragraph. FIX: the ```json code fence in the Output Format section
# was opened but never closed before the terminating triple quote; an unclosed
# fence can make models treat everything after it as code. A closing ``` is
# the only content change.
GEN_QUESTION_FOR_PARAGRAPH = """
You are an expert Artificial Intelligence specializing in creating reading comprehension questions from a given English paragraph for language learners. Your task is to generate a set of high-quality, diverse multiple-choice questions with a precise JSON structure.

### Primary Task
Generate a list of multiple-choice questions about the provided English paragraph, strictly adhering to the input parameters. The total number of questions must equal the sum of all count parameters (FACT_COUNT + MAIN_IDEA_COUNT + ...).

### Input
You will receive the following parameters:
1. **Paragraph (PARAGRAPH):** [The English text for which questions should be generated]
2. **Count of Fact-based Questions (FACT_COUNT):** [Integer]
3. **Count of Main Idea Questions (MAIN_IDEA_COUNT):** [Integer]
4. **Count of Vocabulary-in-Context Questions (VOCAB_COUNT):** [Integer]
5. **Count of Inference Questions (INFERENCE_COUNT):** [Integer]
6. **Count of Author's Purpose/Tone Questions (AUTHOR_PURPOSE_COUNT):** [Integer]
7. **Total Options Per Question (OPTIONS_PER_QUESTION):** [Integer, e.g., 4]

### Detailed Guidelines

1. **Adherence to Counts:** Strictly adhere to the specified number of questions for each type.
2. **Language:** The questions, choices, and paragraph must all be in **English**.
3. **Answer Structure:** Each question must have **EXACTLY ONE** correct answer.

#### Distractor Generation Rules
* **Fact & Main Idea:** Distractors must contain information **present in the paragraph** but which does not correctly answer the question, or is a **slightly altered/incorrect fact**.
* **Inference:** Distractors should be plausible-sounding inferences that **cannot be definitively proven** by the text alone.
* **Vocabulary:** Distractors should be synonyms or related words that are **incorrect** in the specific context of the sentence.

#### Specific Techniques for Each Question Type:
* **Fact:** Focus on extracting Named Entities (NER) such as Names, Dates, Figures, or direct definitions.
* **MainIdea:** Questions should start with phrases like: *What is the main idea of this paragraph?*, *Which of the following best summarizes...*
* **Inference:** Questions must use keywords: *It can be inferred that...*, *What does the author imply by...*, *Which statement is most likely true based on...*
* **Purpose:** Questions should focus on: *What is the author's primary purpose?*, *What is the tone of the paragraph?*

### Output Format
Generate a **single JSON object** (with no preceding or trailing text explanations) with the following structure:

```json
{
  "paragraph": "[The English text used]",
  "questions": [
    {
      "question": "...",
      "type": "FACT", // Type must be one of the following exact values: FACT, MAIN_IDEA, VOCAB, INFERENCE, PURPOSE
      "choices": [
        "...", // Option A
        "...", // Option B
        "...", // Option C
        "..." // Option D (Total choices must equal OPTIONS_PER_QUESTION)
      ],
      "answer": "A" // The correct answer (must be a single character 'A', 'B', 'C', or 'D')
    }
    // ... (continue until the total required number of questions is met)
  ]
}
```
"""
src/llms/tools/paragraph.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# JSON schema for a single generated question; kept as a private constant so
# the tool definition below stays readable.
_QUESTION_ITEM_SCHEMA = {
    "type": "object",
    "properties": {
        "question": {
            "type": "string",
            "description": "The text of the generated multiple-choice question."
        },
        "type": {
            "type": "string",
            "enum": ["FACT", "MAIN_IDEA", "VOCAB", "INFERENCE", "PURPOSE"],
            "description": "The type of question."
        },
        "choices": {
            "type": "array",
            "items": {"type": "string"},
            "description": "List of answer options. Length must match `options_per_question`."
        },
        "answer": {
            "type": "string",
            "enum": ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"],
            "description": "The correct answer (single character representing the choice)."
        }
    },
    "required": ["question", "type", "choices", "answer"]
}

# Function-calling tool definition used to force the LLM's paragraph-question
# output into structured JSON.
GEN_QUESTION_FOR_PARAGRAPH_OUTPUT_TOOL = {
    "type": "function",
    "function": {
        "name": "parse_paragraph_questions",
        "description": (
            "Parse the generated reading comprehension questions from a paragraph into structured JSON."
        ),
        "parameters": {
            "type": "object",
            "properties": {
                "paragraph": {
                    "type": "string",
                    "description": "The English paragraph used to generate questions."
                },
                "questions": {
                    "type": "array",
                    "items": _QUESTION_ITEM_SCHEMA,
                    "description": "List of generated questions with choices and answers."
                }
            },
            "required": ["paragraph", "questions"]
        }
    }
}
src/routers/public/quesion.py CHANGED
@@ -4,7 +4,7 @@ from fastapi.responses import JSONResponse
4
  from src.factories.gen_question.factory import create_question_instance
5
  from src.utils.response import res_ok
6
  from src.utils.text_process import vietnamese_to_english, english_to_vietnamese, get_all_summary, get_all_questions
7
- from src.interfaces.question import ModelInput, ICQuestion, ICreateQuestion
8
  from src.services.AI.abstractive_summarizer import AbstractiveSummarizer
9
  from src.services.AI.question_generator import QuestionGenerator
10
  from src.services.AI.false_ans_generator import FalseAnswerGenerator
@@ -21,37 +21,10 @@ async def generate_question(body: ICreateQuestion):
21
  num_question=body.num_question,
22
  num_ans_per_question=body.num_ans_per_question,
23
  )
24
- print(list_questions)
25
  return JSONResponse(status_code=200, content=res_ok(list_questions))
26
 
27
  @route.post('/sentence')
28
- async def generate_questions_from_sentence(body: ICQuestion, request: Request):
29
- """Process user request
30
-
31
- Args:
32
- request (ModelInput): request model
33
- bg_task (BackgroundTasks): run process_request() on other thread
34
- and respond to request
35
-
36
- Returns:
37
- dict(str: int): response
38
- """
39
- # bg_task.add_task(process_request, request)
40
-
41
-
42
- # # Tạo một dictionary để lưu trữ kết quả
43
- # results = []
44
-
45
- # def background_task():
46
- # nonlocal results
47
- # results = process_request(request)
48
-
49
- # # Thêm tác vụ nền để xử lý yêu cầu
50
- # bg_task.add_task(background_task)
51
-
52
-
53
- # Thực hiện xử lý yêu cầu và lưu kết quả vào Firestore
54
- # Không dùng background vì để nó chạy trong cùng 1 thread để chờ xử lí xong mới có results
55
  new_questions = []
56
  error_sentences = []
57
  model_input = ModelInput(**body.model_dump(), user_id=None)
 
4
  from src.factories.gen_question.factory import create_question_instance
5
  from src.utils.response import res_ok
6
  from src.utils.text_process import vietnamese_to_english, english_to_vietnamese, get_all_summary, get_all_questions
7
+ from src.interfaces.question import ModelInput, ICreateQuestion, ICreateQuestionForParagraph
8
  from src.services.AI.abstractive_summarizer import AbstractiveSummarizer
9
  from src.services.AI.question_generator import QuestionGenerator
10
  from src.services.AI.false_ans_generator import FalseAnswerGenerator
 
21
  num_question=body.num_question,
22
  num_ans_per_question=body.num_ans_per_question,
23
  )
 
24
  return JSONResponse(status_code=200, content=res_ok(list_questions))
25
 
26
  @route.post('/sentence')
27
+ async def generate_questions_from_sentence(body: ICreateQuestionForParagraph, request: Request):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  new_questions = []
29
  error_sentences = []
30
  model_input = ModelInput(**body.model_dump(), user_id=None)