Spaces:
Sleeping
Sleeping
linhnguyen02
committed on
Commit
·
581b2e8
1
Parent(s):
19d49a8
tmp
Browse files
- src/enums/question.py +9 -1
- src/factories/gen_question_for_paragraph/factory.ts +0 -0
- src/factories/gen_question_for_paragraph/types/base.py +9 -0
- src/factories/gen_question_for_paragraph/types/synthetic.py +14 -0
- src/interfaces/question.py +12 -6
- src/llms/prompts/paragraph.py +55 -0
- src/llms/tools/paragraph.py +48 -0
- src/routers/public/quesion.py +2 -29
src/enums/question.py
CHANGED
|
@@ -11,4 +11,12 @@ class QuestionTypeEnum(str, Enum):
|
|
| 11 |
|
| 12 |
class ChoiceTypeEnum(str, Enum):
|
| 13 |
SINGLE_CHOICE = "single-choice"
|
| 14 |
-
MULTIPLE_CHOICE = "multiple-choice"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
class ChoiceTypeEnum(str, Enum):
|
| 13 |
SINGLE_CHOICE = "single-choice"
|
| 14 |
+
MULTIPLE_CHOICE = "multiple-choice"
|
| 15 |
+
|
| 16 |
+
class ParagraphQuestionTypeEnum(str, Enum):
|
| 17 |
+
FACT = "fact"
|
| 18 |
+
MAIN_IDEA = "main_idea"
|
| 19 |
+
VOCAB = "vocab"
|
| 20 |
+
INFERENCE = "inference"
|
| 21 |
+
PURPOSE = "purpose"
|
| 22 |
+
|
src/factories/gen_question_for_paragraph/factory.ts
ADDED
|
File without changes
|
src/factories/gen_question_for_paragraph/types/base.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from abc import ABC, abstractmethod
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
from src.interfaces.question import ICreateQuestionForParagraph
|
| 5 |
+
|
| 6 |
+
class Question(ABC):
|
| 7 |
+
@abstractmethod
|
| 8 |
+
def generate_questions(self, data: ICreateQuestionForParagraph):
|
| 9 |
+
pass
|
src/factories/gen_question_for_paragraph/types/synthetic.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
|
| 3 |
+
from src.enums import QuestionTypeEnum
|
| 4 |
+
from src.interfaces.question import ICreateQuestionForParagraph
|
| 5 |
+
from src.factories.gen_question_for_paragraph.types.base import Question
|
| 6 |
+
from src.llms.models import GeminiLLM
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class ParagraphQuestion(Question):
|
| 10 |
+
def __init__(self):
|
| 11 |
+
self.llm = GeminiLLM()
|
| 12 |
+
|
| 13 |
+
def generate_questions(self, data: ICreateQuestionForParagraph):
|
| 14 |
+
|
src/interfaces/question.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
from pydantic import BaseModel, Field, field_validator
|
| 2 |
-
from typing import Optional, List
|
| 3 |
|
| 4 |
-
from src.enums import QuestionTypeEnum
|
| 5 |
|
| 6 |
|
| 7 |
class ModelInput(BaseModel):
|
|
@@ -9,13 +9,19 @@ class ModelInput(BaseModel):
|
|
| 9 |
user_id: Optional[str] = None
|
| 10 |
context: str
|
| 11 |
name: str
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
-
class
|
| 14 |
-
|
| 15 |
-
|
|
|
|
| 16 |
|
| 17 |
class ICreateQuestion(BaseModel):
|
| 18 |
-
question_type:
|
| 19 |
list_words: List[str]
|
| 20 |
num_ans_per_question: int = Field(..., ge=2, le=10)
|
| 21 |
num_question: int = Field(..., ge=1, le=10)
|
|
|
|
| 1 |
from pydantic import BaseModel, Field, field_validator
|
| 2 |
+
from typing import Optional, List, Text
|
| 3 |
|
| 4 |
+
from src.enums import QuestionTypeEnum, ParagraphQuestionTypeEnum
|
| 5 |
|
| 6 |
|
| 7 |
class ModelInput(BaseModel):
|
|
|
|
| 9 |
user_id: Optional[str] = None
|
| 10 |
context: str
|
| 11 |
name: str
|
| 12 |
+
|
| 13 |
+
class IQuestionConfig(BaseModel):
|
| 14 |
+
question_type: QuestionTypeEnum
|
| 15 |
+
list_words: List[str]
|
| 16 |
+
num_question: int = Field(..., ge=1, le=5)
|
| 17 |
|
| 18 |
+
class ICreateQuestionForParagraph(BaseModel):
|
| 19 |
+
description: Text
|
| 20 |
+
num_ans_per_question: int = Field(..., ge=2, le=6)
|
| 21 |
+
list_create_question: List[IQuestionConfig]
|
| 22 |
|
| 23 |
class ICreateQuestion(BaseModel):
|
| 24 |
+
question_type: ParagraphQuestionTypeEnum
|
| 25 |
list_words: List[str]
|
| 26 |
num_ans_per_question: int = Field(..., ge=2, le=10)
|
| 27 |
num_question: int = Field(..., ge=1, le=10)
|
src/llms/prompts/paragraph.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
GEN_QUESTION_FOR_PARAGRAPH = """
|
| 2 |
+
You are an expert Artificial Intelligence specializing in creating reading comprehension questions from a given English paragraph for language learners. Your task is to generate a set of high-quality, diverse multiple-choice questions with a precise JSON structure.
|
| 3 |
+
|
| 4 |
+
### Primary Task
|
| 5 |
+
Generate a list of multiple-choice questions about the provided English paragraph, strictly adhering to the input parameters. The total number of questions must equal the sum of all count parameters (FACT_COUNT + MAIN_IDEA_COUNT + ...).
|
| 6 |
+
|
| 7 |
+
### Input
|
| 8 |
+
You will receive the following parameters:
|
| 9 |
+
1. **Paragraph (PARAGRAPH):** [The English text for which questions should be generated]
|
| 10 |
+
2. **Count of Fact-based Questions (FACT_COUNT):** [Integer]
|
| 11 |
+
3. **Count of Main Idea Questions (MAIN_IDEA_COUNT):** [Integer]
|
| 12 |
+
4. **Count of Vocabulary-in-Context Questions (VOCAB_COUNT):** [Integer]
|
| 13 |
+
5. **Count of Inference Questions (INFERENCE_COUNT):** [Integer]
|
| 14 |
+
6. **Count of Author's Purpose/Tone Questions (AUTHOR_PURPOSE_COUNT):** [Integer]
|
| 15 |
+
7. **Total Options Per Question (OPTIONS_PER_QUESTION):** [Integer, e.g., 4]
|
| 16 |
+
|
| 17 |
+
### Detailed Guidelines
|
| 18 |
+
|
| 19 |
+
1. **Adherence to Counts:** Strictly adhere to the specified number of questions for each type.
|
| 20 |
+
2. **Language:** The questions, choices, and paragraph must all be in **English**.
|
| 21 |
+
3. **Answer Structure:** Each question must have **EXACTLY ONE** correct answer.
|
| 22 |
+
|
| 23 |
+
#### Distractor Generation Rules
|
| 24 |
+
* **Fact & Main Idea:** Distractors must contain information **present in the paragraph** but which does not correctly answer the question, or is a **slightly altered/incorrect fact**.
|
| 25 |
+
* **Inference:** Distractors should be plausible-sounding inferences that **cannot be definitively proven** by the text alone.
|
| 26 |
+
* **Vocabulary:** Distractors should be synonyms or related words that are **incorrect** in the specific context of the sentence.
|
| 27 |
+
|
| 28 |
+
#### Specific Techniques for Each Question Type:
|
| 29 |
+
* **Fact:** Focus on extracting Named Entities (NER) such as Names, Dates, Figures, or direct definitions.
|
| 30 |
+
* **MainIdea:** Questions should start with phrases like: *What is the main idea of this paragraph?*, *Which of the following best summarizes...*
|
| 31 |
+
* **Inference:** Questions must use keywords: *It can be inferred that...*, *What does the author imply by...*, *Which statement is most likely true based on...*
|
| 32 |
+
* **Purpose:** Questions should focus on: *What is the author's primary purpose?*, *What is the tone of the paragraph?*
|
| 33 |
+
|
| 34 |
+
### Output Format
|
| 35 |
+
Generate a **single JSON object** (with no preceding or trailing text explanations) with the following structure:
|
| 36 |
+
|
| 37 |
+
```json
|
| 38 |
+
{
|
| 39 |
+
"paragraph": "[The English text used]",
|
| 40 |
+
"questions": [
|
| 41 |
+
{
|
| 42 |
+
"question": "...",
|
| 43 |
+
"type": "FACT", // Type must be one of the following exact values: FACT, MAIN_IDEA, VOCAB, INFERENCE, PURPOSE
|
| 44 |
+
"choices": [
|
| 45 |
+
"...", // Option A
|
| 46 |
+
"...", // Option B
|
| 47 |
+
"...", // Option C
|
| 48 |
+
"..." // Option D (Total choices must equal OPTIONS_PER_QUESTION)
|
| 49 |
+
],
|
| 50 |
+
"answer": "A" // The correct answer (must be a single character 'A', 'B', 'C', or 'D')
|
| 51 |
+
}
|
| 52 |
+
// ... (continue until the total required number of questions is met)
|
| 53 |
+
]
|
| 54 |
+
}
|
| 55 |
+
"""
|
src/llms/tools/paragraph.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
GEN_QUESTION_FOR_PARAGRAPH_OUTPUT_TOOL = {
|
| 2 |
+
"type": "function",
|
| 3 |
+
"function": {
|
| 4 |
+
"name": "parse_paragraph_questions",
|
| 5 |
+
"description": (
|
| 6 |
+
"Parse the generated reading comprehension questions from a paragraph into structured JSON."
|
| 7 |
+
),
|
| 8 |
+
"parameters": {
|
| 9 |
+
"type": "object",
|
| 10 |
+
"properties": {
|
| 11 |
+
"paragraph": {
|
| 12 |
+
"type": "string",
|
| 13 |
+
"description": "The English paragraph used to generate questions."
|
| 14 |
+
},
|
| 15 |
+
"questions": {
|
| 16 |
+
"type": "array",
|
| 17 |
+
"items": {
|
| 18 |
+
"type": "object",
|
| 19 |
+
"properties": {
|
| 20 |
+
"question": {
|
| 21 |
+
"type": "string",
|
| 22 |
+
"description": "The text of the generated multiple-choice question."
|
| 23 |
+
},
|
| 24 |
+
"type": {
|
| 25 |
+
"type": "string",
|
| 26 |
+
"enum": ["FACT", "MAIN_IDEA", "VOCAB", "INFERENCE", "PURPOSE"],
|
| 27 |
+
"description": "The type of question."
|
| 28 |
+
},
|
| 29 |
+
"choices": {
|
| 30 |
+
"type": "array",
|
| 31 |
+
"items": {"type": "string"},
|
| 32 |
+
"description": "List of answer options. Length must match `options_per_question`."
|
| 33 |
+
},
|
| 34 |
+
"answer": {
|
| 35 |
+
"type": "string",
|
| 36 |
+
"enum": ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"],
|
| 37 |
+
"description": "The correct answer (single character representing the choice)."
|
| 38 |
+
}
|
| 39 |
+
},
|
| 40 |
+
"required": ["question", "type", "choices", "answer"]
|
| 41 |
+
},
|
| 42 |
+
"description": "List of generated questions with choices and answers."
|
| 43 |
+
}
|
| 44 |
+
},
|
| 45 |
+
"required": ["paragraph", "questions"]
|
| 46 |
+
}
|
| 47 |
+
}
|
| 48 |
+
}
|
src/routers/public/quesion.py
CHANGED
|
@@ -4,7 +4,7 @@ from fastapi.responses import JSONResponse
|
|
| 4 |
from src.factories.gen_question.factory import create_question_instance
|
| 5 |
from src.utils.response import res_ok
|
| 6 |
from src.utils.text_process import vietnamese_to_english, english_to_vietnamese, get_all_summary, get_all_questions
|
| 7 |
-
from src.interfaces.question import ModelInput,
|
| 8 |
from src.services.AI.abstractive_summarizer import AbstractiveSummarizer
|
| 9 |
from src.services.AI.question_generator import QuestionGenerator
|
| 10 |
from src.services.AI.false_ans_generator import FalseAnswerGenerator
|
|
@@ -21,37 +21,10 @@ async def generate_question(body: ICreateQuestion):
|
|
| 21 |
num_question=body.num_question,
|
| 22 |
num_ans_per_question=body.num_ans_per_question,
|
| 23 |
)
|
| 24 |
-
print(list_questions)
|
| 25 |
return JSONResponse(status_code=200, content=res_ok(list_questions))
|
| 26 |
|
| 27 |
@route.post('/sentence')
|
| 28 |
-
async def generate_questions_from_sentence(body:
|
| 29 |
-
"""Process user request
|
| 30 |
-
|
| 31 |
-
Args:
|
| 32 |
-
request (ModelInput): request model
|
| 33 |
-
bg_task (BackgroundTasks): run process_request() on other thread
|
| 34 |
-
and respond to request
|
| 35 |
-
|
| 36 |
-
Returns:
|
| 37 |
-
dict(str: int): response
|
| 38 |
-
"""
|
| 39 |
-
# bg_task.add_task(process_request, request)
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
# # Tạo một dictionary để lưu trữ kết quả
|
| 43 |
-
# results = []
|
| 44 |
-
|
| 45 |
-
# def background_task():
|
| 46 |
-
# nonlocal results
|
| 47 |
-
# results = process_request(request)
|
| 48 |
-
|
| 49 |
-
# # Thêm tác vụ nền để xử lý yêu cầu
|
| 50 |
-
# bg_task.add_task(background_task)
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
# Thực hiện xử lý yêu cầu và lưu kết quả vào Firestore
|
| 54 |
-
# Không dùng background vì để nó chạy trong cùng 1 thread để chờ xử lí xong mới có results
|
| 55 |
new_questions = []
|
| 56 |
error_sentences = []
|
| 57 |
model_input = ModelInput(**body.model_dump(), user_id=None)
|
|
|
|
| 4 |
from src.factories.gen_question.factory import create_question_instance
|
| 5 |
from src.utils.response import res_ok
|
| 6 |
from src.utils.text_process import vietnamese_to_english, english_to_vietnamese, get_all_summary, get_all_questions
|
| 7 |
+
from src.interfaces.question import ModelInput, ICreateQuestion, ICreateQuestionForParagraph
|
| 8 |
from src.services.AI.abstractive_summarizer import AbstractiveSummarizer
|
| 9 |
from src.services.AI.question_generator import QuestionGenerator
|
| 10 |
from src.services.AI.false_ans_generator import FalseAnswerGenerator
|
|
|
|
| 21 |
num_question=body.num_question,
|
| 22 |
num_ans_per_question=body.num_ans_per_question,
|
| 23 |
)
|
|
|
|
| 24 |
return JSONResponse(status_code=200, content=res_ok(list_questions))
|
| 25 |
|
| 26 |
@route.post('/sentence')
|
| 27 |
+
async def generate_questions_from_sentence(body: ICreateQuestionForParagraph, request: Request):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
new_questions = []
|
| 29 |
error_sentences = []
|
| 30 |
model_input = ModelInput(**body.model_dump(), user_id=None)
|