Commit 19d49a8 · linhnguyen02 committed
Parent(s): e4ed3b1

fill in blank and rearrange
Files changed:
- .env +4 -1
- env.py +3 -0
- main.py +1 -0
- src/factories/gen_question/factory.py +6 -0
- src/factories/gen_question/types/fill_in_blank_question.py +41 -62
- src/factories/gen_question/types/incorrect_word_question.py +52 -65
- src/factories/gen_question/types/rearrange.py +62 -0
- src/llms/models/__init__.py +1 -0
- src/llms/models/base.py +33 -65
- src/llms/models/gemini.py +212 -0
- src/llms/models/gemma.py +0 -57
- src/llms/prompts/__init__.py +3 -1
- src/llms/prompts/fill_in_blank.py +54 -0
- src/llms/prompts/incorrect_question.py +72 -35
- src/llms/prompts/natural_sentence.py +25 -0
- src/llms/tools/__init__.py +2 -0
- src/llms/tools/fill_in_blank.py +38 -0
- src/llms/tools/incorrect_question.py +38 -0
- src/routers/public/quesion.py +1 -1
.env CHANGED
@@ -14,4 +14,7 @@ POOL_RECYCLE=64
 # jwt
 JWT_EXPIRATION_DELTA=24
 JWT_ALGORITHM=HS256
-JWT_SECRET=key123456
+JWT_SECRET=key123456
+
+# google
+GOOGLE_API_KEY=AIzaSyCciNiuSroJP_rnJnF08TDmIcH80-jey0o
env.py CHANGED
@@ -21,5 +21,8 @@ config = {
         "expired_in": int(os.getenv("JWT_EXPIRATION_DELTA")) | 24, # hour
         "algorithm": os.getenv("JWT_ALGORITHM"),
         "secret_key": os.getenv("JWT_SECRET"),
-    }
+    },
+    "google": {
+        "api_key": os.getenv("GOOGLE_API_KEY"),
+    }
 }
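Note on the unchanged `expired_in` line above: `|` is Python's bitwise OR, so `int(os.getenv("JWT_EXPIRATION_DELTA")) | 24` ORs the parsed value's bits with 24 (for example, `int("10") | 24` is 26) and still raises `TypeError` when the variable is unset. A minimal sketch of the fallback presumably intended:

```python
import os

# Parse the env var if set, otherwise fall back to 24 hours.
expired_in = int(os.getenv("JWT_EXPIRATION_DELTA") or 24)
```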
main.py CHANGED
@@ -1,5 +1,6 @@
 from fastapi import FastAPI, Request, HTTPException
 
+
 from src.routers.public.public import router
 from src.utils.response import handler_error
 
src/factories/gen_question/factory.py CHANGED
@@ -3,6 +3,8 @@ from src.factories.gen_question.types.antonym_question import AntonymsQuestion
 from src.factories.gen_question.types.incorrect_word_question import IncorrectWordQuestion
 from src.factories.gen_question.types.stress_question import StressQuestion
 from src.factories.gen_question.types.synonym_question import SynonymsQuestion
+from src.factories.gen_question.types.fill_in_blank_question import FillInBlankQuestion
+from src.factories.gen_question.types.rearrange import RearrangenQuestion
 from src.utils.exceptions import BadRequestException
 
 
@@ -17,5 +19,9 @@ def create_question_instance(question_type: QuestionTypeEnum) :
         return AntonymsQuestion()
     elif question_type == QuestionTypeEnum.INCORRECT_WORD:
         return IncorrectWordQuestion()
+    elif question_type == QuestionTypeEnum.FILL_IN_BLANK:
+        return FillInBlankQuestion()
+    elif question_type == QuestionTypeEnum.REARRANGE:
+        return RearrangenQuestion()
     else:
         raise BadRequestException('type_invalid')
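For orientation, the extended factory would presumably be used along these lines (a sketch; the enum member names are taken from the branches above):

```python
from src.enums import QuestionTypeEnum
from src.factories.gen_question.factory import create_question_instance

# FILL_IN_BLANK now resolves to the new Gemini-backed generator.
question = create_question_instance(QuestionTypeEnum.FILL_IN_BLANK)
items = question.generate_questions(["market", "because", "home"], num_question=1)
```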
src/factories/gen_question/types/fill_in_blank_question.py CHANGED
@@ -1,11 +1,11 @@
 from typing import List, Optional
 import random
 
-from src.enums import QuestionTypeEnum
-from src.
-from src.
-from src.
-from src.
+from src.enums import QuestionTypeEnum, ChoiceTypeEnum
+from src.factories.gen_question.types.base import Question
+from src.llms.models import GeminiLLM
+from src.llms.tools import GEN_FILL_IN_BLANK_QUESTION_TOOL
+from src.llms.prompts import GEN_FILL_IN_BLANK_QUESTION_PROMPT
 
 
 class FillInBlankQuestion(Question):
@@ -15,6 +15,8 @@ class FillInBlankQuestion(Question):
     It picks a word, generates a sentence containing it, replaces it with a blank,
     and provides several answer choices (one correct and others incorrect).
     """
+    def __init__(self):
+        self.llm = GeminiLLM()
 
     def generate_questions(self, list_words: List[str], num_question: int = 1, num_ans_per_question: int = 4):
         if not list_words:
@@ -22,7 +24,6 @@ class FillInBlankQuestion(Question):
 
         result = []
         list_unique_words = set(list_words)
-        sentence_generator = SentenceGeneratorModel()
 
         def choice_word_to_gen_sentence():
             number_choice_word = random.randint(1, 4)
@@ -32,61 +33,39 @@ class FillInBlankQuestion(Question):
                 choice_word = random.sample(available_words, number_choice_word)
                 for w in choice_word:
                     list_unique_words.remove(w)
-
-
-            choice_word = available_words.copy()
-            remaining = number_choice_word - len(choice_word)
-            additional_words = random.sample(nltk_words, remaining)
-            choice_word += additional_words
-            list_unique_words.clear()
-
-            return choice_word
+                return choice_word
+            return []
 
         for _ in range(num_question):
-            [… about thirty deleted lines are illegible in the diff view …]
-            distractors = set()
-
-            for t in list_transform_type:
-                transformer = transform_word_instance(t)
-                transformed = transformer.transform_word(word)
-                if transformed and transformed != word:
-                    distractors.add(transformed)
-                if len(distractors) >= num_distractors:
-                    break
-
-            if len(distractors) < num_distractors and nltk_words:
-                additional = random.sample(nltk_words, num_distractors - len(distractors))
-                distractors.update(additional)
-
-            return list(distractors)
+            list_choice_words = choice_word_to_gen_sentence()
+
+            prompt = GEN_FILL_IN_BLANK_QUESTION_PROMPT
+            _tools = [GEN_FILL_IN_BLANK_QUESTION_TOOL]
+            raw_output = self.llm.generate_response(
+                messages=[
+                    {
+                        "role": "system",
+                        "content": prompt
+                    },
+                    {
+                        "role": "user",
+                        "content": f"List of words: {', '.join(list_choice_words)}, Type of question: {ChoiceTypeEnum.SINGLE_CHOICE.value}, Number of answer choices: {num_ans_per_question}"
+                    }
+                ],
+                tools=_tools,
+            )
+
+            if "tool_calls" in raw_output and raw_output["tool_calls"]:
+                for call in raw_output["tool_calls"]:
+                    if call.get("name") == "gen_fill_in_blank_question":
+                        data = call.get("arguments", {})
+                        result.append({
+                            "question": data.get("question"),
+                            "type": QuestionTypeEnum.FILL_IN_BLANK,
+                            "choices": data.get("choices", []),
+                            "answer": data.get("answer"),
+                            "explanation": data.get("explanation"),
+                            "tags": data.get("tags", []),
+                        })
+
+        return result
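The `tool_calls` loop above consumes the dict that `GeminiLLM._parse_response` returns when tools are passed (see src/llms/models/gemini.py below). An illustrative shape, with values borrowed from the tool schema's own examples:

```python
raw_output = {
    "content": None,
    "tool_calls": [
        {
            "name": "gen_fill_in_blank_question",
            "arguments": {
                "question": "She went to the market ____ it was near her home.",
                "choices": ["because", "although", "and", "but"],
                "answer": ["because"],
                "explanation": "She went to the market because it was near her home.",
                "tags": ["connector"],
            },
        }
    ],
}
```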
src/factories/gen_question/types/incorrect_word_question.py CHANGED
@@ -1,22 +1,16 @@
 from typing import List, Optional
 import random
 
-from src.
-from src.
-from src.
-from src.
-from src.
-
+from src.enums import ChoiceTypeEnum, QuestionTypeEnum
+from src.factories.gen_question.types.base import Question
+from src.llms.models import GeminiLLM
+from src.llms.tools import GEN_INCORRECT_WORD_QUESTION_TOOL
+from src.llms.prompts import GEN_INCORRECT_WORD_QUESTION_PROMPT
 
 class IncorrectWordQuestion(Question):
-    [… docstring opening lines illegible in the diff view …]
-    It selects a word from the list, generates a sentence using a simple pattern,
-    and injects a grammatically incorrect word into the sentence.
-    """
-
+    def __init__(self):
+        self.llm = GeminiLLM()
+
     def generate_questions(self, list_words: List[str], num_question: int = 1, num_ans_per_question: int = 4):
         if list_words is None:
             list_words = []
@@ -24,8 +18,6 @@ class IncorrectWordQuestion(Question):
         result = []
         list_unique_words = set(list_words)
 
-        sentence_generator = SentenceGeneratorModel()
-
         def choice_word_to_gen_sentence():
             number_choice_word = random.randint(1, 4)
 
@@ -34,55 +26,50 @@ class IncorrectWordQuestion(Question):
                 choice_word = random.sample(available_words, number_choice_word)
                 for w in choice_word:
                     list_unique_words.remove(w)
-
-            # Take all the remaining words and add words from nltk_words
-            choice_word = available_words.copy()
-            remaining = number_choice_word - len(choice_word)
-            additional_words = random.sample(nltk_words, remaining)
-            choice_word += additional_words
-            list_unique_words.clear()
+                return choice_word
 
-            return
+            return []
 
         for _ in range(num_question):
-            [… about forty deleted lines are illegible in the diff view …]
+            list_choice_words = choice_word_to_gen_sentence()
+
+            prompt = GEN_INCORRECT_WORD_QUESTION_PROMPT
+            _tools = [GEN_INCORRECT_WORD_QUESTION_TOOL]
+            raw_output = self.llm.generate_response(
+                messages=[
+                    {
+                        "role": "system",
+                        "content": prompt
+                    },
+                    {
+                        "role": "user",
+                        "content": f"List of words: {', '.join(list_choice_words)}, Type of question: {ChoiceTypeEnum.SINGLE_CHOICE.value}, Number of answer choices: {num_ans_per_question}"
+                    }
+                ],
+                tools=_tools,
+            )
+
+            if "tool_calls" in raw_output and raw_output["tool_calls"]:
+                for call in raw_output["tool_calls"]:
+                    if call.get("name") == "gen_find_error_question":
+                        data = call.get("arguments", {})
+                        result.append({
+                            "question": data.get("question"),
+                            "type": QuestionTypeEnum.INCORRECT_WORD,
+                            "choices": data.get("choices", []),
+                            "answer": data.get("answer"),
+                            "explanation": data.get("explanation"),
+                            "tags": data.get("tags", []),
+                        })
+
+            # random.shuffle(choices)
+            # result.append({
+            #     "question": modified_sentence,
+            #     "type": QuestionTypeEnum.INCORRECT_WORD,
+            #     "choices": choices,
+            #     "answer": choices.index(incorrect_word),
+            #     "explain": ["Correct: {sequence}"],
+            # })
+
+        return result
src/factories/gen_question/types/rearrange.py ADDED
@@ -0,0 +1,62 @@
+from typing import List, Optional
+import random
+
+from src.enums import QuestionTypeEnum
+from src.factories.gen_question.types.base import Question
+from src.llms.models import GeminiLLM
+from src.llms.prompts import GEN_NATURAL_SENTENCE_PROMPT
+
+
+class RearrangenQuestion(Question):
+    """
+    This class generates multiple-choice 'rearrange' questions.
+    """
+    def __init__(self):
+        self.llm = GeminiLLM()
+
+    def generate_questions(self, list_words: List[str], num_question: int = 1, num_ans_per_question: int = 4):
+        if not list_words:
+            list_words = []
+
+        result = []
+        list_unique_words = set(list_words)
+
+        def choice_word_to_gen_sentence():
+            number_choice_word = random.randint(1, 4)
+
+            available_words = list(list_unique_words)
+            if number_choice_word <= len(available_words):
+                choice_word = random.sample(available_words, number_choice_word)
+                for w in choice_word:
+                    list_unique_words.remove(w)
+                return choice_word
+            return []
+
+        for _ in range(num_question):
+            list_choice_words = choice_word_to_gen_sentence()
+
+            prompt = GEN_NATURAL_SENTENCE_PROMPT
+            sentence = self.llm.generate_response(
+                messages=[
+                    {
+                        "role": "system",
+                        "content": prompt
+                    },
+                    {
+                        "role": "user",
+                        "content": f"List of words: {', '.join(list_choice_words)}"
+                    }
+                ],
+            )
+
+            words = sentence.split()
+            shuffled_words = words[:]
+            random.shuffle(shuffled_words)
+
+            result.append({
+                "question": " / ".join(shuffled_words),
+                "type": QuestionTypeEnum.REARRANGE,
+                "answer": sentence
+            })
+
+        return result
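Since no `tools` are passed here, `generate_response` returns the model text directly (see `_parse_response` in gemini.py below), so `sentence` is a plain string. The shuffle step then yields question strings such as:

```python
import random

sentence = "She went to the market because it was near her home."
shuffled_words = sentence.split()
random.shuffle(shuffled_words)
# One possible "question" value:
# "market / because / She / home. / near / the / was / it / to / went / her"
print(" / ".join(shuffled_words))
```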
src/llms/models/__init__.py ADDED
@@ -0,0 +1 @@
+from .gemini import GeminiLLM
src/llms/models/base.py CHANGED
@@ -1,75 +1,43 @@
-import
-import
-
-
-class
-
-    """
-
-    Args:
-
-        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
-
-            torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
-            device_map="auto" if torch.cuda.is_available() else None,
-        ).to(self.device)
-        print("✅ Model and tokenizer loaded successfully.\n")
-
-    def tokenize_corpus(self, text: str, max_length: int = 256):
-        """Tokenize input text and return tensors."""
-        encode = self.tokenizer(
-            text,
-            return_tensors="pt",
-            max_length=max_length,
-            truncation=True,
-            padding=False,
-        )
-        return encode["input_ids"].to(self.device), encode["attention_mask"].to(self.device)
-
-    def inference(
-        self,
-        prompt: str,
-        temperature: float = 0.7,
-        top_p: float = 0.9,
-        num_beams: int = 1,
-        max_new_tokens: int = 128,
-        token_max_length: int = 256,
-    ):
-        """
-            temperature (float): Sampling temperature (higher = more creative).
-            top_p (float): Nucleus sampling parameter.
-            num_beams (int): Number of beams (set 1 for sampling).
-            max_new_tokens (int): Maximum number of tokens to generate.
-            token_max_length (int): Max length for tokenization.
-        """
-            input_ids=input_ids,
-            attention_mask=attention_mask,
-            do_sample=True if num_beams == 1 else False,
-            temperature=temperature,
-            top_p=top_p,
-            num_beams=num_beams,
-            max_new_tokens=max_new_tokens,
-            pad_token_id=self.tokenizer.eos_token_id,
-        )
-
-        return decoded.strip()
+from abc import ABC, abstractmethod
+from typing import Dict, List, Optional
+
+
+class LLMBase(ABC):
+    """
+    Base class for all LLM providers.
+    Handles common functionality and delegates provider-specific logic to subclasses.
+    """
+
+    @abstractmethod
+    def generate_response(
+        self, messages: List[Dict[str, str]], tools: Optional[List[Dict]] = None, tool_choice: str = "auto", **kwargs
+    ):
+        """
+        Generate a response based on the given messages.
+
+        Args:
+            messages (list): List of message dicts containing 'role' and 'content'.
+            tools (list, optional): List of tools that the model can call. Defaults to None.
+            tool_choice (str, optional): Tool choice method. Defaults to "auto".
+            **kwargs: Additional provider-specific parameters.
+
+        Returns:
+            str or dict: The generated response.
+        """
+        pass
+
+    [… a helper method signature here is illegible in the diff view …]
+        """
+        Get common parameters that most providers use.
+
+        Returns:
+            Dict: Common parameters dictionary.
+        """
+        params = {
+            "temperature": self.config.temperature,
+            "max_tokens": self.config.max_tokens,
+            "top_p": self.config.top_p,
+        }
+
+        # Add provider-specific parameters from kwargs
+        params.update(kwargs)
+
+        return params
src/llms/models/gemini.py ADDED
@@ -0,0 +1,212 @@
+import os
+import threading
+
+from typing import Dict, List, Optional
+
+try:
+    from google import genai
+    from google.genai import types
+except ImportError:
+    raise ImportError("The 'google-genai' library is required. Please install it using 'pip install google-genai'.")
+
+from .base import LLMBase
+from env import config
+
+
+class GeminiLLM(LLMBase):
+    _instance = None
+    _lock = threading.Lock()
+
+    def __new__(cls, *args, **kwargs):
+        with cls._lock:
+            if cls._instance is None:
+                cls._instance = super(GeminiLLM, cls).__new__(cls)
+        return cls._instance
+
+    def __init__(self, temperature=0.7, max_tokens=1024, top_p=0.9):
+        if hasattr(self, "_initialized") and self._initialized:
+            return
+        self.model = "gemini-2.5-flash"
+        self.temperature = temperature
+        self.max_tokens = max_tokens
+        self.top_p = top_p
+        self.client = genai.Client(api_key=config["google"]["api_key"])
+        self._initialized = True
+
+    def _parse_response(self, response, tools):
+        """
+        Process the response based on whether tools are used or not.
+
+        Args:
+            response: The raw response from API.
+            tools: The list of tools provided in the request.
+
+        Returns:
+            str or dict: The processed response.
+        """
+        if tools:
+            processed_response = {
+                "content": None,
+                "tool_calls": [],
+            }
+
+            # Extract content from the first candidate
+            if response.candidates and response.candidates[0].content.parts:
+                for part in response.candidates[0].content.parts:
+                    if hasattr(part, "text") and part.text:
+                        processed_response["content"] = part.text
+                        break
+
+            # Extract function calls
+            if response.candidates and response.candidates[0].content.parts:
+                for part in response.candidates[0].content.parts:
+                    if hasattr(part, "function_call") and part.function_call:
+                        fn = part.function_call
+                        processed_response["tool_calls"].append(
+                            {
+                                "name": fn.name,
+                                "arguments": dict(fn.args) if fn.args else {},
+                            }
+                        )
+
+            return processed_response
+        else:
+            if response.candidates and response.candidates[0].content.parts:
+                for part in response.candidates[0].content.parts:
+                    if hasattr(part, "text") and part.text:
+                        return part.text
+            return ""
+
+    def _reformat_messages(self, messages: List[Dict[str, str]]):
+        """
+        Reformat messages for Gemini.
+
+        Args:
+            messages: The list of messages provided in the request.
+
+        Returns:
+            tuple: (system_instruction, contents_list)
+        """
+        system_instruction = None
+        contents = []
+
+        for message in messages:
+            if message["role"] == "system":
+                system_instruction = message["content"]
+            else:
+                content = types.Content(
+                    parts=[types.Part(text=message["content"])],
+                    role=message["role"],
+                )
+                contents.append(content)
+
+        return system_instruction, contents
+
+    def _reformat_tools(self, tools: Optional[List[Dict]]):
+        """
+        Reformat tools for Gemini.
+
+        Args:
+            tools: The list of tools provided in the request.
+
+        Returns:
+            list: The list of tools in the required format.
+        """
+
+        def remove_additional_properties(data):
+            """Recursively removes 'additionalProperties' from nested dictionaries."""
+            if isinstance(data, dict):
+                filtered_dict = {
+                    key: remove_additional_properties(value)
+                    for key, value in data.items()
+                    if not (key == "additionalProperties")
+                }
+                return filtered_dict
+            else:
+                return data
+
+        if tools:
+            function_declarations = []
+            for tool in tools:
+                func = tool["function"].copy()
+                cleaned_func = remove_additional_properties(func)
+
+                function_declaration = types.FunctionDeclaration(
+                    name=cleaned_func["name"],
+                    description=cleaned_func.get("description", ""),
+                    parameters=cleaned_func.get("parameters", {}),
+                )
+                function_declarations.append(function_declaration)
+
+            tool_obj = types.Tool(function_declarations=function_declarations)
+            return [tool_obj]
+        else:
+            return None
+
+    def generate_response(
+        self,
+        messages: List[Dict[str, str]],
+        response_format=None,
+        tools: Optional[List[Dict]] = None,
+        tool_choice: str = "auto",
+    ):
+        """
+        Generate a response based on the given messages using Gemini.
+
+        Args:
+            messages (list): List of message dicts containing 'role' and 'content'.
+            response_format (str or object, optional): Format for the response. Defaults to "text".
+            tools (list, optional): List of tools that the model can call. Defaults to None.
+            tool_choice (str, optional): Tool choice method. Defaults to "auto".
+
+        Returns:
+            str: The generated response.
+        """
+
+        # Extract system instruction and reformat messages
+        system_instruction, contents = self._reformat_messages(messages)
+
+        # Prepare generation config
+        config_params = {
+            "temperature": self.temperature,
+            "max_output_tokens": self.max_tokens,
+            "top_p": self.top_p,
+        }
+
+        # Add system instruction to config if present
+        if system_instruction:
+            config_params["system_instruction"] = system_instruction
+
+        if response_format is not None and response_format["type"] == "json_object":
+            config_params["response_mime_type"] = "application/json"
+            if "schema" in response_format:
+                config_params["response_schema"] = response_format["schema"]
+
+        if tools:
+            formatted_tools = self._reformat_tools(tools)
+            config_params["tools"] = formatted_tools
+
+            if tool_choice:
+                if tool_choice == "auto":
+                    mode = types.FunctionCallingConfigMode.AUTO
+                elif tool_choice == "any":
+                    mode = types.FunctionCallingConfigMode.ANY
+                else:
+                    mode = types.FunctionCallingConfigMode.NONE
+
+                tool_config = types.ToolConfig(
+                    function_calling_config=types.FunctionCallingConfig(
+                        mode=mode,
+                        allowed_function_names=(
+                            [tool["function"]["name"] for tool in tools] if tool_choice == "any" else None
+                        ),
+                    )
+                )
+                config_params["tool_config"] = tool_config
+
+        generation_config = types.GenerateContentConfig(**config_params)
+        response = self.client.models.generate_content(
+            model=self.model, contents=contents, config=generation_config
+        )
+
+        return self._parse_response(response, tools)
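Putting the pieces together, direct use of this client looks roughly like the sketch below. Note that `__new__`/`__init__` make the class a process-wide singleton, so constructor arguments only take effect on first construction:

```python
from src.llms.models import GeminiLLM
from src.llms.tools import GEN_FILL_IN_BLANK_QUESTION_TOOL

llm = GeminiLLM()

# Without tools: returns the generated text as a str.
text = llm.generate_response(messages=[{"role": "user", "content": "Say hello."}])

# With tools: returns {"content": ..., "tool_calls": [{"name": ..., "arguments": {...}}]}.
out = llm.generate_response(
    messages=[{"role": "user", "content": "List of words: market, because"}],
    tools=[GEN_FILL_IN_BLANK_QUESTION_TOOL],
)
for call in out["tool_calls"]:
    print(call["name"], call["arguments"])
```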
src/llms/models/gemma.py DELETED
@@ -1,57 +0,0 @@
-from typing import List
-
-from .base import Model
-from src.enums import ChoiceTypeEnum
-from src.llms.prompts import GEN_INCORRECT_WORD_QUESTION_PROMPT
-
-
-class GemmaModel(Model):
-    """Generalized text generation model (compatible with Gemma / GPT-style models)."""
-
-    _instance = None
-
-    def __new__(cls):
-        if cls._instance is None:
-            cls._instance = super(GemmaModel, cls).__new__(cls)
-            cls._instance._init_model()
-        return cls._instance
-
-    def _init_model(self):
-        super().__init__(model_name="google/gemma-2b-it", device=None)
-
-    def generate(self, list_words: List[str], question_type: ChoiceTypeEnum, num_ans_per_question: int = 4) -> str:
-        """Generate a multiple-choice question from a word list and a question type.
-
-        Args:
-            list_words (List[str]): List of words used to create the question.
-            question_type (ChoiceTypeEnum): Type of multiple-choice question.
-            num_ans_per_question (int): Number of choices per question (default is 4).
-
-        Returns:
-            str: The generated question.
-
-        Raises:
-            ValueError: If the word list is empty or the number of choices is invalid.
-        """
-        if not list_words:
-            raise ValueError("The word list must not be empty.")
-        if num_ans_per_question < 2:
-            raise ValueError("The number of choices must be at least 2.")
-
-        prompt = GEN_INCORRECT_WORD_QUESTION_PROMPT.format(
-            list_of_words=", ".join(list_words),
-            question_type=question_type.value,
-            num_choices=num_ans_per_question
-        )
-
-        try:
-            return self.inference(
-                prompt,
-                num_beams=4,
-                no_repeat_ngram_size=2,
-                model_max_length=128,
-                num_return_sequences=1,
-                token_max_length=256,
-            )
-        except Exception as e:
-            raise RuntimeError(f"Error while generating the question: {e}")
src/llms/prompts/__init__.py CHANGED
@@ -1 +1,3 @@
-from .incorrect_question import GEN_INCORRECT_WORD_QUESTION_PROMPT
+from .incorrect_question import GEN_INCORRECT_WORD_QUESTION_PROMPT
+from .fill_in_blank import GEN_FILL_IN_BLANK_QUESTION_PROMPT
+from .natural_sentence import GEN_NATURAL_SENTENCE_PROMPT
src/llms/prompts/fill_in_blank.py ADDED
@@ -0,0 +1,54 @@
+GEN_FILL_IN_BLANK_QUESTION_PROMPT = """
+### Task
+You are an expert in automatically generating English exam questions.
+Create **one "Fill in the Blank" question** in English.
+
+### Input
+- A list of words to be used to create the sentence.
+- Question type: "single-choice" or "multiple-choice".
+- Desired number of answer choices.
+
+### Core requirements
+1. First produce a **grammatically correct, natural, meaningful English sentence** using all or most of the given words.
+   - Sentence may be simple or composed (up to 3 clauses).
+   - Sentence length: **8–30 words**.
+   - This correct sentence will be used later as the `explanation` value (the full correct version).
+   - If no words are provided, you may freely create a sentence.
+
+2. **Validation step (mandatory):** Before creating the blank, ensure the sentence is fully grammatical and natural.
+   - Avoid unnatural collocations or redundant connectors.
+   - If any connector misuse or tense inconsistency is found, rewrite the sentence until it is correct.
+
+### Connector rules (important — follow exactly)
+- **Do NOT combine a subordinating conjunction like "although", "though", "while", or "despite" with a coordinating conjunction "but" in the same sentence.**
+  - Incorrect: "Although he was tired, but he continued."
+  - Correct: "Although he was tired, he continued." or "He was tired, but he continued."
+- Do NOT use duplicate connectors (e.g., "although ... however").
+- Avoid redundant fillers such as "but yet", "and also", "and then then".
+- If you use "because", do not also use "so" in the same causal relationship.
+
+---
+
+### Blank-creation rules
+3. After you have a validated correct sentence, choose **1 (for single-choice)** or **2 or more (for multiple-choice)** important words to replace with blanks (`____`).
+   - Prefer key grammatical or lexical targets (e.g., verbs, prepositions, conjunctions, or collocations).
+   - Do not remove punctuation or articles unless necessary.
+   - Example:
+     - Original: "She went to the market because it was near her home."
+     - Fill-in: "She went to the market ____ it was near her home."
+
+4. The blank(s) must make sense — the question should be solvable through grammar or meaning, not guessing.
+
+---
+
+### Choices and answer
+5. Create a `choices` list of answer options (equal to the desired number).
+   - Include the correct word(s) from the original sentence.
+   - For incorrect distractors, use words of similar part of speech or similar meaning but wrong in context.
+
+6. The `answer` field must list the correct word(s) that fill the blank(s).
+7. The `explanation` field must contain the **full correct sentence** (before blanking).
+8. The `tags` field must list the linguistic skill tested (e.g., "preposition", "connector", "verb tense", "collocation", "vocabulary").
+
+Follow these rules strictly and make the question natural, educational, and clear.
+"""
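An output satisfying these rules, in the field layout enforced by the companion tool schema (src/llms/tools/fill_in_blank.py below), would look like this illustrative example built from the prompt's own sample sentence:

```python
{
    "question": "She went to the market ____ it was near her home.",
    "choices": ["because", "although", "and", "but"],
    "answer": ["because"],
    "explanation": "She went to the market because it was near her home.",
    "tags": ["connector"],
}
```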
src/llms/prompts/incorrect_question.py CHANGED
@@ -1,40 +1,77 @@
 GEN_INCORRECT_WORD_QUESTION_PROMPT = """
-You are an expert English question generator.
-
 ### Task
-[… the remainder of the old prompt body is largely illegible in the diff view; recoverable fragments include "### Input", "- Question type:", "1.", and "- If" …]
+You are an expert in automatically generating English exam questions.
+Create **one "Find the Error" question** in English.
 
 ### Input
+- A list of words to be used to create the sentence.
+- Question type: "single-choice" or "multiple-choice".
+- Desired number of answer choices.
+
+### Core requirements
+1. First produce a **grammatically correct, natural, meaningful English sentence** using all or most of the given words.
+   - Sentence may be simple or composed (up to 3 clauses).
+   - Sentence length: **8–30 words**.
+   - This correct sentence will be used later as the `explanation` value (without indices).
+   - If no words are provided, you may freely create a sentence.
+
+2. **Validation step (mandatory):** Before adding any errors, ensure the correct sentence is fully grammatical. If any common connector misuse (see "Connector rules" below) or other obvious mistake is present, rewrite the sentence until it is correct.
+
+### Connector rules (important — follow exactly)
+- **Do NOT combine a subordinating conjunction like "although", "though", "while", or "despite" with a coordinating conjunction "but" in the same sentence.**
+  - Incorrect: "Although he was tired, but he continued."
+  - Correct: "Although he was tired, he continued." or "He was tired, but he continued."
+- Do NOT repeat equivalent connectors (e.g., do not use "although" and "however" together to signal the same contrast).
+- Avoid redundant fillers such as "but yet", "and also", "and then then".
+- If you use "because", ensure the result clause logically follows and you do not also use "so" to repeat causation.
+
+### Error-introduction rules
+3. After you have a validated correct sentence, create an **erroneous version** by introducing errors according to `question_type`:
+   - `single-choice`: **exactly 1 error**.
+   - `multiple-choice`: **2 or more errors**.
+4. Add an index number to each word in the erroneous sentence (e.g., "He(1) talk(2) when(3) I(4) talk(5).").
+
+### Choices and answer
+5. Create a `choices` list (each option must include the index number as shown in the sentence). The total number of choices must equal the desired number.
+   - Include at least 1 incorrect option (for single-choice) or at least 2 incorrect options (for multiple-choice); the rest should be correct words/phrases.
+6. The `answer` field must list exactly the incorrect word(s)/phrase(s) with their indices — these must appear among `choices`.
+7. The `explanation` field must contain the validated **correct sentence** from step 1 (no indices).
+8. The `tags` field must list the error types that were introduced (e.g., "verb tense", "article", "vocabulary", "singular/plural", "sentence structure", "connector misuse", etc.).
+
+Follow these rules strictly to avoid connector redundancy and other common grammatical mistakes.
+"""
+
+
+GEN_INCORRECT_WORD_QUESTION_PROMPT_VI = """
+### Nhiệm vụ
+Bạn là chuyên gia tạo câu hỏi tiếng Anh tự động cho các bài thi.
+Hãy tạo **một câu hỏi dạng "Tìm lỗi sai" (Find the Error)** bằng tiếng Anh.
+
+### Đầu vào
+- Danh sách các từ được dùng để tạo câu.
+- Loại câu hỏi: "single-choice" hoặc "multiple-choice".
+- Số lượng lựa chọn mong muốn.
+
+### Hướng dẫn
+1. Tạo **một câu tiếng Anh tự nhiên, đúng ngữ pháp và đúng ngữ nghĩa**, sử dụng tất cả hoặc hầu hết các từ được cung cấp.
+   - Câu có thể là câu đơn hoặc câu phức (tối đa 3 mệnh đề, nối bằng *and, but, because, when, although*...).
+   - Độ dài câu: **8–30 từ**.
+   - Câu đúng này sẽ được sử dụng làm giá trị cho trường explanation ở cuối.
+
+2. Tạo ra **lỗi ngữ pháp hoặc lỗi từ vựng** trong câu dựa theo loại câu hỏi:
+   - Nếu `question_type = "single-choice"` thì tạo ra câu có đúng 1 lỗi.
+   - Nếu `question_type = "multiple-choice"` thì tạo ra câu có từ 2 lỗi trở lên.
+
+3. **Đánh số chỉ mục cho từng từ** trong câu (ví dụ: `"He(1) talk(2) when(3) I(4) talk(5)."`).
+
+4. Tạo danh sách **choices** bao gồm cả từ đúng và từ sai trong câu (có chỉ mục).
+   - Tổng số lượng lựa chọn = giá trị đã yêu cầu trong đầu vào.
+   - Phân bổ hợp lý: ít nhất 1 hoặc 2 từ sai, phần còn lại là từ đúng.
+
+5. Trường **answer** chứa chính xác các từ hoặc cụm sai (phải nằm trong `choices`).
+6. Trường **explanation** chứa chính xác Câu Đúng từ Mục 1 (không đánh số chỉ mục).
+7. Trường **tags** liệt kê loại lỗi (ví dụ: `"thì động từ"`, `"mạo từ"`, `"từ vựng"`, `"số ít/số nhiều"`, `"cấu trúc câu"`, v.v.).
+
+Tuân thủ chặt chẽ các hướng dẫn trên để đảm bảo chất lượng cao trong quá trình tạo câu hỏi.
 """
src/llms/prompts/natural_sentence.py ADDED
@@ -0,0 +1,25 @@
+GEN_NATURAL_SENTENCE_PROMPT = """
+### Task
+You are an expert English sentence generator.
+Your task is to create **one natural, grammatically correct, and meaningful English sentence**.
+
+### Input
+- A list of English words that should appear in the sentence (if provided).
+
+### Requirements
+1. Use **all or most** of the given words naturally and in the correct grammatical order.
+   - If no words are provided, freely create a natural sentence on any general topic.
+2. The sentence must:
+   - Be **fully grammatical and fluent**.
+   - Contain **8–20 words**.
+   - Be **coherent** (logical meaning, not random).
+   - Sound **natural** as if written by a native English speaker.
+3. Allowed topics: everyday life, travel, work, study, hobbies, nature, or simple human experiences.
+4. Avoid:
+   - Unnecessary repetition.
+   - Connector misuse (e.g., "Although … but …").
+   - Unnatural collocations or incomplete clauses.
+
+### Output
+Return **only one English sentence** that satisfies the above requirements.
+"""
src/llms/tools/__init__.py ADDED
@@ -0,0 +1,2 @@
+from .incorrect_question import GEN_INCORRECT_WORD_QUESTION_TOOL
+from .fill_in_blank import GEN_FILL_IN_BLANK_QUESTION_TOOL
src/llms/tools/fill_in_blank.py ADDED
@@ -0,0 +1,38 @@
+GEN_FILL_IN_BLANK_QUESTION_TOOL = {
+    "type": "function",
+    "function": {
+        "name": "gen_fill_in_blank_question",
+        "description": (
+            "Extract the components of a Fill in the Blank English question."
+        ),
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "question": {
+                    "type": "string",
+                    "description": "The generated fill-in-the-blank question (e.g., 'She went to the market ____ it was near her home.')."
+                },
+                "choices": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                    "description": "List of answer choices (e.g., ['because', 'although', 'and', 'but'])."
+                },
+                "answer": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                    "description": "List of the correct answer word(s) (e.g., ['because'])."
+                },
+                "explanation": {
+                    "type": "string",
+                    "description": "The correct full version of the sentence (before creating blanks)."
+                },
+                "tags": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                    "description": "List of linguistic categories tested (e.g., 'connector', 'preposition', 'verb tense', etc.)."
+                }
+            },
+            "required": ["question", "choices", "answer", "explanation", "tags"]
+        }
+    }
+}
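When this dict reaches `GeminiLLM._reformat_tools`, only the `function` entry is used: any `additionalProperties` keys are stripped recursively, and the rest becomes a `types.FunctionDeclaration`, roughly:

```python
from google.genai import types

from src.llms.tools import GEN_FILL_IN_BLANK_QUESTION_TOOL

func = GEN_FILL_IN_BLANK_QUESTION_TOOL["function"]
declaration = types.FunctionDeclaration(
    name=func["name"],
    description=func.get("description", ""),
    parameters=func.get("parameters", {}),
)
gemini_tool = types.Tool(function_declarations=[declaration])
```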
src/llms/tools/incorrect_question.py ADDED
@@ -0,0 +1,38 @@
+GEN_INCORRECT_WORD_QUESTION_TOOL = {
+    "type": "function",
+    "function": {
+        "name": "gen_find_error_question",
+        "description": (
+            "Extract the components of a Find the Error English question."
+        ),
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "question": {
+                    "type": "string",
+                    "description": "The generated question: an erroneous English sentence with each word indexed (e.g., 'He(1) talk(2) when(3) I(4) talk(5).')."
+                },
+                "choices": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                    "description": "List of answer choices. Each choice is an indexed word/phrase from the question (e.g., 'talk(2)')."
+                },
+                "answer": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                    "description": "List of the *exact* incorrect word(s)/phrase(s) *with their indices* (e.g., ['talk(2)', 'angry(8)']). This must match the incorrect options in 'choices'."
+                },
+                "explanation": {
+                    "type": "string",
+                    "description": "The grammatically correct version of the sentence (the original sentence from step 1 of the prompt), without indices."
+                },
+                "tags": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                    "description": "List of linguistic error types introduced in the question (e.g., 'verb tense', 'article', 'vocabulary')."
+                }
+            },
+            "required": ["question", "choices", "answer", "explanation", "tags"]
+        }
+    }
+}
src/routers/public/quesion.py CHANGED
@@ -25,7 +25,7 @@ async def generate_question(body: ICreateQuestion):
     return JSONResponse(status_code=200, content=res_ok(list_questions))
 
 @route.post('/sentence')
-async def generate_questions_from_sentence(
+async def generate_questions_from_sentence(body: ICQuestion, request: Request):
     """Process user request
 
     Args: