Commit 19d49a8 · linhnguyen02 committed
Parent(s): e4ed3b1

fill in blank and rearrange
Files changed:
- .env +4 -1
- env.py +3 -0
- main.py +1 -0
- src/factories/gen_question/factory.py +6 -0
- src/factories/gen_question/types/fill_in_blank_question.py +41 -62
- src/factories/gen_question/types/incorrect_word_question.py +52 -65
- src/factories/gen_question/types/rearrange.py +62 -0
- src/llms/models/__init__.py +1 -0
- src/llms/models/base.py +33 -65
- src/llms/models/gemini.py +212 -0
- src/llms/models/gemma.py +0 -57
- src/llms/prompts/__init__.py +3 -1
- src/llms/prompts/fill_in_blank.py +54 -0
- src/llms/prompts/incorrect_question.py +72 -35
- src/llms/prompts/natural_sentence.py +25 -0
- src/llms/tools/__init__.py +2 -0
- src/llms/tools/fill_in_blank.py +38 -0
- src/llms/tools/incorrect_question.py +38 -0
- src/routers/public/quesion.py +1 -1
.env CHANGED
@@ -14,4 +14,7 @@ POOL_RECYCLE=64
 # jwt
 JWT_EXPIRATION_DELTA=24
 JWT_ALGORITHM=HS256
-JWT_SECRET=key123456
+JWT_SECRET=key123456
+
+# google
+GOOGLE_API_KEY=AIzaSyCciNiuSroJP_rnJnF08TDmIcH80-jey0o
env.py CHANGED
@@ -21,5 +21,8 @@ config = {
         "expired_in": int(os.getenv("JWT_EXPIRATION_DELTA")) | 24, # hour
         "algorithm": os.getenv("JWT_ALGORITHM"),
         "secret_key": os.getenv("JWT_SECRET"),
-    }
+    },
+    "google": {
+        "api_key": os.getenv("GOOGLE_API_KEY"),
+    }
 }
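Note on the unchanged `expired_in` line above: `|` is Python's bitwise OR, so `int(os.getenv("JWT_EXPIRATION_DELTA")) | 24` ORs the parsed value's bits with 24 (for example, `int("10") | 24` is 26) and still raises `TypeError` when the variable is unset. A minimal sketch of the fallback presumably intended:

```python
import os

# Parse the env var if set, otherwise fall back to 24 hours.
expired_in = int(os.getenv("JWT_EXPIRATION_DELTA") or 24)
```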
main.py CHANGED
@@ -1,5 +1,6 @@
 from fastapi import FastAPI, Request, HTTPException
 
+
 from src.routers.public.public import router
 from src.utils.response import handler_error
 
src/factories/gen_question/factory.py CHANGED
@@ -3,6 +3,8 @@ from src.factories.gen_question.types.antonym_question import AntonymsQuestion
 from src.factories.gen_question.types.incorrect_word_question import IncorrectWordQuestion
 from src.factories.gen_question.types.stress_question import StressQuestion
 from src.factories.gen_question.types.synonym_question import SynonymsQuestion
+from src.factories.gen_question.types.fill_in_blank_question import FillInBlankQuestion
+from src.factories.gen_question.types.rearrange import RearrangenQuestion
 from src.utils.exceptions import BadRequestException
 
 
@@ -17,5 +19,9 @@ def create_question_instance(question_type: QuestionTypeEnum) :
         return AntonymsQuestion()
     elif question_type == QuestionTypeEnum.INCORRECT_WORD:
         return IncorrectWordQuestion()
+    elif question_type == QuestionTypeEnum.FILL_IN_BLANK:
+        return FillInBlankQuestion()
+    elif question_type == QuestionTypeEnum.REARRANGE:
+        return RearrangenQuestion()
     else:
         raise BadRequestException('type_invalid')
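For orientation, the extended factory would presumably be used along these lines (a sketch; the enum member names are taken from the branches above):

```python
from src.enums import QuestionTypeEnum
from src.factories.gen_question.factory import create_question_instance

# FILL_IN_BLANK now resolves to the new Gemini-backed generator.
question = create_question_instance(QuestionTypeEnum.FILL_IN_BLANK)
items = question.generate_questions(["market", "because", "home"], num_question=1)
```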
src/factories/gen_question/types/fill_in_blank_question.py CHANGED
@@ -1,11 +1,11 @@
 from typing import List, Optional
 import random
 
-from src.enums import QuestionTypeEnum
-from src.
-from src.
-from src.
-from src.
+from src.enums import QuestionTypeEnum, ChoiceTypeEnum
+from src.factories.gen_question.types.base import Question
+from src.llms.models import GeminiLLM
+from src.llms.tools import GEN_FILL_IN_BLANK_QUESTION_TOOL
+from src.llms.prompts import GEN_FILL_IN_BLANK_QUESTION_PROMPT
 
 
 class FillInBlankQuestion(Question):
@@ -15,6 +15,8 @@ class FillInBlankQuestion(Question):
     It picks a word, generates a sentence containing it, replaces it with a blank,
     and provides several answer choices (one correct and others incorrect).
     """
+    def __init__(self):
+        self.llm = GeminiLLM()
 
     def generate_questions(self, list_words: List[str], num_question: int = 1, num_ans_per_question: int = 4):
         if not list_words:
@@ -22,7 +24,6 @@ class FillInBlankQuestion(Question):
 
         result = []
         list_unique_words = set(list_words)
-        sentence_generator = SentenceGeneratorModel()
 
         def choice_word_to_gen_sentence():
             number_choice_word = random.randint(1, 4)
@@ -32,61 +33,39 @@ class FillInBlankQuestion(Question):
                 choice_word = random.sample(available_words, number_choice_word)
                 for w in choice_word:
                     list_unique_words.remove(w)
-
-
-            choice_word = available_words.copy()
-            remaining = number_choice_word - len(choice_word)
-            additional_words = random.sample(nltk_words, remaining)
-            choice_word += additional_words
-            list_unique_words.clear()
-
-            return choice_word
+                return choice_word
+            return []
 
         for _ in range(num_question):
-            [… about thirty deleted lines are illegible in the diff view …]
-            distractors = set()
-
-            for t in list_transform_type:
-                transformer = transform_word_instance(t)
-                transformed = transformer.transform_word(word)
-                if transformed and transformed != word:
-                    distractors.add(transformed)
-                if len(distractors) >= num_distractors:
-                    break
-
-            if len(distractors) < num_distractors and nltk_words:
-                additional = random.sample(nltk_words, num_distractors - len(distractors))
-                distractors.update(additional)
-
-            return list(distractors)
+            list_choice_words = choice_word_to_gen_sentence()
+
+            prompt = GEN_FILL_IN_BLANK_QUESTION_PROMPT
+            _tools = [GEN_FILL_IN_BLANK_QUESTION_TOOL]
+            raw_output = self.llm.generate_response(
+                messages=[
+                    {
+                        "role": "system",
+                        "content": prompt
+                    },
+                    {
+                        "role": "user",
+                        "content": f"List of words: {', '.join(list_choice_words)}, Type of question: {ChoiceTypeEnum.SINGLE_CHOICE.value}, Number of answer choices: {num_ans_per_question}"
+                    }
+                ],
+                tools=_tools,
+            )
+
+            if "tool_calls" in raw_output and raw_output["tool_calls"]:
+                for call in raw_output["tool_calls"]:
+                    if call.get("name") == "gen_fill_in_blank_question":
+                        data = call.get("arguments", {})
+                        result.append({
+                            "question": data.get("question"),
+                            "type": QuestionTypeEnum.FILL_IN_BLANK,
+                            "choices": data.get("choices", []),
+                            "answer": data.get("answer"),
+                            "explanation": data.get("explanation"),
+                            "tags": data.get("tags", []),
+                        })
+
+        return result
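The `tool_calls` loop above consumes the dict that `GeminiLLM._parse_response` returns when tools are passed (see src/llms/models/gemini.py below). An illustrative shape, with values borrowed from the tool schema's own examples:

```python
raw_output = {
    "content": None,
    "tool_calls": [
        {
            "name": "gen_fill_in_blank_question",
            "arguments": {
                "question": "She went to the market ____ it was near her home.",
                "choices": ["because", "although", "and", "but"],
                "answer": ["because"],
                "explanation": "She went to the market because it was near her home.",
                "tags": ["connector"],
            },
        }
    ],
}
```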
src/factories/gen_question/types/incorrect_word_question.py CHANGED
@@ -1,22 +1,16 @@
 from typing import List, Optional
 import random
 
-from src.
-from src.
-from src.
-from src.
-from src.
-
+from src.enums import ChoiceTypeEnum, QuestionTypeEnum
+from src.factories.gen_question.types.base import Question
+from src.llms.models import GeminiLLM
+from src.llms.tools import GEN_INCORRECT_WORD_QUESTION_TOOL
+from src.llms.prompts import GEN_INCORRECT_WORD_QUESTION_PROMPT
 
 class IncorrectWordQuestion(Question):
-    [… docstring opening lines illegible in the diff view …]
-    It selects a word from the list, generates a sentence using a simple pattern,
-    and injects a grammatically incorrect word into the sentence.
-    """
-
+    def __init__(self):
+        self.llm = GeminiLLM()
+
     def generate_questions(self, list_words: List[str], num_question: int = 1, num_ans_per_question: int = 4):
         if list_words is None:
             list_words = []
@@ -24,8 +18,6 @@ class IncorrectWordQuestion(Question):
         result = []
         list_unique_words = set(list_words)
 
-        sentence_generator = SentenceGeneratorModel()
-
         def choice_word_to_gen_sentence():
             number_choice_word = random.randint(1, 4)
 
@@ -34,55 +26,50 @@ class IncorrectWordQuestion(Question):
                 choice_word = random.sample(available_words, number_choice_word)
                 for w in choice_word:
                     list_unique_words.remove(w)
-
-            # Take all the remaining words and add words from nltk_words
-            choice_word = available_words.copy()
-            remaining = number_choice_word - len(choice_word)
-            additional_words = random.sample(nltk_words, remaining)
-            choice_word += additional_words
-            list_unique_words.clear()
+                return choice_word
 
-            return
+            return []
 
         for _ in range(num_question):
-            [… about forty deleted lines are illegible in the diff view …]
+            list_choice_words = choice_word_to_gen_sentence()
+
+            prompt = GEN_INCORRECT_WORD_QUESTION_PROMPT
+            _tools = [GEN_INCORRECT_WORD_QUESTION_TOOL]
+            raw_output = self.llm.generate_response(
+                messages=[
+                    {
+                        "role": "system",
+                        "content": prompt
+                    },
+                    {
+                        "role": "user",
+                        "content": f"List of words: {', '.join(list_choice_words)}, Type of question: {ChoiceTypeEnum.SINGLE_CHOICE.value}, Number of answer choices: {num_ans_per_question}"
+                    }
+                ],
+                tools=_tools,
+            )
+
+            if "tool_calls" in raw_output and raw_output["tool_calls"]:
+                for call in raw_output["tool_calls"]:
+                    if call.get("name") == "gen_find_error_question":
+                        data = call.get("arguments", {})
+                        result.append({
+                            "question": data.get("question"),
+                            "type": QuestionTypeEnum.INCORRECT_WORD,
+                            "choices": data.get("choices", []),
+                            "answer": data.get("answer"),
+                            "explanation": data.get("explanation"),
+                            "tags": data.get("tags", []),
+                        })
+
+            # random.shuffle(choices)
+            # result.append({
+            #     "question": modified_sentence,
+            #     "type": QuestionTypeEnum.INCORRECT_WORD,
+            #     "choices": choices,
+            #     "answer": choices.index(incorrect_word),
+            #     "explain": ["Correct: {sequence}"],
+            # })
+
+        return result
src/factories/gen_question/types/rearrange.py ADDED
@@ -0,0 +1,62 @@
+from typing import List, Optional
+import random
+
+from src.enums import QuestionTypeEnum
+from src.factories.gen_question.types.base import Question
+from src.llms.models import GeminiLLM
+from src.llms.prompts import GEN_NATURAL_SENTENCE_PROMPT
+
+
+class RearrangenQuestion(Question):
+    """
+    This class generates multiple-choice 'rearrange' questions.
+    """
+    def __init__(self):
+        self.llm = GeminiLLM()
+
+    def generate_questions(self, list_words: List[str], num_question: int = 1, num_ans_per_question: int = 4):
+        if not list_words:
+            list_words = []
+
+        result = []
+        list_unique_words = set(list_words)
+
+        def choice_word_to_gen_sentence():
+            number_choice_word = random.randint(1, 4)
+
+            available_words = list(list_unique_words)
+            if number_choice_word <= len(available_words):
+                choice_word = random.sample(available_words, number_choice_word)
+                for w in choice_word:
+                    list_unique_words.remove(w)
+                return choice_word
+            return []
+
+        for _ in range(num_question):
+            list_choice_words = choice_word_to_gen_sentence()
+
+            prompt = GEN_NATURAL_SENTENCE_PROMPT
+            sentence = self.llm.generate_response(
+                messages=[
+                    {
+                        "role": "system",
+                        "content": prompt
+                    },
+                    {
+                        "role": "user",
+                        "content": f"List of words: {', '.join(list_choice_words)}"
+                    }
+                ],
+            )
+
+            words = sentence.split()
+            shuffled_words = words[:]
+            random.shuffle(shuffled_words)
+
+            result.append({
+                "question": " / ".join(shuffled_words),
+                "type": QuestionTypeEnum.REARRANGE,
+                "answer": sentence
+            })
+
+        return result
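Since no `tools` are passed here, `generate_response` returns the model text directly (see `_parse_response` in gemini.py below), so `sentence` is a plain string. The shuffle step then yields question strings such as:

```python
import random

sentence = "She went to the market because it was near her home."
shuffled_words = sentence.split()
random.shuffle(shuffled_words)
# One possible "question" value:
# "market / because / She / home. / near / the / was / it / to / went / her"
print(" / ".join(shuffled_words))
```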
src/llms/models/__init__.py ADDED
@@ -0,0 +1 @@
+from .gemini import GeminiLLM
src/llms/models/base.py CHANGED
@@ -1,75 +1,43 @@
-import
-import
-
-
-class
-
-    """
-
-    Args:
-
-        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
-
-            torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
-            device_map="auto" if torch.cuda.is_available() else None,
-        ).to(self.device)
-        print("✅ Model and tokenizer loaded successfully.\n")
-
-    def tokenize_corpus(self, text: str, max_length: int = 256):
-        """Tokenize input text and return tensors."""
-        encode = self.tokenizer(
-            text,
-            return_tensors="pt",
-            max_length=max_length,
-            truncation=True,
-            padding=False,
-        )
-        return encode["input_ids"].to(self.device), encode["attention_mask"].to(self.device)
-
-    def inference(
-        self,
-        prompt: str,
-        temperature: float = 0.7,
-        top_p: float = 0.9,
-        num_beams: int = 1,
-        max_new_tokens: int = 128,
-        token_max_length: int = 256,
-    ):
-        """
-            temperature (float): Sampling temperature (higher = more creative).
-            top_p (float): Nucleus sampling parameter.
-            num_beams (int): Number of beams (set 1 for sampling).
-            max_new_tokens (int): Maximum number of tokens to generate.
-            token_max_length (int): Max length for tokenization.
-        """
-            input_ids=input_ids,
-            attention_mask=attention_mask,
-            do_sample=True if num_beams == 1 else False,
-            temperature=temperature,
-            top_p=top_p,
-            num_beams=num_beams,
-            max_new_tokens=max_new_tokens,
-            pad_token_id=self.tokenizer.eos_token_id,
-        )
-
-        return decoded.strip()
+from abc import ABC, abstractmethod
+from typing import Dict, List, Optional
+
+
+class LLMBase(ABC):
+    """
+    Base class for all LLM providers.
+    Handles common functionality and delegates provider-specific logic to subclasses.
+    """
+
+    @abstractmethod
+    def generate_response(
+        self, messages: List[Dict[str, str]], tools: Optional[List[Dict]] = None, tool_choice: str = "auto", **kwargs
+    ):
+        """
+        Generate a response based on the given messages.
+
+        Args:
+            messages (list): List of message dicts containing 'role' and 'content'.
+            tools (list, optional): List of tools that the model can call. Defaults to None.
+            tool_choice (str, optional): Tool choice method. Defaults to "auto".
+            **kwargs: Additional provider-specific parameters.
+
+        Returns:
+            str or dict: The generated response.
+        """
+        pass
+
+    [… a helper method signature here is illegible in the diff view …]
+        """
+        Get common parameters that most providers use.
+
+        Returns:
+            Dict: Common parameters dictionary.
+        """
+        params = {
+            "temperature": self.config.temperature,
+            "max_tokens": self.config.max_tokens,
+            "top_p": self.config.top_p,
+        }
+
+        # Add provider-specific parameters from kwargs
+        params.update(kwargs)
+
+        return params
src/llms/models/gemini.py ADDED
@@ -0,0 +1,212 @@
+import os
+import threading
+
+from typing import Dict, List, Optional
+
+try:
+    from google import genai
+    from google.genai import types
+except ImportError:
+    raise ImportError("The 'google-genai' library is required. Please install it using 'pip install google-genai'.")
+
+from .base import LLMBase
+from env import config
+
+
+class GeminiLLM(LLMBase):
+    _instance = None
+    _lock = threading.Lock()
+
+    def __new__(cls, *args, **kwargs):
+        with cls._lock:
+            if cls._instance is None:
+                cls._instance = super(GeminiLLM, cls).__new__(cls)
+        return cls._instance
+
+    def __init__(self, temperature=0.7, max_tokens=1024, top_p=0.9):
+        if hasattr(self, "_initialized") and self._initialized:
+            return
+        self.model = "gemini-2.5-flash"
+        self.temperature = temperature
+        self.max_tokens = max_tokens
+        self.top_p = top_p
+        self.client = genai.Client(api_key=config["google"]["api_key"])
+        self._initialized = True
+
+    def _parse_response(self, response, tools):
+        """
+        Process the response based on whether tools are used or not.
+
+        Args:
+            response: The raw response from API.
+            tools: The list of tools provided in the request.
+
+        Returns:
+            str or dict: The processed response.
+        """
+        if tools:
+            processed_response = {
+                "content": None,
+                "tool_calls": [],
+            }
+
+            # Extract content from the first candidate
+            if response.candidates and response.candidates[0].content.parts:
+                for part in response.candidates[0].content.parts:
+                    if hasattr(part, "text") and part.text:
+                        processed_response["content"] = part.text
+                        break
+
+            # Extract function calls
+            if response.candidates and response.candidates[0].content.parts:
+                for part in response.candidates[0].content.parts:
+                    if hasattr(part, "function_call") and part.function_call:
+                        fn = part.function_call
+                        processed_response["tool_calls"].append(
+                            {
+                                "name": fn.name,
+                                "arguments": dict(fn.args) if fn.args else {},
+                            }
+                        )
+
+            return processed_response
+        else:
+            if response.candidates and response.candidates[0].content.parts:
+                for part in response.candidates[0].content.parts:
+                    if hasattr(part, "text") and part.text:
+                        return part.text
+            return ""
+
+    def _reformat_messages(self, messages: List[Dict[str, str]]):
+        """
+        Reformat messages for Gemini.
+
+        Args:
+            messages: The list of messages provided in the request.
+
+        Returns:
+            tuple: (system_instruction, contents_list)
+        """
+        system_instruction = None
+        contents = []
+
+        for message in messages:
+            if message["role"] == "system":
+                system_instruction = message["content"]
+            else:
+                content = types.Content(
+                    parts=[types.Part(text=message["content"])],
+                    role=message["role"],
+                )
+                contents.append(content)
+
+        return system_instruction, contents
+
+    def _reformat_tools(self, tools: Optional[List[Dict]]):
+        """
+        Reformat tools for Gemini.
+
+        Args:
+            tools: The list of tools provided in the request.
+
+        Returns:
+            list: The list of tools in the required format.
+        """
+
+        def remove_additional_properties(data):
+            """Recursively removes 'additionalProperties' from nested dictionaries."""
+            if isinstance(data, dict):
+                filtered_dict = {
+                    key: remove_additional_properties(value)
+                    for key, value in data.items()
+                    if not (key == "additionalProperties")
+                }
+                return filtered_dict
+            else:
+                return data
+
+        if tools:
+            function_declarations = []
+            for tool in tools:
+                func = tool["function"].copy()
+                cleaned_func = remove_additional_properties(func)
+
+                function_declaration = types.FunctionDeclaration(
+                    name=cleaned_func["name"],
+                    description=cleaned_func.get("description", ""),
+                    parameters=cleaned_func.get("parameters", {}),
+                )
+                function_declarations.append(function_declaration)
+
+            tool_obj = types.Tool(function_declarations=function_declarations)
+            return [tool_obj]
+        else:
+            return None
+
+    def generate_response(
+        self,
+        messages: List[Dict[str, str]],
+        response_format=None,
+        tools: Optional[List[Dict]] = None,
+        tool_choice: str = "auto",
+    ):
+        """
+        Generate a response based on the given messages using Gemini.
+
+        Args:
+            messages (list): List of message dicts containing 'role' and 'content'.
+            response_format (str or object, optional): Format for the response. Defaults to "text".
+            tools (list, optional): List of tools that the model can call. Defaults to None.
+            tool_choice (str, optional): Tool choice method. Defaults to "auto".
+
+        Returns:
+            str: The generated response.
+        """
+
+        # Extract system instruction and reformat messages
+        system_instruction, contents = self._reformat_messages(messages)
+
+        # Prepare generation config
+        config_params = {
+            "temperature": self.temperature,
+            "max_output_tokens": self.max_tokens,
+            "top_p": self.top_p,
+        }
+
+        # Add system instruction to config if present
+        if system_instruction:
+            config_params["system_instruction"] = system_instruction
+
+        if response_format is not None and response_format["type"] == "json_object":
+            config_params["response_mime_type"] = "application/json"
+            if "schema" in response_format:
+                config_params["response_schema"] = response_format["schema"]
+
+        if tools:
+            formatted_tools = self._reformat_tools(tools)
+            config_params["tools"] = formatted_tools
+
+            if tool_choice:
+                if tool_choice == "auto":
+                    mode = types.FunctionCallingConfigMode.AUTO
+                elif tool_choice == "any":
+                    mode = types.FunctionCallingConfigMode.ANY
+                else:
+                    mode = types.FunctionCallingConfigMode.NONE
+
+                tool_config = types.ToolConfig(
+                    function_calling_config=types.FunctionCallingConfig(
+                        mode=mode,
+                        allowed_function_names=(
+                            [tool["function"]["name"] for tool in tools] if tool_choice == "any" else None
+                        ),
+                    )
+                )
+                config_params["tool_config"] = tool_config
+
+        generation_config = types.GenerateContentConfig(**config_params)
+        response = self.client.models.generate_content(
+            model=self.model, contents=contents, config=generation_config
+        )
+
+        return self._parse_response(response, tools)
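Putting the pieces together, direct use of this client looks roughly like the sketch below. Note that `__new__`/`__init__` make the class a process-wide singleton, so constructor arguments only take effect on first construction:

```python
from src.llms.models import GeminiLLM
from src.llms.tools import GEN_FILL_IN_BLANK_QUESTION_TOOL

llm = GeminiLLM()

# Without tools: returns the generated text as a str.
text = llm.generate_response(messages=[{"role": "user", "content": "Say hello."}])

# With tools: returns {"content": ..., "tool_calls": [{"name": ..., "arguments": {...}}]}.
out = llm.generate_response(
    messages=[{"role": "user", "content": "List of words: market, because"}],
    tools=[GEN_FILL_IN_BLANK_QUESTION_TOOL],
)
for call in out["tool_calls"]:
    print(call["name"], call["arguments"])
```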
src/llms/models/gemma.py DELETED
@@ -1,57 +0,0 @@
-from typing import List
-
-from .base import Model
-from src.enums import ChoiceTypeEnum
-from src.llms.prompts import GEN_INCORRECT_WORD_QUESTION_PROMPT
-
-
-class GemmaModel(Model):
-    """Generalized text generation model (compatible with Gemma / GPT-style models)."""
-
-    _instance = None
-
-    def __new__(cls):
-        if cls._instance is None:
-            cls._instance = super(GemmaModel, cls).__new__(cls)
-            cls._instance._init_model()
-        return cls._instance
-
-    def _init_model(self):
-        super().__init__(model_name="google/gemma-2b-it", device=None)
-
-    def generate(self, list_words: List[str], question_type: ChoiceTypeEnum, num_ans_per_question: int = 4) -> str:
-        """Generate a multiple-choice question from a word list and a question type.
-
-        Args:
-            list_words (List[str]): List of words used to create the question.
-            question_type (ChoiceTypeEnum): Type of multiple-choice question.
-            num_ans_per_question (int): Number of choices per question (default is 4).
-
-        Returns:
-            str: The generated question.
-
-        Raises:
-            ValueError: If the word list is empty or the number of choices is invalid.
-        """
-        if not list_words:
-            raise ValueError("The word list must not be empty.")
-        if num_ans_per_question < 2:
-            raise ValueError("The number of choices must be at least 2.")
-
-        prompt = GEN_INCORRECT_WORD_QUESTION_PROMPT.format(
-            list_of_words=", ".join(list_words),
-            question_type=question_type.value,
-            num_choices=num_ans_per_question
-        )
-
-        try:
-            return self.inference(
-                prompt,
-                num_beams=4,
-                no_repeat_ngram_size=2,
-                model_max_length=128,
-                num_return_sequences=1,
-                token_max_length=256,
-            )
-        except Exception as e:
-            raise RuntimeError(f"Error while generating the question: {e}")
src/llms/prompts/__init__.py CHANGED
@@ -1 +1,3 @@
-from .incorrect_question import GEN_INCORRECT_WORD_QUESTION_PROMPT
+from .incorrect_question import GEN_INCORRECT_WORD_QUESTION_PROMPT
+from .fill_in_blank import GEN_FILL_IN_BLANK_QUESTION_PROMPT
+from .natural_sentence import GEN_NATURAL_SENTENCE_PROMPT
src/llms/prompts/fill_in_blank.py ADDED
@@ -0,0 +1,54 @@
+GEN_FILL_IN_BLANK_QUESTION_PROMPT = """
+### Task
+You are an expert in automatically generating English exam questions.
+Create **one "Fill in the Blank" question** in English.
+
+### Input
+- A list of words to be used to create the sentence.
+- Question type: "single-choice" or "multiple-choice".
+- Desired number of answer choices.
+
+### Core requirements
+1. First produce a **grammatically correct, natural, meaningful English sentence** using all or most of the given words.
+   - Sentence may be simple or composed (up to 3 clauses).
+   - Sentence length: **8–30 words**.
+   - This correct sentence will be used later as the `explanation` value (the full correct version).
+   - If no words are provided, you may freely create a sentence.
+
+2. **Validation step (mandatory):** Before creating the blank, ensure the sentence is fully grammatical and natural.
+   - Avoid unnatural collocations or redundant connectors.
+   - If any connector misuse or tense inconsistency is found, rewrite the sentence until it is correct.
+
+### Connector rules (important — follow exactly)
+- **Do NOT combine a subordinating conjunction like "although", "though", "while", or "despite" with a coordinating conjunction "but" in the same sentence.**
+  - Incorrect: "Although he was tired, but he continued."
+  - Correct: "Although he was tired, he continued." or "He was tired, but he continued."
+- Do NOT use duplicate connectors (e.g., "although ... however").
+- Avoid redundant fillers such as "but yet", "and also", "and then then".
+- If you use "because", do not also use "so" in the same causal relationship.
+
+---
+
+### Blank-creation rules
+3. After you have a validated correct sentence, choose **1 (for single-choice)** or **2 or more (for multiple-choice)** important words to replace with blanks (`____`).
+   - Prefer key grammatical or lexical targets (e.g., verbs, prepositions, conjunctions, or collocations).
+   - Do not remove punctuation or articles unless necessary.
+   - Example:
+     - Original: "She went to the market because it was near her home."
+     - Fill-in: "She went to the market ____ it was near her home."
+
+4. The blank(s) must make sense — the question should be solvable through grammar or meaning, not guessing.
+
+---
+
+### Choices and answer
+5. Create a `choices` list of answer options (equal to the desired number).
+   - Include the correct word(s) from the original sentence.
+   - For incorrect distractors, use words of similar part of speech or similar meaning but wrong in context.
+
+6. The `answer` field must list the correct word(s) that fill the blank(s).
+7. The `explanation` field must contain the **full correct sentence** (before blanking).
+8. The `tags` field must list the linguistic skill tested (e.g., "preposition", "connector", "verb tense", "collocation", "vocabulary").
+
+Follow these rules strictly and make the question natural, educational, and clear.
+"""
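An output satisfying these rules, in the field layout enforced by the companion tool schema (src/llms/tools/fill_in_blank.py below), would look like this illustrative example built from the prompt's own sample sentence:

```python
{
    "question": "She went to the market ____ it was near her home.",
    "choices": ["because", "although", "and", "but"],
    "answer": ["because"],
    "explanation": "She went to the market because it was near her home.",
    "tags": ["connector"],
}
```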
src/llms/prompts/incorrect_question.py CHANGED
@@ -1,40 +1,77 @@
 GEN_INCORRECT_WORD_QUESTION_PROMPT = """
-You are an expert English question generator.
-
 ### Task
-[… the remainder of the old prompt body is largely illegible in the diff view; recoverable fragments include "### Input", "- Question type:", "1.", and "- If" …]
+You are an expert in automatically generating English exam questions.
+Create **one "Find the Error" question** in English.
 
 ### Input
+- A list of words to be used to create the sentence.
+- Question type: "single-choice" or "multiple-choice".
+- Desired number of answer choices.
+
+### Core requirements
+1. First produce a **grammatically correct, natural, meaningful English sentence** using all or most of the given words.
+   - Sentence may be simple or composed (up to 3 clauses).
+   - Sentence length: **8–30 words**.
+   - This correct sentence will be used later as the `explanation` value (without indices).
+   - If no words are provided, you may freely create a sentence.
+
+2. **Validation step (mandatory):** Before adding any errors, ensure the correct sentence is fully grammatical. If any common connector misuse (see "Connector rules" below) or other obvious mistake is present, rewrite the sentence until it is correct.
+
+### Connector rules (important — follow exactly)
+- **Do NOT combine a subordinating conjunction like "although", "though", "while", or "despite" with a coordinating conjunction "but" in the same sentence.**
+  - Incorrect: "Although he was tired, but he continued."
+  - Correct: "Although he was tired, he continued." or "He was tired, but he continued."
+- Do NOT repeat equivalent connectors (e.g., do not use "although" and "however" together to signal the same contrast).
+- Avoid redundant fillers such as "but yet", "and also", "and then then".
+- If you use "because", ensure the result clause logically follows and you do not also use "so" to repeat causation.
+
+### Error-introduction rules
+3. After you have a validated correct sentence, create an **erroneous version** by introducing errors according to `question_type`:
+   - `single-choice`: **exactly 1 error**.
+   - `multiple-choice`: **2 or more errors**.
+4. Add an index number to each word in the erroneous sentence (e.g., "He(1) talk(2) when(3) I(4) talk(5).").
+
+### Choices and answer
+5. Create a `choices` list (each option must include the index number as shown in the sentence). The total number of choices must equal the desired number.
+   - Include at least 1 incorrect option (for single-choice) or at least 2 incorrect options (for multiple-choice); the rest should be correct words/phrases.
+6. The `answer` field must list exactly the incorrect word(s)/phrase(s) with their indices — these must appear among `choices`.
+7. The `explanation` field must contain the validated **correct sentence** from step 1 (no indices).
+8. The `tags` field must list the error types that were introduced (e.g., "verb tense", "article", "vocabulary", "singular/plural", "sentence structure", "connector misuse", etc.).
+
+Follow these rules strictly to avoid connector redundancy and other common grammatical mistakes.
+"""
+
+
+GEN_INCORRECT_WORD_QUESTION_PROMPT_VI = """
+### Nhiệm vụ
+Bạn là chuyên gia tạo câu hỏi tiếng Anh tự động cho các bài thi.
+Hãy tạo **một câu hỏi dạng "Tìm lỗi sai" (Find the Error)** bằng tiếng Anh.
+
+### Đầu vào
+- Danh sách các từ được dùng để tạo câu.
+- Loại câu hỏi: "single-choice" hoặc "multiple-choice".
+- Số lượng lựa chọn mong muốn.
+
+### Hướng dẫn
+1. Tạo **một câu tiếng Anh tự nhiên, đúng ngữ pháp và đúng ngữ nghĩa**, sử dụng tất cả hoặc hầu hết các từ được cung cấp.
+   - Câu có thể là câu đơn hoặc câu phức (tối đa 3 mệnh đề, nối bằng *and, but, because, when, although*...).
+   - Độ dài câu: **8–30 từ**.
+   - Câu đúng này sẽ được sử dụng làm giá trị cho trường explanation ở cuối.
+
+2. Tạo ra **lỗi ngữ pháp hoặc lỗi từ vựng** trong câu dựa theo loại câu hỏi:
+   - Nếu `question_type = "single-choice"` thì tạo ra câu có đúng 1 lỗi.
+   - Nếu `question_type = "multiple-choice"` thì tạo ra câu có từ 2 lỗi trở lên.
+
+3. **Đánh số chỉ mục cho từng từ** trong câu (ví dụ: `"He(1) talk(2) when(3) I(4) talk(5)."`).
+
+4. Tạo danh sách **choices** bao gồm cả từ đúng và từ sai trong câu (có chỉ mục).
+   - Tổng số lượng lựa chọn = giá trị đã yêu cầu trong đầu vào.
+   - Phân bổ hợp lý: ít nhất 1 hoặc 2 từ sai, phần còn lại là từ đúng.
+
+5. Trường **answer** chứa chính xác các từ hoặc cụm sai (phải nằm trong `choices`).
+6. Trường **explanation** chứa chính xác Câu Đúng từ Mục 1 (không đánh số chỉ mục).
+7. Trường **tags** liệt kê loại lỗi (ví dụ: `"thì động từ"`, `"mạo từ"`, `"từ vựng"`, `"số ít/số nhiều"`, `"cấu trúc câu"`, v.v.).
+
+Tuân thủ chặt chẽ các hướng dẫn trên để đảm bảo chất lượng cao trong quá trình tạo câu hỏi.
 """
src/llms/prompts/natural_sentence.py ADDED
@@ -0,0 +1,25 @@
+GEN_NATURAL_SENTENCE_PROMPT = """
+### Task
+You are an expert English sentence generator.
+Your task is to create **one natural, grammatically correct, and meaningful English sentence**.
+
+### Input
+- A list of English words that should appear in the sentence (if provided).
+
+### Requirements
+1. Use **all or most** of the given words naturally and in the correct grammatical order.
+   - If no words are provided, freely create a natural sentence on any general topic.
+2. The sentence must:
+   - Be **fully grammatical and fluent**.
+   - Contain **8–20 words**.
+   - Be **coherent** (logical meaning, not random).
+   - Sound **natural** as if written by a native English speaker.
+3. Allowed topics: everyday life, travel, work, study, hobbies, nature, or simple human experiences.
+4. Avoid:
+   - Unnecessary repetition.
+   - Connector misuse (e.g., "Although … but …").
+   - Unnatural collocations or incomplete clauses.
+
+### Output
+Return **only one English sentence** that satisfies the above requirements.
+"""
src/llms/tools/__init__.py ADDED
@@ -0,0 +1,2 @@
+from .incorrect_question import GEN_INCORRECT_WORD_QUESTION_TOOL
+from .fill_in_blank import GEN_FILL_IN_BLANK_QUESTION_TOOL
src/llms/tools/fill_in_blank.py ADDED
@@ -0,0 +1,38 @@
+GEN_FILL_IN_BLANK_QUESTION_TOOL = {
+    "type": "function",
+    "function": {
+        "name": "gen_fill_in_blank_question",
+        "description": (
+            "Extract the components of a Fill in the Blank English question."
+        ),
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "question": {
+                    "type": "string",
+                    "description": "The generated fill-in-the-blank question (e.g., 'She went to the market ____ it was near her home.')."
+                },
+                "choices": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                    "description": "List of answer choices (e.g., ['because', 'although', 'and', 'but'])."
+                },
+                "answer": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                    "description": "List of the correct answer word(s) (e.g., ['because'])."
+                },
+                "explanation": {
+                    "type": "string",
+                    "description": "The correct full version of the sentence (before creating blanks)."
+                },
+                "tags": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                    "description": "List of linguistic categories tested (e.g., 'connector', 'preposition', 'verb tense', etc.)."
+                }
+            },
+            "required": ["question", "choices", "answer", "explanation", "tags"]
+        }
+    }
+}
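When this dict reaches `GeminiLLM._reformat_tools`, only the `function` entry is used: any `additionalProperties` keys are stripped recursively, and the rest becomes a `types.FunctionDeclaration`, roughly:

```python
from google.genai import types

from src.llms.tools import GEN_FILL_IN_BLANK_QUESTION_TOOL

func = GEN_FILL_IN_BLANK_QUESTION_TOOL["function"]
declaration = types.FunctionDeclaration(
    name=func["name"],
    description=func.get("description", ""),
    parameters=func.get("parameters", {}),
)
gemini_tool = types.Tool(function_declarations=[declaration])
```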
src/llms/tools/incorrect_question.py ADDED
@@ -0,0 +1,38 @@
+GEN_INCORRECT_WORD_QUESTION_TOOL = {
+    "type": "function",
+    "function": {
+        "name": "gen_find_error_question",
+        "description": (
+            "Extract the components of a Find the Error English question."
+        ),
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "question": {
+                    "type": "string",
+                    "description": "The generated question: an erroneous English sentence with each word indexed (e.g., 'He(1) talk(2) when(3) I(4) talk(5).')."
+                },
+                "choices": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                    "description": "List of answer choices. Each choice is an indexed word/phrase from the question (e.g., 'talk(2)')."
+                },
+                "answer": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                    "description": "List of the *exact* incorrect word(s)/phrase(s) *with their indices* (e.g., ['talk(2)', 'angry(8)']). This must match the incorrect options in 'choices'."
+                },
+                "explanation": {
+                    "type": "string",
+                    "description": "The grammatically correct version of the sentence (the original sentence from step 1 of the prompt), without indices."
+                },
+                "tags": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                    "description": "List of linguistic error types introduced in the question (e.g., 'verb tense', 'article', 'vocabulary')."
+                }
+            },
+            "required": ["question", "choices", "answer", "explanation", "tags"]
+        }
+    }
+}
src/routers/public/quesion.py CHANGED
@@ -25,7 +25,7 @@ async def generate_question(body: ICreateQuestion):
     return JSONResponse(status_code=200, content=res_ok(list_questions))
 
 @route.post('/sentence')
-async def generate_questions_from_sentence(
+async def generate_questions_from_sentence(body: ICQuestion, request: Request):
     """Process user request
 
     Args: