Spaces:
Running on Zero
Running on Zero
| """ | |
| core/questioner.py — Generate study questions from text chunks using an LLM. | |
| Responsibility: | |
| Given a thematic chunk of course material, craft a single, focused | |
| open-ended question that tests conceptual understanding. The question | |
| should be answerable solely from the provided chunk. | |
| The prompt instructs the model to produce exactly one question with no | |
| surrounding commentary so the output can be displayed directly to the student. | |
| Public API: | |
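| generate_question(chunk: str, language: str = "English", difficulty: str = "Normal") -> str | |
| generate_mcq(chunk: str, language: str = "English") -> dict | |
| parse_mcq(raw: str) -> dict | |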
| """ | |
| import re | |
| import json | |
| from model.llm import get_llm | |
| from core.lang import ensure_english | |
# Instruction sentence inserted into the open-question prompt for each
# difficulty label. generate_question() falls back to the "Normal" entry
# when it is given a label that is not a key here.
_DIFFICULTY_HINT = dict(
    Easy="Ask for simple factual recall (What is X? Define X.).",
    Normal="Ask for conceptual understanding (Explain X. Why does X happen?).",
    Hard="Ask for analysis or application (Compare X and Y. How would you apply X to Y?).",
)
# Prompt for a single open-ended question.
# Placeholders filled by str.format: {difficulty_hint} and {chunk}.
# The template ends on "Question:" so the model's continuation is the
# question text itself. The output language is fixed to English by the
# rules below; there is no {language} placeholder.
_PROMPT_TEMPLATE = """\
You are a university professor creating exam questions.
Given the following excerpt from a course, write ONE focused question.
Difficulty — {difficulty_hint}
Rules:
- ONE question only, on ONE concept
- Maximum 25 words
- No sub-questions, no "and", no compound questions
- IMPORTANT: Always write the question in English, even if the source text is in another language
- Output only the question, nothing else
Excerpt:
{chunk}
Question:"""
# Prompt for a four-option multiple-choice question.
# Placeholder filled by str.format: {chunk}.
# The labelled output format requested here (QUESTION:, A)..D), CORRECT:,
# EXPLAIN_A..D:) is what parse_mcq() reads back line by line, so the two
# must stay in sync.
_MCQ_TEMPLATE = """\
You are a university professor creating a multiple choice exam question.
Given the following excerpt, write ONE multiple choice question.
Rules:
- One clear question, maximum 25 words
- The question must NOT contain or reveal the answer — do not quote the correct option in the question
- Exactly 4 options (A, B, C, D), only ONE is correct
- ALL 4 options MUST be completely different from each other — no two options may say the same thing
- All wrong options must be plausible — no obviously wrong answers
- IMPORTANT: Write ONLY in English — translate all concepts, do NOT quote the source in its original language
For each option, write a 1-sentence explanation of why it is correct or incorrect.
Output format (use EXACTLY these labels, one per line, nothing else):
QUESTION: <question>
A) <option>
B) <option>
C) <option>
D) <option>
CORRECT: <A, B, C or D>
EXPLAIN_A: <explanation>
EXPLAIN_B: <explanation>
EXPLAIN_C: <explanation>
EXPLAIN_D: <explanation>
Excerpt:
{chunk}
"""
# Line patterns for the labelled MCQ format. Whitespace after the label is
# optional because models do not always emit it; the payload must be non-empty.
_MCQ_CHOICE_RE = re.compile(r"^([ABCD])\)\s*(.+)")
_MCQ_EXPLAIN_RE = re.compile(r"^EXPLAIN_([ABCD]):\s*(.+)")
# A standalone option letter, e.g. the "B" in "B", "(B)", "**B**" or "B) Paris".
_MCQ_LETTER_RE = re.compile(r"\b([ABCD])\b")


def parse_mcq(raw: str) -> dict:
    """Parse the LLM's structured MCQ output into a dict.

    The text is read line by line; each line is stripped and matched against
    the labels ``QUESTION:``, ``A)``..``D)``, ``CORRECT:`` and
    ``EXPLAIN_A:``..``EXPLAIN_D:``. Lines that match none of them are ignored.
    If a label occurs more than once, the last occurrence wins.

    Args:
        raw: Raw model output.

    Returns:
        A dict with the keys:
          * ``"question"``: the question text, or ``""`` if absent.
          * ``"choices"``: mapping of option letter to option text.
          * ``"correct"``: the first standalone letter A-D found on the
            ``CORRECT:`` line (case-insensitive), or ``""`` if there is none.
          * ``"explanations"``: mapping of option letter to explanation text.
    """
    result: dict = {"question": "", "choices": {}, "correct": "", "explanations": {}}
    for line in raw.splitlines():
        line = line.strip()
        if line.startswith("QUESTION:"):
            result["question"] = line[len("QUESTION:"):].strip()
        elif m := _MCQ_CHOICE_RE.match(line):
            result["choices"][m.group(1)] = m.group(2)
        elif line.startswith("CORRECT:"):
            # Taking the first character blindly would yield "(" or "*" for
            # answers such as "(B)" or "**B**"; look for an actual option letter.
            letter = _MCQ_LETTER_RE.search(line[len("CORRECT:"):].upper())
            result["correct"] = letter.group(1) if letter else ""
        elif m := _MCQ_EXPLAIN_RE.match(line):
            result["explanations"][m.group(1)] = m.group(2)
    return result
def generate_mcq(chunk: str, language: str = "English") -> dict:
    """Return a multiple-choice question dict generated from *chunk*.

    The model is queried up to three times. An attempt is accepted as soon as
    the parsed result has a non-empty question, exactly four options whose
    texts are pairwise different (compared case-insensitively after
    stripping), and a ``correct`` letter that is one of the parsed options.
    If no attempt passes, the last parsed result is returned as-is, so
    callers should still be prepared for an incomplete dict.

    Args:
        chunk: Source excerpt; it is stripped and passed through
            ``ensure_english`` before being placed in the prompt.
        language: Forwarded to ``str.format``; the current template has no
            ``{language}`` placeholder, so it does not affect the prompt.

    Returns:
        The dict produced by ``parse_mcq`` for the accepted (or last) attempt.
    """
    llm = get_llm()
    prompt = _MCQ_TEMPLATE.format(chunk=ensure_english(chunk.strip()), language=language)
    mcq: dict = {}
    for _ in range(3):
        raw = llm.generate(prompt, temperature=0.8).strip()
        mcq = parse_mcq(raw)
        choices = mcq.get("choices", {})
        distinct_texts = {text.lower().strip() for text in choices.values()}
        if (
            len(choices) == 4
            and len(distinct_texts) == 4
            # Four distinct options are not enough: without a question or a
            # usable answer key the MCQ cannot be shown or graded, so retry.
            and mcq.get("question")
            and mcq.get("correct") in choices
        ):
            return mcq
    return mcq
def _clean_question(raw: str) -> str:
    """Keep only the first question — the model can ramble past it.

    Steps, in order:
      1. Strip whitespace and a leading ``question:`` label (any case).
      2. Cut the text at the earliest ramble marker (``Answer:`` or a new
         ``Question:`` line) that is not at position 0.
      3. If what remains contains a ``?``, keep everything up to and
         including the first one.

    The marker cut happens before the ``?`` search so that an imperative
    question without a question mark ("Explain X.") is not extended into a
    following answer that happens to contain one.

    Args:
        raw: Raw model continuation.

    Returns:
        The cleaned question text (possibly empty).
    """
    text = raw.strip()
    if text.lower().startswith("question:"):
        text = text[len("question:"):].strip()

    marker_positions = [
        text.find(marker) for marker in ("\nAnswer:", "Answer:", "\nQuestion:")
    ]
    # A marker at index 0 is ignored so the text is never cut down to nothing.
    cut = min((pos for pos in marker_positions if pos > 0), default=len(text))
    text = text[:cut]

    qmark = text.find("?")
    if qmark != -1:
        text = text[: qmark + 1]
    return text.strip()
def generate_question(chunk: str, language: str = "English", difficulty: str = "Normal") -> str:
    """Return a single study question generated from *chunk*.

    Args:
        chunk: Source excerpt; stripped and passed through ``ensure_english``
            before being inserted into the prompt.
        language: Forwarded to the template's ``format`` call.
        difficulty: Key into ``_DIFFICULTY_HINT``; unknown values use the
            "Normal" hint.

    Returns:
        The model output after ``_clean_question`` has trimmed it.
    """
    model = get_llm()
    excerpt = ensure_english(chunk.strip())
    hint = _DIFFICULTY_HINT.get(difficulty, _DIFFICULTY_HINT["Normal"])
    prompt = _PROMPT_TEMPLATE.format(
        chunk=excerpt,
        language=language,
        difficulty_hint=hint,
    )
    # The prompt caps questions at 25 words, so an 80-token budget is ample
    # and keeps generation short when the model does not stop on its own.
    raw_question = model.generate(prompt, max_new_tokens=80)
    return _clean_question(raw_question)