Spaces:
Running on Zero
Running on Zero
File size: 4,697 Bytes
e1c0b77 978d90d e1c0b77 67da08d e1c0b77 e984a0a e1c0b77 e984a0a 3e7d6d6 e984a0a 3e7d6d6 490c5f1 3e7d6d6 e1c0b77 978d90d c8e8b73 978d90d c8e8b73 978d90d c8e8b73 978d90d 67da08d c8e8b73 8f2e039 c8e8b73 978d90d 81e2542 e984a0a e1c0b77 e984a0a 67da08d e984a0a 81e2542 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 | """
core/questioner.py — Generate study questions from text chunks using an LLM.
Responsibility:
Given a thematic chunk of course material, craft a single, focused
open-ended question that tests conceptual understanding. The question
should be answerable solely from the provided chunk.
The prompt instructs the model to produce exactly one question with no
surrounding commentary so the output can be displayed directly to the student.
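Public API:
generate_question(chunk: str, language: str = "English", difficulty: str = "Normal") -> str
generate_mcq(chunk: str, language: str = "English") -> dict
parse_mcq(raw: str) -> dict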
"""
import re
import json
from model.llm import get_llm
from core.lang import ensure_english
# Maps a difficulty label to the instruction sentence substituted for the
# {difficulty_hint} field of _PROMPT_TEMPLATE. generate_question() falls back
# to the "Normal" entry when it is given a label that is not a key here.
_DIFFICULTY_HINT = {
"Easy": "Ask for simple factual recall (What is X? Define X.).",
"Normal": "Ask for conceptual understanding (Explain X. Why does X happen?).",
"Hard": "Ask for analysis or application (Compare X and Y. How would you apply X to Y?).",
}
# Prompt for the open-ended question path (generate_question). It is filled
# with str.format and declares two fields: {difficulty_hint} and {chunk}.
# There is no {language} field, so the `language` keyword passed by the
# caller does not change the prompt text.
# The text ends with "Question:" — presumably so the model continues with the
# question itself; _clean_question() strips an echoed "Question:" prefix.
# NOTE(review): the "β" after "Difficulty" looks like a mis-decoded dash or
# arrow — confirm against the repository copy before changing the string.
_PROMPT_TEMPLATE = """\
You are a university professor creating exam questions.
Given the following excerpt from a course, write ONE focused question.
Difficulty β {difficulty_hint}
Rules:
- ONE question only, on ONE concept
- Maximum 25 words
- No sub-questions, no "and", no compound questions
- IMPORTANT: Always write the question in English, even if the source text is in another language
- Output only the question, nothing else
Excerpt:
{chunk}
Question:"""
# Prompt for the multiple-choice path (generate_mcq). It is filled with
# str.format and declares a single field, {chunk}; there is no {language}
# field, so the `language` keyword passed by the caller has no effect here.
# The labelled output format requested below (QUESTION:, A) .. D), CORRECT:,
# EXPLAIN_A: .. EXPLAIN_D:) is exactly what parse_mcq() recognises line by
# line, so the two must be changed together.
# NOTE(review): the "β" characters inside the rules look like mis-decoded
# dashes — confirm against the repository copy before changing the string.
_MCQ_TEMPLATE = """\
You are a university professor creating a multiple choice exam question.
Given the following excerpt, write ONE multiple choice question.
Rules:
- One clear question, maximum 25 words
- The question must NOT contain or reveal the answer β do not quote the correct option in the question
- Exactly 4 options (A, B, C, D), only ONE is correct
- ALL 4 options MUST be completely different from each other β no two options may say the same thing
- All wrong options must be plausible β no obviously wrong answers
- IMPORTANT: Write ONLY in English β translate all concepts, do NOT quote the source in its original language
For each option, write a 1-sentence explanation of why it is correct or incorrect.
Output format (use EXACTLY these labels, one per line, nothing else):
QUESTION: <question>
A) <option>
B) <option>
C) <option>
D) <option>
CORRECT: <A, B, C or D>
EXPLAIN_A: <explanation>
EXPLAIN_B: <explanation>
EXPLAIN_C: <explanation>
EXPLAIN_D: <explanation>
Excerpt:
{chunk}
"""
# Line-level patterns for the labelled MCQ format. Labels are matched
# case-insensitively and whitespace after the delimiter is optional, because
# model output does not always follow the requested layout exactly.
_MCQ_QUESTION_RE = re.compile(r"QUESTION:\s*(.*)", re.IGNORECASE)
_MCQ_OPTION_RE = re.compile(r"([ABCD])\)\s*(.+)")
_MCQ_CORRECT_RE = re.compile(r"CORRECT:(.*)", re.IGNORECASE)
_MCQ_EXPLAIN_RE = re.compile(r"EXPLAIN_([ABCD]):\s*(.+)", re.IGNORECASE)
# A stand-alone option letter, e.g. the "B" in "(B)", "**B**" or "B) text".
_MCQ_LETTER_RE = re.compile(r"\b([ABCD])\b")


def parse_mcq(raw: str) -> dict:
    """Parse the LLM's structured MCQ output into a dict.

    Each line of *raw* is stripped and matched against the labelled format
    (``QUESTION:``, ``A)`` .. ``D)``, ``CORRECT:``, ``EXPLAIN_A:`` ..
    ``EXPLAIN_D:``). Lines that match none of the labels are ignored, and a
    label that appears more than once keeps its last value.

    Args:
        raw: The raw text produced by the model.

    Returns:
        A dict with four keys:
        ``"question"`` (str, ``""`` if absent),
        ``"choices"`` (option letter -> option text),
        ``"correct"`` (one of ``"A"``..``"D"``, or ``""`` when the CORRECT
        line is missing or names no valid option letter),
        ``"explanations"`` (option letter -> explanation text).
    """
    result: dict = {"question": "", "choices": {}, "correct": "", "explanations": {}}
    for line in raw.splitlines():
        line = line.strip()
        if m := _MCQ_QUESTION_RE.match(line):
            result["question"] = m.group(1).strip()
        elif m := _MCQ_OPTION_RE.match(line):
            result["choices"][m.group(1)] = m.group(2)
        elif m := _MCQ_CORRECT_RE.match(line):
            # Look for a stand-alone option letter rather than taking the
            # first character, so "(B)" or "**B**" yields "B" and junk such
            # as "?" yields "" instead of an invalid answer key.
            letter = _MCQ_LETTER_RE.search(m.group(1).upper())
            result["correct"] = letter.group(1) if letter else ""
        elif m := _MCQ_EXPLAIN_RE.match(line):
            result["explanations"][m.group(1).upper()] = m.group(2)
    return result
def generate_mcq(chunk: str, language: str = "English") -> dict:
    """Return a multiple-choice question dict generated from *chunk*.

    The chunk is stripped, passed through ``ensure_english`` and inserted
    into ``_MCQ_TEMPLATE``. The model is queried up to three times; the first
    attempt that parses into a usable MCQ is returned.

    An attempt is usable when it has a non-empty question, four options whose
    texts are pairwise different (ignoring case and surrounding whitespace),
    and a ``"correct"`` letter that names one of those options.

    Args:
        chunk: Excerpt of course material to base the question on.
        language: Forwarded to the template's ``format`` call. It only has an
            effect if the template declares a ``{language}`` field.

    Returns:
        The dict produced by ``parse_mcq``. If no attempt is usable, the
        parse of the last attempt is returned as-is, so callers should still
        be prepared for missing fields.
    """
    llm = get_llm()
    prompt = _MCQ_TEMPLATE.format(chunk=ensure_english(chunk.strip()), language=language)
    mcq: dict = {}
    for _ in range(3):
        raw = llm.generate(prompt, temperature=0.8).strip()
        mcq = parse_mcq(raw)
        choices = mcq.get("choices", {})
        distinct_texts = {text.lower().strip() for text in choices.values()}
        if (
            len(choices) == 4
            and len(distinct_texts) == 4
            # Without a question or a valid answer key the MCQ cannot be
            # shown or graded, so such an attempt is retried as well.
            and mcq.get("question")
            and mcq.get("correct") in choices
        ):
            return mcq
    return mcq
def _clean_question(raw: str) -> str:
"""Keep only the first question β the model can ramble past it."""
text = raw.strip()
if text.lower().startswith("question:"):
text = text[9:].strip()
qmark = text.find("?")
if qmark != -1:
return text[: qmark + 1].strip()
for marker in ("\nAnswer:", "Answer:", "\nQuestion:"):
idx = text.find(marker)
if idx > 0:
text = text[:idx]
return text.strip()
def generate_question(chunk: str, language: str = "English", difficulty: str = "Normal") -> str:
    """Return a single study question generated from *chunk*.

    Args:
        chunk: Excerpt of course material; it is stripped and passed through
            ``ensure_english`` before being placed in the prompt.
        language: Forwarded to the template's ``format`` call. It only has an
            effect if the template declares a ``{language}`` field.
        difficulty: Key into ``_DIFFICULTY_HINT``; an unknown value falls
            back to the ``"Normal"`` hint.

    Returns:
        The model output after ``_clean_question`` has trimmed it.
    """
    model = get_llm()
    excerpt = ensure_english(chunk.strip())
    hint = _DIFFICULTY_HINT.get(difficulty, _DIFFICULTY_HINT["Normal"])
    prompt = _PROMPT_TEMPLATE.format(
        chunk=excerpt,
        language=language,
        difficulty_hint=hint,
    )
    # The prompt caps questions at 25 words, so 80 new tokens is ample and
    # bounds the cost when the model does not stop on its own.
    raw_output = model.generate(prompt, max_new_tokens=80)
    return _clean_question(raw_output)
|