Spaces:
Sleeping
Sleeping
Update src/core/QuizEngine.py
Browse files- src/core/QuizEngine.py +32 -33
src/core/QuizEngine.py
CHANGED
|
@@ -24,27 +24,30 @@ class QuizEngine:
|
|
| 24 |
"question": f"What does **{acronym}** stand for?"
|
| 25 |
}
|
| 26 |
|
| 27 |
-
# --- MODE 2: DOCUMENTS (
|
| 28 |
-
def get_document_context(self, username):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
user_dir = os.path.join(self.source_dir, username)
|
| 30 |
if not os.path.exists(user_dir): return None
|
| 31 |
|
| 32 |
files = [f for f in os.listdir(user_dir) if f.lower().endswith(('.txt', '.md'))]
|
| 33 |
if not files: return None
|
| 34 |
|
| 35 |
-
#
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
| 37 |
selected_file = random.choice(files)
|
| 38 |
try:
|
| 39 |
file_path = os.path.join(user_dir, selected_file)
|
| 40 |
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
|
| 41 |
text = f.read()
|
| 42 |
|
| 43 |
-
# ---
|
| 44 |
-
# Instead of splitting by paragraphs (which kills lists),
|
| 45 |
-
# we grab chunks of roughly 1000-1500 characters.
|
| 46 |
-
# This ensures we get the Header + The Bullets together.
|
| 47 |
-
|
| 48 |
step_size = 1000
|
| 49 |
window_size = 1500
|
| 50 |
candidates = []
|
|
@@ -53,18 +56,19 @@ class QuizEngine:
|
|
| 53 |
if len(text) < window_size:
|
| 54 |
candidates.append(text)
|
| 55 |
else:
|
| 56 |
-
# Slide a window across the text
|
| 57 |
for i in range(0, len(text) - window_size, step_size):
|
| 58 |
chunk = text[i : i + window_size]
|
| 59 |
|
| 60 |
-
#
|
| 61 |
-
# 1. Must have content
|
| 62 |
if len(chunk.strip()) < 100: continue
|
| 63 |
-
|
| 64 |
-
# 2. Must not be pure administrative junk
|
| 65 |
-
# We let the LLM decide mostly, but we filter obvious "Blank Pages"
|
| 66 |
if "intentionally left blank" in chunk.lower(): continue
|
| 67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
candidates.append(chunk)
|
| 69 |
|
| 70 |
if not candidates: continue
|
|
@@ -85,24 +89,19 @@ class QuizEngine:
|
|
| 85 |
return None
|
| 86 |
|
| 87 |
def construct_question_generation_prompt(self, context_text):
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
""
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
f"3. Only output 'UNABLE' if the text is completely meaningless (e.g., a string of page numbers, a blank page, or garbled characters).\n\n"
|
| 102 |
-
f"OUTPUT FORMAT:\n"
|
| 103 |
-
f"- If valid: Just the question text.\n"
|
| 104 |
-
f"- If invalid: The word 'UNABLE'."
|
| 105 |
-
)
|
| 106 |
|
| 107 |
def construct_grading_prompt(self, question, answer, context_text):
|
| 108 |
return (
|
|
|
|
| 24 |
"question": f"What does **{acronym}** stand for?"
|
| 25 |
}
|
| 26 |
|
| 27 |
+
# --- MODE 2: DOCUMENTS (Updated for Guided Quizzing) ---
|
| 28 |
+
def get_document_context(self, username, topic_filter=None):
|
| 29 |
+
"""
|
| 30 |
+
Fetches a context chunk.
|
| 31 |
+
If 'topic_filter' is provided, only returns chunks containing that text.
|
| 32 |
+
"""
|
| 33 |
user_dir = os.path.join(self.source_dir, username)
|
| 34 |
if not os.path.exists(user_dir): return None
|
| 35 |
|
| 36 |
files = [f for f in os.listdir(user_dir) if f.lower().endswith(('.txt', '.md'))]
|
| 37 |
if not files: return None
|
| 38 |
|
| 39 |
+
# Shuffle files to ensure randomness even when filtering
|
| 40 |
+
random.shuffle(files)
|
| 41 |
+
|
| 42 |
+
# Try up to 20 times (increased from 10 to handle filtering)
|
| 43 |
+
for attempt in range(20):
|
| 44 |
selected_file = random.choice(files)
|
| 45 |
try:
|
| 46 |
file_path = os.path.join(user_dir, selected_file)
|
| 47 |
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
|
| 48 |
text = f.read()
|
| 49 |
|
| 50 |
+
# --- SLIDING WINDOW STRATEGY ---
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
step_size = 1000
|
| 52 |
window_size = 1500
|
| 53 |
candidates = []
|
|
|
|
| 56 |
if len(text) < window_size:
|
| 57 |
candidates.append(text)
|
| 58 |
else:
|
|
|
|
| 59 |
for i in range(0, len(text) - window_size, step_size):
|
| 60 |
chunk = text[i : i + window_size]
|
| 61 |
|
| 62 |
+
# 1. Sanity Check
|
|
|
|
| 63 |
if len(chunk.strip()) < 100: continue
|
|
|
|
|
|
|
|
|
|
| 64 |
if "intentionally left blank" in chunk.lower(): continue
|
| 65 |
|
| 66 |
+
# 2. TOPIC FILTER (The New Logic)
|
| 67 |
+
if topic_filter:
|
| 68 |
+
# Case-insensitive search
|
| 69 |
+
if topic_filter.lower() not in chunk.lower():
|
| 70 |
+
continue
|
| 71 |
+
|
| 72 |
candidates.append(chunk)
|
| 73 |
|
| 74 |
if not candidates: continue
|
|
|
|
| 89 |
return None
|
| 90 |
|
| 91 |
def construct_question_generation_prompt(self, context_text):
|
| 92 |
+
return (
|
| 93 |
+
f"Act as a Navy Board Examiner.\n"
|
| 94 |
+
f"Here is a raw text excerpt from Navy documentation:\n"
|
| 95 |
+
f"'''{context_text}'''\n\n"
|
| 96 |
+
f"TASK: Generate a single question based on this text.\n\n"
|
| 97 |
+
f"RULES:\n"
|
| 98 |
+
f"1. If the text contains ANY facts (dates, definitions, locations, process steps, responsibilities), you MUST generate a question.\n"
|
| 99 |
+
f"2. Do not judge the 'quality' of the fact. If the text says 'The NSWC is in Maryland', ask 'Where is the NSWC located?'.\n"
|
| 100 |
+
f"3. Only output 'UNABLE' if the text is completely meaningless (e.g., a string of page numbers, a blank page, or garbled characters).\n\n"
|
| 101 |
+
f"OUTPUT FORMAT:\n"
|
| 102 |
+
f"- If valid: Just the question text.\n"
|
| 103 |
+
f"- If invalid: The word 'UNABLE'."
|
| 104 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
|
| 106 |
def construct_grading_prompt(self, question, answer, context_text):
|
| 107 |
return (
|