NavyDevilDoc committed on
Commit
c0086e2
·
verified ·
1 Parent(s): 68d8aff

Update src/core/QuizEngine.py

Browse files
Files changed (1) hide show
  1. src/core/QuizEngine.py +32 -33
src/core/QuizEngine.py CHANGED
@@ -24,27 +24,30 @@ class QuizEngine:
24
  "question": f"What does **{acronym}** stand for?"
25
  }
26
 
27
- # --- MODE 2: DOCUMENTS (The Fix) ---
28
- def get_document_context(self, username):
 
 
 
 
29
  user_dir = os.path.join(self.source_dir, username)
30
  if not os.path.exists(user_dir): return None
31
 
32
  files = [f for f in os.listdir(user_dir) if f.lower().endswith(('.txt', '.md'))]
33
  if not files: return None
34
 
35
- # Try up to 10 times to find a file with valid chunks
36
- for attempt in range(10):
 
 
 
37
  selected_file = random.choice(files)
38
  try:
39
  file_path = os.path.join(user_dir, selected_file)
40
  with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
41
  text = f.read()
42
 
43
- # --- NEW CHUNKING STRATEGY: SLIDING WINDOW ---
44
- # Instead of splitting by paragraphs (which kills lists),
45
- # we grab chunks of roughly 1000-1500 characters.
46
- # This ensures we get the Header + The Bullets together.
47
-
48
  step_size = 1000
49
  window_size = 1500
50
  candidates = []
@@ -53,18 +56,19 @@ class QuizEngine:
53
  if len(text) < window_size:
54
  candidates.append(text)
55
  else:
56
- # Slide a window across the text
57
  for i in range(0, len(text) - window_size, step_size):
58
  chunk = text[i : i + window_size]
59
 
60
- # BASIC SANITY CHECKS (Very Relaxed)
61
- # 1. Must have content
62
  if len(chunk.strip()) < 100: continue
63
-
64
- # 2. Must not be pure administrative junk
65
- # We let the LLM decide mostly, but we filter obvious "Blank Pages"
66
  if "intentionally left blank" in chunk.lower(): continue
67
 
 
 
 
 
 
 
68
  candidates.append(chunk)
69
 
70
  if not candidates: continue
@@ -85,24 +89,19 @@ class QuizEngine:
85
  return None
86
 
87
  def construct_question_generation_prompt(self, context_text):
88
- """
89
- REVISED PROMPT: 'Action-Based' Generation.
90
- Forces the model to generate a question for ANY valid information,
91
- lowering the bar for what constitutes 'examinable'.
92
- """
93
- return (
94
- f"Act as a Navy Board Examiner.\n"
95
- f"Here is a raw text excerpt from Navy documentation:\n"
96
- f"'''{context_text}'''\n\n"
97
- f"TASK: Generate a single question based on this text.\n\n"
98
- f"RULES:\n"
99
- f"1. If the text contains ANY facts (dates, definitions, locations, process steps, responsibilities), you MUST generate a question.\n"
100
- f"2. Do not judge the 'quality' of the fact. If the text says 'The NSWC is in Maryland', ask 'Where is the NSWC located?'.\n"
101
- f"3. Only output 'UNABLE' if the text is completely meaningless (e.g., a string of page numbers, a blank page, or garbled characters).\n\n"
102
- f"OUTPUT FORMAT:\n"
103
- f"- If valid: Just the question text.\n"
104
- f"- If invalid: The word 'UNABLE'."
105
- )
106
 
107
  def construct_grading_prompt(self, question, answer, context_text):
108
  return (
 
24
  "question": f"What does **{acronym}** stand for?"
25
  }
26
 
27
+ # --- MODE 2: DOCUMENTS (Updated for Guided Quizzing) ---
28
+ def get_document_context(self, username, topic_filter=None):
29
+ """
30
+ Fetches a context chunk.
31
+ If 'topic_filter' is provided, only returns chunks containing that text.
32
+ """
33
  user_dir = os.path.join(self.source_dir, username)
34
  if not os.path.exists(user_dir): return None
35
 
36
  files = [f for f in os.listdir(user_dir) if f.lower().endswith(('.txt', '.md'))]
37
  if not files: return None
38
 
39
+ # Shuffle files to ensure randomness even when filtering
40
+ random.shuffle(files)
41
+
42
+ # Try up to 20 times (increased from 10 to handle filtering)
43
+ for attempt in range(20):
44
  selected_file = random.choice(files)
45
  try:
46
  file_path = os.path.join(user_dir, selected_file)
47
  with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
48
  text = f.read()
49
 
50
+ # --- SLIDING WINDOW STRATEGY ---
 
 
 
 
51
  step_size = 1000
52
  window_size = 1500
53
  candidates = []
 
56
  if len(text) < window_size:
57
  candidates.append(text)
58
  else:
 
59
  for i in range(0, len(text) - window_size, step_size):
60
  chunk = text[i : i + window_size]
61
 
62
+ # 1. Sanity Check
 
63
  if len(chunk.strip()) < 100: continue
 
 
 
64
  if "intentionally left blank" in chunk.lower(): continue
65
 
66
+ # 2. TOPIC FILTER (The New Logic)
67
+ if topic_filter:
68
+ # Case-insensitive search
69
+ if topic_filter.lower() not in chunk.lower():
70
+ continue
71
+
72
  candidates.append(chunk)
73
 
74
  if not candidates: continue
 
89
  return None
90
 
91
  def construct_question_generation_prompt(self, context_text):
92
+ return (
93
+ f"Act as a Navy Board Examiner.\n"
94
+ f"Here is a raw text excerpt from Navy documentation:\n"
95
+ f"'''{context_text}'''\n\n"
96
+ f"TASK: Generate a single question based on this text.\n\n"
97
+ f"RULES:\n"
98
+ f"1. If the text contains ANY facts (dates, definitions, locations, process steps, responsibilities), you MUST generate a question.\n"
99
+ f"2. Do not judge the 'quality' of the fact. If the text says 'The NSWC is in Maryland', ask 'Where is the NSWC located?'.\n"
100
+ f"3. Only output 'UNABLE' if the text is completely meaningless (e.g., a string of page numbers, a blank page, or garbled characters).\n\n"
101
+ f"OUTPUT FORMAT:\n"
102
+ f"- If valid: Just the question text.\n"
103
+ f"- If invalid: The word 'UNABLE'."
104
+ )
 
 
 
 
 
105
 
106
  def construct_grading_prompt(self, question, answer, context_text):
107
  return (