NavyDevilDoc committed on
Commit
174e731
·
verified ·
1 Parent(s): 9e30b0a

Update src/core/QuizEngine.py

Browse files
Files changed (1) hide show
  1. src/core/QuizEngine.py +40 -15
src/core/QuizEngine.py CHANGED
@@ -27,8 +27,9 @@ class QuizEngine:
27
  # --- MODE 2: DOCUMENTS (Updated for Guided Quizzing) ---
28
  def get_document_context(self, username, topic_filter=None):
29
  """
30
- Fetches a context chunk.
31
- If 'topic_filter' is provided, only returns chunks containing that text.
 
32
  """
33
  user_dir = os.path.join(self.source_dir, username)
34
  if not os.path.exists(user_dir): return None
@@ -36,10 +37,13 @@ class QuizEngine:
36
  files = [f for f in os.listdir(user_dir) if f.lower().endswith(('.txt', '.md'))]
37
  if not files: return None
38
 
39
- # Shuffle files to ensure randomness even when filtering
40
  random.shuffle(files)
41
 
42
- # Try up to 20 times (increased from 10 to handle filtering)
 
 
 
43
  for attempt in range(20):
44
  selected_file = random.choice(files)
45
  try:
@@ -47,30 +51,47 @@ class QuizEngine:
47
  with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
48
  text = f.read()
49
 
50
- # --- SLIDING WINDOW STRATEGY ---
 
 
 
 
 
 
 
 
 
 
51
  step_size = 1000
52
  window_size = 1500
53
  candidates = []
54
 
55
- # If text is short, just take the whole thing
56
  if len(text) < window_size:
57
  candidates.append(text)
58
  else:
 
59
  for i in range(0, len(text) - window_size, step_size):
60
  chunk = text[i : i + window_size]
61
 
62
- # 1. Sanity Check
63
- if len(chunk.strip()) < 100: continue
64
- if "intentionally left blank" in chunk.lower(): continue
65
 
66
- # 2. TOPIC FILTER (The New Logic)
67
- if topic_filter:
68
- # Case-insensitive search
69
- if topic_filter.lower() not in chunk.lower():
70
- continue
71
-
72
  candidates.append(chunk)
73
 
 
 
 
 
 
 
 
 
 
74
  if not candidates: continue
75
 
76
  # Success!
@@ -86,6 +107,10 @@ class QuizEngine:
86
  self.logger.error(f"Error fetching context from {selected_file}: {e}")
87
  continue
88
 
 
 
 
 
89
  return None
90
 
91
  def construct_question_generation_prompt(self, context_text):
 
27
  # --- MODE 2: DOCUMENTS (Updated for Guided Quizzing) ---
28
  def get_document_context(self, username, topic_filter=None):
29
  """
30
+ Fetches a context chunk with Tiered Fallback.
31
+ Returns None if absolutely no files exist.
32
+ Returns {'error': 'topic_not_found'} if the filter is too strict.
33
  """
34
  user_dir = os.path.join(self.source_dir, username)
35
  if not os.path.exists(user_dir): return None
 
37
  files = [f for f in os.listdir(user_dir) if f.lower().endswith(('.txt', '.md'))]
38
  if not files: return None
39
 
40
+ # Shuffle files to ensure randomness
41
  random.shuffle(files)
42
 
43
+ # Track if we found ANY matching files for the topic (for debugging)
44
+ topic_match_found = False
45
+
46
+ # Attempt loop
47
  for attempt in range(20):
48
  selected_file = random.choice(files)
49
  try:
 
51
  with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
52
  text = f.read()
53
 
54
+ # CRITICAL CHECK: Is file empty?
55
+ if len(text.strip()) < 50: continue
56
+
57
+ # --- TIER 1: FILTERING ---
58
+ if topic_filter:
59
+ if topic_filter.lower() not in text.lower():
60
+ continue # Skip this file, it doesn't have the topic
61
+ topic_match_found = True # We found a file that has the topic!
62
+
63
+ # --- TIER 2: CHUNKING ---
64
+ # Sliding Window Strategy
65
  step_size = 1000
66
  window_size = 1500
67
  candidates = []
68
 
69
+ # If text is small, take it all
70
  if len(text) < window_size:
71
  candidates.append(text)
72
  else:
73
+ # Scan the file
74
  for i in range(0, len(text) - window_size, step_size):
75
  chunk = text[i : i + window_size]
76
 
77
+ # Soft Filter: Skip if mostly empty
78
+ if len(chunk.strip()) < 50: continue
 
79
 
80
+ # Topic Check (Fine-grained)
81
+ if topic_filter and topic_filter.lower() not in chunk.lower():
82
+ continue
83
+
 
 
84
  candidates.append(chunk)
85
 
86
+ # FALLBACK: If candidates is empty (maybe topic is in file but split across chunks?)
87
+ # We just grab a random chunk from the file that contains the topic
88
+ if not candidates and topic_filter and topic_match_found:
89
+ # Crude fallback: Find the index of the word and grab text around it
90
+ idx = text.lower().find(topic_filter.lower())
91
+ start = max(0, idx - 500)
92
+ end = min(len(text), idx + 1000)
93
+ candidates.append(text[start:end])
94
+
95
  if not candidates: continue
96
 
97
  # Success!
 
107
  self.logger.error(f"Error fetching context from {selected_file}: {e}")
108
  continue
109
 
110
+ # If we failed after 20 tries:
111
+ if topic_filter and not topic_match_found:
112
+ return {"error": "topic_not_found"}
113
+
114
  return None
115
 
116
  def construct_question_generation_prompt(self, context_text):