NavyDevilDoc committed on
Commit
174e731
·
verified ·
1 Parent(s): 9e30b0a

Update src/core/QuizEngine.py

Browse files
Files changed (1) hide show
  1. src/core/QuizEngine.py +40 -15
src/core/QuizEngine.py CHANGED
@@ -27,8 +27,9 @@ class QuizEngine:
27
  # --- MODE 2: DOCUMENTS (Updated for Guided Quizzing) ---
28
  def get_document_context(self, username, topic_filter=None):
29
  """
30
- Fetches a context chunk.
31
- If 'topic_filter' is provided, only returns chunks containing that text.
 
32
  """
33
  user_dir = os.path.join(self.source_dir, username)
34
  if not os.path.exists(user_dir): return None
@@ -36,10 +37,13 @@ class QuizEngine:
36
  files = [f for f in os.listdir(user_dir) if f.lower().endswith(('.txt', '.md'))]
37
  if not files: return None
38
 
39
- # Shuffle files to ensure randomness even when filtering
40
  random.shuffle(files)
41
 
42
- # Try up to 20 times (increased from 10 to handle filtering)
 
 
 
43
  for attempt in range(20):
44
  selected_file = random.choice(files)
45
  try:
@@ -47,30 +51,47 @@ class QuizEngine:
47
  with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
48
  text = f.read()
49
 
50
- # --- SLIDING WINDOW STRATEGY ---
 
 
 
 
 
 
 
 
 
 
51
  step_size = 1000
52
  window_size = 1500
53
  candidates = []
54
 
55
- # If text is short, just take the whole thing
56
  if len(text) < window_size:
57
  candidates.append(text)
58
  else:
 
59
  for i in range(0, len(text) - window_size, step_size):
60
  chunk = text[i : i + window_size]
61
 
62
- # 1. Sanity Check
63
- if len(chunk.strip()) < 100: continue
64
- if "intentionally left blank" in chunk.lower(): continue
65
 
66
- # 2. TOPIC FILTER (The New Logic)
67
- if topic_filter:
68
- # Case-insensitive search
69
- if topic_filter.lower() not in chunk.lower():
70
- continue
71
-
72
  candidates.append(chunk)
73
 
 
 
 
 
 
 
 
 
 
74
  if not candidates: continue
75
 
76
  # Success!
@@ -86,6 +107,10 @@ class QuizEngine:
86
  self.logger.error(f"Error fetching context from {selected_file}: {e}")
87
  continue
88
 
 
 
 
 
89
  return None
90
 
91
  def construct_question_generation_prompt(self, context_text):
 
27
  # --- MODE 2: DOCUMENTS (Updated for Guided Quizzing) ---
28
  def get_document_context(self, username, topic_filter=None):
29
  """
30
+ Fetches a context chunk with Tiered Fallback.
31
+ Returns None if absolutely no files exist.
32
+ Returns {'error': 'topic_not_found'} if the filter is too strict.
33
  """
34
  user_dir = os.path.join(self.source_dir, username)
35
  if not os.path.exists(user_dir): return None
 
37
  files = [f for f in os.listdir(user_dir) if f.lower().endswith(('.txt', '.md'))]
38
  if not files: return None
39
 
40
+ # Shuffle files to ensure randomness
41
  random.shuffle(files)
42
 
43
+ # Track if we found ANY matching files for the topic (for debugging)
44
+ topic_match_found = False
45
+
46
+ # Attempt loop
47
  for attempt in range(20):
48
  selected_file = random.choice(files)
49
  try:
 
51
  with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
52
  text = f.read()
53
 
54
+ # CRITICAL CHECK: Is file empty?
55
+ if len(text.strip()) < 50: continue
56
+
57
+ # --- TIER 1: FILTERING ---
58
+ if topic_filter:
59
+ if topic_filter.lower() not in text.lower():
60
+ continue # Skip this file, it doesn't have the topic
61
+ topic_match_found = True # We found a file that has the topic!
62
+
63
+ # --- TIER 2: CHUNKING ---
64
+ # Sliding Window Strategy
65
  step_size = 1000
66
  window_size = 1500
67
  candidates = []
68
 
69
+ # If text is small, take it all
70
  if len(text) < window_size:
71
  candidates.append(text)
72
  else:
73
+ # Scan the file
74
  for i in range(0, len(text) - window_size, step_size):
75
  chunk = text[i : i + window_size]
76
 
77
+ # Soft Filter: Skip if mostly empty
78
+ if len(chunk.strip()) < 50: continue
 
79
 
80
+ # Topic Check (Fine-grained)
81
+ if topic_filter and topic_filter.lower() not in chunk.lower():
82
+ continue
83
+
 
 
84
  candidates.append(chunk)
85
 
86
+ # FALLBACK: If candidates is empty (maybe topic is in file but split across chunks?)
87
+ # We just grab a random chunk from the file that contains the topic
88
+ if not candidates and topic_filter and topic_match_found:
89
+ # Crude fallback: Find the index of the word and grab text around it
90
+ idx = text.lower().find(topic_filter.lower())
91
+ start = max(0, idx - 500)
92
+ end = min(len(text), idx + 1000)
93
+ candidates.append(text[start:end])
94
+
95
  if not candidates: continue
96
 
97
  # Success!
 
107
  self.logger.error(f"Error fetching context from {selected_file}: {e}")
108
  continue
109
 
110
+ # If we failed after 20 tries:
111
+ if topic_filter and not topic_match_found:
112
+ return {"error": "topic_not_found"}
113
+
114
  return None
115
 
116
  def construct_question_generation_prompt(self, context_text):